[Alignment] Use Align for TargetLowering::MinStackArgumentAlignment
lib/Target/PowerPC/PPCISelLowering.cpp
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);

static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
cl::desc("enable quad precision float support on ppc"), cl::Hidden);

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? llvm::Align(8) : llvm::Align(4));
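  // A quick sketch of the llvm::Align API relied on above (these are the
  // real LLVM names): the constructor asserts its argument is a power of
  // two, and value() returns the alignment back as a byte count.
  //   llvm::Align A(8);            // OK: 8 is a power of two
  //   uint64_t Bytes = A.value();  // Bytes == 8
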
  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    if (hasSPE()) {
      addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
    } else {
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
    }
  }

  // Match BITREVERSE to customized fast code sequence in the td file.
  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);

  // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  if (!Subtarget.hasSPE()) {
    setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
  }

  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Legal);
    setOperationAction(ISD::ADDE, VT, Legal);
    setOperationAction(ISD::SUBC, VT, Legal);
    setOperationAction(ISD::SUBE, VT, Legal);
  }

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }
  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions unless we are on P9.
  // On P9 we may use a hardware instruction to compute the remainder.
  // The instructions are not legalized directly because in the cases where the
  // result of both the remainder and the division is required it is more
  // efficient to compute the remainder from the result of the division rather
  // than use the remainder instruction.
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::SREM, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
    setOperationAction(ISD::SREM, MVT::i64, Custom);
    setOperationAction(ISD::UREM, MVT::i64, Custom);
  } else {
    setOperationAction(ISD::SREM, MVT::i32, Expand);
    setOperationAction(ISD::UREM, MVT::i32, Expand);
    setOperationAction(ISD::SREM, MVT::i64, Expand);
    setOperationAction(ISD::UREM, MVT::i64, Expand);
  }

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);
  if (Subtarget.hasSPE()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  } else {
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
  }

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }
  // PowerPC does not have BSWAP, but we can use the vector BSWAP instruction
  // xxbrd to speed up scalar BSWAP64.
  // CTPOP and CTTZ were introduced in P8 and P9 respectively.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  if (Subtarget.hasP9Vector())
    setOperationAction(ISD::BSWAP, MVT::i64, Custom);
  else
    setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::CTTZ, MVT::i32, Legal);
    setOperationAction(ISD::CTTZ, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTTZ, MVT::i32, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  }

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
    setOperationAction(ISD::CTPOP, MVT::i32, Legal);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32, Expand);
  setOperationAction(ISD::ROTR, MVT::i64, Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select.
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  if (Subtarget.hasSPE()) {
    // SPE has built-in conversions.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  } else {
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

    // PowerPC does not have [U|S]INT_TO_FP.
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
  }

  if (Subtarget.hasDirectMove() && isPPC64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, etc. As a result, no
  // other SjLj exception interfaces are implemented; please don't build
  // your own exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  if (Subtarget.is64BitELFABI()) {
    // VAARG always uses double-word chunks, so promote anything smaller.
    setOperationAction(ISD::VAARG, MVT::i1, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i8, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i16, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i32, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::Other, Expand);
  } else if (Subtarget.is32BitELFABI()) {
    // VAARG is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
    setOperationAction(ISD::VAARG, MVT::i64, Custom);
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
  if (Subtarget.is32BitELFABI())
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  // Comparisons that require checking two conditions.
  if (Subtarget.hasSPE()) {
    setCondCodeAction(ISD::SETO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
  }
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    if (Subtarget.hasSPE())
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
    else
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }
  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // For v2i64, these are only valid with P8Vector. This is corrected after
      // the loop.
      if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
        setOperationAction(ISD::SMAX, VT, Legal);
        setOperationAction(ISD::SMIN, VT, Legal);
        setOperationAction(ISD::UMAX, VT, Legal);
        setOperationAction(ISD::UMIN, VT, Legal);
      }
      else {
        setOperationAction(ISD::SMAX, VT, Expand);
        setOperationAction(ISD::SMIN, VT, Expand);
        setOperationAction(ISD::UMAX, VT, Expand);
        setOperationAction(ISD::UMIN, VT, Expand);
      }

      if (Subtarget.hasVSX()) {
        setOperationAction(ISD::FMAXNUM, VT, Legal);
        setOperationAction(ISD::FMINNUM, VT, Legal);
      }

      // Vector instructions introduced in P8.
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      }
      else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // Vector instructions introduced in P9.
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
        setOperationAction(ISD::CTTZ, VT, Legal);
      else
        setOperationAction(ISD::CTTZ, VT, Expand);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType (ISD::AND, VT, MVT::v4i32);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType (ISD::OR, VT, MVT::v4i32);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType (ISD::XOR, VT, MVT::v4i32);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType (ISD::LOAD, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL, VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }
    if (!Subtarget.hasP8Vector()) {
      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
      setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
    }

    for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
      setOperationAction(ISD::ABS, VT, Custom);

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // Vector truncates to sub-word integers that fit in an Altivec/VSX
    // register are cheap, so handle them before they get expanded to scalar.
    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);

    setOperationAction(ISD::AND, MVT::v4i32, Legal);
    setOperationAction(ISD::OR, MVT::v4i32, Legal);
    setOperationAction(ISD::XOR, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    // Without hasP8Altivec set, v2i64 SMAX isn't available.
    // But ABS custom lowering requires SMAX support.
    if (!Subtarget.hasP8Altivec())
      setOperationAction(ISD::ABS, MVT::v2i64, Expand);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
      if (Subtarget.hasP8Vector()) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
      }
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        // 128 bit shifts can be accomplished via 3 instructions for SHL and
        // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
        // worth doing.
        setOperationAction(ISD::SHL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRA, MVT::v1i128, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      }
      else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Custom handling for partial vectors of integers converted to
      // floating point. We already have optimal handling for v2i32 through
      // the DAG combine, so those aren't necessary.
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);

      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
      setOperationAction(ISD::FABS, MVT::v2f64, Legal);
      setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
      setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);

      if (Subtarget.hasDirectMove())
        setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
      setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }
    if (Subtarget.hasP9Vector()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

      // 128 bit shifts can be accomplished via 3 instructions for SHL and
      // SRL, but not for SRA because of the instructions available:
      // VS{RL} and VS{RL}O.
      setOperationAction(ISD::SHL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRA, MVT::v1i128, Expand);

      if (EnableQuadPrecision) {
        addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
        setOperationAction(ISD::FADD, MVT::f128, Legal);
        setOperationAction(ISD::FSUB, MVT::f128, Legal);
        setOperationAction(ISD::FDIV, MVT::f128, Legal);
        setOperationAction(ISD::FMUL, MVT::f128, Legal);
        setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
        // No extending loads to f128 on PPC.
        for (MVT FPT : MVT::fp_valuetypes())
          setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
        setOperationAction(ISD::FMA, MVT::f128, Legal);
        setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
        setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
        setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
        setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
        setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
        setCondCodeAction(ISD::SETONE, MVT::f128, Expand);

        setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
        setOperationAction(ISD::FRINT, MVT::f128, Legal);
        setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
        setOperationAction(ISD::FCEIL, MVT::f128, Legal);
        setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
        setOperationAction(ISD::FROUND, MVT::f128, Legal);

        setOperationAction(ISD::SELECT, MVT::f128, Expand);
        setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
        setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
        setTruncStoreAction(MVT::f128, MVT::f64, Expand);
        setTruncStoreAction(MVT::f128, MVT::f32, Expand);
        setOperationAction(ISD::BITCAST, MVT::i128, Custom);
        // No implementation for these ops for PowerPC.
        setOperationAction(ISD::FSIN, MVT::f128, Expand);
        setOperationAction(ISD::FCOS, MVT::f128, Expand);
        setOperationAction(ISD::FPOW, MVT::f128, Expand);
        setOperationAction(ISD::FPOWI, MVT::f128, Expand);
        setOperationAction(ISD::FREM, MVT::f128, Expand);
      }

      setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
    }

    if (Subtarget.hasP9Altivec()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
    }
  }
  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD, MVT::v4f64, Custom);
    setOperationAction(ISD::STORE, MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f64, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG, MVT::v4f64, Legal);
    setOperationAction(ISD::FABS, MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN, MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD, MVT::v4f32, Custom);
    setOperationAction(ISD::STORE, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
    setOperationAction(ISD::FABS, MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND, MVT::v4i1, Legal);
    setOperationAction(ISD::OR, MVT::v4i1, Legal);
    setOperationAction(ISD::XOR, MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
    setOperationAction(ISD::STORE, MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }
  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
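    // Concretely (illustrative note): Altivec compares such as vcmpequw
    // write all-ones (0xFFFFFFFF) into each true lane and all-zeros into
    // each false lane, which is exactly the ZeroOrNegativeOneBooleanContent
    // contract; scalar boolean results remain 0/1 per the setting above.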
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SHL);
  setTargetDAGCombine(ISD::SRA);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::BUILD_VECTOR);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  setTargetDAGCombine(ISD::TRUNCATE);
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  if (Subtarget.hasP9Altivec()) {
    setTargetDAGCombine(ISD::ABS);
    setTargetDAGCombine(ISD::VSELECT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  if (EnableQuadPrecision) {
    setLibcallName(RTLIB::LOG_F128, "logf128");
    setLibcallName(RTLIB::LOG2_F128, "log2f128");
    setLibcallName(RTLIB::LOG10_F128, "log10f128");
    setLibcallName(RTLIB::EXP_F128, "expf128");
    setLibcallName(RTLIB::EXP2_F128, "exp2f128");
    setLibcallName(RTLIB::SIN_F128, "sinf128");
    setLibcallName(RTLIB::COS_F128, "cosf128");
    setLibcallName(RTLIB::POW_F128, "powf128");
    setLibcallName(RTLIB::FMIN_F128, "fminf128");
    setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
    setLibcallName(RTLIB::POWI_F128, "__powikf2");
    setLibcallName(RTLIB::REM_F128, "fmodf128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }
  setMinFunctionAlignment(llvm::Align(4));
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(llvm::Align(16));

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
    setPrefLoopAlignment(llvm::Align(16));
    setPrefFunctionAlignment(llvm::Align(16));
    break;
  }

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of a function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
    MaxLoadsPerMemcmp = 128;
  } else {
    MaxLoadsPerMemcmp = 8;
    MaxLoadsPerMemcmpOptSize = 4;
  }
}
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      unsigned EltAlign = 0;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // Darwin passes everything on a 4-byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary. The rest
  // are passed on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}
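// Worked example (illustrative, follows directly from the code above): on a
// PPC64 Altivec (non-QPX) target, a by-value struct { vector int V; int X; }
// yields 16, because the 128-bit vector member raises the base alignment of
// 8 via getMaxByValAlign, while a plain struct { int X; } stays at 8.
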
bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}

bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  return VT.isScalarInteger();
}
1293 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1294 switch ((PPCISD::NodeType)Opcode) {
1295 case PPCISD::FIRST_NUMBER: break;
1296 case PPCISD::FSEL: return "PPCISD::FSEL";
1297 case PPCISD::FCFID: return "PPCISD::FCFID";
1298 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1299 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1300 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1301 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1302 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1303 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1304 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1305 case PPCISD::FP_TO_UINT_IN_VSR:
1306 return "PPCISD::FP_TO_UINT_IN_VSR,";
1307 case PPCISD::FP_TO_SINT_IN_VSR:
1308 return "PPCISD::FP_TO_SINT_IN_VSR";
1309 case PPCISD::FRE: return "PPCISD::FRE";
1310 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1311 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1312 case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
1313 case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
1314 case PPCISD::VPERM: return "PPCISD::VPERM";
1315 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1316 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1317 case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE";
1318 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1319 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1320 case PPCISD::CMPB: return "PPCISD::CMPB";
1321 case PPCISD::Hi: return "PPCISD::Hi";
1322 case PPCISD::Lo: return "PPCISD::Lo";
1323 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1324 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1325 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1326 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1327 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1328 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1329 case PPCISD::SRL: return "PPCISD::SRL";
1330 case PPCISD::SRA: return "PPCISD::SRA";
1331 case PPCISD::SHL: return "PPCISD::SHL";
1332 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1333 case PPCISD::CALL: return "PPCISD::CALL";
1334 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1335 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1336 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1337 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1338 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
1339 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1340 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1341 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1342 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1343 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1344 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1345 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1346 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1347 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1348 case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
1349 case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
1350 case PPCISD::VCMP: return "PPCISD::VCMP";
1351 case PPCISD::VCMPo: return "PPCISD::VCMPo";
1352 case PPCISD::LBRX: return "PPCISD::LBRX";
1353 case PPCISD::STBRX: return "PPCISD::STBRX";
1354 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1355 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1356 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1357 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1358 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1359 case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
1360 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1361 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1362 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1363 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1364 case PPCISD::ST_VSR_SCAL_INT:
1365 return "PPCISD::ST_VSR_SCAL_INT";
1366 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1367 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1368 case PPCISD::BDZ: return "PPCISD::BDZ";
1369 case PPCISD::MFFS: return "PPCISD::MFFS";
1370 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1371 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1372 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1373 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1374 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1375 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1376 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1377 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1378 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1379 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1380 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1381 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1382 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1383 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1384 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1385 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1386 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1387 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1388 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1389 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1390 case PPCISD::SC: return "PPCISD::SC";
1391 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1392 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1393 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1394 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1395 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1396 case PPCISD::VABSD: return "PPCISD::VABSD";
1397 case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
1398 case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
1399 case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
1400 case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
1401 case PPCISD::QBFLT: return "PPCISD::QBFLT";
1402 case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
1403 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1404 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1405 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1406 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1407 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1408 case PPCISD::FP_EXTEND_LH: return "PPCISD::FP_EXTEND_LH";
1409 }
1410 return nullptr;
1411 }
1413 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1414 EVT VT) const {
1415 if (!VT.isVector())
1416 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1418 if (Subtarget.hasQPX())
1419 return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
1421 return VT.changeVectorElementTypeToInteger();
1422 }
1424 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1425 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1426 return true;
1427 }
1429 //===----------------------------------------------------------------------===//
1430 // Node matching predicates, for use by the tblgen matching code.
1431 //===----------------------------------------------------------------------===//
1433 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1434 static bool isFloatingPointZero(SDValue Op) {
1435 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1436 return CFP->getValueAPF().isZero();
1437 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1438 // Maybe this has already been legalized into the constant pool?
1439 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1440 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1441 return CFP->getValueAPF().isZero();
1442 }
1443 return false;
1444 }
1446 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1447 /// true if Op is undef or if it matches the specified value.
1448 static bool isConstantOrUndef(int Op, int Val) {
1449 return Op < 0 || Op == Val;
1450 }
1452 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1453 /// VPKUHUM instruction.
1454 /// The ShuffleKind distinguishes between big-endian operations with
1455 /// two different inputs (0), either-endian operations with two identical
1456 /// inputs (1), and little-endian operations with two different inputs (2).
1457 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
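/// For example (derived from the element checks below): with ShuffleKind 0 on
/// a big-endian target, the qualifying v16i8 mask is
/// <1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31>, i.e. the low-order byte of
/// each halfword from both inputs.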
1458 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1459 SelectionDAG &DAG) {
1460 bool IsLE = DAG.getDataLayout().isLittleEndian();
1461 if (ShuffleKind == 0) {
1462 if (IsLE)
1463 return false;
1464 for (unsigned i = 0; i != 16; ++i)
1465 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1466 return false;
1467 } else if (ShuffleKind == 2) {
1468 if (!IsLE)
1469 return false;
1470 for (unsigned i = 0; i != 16; ++i)
1471 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1472 return false;
1473 } else if (ShuffleKind == 1) {
1474 unsigned j = IsLE ? 0 : 1;
1475 for (unsigned i = 0; i != 8; ++i)
1476 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1477 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1478 return false;
1479 }
1480 return true;
1481 }
1483 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1484 /// VPKUWUM instruction.
1485 /// The ShuffleKind distinguishes between big-endian operations with
1486 /// two different inputs (0), either-endian operations with two identical
1487 /// inputs (1), and little-endian operations with two different inputs (2).
1488 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
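/// For example (derived from the per-byte checks below): with ShuffleKind 0 on
/// a big-endian target, the qualifying mask is
/// <2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31>, i.e. the low-order halfword
/// of each word from both inputs.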
1489 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1490 SelectionDAG &DAG) {
1491 bool IsLE = DAG.getDataLayout().isLittleEndian();
1492 if (ShuffleKind == 0) {
1493 if (IsLE)
1494 return false;
1495 for (unsigned i = 0; i != 16; i += 2)
1496 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1497 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1498 return false;
1499 } else if (ShuffleKind == 2) {
1500 if (!IsLE)
1501 return false;
1502 for (unsigned i = 0; i != 16; i += 2)
1503 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1504 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1505 return false;
1506 } else if (ShuffleKind == 1) {
1507 unsigned j = IsLE ? 0 : 2;
1508 for (unsigned i = 0; i != 8; i += 2)
1509 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1510 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1511 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1512 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1513 return false;
1514 }
1515 return true;
1516 }
1518 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1519 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1520 /// current subtarget.
1522 /// The ShuffleKind distinguishes between big-endian operations with
1523 /// two different inputs (0), either-endian operations with two identical
1524 /// inputs (1), and little-endian operations with two different inputs (2).
1525 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1526 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1527 SelectionDAG &DAG) {
1528 const PPCSubtarget& Subtarget =
1529 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1530 if (!Subtarget.hasP8Vector())
1531 return false;
1533 bool IsLE = DAG.getDataLayout().isLittleEndian();
1534 if (ShuffleKind == 0) {
1535 if (IsLE)
1536 return false;
1537 for (unsigned i = 0; i != 16; i += 4)
1538 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1539 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1540 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1541 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1542 return false;
1543 } else if (ShuffleKind == 2) {
1544 if (!IsLE)
1545 return false;
1546 for (unsigned i = 0; i != 16; i += 4)
1547 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1548 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1549 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1550 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1551 return false;
1552 } else if (ShuffleKind == 1) {
1553 unsigned j = IsLE ? 0 : 4;
1554 for (unsigned i = 0; i != 8; i += 4)
1555 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1556 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1557 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1558 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1559 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1560 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1561 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1562 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1563 return false;
1564 }
1565 return true;
1566 }
1568 /// isVMerge - Common function, used to match vmrg* shuffles.
1570 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1571 unsigned LHSStart, unsigned RHSStart) {
1572 if (N->getValueType(0) != MVT::v16i8)
1573 return false;
1574 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1575 "Unsupported merge size!");
1577 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1578 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1579 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1580 LHSStart+j+i*UnitSize) ||
1581 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1582 RHSStart+j+i*UnitSize))
1583 return false;
1584 }
1585 return true;
1586 }
1588 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1589 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1590 /// The ShuffleKind distinguishes between big-endian merges with two
1591 /// different inputs (0), either-endian merges with two identical inputs (1),
1592 /// and little-endian merges with two different inputs (2). For the latter,
1593 /// the input operands are swapped (see PPCInstrAltivec.td).
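/// For example, a big-endian vmrglb (UnitSize 1, ShuffleKind 0) corresponds to
/// the mask <8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31>, interleaving the
/// low halves of the two inputs byte by byte (see isVMerge above).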
1594 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1595 unsigned ShuffleKind, SelectionDAG &DAG) {
1596 if (DAG.getDataLayout().isLittleEndian()) {
1597 if (ShuffleKind == 1) // unary
1598 return isVMerge(N, UnitSize, 0, 0);
1599 else if (ShuffleKind == 2) // swapped
1600 return isVMerge(N, UnitSize, 0, 16);
1601 else
1602 return false;
1603 } else {
1604 if (ShuffleKind == 1) // unary
1605 return isVMerge(N, UnitSize, 8, 8);
1606 else if (ShuffleKind == 0) // normal
1607 return isVMerge(N, UnitSize, 8, 24);
1608 else
1609 return false;
1610 }
1611 }
1613 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1614 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1615 /// The ShuffleKind distinguishes between big-endian merges with two
1616 /// different inputs (0), either-endian merges with two identical inputs (1),
1617 /// and little-endian merges with two different inputs (2). For the latter,
1618 /// the input operands are swapped (see PPCInstrAltivec.td).
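/// For example, a big-endian vmrghb (UnitSize 1, ShuffleKind 0) corresponds to
/// the mask <0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23>, interleaving the high
/// halves of the two inputs byte by byte.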
1619 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1620 unsigned ShuffleKind, SelectionDAG &DAG) {
1621 if (DAG.getDataLayout().isLittleEndian()) {
1622 if (ShuffleKind == 1) // unary
1623 return isVMerge(N, UnitSize, 8, 8);
1624 else if (ShuffleKind == 2) // swapped
1625 return isVMerge(N, UnitSize, 8, 24);
1626 else
1627 return false;
1628 } else {
1629 if (ShuffleKind == 1) // unary
1630 return isVMerge(N, UnitSize, 0, 0);
1631 else if (ShuffleKind == 0) // normal
1632 return isVMerge(N, UnitSize, 0, 16);
1633 else
1634 return false;
1635 }
1636 }
1638 /**
1639 * Common function used to match vmrgew and vmrgow shuffles.
1641 * The indexOffset determines whether to look for even or odd words in
1642 * the shuffle mask. This is based on the endianness of the target
1643 * machine.
1644 * - Little Endian:
1645 * - Use offset of 0 to check for odd elements
1646 * - Use offset of 4 to check for even elements
1647 * - Big Endian:
1648 * - Use offset of 0 to check for even elements
1649 * - Use offset of 4 to check for odd elements
1650 * A detailed description of the vector element ordering for little endian and
1651 * big endian can be found at
1652 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1653 * Targeting your applications - what little endian and big endian IBM XL C/C++
1654 * compiler differences mean to you
1656 * The mask to the shuffle vector instruction specifies the indices of the
1657 * elements from the two input vectors to place in the result. The elements are
1658 * numbered in array-access order, starting with the first vector. These vectors
1659 * are always of type v16i8, thus each vector will contain 16 elements of size
1660 * 8 bits. More info on the shuffle vector can be found in the
1661 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1662 * Language Reference.
1664 * The RHSStartValue indicates whether the same input vectors are used (unary)
1665 * or two different input vectors are used, based on the following:
1666 * - If the instruction uses the same vector for both inputs, the range of the
1667 * indices will be 0 to 15. In this case, the RHSStart value passed should
1668 * be 0.
1669 * - If the instruction has two different vectors then the range of the
1670 * indices will be 0 to 31. In this case, the RHSStart value passed should
1671 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1672 * to 31 specify elements in the second vector).
1674 * \param[in] N The shuffle vector SD Node to analyze
1675 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1676 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1677 * vector to the shuffle_vector instruction
1678 * \return true iff this shuffle vector represents an even or odd word merge
1679 */
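// For example, a big-endian vmrgew with two different inputs (IndexOffset 0,
// RHSStartValue 16) corresponds to the byte mask
// <0,1,2,3,16,17,18,19,8,9,10,11,24,25,26,27>, i.e. the even-numbered words
// of both inputs (this follows from the element checks below).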
1680 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1681 unsigned RHSStartValue) {
1682 if (N->getValueType(0) != MVT::v16i8)
1683 return false;
1685 for (unsigned i = 0; i < 2; ++i)
1686 for (unsigned j = 0; j < 4; ++j)
1687 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1688 i*RHSStartValue+j+IndexOffset) ||
1689 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1690 i*RHSStartValue+j+IndexOffset+8))
1691 return false;
1692 return true;
1693 }
1695 /**
1696 * Determine if the specified shuffle mask is suitable for the vmrgew or
1697 * vmrgow instructions.
1699 * \param[in] N The shuffle vector SD Node to analyze
1700 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1701 * \param[in] ShuffleKind Identify the type of merge:
1702 * - 0 = big-endian merge with two different inputs;
1703 * - 1 = either-endian merge with two identical inputs;
1704 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1705 * little-endian merges).
1706 * \param[in] DAG The current SelectionDAG
1707 * \return true iff this shuffle mask is suitable for vmrgew/vmrgow
1708 */
1709 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1710 unsigned ShuffleKind, SelectionDAG &DAG) {
1711 if (DAG.getDataLayout().isLittleEndian()) {
1712 unsigned indexOffset = CheckEven ? 4 : 0;
1713 if (ShuffleKind == 1) // Unary
1714 return isVMerge(N, indexOffset, 0);
1715 else if (ShuffleKind == 2) // swapped
1716 return isVMerge(N, indexOffset, 16);
1717 else
1718 return false;
1719 }
1720 else {
1721 unsigned indexOffset = CheckEven ? 0 : 4;
1722 if (ShuffleKind == 1) // Unary
1723 return isVMerge(N, indexOffset, 0);
1724 else if (ShuffleKind == 0) // Normal
1725 return isVMerge(N, indexOffset, 16);
1726 else
1727 return false;
1728 }
1729 return false;
1730 }
1732 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1733 /// amount, otherwise return -1.
1734 /// The ShuffleKind distinguishes between big-endian operations with two
1735 /// different inputs (0), either-endian operations with two identical inputs
1736 /// (1), and little-endian operations with two different inputs (2). For the
1737 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
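/// For example, on a big-endian target with ShuffleKind 0, the mask
/// <3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18> yields a shift amount of 3
/// (for the little-endian kinds the amount is adjusted to 16 - 3 below).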
1738 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1739 SelectionDAG &DAG) {
1740 if (N->getValueType(0) != MVT::v16i8)
1741 return -1;
1743 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1745 // Find the first non-undef value in the shuffle mask.
1746 unsigned i;
1747 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1748 /*search*/;
1750 if (i == 16) return -1; // all undef.
1752 // Otherwise, check to see if the rest of the elements are consecutively
1753 // numbered from this value.
1754 unsigned ShiftAmt = SVOp->getMaskElt(i);
1755 if (ShiftAmt < i) return -1;
1757 ShiftAmt -= i;
1758 bool isLE = DAG.getDataLayout().isLittleEndian();
1760 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1761 // Check the rest of the elements to see if they are consecutive.
1762 for (++i; i != 16; ++i)
1763 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1764 return -1;
1765 } else if (ShuffleKind == 1) {
1766 // Check the rest of the elements to see if they are consecutive.
1767 for (++i; i != 16; ++i)
1768 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1769 return -1;
1770 } else
1771 return -1;
1773 if (isLE)
1774 ShiftAmt = 16 - ShiftAmt;
1776 return ShiftAmt;
1777 }
1779 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1780 /// specifies a splat of a single element that is suitable for input to
1781 /// VSPLTB/VSPLTH/VSPLTW.
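/// For example, with EltSize 4 the mask <4,5,6,7,4,5,6,7,4,5,6,7,4,5,6,7>
/// splats word element 1 of the first input and is accepted below.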
1782 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1783 assert(N->getValueType(0) == MVT::v16i8 &&
1784 (EltSize == 1 || EltSize == 2 || EltSize == 4));
1786 // The consecutive indices need to specify an element, not part of two
1787 // different elements. So abandon ship early if this isn't the case.
1788 if (N->getMaskElt(0) % EltSize != 0)
1789 return false;
1791 // This is a splat operation if each element of the permute is the same, and
1792 // if the value doesn't reference the second vector.
1793 unsigned ElementBase = N->getMaskElt(0);
1795 // FIXME: Handle UNDEF elements too!
1796 if (ElementBase >= 16)
1797 return false;
1799 // Check that the indices are consecutive, in the case of a multi-byte element
1800 // splatted with a v16i8 mask.
1801 for (unsigned i = 1; i != EltSize; ++i)
1802 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1803 return false;
1805 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1806 if (N->getMaskElt(i) < 0) continue;
1807 for (unsigned j = 0; j != EltSize; ++j)
1808 if (N->getMaskElt(i+j) != N->getMaskElt(j))
1809 return false;
1810 }
1811 return true;
1812 }
1814 /// Check that the mask is shuffling N byte elements. Within each N byte
1815 /// element of the mask, the indices could be either in increasing or
1816 /// decreasing order as long as they are consecutive.
1817 /// \param[in] N the shuffle vector SD Node to analyze
1818 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1819 /// Word/DoubleWord/QuadWord).
1820 /// \param[in] StepLen the step between consecutive indices within an element:
1821 /// 1 if the mask is in increasing order, -1 if in decreasing order.
1822 /// \return true iff the mask is shuffling N byte elements.
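/// For example, Width 4 with StepLen -1 accepts the byte-reversed-word mask
/// <3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12> (the XXBRW pattern checked below).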
1823 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1824 int StepLen) {
1825 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1826 "Unexpected element width.");
1827 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
1829 unsigned NumOfElem = 16 / Width;
1830 unsigned MaskVal[16]; // Width is never greater than 16
1831 for (unsigned i = 0; i < NumOfElem; ++i) {
1832 MaskVal[0] = N->getMaskElt(i * Width);
1833 if ((StepLen == 1) && (MaskVal[0] % Width)) {
1834 return false;
1835 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1836 return false;
1837 }
1839 for (unsigned int j = 1; j < Width; ++j) {
1840 MaskVal[j] = N->getMaskElt(i * Width + j);
1841 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1842 return false;
1843 }
1844 }
1845 }
1847 return true;
1848 }
1850 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1851 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
1852 if (!isNByteElemShuffleMask(N, 4, 1))
1853 return false;
1855 // Now we look at mask elements 0,4,8,12
1856 unsigned M0 = N->getMaskElt(0) / 4;
1857 unsigned M1 = N->getMaskElt(4) / 4;
1858 unsigned M2 = N->getMaskElt(8) / 4;
1859 unsigned M3 = N->getMaskElt(12) / 4;
1860 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
1861 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
1863 // Below, let H and L be arbitrary elements of the shuffle mask
1864 // where H is in the range [4,7] and L is in the range [0,3].
1865 // H, 1, 2, 3 or L, 5, 6, 7
1866 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
1867 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
1868 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
1869 InsertAtByte = IsLE ? 12 : 0;
1870 Swap = M0 < 4;
1871 return true;
1872 }
1873 // 0, H, 2, 3 or 4, L, 6, 7
1874 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
1875 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
1876 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
1877 InsertAtByte = IsLE ? 8 : 4;
1878 Swap = M1 < 4;
1879 return true;
1880 }
1881 // 0, 1, H, 3 or 4, 5, L, 7
1882 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
1883 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
1884 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
1885 InsertAtByte = IsLE ? 4 : 8;
1886 Swap = M2 < 4;
1887 return true;
1888 }
1889 // 0, 1, 2, H or 4, 5, 6, L
1890 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
1891 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
1892 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
1893 InsertAtByte = IsLE ? 0 : 12;
1894 Swap = M3 < 4;
1895 return true;
1896 }
1898 // If both vector operands for the shuffle are the same vector, the mask will
1899 // contain only elements from the first one and the second one will be undef.
1900 if (N->getOperand(1).isUndef()) {
1901 ShiftElts = 0;
1902 Swap = true;
1903 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
1904 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
1905 InsertAtByte = IsLE ? 12 : 0;
1906 return true;
1907 }
1908 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
1909 InsertAtByte = IsLE ? 8 : 4;
1910 return true;
1911 }
1912 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
1913 InsertAtByte = IsLE ? 4 : 8;
1914 return true;
1915 }
1916 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
1917 InsertAtByte = IsLE ? 0 : 12;
1918 return true;
1919 }
1920 }
1922 return false;
1923 }
1925 bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1926 bool &Swap, bool IsLE) {
1927 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1928 // Ensure each byte index of the word is consecutive.
1929 if (!isNByteElemShuffleMask(N, 4, 1))
1930 return false;
1932 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
1933 unsigned M0 = N->getMaskElt(0) / 4;
1934 unsigned M1 = N->getMaskElt(4) / 4;
1935 unsigned M2 = N->getMaskElt(8) / 4;
1936 unsigned M3 = N->getMaskElt(12) / 4;
1938 // If both vector operands for the shuffle are the same vector, the mask will
1939 // contain only elements from the first one and the second one will be undef.
1940 if (N->getOperand(1).isUndef()) {
1941 assert(M0 < 4 && "Indexing into an undef vector?");
1942 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
1943 return false;
1945 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
1946 Swap = false;
1947 return true;
1948 }
1950 // Ensure each word index of the ShuffleVector Mask is consecutive.
1951 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
1952 return false;
1954 if (IsLE) {
1955 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
1956 // Input vectors don't need to be swapped if the leading element
1957 // of the result is one of the 3 left elements of the second vector
1958 // (or if there is no shift to be done at all).
1959 Swap = false;
1960 ShiftElts = (8 - M0) % 8;
1961 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
1962 // Input vectors need to be swapped if the leading element
1963 // of the result is one of the 3 left elements of the first vector
1964 // (or if we're shifting by 4 - thereby simply swapping the vectors).
1965 Swap = true;
1966 ShiftElts = (4 - M0) % 4;
1967 }
1969 return true;
1970 } else { // BE
1971 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
1972 // Input vectors don't need to be swapped if the leading element
1973 // of the result is one of the 4 elements of the first vector.
1974 Swap = false;
1975 ShiftElts = M0;
1976 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
1977 // Input vectors need to be swapped if the leading element
1978 // of the result is one of the 4 elements of the right vector.
1979 Swap = true;
1980 ShiftElts = M0 - 4;
1981 }
1983 return true;
1984 }
1985 }
1987 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
1988 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1990 if (!isNByteElemShuffleMask(N, Width, -1))
1991 return false;
1993 for (int i = 0; i < 16; i += Width)
1994 if (N->getMaskElt(i) != i + Width - 1)
1995 return false;
1997 return true;
1998 }
2000 bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2001 return isXXBRShuffleMaskHelper(N, 2);
2002 }
2004 bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2005 return isXXBRShuffleMaskHelper(N, 4);
2006 }
2008 bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2009 return isXXBRShuffleMaskHelper(N, 8);
2010 }
2012 bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2013 return isXXBRShuffleMaskHelper(N, 16);
2014 }
2016 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2017 /// if the inputs to the instruction should be swapped and set \p DM to the
2018 /// value for the immediate.
2019 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2020 /// AND element 0 of the result comes from the first input (LE) or second input
2021 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2022 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2023 /// mask.
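/// For example, the unary mask <8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7> swaps
/// the two doublewords of the single input; for either endianness the code
/// below computes DM = 2 for it, which is the xxswapd pattern.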
2024 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2025 bool &Swap, bool IsLE) {
2026 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2028 // Ensure each byte index of the double word is consecutive.
2029 if (!isNByteElemShuffleMask(N, 8, 1))
2030 return false;
2032 unsigned M0 = N->getMaskElt(0) / 8;
2033 unsigned M1 = N->getMaskElt(8) / 8;
2034 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2036 // If both vector operands for the shuffle are the same vector, the mask will
2037 // contain only elements from the first one and the second one will be undef.
2038 if (N->getOperand(1).isUndef()) {
2039 if ((M0 | M1) < 2) {
2040 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2041 Swap = false;
2042 return true;
2043 } else
2044 return false;
2045 }
2047 if (IsLE) {
2048 if (M0 > 1 && M1 < 2) {
2049 Swap = false;
2050 } else if (M0 < 2 && M1 > 1) {
2051 M0 = (M0 + 2) % 4;
2052 M1 = (M1 + 2) % 4;
2053 Swap = true;
2054 } else
2055 return false;
2057 // Note: if control flow reaches this point, Swap has already been set above.
2058 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2059 return true;
2060 } else { // BE
2061 if (M0 < 2 && M1 > 1) {
2062 Swap = false;
2063 } else if (M0 > 1 && M1 < 2) {
2064 M0 = (M0 + 2) % 4;
2065 M1 = (M1 + 2) % 4;
2066 Swap = true;
2067 } else
2068 return false;
2070 // Note: if control flow reaches this point, Swap has already been set above.
2071 DM = (M0 << 1) + (M1 & 1);
2072 return true;
2073 }
2074 }
2077 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
2078 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
2079 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
2080 SelectionDAG &DAG) {
2081 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2082 assert(isSplatShuffleMask(SVOp, EltSize));
2083 if (DAG.getDataLayout().isLittleEndian())
2084 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2085 else
2086 return SVOp->getMaskElt(0) / EltSize;
2087 }
2089 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2090 /// by using a vspltis[bhw] instruction of the specified element size, return
2091 /// the constant being splatted. The ByteSize field indicates the number of
2092 /// bytes of each element [124] -> [bhw].
2093 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2094 SDValue OpVal(nullptr, 0);
2096 // If ByteSize of the splat is bigger than the element size of the
2097 // build_vector, then we have a case where we are checking for a splat where
2098 // multiple elements of the buildvector are folded together into a single
2099 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2100 unsigned EltSize = 16/N->getNumOperands();
2101 if (EltSize < ByteSize) {
2102 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2103 SDValue UniquedVals[4];
2104 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2106 // See if all of the elements in the buildvector agree across.
2107 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2108 if (N->getOperand(i).isUndef()) continue;
2109 // If the element isn't a constant, bail fully out.
2110 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2112 if (!UniquedVals[i&(Multiple-1)].getNode())
2113 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2114 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2115 return SDValue(); // no match.
2116 }
2118 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2119 // either constant or undef values that are identical for each chunk. See
2120 // if these chunks can form into a larger vspltis*.
2122 // Check to see if all of the leading entries are either 0 or -1. If
2123 // neither, then this won't fit into the immediate field.
2124 bool LeadingZero = true;
2125 bool LeadingOnes = true;
2126 for (unsigned i = 0; i != Multiple-1; ++i) {
2127 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2129 LeadingZero &= isNullConstant(UniquedVals[i]);
2130 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2131 }
2132 // Finally, check the least significant entry.
2133 if (LeadingZero) {
2134 if (!UniquedVals[Multiple-1].getNode())
2135 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2136 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2137 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2138 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2139 }
2140 if (LeadingOnes) {
2141 if (!UniquedVals[Multiple-1].getNode())
2142 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2143 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2144 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2145 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2146 }
2148 return SDValue();
2149 }
2151 // Check to see if this buildvec has a single non-undef value in its elements.
2152 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2153 if (N->getOperand(i).isUndef()) continue;
2154 if (!OpVal.getNode())
2155 OpVal = N->getOperand(i);
2156 else if (OpVal != N->getOperand(i))
2157 return SDValue();
2158 }
2160 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2162 unsigned ValSizeInBytes = EltSize;
2163 uint64_t Value = 0;
2164 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2165 Value = CN->getZExtValue();
2166 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2167 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2168 Value = FloatToBits(CN->getValueAPF().convertToFloat());
2169 }
2171 // If the splat value is larger than the element value, then we can never do
2172 // this splat. The only case that we could fit the replicated bits into our
2173 // immediate field for would be zero, and we prefer to use vxor for it.
2174 if (ValSizeInBytes < ByteSize) return SDValue();
2176 // If the element value is larger than the splat value, check if it consists
2177 // of a repeated bit pattern of size ByteSize.
2178 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2179 return SDValue();
2181 // Properly sign extend the value.
2182 int MaskVal = SignExtend32(Value, ByteSize * 8);
2184 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2185 if (MaskVal == 0) return SDValue();
2187 // Finally, if this value fits in a 5 bit sext field, return it
2188 if (SignExtend32<5>(MaskVal) == MaskVal)
2189 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2190 return SDValue();
2191 }
2193 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2194 /// amount, otherwise return -1.
2195 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2196 EVT VT = N->getValueType(0);
2197 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2198 return -1;
2200 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2202 // Find the first non-undef value in the shuffle mask.
2203 unsigned i;
2204 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2205 /*search*/;
2207 if (i == 4) return -1; // all undef.
2209 // Otherwise, check to see if the rest of the elements are consecutively
2210 // numbered from this value.
2211 unsigned ShiftAmt = SVOp->getMaskElt(i);
2212 if (ShiftAmt < i) return -1;
2213 ShiftAmt -= i;
2215 // Check the rest of the elements to see if they are consecutive.
2216 for (++i; i != 4; ++i)
2217 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2218 return -1;
2220 return ShiftAmt;
2221 }
2223 //===----------------------------------------------------------------------===//
2224 // Addressing Mode Selection
2225 //===----------------------------------------------------------------------===//
2227 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2228 /// or 64-bit immediate, and if the value can be accurately represented as a
2229 /// sign extension from a 16-bit value. If so, this returns true and sets
2230 /// Imm to the immediate.
2231 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2232 if (!isa<ConstantSDNode>(N))
2233 return false;
2235 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2236 if (N->getValueType(0) == MVT::i32)
2237 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2238 else
2239 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2240 }
2241 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2242 return isIntS16Immediate(Op.getNode(), Imm);
2243 }
2246 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2247 /// be represented as an indexed [r+r] operation.
2248 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2249 SDValue &Index,
2250 SelectionDAG &DAG) const {
2251 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2252 UI != E; ++UI) {
2253 if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2254 if (Memop->getMemoryVT() == MVT::f64) {
2255 Base = N.getOperand(0);
2256 Index = N.getOperand(1);
2257 return true;
2258 }
2259 }
2260 }
2261 return false;
2262 }
2264 /// SelectAddressRegReg - Given the specified address, check to see if it
2265 /// can be represented as an indexed [r+r] operation. Returns false if it
2266 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2267 /// non-zero and N can be represented by a base register plus a signed 16-bit
2268 /// displacement, make a more precise judgement by checking (displacement % \p
2269 /// EncodingAlignment).
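/// As an illustration of that judgement: with EncodingAlignment 4, an address
/// (add %r, 16388) is rejected here (16388 is a multiple of 4, so [r+imm] is
/// preferred), while (add %r, 3) is kept as [r+r], since a displacement of 3
/// cannot be encoded for a 4-byte-aligned access.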
2270 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2271 SDValue &Index, SelectionDAG &DAG,
2272 unsigned EncodingAlignment) const {
2273 int16_t imm = 0;
2274 if (N.getOpcode() == ISD::ADD) {
2275 // Is there any SPE load/store (f64), which can't handle a 16-bit offset?
2276 // SPE load/store instructions can only handle 8-bit offsets.
2277 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2278 return true;
2279 if (isIntS16Immediate(N.getOperand(1), imm) &&
2280 (!EncodingAlignment || !(imm % EncodingAlignment)))
2281 return false; // r+i
2282 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2283 return false; // r+i
2285 Base = N.getOperand(0);
2286 Index = N.getOperand(1);
2287 return true;
2288 } else if (N.getOpcode() == ISD::OR) {
2289 if (isIntS16Immediate(N.getOperand(1), imm) &&
2290 (!EncodingAlignment || !(imm % EncodingAlignment)))
2291 return false; // r+i can fold it if we can.
2293 // If this is an or of disjoint bitfields, we can codegen this as an add
2294 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2295 // disjoint.
2296 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2298 if (LHSKnown.Zero.getBoolValue()) {
2299 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2300 // If all of the bits are known zero on the LHS or RHS, the add won't
2301 // carry.
2302 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2303 Base = N.getOperand(0);
2304 Index = N.getOperand(1);
2305 return true;
2306 }
2307 }
2308 }
2310 return false;
2311 }
2313 // If we happen to be doing an i64 load or store into a stack slot that has
2314 // less than a 4-byte alignment, then the frame-index elimination may need to
2315 // use an indexed load or store instruction (because the offset may not be a
2316 // multiple of 4). The extra register needed to hold the offset comes from the
2317 // register scavenger, and it is possible that the scavenger will need to use
2318 // an emergency spill slot. As a result, we need to make sure that a spill slot
2319 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2320 // stack slot.
2321 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2322 // FIXME: This does not handle the LWA case.
2323 if (VT != MVT::i64)
2324 return;
2326 // NOTE: We'll exclude negative FIs here, which come from argument
2327 // lowering, because there are no known test cases triggering this problem
2328 // using packed structures (or similar). We can remove this exclusion if
2329 // we find such a test case. The reason why this is so test-case driven is
2330 // because this entire 'fixup' is only to prevent crashes (from the
2331 // register scavenger) on not-really-valid inputs. For example, if we have:
2332 // %a = alloca i1
2333 // %b = bitcast i1* %a to i64*
2334 // store i64 0, i64* %b
2335 // then the store should really be marked as 'align 1', but is not. If it
2336 // were marked as 'align 1' then the indexed form would have been
2337 // instruction-selected initially, and the problem this 'fixup' is preventing
2338 // won't happen regardless.
2339 if (FrameIdx < 0)
2340 return;
2342 MachineFunction &MF = DAG.getMachineFunction();
2343 MachineFrameInfo &MFI = MF.getFrameInfo();
2345 unsigned Align = MFI.getObjectAlignment(FrameIdx);
2346 if (Align >= 4)
2347 return;
2349 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2350 FuncInfo->setHasNonRISpills();
2351 }
2353 /// Returns true if the address N can be represented by a base register plus
2354 /// a signed 16-bit displacement [r+imm], and if it is not better
2355 /// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2356 /// displacements that are multiples of that value.
2357 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
2358 SDValue &Base,
2359 SelectionDAG &DAG,
2360 unsigned EncodingAlignment) const {
2361 // FIXME dl should come from parent load or store, not from address
2362 SDLoc dl(N);
2363 // If this can be more profitably realized as r+r, fail.
2364 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2365 return false;
2367 if (N.getOpcode() == ISD::ADD) {
2368 int16_t imm = 0;
2369 if (isIntS16Immediate(N.getOperand(1), imm) &&
2370 (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
2371 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2372 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2373 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2374 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2375 } else {
2376 Base = N.getOperand(0);
2377 }
2378 return true; // [r+i]
2379 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2380 // Match LOAD (ADD (X, Lo(G))).
2381 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2382 && "Cannot handle constant offsets yet!");
2383 Disp = N.getOperand(1).getOperand(0); // The global address.
2384 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2385 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2386 Disp.getOpcode() == ISD::TargetConstantPool ||
2387 Disp.getOpcode() == ISD::TargetJumpTable);
2388 Base = N.getOperand(0);
2389 return true; // [&g+r]
2390 }
2391 } else if (N.getOpcode() == ISD::OR) {
2392 int16_t imm = 0;
2393 if (isIntS16Immediate(N.getOperand(1), imm) &&
2394 (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
2395 // If this is an or of disjoint bitfields, we can codegen this as an add
2396 // (for better address arithmetic) if the LHS and RHS of the OR are
2397 // provably disjoint.
2398 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2400 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2401 // If all of the bits are known zero on the LHS or RHS, the add won't
2402 // carry.
2403 if (FrameIndexSDNode *FI =
2404 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2405 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2406 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2407 } else {
2408 Base = N.getOperand(0);
2409 }
2410 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2411 return true;
2412 }
2413 }
2414 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2415 // Loading from a constant address.
2417 // If this address fits entirely in a 16-bit sext immediate field, codegen
2418 // this as "d, 0"
2419 int16_t Imm;
2420 if (isIntS16Immediate(CN, Imm) &&
2421 (!EncodingAlignment || (Imm % EncodingAlignment) == 0)) {
2422 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2423 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2424 CN->getValueType(0));
2425 return true;
2426 }
2428 // Handle 32-bit sext immediates with LIS + addr mode.
2429 if ((CN->getValueType(0) == MVT::i32 ||
2430 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2431 (!EncodingAlignment || (CN->getZExtValue() % EncodingAlignment) == 0)) {
2432 int Addr = (int)CN->getZExtValue();
2434 // Otherwise, break this down into an LIS + disp.
2435 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2437 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2438 MVT::i32);
2439 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2440 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2441 return true;
2442 }
2443 }
2445 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2446 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2447 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2448 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2449 } else
2450 Base = N;
2451 return true; // [r+0]
2452 }
2454 /// SelectAddressRegRegOnly - Given the specified address, force it to be
2455 /// represented as an indexed [r+r] operation.
2456 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2457 SDValue &Index,
2458 SelectionDAG &DAG) const {
2459 // Check to see if we can easily represent this as an [r+r] address. This
2460 // will fail if it thinks that the address is more profitably represented as
2461 // reg+imm, e.g. where imm = 0.
2462 if (SelectAddressRegReg(N, Base, Index, DAG))
2463 return true;
2465 // If the address is the result of an add, we will utilize the fact that the
2466 // address calculation includes an implicit add. However, we can reduce
2467 // register pressure if we do not materialize a constant just for use as the
2468 // index register. We only get rid of the add if it is not an add of a
2469 // value and a 16-bit signed constant and both have a single use.
2470 int16_t imm = 0;
2471 if (N.getOpcode() == ISD::ADD &&
2472 (!isIntS16Immediate(N.getOperand(1), imm) ||
2473 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2474 Base = N.getOperand(0);
2475 Index = N.getOperand(1);
2476 return true;
2477 }
2479 // Otherwise, do it the hard way, using R0 as the base register.
2480 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2481 N.getValueType());
2482 Index = N;
2483 return true;
2484 }
2486 /// Returns true if we should use a direct load into vector instruction
2487 /// (such as lxsd or lfd) instead of a load into a GPR plus a direct move.
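/// For example, an i64 load whose only value use is a SCALAR_TO_VECTOR can be
/// selected as a single lxsd instead of an ld followed by a direct move
/// (mtvsrd).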
2488 static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2490 // If there are any other uses other than scalar to vector, then we should
2491 // keep it as a scalar load -> direct move pattern to prevent multiple
2492 // loads.
2493 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2494 if (!LD)
2495 return false;
2497 EVT MemVT = LD->getMemoryVT();
2498 if (!MemVT.isSimple())
2499 return false;
2500 switch(MemVT.getSimpleVT().SimpleTy) {
2501 case MVT::i64:
2502 break;
2503 case MVT::i32:
2504 if (!ST.hasP8Vector())
2505 return false;
2506 break;
2507 case MVT::i16:
2508 case MVT::i8:
2509 if (!ST.hasP9Vector())
2510 return false;
2511 break;
2512 default:
2513 return false;
2514 }
2516 SDValue LoadedVal(N, 0);
2517 if (!LoadedVal.hasOneUse())
2518 return false;
2520 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2521 UI != UE; ++UI)
2522 if (UI.getUse().get().getResNo() == 0 &&
2523 UI->getOpcode() != ISD::SCALAR_TO_VECTOR)
2524 return false;
2526 return true;
2527 }
2529 /// getPreIndexedAddressParts - Returns true if the node's address can be
2530 /// legally represented as a pre-indexed load/store address, and if so sets
2531 /// the base pointer, offset pointer, and addressing mode by reference.
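/// On PowerPC these map to the update-form memory instructions such as lwzu,
/// stwu, ldu and stdu, which write the computed effective address back into
/// the base register.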
2532 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2533 SDValue &Offset,
2534 ISD::MemIndexedMode &AM,
2535 SelectionDAG &DAG) const {
2536 if (DisablePPCPreinc) return false;
2538 bool isLoad = true;
2539 SDValue Ptr;
2540 EVT VT;
2541 unsigned Alignment;
2542 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2543 Ptr = LD->getBasePtr();
2544 VT = LD->getMemoryVT();
2545 Alignment = LD->getAlignment();
2546 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2547 Ptr = ST->getBasePtr();
2548 VT = ST->getMemoryVT();
2549 Alignment = ST->getAlignment();
2550 isLoad = false;
2551 } else
2552 return false;
2554 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2555 // instructions, because we can fold these into a more efficient
2556 // instruction (such as LXSD) instead.
2557 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2558 return false;
2559 }
2561 // PowerPC doesn't have preinc load/store instructions for vectors (except
2562 // for QPX, which does have preinc r+r forms).
2563 if (VT.isVector()) {
2564 if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2565 return false;
2566 } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2567 AM = ISD::PRE_INC;
2568 return true;
2569 }
2570 }
2572 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2573 // Common code will reject creating a pre-inc form if the base pointer
2574 // is a frame index, or if N is a store and the base pointer is either
2575 // the same as or a predecessor of the value being stored. Check for
2576 // those situations here, and try with swapped Base/Offset instead.
2577 bool Swap = false;
2579 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2580 Swap = true;
2581 else if (!isLoad) {
2582 SDValue Val = cast<StoreSDNode>(N)->getValue();
2583 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2584 Swap = true;
2585 }
2587 if (Swap)
2588 std::swap(Base, Offset);
2590 AM = ISD::PRE_INC;
2591 return true;
2592 }
2594 // LDU/STU can only handle immediates that are a multiple of 4.
2595 if (VT != MVT::i64) {
2596 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
2597 return false;
2598 } else {
2599 // LDU/STU need an address with at least 4-byte alignment.
2600 if (Alignment < 4)
2601 return false;
2603 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
2604 return false;
2605 }
2607 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2608 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2609 // sext i32 to i64 when addr mode is r+i.
2610 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2611 LD->getExtensionType() == ISD::SEXTLOAD &&
2612 isa<ConstantSDNode>(Offset))
2613 return false;
2614 }
2616 AM = ISD::PRE_INC;
2617 return true;
2618 }
2620 //===----------------------------------------------------------------------===//
2621 // LowerOperation implementation
2622 //===----------------------------------------------------------------------===//
2624 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
2625 /// and LoOpFlags to the target MO flags.
2626 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2627 unsigned &HiOpFlags, unsigned &LoOpFlags,
2628 const GlobalValue *GV = nullptr) {
2629 HiOpFlags = PPCII::MO_HA;
2630 LoOpFlags = PPCII::MO_LO;
2632 // Don't use the pic base if not in PIC relocation model.
2633 if (IsPIC) {
2634 HiOpFlags |= PPCII::MO_PIC_FLAG;
2635 LoOpFlags |= PPCII::MO_PIC_FLAG;
2636 }
2638 // If this is a reference to a global value that requires a non-lazy-ptr, make
2639 // sure that instruction lowering adds it.
2640 if (GV && Subtarget.hasLazyResolverStub(GV)) {
2641 HiOpFlags |= PPCII::MO_NLP_FLAG;
2642 LoOpFlags |= PPCII::MO_NLP_FLAG;
2644 if (GV->hasHiddenVisibility()) {
2645 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2646 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2647 }
2648 }
2649 }
2651 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2652 SelectionDAG &DAG) {
2653 SDLoc DL(HiPart);
2654 EVT PtrVT = HiPart.getValueType();
2655 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2657 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2658 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2660 // With PIC, the first instruction is actually "GR+hi(&G)".
2661 if (isPIC)
2662 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2663 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2665 // Generate non-pic code that has direct accesses to the constant pool.
2666 // The address of the global is just (hi(&g)+lo(&g)).
2667 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2668 }
2670 static void setUsesTOCBasePtr(MachineFunction &MF) {
2671 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2672 FuncInfo->setUsesTOCBasePtr();
2673 }
2675 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2676 setUsesTOCBasePtr(DAG.getMachineFunction());
2677 }
2679 SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2680 SDValue GA) const {
2681 const bool Is64Bit = Subtarget.isPPC64();
2682 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2683 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2684 : Subtarget.isAIXABI()
2685 ? DAG.getRegister(PPC::R2, VT)
2686 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2687 SDValue Ops[] = { GA, Reg };
2688 return DAG.getMemIntrinsicNode(
2689 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2690 MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0,
2691 MachineMemOperand::MOLoad);
2692 }
2694 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2695 SelectionDAG &DAG) const {
2696 EVT PtrVT = Op.getValueType();
2697 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2698 const Constant *C = CP->getConstVal();
2700 // 64-bit SVR4 ABI code is always position-independent.
2701 // The actual address of the GlobalValue is stored in the TOC.
2702 if (Subtarget.is64BitELFABI()) {
2703 setUsesTOCBasePtr(DAG);
2704 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2705 return getTOCEntry(DAG, SDLoc(CP), GA);
2706 }
2708 unsigned MOHiFlag, MOLoFlag;
2709 bool IsPIC = isPositionIndependent();
2710 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2712 if (IsPIC && Subtarget.isSVR4ABI()) {
2713 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2714 PPCII::MO_PIC_FLAG);
2715 return getTOCEntry(DAG, SDLoc(CP), GA);
2716 }
2718 SDValue CPIHi =
2719 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2720 SDValue CPILo =
2721 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2722 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2723 }
2725 // For 64-bit PowerPC, prefer the more compact relative encodings.
2726 // This trades 32 bits per jump table entry for one or two instructions
2727 // on the jump site.
2728 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2729 if (isJumpTableRelative())
2730 return MachineJumpTableInfo::EK_LabelDifference32;
2732 return TargetLowering::getJumpTableEncoding();
2733 }
2735 bool PPCTargetLowering::isJumpTableRelative() const {
2736 if (Subtarget.isPPC64())
2737 return true;
2738 return TargetLowering::isJumpTableRelative();
2739 }
2741 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2742 SelectionDAG &DAG) const {
2743 if (!Subtarget.isPPC64())
2744 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2746 switch (getTargetMachine().getCodeModel()) {
2747 case CodeModel::Small:
2748 case CodeModel::Medium:
2749 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2750 default:
2751 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2752 getPointerTy(DAG.getDataLayout()));
2753 }
2754 }
2756 const MCExpr *
2757 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2758 unsigned JTI,
2759 MCContext &Ctx) const {
2760 if (!Subtarget.isPPC64())
2761 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2763 switch (getTargetMachine().getCodeModel()) {
2764 case CodeModel::Small:
2765 case CodeModel::Medium:
2766 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2767 default:
2768 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2769 }
2770 }
2772 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2773 EVT PtrVT = Op.getValueType();
2774 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2776 // 64-bit SVR4 ABI code is always position-independent.
2777 // The actual address of the GlobalValue is stored in the TOC.
2778 if (Subtarget.is64BitELFABI()) {
2779 setUsesTOCBasePtr(DAG);
2780 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2781 return getTOCEntry(DAG, SDLoc(JT), GA);
2782 }
2784 unsigned MOHiFlag, MOLoFlag;
2785 bool IsPIC = isPositionIndependent();
2786 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2788 if (IsPIC && Subtarget.isSVR4ABI()) {
2789 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2790 PPCII::MO_PIC_FLAG);
2791 return getTOCEntry(DAG, SDLoc(GA), GA);
2792 }
2794 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2795 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2796 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2797 }
2799 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2800 SelectionDAG &DAG) const {
2801 EVT PtrVT = Op.getValueType();
2802 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2803 const BlockAddress *BA = BASDN->getBlockAddress();
2805 // 64-bit SVR4 ABI code is always position-independent.
2806 // The actual BlockAddress is stored in the TOC.
2807 if (Subtarget.is64BitELFABI()) {
2808 setUsesTOCBasePtr(DAG);
2809 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2810 return getTOCEntry(DAG, SDLoc(BASDN), GA);
2811 }
2813 // 32-bit position-independent ELF stores the BlockAddress in the .got.
2814 if (Subtarget.is32BitELFABI() && isPositionIndependent())
2815 return getTOCEntry(
2816 DAG, SDLoc(BASDN),
2817 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
2819 unsigned MOHiFlag, MOLoFlag;
2820 bool IsPIC = isPositionIndependent();
2821 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2822 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2823 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2824 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2825 }
2827 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2828 SelectionDAG &DAG) const {
2829 // FIXME: TLS addresses currently use medium model code sequences,
2830 // which is the most useful form. Eventually support for small and
2831 // large models could be added if users need it, at the cost of
2832 // additional complexity.
2833 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2834 if (DAG.getTarget().useEmulatedTLS())
2835 return LowerToTLSEmulatedModel(GA, DAG);
2837 SDLoc dl(GA);
2838 const GlobalValue *GV = GA->getGlobal();
2839 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2840 bool is64bit = Subtarget.isPPC64();
2841 const Module *M = DAG.getMachineFunction().getFunction().getParent();
2842 PICLevel::Level picLevel = M->getPICLevel();
2844 const TargetMachine &TM = getTargetMachine();
2845 TLSModel::Model Model = TM.getTLSModel(GV);
2847 if (Model == TLSModel::LocalExec) {
2848 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2849 PPCII::MO_TPREL_HA);
2850 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2851 PPCII::MO_TPREL_LO);
2852 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
2853 : DAG.getRegister(PPC::R2, MVT::i32);
2855 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2856 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2857 }
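// For reference, the local-exec access this lowers to typically looks
// like the following on 64-bit (register choice is illustrative):
//   addis 3, 13, x@tprel@ha
//   addi  3, 3, x@tprel@l
// with r13 (r2 on 32-bit) holding the thread pointer.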
2859 if (Model == TLSModel::InitialExec) {
2860 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2861 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2862 PPCII::MO_TLS);
2863 SDValue GOTPtr;
2864 if (is64bit) {
2865 setUsesTOCBasePtr(DAG);
2866 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2867 GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2868 PtrVT, GOTReg, TGA);
2869 } else {
2870 if (!TM.isPositionIndependent())
2871 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2872 else if (picLevel == PICLevel::SmallPIC)
2873 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2874 else
2875 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2876 }
2877 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2878 PtrVT, TGA, GOTPtr);
2879 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2880 }
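// For reference, the medium-model initial-exec sequence this typically
// produces on 64-bit (registers illustrative):
//   addis 3, 2, x@got@tprel@ha
//   ld    3, x@got@tprel@l(3)
//   add   3, 3, x@tls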
2882 if (Model == TLSModel::GeneralDynamic) {
2883 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2884 SDValue GOTPtr;
2885 if (is64bit) {
2886 setUsesTOCBasePtr(DAG);
2887 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2888 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2889 GOTReg, TGA);
2890 } else {
2891 if (picLevel == PICLevel::SmallPIC)
2892 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2893 else
2894 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2895 }
2896 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2897 GOTPtr, TGA, TGA);
2898 }
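// For reference, the medium-model general-dynamic sequence this
// typically produces on 64-bit (registers illustrative):
//   addis 3, 2, x@got@tlsgd@ha
//   addi  3, 3, x@got@tlsgd@l
//   bl    __tls_get_addr(x@tlsgd)
//   nop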
2900 if (Model == TLSModel::LocalDynamic) {
2901 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2902 SDValue GOTPtr;
2903 if (is64bit) {
2904 setUsesTOCBasePtr(DAG);
2905 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2906 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2907 GOTReg, TGA);
2908 } else {
2909 if (picLevel == PICLevel::SmallPIC)
2910 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2911 else
2912 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2913 }
2914 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2915 PtrVT, GOTPtr, TGA, TGA);
2916 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2917 PtrVT, TLSAddr, TGA);
2918 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2919 }
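// For reference, the medium-model local-dynamic sequence this typically
// produces on 64-bit (registers illustrative):
//   addis 3, 2, x@got@tlsld@ha
//   addi  3, 3, x@got@tlsld@l
//   bl    __tls_get_addr(x@tlsld)
//   nop
//   addis 3, 3, x@dtprel@ha
//   addi  3, 3, x@dtprel@l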
2921 llvm_unreachable("Unknown TLS model!");
2924 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2925 SelectionDAG &DAG) const {
2926 EVT PtrVT = Op.getValueType();
2927 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2928 SDLoc DL(GSDN);
2929 const GlobalValue *GV = GSDN->getGlobal();
2931 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
2932 // The actual address of the GlobalValue is stored in the TOC.
2933 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2934 setUsesTOCBasePtr(DAG);
2935 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2936 return getTOCEntry(DAG, DL, GA);
2937 }
2939 unsigned MOHiFlag, MOLoFlag;
2940 bool IsPIC = isPositionIndependent();
2941 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
2943 if (IsPIC && Subtarget.isSVR4ABI()) {
2944 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2945 GSDN->getOffset(),
2946 PPCII::MO_PIC_FLAG);
2947 return getTOCEntry(DAG, DL, GA);
2948 }
2950 SDValue GAHi =
2951 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2952 SDValue GALo =
2953 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2955 SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
2957 // If the global reference is actually to a non-lazy-pointer, we have to do an
2958 // extra load to get the address of the global.
2959 if (MOHiFlag & PPCII::MO_NLP_FLAG)
2960 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2961 return Ptr;
2962 }
2964 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2965 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2966 SDLoc dl(Op);
2968 if (Op.getValueType() == MVT::v2i64) {
2969 // When the operands themselves are v2i64 values, we need to do something
2970 // special because VSX has no underlying comparison operations for these.
2971 if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2972 // Equality can be handled by casting to the legal type for Altivec
2973 // comparisons; everything else needs to be expanded.
2974 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2975 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2976 DAG.getSetCC(dl, MVT::v4i32,
2977 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2978 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2979 CC));
2980 }
2982 return SDValue();
2983 }
2985 // We handle most of these in the usual way.
2986 return Op;
2987 }
2989 // If we're comparing for equality to zero, expose the fact that this is
2990 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2991 // fold the new nodes.
2992 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
2993 return V;
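// For example (i32, illustrative): (x == 0) becomes
//   cntlzw r, x     ; r is 32 iff x == 0, otherwise 0..31
//   srwi   r, r, 5  ; r >> 5 is 1 iff r was 32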
2995 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2996 // Leave comparisons against 0 and -1 alone for now, since they're usually
2997 // optimized. FIXME: revisit this when we can custom lower all setcc
2998 // optimizations.
2999 if (C->isAllOnesValue() || C->isNullValue())
3000 return SDValue();
3001 }
3003 // If we have an integer seteq/setne, turn it into a compare against zero
3004 // by xor'ing the rhs with the lhs, which is faster than setting a
3005 // condition register, reading it back out, and masking the correct bit. The
3006 // normal approach here uses sub to do this instead of xor. Using xor exposes
3007 // the result to other bit-twiddling opportunities.
3008 EVT LHSVT = Op.getOperand(0).getValueType();
3009 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3010 EVT VT = Op.getValueType();
3011 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
3012 Op.getOperand(1));
3013 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3014 }
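// For example, (a == b) is emitted as setcc(xor(a, b), 0, seteq); the
// xor result can feed further combines, unlike a sub-based compare.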
3015 return SDValue();
3016 }
3018 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3019 SDNode *Node = Op.getNode();
3020 EVT VT = Node->getValueType(0);
3021 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3022 SDValue InChain = Node->getOperand(0);
3023 SDValue VAListPtr = Node->getOperand(1);
3024 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3025 SDLoc dl(Node);
3027 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3029 // gpr_index
3030 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3031 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3032 InChain = GprIndex.getValue(1);
3034 if (VT == MVT::i64) {
3035 // Check whether GprIndex is odd, since an i64 must start in an even GPR
3036 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3037 DAG.getConstant(1, dl, MVT::i32));
3038 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3039 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3040 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3041 DAG.getConstant(1, dl, MVT::i32));
3042 // Align GprIndex to be even if it isn't
3043 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3044 GprIndex);
3045 }
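// Example (illustrative): with gpr_index == 3 (r6) and VT == i64, the
// index is bumped to 4 so the value is read from the aligned register
// pair r7/r8 rather than straddling an odd boundary.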
3047 // fpr index is 1 byte after gpr
3048 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3049 DAG.getConstant(1, dl, MVT::i32));
3051 // fpr
3052 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3053 FprPtr, MachinePointerInfo(SV), MVT::i8);
3054 InChain = FprIndex.getValue(1);
3056 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3057 DAG.getConstant(8, dl, MVT::i32));
3059 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3060 DAG.getConstant(4, dl, MVT::i32));
3062 // Load the overflow area and register save area pointers.
3063 SDValue OverflowArea =
3064 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3065 InChain = OverflowArea.getValue(1);
3067 SDValue RegSaveArea =
3068 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3069 InChain = RegSaveArea.getValue(1);
3071 // select overflow_area if index >= 8
3072 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3073 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3075 // adjustment constant: gpr_index * 4 or fpr_index * 8
3076 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3077 VT.isInteger() ? GprIndex : FprIndex,
3078 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3079 MVT::i32));
3081 // OurReg = RegSaveArea + RegConstant
3082 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3083 RegConstant);
3085 // Floating types are 32 bytes into RegSaveArea
3086 if (VT.isFloatingPoint())
3087 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3088 DAG.getConstant(32, dl, MVT::i32));
3090 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3091 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3092 VT.isInteger() ? GprIndex : FprIndex,
3093 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3094 MVT::i32));
3096 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3097 VT.isInteger() ? VAListPtr : FprPtr,
3098 MachinePointerInfo(SV), MVT::i8);
3100 // determine if we should load from reg_save_area or overflow_area
3101 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3103 // increase overflow_area by 4/8 if gpr/fpr index >= 8
3104 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3105 DAG.getConstant(VT.isInteger() ? 4 : 8,
3106 dl, MVT::i32));
3108 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3109 OverflowAreaPlusN);
3111 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3112 MachinePointerInfo(), MVT::i32);
3114 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3115 }
3117 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3118 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3120 // We have to copy the entire va_list struct:
3121 // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
3122 return DAG.getMemcpy(Op.getOperand(0), Op,
3123 Op.getOperand(1), Op.getOperand(2),
3124 DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
3125 false, MachinePointerInfo(), MachinePointerInfo());
3126 }
3128 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3129 SelectionDAG &DAG) const {
3130 return Op.getOperand(0);
3131 }
3133 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3134 SelectionDAG &DAG) const {
3135 SDValue Chain = Op.getOperand(0);
3136 SDValue Trmp = Op.getOperand(1); // trampoline
3137 SDValue FPtr = Op.getOperand(2); // nested function
3138 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3139 SDLoc dl(Op);
3141 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3142 bool isPPC64 = (PtrVT == MVT::i64);
3143 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3145 TargetLowering::ArgListTy Args;
3146 TargetLowering::ArgListEntry Entry;
3148 Entry.Ty = IntPtrTy;
3149 Entry.Node = Trmp; Args.push_back(Entry);
3151 // TrampSize == (isPPC64 ? 48 : 40);
3152 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3153 isPPC64 ? MVT::i64 : MVT::i32);
3154 Args.push_back(Entry);
3156 Entry.Node = FPtr; Args.push_back(Entry);
3157 Entry.Node = Nest; Args.push_back(Entry);
3159 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3160 TargetLowering::CallLoweringInfo CLI(DAG);
3161 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3162 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3163 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3165 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3166 return CallResult.second;
3167 }
3169 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3170 MachineFunction &MF = DAG.getMachineFunction();
3171 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3172 EVT PtrVT = getPointerTy(MF.getDataLayout());
3174 SDLoc dl(Op);
3176 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
3177 // vastart just stores the address of the VarArgsFrameIndex slot into the
3178 // memory location argument.
3179 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3180 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3181 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3182 MachinePointerInfo(SV));
3183 }
3185 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3186 // We assume the given va_list has already been allocated.
3188 // typedef struct {
3189 // char gpr; /* index into the array of 8 GPRs
3190 // * stored in the register save area
3191 // * gpr=0 corresponds to r3,
3192 // * gpr=1 to r4, etc.
3193 // */
3194 // char fpr; /* index into the array of 8 FPRs
3195 // * stored in the register save area
3196 // * fpr=0 corresponds to f1,
3197 // * fpr=1 to f2, etc.
3198 // */
3199 // char *overflow_arg_area;
3200 // /* location on stack that holds
3201 // * the next overflow argument
3202 // */
3203 // char *reg_save_area;
3204 // /* where r3:r10 and f1:f8 (if saved)
3205 // * are stored
3206 // */
3207 // } va_list[1];
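// Byte offsets within the va_list, as laid down by the stores below:
// gpr at 0, fpr at 1, two bytes of padding, overflow_arg_area at 4,
// reg_save_area at 8 -- 12 bytes total, matching the memcpy size used
// in LowerVACOPY above.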
3209 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3210 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3211 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3212 PtrVT);
3213 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3214 PtrVT);
3216 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3217 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3219 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3220 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3222 uint64_t FPROffset = 1;
3223 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3225 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3227 // Store first byte : number of int regs
3228 SDValue firstStore =
3229 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3230 MachinePointerInfo(SV), MVT::i8);
3231 uint64_t nextOffset = FPROffset;
3232 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3233 ConstFPROffset);
3235 // Store second byte : number of float regs
3236 SDValue secondStore =
3237 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3238 MachinePointerInfo(SV, nextOffset), MVT::i8);
3239 nextOffset += StackOffset;
3240 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3242 // Store second word : arguments given on stack
3243 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3244 MachinePointerInfo(SV, nextOffset));
3245 nextOffset += FrameOffset;
3246 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3248 // Store third word : arguments given in registers
3249 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3250 MachinePointerInfo(SV, nextOffset));
3251 }
3253 /// FPR - The set of FP registers that should be allocated for arguments
3254 /// on Darwin and AIX.
3255 static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3256 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3257 PPC::F11, PPC::F12, PPC::F13};
3259 /// QFPR - The set of QPX registers that should be allocated for arguments.
3260 static const MCPhysReg QFPR[] = {
3261 PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
3262 PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
3264 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3265 /// the stack.
3266 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3267 unsigned PtrByteSize) {
3268 unsigned ArgSize = ArgVT.getStoreSize();
3269 if (Flags.isByVal())
3270 ArgSize = Flags.getByValSize();
3272 // Round up to multiples of the pointer size, except for array members,
3273 // which are always packed.
3274 if (!Flags.isInConsecutiveRegs())
3275 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3277 return ArgSize;
3278 }
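// For example, a 13-byte byval argument with an 8-byte pointer size
// occupies ((13 + 7) / 8) * 8 == 16 bytes of parameter save area.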
3280 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3281 /// on the stack.
3282 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3283 ISD::ArgFlagsTy Flags,
3284 unsigned PtrByteSize) {
3285 unsigned Align = PtrByteSize;
3287 // Altivec parameters are padded to a 16 byte boundary.
3288 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3289 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3290 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3291 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3292 Align = 16;
3293 // QPX vector types stored in double-precision are padded to a 32 byte
3294 // boundary.
3295 else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
3296 Align = 32;
3298 // ByVal parameters are aligned as requested.
3299 if (Flags.isByVal()) {
3300 unsigned BVAlign = Flags.getByValAlign();
3301 if (BVAlign > PtrByteSize) {
3302 if (BVAlign % PtrByteSize != 0)
3303 llvm_unreachable(
3304 "ByVal alignment is not a multiple of the pointer size");
3306 Align = BVAlign;
3307 }
3308 }
3310 // Array members are always packed to their original alignment.
3311 if (Flags.isInConsecutiveRegs()) {
3312 // If the array member was split into multiple registers, the first
3313 // needs to be aligned to the size of the full type. (Except for
3314 // ppcf128, which is only aligned as its f64 components.)
3315 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3316 Align = OrigVT.getStoreSize();
3317 else
3318 Align = ArgVT.getStoreSize();
3319 }
3321 return Align;
3322 }
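// For example (illustrative): a v4i32 argument gets Align == 16 even
// when PtrByteSize is 8, and a byval aggregate requesting 32-byte
// alignment is placed on a 32-byte boundary.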
3324 /// CalculateStackSlotUsed - Return whether this argument will use its
3325 /// stack slot (instead of being passed in registers). ArgOffset,
3326 /// AvailableFPRs, and AvailableVRs must hold the current argument
3327 /// position, and will be updated to account for this argument.
3328 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
3329 ISD::ArgFlagsTy Flags,
3330 unsigned PtrByteSize,
3331 unsigned LinkageSize,
3332 unsigned ParamAreaSize,
3333 unsigned &ArgOffset,
3334 unsigned &AvailableFPRs,
3335 unsigned &AvailableVRs, bool HasQPX) {
3336 bool UseMemory = false;
3338 // Respect alignment of argument on the stack.
3339 unsigned Align =
3340 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3341 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3342 // If there's no space left in the argument save area, we must
3343 // use memory (this check also catches zero-sized arguments).
3344 if (ArgOffset >= LinkageSize + ParamAreaSize)
3345 UseMemory = true;
3347 // Allocate argument on the stack.
3348 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3349 if (Flags.isInConsecutiveRegsLast())
3350 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3351 // If we overran the argument save area, we must use memory
3352 // (this check catches arguments passed partially in memory)
3353 if (ArgOffset > LinkageSize + ParamAreaSize)
3354 UseMemory = true;
3356 // However, if the argument is actually passed in an FPR or a VR,
3357 // we don't use memory after all.
3358 if (!Flags.isByVal()) {
3359 if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
3360 // QPX registers overlap with the scalar FP registers.
3361 (HasQPX && (ArgVT == MVT::v4f32 ||
3362 ArgVT == MVT::v4f64 ||
3363 ArgVT == MVT::v4i1)))
3364 if (AvailableFPRs > 0) {
3365 --AvailableFPRs;
3366 return false;
3367 }
3368 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3369 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3370 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3371 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3372 if (AvailableVRs > 0) {
3373 --AvailableVRs;
3374 return false;
3375 }
3376 }
3378 return UseMemory;
3379 }
3381 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3382 /// ensure minimum alignment required for target.
3383 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3384 unsigned NumBytes) {
3385 unsigned TargetAlign = Lowering->getStackAlignment();
3386 unsigned AlignMask = TargetAlign - 1;
3387 NumBytes = (NumBytes + AlignMask) & ~AlignMask;
3388 return NumBytes;
3389 }
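// For example, with a 16-byte target stack alignment, NumBytes == 52
// becomes (52 + 15) & ~15 == 64.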
3391 SDValue PPCTargetLowering::LowerFormalArguments(
3392 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3393 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3394 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3395 if (Subtarget.is64BitELFABI())
3396 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3397 InVals);
3398 else if (Subtarget.is32BitELFABI())
3399 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3400 InVals);
3402 // FIXME: We are using this for both AIX and Darwin. We should add appropriate
3403 // AIX testing, and rename it appropriately.
3404 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
3405 InVals);
3406 }
3408 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3409 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3410 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3411 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3413 // 32-bit SVR4 ABI Stack Frame Layout:
3414 // +-----------------------------------+
3415 // +--> | Back chain |
3416 // | +-----------------------------------+
3417 // | | Floating-point register save area |
3418 // | +-----------------------------------+
3419 // | | General register save area |
3420 // | +-----------------------------------+
3421 // | | CR save word |
3422 // | +-----------------------------------+
3423 // | | VRSAVE save word |
3424 // | +-----------------------------------+
3425 // | | Alignment padding |
3426 // | +-----------------------------------+
3427 // | | Vector register save area |
3428 // | +-----------------------------------+
3429 // | | Local variable space |
3430 // | +-----------------------------------+
3431 // | | Parameter list area |
3432 // | +-----------------------------------+
3433 // | | LR save word |
3434 // | +-----------------------------------+
3435 // SP--> +--- | Back chain |
3436 // +-----------------------------------+
3438 // Specifications:
3439 // System V Application Binary Interface PowerPC Processor Supplement
3440 // AltiVec Technology Programming Interface Manual
3442 MachineFunction &MF = DAG.getMachineFunction();
3443 MachineFrameInfo &MFI = MF.getFrameInfo();
3444 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3446 EVT PtrVT = getPointerTy(MF.getDataLayout());
3447 // Potential tail calls could cause overwriting of argument stack slots.
3448 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3449 (CallConv == CallingConv::Fast));
3450 unsigned PtrByteSize = 4;
3452 // Assign locations to all of the incoming arguments.
3453 SmallVector<CCValAssign, 16> ArgLocs;
3454 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3455 *DAG.getContext());
3457 // Reserve space for the linkage area on the stack.
3458 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3459 CCInfo.AllocateStack(LinkageSize, PtrByteSize);
3460 if (useSoftFloat())
3461 CCInfo.PreAnalyzeFormalArguments(Ins);
3463 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3464 CCInfo.clearWasPPCF128();
3466 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3467 CCValAssign &VA = ArgLocs[i];
3469 // Arguments stored in registers.
3470 if (VA.isRegLoc()) {
3471 const TargetRegisterClass *RC;
3472 EVT ValVT = VA.getValVT();
3474 switch (ValVT.getSimpleVT().SimpleTy) {
3475 default:
3476 llvm_unreachable("ValVT not supported by formal arguments Lowering");
3477 case MVT::i1:
3478 case MVT::i32:
3479 RC = &PPC::GPRCRegClass;
3480 break;
3481 case MVT::f32:
3482 if (Subtarget.hasP8Vector())
3483 RC = &PPC::VSSRCRegClass;
3484 else if (Subtarget.hasSPE())
3485 RC = &PPC::SPE4RCRegClass;
3486 else
3487 RC = &PPC::F4RCRegClass;
3488 break;
3489 case MVT::f64:
3490 if (Subtarget.hasVSX())
3491 RC = &PPC::VSFRCRegClass;
3492 else if (Subtarget.hasSPE())
3493 // SPE passes doubles in GPR pairs.
3494 RC = &PPC::GPRCRegClass;
3495 else
3496 RC = &PPC::F8RCRegClass;
3497 break;
3498 case MVT::v16i8:
3499 case MVT::v8i16:
3500 case MVT::v4i32:
3501 RC = &PPC::VRRCRegClass;
3502 break;
3503 case MVT::v4f32:
3504 RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
3505 break;
3506 case MVT::v2f64:
3507 case MVT::v2i64:
3508 RC = &PPC::VRRCRegClass;
3509 break;
3510 case MVT::v4f64:
3511 RC = &PPC::QFRCRegClass;
3512 break;
3513 case MVT::v4i1:
3514 RC = &PPC::QBRCRegClass;
3515 break;
3516 }
3518 SDValue ArgValue;
3519 // Transform the arguments stored in physical registers into
3520 // virtual ones.
3521 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3522 assert(i + 1 < e && "No second half of double precision argument");
3523 unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3524 unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3525 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3526 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3527 if (!Subtarget.isLittleEndian())
3528 std::swap(ArgValueLo, ArgValueHi);
3529 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3530 ArgValueHi);
3531 } else {
3532 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3533 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3534 ValVT == MVT::i1 ? MVT::i32 : ValVT);
3535 if (ValVT == MVT::i1)
3536 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3537 }
3539 InVals.push_back(ArgValue);
3540 } else {
3541 // Argument stored in memory.
3542 assert(VA.isMemLoc());
3544 // Get the extended size of the argument type on the stack
3545 unsigned ArgSize = VA.getLocVT().getStoreSize();
3546 // Get the actual size of the argument type
3547 unsigned ObjSize = VA.getValVT().getStoreSize();
3548 unsigned ArgOffset = VA.getLocMemOffset();
3549 // Stack objects in PPC32 are right justified.
3550 ArgOffset += ArgSize - ObjSize;
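// For example (illustrative): an i8 promoted to an i32 location has
// ArgSize == 4 and ObjSize == 1, so the address is advanced by 3 to
// the byte that actually holds the value.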
3551 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3553 // Create load nodes to retrieve arguments from the stack.
3554 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3555 InVals.push_back(
3556 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3557 }
3558 }
3560 // Assign locations to all of the incoming aggregate by value arguments.
3561 // Aggregates passed by value are stored in the local variable space of the
3562 // caller's stack frame, right above the parameter list area.
3563 SmallVector<CCValAssign, 16> ByValArgLocs;
3564 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3565 ByValArgLocs, *DAG.getContext());
3567 // Reserve stack space for the allocations in CCInfo.
3568 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3570 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3572 // Area that is at least reserved in the caller of this function.
3573 unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3574 MinReservedArea = std::max(MinReservedArea, LinkageSize);
3576 // Set the size that is at least reserved in caller of this function. Tail
3577 // call optimized function's reserved stack space needs to be aligned so that
3578 // taking the difference between two stack areas will result in an aligned
3579 // stack.
3580 MinReservedArea =
3581 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3582 FuncInfo->setMinReservedArea(MinReservedArea);
3584 SmallVector<SDValue, 8> MemOps;
3586 // If the function takes variable number of arguments, make a frame index for
3587 // the start of the first vararg value... for expansion of llvm.va_start.
3588 if (isVarArg) {
3589 static const MCPhysReg GPArgRegs[] = {
3590 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3591 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3592 };
3593 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3595 static const MCPhysReg FPArgRegs[] = {
3596 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3597 PPC::F8
3598 };
3599 unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3601 if (useSoftFloat() || hasSPE())
3602 NumFPArgRegs = 0;
3604 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3605 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3607 // Make room for NumGPArgRegs and NumFPArgRegs.
3608 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3609 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3611 FuncInfo->setVarArgsStackOffset(
3612 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3613 CCInfo.getNextStackOffset(), true));
3615 FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
3616 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3618 // The fixed integer arguments of a variadic function are stored to the
3619 // VarArgsFrameIndex on the stack so that they may be loaded by
3620 // dereferencing the result of va_next.
3621 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3622 // Get an existing live-in vreg, or add a new one.
3623 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3624 if (!VReg)
3625 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3627 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3628 SDValue Store =
3629 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3630 MemOps.push_back(Store);
3631 // Increment the address by four for the next argument to store
3632 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3633 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3634 }
3636 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3637 // is set.
3638 // The double arguments are stored to the VarArgsFrameIndex
3639 // on the stack.
3640 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3641 // Get an existing live-in vreg, or add a new one.
3642 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3643 if (!VReg)
3644 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3646 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3647 SDValue Store =
3648 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3649 MemOps.push_back(Store);
3650 // Increment the address by eight for the next argument to store
3651 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3652 PtrVT);
3653 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3654 }
3655 }
3657 if (!MemOps.empty())
3658 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3660 return Chain;
3661 }
3663 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3664 // value to MVT::i64 and then truncate to the correct register size.
3665 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3666 EVT ObjectVT, SelectionDAG &DAG,
3667 SDValue ArgVal,
3668 const SDLoc &dl) const {
3669 if (Flags.isSExt())
3670 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3671 DAG.getValueType(ObjectVT));
3672 else if (Flags.isZExt())
3673 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3674 DAG.getValueType(ObjectVT));
3676 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3677 }
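// For example (illustrative): a signext i32 argument arrives in the
// low half of an i64 GPR; it is tagged with AssertSext(i32) and then
// truncated back to an i32 value.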
3679 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3680 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3681 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3682 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3683 // TODO: add description of PPC stack frame format, or at least some docs.
3685 bool isELFv2ABI = Subtarget.isELFv2ABI();
3686 bool isLittleEndian = Subtarget.isLittleEndian();
3687 MachineFunction &MF = DAG.getMachineFunction();
3688 MachineFrameInfo &MFI = MF.getFrameInfo();
3689 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3691 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3692 "fastcc not supported on varargs functions");
3694 EVT PtrVT = getPointerTy(MF.getDataLayout());
3695 // Potential tail calls could cause overwriting of argument stack slots.
3696 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3697 (CallConv == CallingConv::Fast));
3698 unsigned PtrByteSize = 8;
3699 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3701 static const MCPhysReg GPR[] = {
3702 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3703 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3704 };
3705 static const MCPhysReg VR[] = {
3706 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3707 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3708 };
3710 const unsigned Num_GPR_Regs = array_lengthof(GPR);
3711 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3712 const unsigned Num_VR_Regs = array_lengthof(VR);
3713 const unsigned Num_QFPR_Regs = Num_FPR_Regs;
3715 // Do a first pass over the arguments to determine whether the ABI
3716 // guarantees that our caller has allocated the parameter save area
3717 // on its stack frame. In the ELFv1 ABI, this is always the case;
3718 // in the ELFv2 ABI, it is true if this is a vararg function or if
3719 // any parameter is located in a stack slot.
3721 bool HasParameterArea = !isELFv2ABI || isVarArg;
3722 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3723 unsigned NumBytes = LinkageSize;
3724 unsigned AvailableFPRs = Num_FPR_Regs;
3725 unsigned AvailableVRs = Num_VR_Regs;
3726 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3727 if (Ins[i].Flags.isNest())
3728 continue;
3730 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3731 PtrByteSize, LinkageSize, ParamAreaSize,
3732 NumBytes, AvailableFPRs, AvailableVRs,
3733 Subtarget.hasQPX()))
3734 HasParameterArea = true;
3735 }
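// For example (illustrative): under ELFv2, a non-varargs function
// taking nine i64 arguments places the ninth at LinkageSize +
// ParamAreaSize, so HasParameterArea becomes true; with eight or fewer
// register arguments it stays false.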
3737 // Add DAG nodes to load the arguments or copy them out of registers. On
3738 // entry to a function on PPC, the arguments start after the linkage area,
3739 // although the first ones are often in registers.
3741 unsigned ArgOffset = LinkageSize;
3742 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3743 unsigned &QFPR_idx = FPR_idx;
3744 SmallVector<SDValue, 8> MemOps;
3745 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
3746 unsigned CurArgIdx = 0;
3747 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3748 SDValue ArgVal;
3749 bool needsLoad = false;
3750 EVT ObjectVT = Ins[ArgNo].VT;
3751 EVT OrigVT = Ins[ArgNo].ArgVT;
3752 unsigned ObjSize = ObjectVT.getStoreSize();
3753 unsigned ArgSize = ObjSize;
3754 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3755 if (Ins[ArgNo].isOrigArg()) {
3756 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3757 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3758 }
3759 // We re-align the argument offset for each argument, except when using the
3760 // fast calling convention, when we need to make sure we do that only when
3761 // we'll actually use a stack slot.
3762 unsigned CurArgOffset, Align;
3763 auto ComputeArgOffset = [&]() {
3764 /* Respect alignment of argument on the stack. */
3765 Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3766 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3767 CurArgOffset = ArgOffset;
3768 };
3770 if (CallConv != CallingConv::Fast) {
3771 ComputeArgOffset();
3773 /* Compute GPR index associated with argument offset. */
3774 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3775 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3776 }
3778 // FIXME the codegen can be much improved in some cases.
3779 // We do not have to keep everything in memory.
3780 if (Flags.isByVal()) {
3781 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3783 if (CallConv == CallingConv::Fast)
3784 ComputeArgOffset();
3786 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
3787 ObjSize = Flags.getByValSize();
3788 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3789 // Empty aggregate parameters do not take up registers. Examples:
3790 // struct { } a;
3791 // union { } b;
3792 // int c[0];
3793 // etc. However, we have to provide a place-holder in InVals, so
3794 // pretend we have an 8-byte item at the current address for that
3795 // purpose.
3796 if (!ObjSize) {
3797 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
3798 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3799 InVals.push_back(FIN);
3800 continue;
3801 }
3803 // Create a stack object covering all stack doublewords occupied
3804 // by the argument. If the argument is (fully or partially) on
3805 // the stack, or if the argument is fully in registers but the
3806 // caller has allocated the parameter save anyway, we can refer
3807 // directly to the caller's stack frame. Otherwise, create a
3808 // local copy in our own frame.
3809 int FI;
3810 if (HasParameterArea ||
3811 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
3812 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
3813 else
3814 FI = MFI.CreateStackObject(ArgSize, Align, false);
3815 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3817 // Handle aggregates smaller than 8 bytes.
3818 if (ObjSize < PtrByteSize) {
3819 // The value of the object is its address, which differs from the
3820 // address of the enclosing doubleword on big-endian systems.
3821 SDValue Arg = FIN;
3822 if (!isLittleEndian) {
3823 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
3824 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
3825 }
3826 InVals.push_back(Arg);
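// For example (illustrative): a 3-byte aggregate on big-endian occupies
// bytes 5..7 of its doubleword, so the value address is FIN + (8 - 3).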
3828 if (GPR_idx != Num_GPR_Regs) {
3829 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3830 FuncInfo->addLiveInAttr(VReg, Flags);
3831 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3832 SDValue Store;
3834 if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
3835 EVT ObjType = (ObjSize == 1 ? MVT::i8 :
3836 (ObjSize == 2 ? MVT::i16 : MVT::i32));
3837 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
3838 MachinePointerInfo(&*FuncArg), ObjType);
3839 } else {
3840 // For sizes that don't fit a truncating store (3, 5, 6, 7),
3841 // store the whole register as-is to the parameter save area
3842 // slot.
3843 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3844 MachinePointerInfo(&*FuncArg));
3845 }
3847 MemOps.push_back(Store);
3848 }
3849 // Whether we copied from a register or not, advance the offset
3850 // into the parameter save area by a full doubleword.
3851 ArgOffset += PtrByteSize;
3852 continue;
3853 }
3855 // The value of the object is its address, which is the address of
3856 // its first stack doubleword.
3857 InVals.push_back(FIN);
3859 // Store whatever pieces of the object are in registers to memory.
3860 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3861 if (GPR_idx == Num_GPR_Regs)
3862 break;
3864 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3865 FuncInfo->addLiveInAttr(VReg, Flags);
3866 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3867 SDValue Addr = FIN;
3868 if (j) {
3869 SDValue Off = DAG.getConstant(j, dl, PtrVT);
3870 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
3871 }
3872 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
3873 MachinePointerInfo(&*FuncArg, j));
3874 MemOps.push_back(Store);
3875 ++GPR_idx;
3876 }
3877 ArgOffset += ArgSize;
3878 continue;
3879 }
3881 switch (ObjectVT.getSimpleVT().SimpleTy) {
3882 default: llvm_unreachable("Unhandled argument type!");
3883 case MVT::i1:
3884 case MVT::i32:
3885 case MVT::i64:
3886 if (Flags.isNest()) {
3887 // The 'nest' parameter, if any, is passed in R11.
3888 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
3889 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3891 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3892 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3894 break;
3895 }
3897 // These can be scalar arguments or elements of an integer array type
3898 // passed directly. Clang may use those instead of "byval" aggregate
3899 // types to avoid forcing arguments to memory unnecessarily.
3900 if (GPR_idx != Num_GPR_Regs) {
3901 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3902 FuncInfo->addLiveInAttr(VReg, Flags);
3903 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3905 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3906 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3907 // value to MVT::i64 and then truncate to the correct register size.
3908 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3909 } else {
3910 if (CallConv == CallingConv::Fast)
3911 ComputeArgOffset();
3913 needsLoad = true;
3914 ArgSize = PtrByteSize;
3915 }
3916 if (CallConv != CallingConv::Fast || needsLoad)
3917 ArgOffset += 8;
3918 break;
3920 case MVT::f32:
3921 case MVT::f64:
3922 // These can be scalar arguments or elements of a float array type
3923 // passed directly. The latter are used to implement ELFv2 homogeneous
3924 // float aggregates.
3925 if (FPR_idx != Num_FPR_Regs) {
3926 unsigned VReg;
3928 if (ObjectVT == MVT::f32)
3929 VReg = MF.addLiveIn(FPR[FPR_idx],
3930 Subtarget.hasP8Vector()
3931 ? &PPC::VSSRCRegClass
3932 : &PPC::F4RCRegClass);
3933 else
3934 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
3935 ? &PPC::VSFRCRegClass
3936 : &PPC::F8RCRegClass);
3938 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3939 ++FPR_idx;
3940 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
3941 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3942 // once we support fp <-> gpr moves.
3944 // This can only ever happen in the presence of f32 array types,
3945 // since otherwise we never run out of FPRs before running out
3946 // of GPRs.
3947 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3948 FuncInfo->addLiveInAttr(VReg, Flags);
3949 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3951 if (ObjectVT == MVT::f32) {
3952 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
3953 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
3954 DAG.getConstant(32, dl, MVT::i32));
3955 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
3956 }
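// For example (illustrative): on big-endian, an f32 array element at a
// doubleword-aligned offset arrives in the high 32 bits of the GPR and
// needs the shift above; on little-endian the shift fires when the
// element sits at offset 4 within its doubleword.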
3958 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
3959 } else {
3960 if (CallConv == CallingConv::Fast)
3961 ComputeArgOffset();
3963 needsLoad = true;
3964 }
3966 // When passing an array of floats, the array occupies consecutive
3967 // space in the argument area; only round up to the next doubleword
3968 // at the end of the array. Otherwise, each float takes 8 bytes.
3969 if (CallConv != CallingConv::Fast || needsLoad) {
3970 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
3971 ArgOffset += ArgSize;
3972 if (Flags.isInConsecutiveRegsLast())
3973 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3974 }
3975 break;
3976 case MVT::v4f32:
3977 case MVT::v4i32:
3978 case MVT::v8i16:
3979 case MVT::v16i8:
3980 case MVT::v2f64:
3981 case MVT::v2i64:
3982 case MVT::v1i128:
3983 case MVT::f128:
3984 if (!Subtarget.hasQPX()) {
3985 // These can be scalar arguments or elements of a vector array type
3986 // passed directly. The latter are used to implement ELFv2 homogeneous
3987 // vector aggregates.
3988 if (VR_idx != Num_VR_Regs) {
3989 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3990 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3991 ++VR_idx;
3992 } else {
3993 if (CallConv == CallingConv::Fast)
3994 ComputeArgOffset();
3995 needsLoad = true;
3996 }
3997 if (CallConv != CallingConv::Fast || needsLoad)
3998 ArgOffset += 16;
3999 break;
4000 } // not QPX
4002 assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
4003 "Invalid QPX parameter type");
4004 LLVM_FALLTHROUGH;
4006 case MVT::v4f64:
4007 case MVT::v4i1:
4008 // QPX vectors are treated like their scalar floating-point subregisters
4009 // (except that they're larger).
4010 unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
4011 if (QFPR_idx != Num_QFPR_Regs) {
4012 const TargetRegisterClass *RC;
4013 switch (ObjectVT.getSimpleVT().SimpleTy) {
4014 case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
4015 case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
4016 default: RC = &PPC::QBRCRegClass; break;
4017 }
4019 unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
4020 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4021 ++QFPR_idx;
4022 } else {
4023 if (CallConv == CallingConv::Fast)
4024 ComputeArgOffset();
4025 needsLoad = true;
4026 }
4027 if (CallConv != CallingConv::Fast || needsLoad)
4028 ArgOffset += Sz;
4029 break;
4030 }
4032 // We need to load the argument to a virtual register if we determined
4033 // above that we ran out of physical registers of the appropriate type.
4034 if (needsLoad) {
4035 if (ObjSize < ArgSize && !isLittleEndian)
4036 CurArgOffset += ArgSize - ObjSize;
4037 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4038 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4039 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4040 }
4042 InVals.push_back(ArgVal);
4043 }
4045 // Area that is at least reserved in the caller of this function.
4046 unsigned MinReservedArea;
4047 if (HasParameterArea)
4048 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4049 else
4050 MinReservedArea = LinkageSize;
4052 // Set the size that is at least reserved in caller of this function. Tail
4053 // call optimized functions' reserved stack space needs to be aligned so that
4054 // taking the difference between two stack areas will result in an aligned
4055 // stack.
4056 MinReservedArea =
4057 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4058 FuncInfo->setMinReservedArea(MinReservedArea);
4060 // If the function takes variable number of arguments, make a frame index for
4061 // the start of the first vararg value... for expansion of llvm.va_start.
4062 if (isVarArg) {
4063 int Depth = ArgOffset;
4065 FuncInfo->setVarArgsFrameIndex(
4066 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4067 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4069 // If this function is vararg, store any remaining integer argument regs
4070 // to their spots on the stack so that they may be loaded by dereferencing
4071 // the result of va_next.
4072 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4073 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4074 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4075 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4076 SDValue Store =
4077 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4078 MemOps.push_back(Store);
4079 // Increment the address by four for the next argument to store
4080 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4081 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4082 }
4083 }
4085 if (!MemOps.empty())
4086 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4088 return Chain;
4089 }
4091 SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
4092 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4093 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4094 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4095 // TODO: add description of PPC stack frame format, or at least some docs.
4097 MachineFunction &MF = DAG.getMachineFunction();
4098 MachineFrameInfo &MFI = MF.getFrameInfo();
4099 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4101 EVT PtrVT = getPointerTy(MF.getDataLayout());
4102 bool isPPC64 = PtrVT == MVT::i64;
4103 // Potential tail calls could cause overwriting of argument stack slots.
4104 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4105 (CallConv == CallingConv::Fast));
4106 unsigned PtrByteSize = isPPC64 ? 8 : 4;
4107 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4108 unsigned ArgOffset = LinkageSize;
4109 // Area that is at least reserved in caller of this function.
4110 unsigned MinReservedArea = ArgOffset;
4112 static const MCPhysReg GPR_32[] = { // 32-bit registers.
4113 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4114 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4115 };
4116 static const MCPhysReg GPR_64[] = { // 64-bit registers.
4117 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4118 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4119 };
4120 static const MCPhysReg VR[] = {
4121 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4122 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4123 };
4125 const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
4126 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4127 const unsigned Num_VR_Regs = array_lengthof(VR);
4129 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4131 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4133 // In 32-bit non-varargs functions, the stack space for vectors is placed
4134 // after the stack space for non-vectors. We do not use this space unless
4135 // we have too many vectors to fit in registers, something that only occurs
4136 // in constructed examples, but we still have to walk the argument list to
4137 // handle the pathological case: compute VecArgOffset as the start of the
4138 // vector parameter area. Computing VecArgOffset is the entire point of the
4139 // following loop.
4140 unsigned VecArgOffset = ArgOffset;
4141 if (!isVarArg && !isPPC64) {
4142 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
4143 ++ArgNo) {
4144 EVT ObjectVT = Ins[ArgNo].VT;
4145 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4147 if (Flags.isByVal()) {
4148 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of regs.
4149 unsigned ObjSize = Flags.getByValSize();
4150 unsigned ArgSize =
4151 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4152 VecArgOffset += ArgSize;
4153 continue;
4154 }
4156 switch(ObjectVT.getSimpleVT().SimpleTy) {
4157 default: llvm_unreachable("Unhandled argument type!");
4158 case MVT::i1:
4159 case MVT::i32:
4160 case MVT::f32:
4161 VecArgOffset += 4;
4162 break;
4163 case MVT::i64: // PPC64
4164 case MVT::f64:
4165 // FIXME: We are guaranteed to be !isPPC64 at this point.
4166 // Does MVT::i64 apply?
4167 VecArgOffset += 8;
4168 break;
4169 case MVT::v4f32:
4170 case MVT::v4i32:
4171 case MVT::v8i16:
4172 case MVT::v16i8:
4173 // Nothing to do, we're only looking at non-vector args here.
4174 break;
4175 }
4176 }
4178 // We've found where the vector parameter area in memory is. Skip the
4179 // first 12 parameters; these don't use that memory.
4180 VecArgOffset = ((VecArgOffset+15)/16)*16;
4181 VecArgOffset += 12*16;
4182 }
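// For example (illustrative, assuming the usual 24-byte Darwin 32-bit
// linkage area): with a single i32 argument, VecArgOffset is 24 + 4,
// rounded up to 32, plus 12*16 == 192, giving a vector area at 224.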
4183 // Add DAG nodes to load the arguments or copy them out of registers. On
4184 // entry to a function on PPC, the arguments start after the linkage area,
4185 // although the first ones are often in registers.
4187 SmallVector<SDValue, 8> MemOps;
4188 unsigned nAltivecParamsAtEnd = 0;
4189 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4190 unsigned CurArgIdx = 0;
4191 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4192 SDValue ArgVal;
4193 bool needsLoad = false;
4194 EVT ObjectVT = Ins[ArgNo].VT;
4195 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
4196 unsigned ArgSize = ObjSize;
4197 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4198 if (Ins[ArgNo].isOrigArg()) {
4199 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4200 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4201 }
4202 unsigned CurArgOffset = ArgOffset;
4204 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
4205 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
4206 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
4207 if (isVarArg || isPPC64) {
4208 MinReservedArea = ((MinReservedArea+15)/16)*16;
4209 MinReservedArea += CalculateStackSlotSize(ObjectVT,
4210 Flags,
4211 PtrByteSize);
4212 } else nAltivecParamsAtEnd++;
4213 } else
4214 // Calculate min reserved area.
4215 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
4216 Flags,
4217 PtrByteSize);
4219 // FIXME the codegen can be much improved in some cases.
4220 // We do not have to keep everything in memory.
4221 if (Flags.isByVal()) {
4222 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4224 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
4225 ObjSize = Flags.getByValSize();
4226 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4227 // Objects of size 1 and 2 are right justified, everything else is
4228 // left justified. This means the memory address is adjusted forwards.
4229 if (ObjSize==1 || ObjSize==2) {
4230 CurArgOffset = CurArgOffset + (4 - ObjSize);
4231 }
4232 // The value of the object is its address.
4233 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4234 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4235 InVals.push_back(FIN);
4236 if (ObjSize==1 || ObjSize==2) {
4237 if (GPR_idx != Num_GPR_Regs) {
4238 unsigned VReg;
4239 if (isPPC64)
4240 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4241 else
4242 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4243 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4244 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4245 SDValue Store =
4246 DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4247 MachinePointerInfo(&*FuncArg), ObjType);
4248 MemOps.push_back(Store);
4249 ++GPR_idx;
4250 }
4252 ArgOffset += PtrByteSize;
4254 continue;
4255 }
4256 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4257 // Store whatever pieces of the object are in registers
4258 // to memory. ArgOffset will be the address of the beginning
4259 // of the object.
4260 if (GPR_idx != Num_GPR_Regs) {
4261 unsigned VReg;
4262 if (isPPC64)
4263 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4264 else
4265 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4266 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4267 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4268 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4269 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4270 MachinePointerInfo(&*FuncArg, j));
4271 MemOps.push_back(Store);
4272 ++GPR_idx;
4273 ArgOffset += PtrByteSize;
4274 } else {
4275 ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4276 break;
4277 }
4278 }
4279 continue;
4280 }
4282 switch (ObjectVT.getSimpleVT().SimpleTy) {
4283 default: llvm_unreachable("Unhandled argument type!");
4284 case MVT::i1:
4285 case MVT::i32:
4286 if (!isPPC64) {
4287 if (GPR_idx != Num_GPR_Regs) {
4288 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4289 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4291 if (ObjectVT == MVT::i1)
4292 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4294 ++GPR_idx;
4295 } else {
4296 needsLoad = true;
4297 ArgSize = PtrByteSize;
4298 }
4299 // All int arguments reserve stack space in the Darwin ABI.
4300 ArgOffset += PtrByteSize;
4301 break;
4302 }
4303 LLVM_FALLTHROUGH;
4304 case MVT::i64: // PPC64
4305 if (GPR_idx != Num_GPR_Regs) {
4306 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4307 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4309 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4310 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4311 // value to MVT::i64 and then truncate to the correct register size.
4312 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4314 ++GPR_idx;
4315 } else {
4316 needsLoad = true;
4317 ArgSize = PtrByteSize;
4318 }
4319 // All int arguments reserve stack space in the Darwin ABI.
4320 ArgOffset += 8;
4321 break;
4323 case MVT::f32:
4324 case MVT::f64:
4325 // Every 4 bytes of argument space consumes one of the GPRs available for
4326 // argument passing.
4327 if (GPR_idx != Num_GPR_Regs) {
4328 ++GPR_idx;
4329 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4330 ++GPR_idx;
4331 }
4332 if (FPR_idx != Num_FPR_Regs) {
4333 unsigned VReg;
4335 if (ObjectVT == MVT::f32)
4336 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4337 else
4338 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4340 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4341 ++FPR_idx;
4342 } else {
4343 needsLoad = true;
4346 // All FP arguments reserve stack space in the Darwin ABI.
4347 ArgOffset += isPPC64 ? 8 : ObjSize;
4348 break;
4349 case MVT::v4f32:
4350 case MVT::v4i32:
4351 case MVT::v8i16:
4352 case MVT::v16i8:
4353 // Note that vector arguments in registers don't reserve stack space,
4354 // except in varargs functions.
4355 if (VR_idx != Num_VR_Regs) {
4356 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4357 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4358 if (isVarArg) {
4359 while ((ArgOffset % 16) != 0) {
4360 ArgOffset += PtrByteSize;
4361 if (GPR_idx != Num_GPR_Regs)
4362 GPR_idx++;
4364 ArgOffset += 16;
4365 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4367 ++VR_idx;
4368 } else {
4369 if (!isVarArg && !isPPC64) {
4370 // Vectors go after all the nonvectors.
4371 CurArgOffset = VecArgOffset;
4372 VecArgOffset += 16;
4373 } else {
4374 // Vectors are aligned.
4375 ArgOffset = ((ArgOffset+15)/16)*16;
4376 CurArgOffset = ArgOffset;
4377 ArgOffset += 16;
4379 needsLoad = true;
4381 break;
4384 // We need to load the argument to a virtual register if we determined above
4385 // that we ran out of physical registers of the appropriate type.
4386 if (needsLoad) {
4387 int FI = MFI.CreateFixedObject(ObjSize,
4388 CurArgOffset + (ArgSize - ObjSize),
4389 isImmutable);
4390 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4391 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4394 InVals.push_back(ArgVal);
4397 // Allow for Altivec parameters at the end, if needed.
4398 if (nAltivecParamsAtEnd) {
4399 MinReservedArea = ((MinReservedArea+15)/16)*16;
4400 MinReservedArea += 16*nAltivecParamsAtEnd;
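// For illustration (hypothetical numbers): with MinReservedArea = 56 and
// nAltivecParamsAtEnd = 2, the area is first rounded up to 64 for 16-byte
// alignment and then grows by 16 * 2 = 32, giving 96 bytes in total.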
4403 // Area that is at least reserved in the caller of this function.
4404 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4406 // Set the size that is at least reserved in the caller of this function. Tail
4407 // call optimized functions' reserved stack space needs to be aligned so that
4408 // taking the difference between two stack areas will result in an aligned
4409 // stack.
4410 MinReservedArea =
4411 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4412 FuncInfo->setMinReservedArea(MinReservedArea);
4414 // If the function takes a variable number of arguments, make a frame index for
4415 // the start of the first vararg value... for expansion of llvm.va_start.
4416 if (isVarArg) {
4417 int Depth = ArgOffset;
4419 FuncInfo->setVarArgsFrameIndex(
4420 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4421 Depth, true));
4422 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4424 // If this function is vararg, store any remaining integer argument regs
4425 // to their spots on the stack so that they may be loaded by dereferencing
4426 // the result of va_next.
4427 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4428 unsigned VReg;
4430 if (isPPC64)
4431 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4432 else
4433 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4435 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4436 SDValue Store =
4437 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4438 MemOps.push_back(Store);
4439 // Increment the address by the pointer size for the next argument to store.
4440 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4441 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4445 if (!MemOps.empty())
4446 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4448 return Chain;
4451 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4452 /// adjusted to accommodate the arguments for the tail call.
4453 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4454 unsigned ParamSize) {
4456 if (!isTailCall) return 0;
4458 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4459 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4460 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4461 // Remember only if the new adjustment is bigger.
4462 if (SPDiff < FI->getTailCallSPDelta())
4463 FI->setTailCallSPDelta(SPDiff);
4465 return SPDiff;
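// A worked example with illustrative values: if the caller's MinReservedArea
// is 112 bytes and the tail call needs ParamSize = 128 bytes of argument
// space, SPDiff = 112 - 128 = -16, i.e. the stack must be grown by 16 bytes
// before branching to the callee; the more negative delta is what gets
// recorded above.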
4468 static bool isFunctionGlobalAddress(SDValue Callee);
4470 static bool
4471 callsShareTOCBase(const Function *Caller, SDValue Callee,
4472 const TargetMachine &TM) {
4473 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4474 // don't have enough information to determine if the caller and callee share
4475 // the same TOC base, so we have to pessimistically assume they don't for
4476 // correctness.
4477 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4478 if (!G)
4479 return false;
4481 const GlobalValue *GV = G->getGlobal();
4482 // The medium and large code models are expected to provide a sufficiently
4483 // large TOC to satisfy all data addressing needs of a module with a
4484 // single TOC. Since each module will be addressed with a single TOC, we
4485 // only need to check that caller and callee don't cross DSO boundaries.
4486 if (CodeModel::Medium == TM.getCodeModel() ||
4487 CodeModel::Large == TM.getCodeModel())
4488 return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV);
4490 // Otherwise we need to ensure callee and caller are in the same section,
4491 // since the linker may allocate multiple TOCs, and we don't know which
4492 // sections will belong to the same TOC base.
4494 if (!GV->isStrongDefinitionForLinker())
4495 return false;
4497 // Any explicitly-specified sections and section prefixes must also match.
4498 // Also, if we're using -ffunction-sections, then each function is always in
4499 // a different section (the same is true for COMDAT functions).
4500 if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4501 GV->getSection() != Caller->getSection())
4502 return false;
4503 if (const auto *F = dyn_cast<Function>(GV)) {
4504 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4505 return false;
4508 // If the callee might be interposed, then we can't assume the ultimate call
4509 // target will be in the same section. Even in cases where we can assume that
4510 // interposition won't happen, in any case where the linker might insert a
4511 // stub to allow for interposition, we must generate code as though
4512 // interposition might occur. To understand why this matters, consider a
4513 // situation where: a -> b -> c where the arrows indicate calls. b and c are
4514 // in the same section, but a is in a different module (i.e. has a different
4515 // TOC base pointer). If the linker allows for interposition between b and c,
4516 // then it will generate a stub for the call edge between b and c which will
4517 // save the TOC pointer into the designated stack slot allocated by b. If we
4518 // return true here, and therefore allow a tail call between b and c, that
4519 // stack slot won't exist and the b -> c stub will end up saving b's TOC base
4520 // pointer into the stack slot allocated by a (where the a -> b stub saved
4521 // a's TOC base pointer). The same reasoning applies when we're not
4522 // considering a tail call but deciding whether a nop is needed after the
4523 // call instruction in b: since the linker might insert a stub, it may
4524 // complain about a missing nop if we omit one (although many don't).
4525 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4526 return false;
4528 return true;
4531 static bool
4532 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4533 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4534 assert(Subtarget.is64BitELFABI());
4536 const unsigned PtrByteSize = 8;
4537 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4539 static const MCPhysReg GPR[] = {
4540 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4541 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4543 static const MCPhysReg VR[] = {
4544 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4545 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4548 const unsigned NumGPRs = array_lengthof(GPR);
4549 const unsigned NumFPRs = 13;
4550 const unsigned NumVRs = array_lengthof(VR);
4551 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4553 unsigned NumBytes = LinkageSize;
4554 unsigned AvailableFPRs = NumFPRs;
4555 unsigned AvailableVRs = NumVRs;
4557 for (const ISD::OutputArg& Param : Outs) {
4558 if (Param.Flags.isNest()) continue;
4560 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
4561 PtrByteSize, LinkageSize, ParamAreaSize,
4562 NumBytes, AvailableFPRs, AvailableVRs,
4563 Subtarget.hasQPX()))
4564 return true;
4566 return false;
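// As an illustrative example: a callee taking nine i64 arguments exhausts
// the eight parameter GPRs (X3-X10) listed above, so the ninth argument
// requires a stack slot and this function returns true.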
4569 static bool
4570 hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
4571 if (CS.arg_size() != CallerFn->arg_size())
4572 return false;
4574 ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin();
4575 ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end();
4576 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4578 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4579 const Value* CalleeArg = *CalleeArgIter;
4580 const Value* CallerArg = &(*CallerArgIter);
4581 if (CalleeArg == CallerArg)
4582 continue;
4584 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4585 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4586 // }
4587 // The callee's 1st argument is undef and has the same type as the caller's.
4588 if (CalleeArg->getType() == CallerArg->getType() &&
4589 isa<UndefValue>(CalleeArg))
4590 continue;
4592 return false;
4595 return true;
4598 // Returns true if TCO is possible between the caller's and callee's
4599 // calling conventions.
4600 static bool
4601 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4602 CallingConv::ID CalleeCC) {
4603 // Tail calls are possible with fastcc and ccc.
4604 auto isTailCallableCC = [] (CallingConv::ID CC){
4605 return CC == CallingConv::C || CC == CallingConv::Fast;
4607 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4608 return false;
4610 // We can safely tail call both fastcc and ccc callees from a C calling
4611 // convention caller. If the caller is fastcc, we may have less stack space
4612 // than a non-fastcc caller with the same signature so disable tail-calls in
4613 // that case.
4614 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
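// Illustrative summary of the rule above (caller -> callee): ccc -> ccc,
// ccc -> fastcc and fastcc -> fastcc are eligible, but fastcc -> ccc is not,
// since a fastcc caller may have reserved less stack space.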
4617 bool
4618 PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4619 SDValue Callee,
4620 CallingConv::ID CalleeCC,
4621 ImmutableCallSite CS,
4622 bool isVarArg,
4623 const SmallVectorImpl<ISD::OutputArg> &Outs,
4624 const SmallVectorImpl<ISD::InputArg> &Ins,
4625 SelectionDAG& DAG) const {
4626 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4628 if (DisableSCO && !TailCallOpt) return false;
4630 // Variadic argument functions are not supported.
4631 if (isVarArg) return false;
4633 auto &Caller = DAG.getMachineFunction().getFunction();
4634 // Check that the calling conventions are compatible for tco.
4635 if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4636 return false;
4638 // Callers with byval parameters are not supported.
4639 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4640 return false;
4642 // Callees with byval parameters are not supported either.
4643 // Note: This is a quick workaround, because in some cases, e.g.
4644 // caller's stack size > callee's stack size, we are still able to apply
4645 // sibling call optimization. For example, gcc is able to do SCO for caller1
4646 // in the following example, but not for caller2.
4647 // struct test {
4648 // long int a;
4649 // char ary[56];
4650 // } gTest;
4651 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4652 // b->a = v.a;
4653 // return 0;
4654 // }
4655 // void caller1(struct test a, struct test c, struct test *b) {
4656 // callee(gTest, b); }
4657 // void caller2(struct test *b) { callee(gTest, b); }
4658 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4659 return false;
4661 // If callee and caller use different calling conventions, we cannot pass
4662 // parameters on stack since offsets for the parameter area may be different.
4663 if (Caller.getCallingConv() != CalleeCC &&
4664 needStackSlotPassParameters(Subtarget, Outs))
4665 return false;
4667 // No TCO/SCO on indirect calls because the caller has to restore its TOC.
4668 if (!isFunctionGlobalAddress(Callee) &&
4669 !isa<ExternalSymbolSDNode>(Callee))
4670 return false;
4672 // If the caller and callee potentially have different TOC bases then we
4673 // cannot tail call since we need to restore the TOC pointer after the call.
4674 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4675 if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4676 return false;
4678 // TCO allows altering the callee's ABI, so we don't have to check further.
4679 if (CalleeCC == CallingConv::Fast && TailCallOpt)
4680 return true;
4682 if (DisableSCO) return false;
4684 // If the callee uses the same argument list as the caller, we can apply
4685 // SCO in this case. If not, we need to check whether the callee needs
4686 // stack for passing arguments.
4687 if (!hasSameArgumentList(&Caller, CS) &&
4688 needStackSlotPassParameters(Subtarget, Outs)) {
4689 return false;
4692 return true;
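// An illustrative case that passes all of the checks above (hypothetical
// code; both functions use the C calling convention, take no byval
// arguments, and share a TOC base):
//   int target(int x); // strong definition in the same section
//   int wrapper(int x) { return target(x + 1); } // eligible for SCO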
4695 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4696 /// for tail call optimization. Targets which want to do tail call
4697 /// optimization should implement this function.
4698 bool
4699 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4700 CallingConv::ID CalleeCC,
4701 bool isVarArg,
4702 const SmallVectorImpl<ISD::InputArg> &Ins,
4703 SelectionDAG& DAG) const {
4704 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4705 return false;
4707 // Variable argument functions are not supported.
4708 if (isVarArg)
4709 return false;
4711 MachineFunction &MF = DAG.getMachineFunction();
4712 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4713 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4714 // Functions containing by val parameters are not supported.
4715 for (unsigned i = 0; i != Ins.size(); i++) {
4716 ISD::ArgFlagsTy Flags = Ins[i].Flags;
4717 if (Flags.isByVal()) return false;
4720 // Non-PIC/GOT tail calls are supported.
4721 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4722 return true;
4724 // At the moment we can only do local tail calls (in same module, hidden
4725 // or protected) if we are generating PIC.
4726 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4727 return G->getGlobal()->hasHiddenVisibility()
4728 || G->getGlobal()->hasProtectedVisibility();
4731 return false;
4734 /// isBLACompatibleAddress - Return the immediate to use if the specified
4735 /// 32-bit value is representable in the immediate field of a BLA instruction.
4736 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4737 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4738 if (!C) return nullptr;
4740 int Addr = C->getZExtValue();
4741 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4742 SignExtend32<26>(Addr) != Addr)
4743 return nullptr; // Top 6 bits have to be sext of immediate.
4745 return DAG
4746 .getConstant(
4747 (int)C->getZExtValue() >> 2, SDLoc(Op),
4748 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4749 .getNode();
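// Worked examples with illustrative values: Addr = 0x00800000 has its low
// two bits clear and sign-extends from 26 bits to itself, so it is accepted
// and the returned immediate is 0x00800000 >> 2 = 0x00200000. In contrast,
// Addr = 0x04000000 fails the SignExtend32<26> check and nullptr is returned.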
4752 namespace {
4754 struct TailCallArgumentInfo {
4755 SDValue Arg;
4756 SDValue FrameIdxOp;
4757 int FrameIdx = 0;
4759 TailCallArgumentInfo() = default;
4762 } // end anonymous namespace
4764 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4765 static void StoreTailCallArgumentsToStackSlot(
4766 SelectionDAG &DAG, SDValue Chain,
4767 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4768 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4769 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4770 SDValue Arg = TailCallArgs[i].Arg;
4771 SDValue FIN = TailCallArgs[i].FrameIdxOp;
4772 int FI = TailCallArgs[i].FrameIdx;
4773 // Store relative to the frame pointer.
4774 MemOpChains.push_back(DAG.getStore(
4775 Chain, dl, Arg, FIN,
4776 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4780 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4781 /// the appropriate stack slot for the tail call optimized function call.
4782 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4783 SDValue OldRetAddr, SDValue OldFP,
4784 int SPDiff, const SDLoc &dl) {
4785 if (SPDiff) {
4786 // Calculate the new stack slot for the return address.
4787 MachineFunction &MF = DAG.getMachineFunction();
4788 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4789 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4790 bool isPPC64 = Subtarget.isPPC64();
4791 int SlotSize = isPPC64 ? 8 : 4;
4792 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4793 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4794 NewRetAddrLoc, true);
4795 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4796 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4797 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4798 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4800 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4801 // slot as the FP is never overwritten.
4802 if (Subtarget.isDarwinABI()) {
4803 int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
4804 int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
4805 true);
4806 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
4807 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
4808 MachinePointerInfo::getFixedStack(
4809 DAG.getMachineFunction(), NewFPIdx));
4812 return Chain;
4815 /// CalculateTailCallArgDest - Remember the argument for later processing and
4816 /// calculate the position of the argument.
4817 static void
4818 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4819 SDValue Arg, int SPDiff, unsigned ArgOffset,
4820 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4821 int Offset = ArgOffset + SPDiff;
4822 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4823 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4824 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4825 SDValue FIN = DAG.getFrameIndex(FI, VT);
4826 TailCallArgumentInfo Info;
4827 Info.Arg = Arg;
4828 Info.FrameIdxOp = FIN;
4829 Info.FrameIdx = FI;
4830 TailCallArguments.push_back(Info);
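// For example (illustrative values): with ArgOffset = 48 and SPDiff = -16,
// the argument gets a fixed object at offset 32 relative to the adjusted
// stack pointer.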
4833 /// EmitTailCallLoadFPAndRetAddr - Emit loads of the return address and frame
4834 /// pointer stack slots. Returns the chain as result and the loaded values in
4835 /// LROpOut/FPOpOut. Used when tail calling.
4836 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4837 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4838 SDValue &FPOpOut, const SDLoc &dl) const {
4839 if (SPDiff) {
4840 // Load the LR and FP stack slot for later adjusting.
4841 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4842 LROpOut = getReturnAddrFrameIndex(DAG);
4843 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4844 Chain = SDValue(LROpOut.getNode(), 1);
4846 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4847 // slot as the FP is never overwritten.
4848 if (Subtarget.isDarwinABI()) {
4849 FPOpOut = getFramePointerFrameIndex(DAG);
4850 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
4851 Chain = SDValue(FPOpOut.getNode(), 1);
4854 return Chain;
4857 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4858 /// by "Src" to address "Dst" of size "Size". Alignment information is
4859 /// specified by the specific parameter attribute. The copy will be passed as
4860 /// a byval function parameter.
4861 /// Sometimes what we are copying is the end of a larger object, the part that
4862 /// does not fit in registers.
4863 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4864 SDValue Chain, ISD::ArgFlagsTy Flags,
4865 SelectionDAG &DAG, const SDLoc &dl) {
4866 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4867 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
4868 false, false, false, MachinePointerInfo(),
4869 MachinePointerInfo());
4872 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4873 /// tail calls.
4874 static void LowerMemOpCallTo(
4875 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4876 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4877 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4878 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4879 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4880 if (!isTailCall) {
4881 if (isVector) {
4882 SDValue StackPtr;
4883 if (isPPC64)
4884 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4885 else
4886 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4887 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4888 DAG.getConstant(ArgOffset, dl, PtrVT));
4890 MemOpChains.push_back(
4891 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4892 // Calculate and remember argument location.
4893 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4894 TailCallArguments);
4897 static void
4898 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4899 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4900 SDValue FPOp,
4901 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4902 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4903 // might overwrite each other in case of tail call optimization.
4904 SmallVector<SDValue, 8> MemOpChains2;
4905 // Do not flag preceding copytoreg stuff together with the following stuff.
4906 InFlag = SDValue();
4907 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4908 MemOpChains2, dl);
4909 if (!MemOpChains2.empty())
4910 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4912 // Store the return address to the appropriate stack slot.
4913 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4915 // Emit callseq_end just before tailcall node.
4916 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4917 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4918 InFlag = Chain.getValue(1);
4921 // Is this global address that of a function that can be called by name (as
4922 // opposed to something that must hold a descriptor for an indirect call)?
4923 static bool isFunctionGlobalAddress(SDValue Callee) {
4924 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4925 if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4926 Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4927 return false;
4929 return G->getGlobal()->getValueType()->isFunctionTy();
4932 return false;
4935 static unsigned
4936 PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
4937 SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
4938 bool isPatchPoint, bool hasNest,
4939 SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
4940 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
4941 ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
4942 bool isPPC64 = Subtarget.isPPC64();
4943 bool isSVR4ABI = Subtarget.isSVR4ABI();
4944 bool is64BitELFv1ABI = isPPC64 && isSVR4ABI && !Subtarget.isELFv2ABI();
4945 bool isAIXABI = Subtarget.isAIXABI();
4947 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4948 NodeTys.push_back(MVT::Other); // Returns a chain
4949 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
4951 unsigned CallOpc = PPCISD::CALL;
4953 bool needIndirectCall = true;
4954 if (!isSVR4ABI || !isPPC64)
4955 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
4956 // If this is an absolute destination address, use the munged value.
4957 Callee = SDValue(Dest, 0);
4958 needIndirectCall = false;
4961 // PC-relative references to external symbols should go through $stub, unless
4962 // we're building with the Leopard linker or later, which automatically
4963 // synthesizes these stubs.
4964 const TargetMachine &TM = DAG.getTarget();
4965 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
4966 const GlobalValue *GV = nullptr;
4967 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
4968 GV = G->getGlobal();
4969 bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
4970 bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
4972 // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4973 // every direct call is) turn it into a TargetGlobalAddress /
4974 // TargetExternalSymbol node so that legalize doesn't hack it.
4975 if (isFunctionGlobalAddress(Callee)) {
4976 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
4978 // A call to a TLS address is actually an indirect call to a
4979 // thread-specific pointer.
4980 unsigned OpFlags = 0;
4981 if (UsePlt)
4982 OpFlags = PPCII::MO_PLT;
4984 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
4985 Callee.getValueType(), 0, OpFlags);
4986 needIndirectCall = false;
4989 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4990 unsigned char OpFlags = 0;
4992 if (UsePlt)
4993 OpFlags = PPCII::MO_PLT;
4995 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
4996 OpFlags);
4997 needIndirectCall = false;
5000 if (isPatchPoint) {
5001 // We'll form an invalid direct call when lowering a patchpoint; the full
5002 // sequence for an indirect call is complicated, and many of the
5003 // instructions introduced might have side effects (and, thus, can't be
5004 // removed later). The call itself will be removed as soon as the
5005 // argument/return lowering is complete, so the fact that it has the wrong
5006 // kind of operands should not really matter.
5007 needIndirectCall = false;
5010 if (needIndirectCall) {
5011 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
5012 // to do the call; we can't use PPCISD::CALL.
5013 SDValue MTCTROps[] = {Chain, Callee, InFlag};
5015 if (is64BitELFv1ABI) {
5016 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5017 // entry point, but to the function descriptor (the function entry point
5018 // address is part of the function descriptor though).
5019 // The function descriptor is a three doubleword structure with the
5020 // following fields: function entry point, TOC base address and
5021 // environment pointer.
5022 // Thus for a call through a function pointer, the following actions need
5023 // to be performed:
5024 // 1. Save the TOC of the caller in the TOC save area of its stack
5025 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5026 // 2. Load the address of the function entry point from the function
5027 // descriptor.
5028 // 3. Load the TOC of the callee from the function descriptor into r2.
5029 // 4. Load the environment pointer from the function descriptor into
5030 // r11.
5031 // 5. Branch to the function entry point address.
5032 // 6. On return of the callee, the TOC of the caller needs to be
5033 // restored (this is done in FinishCall()).
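// As a sketch, the function descriptor layout assumed by the loads below is:
//   offset 0:  function entry point
//   offset 8:  TOC base address (copied into r2)
//   offset 16: environment pointer (copied into r11 unless 'nest' is used)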
5035 // The loads are scheduled at the beginning of the call sequence, and the
5036 // register copies are flagged together to ensure that no other
5037 // operations can be scheduled in between. E.g. without flagging the
5038 // copies together, a TOC access in the caller could be scheduled between
5039 // the assignment of the callee TOC and the branch to the callee, which
5040 // results in the TOC access going through the TOC of the callee instead
5041 // of going through the TOC of the caller, which leads to incorrect code.
5043 // Load the address of the function entry point from the function
5044 // descriptor.
5045 SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
5046 if (LDChain.getValueType() == MVT::Glue)
5047 LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
5049 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5050 ? (MachineMemOperand::MODereferenceable |
5051 MachineMemOperand::MOInvariant)
5052 : MachineMemOperand::MONone;
5054 MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
5055 SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
5056 /* Alignment = */ 8, MMOFlags);
5058 // Load environment pointer into r11.
5059 SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
5060 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
5061 SDValue LoadEnvPtr =
5062 DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
5063 /* Alignment = */ 8, MMOFlags);
5065 SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
5066 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
5067 SDValue TOCPtr =
5068 DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
5069 /* Alignment = */ 8, MMOFlags);
5071 setUsesTOCBasePtr(DAG);
5072 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
5073 InFlag);
5074 Chain = TOCVal.getValue(0);
5075 InFlag = TOCVal.getValue(1);
5077 // If the function call has an explicit 'nest' parameter, it takes the
5078 // place of the environment pointer.
5079 if (!hasNest) {
5080 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
5081 InFlag);
5083 Chain = EnvVal.getValue(0);
5084 InFlag = EnvVal.getValue(1);
5087 MTCTROps[0] = Chain;
5088 MTCTROps[1] = LoadFuncPtr;
5089 MTCTROps[2] = InFlag;
5092 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
5093 makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
5094 InFlag = Chain.getValue(1);
5096 NodeTys.clear();
5097 NodeTys.push_back(MVT::Other);
5098 NodeTys.push_back(MVT::Glue);
5099 Ops.push_back(Chain);
5100 CallOpc = PPCISD::BCTRL;
5101 Callee.setNode(nullptr);
5102 // Add use of X11 (holding environment pointer)
5103 if (is64BitELFv1ABI && !hasNest)
5104 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
5105 // Add CTR register as callee so a bctr can be emitted later.
5106 if (isTailCall)
5107 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
5110 // If this is a direct call, pass the chain and the callee.
5111 if (Callee.getNode()) {
5112 Ops.push_back(Chain);
5113 Ops.push_back(Callee);
5115 // If this is a tail call, add the stack pointer delta.
5116 if (isTailCall)
5117 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5119 // Add argument registers to the end of the list so that they are known live
5120 // into the call.
5121 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5122 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5123 RegsToPass[i].second.getValueType()));
5125 // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register
5126 // live into the call.
5127 // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT.
5128 if ((isSVR4ABI && isPPC64) || isAIXABI) {
5129 setUsesTOCBasePtr(DAG);
5131 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5132 // no way to mark dependencies as implicit here.
5133 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5134 if (!isPatchPoint)
5135 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2
5136 : PPC::R2, PtrVT));
5139 return CallOpc;
5142 SDValue PPCTargetLowering::LowerCallResult(
5143 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5144 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5145 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5146 SmallVector<CCValAssign, 16> RVLocs;
5147 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5148 *DAG.getContext());
5150 CCRetInfo.AnalyzeCallResult(
5151 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5152 ? RetCC_PPC_Cold
5153 : RetCC_PPC);
5155 // Copy all of the result registers out of their specified physreg.
5156 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5157 CCValAssign &VA = RVLocs[i];
5158 assert(VA.isRegLoc() && "Can only return in registers!");
5160 SDValue Val;
5162 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5163 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5164 InFlag);
5165 Chain = Lo.getValue(1);
5166 InFlag = Lo.getValue(2);
5167 VA = RVLocs[++i]; // skip ahead to next loc
5168 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5169 InFlag);
5170 Chain = Hi.getValue(1);
5171 InFlag = Hi.getValue(2);
5172 if (!Subtarget.isLittleEndian())
5173 std::swap (Lo, Hi);
5174 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5175 } else {
5176 Val = DAG.getCopyFromReg(Chain, dl,
5177 VA.getLocReg(), VA.getLocVT(), InFlag);
5178 Chain = Val.getValue(1);
5179 InFlag = Val.getValue(2);
5182 switch (VA.getLocInfo()) {
5183 default: llvm_unreachable("Unknown loc info!");
5184 case CCValAssign::Full: break;
5185 case CCValAssign::AExt:
5186 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5187 break;
5188 case CCValAssign::ZExt:
5189 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5190 DAG.getValueType(VA.getValVT()));
5191 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5192 break;
5193 case CCValAssign::SExt:
5194 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5195 DAG.getValueType(VA.getValVT()));
5196 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5197 break;
5200 InVals.push_back(Val);
5203 return Chain;
5206 SDValue PPCTargetLowering::FinishCall(
5207 CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
5208 bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
5209 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
5210 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5211 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5212 SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
5213 std::vector<EVT> NodeTys;
5214 SmallVector<SDValue, 8> Ops;
5215 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
5216 SPDiff, isTailCall, isPatchPoint, hasNest,
5217 RegsToPass, Ops, NodeTys, CS, Subtarget);
5219 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5220 if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
5221 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5223 // When performing tail call optimization the callee pops its arguments off
5224 // the stack. Account for this here so these bytes can be pushed back on in
5225 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5226 int BytesCalleePops =
5227 (CallConv == CallingConv::Fast &&
5228 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
5230 // Add a register mask operand representing the call-preserved registers.
5231 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5232 const uint32_t *Mask =
5233 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
5234 assert(Mask && "Missing call preserved mask for calling convention");
5235 Ops.push_back(DAG.getRegisterMask(Mask));
5237 if (InFlag.getNode())
5238 Ops.push_back(InFlag);
5240 // Emit tail call.
5241 if (isTailCall) {
5242 assert(((Callee.getOpcode() == ISD::Register &&
5243 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5244 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5245 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5246 isa<ConstantSDNode>(Callee)) &&
5247 "Expecting a global address, external symbol, absolute value or register");
5249 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5250 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
5253 // Add a NOP immediately after the branch instruction when using the 64-bit
5254 // SVR4 or the AIX ABI.
5255 // At link time, if caller and callee are in different modules and
5256 // thus have different TOCs, the call will be replaced with a call to a stub
5257 // function which saves the current TOC, loads the TOC of the callee and
5258 // branches to the callee. The NOP will be replaced with a load instruction
5259 // which restores the TOC of the caller from the TOC save slot of the current
5260 // stack frame. If caller and callee belong to the same module (and have the
5261 // same TOC), the NOP will remain unchanged, or become some other NOP.
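// Schematically (illustrative assembly; the exact TOC save offset depends on
// the ABI), a cross-module call
//   bl callee
//   nop
// may be rewritten by the linker into
//   bl callee_stub
//   ld 2, 40(1)   # reload the caller's TOC from its save slot (ELFv1 offset)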
5263 MachineFunction &MF = DAG.getMachineFunction();
5264 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5265 if (!isTailCall && !isPatchPoint &&
5266 ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) ||
5267 Subtarget.isAIXABI())) {
5268 if (CallOpc == PPCISD::BCTRL) {
5269 if (Subtarget.isAIXABI())
5270 report_fatal_error("Indirect call on AIX is not implemented.");
5272 // This is a call through a function pointer.
5273 // Restore the caller TOC from the save area into R2.
5274 // See PrepareCall() for more information about calls through function
5275 // pointers in the 64-bit SVR4 ABI.
5276 // We are using a target-specific load with r2 hard coded, because the
5277 // result of a target-independent load would never go directly into r2,
5278 // since r2 is a reserved register (which prevents the register allocator
5279 // from allocating it), resulting in an additional register being
5280 // allocated and an unnecessary move instruction being generated.
5281 CallOpc = PPCISD::BCTRL_LOAD_TOC;
5283 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
5284 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5285 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5286 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
5288 // The address needs to go after the chain input but before the flag (or
5289 // any other variadic arguments).
5290 Ops.insert(std::next(Ops.begin()), AddTOC);
5291 } else if (CallOpc == PPCISD::CALL &&
5292 !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) {
5293 // Otherwise insert NOP for non-local calls.
5294 CallOpc = PPCISD::CALL_NOP;
5298 if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) {
5299 // On AIX, direct function calls reference the symbol for the function's
5300 // entry point, which is named by inserting a "." before the function's
5301 // C-linkage name.
5302 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
5303 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5304 MCSymbol *S = Context.getOrCreateSymbol(Twine(".") +
5305 Twine(G->getGlobal()->getName()));
5306 Callee = DAG.getMCSymbol(S, PtrVT);
5307 // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode.
5308 Ops[1] = Callee;
5311 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
5312 InFlag = Chain.getValue(1);
5314 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5315 DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5316 InFlag, dl);
5317 if (!Ins.empty())
5318 InFlag = Chain.getValue(1);
5320 return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
5321 Ins, dl, DAG, InVals);
5324 SDValue
5325 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5326 SmallVectorImpl<SDValue> &InVals) const {
5327 SelectionDAG &DAG = CLI.DAG;
5328 SDLoc &dl = CLI.DL;
5329 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5330 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5331 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5332 SDValue Chain = CLI.Chain;
5333 SDValue Callee = CLI.Callee;
5334 bool &isTailCall = CLI.IsTailCall;
5335 CallingConv::ID CallConv = CLI.CallConv;
5336 bool isVarArg = CLI.IsVarArg;
5337 bool isPatchPoint = CLI.IsPatchPoint;
5338 ImmutableCallSite CS = CLI.CS;
5340 if (isTailCall) {
5341 if (Subtarget.useLongCalls() && !(CS && CS.isMustTailCall()))
5342 isTailCall = false;
5343 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5344 isTailCall =
5345 IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
5346 isVarArg, Outs, Ins, DAG);
5347 else
5348 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5349 Ins, DAG);
5350 if (isTailCall) {
5351 ++NumTailCalls;
5352 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5353 ++NumSiblingCalls;
5355 assert(isa<GlobalAddressSDNode>(Callee) &&
5356 "Callee should be an llvm::Function object.");
5357 LLVM_DEBUG(
5358 const GlobalValue *GV =
5359 cast<GlobalAddressSDNode>(Callee)->getGlobal();
5360 const unsigned Width =
5361 80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
5362 dbgs() << "TCO caller: "
5363 << left_justify(DAG.getMachineFunction().getName(), Width)
5364 << ", callee linkage: " << GV->getVisibility() << ", "
5365 << GV->getLinkage() << "\n");
5369 if (!isTailCall && CS && CS.isMustTailCall())
5370 report_fatal_error("failed to perform tail call elimination on a call "
5371 "site marked musttail");
5373 // When long calls (i.e. indirect calls) are always used, calls are always
5374 // made via function pointer. If we have a function name, first translate it
5375 // into a pointer.
5376 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5377 !isTailCall)
5378 Callee = LowerGlobalAddress(Callee, DAG);
5380 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5381 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
5382 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5383 dl, DAG, InVals, CS);
5385 if (Subtarget.isSVR4ABI())
5386 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
5387 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5388 dl, DAG, InVals, CS);
5390 if (Subtarget.isAIXABI())
5391 return LowerCall_AIX(Chain, Callee, CallConv, isVarArg,
5392 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5393 dl, DAG, InVals, CS);
5395 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
5396 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5397 dl, DAG, InVals, CS);
5400 SDValue PPCTargetLowering::LowerCall_32SVR4(
5401 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5402 bool isTailCall, bool isPatchPoint,
5403 const SmallVectorImpl<ISD::OutputArg> &Outs,
5404 const SmallVectorImpl<SDValue> &OutVals,
5405 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5406 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5407 ImmutableCallSite CS) const {
5408 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5409 // of the 32-bit SVR4 ABI stack frame layout.
5411 assert((CallConv == CallingConv::C ||
5412 CallConv == CallingConv::Cold ||
5413 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5415 unsigned PtrByteSize = 4;
5417 MachineFunction &MF = DAG.getMachineFunction();
5419 // Mark this function as potentially containing a function that contains a
5420 // tail call. As a consequence the frame pointer will be used for dynamic
5421 // allocation and for restoring the caller's stack pointer in this function's
5422 // epilogue. This is done because the tail-called function might overwrite
5423 // the value in this function's (MF) stack pointer stack slot 0(SP).
5424 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5425 CallConv == CallingConv::Fast)
5426 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5428 // Count how many bytes are to be pushed on the stack, including the linkage
5429 // area, parameter list area and the part of the local variable space which
5430 // contains copies of aggregates which are passed by value.
5432 // Assign locations to all of the outgoing arguments.
5433 SmallVector<CCValAssign, 16> ArgLocs;
5434 PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
5436 // Reserve space for the linkage area on the stack.
5437 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5438 PtrByteSize);
5439 if (useSoftFloat())
5440 CCInfo.PreAnalyzeCallOperands(Outs);
5442 if (isVarArg) {
5443 // Handle fixed and variable vector arguments differently.
5444 // Fixed vector arguments go into registers as long as registers are
5445 // available. Variable vector arguments always go into memory.
5446 unsigned NumArgs = Outs.size();
5448 for (unsigned i = 0; i != NumArgs; ++i) {
5449 MVT ArgVT = Outs[i].VT;
5450 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5451 bool Result;
5453 if (Outs[i].IsFixed) {
5454 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5455 CCInfo);
5456 } else {
5457 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5458 ArgFlags, CCInfo);
5461 if (Result) {
5462 #ifndef NDEBUG
5463 errs() << "Call operand #" << i << " has unhandled type "
5464 << EVT(ArgVT).getEVTString() << "\n";
5465 #endif
5466 llvm_unreachable(nullptr);
5469 } else {
5470 // All arguments are treated the same.
5471 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5473 CCInfo.clearWasPPCF128();
5475 // Assign locations to all of the outgoing aggregate by value arguments.
5476 SmallVector<CCValAssign, 16> ByValArgLocs;
5477 CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());
5479 // Reserve stack space for the allocations in CCInfo.
5480 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
5482 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5484 // Size of the linkage area, parameter list area and the part of the local
5485 // variable space where copies of aggregates which are passed by value are
5486 // stored.
5487 unsigned NumBytes = CCByValInfo.getNextStackOffset();
5489 // Calculate by how many bytes the stack has to be adjusted in case of tail
5490 // call optimization.
5491 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5493 // Adjust the stack pointer for the new arguments...
5494 // These operations are automatically eliminated by the prolog/epilog pass
5495 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5496 SDValue CallSeqStart = Chain;
5498 // Load the return address and frame pointer so they can be moved somewhere
5499 // else later.
5500 SDValue LROp, FPOp;
5501 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5503 // Set up a copy of the stack pointer for use loading and storing any
5504 // arguments that may not fit in the registers available for argument
5505 // passing.
5506 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5508 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5509 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5510 SmallVector<SDValue, 8> MemOpChains;
5512 bool seenFloatArg = false;
5513 // Walk the register/memloc assignments, inserting copies/loads.
5514 // i - Tracks the index into the list of registers allocated for the call
5515 // RealArgIdx - Tracks the index into the list of actual function arguments
5516 // j - Tracks the index into the list of byval arguments
5517 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5518 i != e;
5519 ++i, ++RealArgIdx) {
5520 CCValAssign &VA = ArgLocs[i];
5521 SDValue Arg = OutVals[RealArgIdx];
5522 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5524 if (Flags.isByVal()) {
5525 // Argument is an aggregate which is passed by value, thus we need to
5526 // create a copy of it in the local variable space of the current stack
5527 // frame (which is the stack frame of the caller) and pass the address of
5528 // this copy to the callee.
5529 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5530 CCValAssign &ByValVA = ByValArgLocs[j++];
5531 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5533 // Memory reserved in the local variable space of the caller's stack frame.
5534 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5536 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5537 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5538 StackPtr, PtrOff);
5540 // Create a copy of the argument in the local area of the current
5541 // stack frame.
5542 SDValue MemcpyCall =
5543 CreateCopyOfByValArgument(Arg, PtrOff,
5544 CallSeqStart.getNode()->getOperand(0),
5545 Flags, DAG, dl);
5547 // This must go outside the CALLSEQ_START..END.
5548 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5549 SDLoc(MemcpyCall));
5550 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5551 NewCallSeqStart.getNode());
5552 Chain = CallSeqStart = NewCallSeqStart;
5554 // Pass the address of the aggregate copy on the stack either in a
5555 // physical register or in the parameter list area of the current stack
5556 // frame to the callee.
5557 Arg = PtrOff;
5560 // When useCRBits() is true, there can be i1 arguments.
5561 // This is because getRegisterType(MVT::i1) => MVT::i1,
5562 // while for other integer types getRegisterType() => MVT::i32.
5563 // Extend i1 and ensure the callee will get i32.
5564 if (Arg.getValueType() == MVT::i1)
5565 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5566 dl, MVT::i32, Arg);
5568 if (VA.isRegLoc()) {
5569 seenFloatArg |= VA.getLocVT().isFloatingPoint();
5570 // Put argument in a physical register.
5571 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5572 bool IsLE = Subtarget.isLittleEndian();
5573 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5574 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5575 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5576 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5577 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5578 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5579 SVal.getValue(0)));
5580 } else
5581 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5582 } else {
5583 // Put argument in the parameter list area of the current stack frame.
5584 assert(VA.isMemLoc());
5585 unsigned LocMemOffset = VA.getLocMemOffset();
5587 if (!isTailCall) {
5588 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5589 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5590 StackPtr, PtrOff);
5592 MemOpChains.push_back(
5593 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5594 } else {
5595 // Calculate and remember argument location.
5596 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5597 TailCallArguments);
5602 if (!MemOpChains.empty())
5603 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5605 // Build a sequence of copy-to-reg nodes chained together with token chain
5606 // and flag operands which copy the outgoing args into the appropriate regs.
5607 SDValue InFlag;
5608 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5609 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5610 RegsToPass[i].second, InFlag);
5611 InFlag = Chain.getValue(1);
5614 // Set CR bit 6 to true if this is a vararg call with floating args passed in
5615 // registers.
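// For instance (illustrative, following the convention described above): a
// vararg call that passes a double in an FPR, such as printf("%f", x), sets
// CR bit 6 via CR6SET, while a vararg call passing only integers emits
// CR6UNSET.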
5616 if (isVarArg) {
5617 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5618 SDValue Ops[] = { Chain, InFlag };
5620 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5621 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5623 InFlag = Chain.getValue(1);
5626 if (isTailCall)
5627 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5628 TailCallArguments);
5630 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
5631 /* unused except on PPC64 ELFv1 */ false, DAG,
5632 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
5633 NumBytes, Ins, InVals, CS);
5636 // Copy an argument into memory, being careful to do this outside the
5637 // call sequence for the call to which the argument belongs.
5638 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5639 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5640 SelectionDAG &DAG, const SDLoc &dl) const {
5641 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5642 CallSeqStart.getNode()->getOperand(0),
5643 Flags, DAG, dl);
5644 // The MEMCPY must go outside the CALLSEQ_START..END.
5645 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5646 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5647 SDLoc(MemcpyCall));
5648 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5649 NewCallSeqStart.getNode());
5650 return NewCallSeqStart;
5653 SDValue PPCTargetLowering::LowerCall_64SVR4(
5654 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5655 bool isTailCall, bool isPatchPoint,
5656 const SmallVectorImpl<ISD::OutputArg> &Outs,
5657 const SmallVectorImpl<SDValue> &OutVals,
5658 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5659 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5660 ImmutableCallSite CS) const {
5661 bool isELFv2ABI = Subtarget.isELFv2ABI();
5662 bool isLittleEndian = Subtarget.isLittleEndian();
5663 unsigned NumOps = Outs.size();
5664 bool hasNest = false;
5665 bool IsSibCall = false;
5667 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5668 unsigned PtrByteSize = 8;
5670 MachineFunction &MF = DAG.getMachineFunction();
5672 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5673 IsSibCall = true;
5675 // Mark this function as potentially containing a function that contains a
5676 // tail call. As a consequence the frame pointer will be used for dynamic
5677 // allocation and for restoring the caller's stack pointer in this function's
5678 // epilogue. This is done because the tail-called function might overwrite
5679 // the value in this function's (MF) stack pointer stack slot 0(SP).
5680 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5681 CallConv == CallingConv::Fast)
5682 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5684 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
5685 "fastcc not supported on varargs functions");
5687 // Count how many bytes are to be pushed on the stack, including the linkage
5688 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5689 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5690 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5691 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5692 unsigned NumBytes = LinkageSize;
5693 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5694 unsigned &QFPR_idx = FPR_idx;
5696 static const MCPhysReg GPR[] = {
5697 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5698 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5700 static const MCPhysReg VR[] = {
5701 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5702 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5705 const unsigned NumGPRs = array_lengthof(GPR);
5706 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5707 const unsigned NumVRs = array_lengthof(VR);
5708 const unsigned NumQFPRs = NumFPRs;
5710 // On ELFv2, we can avoid allocating the parameter area if all the arguments
5711 // can be passed to the callee in registers.
5712 // For the fast calling convention, there is another check below.
5713 // Note: We should keep this consistent with LowerFormalArguments_64SVR4().
5714 bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
5715 if (!HasParameterArea) {
5716 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5717 unsigned AvailableFPRs = NumFPRs;
5718 unsigned AvailableVRs = NumVRs;
5719 unsigned NumBytesTmp = NumBytes;
5720 for (unsigned i = 0; i != NumOps; ++i) {
5721 if (Outs[i].Flags.isNest()) continue;
5722 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5723 PtrByteSize, LinkageSize, ParamAreaSize,
5724 NumBytesTmp, AvailableFPRs, AvailableVRs,
5725 Subtarget.hasQPX()))
5726 HasParameterArea = true;
5730 // When using the fast calling convention, we don't provide backing for
5731 // arguments that will be in registers.
5732 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5734 // Avoid allocating parameter area for fastcc functions if all the arguments
5735 // can be passed in the registers.
5736 if (CallConv == CallingConv::Fast)
5737 HasParameterArea = false;
5739 // Add up all the space actually used.
5740 for (unsigned i = 0; i != NumOps; ++i) {
5741 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5742 EVT ArgVT = Outs[i].VT;
5743 EVT OrigVT = Outs[i].ArgVT;
5745 if (Flags.isNest())
5746 continue;
5748 if (CallConv == CallingConv::Fast) {
5749 if (Flags.isByVal()) {
5750 NumGPRsUsed += (Flags.getByValSize()+7)/8;
5751 if (NumGPRsUsed > NumGPRs)
5752 HasParameterArea = true;
5753 } else {
5754 switch (ArgVT.getSimpleVT().SimpleTy) {
5755 default: llvm_unreachable("Unexpected ValueType for argument!");
5756 case MVT::i1:
5757 case MVT::i32:
5758 case MVT::i64:
5759 if (++NumGPRsUsed <= NumGPRs)
5760 continue;
5761 break;
5762 case MVT::v4i32:
5763 case MVT::v8i16:
5764 case MVT::v16i8:
5765 case MVT::v2f64:
5766 case MVT::v2i64:
5767 case MVT::v1i128:
5768 case MVT::f128:
5769 if (++NumVRsUsed <= NumVRs)
5770 continue;
5771 break;
5772 case MVT::v4f32:
5773 // When using QPX, this is handled like a FP register, otherwise, it
5774 // is an Altivec register.
5775 if (Subtarget.hasQPX()) {
5776 if (++NumFPRsUsed <= NumFPRs)
5777 continue;
5778 } else {
5779 if (++NumVRsUsed <= NumVRs)
5780 continue;
5782 break;
5783 case MVT::f32:
5784 case MVT::f64:
5785 case MVT::v4f64: // QPX
5786 case MVT::v4i1: // QPX
5787 if (++NumFPRsUsed <= NumFPRs)
5788 continue;
5789 break;
5791 HasParameterArea = true;
5795 /* Respect alignment of argument on the stack. */
5796 unsigned Align =
5797 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5798 NumBytes = ((NumBytes + Align - 1) / Align) * Align;
5800 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5801 if (Flags.isInConsecutiveRegsLast())
5802 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
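// Worked example of the round-up idiom above: with NumBytes == 52 and
// Align == 16, ((52 + 15) / 16) * 16 == 64, i.e. the offset is padded to
// the next 16-byte boundary before the slot size is added; the same idiom
// then pads to a PtrByteSize multiple after a consecutive-register run.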
5805 unsigned NumBytesActuallyUsed = NumBytes;
5807 // In the old ELFv1 ABI, the prolog code of the callee may store up to 8
5808 // GPR argument registers to the stack, allowing va_start to index over
5809 // them in memory if the callee is variadic. Because we cannot tell if this
5810 // is needed on the caller side, we have to conservatively assume that it
5811 // is needed. As such, make sure we have at least enough stack space for
5812 // the caller to store the 8 GPRs.
5813 // In the ELFv2 ABI, we allocate the parameter area iff a callee really
5814 // requires memory operands, e.g. for a vararg function.
5815 if (HasParameterArea)
5816 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5817 else
5818 NumBytes = LinkageSize;
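// For example, with the ELFv1 LinkageSize of 48 and PtrByteSize == 8, any
// call that needs a parameter area reserves at least 48 + 8*8 == 112 bytes,
// even when the arguments themselves need less.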
5820 // Tail call needs the stack to be aligned.
5821 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5822 CallConv == CallingConv::Fast)
5823 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5825 int SPDiff = 0;
5827 // Calculate by how many bytes the stack has to be adjusted in case of tail
5828 // call optimization.
5829 if (!IsSibCall)
5830 SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5832 // To protect arguments on the stack from being clobbered in a tail call,
5833 // force all the loads to happen before doing any other lowering.
5834 if (isTailCall)
5835 Chain = DAG.getStackArgumentTokenFactor(Chain);
5837 // Adjust the stack pointer for the new arguments...
5838 // These operations are automatically eliminated by the prolog/epilog pass
5839 if (!IsSibCall)
5840 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5841 SDValue CallSeqStart = Chain;
5843 // Load the return address and frame pointer so they can be moved somewhere
5844 // else later.
5845 SDValue LROp, FPOp;
5846 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5848 // Set up a copy of the stack pointer for use loading and storing any
5849 // arguments that may not fit in the registers available for argument
5850 // passing.
5851 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5853 // Figure out which arguments are going to go in registers, and which in
5854 // memory. Also, if this is a vararg function, floating point operations
5855 // must be stored to our stack, and loaded into integer regs as well, if
5856 // any integer regs are available for argument passing.
5857 unsigned ArgOffset = LinkageSize;
5859 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5860 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5862 SmallVector<SDValue, 8> MemOpChains;
5863 for (unsigned i = 0; i != NumOps; ++i) {
5864 SDValue Arg = OutVals[i];
5865 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5866 EVT ArgVT = Outs[i].VT;
5867 EVT OrigVT = Outs[i].ArgVT;
5869 // PtrOff will be used to store the current argument to the stack if a
5870 // register cannot be found for it.
5871 SDValue PtrOff;
5873 // We re-align the argument offset for each argument, except under the fast
5874 // calling convention, where we must take care to re-align only when the
5875 // argument will actually use a stack slot.
5876 auto ComputePtrOff = [&]() {
5877 /* Respect alignment of argument on the stack. */
5878 unsigned Align =
5879 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5880 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
5882 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5884 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5887 if (CallConv != CallingConv::Fast) {
5888 ComputePtrOff();
5890 /* Compute GPR index associated with argument offset. */
5891 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5892 GPR_idx = std::min(GPR_idx, NumGPRs);
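// Illustrative mapping: with LinkageSize == 48 and ArgOffset == 72,
// (72 - 48) / 8 == 3, so the next integer argument targets GPR[3] (X6);
// the std::min clamp keeps the index in range once the eight argument
// GPRs are exhausted.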
5895 // Promote integers to 64-bit values.
5896 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5897 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5898 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5899 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5902 // FIXME memcpy is used way more than necessary. Correctness first.
5903 // Note: "by value" is code for passing a structure by value, not
5904 // basic types.
5905 if (Flags.isByVal()) {
5906 // Note: Size includes alignment padding, so
5907 // struct x { short a; char b; }
5908 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
5909 // These are the proper values we need for right-justifying the
5910 // aggregate in a parameter register.
5911 unsigned Size = Flags.getByValSize();
5913 // An empty aggregate parameter takes up no storage and no
5914 // registers.
5915 if (Size == 0)
5916 continue;
5918 if (CallConv == CallingConv::Fast)
5919 ComputePtrOff();
5921 // All aggregates smaller than 8 bytes must be passed right-justified.
5922 if (Size==1 || Size==2 || Size==4) {
5923 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5924 if (GPR_idx != NumGPRs) {
5925 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5926 MachinePointerInfo(), VT);
5927 MemOpChains.push_back(Load.getValue(1));
5928 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5930 ArgOffset += PtrByteSize;
5931 continue;
5935 if (GPR_idx == NumGPRs && Size < 8) {
5936 SDValue AddPtr = PtrOff;
5937 if (!isLittleEndian) {
5938 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5939 PtrOff.getValueType());
5940 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5942 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5943 CallSeqStart,
5944 Flags, DAG, dl);
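// Illustration: for a 3-byte aggregate with no GPR left on big-endian,
// AddPtr == PtrOff + (8 - 3), so the bytes land in the high-address end of
// the doubleword slot, i.e. right-justified as the ABI requires.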
5945 ArgOffset += PtrByteSize;
5946 continue;
5948 // Copy entire object into memory. There are cases where gcc-generated
5949 // code assumes it is there, even if it could be put entirely into
5950 // registers. (This is not what the doc says.)
5952 // FIXME: The above statement is likely due to a misunderstanding of the
5953 // documents. All arguments must be copied into the parameter area BY
5954 // THE CALLEE in the event that the callee takes the address of any
5955 // formal argument. That has not yet been implemented. However, it is
5956 // reasonable to use the stack area as a staging area for the register
5957 // load.
5959 // Skip this for small aggregates, as we will use the same slot for a
5960 // right-justified copy, below.
5961 if (Size >= 8)
5962 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5963 CallSeqStart,
5964 Flags, DAG, dl);
5966 // When a register is available, pass a small aggregate right-justified.
5967 if (Size < 8 && GPR_idx != NumGPRs) {
5968 // The easiest way to get this right-justified in a register
5969 // is to copy the structure into the rightmost portion of a
5970 // local variable slot, then load the whole slot into the
5971 // register.
5972 // FIXME: The memcpy seems to produce pretty awful code for
5973 // small aggregates, particularly for packed ones.
5974 // FIXME: It would be preferable to use the slot in the
5975 // parameter save area instead of a new local variable.
5976 SDValue AddPtr = PtrOff;
5977 if (!isLittleEndian) {
5978 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5979 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5981 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5982 CallSeqStart,
5983 Flags, DAG, dl);
5985 // Load the slot into the register.
5986 SDValue Load =
5987 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
5988 MemOpChains.push_back(Load.getValue(1));
5989 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5991 // Done with this argument.
5992 ArgOffset += PtrByteSize;
5993 continue;
5996 // For aggregates larger than PtrByteSize, copy the pieces of the
5997 // object that fit into registers from the parameter save area.
5998 for (unsigned j=0; j<Size; j+=PtrByteSize) {
5999 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6000 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6001 if (GPR_idx != NumGPRs) {
6002 SDValue Load =
6003 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6004 MemOpChains.push_back(Load.getValue(1));
6005 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6006 ArgOffset += PtrByteSize;
6007 } else {
6008 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6009 break;
6012 continue;
6015 switch (Arg.getSimpleValueType().SimpleTy) {
6016 default: llvm_unreachable("Unexpected ValueType for argument!");
6017 case MVT::i1:
6018 case MVT::i32:
6019 case MVT::i64:
6020 if (Flags.isNest()) {
6021 // The 'nest' parameter, if any, is passed in R11.
6022 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6023 hasNest = true;
6024 break;
6027 // These can be scalar arguments or elements of an integer array type
6028 // passed directly. Clang may use those instead of "byval" aggregate
6029 // types to avoid forcing arguments to memory unnecessarily.
6030 if (GPR_idx != NumGPRs) {
6031 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6032 } else {
6033 if (CallConv == CallingConv::Fast)
6034 ComputePtrOff();
6036 assert(HasParameterArea &&
6037 "Parameter area must exist to pass an argument in memory.");
6038 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6039 true, isTailCall, false, MemOpChains,
6040 TailCallArguments, dl);
6041 if (CallConv == CallingConv::Fast)
6042 ArgOffset += PtrByteSize;
6044 if (CallConv != CallingConv::Fast)
6045 ArgOffset += PtrByteSize;
6046 break;
6047 case MVT::f32:
6048 case MVT::f64: {
6049 // These can be scalar arguments or elements of a float array type
6050 // passed directly. The latter are used to implement ELFv2 homogeneous
6051 // float aggregates.
6053 // Named arguments go into FPRs first, and once they overflow, the
6054 // remaining arguments go into GPRs and then the parameter save area.
6055 // Unnamed arguments for vararg functions always go to GPRs and
6056 // then the parameter save area. For now, put all arguments to vararg
6057 // routines always in both locations (FPR *and* GPR or stack slot).
6058 bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
6059 bool NeededLoad = false;
6061 // First load the argument into the next available FPR.
6062 if (FPR_idx != NumFPRs)
6063 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6065 // Next, load the argument into GPR or stack slot if needed.
6066 if (!NeedGPROrStack)
6067 ;
6068 else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
6069 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6070 // once we support fp <-> gpr moves.
6072 // In the non-vararg case, this can only ever happen in the
6073 // presence of f32 array types, since otherwise we never run
6074 // out of FPRs before running out of GPRs.
6075 SDValue ArgVal;
6077 // Double values are always passed in a single GPR.
6078 if (Arg.getValueType() != MVT::f32) {
6079 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6081 // Non-array float values are extended and passed in a GPR.
6082 } else if (!Flags.isInConsecutiveRegs()) {
6083 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6084 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6086 // If we have an array of floats, we collect every odd element
6087 // together with its predecessor into one GPR.
6088 } else if (ArgOffset % PtrByteSize != 0) {
6089 SDValue Lo, Hi;
6090 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6091 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6092 if (!isLittleEndian)
6093 std::swap(Lo, Hi);
6094 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
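// Illustration: for a homogeneous float array {a, b} sharing one GPR, the
// pair is assembled so the element at the lower memory address lands in
// the most-significant word on big-endian and the least-significant word
// on little-endian, matching the in-memory layout.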
6096 // The final element, if even, goes into the first half of a GPR.
6097 } else if (Flags.isInConsecutiveRegsLast()) {
6098 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6099 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6100 if (!isLittleEndian)
6101 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6102 DAG.getConstant(32, dl, MVT::i32));
6104 // Non-final even elements are skipped; they will be handled
6105 // together with the subsequent argument on the next go-around.
6106 } else
6107 ArgVal = SDValue();
6109 if (ArgVal.getNode())
6110 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6111 } else {
6112 if (CallConv == CallingConv::Fast)
6113 ComputePtrOff();
6115 // Single-precision floating-point values are mapped to the
6116 // second (rightmost) word of the stack doubleword.
6117 if (Arg.getValueType() == MVT::f32 &&
6118 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6119 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6120 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
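// Illustration: for a doubleword slot at offset 112, a lone big-endian f32
// is stored at 116, so an 8-byte GPR load of the slot would see it
// right-justified.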
6123 assert(HasParameterArea &&
6124 "Parameter area must exist to pass an argument in memory.");
6125 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6126 true, isTailCall, false, MemOpChains,
6127 TailCallArguments, dl);
6129 NeededLoad = true;
6131 // When passing an array of floats, the array occupies consecutive
6132 // space in the argument area; only round up to the next doubleword
6133 // at the end of the array. Otherwise, each float takes 8 bytes.
6134 if (CallConv != CallingConv::Fast || NeededLoad) {
6135 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6136 Flags.isInConsecutiveRegs()) ? 4 : 8;
6137 if (Flags.isInConsecutiveRegsLast())
6138 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
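// Example: three trailing f32 array elements advance ArgOffset by
// 4 + 4 + 4 == 12 bytes, and the isInConsecutiveRegsLast round-up then
// pads the total to the next doubleword boundary.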
6140 break;
6142 case MVT::v4f32:
6143 case MVT::v4i32:
6144 case MVT::v8i16:
6145 case MVT::v16i8:
6146 case MVT::v2f64:
6147 case MVT::v2i64:
6148 case MVT::v1i128:
6149 case MVT::f128:
6150 if (!Subtarget.hasQPX()) {
6151 // These can be scalar arguments or elements of a vector array type
6152 // passed directly. The latter are used to implement ELFv2 homogeneous
6153 // vector aggregates.
6155 // For a varargs call, named arguments go into VRs or on the stack as
6156 // usual; unnamed arguments always go to the stack or the corresponding
6157 // GPRs when within range. For now, we always put the value in both
6158 // locations (or even all three).
6159 if (isVarArg) {
6160 assert(HasParameterArea &&
6161 "Parameter area must exist if we have a varargs call.");
6162 // We could elide this store in the case where the object fits
6163 // entirely in R registers. Maybe later.
6164 SDValue Store =
6165 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6166 MemOpChains.push_back(Store);
6167 if (VR_idx != NumVRs) {
6168 SDValue Load =
6169 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6170 MemOpChains.push_back(Load.getValue(1));
6171 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6173 ArgOffset += 16;
6174 for (unsigned i=0; i<16; i+=PtrByteSize) {
6175 if (GPR_idx == NumGPRs)
6176 break;
6177 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6178 DAG.getConstant(i, dl, PtrVT));
6179 SDValue Load =
6180 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6181 MemOpChains.push_back(Load.getValue(1));
6182 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6184 break;
6187 // Non-varargs Altivec params go into VRs or on the stack.
6188 if (VR_idx != NumVRs) {
6189 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6190 } else {
6191 if (CallConv == CallingConv::Fast)
6192 ComputePtrOff();
6194 assert(HasParameterArea &&
6195 "Parameter area must exist to pass an argument in memory.");
6196 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6197 true, isTailCall, true, MemOpChains,
6198 TailCallArguments, dl);
6199 if (CallConv == CallingConv::Fast)
6200 ArgOffset += 16;
6203 if (CallConv != CallingConv::Fast)
6204 ArgOffset += 16;
6205 break;
6206 } // not QPX
6208 assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
6209 "Invalid QPX parameter type");
6211 LLVM_FALLTHROUGH;
6212 case MVT::v4f64:
6213 case MVT::v4i1: {
6214 bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
6215 if (isVarArg) {
6216 assert(HasParameterArea &&
6217 "Parameter area must exist if we have a varargs call.");
6218 // We could elide this store in the case where the object fits
6219 // entirely in R registers. Maybe later.
6220 SDValue Store =
6221 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6222 MemOpChains.push_back(Store);
6223 if (QFPR_idx != NumQFPRs) {
6224 SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
6225 PtrOff, MachinePointerInfo());
6226 MemOpChains.push_back(Load.getValue(1));
6227 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
6229 ArgOffset += (IsF32 ? 16 : 32);
6230 for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
6231 if (GPR_idx == NumGPRs)
6232 break;
6233 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6234 DAG.getConstant(i, dl, PtrVT));
6235 SDValue Load =
6236 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6237 MemOpChains.push_back(Load.getValue(1));
6238 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6240 break;
6243 // Non-varargs QPX params go into registers or on the stack.
6244 if (QFPR_idx != NumQFPRs) {
6245 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
6246 } else {
6247 if (CallConv == CallingConv::Fast)
6248 ComputePtrOff();
6250 assert(HasParameterArea &&
6251 "Parameter area must exist to pass an argument in memory.");
6252 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6253 true, isTailCall, true, MemOpChains,
6254 TailCallArguments, dl);
6255 if (CallConv == CallingConv::Fast)
6256 ArgOffset += (IsF32 ? 16 : 32);
6259 if (CallConv != CallingConv::Fast)
6260 ArgOffset += (IsF32 ? 16 : 32);
6261 break;
6266 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6267 "mismatch in size of parameter area");
6268 (void)NumBytesActuallyUsed;
6270 if (!MemOpChains.empty())
6271 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6273 // Check if this is an indirect call (MTCTR/BCTRL).
6274 // See PrepareCall() for more information about calls through function
6275 // pointers in the 64-bit SVR4 ABI.
6276 if (!isTailCall && !isPatchPoint &&
6277 !isFunctionGlobalAddress(Callee) &&
6278 !isa<ExternalSymbolSDNode>(Callee)) {
6279 // Load r2 into a virtual register and store it to the TOC save area.
6280 setUsesTOCBasePtr(DAG);
6281 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6282 // TOC save area offset.
6283 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6284 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6285 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6286 Chain = DAG.getStore(
6287 Val.getValue(1), dl, Val, AddPtr,
6288 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
6289 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6290 // This does not mean the MTCTR instruction must use R12; it's easier
6291 // to model this as an extra parameter, so do that.
6292 if (isELFv2ABI && !isPatchPoint)
6293 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6296 // Build a sequence of copy-to-reg nodes chained together with token chain
6297 // and flag operands which copy the outgoing args into the appropriate regs.
6298 SDValue InFlag;
6299 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6300 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6301 RegsToPass[i].second, InFlag);
6302 InFlag = Chain.getValue(1);
6305 if (isTailCall && !IsSibCall)
6306 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6307 TailCallArguments);
6309 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
6310 DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
6311 SPDiff, NumBytes, Ins, InVals, CS);
6314 SDValue PPCTargetLowering::LowerCall_Darwin(
6315 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
6316 bool isTailCall, bool isPatchPoint,
6317 const SmallVectorImpl<ISD::OutputArg> &Outs,
6318 const SmallVectorImpl<SDValue> &OutVals,
6319 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6320 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6321 ImmutableCallSite CS) const {
6322 unsigned NumOps = Outs.size();
6324 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6325 bool isPPC64 = PtrVT == MVT::i64;
6326 unsigned PtrByteSize = isPPC64 ? 8 : 4;
6328 MachineFunction &MF = DAG.getMachineFunction();
6330 // Mark this function as potentially containing a call that may be tail-call
6331 // optimized. As a consequence, the frame pointer will be used for dynamic
6332 // alloc and for restoring the caller's stack pointer in this function's
6333 // epilog. This is done because the tail-called function might overwrite the
6334 // value in this function's (MF) stack pointer stack slot 0(SP).
6335 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6336 CallConv == CallingConv::Fast)
6337 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6339 // Count how many bytes are to be pushed on the stack, including the linkage
6340 // area, and parameter passing area. We start with 24/48 bytes, which is
6341 // prereserved space for [SP][CR][LR][3 x unused].
6342 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6343 unsigned NumBytes = LinkageSize;
6345 // Add up all the space actually used.
6346 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
6347 // they all go in registers, but we must reserve stack space for them for
6348 // possible use by the caller. In varargs or 64-bit calls, parameters are
6349 // assigned stack space in order, with padding so Altivec parameters are
6350 // 16-byte aligned.
6351 unsigned nAltivecParamsAtEnd = 0;
6352 for (unsigned i = 0; i != NumOps; ++i) {
6353 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6354 EVT ArgVT = Outs[i].VT;
6355 // Varargs Altivec parameters are padded to a 16 byte boundary.
6356 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
6357 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
6358 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
6359 if (!isVarArg && !isPPC64) {
6360 // Non-varargs Altivec parameters go after all the non-Altivec
6361 // parameters; handle those later so we know how much padding we need.
6362 nAltivecParamsAtEnd++;
6363 continue;
6365 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
6366 NumBytes = ((NumBytes+15)/16)*16;
6368 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6371 // Allow for Altivec parameters at the end, if needed.
6372 if (nAltivecParamsAtEnd) {
6373 NumBytes = ((NumBytes+15)/16)*16;
6374 NumBytes += 16*nAltivecParamsAtEnd;
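// Worked example: with two Altivec parameters at the end of a call whose
// other arguments used 40 bytes, NumBytes rounds up to 48 and then gains
// 2 * 16, giving 80.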
6377 // The prolog code of the callee may store up to 8 GPR argument registers to
6378 // the stack, allowing va_start to index over them in memory if it's varargs.
6379 // Because we cannot tell if this is needed on the caller side, we have to
6380 // conservatively assume that it is needed. As such, make sure we have at
6381 // least enough stack space for the caller to store the 8 GPRs.
6382 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6384 // Tail call needs the stack to be aligned.
6385 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6386 CallConv == CallingConv::Fast)
6387 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6389 // Calculate by how many bytes the stack has to be adjusted in case of tail
6390 // call optimization.
6391 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
6393 // To protect arguments on the stack from being clobbered in a tail call,
6394 // force all the loads to happen before doing any other lowering.
6395 if (isTailCall)
6396 Chain = DAG.getStackArgumentTokenFactor(Chain);
6398 // Adjust the stack pointer for the new arguments...
6399 // These operations are automatically eliminated by the prolog/epilog pass
6400 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6401 SDValue CallSeqStart = Chain;
6403 // Load the return address and frame pointer so they can be moved somewhere
6404 // else later.
6405 SDValue LROp, FPOp;
6406 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6408 // Set up a copy of the stack pointer for use loading and storing any
6409 // arguments that may not fit in the registers available for argument
6410 // passing.
6411 SDValue StackPtr;
6412 if (isPPC64)
6413 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6414 else
6415 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6417 // Figure out which arguments are going to go in registers, and which in
6418 // memory. Also, if this is a vararg function, floating point operations
6419 // must be stored to our stack, and loaded into integer regs as well, if
6420 // any integer regs are available for argument passing.
6421 unsigned ArgOffset = LinkageSize;
6422 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6424 static const MCPhysReg GPR_32[] = { // 32-bit registers.
6425 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6426 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
6428 static const MCPhysReg GPR_64[] = { // 64-bit registers.
6429 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6430 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6432 static const MCPhysReg VR[] = {
6433 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6434 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6436 const unsigned NumGPRs = array_lengthof(GPR_32);
6437 const unsigned NumFPRs = 13;
6438 const unsigned NumVRs = array_lengthof(VR);
6440 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6442 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6443 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6445 SmallVector<SDValue, 8> MemOpChains;
6446 for (unsigned i = 0; i != NumOps; ++i) {
6447 SDValue Arg = OutVals[i];
6448 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6450 // PtrOff will be used to store the current argument to the stack if a
6451 // register cannot be found for it.
6452 SDValue PtrOff;
6454 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6456 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6458 // On PPC64, promote integers to 64-bit values.
6459 if (isPPC64 && Arg.getValueType() == MVT::i32) {
6460 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6461 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6462 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6465 // FIXME memcpy is used way more than necessary. Correctness first.
6466 // Note: "by value" is code for passing a structure by value, not
6467 // basic types.
6468 if (Flags.isByVal()) {
6469 unsigned Size = Flags.getByValSize();
6470 // Very small objects are passed right-justified. Everything else is
6471 // passed left-justified.
6472 if (Size==1 || Size==2) {
6473 EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
6474 if (GPR_idx != NumGPRs) {
6475 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6476 MachinePointerInfo(), VT);
6477 MemOpChains.push_back(Load.getValue(1));
6478 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6480 ArgOffset += PtrByteSize;
6481 } else {
6482 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6483 PtrOff.getValueType());
6484 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6485 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6486 CallSeqStart,
6487 Flags, DAG, dl);
6488 ArgOffset += PtrByteSize;
6490 continue;
6492 // Copy entire object into memory. There are cases where gcc-generated
6493 // code assumes it is there, even if it could be put entirely into
6494 // registers. (This is not what the doc says.)
6495 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6496 CallSeqStart,
6497 Flags, DAG, dl);
6499 // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6500 // copy the pieces of the object that fit into registers from the
6501 // parameter save area.
6502 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6503 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6504 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6505 if (GPR_idx != NumGPRs) {
6506 SDValue Load =
6507 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6508 MemOpChains.push_back(Load.getValue(1));
6509 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6510 ArgOffset += PtrByteSize;
6511 } else {
6512 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6513 break;
6516 continue;
6519 switch (Arg.getSimpleValueType().SimpleTy) {
6520 default: llvm_unreachable("Unexpected ValueType for argument!");
6521 case MVT::i1:
6522 case MVT::i32:
6523 case MVT::i64:
6524 if (GPR_idx != NumGPRs) {
6525 if (Arg.getValueType() == MVT::i1)
6526 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
6528 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6529 } else {
6530 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6531 isPPC64, isTailCall, false, MemOpChains,
6532 TailCallArguments, dl);
6534 ArgOffset += PtrByteSize;
6535 break;
6536 case MVT::f32:
6537 case MVT::f64:
6538 if (FPR_idx != NumFPRs) {
6539 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6541 if (isVarArg) {
6542 SDValue Store =
6543 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6544 MemOpChains.push_back(Store);
6546 // Float varargs are always shadowed in available integer registers
6547 if (GPR_idx != NumGPRs) {
6548 SDValue Load =
6549 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6550 MemOpChains.push_back(Load.getValue(1));
6551 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6553 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
6554 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6555 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6556 SDValue Load =
6557 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6558 MemOpChains.push_back(Load.getValue(1));
6559 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6561 } else {
6562 // If we have any FPRs remaining, we may also have GPRs remaining.
6563 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6564 // GPRs.
6565 if (GPR_idx != NumGPRs)
6566 ++GPR_idx;
6567 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
6568 !isPPC64) // PPC64 has 64-bit GPRs, obviously :)
6569 ++GPR_idx;
6571 } else
6572 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6573 isPPC64, isTailCall, false, MemOpChains,
6574 TailCallArguments, dl);
6575 if (isPPC64)
6576 ArgOffset += 8;
6577 else
6578 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
6579 break;
6580 case MVT::v4f32:
6581 case MVT::v4i32:
6582 case MVT::v8i16:
6583 case MVT::v16i8:
6584 if (isVarArg) {
6585 // These go aligned on the stack, or in the corresponding R registers
6586 // when within range. The Darwin PPC ABI doc claims they also go in
6587 // V registers; in fact gcc does this only for arguments that are
6588 // prototyped, not for those that match the ... We do it for all
6589 // arguments, which seems to work.
6590 while (ArgOffset % 16 !=0) {
6591 ArgOffset += PtrByteSize;
6592 if (GPR_idx != NumGPRs)
6593 GPR_idx++;
6595 // We could elide this store in the case where the object fits
6596 // entirely in R registers. Maybe later.
6597 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
6598 DAG.getConstant(ArgOffset, dl, PtrVT));
6599 SDValue Store =
6600 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6601 MemOpChains.push_back(Store);
6602 if (VR_idx != NumVRs) {
6603 SDValue Load =
6604 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6605 MemOpChains.push_back(Load.getValue(1));
6606 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6608 ArgOffset += 16;
6609 for (unsigned i=0; i<16; i+=PtrByteSize) {
6610 if (GPR_idx == NumGPRs)
6611 break;
6612 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6613 DAG.getConstant(i, dl, PtrVT));
6614 SDValue Load =
6615 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6616 MemOpChains.push_back(Load.getValue(1));
6617 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6619 break;
6622 // Non-varargs Altivec params generally go in registers, but have
6623 // stack space allocated at the end.
6624 if (VR_idx != NumVRs) {
6625 // Doesn't have GPR space allocated.
6626 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6627 } else if (nAltivecParamsAtEnd==0) {
6628 // We are emitting Altivec params in order.
6629 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6630 isPPC64, isTailCall, true, MemOpChains,
6631 TailCallArguments, dl);
6632 ArgOffset += 16;
6634 break;
6637 // If all Altivec parameters fit in registers, as they usually do,
6638 // they get stack space following the non-Altivec parameters. We
6639 // don't track this here because nobody below needs it.
6640 // If there are more Altivec parameters than fit in registers emit
6641 // the stores here.
6642 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
6643 unsigned j = 0;
6644 // Offset is aligned; skip 1st 12 params which go in V registers.
6645 ArgOffset = ((ArgOffset+15)/16)*16;
6646 ArgOffset += 12*16;
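// Illustration: with ArgOffset == 56 before this block, rounding gives 64,
// and skipping the twelve 16-byte V-register slots places the first
// spilled Altivec parameter at offset 64 + 192 == 256.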
6647 for (unsigned i = 0; i != NumOps; ++i) {
6648 SDValue Arg = OutVals[i];
6649 EVT ArgType = Outs[i].VT;
6650 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
6651 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
6652 if (++j > NumVRs) {
6653 SDValue PtrOff;
6654 // We are emitting Altivec params in order.
6655 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6656 isPPC64, isTailCall, true, MemOpChains,
6657 TailCallArguments, dl);
6658 ArgOffset += 16;
6664 if (!MemOpChains.empty())
6665 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6667 // On Darwin, R12 must contain the address of an indirect callee. This does
6668 // not mean the MTCTR instruction must use R12; it's easier to model this as
6669 // an extra parameter, so do that.
6670 if (!isTailCall &&
6671 !isFunctionGlobalAddress(Callee) &&
6672 !isa<ExternalSymbolSDNode>(Callee) &&
6673 !isBLACompatibleAddress(Callee, DAG))
6674 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
6675 PPC::R12), Callee));
6677 // Build a sequence of copy-to-reg nodes chained together with token chain
6678 // and flag operands which copy the outgoing args into the appropriate regs.
6679 SDValue InFlag;
6680 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6681 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6682 RegsToPass[i].second, InFlag);
6683 InFlag = Chain.getValue(1);
6686 if (isTailCall)
6687 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6688 TailCallArguments);
6690 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
6691 /* unused except on PPC64 ELFv1 */ false, DAG,
6692 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
6693 NumBytes, Ins, InVals, CS);
6697 SDValue PPCTargetLowering::LowerCall_AIX(
6698 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
6699 bool isTailCall, bool isPatchPoint,
6700 const SmallVectorImpl<ISD::OutputArg> &Outs,
6701 const SmallVectorImpl<SDValue> &OutVals,
6702 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6703 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6704 ImmutableCallSite CS) const {
6706 assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) &&
6707 "Unimplemented calling convention!");
6708 if (isVarArg || isPatchPoint)
6709 report_fatal_error("This call type is unimplemented on AIX.");
6711 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6712 bool isPPC64 = PtrVT == MVT::i64;
6713 unsigned PtrByteSize = isPPC64 ? 8 : 4;
6714 unsigned NumOps = Outs.size();
6717 // Count how many bytes are to be pushed on the stack, including the linkage
6718 // area and the parameter list area.
6719 // On XCOFF, we start with 24/48, which is reserved space for
6720 // [SP][CR][LR][2 x reserved][TOC].
6721 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6723 // The prolog code of the callee may store up to 8 GPR argument registers to
6724 // the stack, allowing va_start to index over them in memory if the callee
6725 // is variadic.
6726 // Because we cannot tell if this is needed on the caller side, we have to
6727 // conservatively assume that it is needed. As such, make sure we have at
6728 // least enough stack space for the caller to store the 8 GPRs.
6729 unsigned NumBytes = LinkageSize + 8 * PtrByteSize;
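// For example, on 64-bit AIX LinkageSize == 48 and PtrByteSize == 8, so
// the minimum reservation is 48 + 64 == 112 bytes; on 32-bit it is
// 24 + 32 == 56 bytes.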
6731 // Adjust the stack pointer for the new arguments...
6732 // These operations are automatically eliminated by the prolog/epilog
6733 // inserter pass.
6734 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6735 SDValue CallSeqStart = Chain;
6737 static const MCPhysReg GPR_32[] = { // 32-bit registers.
6738 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6739 PPC::R7, PPC::R8, PPC::R9, PPC::R10
6741 static const MCPhysReg GPR_64[] = { // 64-bit registers.
6742 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6743 PPC::X7, PPC::X8, PPC::X9, PPC::X10
6746 const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64)
6747 : array_lengthof(GPR_32);
6748 const unsigned NumFPRs = array_lengthof(FPR);
6749 assert(NumFPRs == 13 && "Only FPR 1-13 could be used for parameter passing "
6750 "on AIX");
6752 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6753 unsigned GPR_idx = 0, FPR_idx = 0;
6755 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6757 if (isTailCall)
6758 report_fatal_error("Handling of tail call is unimplemented!");
6759 int SPDiff = 0;
6761 for (unsigned i = 0; i != NumOps; ++i) {
6762 SDValue Arg = OutVals[i];
6763 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6765 // Promote integers if needed.
6766 if (Arg.getValueType() == MVT::i1 ||
6767 (isPPC64 && Arg.getValueType() == MVT::i32)) {
6768 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6769 Arg = DAG.getNode(ExtOp, dl, PtrVT, Arg);
6772 // Note: "by value" is code for passing a structure by value, not
6773 // basic types.
6774 if (Flags.isByVal())
6775 report_fatal_error("Passing structure by value is unimplemented!");
6777 switch (Arg.getSimpleValueType().SimpleTy) {
6778 default: llvm_unreachable("Unexpected ValueType for argument!");
6779 case MVT::i1:
6780 case MVT::i32:
6781 case MVT::i64:
6782 if (GPR_idx != NumGPRs)
6783 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6784 else
6785 report_fatal_error("Handling of placing parameters on the stack is "
6786 "unimplemented!");
6787 break;
6788 case MVT::f32:
6789 case MVT::f64:
6790 if (FPR_idx != NumFPRs) {
6791 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6793 // If we have any FPRs remaining, we may also have GPRs remaining.
6794 // Args passed in FPRs consume 1 or 2 (f64 in 32 bit mode) available
6795 // GPRs.
6796 if (GPR_idx != NumGPRs)
6797 ++GPR_idx;
6798 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64)
6799 ++GPR_idx;
6800 } else
6801 report_fatal_error("Handling of placing parameters on the stack is "
6802 "unimplemented!");
6803 break;
6804 case MVT::v4f32:
6805 case MVT::v4i32:
6806 case MVT::v8i16:
6807 case MVT::v16i8:
6808 case MVT::v2f64:
6809 case MVT::v2i64:
6810 case MVT::v1i128:
6811 case MVT::f128:
6812 case MVT::v4f64:
6813 case MVT::v4i1:
6814 report_fatal_error("Handling of this parameter type is unimplemented!");
6818 if (!isFunctionGlobalAddress(Callee) &&
6819 !isa<ExternalSymbolSDNode>(Callee))
6820 report_fatal_error("Handling of indirect call is unimplemented!");
6822 // Build a sequence of copy-to-reg nodes chained together with token chain
6823 // and flag operands which copy the outgoing args into the appropriate regs.
6824 SDValue InFlag;
6825 for (auto Reg : RegsToPass) {
6826 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
6827 InFlag = Chain.getValue(1);
6830 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
6831 /* unused except on PPC64 ELFv1 */ false, DAG,
6832 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
6833 NumBytes, Ins, InVals, CS);
6836 bool
6837 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
6838 MachineFunction &MF, bool isVarArg,
6839 const SmallVectorImpl<ISD::OutputArg> &Outs,
6840 LLVMContext &Context) const {
6841 SmallVector<CCValAssign, 16> RVLocs;
6842 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
6843 return CCInfo.CheckReturn(
6844 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
6845 ? RetCC_PPC_Cold
6846 : RetCC_PPC);
6849 SDValue
6850 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
6851 bool isVarArg,
6852 const SmallVectorImpl<ISD::OutputArg> &Outs,
6853 const SmallVectorImpl<SDValue> &OutVals,
6854 const SDLoc &dl, SelectionDAG &DAG) const {
6855 SmallVector<CCValAssign, 16> RVLocs;
6856 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
6857 *DAG.getContext());
6858 CCInfo.AnalyzeReturn(Outs,
6859 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
6860 ? RetCC_PPC_Cold
6861 : RetCC_PPC);
6863 SDValue Flag;
6864 SmallVector<SDValue, 4> RetOps(1, Chain);
6866 // Copy the result values into the output registers.
6867 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
6868 CCValAssign &VA = RVLocs[i];
6869 assert(VA.isRegLoc() && "Can only return in registers!");
6871 SDValue Arg = OutVals[RealResIdx];
6873 switch (VA.getLocInfo()) {
6874 default: llvm_unreachable("Unknown loc info!");
6875 case CCValAssign::Full: break;
6876 case CCValAssign::AExt:
6877 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
6878 break;
6879 case CCValAssign::ZExt:
6880 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
6881 break;
6882 case CCValAssign::SExt:
6883 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
6884 break;
6886 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
6887 bool isLittleEndian = Subtarget.isLittleEndian();
6888 // Legalize ret f64 -> ret 2 x i32.
6889 SDValue SVal =
6890 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6891 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
6892 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
6893 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
6894 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6895 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
6896 Flag = Chain.getValue(1);
6897 VA = RVLocs[++i]; // skip ahead to next loc
6898 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
6899 } else
6900 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
6901 Flag = Chain.getValue(1);
6902 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
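// Note on the SPE f64 case above: each 32-bit half is extracted with
// EXTRACT_SPE and copied into its own assigned return register, with the
// extraction order flipped between endiannesses so the halves reassemble
// to the original f64 bit pattern.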
6905 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
6906 const MCPhysReg *I =
6907 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
6908 if (I) {
6909 for (; *I; ++I) {
6911 if (PPC::G8RCRegClass.contains(*I))
6912 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
6913 else if (PPC::F8RCRegClass.contains(*I))
6914 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
6915 else if (PPC::CRRCRegClass.contains(*I))
6916 RetOps.push_back(DAG.getRegister(*I, MVT::i1));
6917 else if (PPC::VRRCRegClass.contains(*I))
6918 RetOps.push_back(DAG.getRegister(*I, MVT::Other));
6919 else
6920 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6924 RetOps[0] = Chain; // Update chain.
6926 // Add the flag if we have it.
6927 if (Flag.getNode())
6928 RetOps.push_back(Flag);
6930 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
6933 SDValue
6934 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
6935 SelectionDAG &DAG) const {
6936 SDLoc dl(Op);
6938 // Get the correct type for integers.
6939 EVT IntVT = Op.getValueType();
6941 // Get the inputs.
6942 SDValue Chain = Op.getOperand(0);
6943 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6944 // Build a DYNAREAOFFSET node.
6945 SDValue Ops[2] = {Chain, FPSIdx};
6946 SDVTList VTs = DAG.getVTList(IntVT);
6947 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
6950 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
6951 SelectionDAG &DAG) const {
6952 // When we pop the dynamic allocation we need to restore the SP link.
6953 SDLoc dl(Op);
6955 // Get the correct type for pointers.
6956 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6958 // Construct the stack pointer operand.
6959 bool isPPC64 = Subtarget.isPPC64();
6960 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
6961 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
6963 // Get the operands for the STACKRESTORE.
6964 SDValue Chain = Op.getOperand(0);
6965 SDValue SaveSP = Op.getOperand(1);
6967 // Load the old link SP.
6968 SDValue LoadLinkSP =
6969 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
6971 // Restore the stack pointer.
6972 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
6974 // Store the old link SP.
6975 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
6978 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
6979 MachineFunction &MF = DAG.getMachineFunction();
6980 bool isPPC64 = Subtarget.isPPC64();
6981 EVT PtrVT = getPointerTy(MF.getDataLayout());
6983 // Get the current return address save index. The users of this index
6984 // are primarily the tail-call lowering code.
6985 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6986 int RASI = FI->getReturnAddrSaveIndex();
6988 // If the return address save index hasn't been defined yet.
6989 if (!RASI) {
6990 // Find the fixed offset of the return address save area.
6991 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
6992 // Allocate the frame index for the return address save area.
6993 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
6994 // Save the result.
6995 FI->setReturnAddrSaveIndex(RASI);
6997 return DAG.getFrameIndex(RASI, PtrVT);
7000 SDValue
7001 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7002 MachineFunction &MF = DAG.getMachineFunction();
7003 bool isPPC64 = Subtarget.isPPC64();
7004 EVT PtrVT = getPointerTy(MF.getDataLayout());
7006 // Get current frame pointer save index. The users of this index will be
7007 // primarily DYNALLOC instructions.
7008 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7009 int FPSI = FI->getFramePointerSaveIndex();
7011 // If the frame pointer save index hasn't been defined yet.
7012 if (!FPSI) {
7013 // Find the fixed offset of the frame pointer save area.
7014 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7015 // Allocate the frame index for the frame pointer save area.
7016 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7017 // Save the result.
7018 FI->setFramePointerSaveIndex(FPSI);
7020 return DAG.getFrameIndex(FPSI, PtrVT);
7023 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7024 SelectionDAG &DAG) const {
7025 // Get the inputs.
7026 SDValue Chain = Op.getOperand(0);
7027 SDValue Size = Op.getOperand(1);
7028 SDLoc dl(Op);
7030 // Get the correct type for pointers.
7031 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7032 // Negate the size.
7033 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7034 DAG.getConstant(0, dl, PtrVT), Size);
7035 // Construct a node for the frame pointer save index.
7036 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7037 // Build a DYNALLOC node.
7038 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7039 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7040 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7043 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7044 SelectionDAG &DAG) const {
7045 MachineFunction &MF = DAG.getMachineFunction();
7047 bool isPPC64 = Subtarget.isPPC64();
7048 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7050 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7051 return DAG.getFrameIndex(FI, PtrVT);
7054 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7055 SelectionDAG &DAG) const {
7056 SDLoc DL(Op);
7057 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7058 DAG.getVTList(MVT::i32, MVT::Other),
7059 Op.getOperand(0), Op.getOperand(1));
7062 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7063 SelectionDAG &DAG) const {
7064 SDLoc DL(Op);
7065 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7066 Op.getOperand(0), Op.getOperand(1));
7069 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7070 if (Op.getValueType().isVector())
7071 return LowerVectorLoad(Op, DAG);
7073 assert(Op.getValueType() == MVT::i1 &&
7074 "Custom lowering only for i1 loads");
7076 // First, load 8 bits into a GPR-sized value, then truncate to 1 bit.
7078 SDLoc dl(Op);
7079 LoadSDNode *LD = cast<LoadSDNode>(Op);
7081 SDValue Chain = LD->getChain();
7082 SDValue BasePtr = LD->getBasePtr();
7083 MachineMemOperand *MMO = LD->getMemOperand();
7085 SDValue NewLD =
7086 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7087 BasePtr, MVT::i8, MMO);
7088 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7090 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7091 return DAG.getMergeValues(Ops, dl);
7094 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7095 if (Op.getOperand(1).getValueType().isVector())
7096 return LowerVectorStore(Op, DAG);
7098 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7099 "Custom lowering only for i1 stores");
7101 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7103 SDLoc dl(Op);
7104 StoreSDNode *ST = cast<StoreSDNode>(Op);
7106 SDValue Chain = ST->getChain();
7107 SDValue BasePtr = ST->getBasePtr();
7108 SDValue Value = ST->getValue();
7109 MachineMemOperand *MMO = ST->getMemOperand();
7111 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7112 Value);
7113 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7116 // FIXME: Remove this once the ANDI glue bug is fixed:
7117 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7118 assert(Op.getValueType() == MVT::i1 &&
7119 "Custom lowering only for i1 results");
7121 SDLoc DL(Op);
7122 return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
7123 Op.getOperand(0));
7126 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7127 SelectionDAG &DAG) const {
7129 // Implements a vector truncate that fits in a vector register as a shuffle.
7130 // We want to legalize vector truncates down to where the source fits in
7131 // a vector register (and target is therefore smaller than vector register
7132 // size). At that point legalization will try to custom lower the sub-legal
7133 // result and get here - where we can contain the truncate as a single target
7134 // operation.
7136 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7137 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7139 // We will implement it for big-endian ordering as this (where x denotes
7140 // undefined):
7141 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7142 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7144 // The same operation in little-endian ordering will be:
7145 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7146 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7148 assert(Op.getValueType().isVector() && "Vector type expected.");
7150 SDLoc DL(Op);
7151 SDValue N1 = Op.getOperand(0);
7152 unsigned SrcSize = N1.getValueType().getSizeInBits();
7153 assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
7154 SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7156 EVT TrgVT = Op.getValueType();
7157 unsigned TrgNumElts = TrgVT.getVectorNumElements();
7158 EVT EltVT = TrgVT.getVectorElementType();
7159 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7160 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7162 // First list the elements we want to keep.
7163 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7164 SmallVector<int, 16> ShuffV;
7165 if (Subtarget.isLittleEndian())
7166 for (unsigned i = 0; i < TrgNumElts; ++i)
7167 ShuffV.push_back(i * SizeMult);
7168 else
7169 for (unsigned i = 1; i <= TrgNumElts; ++i)
7170 ShuffV.push_back(i * SizeMult - 1);
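// Worked example, assuming a v4i32 -> v4i16 truncate: SrcSize == 128,
// SizeMult == 2 and WideNumElts == 8, so the mask keeps elements
// {0, 2, 4, 6} of the v8i16-viewed source on little-endian and
// {1, 3, 5, 7} on big-endian.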
7172 // Populate the remaining elements with undefs.
7173 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7174 // ShuffV.push_back(i + WideNumElts);
7175 ShuffV.push_back(WideNumElts + 1);
7177 SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
7178 return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
7181 /// LowerSELECT_CC - Lower floating-point select_cc's into the fsel
7182 /// instruction when possible.
7183 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7184 // Not FP? Not a fsel.
7185 if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
7186 !Op.getOperand(2).getValueType().isFloatingPoint())
7187 return Op;
7189 // We might be able to do better than this under some circumstances, but in
7190 // general, fsel-based lowering of select is a finite-math-only optimization.
7191 // For more information, see section F.3 of the 2.06 ISA specification.
7192 if (!DAG.getTarget().Options.NoInfsFPMath ||
7193 !DAG.getTarget().Options.NoNaNsFPMath)
7194 return Op;
7195 // TODO: Propagate flags from the select rather than global settings.
7196 SDNodeFlags Flags;
7197 Flags.setNoInfs(true);
7198 Flags.setNoNaNs(true);
7200 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7202 EVT ResVT = Op.getValueType();
7203 EVT CmpVT = Op.getOperand(0).getValueType();
7204 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7205 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
7206 SDLoc dl(Op);
7208 // If the RHS of the comparison is a 0.0, we don't need to do the
7209 // subtraction at all.
7210 SDValue Sel1;
7211 if (isFloatingPointZero(RHS))
7212 switch (CC) {
7213 default: break; // SETUO etc aren't handled by fsel.
7214 case ISD::SETNE:
7215 std::swap(TV, FV);
7216 LLVM_FALLTHROUGH;
7217 case ISD::SETEQ:
7218 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7219 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7220 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7221 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7222 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7223 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7224 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
7225 case ISD::SETULT:
7226 case ISD::SETLT:
7227 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7228 LLVM_FALLTHROUGH;
7229 case ISD::SETOGE:
7230 case ISD::SETGE:
7231 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7232 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7233 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7234 case ISD::SETUGT:
7235 case ISD::SETGT:
7236 std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
7237 LLVM_FALLTHROUGH;
7238 case ISD::SETOLE:
7239 case ISD::SETLE:
7240 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7241 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7242 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7243 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
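// Sketch of the zero-RHS mapping above: fsel selects its second operand
// when the first is >= 0.0, so select_cc(lhs, 0.0, tv, fv, setge) becomes
// fsel(lhs, tv, fv), and setlt is handled by swapping tv/fv first.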
7246 SDValue Cmp;
7247 switch (CC) {
7248 default: break; // SETUO etc aren't handled by fsel.
7249 case ISD::SETNE:
7250 std::swap(TV, FV);
7251 LLVM_FALLTHROUGH;
7252 case ISD::SETEQ:
7253 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7254 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7255 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7256 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7257 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7258 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7259 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7260 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
7261 case ISD::SETULT:
7262 case ISD::SETLT:
7263 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7264 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7265 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7266 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7267 case ISD::SETOGE:
7268 case ISD::SETGE:
7269 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7270 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7271 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7272 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7273 case ISD::SETUGT:
7274 case ISD::SETGT:
7275 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7276 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7277 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7278 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7279 case ISD::SETOLE:
7280 case ISD::SETLE:
7281 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7282 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7283 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7284 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7286 return Op;
7289 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
7290 SelectionDAG &DAG,
7291 const SDLoc &dl) const {
7292 assert(Op.getOperand(0).getValueType().isFloatingPoint());
7293 SDValue Src = Op.getOperand(0);
7294 if (Src.getValueType() == MVT::f32)
7295 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
7297 SDValue Tmp;
7298 switch (Op.getSimpleValueType().SimpleTy) {
7299 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7300 case MVT::i32:
7301 Tmp = DAG.getNode(
7302 Op.getOpcode() == ISD::FP_TO_SINT
7303 ? PPCISD::FCTIWZ
7304 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
7305 dl, MVT::f64, Src);
7306 break;
7307 case MVT::i64:
7308 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
7309 "i64 FP_TO_UINT is supported only with FPCVT");
7310 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
7311 PPCISD::FCTIDUZ,
7312 dl, MVT::f64, Src);
7313 break;
7316 // Convert the FP value to an int value through memory.
7317 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
7318 (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
7319 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
7320 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
7321 MachinePointerInfo MPI =
7322 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
7324 // Emit a store to the stack slot.
7325 SDValue Chain;
7326 if (i32Stack) {
7327 MachineFunction &MF = DAG.getMachineFunction();
7328 MachineMemOperand *MMO =
7329 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
7330 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
7331 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
7332 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
7333 } else
7334 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);
7336 // Result is a load from the stack slot. If loading 4 bytes, make sure to
7337 // add in a bias on big endian.
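// For example, when i32Stack is false the fctiwz/fctidz result is stored
// with a full 8-byte store; the 32-bit integer sits in the low-order word
// of the f64, which on a big-endian target lives at byte offset 4, hence
// the bias below.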
7338 if (Op.getValueType() == MVT::i32 && !i32Stack) {
7339 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
7340 DAG.getConstant(4, dl, FIPtr.getValueType()));
7341 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
7344 RLI.Chain = Chain;
7345 RLI.Ptr = FIPtr;
7346 RLI.MPI = MPI;
7349 /// Custom lowers floating point to integer conversions to use
7350 /// the direct move instructions available in ISA 2.07 to avoid the
7351 /// need for load/store combinations.
7352 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
7353 SelectionDAG &DAG,
7354 const SDLoc &dl) const {
7355 assert(Op.getOperand(0).getValueType().isFloatingPoint());
7356 SDValue Src = Op.getOperand(0);
7358 if (Src.getValueType() == MVT::f32)
7359 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
7361 SDValue Tmp;
7362 switch (Op.getSimpleValueType().SimpleTy) {
7363 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7364 case MVT::i32:
7365 Tmp = DAG.getNode(
7366 Op.getOpcode() == ISD::FP_TO_SINT
7367 ? PPCISD::FCTIWZ
7368 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
7369 dl, MVT::f64, Src);
7370 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
7371 break;
7372 case MVT::i64:
7373 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
7374 "i64 FP_TO_UINT is supported only with FPCVT");
7375 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
7376 PPCISD::FCTIDUZ,
7377 dl, MVT::f64, Src);
7378 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
7379 break;
7381 return Tmp;
7384 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
7385 const SDLoc &dl) const {
7387 // FP to INT conversions are legal for f128.
7388 if (EnableQuadPrecision && (Op->getOperand(0).getValueType() == MVT::f128))
7389 return Op;
7391 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
7392 // PPC (the libcall is not available).
7393 if (Op.getOperand(0).getValueType() == MVT::ppcf128) {
7394 if (Op.getValueType() == MVT::i32) {
7395 if (Op.getOpcode() == ISD::FP_TO_SINT) {
7396 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
7397 MVT::f64, Op.getOperand(0),
7398 DAG.getIntPtrConstant(0, dl));
7399 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
7400 MVT::f64, Op.getOperand(0),
7401 DAG.getIntPtrConstant(1, dl));
7403 // Add the two halves of the long double in round-to-zero mode.
7404 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
7406 // Now use a smaller FP_TO_SINT.
7407 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
7409 if (Op.getOpcode() == ISD::FP_TO_UINT) {
7410 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
7411 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
7412 SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
7413 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
7414 // FIXME: generated code sucks.
7415 // TODO: Are there fast-math-flags to propagate to this FSUB?
7416 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128,
7417 Op.getOperand(0), Tmp);
7418 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
7419 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True,
7420 DAG.getConstant(0x80000000, dl, MVT::i32));
7421 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
7422 Op.getOperand(0));
7423 return DAG.getSelectCC(dl, Op.getOperand(0), Tmp, True, False,
7424 ISD::SETGE);
7428 return SDValue();
7431 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
7432 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
7434 ReuseLoadInfo RLI;
7435 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7437 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7438 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7441 // We're trying to insert a regular store, S, and then a load, L. If the
7442 // incoming value, O, is a load, we might just be able to have our load use the
7443 // address used by O. However, we don't know if anything else will store to
7444 // that address before we can load from it. To prevent this situation, we need
7445 // to insert our load, L, into the chain as a peer of O. To do this, we give L
7446 // the same chain operand as O, we create a token factor from the chain results
7447 // of O and L, and we replace all uses of O's chain result with that token
7448 // factor (see spliceIntoChain below for this last part).
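//
// Schematically:
//
//   before:   Ch --> O --> (users of O's chain result)
//   after:    Ch --> O --> TF --> (users of O's chain result)
//             Ch --> L ----^
//
// L is therefore ordered against everything O was ordered against, and any
// later store is ordered after both O and L via the token factor.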
7449 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
7450 ReuseLoadInfo &RLI,
7451 SelectionDAG &DAG,
7452 ISD::LoadExtType ET) const {
7453 SDLoc dl(Op);
7454 if (ET == ISD::NON_EXTLOAD &&
7455 (Op.getOpcode() == ISD::FP_TO_UINT ||
7456 Op.getOpcode() == ISD::FP_TO_SINT) &&
7457 isOperationLegalOrCustom(Op.getOpcode(),
7458 Op.getOperand(0).getValueType())) {
7460 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7461 return true;
7464 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
7465 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
7466 LD->isNonTemporal())
7467 return false;
7468 if (LD->getMemoryVT() != MemVT)
7469 return false;
7471 RLI.Ptr = LD->getBasePtr();
7472 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
7473 assert(LD->getAddressingMode() == ISD::PRE_INC &&
7474 "Non-pre-inc AM on PPC?");
7475 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
7476 LD->getOffset());
7479 RLI.Chain = LD->getChain();
7480 RLI.MPI = LD->getPointerInfo();
7481 RLI.IsDereferenceable = LD->isDereferenceable();
7482 RLI.IsInvariant = LD->isInvariant();
7483 RLI.Alignment = LD->getAlignment();
7484 RLI.AAInfo = LD->getAAInfo();
7485 RLI.Ranges = LD->getRanges();
7487 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
7488 return true;
7491 // Given the head of the old chain, ResChain, insert a token factor containing
7492 // it and NewResChain, and make users of ResChain now be users of that token
7493 // factor.
7494 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
7495 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
7496 SDValue NewResChain,
7497 SelectionDAG &DAG) const {
7498 if (!ResChain)
7499 return;
7501 SDLoc dl(NewResChain);
7503 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
7504 NewResChain, DAG.getUNDEF(MVT::Other));
7505 assert(TF.getNode() != NewResChain.getNode() &&
7506 "A new TF really is required here");
7508 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
7509 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
7512 /// Analyze the profitability of a direct move: prefer a float load over an
7513 /// int load plus a direct move when the loaded integer value has no
7514 /// integer uses.
7515 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
7516 SDNode *Origin = Op.getOperand(0).getNode();
7517 if (Origin->getOpcode() != ISD::LOAD)
7518 return true;
7520 // If there is no LXSIBZX/LXSIHZX (e.g. on Power8), prefer a direct move
7521 // if the memory size is 1 or 2 bytes.
7522 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
7523 if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
7524 return true;
7526 for (SDNode::use_iterator UI = Origin->use_begin(),
7527 UE = Origin->use_end();
7528 UI != UE; ++UI) {
7530 // Only look at the users of the loaded value.
7531 if (UI.getUse().get().getResNo() != 0)
7532 continue;
7534 if (UI->getOpcode() != ISD::SINT_TO_FP &&
7535 UI->getOpcode() != ISD::UINT_TO_FP)
7536 return true;
7539 return false;
7542 /// Custom lowers integer to floating point conversions to use
7543 /// the direct move instructions available in ISA 2.07 to avoid the
7544 /// need for load/store combinations.
7545 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
7546 SelectionDAG &DAG,
7547 const SDLoc &dl) const {
7548 assert((Op.getValueType() == MVT::f32 ||
7549 Op.getValueType() == MVT::f64) &&
7550 "Invalid floating point type as target of conversion");
7551 assert(Subtarget.hasFPCVT() &&
7552 "Int to FP conversions with direct moves require FPCVT");
7553 SDValue FP;
7554 SDValue Src = Op.getOperand(0);
7555 bool SinglePrec = Op.getValueType() == MVT::f32;
7556 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
7557 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
7558 unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
7559 (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
7561 if (WordInt) {
7562 FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
7563 dl, MVT::f64, Src);
7564 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
7566 else {
7567 FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
7568 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
7571 return FP;
7574 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
7576 EVT VecVT = Vec.getValueType();
7577 assert(VecVT.isVector() && "Expected a vector type.");
7578 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
7580 EVT EltVT = VecVT.getVectorElementType();
7581 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7582 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7584 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
7585 SmallVector<SDValue, 16> Ops(NumConcat);
7586 Ops[0] = Vec;
7587 SDValue UndefVec = DAG.getUNDEF(VecVT);
7588 for (unsigned i = 1; i < NumConcat; ++i)
7589 Ops[i] = UndefVec;
7591 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
7594 SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
7595 const SDLoc &dl) const {
7597 unsigned Opc = Op.getOpcode();
7598 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
7599 "Unexpected conversion type");
7600 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
7601 "Supports conversions to v2f64/v4f32 only.");
7603 bool SignedConv = Opc == ISD::SINT_TO_FP;
7604 bool FourEltRes = Op.getValueType() == MVT::v4f32;
7606 SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
7607 EVT WideVT = Wide.getValueType();
7608 unsigned WideNumElts = WideVT.getVectorNumElements();
7609 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
7611 SmallVector<int, 16> ShuffV;
7612 for (unsigned i = 0; i < WideNumElts; ++i)
7613 ShuffV.push_back(i + WideNumElts);
7615 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
7616 int SaveElts = FourEltRes ? 4 : 2;
7617 if (Subtarget.isLittleEndian())
7618 for (int i = 0; i < SaveElts; i++)
7619 ShuffV[i * Stride] = i;
7620 else
7621 for (int i = 1; i <= SaveElts; i++)
7622 ShuffV[i * Stride - 1] = i - 1;
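// For example (a sketch, little-endian, v4i8 -> v4f32): WideNumElts == 16
// and Stride == 4, so ShuffV starts as <16,17,...,31> (every lane taken
// from the zero/undef second operand) and lanes 0,4,8,12 are then set to
// 0,1,2,3: source byte i lands in the low byte of word i and the other
// bytes are zero-filled for the unsigned case.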
7624 SDValue ShuffleSrc2 =
7625 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
7626 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
7627 unsigned ExtendOp =
7628 SignedConv ? (unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST;
7630 SDValue Extend;
7631 if (!Subtarget.hasP9Altivec() && SignedConv) {
7632 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
7633 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
7634 DAG.getValueType(Op.getOperand(0).getValueType()));
7635 } else
7636 Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange);
7638 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
7641 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
7642 SelectionDAG &DAG) const {
7643 SDLoc dl(Op);
7645 EVT InVT = Op.getOperand(0).getValueType();
7646 EVT OutVT = Op.getValueType();
7647 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
7648 isOperationCustom(Op.getOpcode(), InVT))
7649 return LowerINT_TO_FPVector(Op, DAG, dl);
7651 // Conversions to f128 are legal.
7652 if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
7653 return Op;
7655 if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
7656 if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
7657 return SDValue();
7659 SDValue Value = Op.getOperand(0);
7660 // The values are now known to be -1 (false) or 1 (true). To convert this
7661 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
7662 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
7663 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
7665 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
7667 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
7669 if (Op.getValueType() != MVT::v4f64)
7670 Value = DAG.getNode(ISD::FP_ROUND, dl,
7671 Op.getValueType(), Value,
7672 DAG.getIntPtrConstant(1, dl));
7673 return Value;
7676 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
7677 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
7678 return SDValue();
7680 if (Op.getOperand(0).getValueType() == MVT::i1)
7681 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
7682 DAG.getConstantFP(1.0, dl, Op.getValueType()),
7683 DAG.getConstantFP(0.0, dl, Op.getValueType()));
7685 // If we have direct moves, we can do the entire conversion and skip the
7686 // store/load; however, without FPCVT we can't do most conversions.
7687 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
7688 Subtarget.isPPC64() && Subtarget.hasFPCVT())
7689 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
7691 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
7692 "UINT_TO_FP is supported only with FPCVT");
7694 // If we have FCFIDS, then use it when converting to single-precision.
7695 // Otherwise, convert to double-precision and then round.
7696 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7697 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
7698 : PPCISD::FCFIDS)
7699 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
7700 : PPCISD::FCFID);
7701 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7702 ? MVT::f32
7703 : MVT::f64;
7705 if (Op.getOperand(0).getValueType() == MVT::i64) {
7706 SDValue SINT = Op.getOperand(0);
7707 // When converting to single-precision, we actually need to convert
7708 // to double-precision first and then round to single-precision.
7709 // To avoid double-rounding effects during that operation, we have
7710 // to prepare the input operand. Bits that might be truncated when
7711 // converting to double-precision are replaced by a bit that won't
7712 // be lost at this stage, but is below the single-precision rounding
7713 // position.
7715 // However, if -enable-unsafe-fp-math is in effect, accept double
7716 // rounding to avoid the extra overhead.
7717 if (Op.getValueType() == MVT::f32 &&
7718 !Subtarget.hasFPCVT() &&
7719 !DAG.getTarget().Options.UnsafeFPMath) {
7721 // Twiddle input to make sure the low 11 bits are zero. (If this
7722 // is the case, we are guaranteed the value will fit into the 53 bit
7723 // mantissa of an IEEE double-precision value without rounding.)
7724 // If any of those low 11 bits were not zero originally, make sure
7725 // bit 12 (value 2048) is set instead, so that the final rounding
7726 // to single-precision gets the correct result.
7727 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7728 SINT, DAG.getConstant(2047, dl, MVT::i64));
7729 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
7730 Round, DAG.getConstant(2047, dl, MVT::i64));
7731 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
7732 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7733 Round, DAG.getConstant(-2048, dl, MVT::i64));
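// Worked example: SINT == 0x1003 gives (0x003 + 2047) == 0x802, OR'd
// with SINT == 0x1803, masked to 0x1800: the low 11 bits are cleared
// and bit 11 records that they were nonzero. SINT == 0x1000 is left
// unchanged, since 0 + 2047 == 0x7ff never carries into bit 11.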
7735 // However, we cannot use that value unconditionally: if the magnitude
7736 // of the input value is small, the bit-twiddling we did above might
7737 // end up visibly changing the output. Fortunately, in that case, we
7738 // don't need to twiddle bits since the original input will convert
7739 // exactly to double-precision floating-point already. Therefore,
7740 // construct a conditional to use the original value if the top 11
7741 // bits are all sign-bit copies, and use the rounded value computed
7742 // above otherwise.
7743 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
7744 SINT, DAG.getConstant(53, dl, MVT::i32));
7745 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
7746 Cond, DAG.getConstant(1, dl, MVT::i64));
7747 Cond = DAG.getSetCC(dl, MVT::i32,
7748 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
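// The SRA leaves only the top 11 bits. If they are all copies of the
// sign bit, the shifted value is 0 or -1, adding 1 yields 0 or 1, and
// SETUGT 1 is false, so the original (exactly representable) SINT is
// selected below; otherwise the rounded value is used.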
7750 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
7753 ReuseLoadInfo RLI;
7754 SDValue Bits;
7756 MachineFunction &MF = DAG.getMachineFunction();
7757 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
7758 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7759 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7760 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7761 } else if (Subtarget.hasLFIWAX() &&
7762 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
7763 MachineMemOperand *MMO =
7764 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7765 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7766 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7767 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
7768 DAG.getVTList(MVT::f64, MVT::Other),
7769 Ops, MVT::i32, MMO);
7770 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7771 } else if (Subtarget.hasFPCVT() &&
7772 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
7773 MachineMemOperand *MMO =
7774 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7775 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7776 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7777 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
7778 DAG.getVTList(MVT::f64, MVT::Other),
7779 Ops, MVT::i32, MMO);
7780 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7781 } else if (((Subtarget.hasLFIWAX() &&
7782 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
7783 (Subtarget.hasFPCVT() &&
7784 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
7785 SINT.getOperand(0).getValueType() == MVT::i32) {
7786 MachineFrameInfo &MFI = MF.getFrameInfo();
7787 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7789 int FrameIdx = MFI.CreateStackObject(4, 4, false);
7790 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7792 SDValue Store =
7793 DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
7794 MachinePointerInfo::getFixedStack(
7795 DAG.getMachineFunction(), FrameIdx));
7797 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7798 "Expected an i32 store");
7800 RLI.Ptr = FIdx;
7801 RLI.Chain = Store;
7802 RLI.MPI =
7803 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7804 RLI.Alignment = 4;
7806 MachineMemOperand *MMO =
7807 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7808 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7809 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7810 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
7811 PPCISD::LFIWZX : PPCISD::LFIWAX,
7812 dl, DAG.getVTList(MVT::f64, MVT::Other),
7813 Ops, MVT::i32, MMO);
7814 } else
7815 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
7817 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
7819 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7820 FP = DAG.getNode(ISD::FP_ROUND, dl,
7821 MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
7822 return FP;
7825 assert(Op.getOperand(0).getValueType() == MVT::i32 &&
7826 "Unhandled INT_TO_FP type in custom expander!");
7827 // Since we only generate this in 64-bit mode, we can take advantage of
7828 // 64-bit registers. In particular, sign extend the input value into the
7829 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
7830 // then lfd it and fcfid it.
7831 MachineFunction &MF = DAG.getMachineFunction();
7832 MachineFrameInfo &MFI = MF.getFrameInfo();
7833 EVT PtrVT = getPointerTy(MF.getDataLayout());
7835 SDValue Ld;
7836 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
7837 ReuseLoadInfo RLI;
7838 bool ReusingLoad;
7839 if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
7840 DAG))) {
7841 int FrameIdx = MFI.CreateStackObject(4, 4, false);
7842 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7844 SDValue Store =
7845 DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
7846 MachinePointerInfo::getFixedStack(
7847 DAG.getMachineFunction(), FrameIdx));
7849 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7850 "Expected an i32 store");
7852 RLI.Ptr = FIdx;
7853 RLI.Chain = Store;
7854 RLI.MPI =
7855 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7856 RLI.Alignment = 4;
7859 MachineMemOperand *MMO =
7860 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7861 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7862 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7863 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
7864 PPCISD::LFIWZX : PPCISD::LFIWAX,
7865 dl, DAG.getVTList(MVT::f64, MVT::Other),
7866 Ops, MVT::i32, MMO);
7867 if (ReusingLoad)
7868 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
7869 } else {
7870 assert(Subtarget.isPPC64() &&
7871 "i32->FP without LFIWAX supported only on PPC64");
7873 int FrameIdx = MFI.CreateStackObject(8, 8, false);
7874 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7876 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
7877 Op.getOperand(0));
7879 // STD the extended value into the stack slot.
7880 SDValue Store = DAG.getStore(
7881 DAG.getEntryNode(), dl, Ext64, FIdx,
7882 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7884 // Load the value as a double.
7885 Ld = DAG.getLoad(
7886 MVT::f64, dl, Store, FIdx,
7887 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7890 // FCFID it and return it.
7891 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
7892 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7893 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
7894 DAG.getIntPtrConstant(0, dl));
7895 return FP;
7898 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
7899 SelectionDAG &DAG) const {
7900 SDLoc dl(Op);
7901 /*
7902  The rounding mode is in bits 30:31 of FPSCR, and has the following
7903  settings:
7904    00 Round to nearest
7905    01 Round to 0
7906    10 Round to +inf
7907    11 Round to -inf
7909  FLT_ROUNDS, on the other hand, expects the following:
7910   -1 Undefined
7911    0 Round to 0
7912    1 Round to nearest
7913    2 Round to +inf
7914    3 Round to -inf
7916  To perform the conversion, we do:
7917    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
7918 */
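// Checking the formula against each FPSCR rounding-mode value:
//   RN=00: (0 ^ (3 >> 1)) == 1   (round to nearest)
//   RN=01: (1 ^ (2 >> 1)) == 0   (round to zero)
//   RN=10: (2 ^ (1 >> 1)) == 2   (round to +inf)
//   RN=11: (3 ^ (0 >> 1)) == 3   (round to -inf)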
7920 MachineFunction &MF = DAG.getMachineFunction();
7921 EVT VT = Op.getValueType();
7922 EVT PtrVT = getPointerTy(MF.getDataLayout());
7924 // Save FP Control Word to register
7925 EVT NodeTys[] = {
7926 MVT::f64, // return register
7927 MVT::Glue // unused in this context
7929 SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
7931 // Save FP register to stack slot
7932 int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
7933 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
7934 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
7935 MachinePointerInfo());
7937 // Load FP Control Word from low 32 bits of stack slot.
7938 SDValue Four = DAG.getConstant(4, dl, PtrVT);
7939 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
7940 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());
7942 // Transform as necessary
7943 SDValue CWD1 =
7944 DAG.getNode(ISD::AND, dl, MVT::i32,
7945 CWD, DAG.getConstant(3, dl, MVT::i32));
7946 SDValue CWD2 =
7947 DAG.getNode(ISD::SRL, dl, MVT::i32,
7948 DAG.getNode(ISD::AND, dl, MVT::i32,
7949 DAG.getNode(ISD::XOR, dl, MVT::i32,
7950 CWD, DAG.getConstant(3, dl, MVT::i32)),
7951 DAG.getConstant(3, dl, MVT::i32)),
7952 DAG.getConstant(1, dl, MVT::i32));
7954 SDValue RetVal =
7955 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
7957 return DAG.getNode((VT.getSizeInBits() < 16 ?
7958 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
7961 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7962 EVT VT = Op.getValueType();
7963 unsigned BitWidth = VT.getSizeInBits();
7964 SDLoc dl(Op);
7965 assert(Op.getNumOperands() == 3 &&
7966 VT == Op.getOperand(1).getValueType() &&
7967 "Unexpected SHL!");
7969 // Expand into a bunch of logical ops. Note that these ops
7970 // depend on the PPC behavior for oversized shift amounts.
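// Specifically, the 32-bit shifts used here return 0 for shift amounts
// 32..63 (slw/srw consume six bits of the amount). A sketch for Amt == 40:
//   Tmp2 = Hi << 40        == 0
//   Tmp3 = Lo >> (32 - 40) == Lo >> 56 (mod 64) == 0
//   Tmp6 = Lo << (40 - 32) == Lo << 8
// so OutHi == Lo << 8 and OutLo == Lo << 40 == 0, as required for a
// 64-bit shift left by 40.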
7971 SDValue Lo = Op.getOperand(0);
7972 SDValue Hi = Op.getOperand(1);
7973 SDValue Amt = Op.getOperand(2);
7974 EVT AmtVT = Amt.getValueType();
7976 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7977 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7978 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
7979 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
7980 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
7981 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7982 DAG.getConstant(-BitWidth, dl, AmtVT));
7983 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
7984 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7985 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
7986 SDValue OutOps[] = { OutLo, OutHi };
7987 return DAG.getMergeValues(OutOps, dl);
7990 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7991 EVT VT = Op.getValueType();
7992 SDLoc dl(Op);
7993 unsigned BitWidth = VT.getSizeInBits();
7994 assert(Op.getNumOperands() == 3 &&
7995 VT == Op.getOperand(1).getValueType() &&
7996 "Unexpected SRL!");
7998 // Expand into a bunch of logical ops. Note that these ops
7999 // depend on the PPC behavior for oversized shift amounts.
8000 SDValue Lo = Op.getOperand(0);
8001 SDValue Hi = Op.getOperand(1);
8002 SDValue Amt = Op.getOperand(2);
8003 EVT AmtVT = Amt.getValueType();
8005 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8006 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8007 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8008 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8009 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8010 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8011 DAG.getConstant(-BitWidth, dl, AmtVT));
8012 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8013 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8014 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8015 SDValue OutOps[] = { OutLo, OutHi };
8016 return DAG.getMergeValues(OutOps, dl);
8019 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8020 SDLoc dl(Op);
8021 EVT VT = Op.getValueType();
8022 unsigned BitWidth = VT.getSizeInBits();
8023 assert(Op.getNumOperands() == 3 &&
8024 VT == Op.getOperand(1).getValueType() &&
8025 "Unexpected SRA!");
8027 // Expand into a bunch of logical ops, followed by a select_cc.
8028 SDValue Lo = Op.getOperand(0);
8029 SDValue Hi = Op.getOperand(1);
8030 SDValue Amt = Op.getOperand(2);
8031 EVT AmtVT = Amt.getValueType();
8033 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8034 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8035 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8036 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8037 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8038 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8039 DAG.getConstant(-BitWidth, dl, AmtVT));
8040 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8041 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8042 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8043 Tmp4, Tmp6, ISD::SETLE);
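// Tmp5 is Amt - 32. For Amt <= 32 the OR of the two partial shifts (Tmp4)
// is the correct low word; for larger amounts it must be Hi >>s (Amt - 32)
// (Tmp6). Unlike the SRL/SHL expansions, the two cases cannot be merged
// with an OR because the oversized case needs sign bits rather than
// zeros, hence the select_cc on the sign of Tmp5.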
8044 SDValue OutOps[] = { OutLo, OutHi };
8045 return DAG.getMergeValues(OutOps, dl);
8048 //===----------------------------------------------------------------------===//
8049 // Vector related lowering.
8052 /// BuildSplatI - Build a canonical splati of Val with an element size of
8053 /// SplatSize. Cast the result to VT.
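/// For example, BuildSplatI(-16, 4, MVT::v4i32, ...) builds a v4i32 constant
/// splat of -16 (expected to be matched as vspltisw -16), bitcast to VT if
/// needed; Val == -1 is canonicalized to the byte splat (vspltisb -1).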
8054 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
8055 SelectionDAG &DAG, const SDLoc &dl) {
8056 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
8058 static const MVT VTys[] = { // canonical VT to use for each size.
8059 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8062 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8064 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
8065 if (Val == -1)
8066 SplatSize = 1;
8068 EVT CanonicalVT = VTys[SplatSize-1];
8070 // Build a canonical splat for this value.
8071 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8074 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8075 /// specified intrinsic ID.
8076 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
8077 const SDLoc &dl, EVT DestVT = MVT::Other) {
8078 if (DestVT == MVT::Other) DestVT = Op.getValueType();
8079 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8080 DAG.getConstant(IID, dl, MVT::i32), Op);
8083 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8084 /// specified intrinsic ID.
8085 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
8086 SelectionDAG &DAG, const SDLoc &dl,
8087 EVT DestVT = MVT::Other) {
8088 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
8089 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8090 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
8093 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8094 /// specified intrinsic ID.
8095 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8096 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8097 EVT DestVT = MVT::Other) {
8098 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8099 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8100 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8103 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8104 /// amount. The result has the specified value type.
8105 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
8106 SelectionDAG &DAG, const SDLoc &dl) {
8107 // Force LHS/RHS to be the right type.
8108 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
8109 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
8111 int Ops[16];
8112 for (unsigned i = 0; i != 16; ++i)
8113 Ops[i] = i + Amt;
8114 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
8115 return DAG.getNode(ISD::BITCAST, dl, VT, T);
8118 /// Do we have an efficient pattern in a .td file for this node?
8120 /// \param V - pointer to the BuildVectorSDNode being matched
8121 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
8123 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
8124 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
8125 /// the opposite is true (expansion is beneficial) are:
8126 /// - The node builds a vector out of integers that are not 32 or 64-bits
8127 /// - The node builds a vector out of constants
8128 /// - The node is a "load-and-splat"
8129 /// In all other cases, we will choose to keep the BUILD_VECTOR.
8130 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
8131 bool HasDirectMove,
8132 bool HasP8Vector) {
8133 EVT VecVT = V->getValueType(0);
8134 bool RightType = VecVT == MVT::v2f64 ||
8135 (HasP8Vector && VecVT == MVT::v4f32) ||
8136 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
8137 if (!RightType)
8138 return false;
8140 bool IsSplat = true;
8141 bool IsLoad = false;
8142 SDValue Op0 = V->getOperand(0);
8144 // This function is called in a block that confirms the node is not a constant
8145 // splat. So a constant BUILD_VECTOR here means the vector is built out of
8146 // different constants.
8147 if (V->isConstant())
8148 return false;
8149 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
8150 if (V->getOperand(i).isUndef())
8151 return false;
8152 // We want to expand nodes that represent load-and-splat even if the
8153 // loaded value is a floating point truncation or conversion to int.
8154 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
8155 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
8156 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8157 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
8158 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8159 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
8160 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
8161 IsLoad = true;
8162 // If the operands are different or the input is not a load and has more
8163 // uses than just this BV node, then it isn't a splat.
8164 if (V->getOperand(i) != Op0 ||
8165 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
8166 IsSplat = false;
8168 return !(IsSplat && IsLoad);
8171 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
8172 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
8174 SDLoc dl(Op);
8175 SDValue Op0 = Op->getOperand(0);
8177 if (!EnableQuadPrecision ||
8178 (Op.getValueType() != MVT::f128 ) ||
8179 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
8180 (Op0.getOperand(0).getValueType() != MVT::i64) ||
8181 (Op0.getOperand(1).getValueType() != MVT::i64))
8182 return SDValue();
8184 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
8185 Op0.getOperand(1));
8188 // If this is a case we can't handle, return null and let the default
8189 // expansion code take care of it. If we CAN select this case, and if it
8190 // selects to a single instruction, return Op. Otherwise, if we can codegen
8191 // this case more efficiently than a constant pool load, lower it to the
8192 // sequence of ops that should be used.
8193 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
8194 SelectionDAG &DAG) const {
8195 SDLoc dl(Op);
8196 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
8197 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
8199 if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
8200 // We first build an i32 vector, load it into a QPX register,
8201 // then convert it to a floating-point vector and compare it
8202 // to a zero vector to get the boolean result.
8203 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8204 int FrameIdx = MFI.CreateStackObject(16, 16, false);
8205 MachinePointerInfo PtrInfo =
8206 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8207 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8208 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8210 assert(BVN->getNumOperands() == 4 &&
8211 "BUILD_VECTOR for v4i1 does not have 4 operands");
8213 bool IsConst = true;
8214 for (unsigned i = 0; i < 4; ++i) {
8215 if (BVN->getOperand(i).isUndef()) continue;
8216 if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
8217 IsConst = false;
8218 break;
8222 if (IsConst) {
8223 Constant *One =
8224 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
8225 Constant *NegOne =
8226 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
8228 Constant *CV[4];
8229 for (unsigned i = 0; i < 4; ++i) {
8230 if (BVN->getOperand(i).isUndef())
8231 CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
8232 else if (isNullConstant(BVN->getOperand(i)))
8233 CV[i] = NegOne;
8234 else
8235 CV[i] = One;
8238 Constant *CP = ConstantVector::get(CV);
8239 SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
8240 16 /* alignment */);
8242 SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
8243 SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
8244 return DAG.getMemIntrinsicNode(
8245 PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
8246 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8249 SmallVector<SDValue, 4> Stores;
8250 for (unsigned i = 0; i < 4; ++i) {
8251 if (BVN->getOperand(i).isUndef()) continue;
8253 unsigned Offset = 4*i;
8254 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
8255 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
8257 unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
8258 if (StoreSize > 4) {
8259 Stores.push_back(
8260 DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
8261 PtrInfo.getWithOffset(Offset), MVT::i32));
8262 } else {
8263 SDValue StoreValue = BVN->getOperand(i);
8264 if (StoreSize < 4)
8265 StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
8267 Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
8268 PtrInfo.getWithOffset(Offset)));
8272 SDValue StoreChain;
8273 if (!Stores.empty())
8274 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
8275 else
8276 StoreChain = DAG.getEntryNode();
8278 // Now load from v4i32 into the QPX register; this will extend it to
8279 // v4i64 but not yet convert it to a floating point. Nevertheless, this
8280 // is typed as v4f64 because the QPX register integer states are not
8281 // explicitly represented.
8283 SDValue Ops[] = {StoreChain,
8284 DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
8285 FIdx};
8286 SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});
8288 SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
8289 dl, VTs, Ops, MVT::v4i32, PtrInfo);
8290 LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
8291 DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
8292 LoadedVect);
8294 SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);
8296 return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
8299 // All other QPX vectors are handled by generic code.
8300 if (Subtarget.hasQPX())
8301 return SDValue();
8303 // Check if this is a splat of a constant value.
8304 APInt APSplatBits, APSplatUndef;
8305 unsigned SplatBitSize;
8306 bool HasAnyUndefs;
8307 if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
8308 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
8309 SplatBitSize > 32) {
8310 // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
8311 // lowered to VSX instructions under certain conditions.
8312 // Without VSX, there is no pattern more efficient than expanding the node.
8313 if (Subtarget.hasVSX() &&
8314 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
8315 Subtarget.hasP8Vector()))
8316 return Op;
8317 return SDValue();
8320 unsigned SplatBits = APSplatBits.getZExtValue();
8321 unsigned SplatUndef = APSplatUndef.getZExtValue();
8322 unsigned SplatSize = SplatBitSize / 8;
8324 // First, handle single instruction cases.
8326 // All zeros?
8327 if (SplatBits == 0) {
8328 // Canonicalize all zero vectors to be v4i32.
8329 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
8330 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
8331 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
8333 return Op;
8336 // We have XXSPLTIB for constant splats one byte wide
8337 if (Subtarget.hasP9Vector() && SplatSize == 1) {
8338 // This is a splat of 1-byte elements with some elements potentially undef.
8339 // Rather than trying to match undef in the SDAG patterns, ensure that all
8340 // elements are the same constant.
8341 if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
8342 SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
8343 dl, MVT::i32));
8344 SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
8345 if (Op.getValueType() != MVT::v16i8)
8346 return DAG.getBitcast(Op.getValueType(), NewBV);
8347 return NewBV;
8350 // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll
8351 // detect that constant splats like v8i16: 0xABAB are really just splats
8352 // of a 1-byte constant. In this case, we need to convert the node to a
8353 // splat of v16i8 and a bitcast.
8354 if (Op.getValueType() != MVT::v16i8)
8355 return DAG.getBitcast(Op.getValueType(),
8356 DAG.getConstant(SplatBits, dl, MVT::v16i8));
8358 return Op;
8361 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
8362 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
8363 (32-SplatBitSize));
8364 if (SextVal >= -16 && SextVal <= 15)
8365 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
8367 // Two instruction sequences.
8369 // If this value is in the range [-32,30] and is even, use:
8370 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
8371 // If this value is in the range [17,31] and is odd, use:
8372 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
8373 // If this value is in the range [-31,-17] and is odd, use:
8374 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
8375 // Note the last two are three-instruction sequences.
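// For example, SextVal == 30 becomes vsplti(15) + vsplti(15), and
// SextVal == 27 becomes vsplti(11) - vsplti(-16), since 11 - (-16) == 27.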
8376 if (SextVal >= -32 && SextVal <= 31) {
8377 // To avoid having these optimizations undone by constant folding,
8378 // we convert to a pseudo that will be expanded later into one of
8379 // the above forms.
8380 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
8381 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
8382 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
8383 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
8384 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
8385 if (VT == Op.getValueType())
8386 return RetVal;
8387 else
8388 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
8391 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
8392 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
8393 // for fneg/fabs.
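// The trick: vspltisw -1 makes every word 0xFFFFFFFF, and vslw shifts each
// word left by the low 5 bits of the corresponding word of the shift
// operand (here 31), producing 0x8000_0000; xor-ing with the all-ones
// vector then yields the desired 0x7FFF_FFFF.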
8394 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
8395 // Make an all-ones vector with vspltisw -1:
8396 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
8398 // Make the VSLW intrinsic, computing 0x8000_0000.
8399 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
8400 OnesV, DAG, dl);
8402 // xor by OnesV to invert it.
8403 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
8404 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8407 // Check to see if this is one of the many 'vsplti N; binop self' cases.
8408 static const signed char SplatCsts[] = {
8409 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
8410 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
8413 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
8414 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
8415 // cases which are ambiguous (e.g. formation of 0x8000_0000); 'vsplti -1' is tried first.
8416 int i = SplatCsts[idx];
8418 // Figure out what shift amount will be used by altivec if shifted by i in
8419 // this splat size.
8420 unsigned TypeShiftAmt = i & (SplatBitSize-1);
8422 // vsplti + shl self.
8423 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
8424 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8425 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8426 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
8427 Intrinsic::ppc_altivec_vslw
8429 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8430 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8433 // vsplti + srl self.
8434 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
8435 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8436 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8437 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
8438 Intrinsic::ppc_altivec_vsrw
8440 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8441 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8444 // vsplti + sra self.
8445 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
8446 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8447 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8448 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
8449 Intrinsic::ppc_altivec_vsraw
8451 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8452 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8455 // vsplti + rol self.
8456 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
8457 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
8458 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8459 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8460 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
8461 Intrinsic::ppc_altivec_vrlw
8463 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8464 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8467 // t = vsplti c, result = vsldoi t, t, 1
8468 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
8469 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8470 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
8471 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8473 // t = vsplti c, result = vsldoi t, t, 2
8474 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
8475 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8476 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
8477 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8479 // t = vsplti c, result = vsldoi t, t, 3
8480 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
8481 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8482 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
8483 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8487 return SDValue();
8490 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8491 /// the specified operations to build the shuffle.
8492 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8493 SDValue RHS, SelectionDAG &DAG,
8494 const SDLoc &dl) {
8495 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8496 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8497 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
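// Bits 31:30 of PFEntry hold the cost (unused here). LHSID/RHSID each
// encode a 4-element mask in base 9, one digit per element (0-7 select an
// input element, 8 means undef); e.g. ((0*9+1)*9+2)*9+3 == 102 encodes
// <0,1,2,3>, which is matched as (1*9+2)*9+3 below.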
8499 enum {
8500 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8501 OP_VMRGHW,
8502 OP_VMRGLW,
8503 OP_VSPLTISW0,
8504 OP_VSPLTISW1,
8505 OP_VSPLTISW2,
8506 OP_VSPLTISW3,
8507 OP_VSLDOI4,
8508 OP_VSLDOI8,
8509 OP_VSLDOI12
8512 if (OpNum == OP_COPY) {
8513 if (LHSID == (1*9+2)*9+3) return LHS;
8514 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8515 return RHS;
8518 SDValue OpLHS, OpRHS;
8519 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8520 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8522 int ShufIdxs[16];
8523 switch (OpNum) {
8524 default: llvm_unreachable("Unknown i32 permute!");
8525 case OP_VMRGHW:
8526 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
8527 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
8528 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
8529 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
8530 break;
8531 case OP_VMRGLW:
8532 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
8533 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
8534 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
8535 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
8536 break;
8537 case OP_VSPLTISW0:
8538 for (unsigned i = 0; i != 16; ++i)
8539 ShufIdxs[i] = (i&3)+0;
8540 break;
8541 case OP_VSPLTISW1:
8542 for (unsigned i = 0; i != 16; ++i)
8543 ShufIdxs[i] = (i&3)+4;
8544 break;
8545 case OP_VSPLTISW2:
8546 for (unsigned i = 0; i != 16; ++i)
8547 ShufIdxs[i] = (i&3)+8;
8548 break;
8549 case OP_VSPLTISW3:
8550 for (unsigned i = 0; i != 16; ++i)
8551 ShufIdxs[i] = (i&3)+12;
8552 break;
8553 case OP_VSLDOI4:
8554 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
8555 case OP_VSLDOI8:
8556 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
8557 case OP_VSLDOI12:
8558 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
8560 EVT VT = OpLHS.getValueType();
8561 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
8562 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
8563 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
8564 return DAG.getNode(ISD::BITCAST, dl, VT, T);
8567 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
8568 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default
8569 /// SDValue.
8570 SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
8571 SelectionDAG &DAG) const {
8572 const unsigned BytesInVector = 16;
8573 bool IsLE = Subtarget.isLittleEndian();
8574 SDLoc dl(N);
8575 SDValue V1 = N->getOperand(0);
8576 SDValue V2 = N->getOperand(1);
8577 unsigned ShiftElts = 0, InsertAtByte = 0;
8578 bool Swap = false;
8580 // Shifts required to get the byte we want at element 7.
8581 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
8582 0, 15, 14, 13, 12, 11, 10, 9};
8583 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
8584 1, 2, 3, 4, 5, 6, 7, 8};
8586 ArrayRef<int> Mask = N->getMask();
8587 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
8589 // For each mask element, find out if we're just inserting something
8590 // from V2 into V1 or vice versa.
8591 // Possible permutations inserting an element from V2 into V1:
8592 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8593 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8594 // ...
8595 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
8596 // Inserting from V1 into V2 will be similar, except mask range will be
8597 // [16,31].
8599 bool FoundCandidate = false;
8600 // If both vector operands for the shuffle are the same vector, the mask
8601 // will contain only elements from the first one and the second one will be
8602 // undef.
8603 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
8604 // Go through the mask of bytes to find an element that's being moved
8605 // from one vector to the other.
8606 for (unsigned i = 0; i < BytesInVector; ++i) {
8607 unsigned CurrentElement = Mask[i];
8608 // If the 2nd operand is undefined, we should only look for the VINSERTB
8609 // source element (7 on BE, 8 on LE) in the Mask.
8610 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
8611 continue;
8613 bool OtherElementsInOrder = true;
8614 // Examine the other elements in the Mask to see if they're in original
8615 // order.
8616 for (unsigned j = 0; j < BytesInVector; ++j) {
8617 if (j == i)
8618 continue;
8619 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
8620 // to be from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
8621 // in which case we assume we're always picking from the 1st operand.
8622 int MaskOffset =
8623 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
8624 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
8625 OtherElementsInOrder = false;
8626 break;
8629 // If other elements are in original order, we record the number of shifts
8630 // we need to get the element we want into element 7. Also record which byte
8631 // in the vector we should insert into.
8632 if (OtherElementsInOrder) {
8633 // If 2nd operand is undefined, we assume no shifts and no swapping.
8634 if (V2.isUndef()) {
8635 ShiftElts = 0;
8636 Swap = false;
8637 } else {
8638 // Only need the last 4 bits for shifts; operands will be swapped if CurrentElement is >= 2^4.
8639 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
8640 : BigEndianShifts[CurrentElement & 0xF];
8641 Swap = CurrentElement < BytesInVector;
8643 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
8644 FoundCandidate = true;
8645 break;
8649 if (!FoundCandidate)
8650 return SDValue();
8652 // Candidate found, construct the proper SDAG sequence with VINSERTB,
8653 // optionally with VECSHL if shift is required.
8654 if (Swap)
8655 std::swap(V1, V2);
8656 if (V2.isUndef())
8657 V2 = V1;
8658 if (ShiftElts) {
8659 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
8660 DAG.getConstant(ShiftElts, dl, MVT::i32));
8661 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
8662 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8664 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
8665 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8668 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
8669 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default
8670 /// SDValue.
8671 SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
8672 SelectionDAG &DAG) const {
8673 const unsigned NumHalfWords = 8;
8674 const unsigned BytesInVector = NumHalfWords * 2;
8675 // Check that the shuffle is on half-words.
8676 if (!isNByteElemShuffleMask(N, 2, 1))
8677 return SDValue();
8679 bool IsLE = Subtarget.isLittleEndian();
8680 SDLoc dl(N);
8681 SDValue V1 = N->getOperand(0);
8682 SDValue V2 = N->getOperand(1);
8683 unsigned ShiftElts = 0, InsertAtByte = 0;
8684 bool Swap = false;
8686 // Shifts required to get the half-word we want at element 3.
8687 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
8688 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
8690 uint32_t Mask = 0;
8691 uint32_t OriginalOrderLow = 0x1234567;
8692 uint32_t OriginalOrderHigh = 0x89ABCDEF;
8693 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
8694 // 32-bit space, since we only need a 4-bit nibble per element.
8695 for (unsigned i = 0; i < NumHalfWords; ++i) {
8696 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
8697 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
8700 // For each mask element, find out if we're just inserting something
8701 // from V2 into V1 or vice versa. Possible permutations inserting an element
8702 // from V2 into V1:
8703 // X, 1, 2, 3, 4, 5, 6, 7
8704 // 0, X, 2, 3, 4, 5, 6, 7
8705 // 0, 1, X, 3, 4, 5, 6, 7
8706 // 0, 1, 2, X, 4, 5, 6, 7
8707 // 0, 1, 2, 3, X, 5, 6, 7
8708 // 0, 1, 2, 3, 4, X, 6, 7
8709 // 0, 1, 2, 3, 4, 5, X, 7
8710 // 0, 1, 2, 3, 4, 5, 6, X
8711 // Inserting from V1 into V2 is similar, except the mask range will be [8,15].
8713 bool FoundCandidate = false;
8714 // Go through the mask of half-words to find an element that's being moved
8715 // from one vector to the other.
8716 for (unsigned i = 0; i < NumHalfWords; ++i) {
8717 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
8718 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
8719 uint32_t MaskOtherElts = ~(0xF << MaskShift);
8720 uint32_t TargetOrder = 0x0;
8722 // If both vector operands for the shuffle are the same vector, the mask
8723 // will contain only elements from the first one and the second one will be
8724 // undef.
8725 if (V2.isUndef()) {
8726 ShiftElts = 0;
8727 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
8728 TargetOrder = OriginalOrderLow;
8729 Swap = false;
8730 // Skip if this is not the correct element, or if the mask of the other
8731 // elements doesn't match our expected order.
8732 if (MaskOneElt == VINSERTHSrcElem &&
8733 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
8734 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
8735 FoundCandidate = true;
8736 break;
8738 } else { // If both operands are defined.
8739 // Target order is [8,15] if the current mask is between [0,7].
8740 TargetOrder =
8741 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
8742 // Skip if the mask of the other elements doesn't match our expected order.
8743 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
8744 // We only need the last 3 bits for the number of shifts.
8745 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
8746 : BigEndianShifts[MaskOneElt & 0x7];
8747 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
8748 Swap = MaskOneElt < NumHalfWords;
8749 FoundCandidate = true;
8750 break;
8755 if (!FoundCandidate)
8756 return SDValue();
8758 // Candidate found, construct the proper SDAG sequence with VINSERTH,
8759 // optionally with VECSHL if shift is required.
8760 if (Swap)
8761 std::swap(V1, V2);
8762 if (V2.isUndef())
8763 V2 = V1;
8764 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
8765 if (ShiftElts) {
8766 // Double ShiftElts because we're left shifting on v16i8 type.
8767 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
8768 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
8769 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
8770 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
8771 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8772 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8774 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
8775 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
8776 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8777 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8780 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
8781 /// is a shuffle we can handle in a single instruction, return it. Otherwise,
8782 /// return the code it can be lowered into. Worst case, it can always be
8783 /// lowered into a vperm.
8784 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
8785 SelectionDAG &DAG) const {
8786 SDLoc dl(Op);
8787 SDValue V1 = Op.getOperand(0);
8788 SDValue V2 = Op.getOperand(1);
8789 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
8790 EVT VT = Op.getValueType();
8791 bool isLittleEndian = Subtarget.isLittleEndian();
8793 unsigned ShiftElts, InsertAtByte;
8794 bool Swap = false;
8795 if (Subtarget.hasP9Vector() &&
8796 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
8797 isLittleEndian)) {
8798 if (Swap)
8799 std::swap(V1, V2);
8800 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8801 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
8802 if (ShiftElts) {
8803 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
8804 DAG.getConstant(ShiftElts, dl, MVT::i32));
8805 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
8806 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8807 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8809 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
8810 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8811 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8814 if (Subtarget.hasP9Altivec()) {
8815 SDValue NewISDNode;
8816 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
8817 return NewISDNode;
8819 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
8820 return NewISDNode;
8823 if (Subtarget.hasVSX() &&
8824 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
8825 if (Swap)
8826 std::swap(V1, V2);
8827 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8828 SDValue Conv2 =
8829 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
8831 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
8832 DAG.getConstant(ShiftElts, dl, MVT::i32));
8833 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
8836 if (Subtarget.hasVSX() &&
8837 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
8838 if (Swap)
8839 std::swap(V1, V2);
8840 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
8841 SDValue Conv2 =
8842 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
8844 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
8845 DAG.getConstant(ShiftElts, dl, MVT::i32));
8846 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
8849 if (Subtarget.hasP9Vector()) {
8850 if (PPC::isXXBRHShuffleMask(SVOp)) {
8851 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
8852 SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
8853 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
8854 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
8855 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8856 SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
8857 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
8858 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
8859 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
8860 SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
8861 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
8862 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
8863 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
8864 SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
8865 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
8869 if (Subtarget.hasVSX()) {
8870 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
8871 int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
8873 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8874 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
8875 DAG.getConstant(SplatIdx, dl, MVT::i32));
8876 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
8879 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
8880 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
8881 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
8882 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
8883 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
8887 if (Subtarget.hasQPX()) {
8888 if (VT.getVectorNumElements() != 4)
8889 return SDValue();
8891 if (V2.isUndef()) V2 = V1;
8893 int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
8894 if (AlignIdx != -1) {
8895 return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
8896 DAG.getConstant(AlignIdx, dl, MVT::i32));
8897 } else if (SVOp->isSplat()) {
8898 int SplatIdx = SVOp->getSplatIndex();
8899 if (SplatIdx >= 4) {
8900 std::swap(V1, V2);
8901 SplatIdx -= 4;
8904 return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
8905 DAG.getConstant(SplatIdx, dl, MVT::i32));
8908 // Lower this into a qvgpci/qvfperm pair.
8910 // Compute the qvgpci literal
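// For example (illustrative): the identity mask {0, 1, 2, 3} packs into
// 3-bit fields as idx = (0 << 9) | (1 << 6) | (2 << 3) | 3 = 83.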
8911 unsigned idx = 0;
8912 for (unsigned i = 0; i < 4; ++i) {
8913 int m = SVOp->getMaskElt(i);
8914 unsigned mm = m >= 0 ? (unsigned) m : i;
8915 idx |= mm << (3-i)*3;
8918 SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
8919 DAG.getConstant(idx, dl, MVT::i32));
8920 return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
8923 // Cases that are handled by instructions that take permute immediates
8924 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
8925 // selected by the instruction selector.
8926 if (V2.isUndef()) {
8927 if (PPC::isSplatShuffleMask(SVOp, 1) ||
8928 PPC::isSplatShuffleMask(SVOp, 2) ||
8929 PPC::isSplatShuffleMask(SVOp, 4) ||
8930 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
8931 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
8932 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
8933 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
8934 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
8935 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
8936 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
8937 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
8938 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
8939 (Subtarget.hasP8Altivec() && (
8940 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
8941 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
8942 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
8943 return Op;
8947 // Altivec has a variety of "shuffle immediates" that take two vector inputs
8948 // and produce a fixed permutation. If any of these match, do not lower to
8949 // VPERM.
8950 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
8951 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8952 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8953 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
8954 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
8955 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
8956 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
8957 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
8958 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
8959 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
8960 (Subtarget.hasP8Altivec() && (
8961 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8962 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
8963 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
8964 return Op;
8966 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
8967 // perfect shuffle table to emit an optimal matching sequence.
8968 ArrayRef<int> PermMask = SVOp->getMask();
8970 unsigned PFIndexes[4];
8971 bool isFourElementShuffle = true;
8972 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
8973 unsigned EltNo = 8; // Start out undef.
8974 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
8975 if (PermMask[i*4+j] < 0)
8976 continue; // Undef, ignore it.
8978 unsigned ByteSource = PermMask[i*4+j];
8979 if ((ByteSource & 3) != j) {
8980 isFourElementShuffle = false;
8981 break;
8984 if (EltNo == 8) {
8985 EltNo = ByteSource/4;
8986 } else if (EltNo != ByteSource/4) {
8987 isFourElementShuffle = false;
8988 break;
8991 PFIndexes[i] = EltNo;
8994 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
8995 // perfect shuffle vector to determine if it is cost effective to do this as
8996 // discrete instructions, or whether we should use a vperm.
8997 // For now, we skip this for little endian until such time as we have a
8998 // little-endian perfect shuffle table.
8999 if (isFourElementShuffle && !isLittleEndian) {
9000 // Compute the index in the perfect shuffle table.
9001 unsigned PFTableIndex =
9002 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
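// Worked example (illustrative): the identity shuffle has PFIndexes =
// {0, 1, 2, 3}, giving PFTableIndex = 0*729 + 1*81 + 2*9 + 3 = 102. The base
// is 9 because each element selects one of 8 sources or undef.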
9004 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
9005 unsigned Cost = (PFEntry >> 30);
9007 // Determining when to avoid vperm is tricky. Many things affect the cost
9008 // of vperm, particularly how many times the perm mask needs to be computed.
9009 // For example, if the perm mask can be hoisted out of a loop or is already
9010 // used (perhaps because there are multiple permutes with the same shuffle
9011 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
9012 // the loop requires an extra register.
9014 // As a compromise, we only emit discrete instructions if the shuffle can be
9015 // generated in 3 or fewer operations. When we have loop information
9016 // available, if this block is within a loop, we should avoid using vperm
9017 // for 3-operation perms and use a constant pool load instead.
9018 if (Cost < 3)
9019 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9022 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
9023 // vector that will get spilled to the constant pool.
9024 if (V2.isUndef()) V2 = V1;
9026 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
9027 // that it is in input element units, not in bytes. Convert now.
9029 // For little endian, the order of the input vectors is reversed, and
9030 // the permutation mask is complemented with respect to 31. This is
9031 // necessary to produce proper semantics with the big-endian-biased vperm
9032 // instruction.
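// For example (an illustrative case): with 4-byte elements, a mask element
// selecting source element 0 expands to the mask bytes {0, 1, 2, 3} on big
// endian, but to the complemented bytes {31, 30, 29, 28} on little endian.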
9033 EVT EltVT = V1.getValueType().getVectorElementType();
9034 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
9036 SmallVector<SDValue, 16> ResultMask;
9037 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
9038 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
9040 for (unsigned j = 0; j != BytesPerElement; ++j)
9041 if (isLittleEndian)
9042 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
9043 dl, MVT::i32));
9044 else
9045 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
9046 MVT::i32));
9049 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
9050 if (isLittleEndian)
9051 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9052 V2, V1, VPermMask);
9053 else
9054 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9055 V1, V2, VPermMask);
9058 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
9059 /// vector comparison. If it is, return true and fill in CompareOpc/isDot with
9060 /// information about the intrinsic.
9061 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
9062 bool &isDot, const PPCSubtarget &Subtarget) {
9063 unsigned IntrinsicID =
9064 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
9065 CompareOpc = -1;
9066 isDot = false;
9067 switch (IntrinsicID) {
9068 default:
9069 return false;
9070 // Comparison predicates.
9071 case Intrinsic::ppc_altivec_vcmpbfp_p:
9072 CompareOpc = 966;
9073 isDot = true;
9074 break;
9075 case Intrinsic::ppc_altivec_vcmpeqfp_p:
9076 CompareOpc = 198;
9077 isDot = true;
9078 break;
9079 case Intrinsic::ppc_altivec_vcmpequb_p:
9080 CompareOpc = 6;
9081 isDot = true;
9082 break;
9083 case Intrinsic::ppc_altivec_vcmpequh_p:
9084 CompareOpc = 70;
9085 isDot = true;
9086 break;
9087 case Intrinsic::ppc_altivec_vcmpequw_p:
9088 CompareOpc = 134;
9089 isDot = true;
9090 break;
9091 case Intrinsic::ppc_altivec_vcmpequd_p:
9092 if (Subtarget.hasP8Altivec()) {
9093 CompareOpc = 199;
9094 isDot = true;
9095 } else
9096 return false;
9097 break;
9098 case Intrinsic::ppc_altivec_vcmpneb_p:
9099 case Intrinsic::ppc_altivec_vcmpneh_p:
9100 case Intrinsic::ppc_altivec_vcmpnew_p:
9101 case Intrinsic::ppc_altivec_vcmpnezb_p:
9102 case Intrinsic::ppc_altivec_vcmpnezh_p:
9103 case Intrinsic::ppc_altivec_vcmpnezw_p:
9104 if (Subtarget.hasP9Altivec()) {
9105 switch (IntrinsicID) {
9106 default:
9107 llvm_unreachable("Unknown comparison intrinsic.");
9108 case Intrinsic::ppc_altivec_vcmpneb_p:
9109 CompareOpc = 7;
9110 break;
9111 case Intrinsic::ppc_altivec_vcmpneh_p:
9112 CompareOpc = 71;
9113 break;
9114 case Intrinsic::ppc_altivec_vcmpnew_p:
9115 CompareOpc = 135;
9116 break;
9117 case Intrinsic::ppc_altivec_vcmpnezb_p:
9118 CompareOpc = 263;
9119 break;
9120 case Intrinsic::ppc_altivec_vcmpnezh_p:
9121 CompareOpc = 327;
9122 break;
9123 case Intrinsic::ppc_altivec_vcmpnezw_p:
9124 CompareOpc = 391;
9125 break;
9127 isDot = true;
9128 } else
9129 return false;
9130 break;
9131 case Intrinsic::ppc_altivec_vcmpgefp_p:
9132 CompareOpc = 454;
9133 isDot = true;
9134 break;
9135 case Intrinsic::ppc_altivec_vcmpgtfp_p:
9136 CompareOpc = 710;
9137 isDot = true;
9138 break;
9139 case Intrinsic::ppc_altivec_vcmpgtsb_p:
9140 CompareOpc = 774;
9141 isDot = true;
9142 break;
9143 case Intrinsic::ppc_altivec_vcmpgtsh_p:
9144 CompareOpc = 838;
9145 isDot = true;
9146 break;
9147 case Intrinsic::ppc_altivec_vcmpgtsw_p:
9148 CompareOpc = 902;
9149 isDot = true;
9150 break;
9151 case Intrinsic::ppc_altivec_vcmpgtsd_p:
9152 if (Subtarget.hasP8Altivec()) {
9153 CompareOpc = 967;
9154 isDot = true;
9155 } else
9156 return false;
9157 break;
9158 case Intrinsic::ppc_altivec_vcmpgtub_p:
9159 CompareOpc = 518;
9160 isDot = true;
9161 break;
9162 case Intrinsic::ppc_altivec_vcmpgtuh_p:
9163 CompareOpc = 582;
9164 isDot = true;
9165 break;
9166 case Intrinsic::ppc_altivec_vcmpgtuw_p:
9167 CompareOpc = 646;
9168 isDot = true;
9169 break;
9170 case Intrinsic::ppc_altivec_vcmpgtud_p:
9171 if (Subtarget.hasP8Altivec()) {
9172 CompareOpc = 711;
9173 isDot = true;
9174 } else
9175 return false;
9176 break;
9178 // VSX predicate comparisons use the same infrastructure
9179 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9180 case Intrinsic::ppc_vsx_xvcmpgedp_p:
9181 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9182 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9183 case Intrinsic::ppc_vsx_xvcmpgesp_p:
9184 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9185 if (Subtarget.hasVSX()) {
9186 switch (IntrinsicID) {
9187 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9188 CompareOpc = 99;
9189 break;
9190 case Intrinsic::ppc_vsx_xvcmpgedp_p:
9191 CompareOpc = 115;
9192 break;
9193 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9194 CompareOpc = 107;
9195 break;
9196 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9197 CompareOpc = 67;
9198 break;
9199 case Intrinsic::ppc_vsx_xvcmpgesp_p:
9200 CompareOpc = 83;
9201 break;
9202 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9203 CompareOpc = 75;
9204 break;
9206 isDot = true;
9207 } else
9208 return false;
9209 break;
9211 // Normal Comparisons.
9212 case Intrinsic::ppc_altivec_vcmpbfp:
9213 CompareOpc = 966;
9214 break;
9215 case Intrinsic::ppc_altivec_vcmpeqfp:
9216 CompareOpc = 198;
9217 break;
9218 case Intrinsic::ppc_altivec_vcmpequb:
9219 CompareOpc = 6;
9220 break;
9221 case Intrinsic::ppc_altivec_vcmpequh:
9222 CompareOpc = 70;
9223 break;
9224 case Intrinsic::ppc_altivec_vcmpequw:
9225 CompareOpc = 134;
9226 break;
9227 case Intrinsic::ppc_altivec_vcmpequd:
9228 if (Subtarget.hasP8Altivec())
9229 CompareOpc = 199;
9230 else
9231 return false;
9232 break;
9233 case Intrinsic::ppc_altivec_vcmpneb:
9234 case Intrinsic::ppc_altivec_vcmpneh:
9235 case Intrinsic::ppc_altivec_vcmpnew:
9236 case Intrinsic::ppc_altivec_vcmpnezb:
9237 case Intrinsic::ppc_altivec_vcmpnezh:
9238 case Intrinsic::ppc_altivec_vcmpnezw:
9239 if (Subtarget.hasP9Altivec())
9240 switch (IntrinsicID) {
9241 default:
9242 llvm_unreachable("Unknown comparison intrinsic.");
9243 case Intrinsic::ppc_altivec_vcmpneb:
9244 CompareOpc = 7;
9245 break;
9246 case Intrinsic::ppc_altivec_vcmpneh:
9247 CompareOpc = 71;
9248 break;
9249 case Intrinsic::ppc_altivec_vcmpnew:
9250 CompareOpc = 135;
9251 break;
9252 case Intrinsic::ppc_altivec_vcmpnezb:
9253 CompareOpc = 263;
9254 break;
9255 case Intrinsic::ppc_altivec_vcmpnezh:
9256 CompareOpc = 327;
9257 break;
9258 case Intrinsic::ppc_altivec_vcmpnezw:
9259 CompareOpc = 391;
9260 break;
9262 else
9263 return false;
9264 break;
9265 case Intrinsic::ppc_altivec_vcmpgefp:
9266 CompareOpc = 454;
9267 break;
9268 case Intrinsic::ppc_altivec_vcmpgtfp:
9269 CompareOpc = 710;
9270 break;
9271 case Intrinsic::ppc_altivec_vcmpgtsb:
9272 CompareOpc = 774;
9273 break;
9274 case Intrinsic::ppc_altivec_vcmpgtsh:
9275 CompareOpc = 838;
9276 break;
9277 case Intrinsic::ppc_altivec_vcmpgtsw:
9278 CompareOpc = 902;
9279 break;
9280 case Intrinsic::ppc_altivec_vcmpgtsd:
9281 if (Subtarget.hasP8Altivec())
9282 CompareOpc = 967;
9283 else
9284 return false;
9285 break;
9286 case Intrinsic::ppc_altivec_vcmpgtub:
9287 CompareOpc = 518;
9288 break;
9289 case Intrinsic::ppc_altivec_vcmpgtuh:
9290 CompareOpc = 582;
9291 break;
9292 case Intrinsic::ppc_altivec_vcmpgtuw:
9293 CompareOpc = 646;
9294 break;
9295 case Intrinsic::ppc_altivec_vcmpgtud:
9296 if (Subtarget.hasP8Altivec())
9297 CompareOpc = 711;
9298 else
9299 return false;
9300 break;
9302 return true;
9305 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
9306 /// lower, do it, otherwise return null.
9307 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9308 SelectionDAG &DAG) const {
9309 unsigned IntrinsicID =
9310 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
9312 SDLoc dl(Op);
9314 if (IntrinsicID == Intrinsic::thread_pointer) {
9315 // Reads the thread pointer register, used for __builtin_thread_pointer.
9316 if (Subtarget.isPPC64())
9317 return DAG.getRegister(PPC::X13, MVT::i64);
9318 return DAG.getRegister(PPC::R2, MVT::i32);
9321 // If this is a lowered altivec predicate compare, CompareOpc is set to the
9322 // opcode number of the comparison.
9323 int CompareOpc;
9324 bool isDot;
9325 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
9326 return SDValue(); // Don't custom lower most intrinsics.
9328 // If this is a non-dot comparison, make the VCMP node and we are done.
9329 if (!isDot) {
9330 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
9331 Op.getOperand(1), Op.getOperand(2),
9332 DAG.getConstant(CompareOpc, dl, MVT::i32));
9333 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
9336 // Create the PPCISD altivec 'dot' comparison node.
9337 SDValue Ops[] = {
9338 Op.getOperand(2), // LHS
9339 Op.getOperand(3), // RHS
9340 DAG.getConstant(CompareOpc, dl, MVT::i32)
9342 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
9343 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
9345 // Now that we have the comparison, emit a copy from the CR to a GPR.
9346 // This is flagged to the above dot comparison.
9347 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
9348 DAG.getRegister(PPC::CR6, MVT::i32),
9349 CompNode.getValue(1));
9351 // Unpack the result based on how the target uses it.
9352 unsigned BitNo; // Bit # of CR6.
9353 bool InvertBit; // Invert result?
9354 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
9355 default: // Can't happen, don't crash on invalid number though.
9356 case 0: // Return the value of the EQ bit of CR6.
9357 BitNo = 0; InvertBit = false;
9358 break;
9359 case 1: // Return the inverted value of the EQ bit of CR6.
9360 BitNo = 0; InvertBit = true;
9361 break;
9362 case 2: // Return the value of the LT bit of CR6.
9363 BitNo = 2; InvertBit = false;
9364 break;
9365 case 3: // Return the inverted value of the LT bit of CR6.
9366 BitNo = 2; InvertBit = true;
9367 break;
9370 // Shift the bit into the low position.
9371 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
9372 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
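// (Illustrative arithmetic: BitNo == 0 (EQ) yields a shift of 5 and
// BitNo == 2 (LT) yields a shift of 7, which is where MFOCRF leaves the
// corresponding CR6 bits in the 32-bit result.)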
9373 // Isolate the bit.
9374 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
9375 DAG.getConstant(1, dl, MVT::i32));
9377 // If we are supposed to, toggle the bit.
9378 if (InvertBit)
9379 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
9380 DAG.getConstant(1, dl, MVT::i32));
9381 return Flags;
9384 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9385 SelectionDAG &DAG) const {
9386 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
9387 // the beginning of the argument list.
9388 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
9389 SDLoc DL(Op);
9390 switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
9391 case Intrinsic::ppc_cfence: {
9392 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
9393 assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
9394 return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
9395 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
9396 Op.getOperand(ArgStart + 1)),
9397 Op.getOperand(0)),
9400 default:
9401 break;
9403 return SDValue();
9406 SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
9407 // Check for a DIV with the same operands as this REM.
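// For example, if the function computes both a / b and a % b, returning an
// empty SDValue here lets the REM be expanded in terms of the existing DIV
// (a multiply and a subtract) instead of emitting a separate modulo
// instruction.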
9408 for (auto UI : Op.getOperand(1)->uses()) {
9409 if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
9410 (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
9411 if (UI->getOperand(0) == Op.getOperand(0) &&
9412 UI->getOperand(1) == Op.getOperand(1))
9413 return SDValue();
9415 return Op;
9418 // Lower scalar BSWAP64 to xxbrd.
9419 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
9420 SDLoc dl(Op);
9421 // MTVSRDD
9422 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
9423 Op.getOperand(0));
9424 // XXBRD
9425 Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op);
9426 // MFVSRD
9427 int VectorIndex = 0;
9428 if (Subtarget.isLittleEndian())
9429 VectorIndex = 1;
9430 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
9431 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
9432 return Op;
9435 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
9436 // compared to a value that is atomically loaded (atomic loads zero-extend).
9437 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
9438 SelectionDAG &DAG) const {
9439 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
9440 "Expecting an atomic compare-and-swap here.");
9441 SDLoc dl(Op);
9442 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
9443 EVT MemVT = AtomicNode->getMemoryVT();
9444 if (MemVT.getSizeInBits() >= 32)
9445 return Op;
9447 SDValue CmpOp = Op.getOperand(2);
9448 // If this is already correctly zero-extended, leave it alone.
9449 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
9450 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
9451 return Op;
9453 // Clear the high bits of the compare operand.
9454 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
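// e.g. MaskVal is 0xFF for i8 and 0xFFFF for i16.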
9455 SDValue NewCmpOp =
9456 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
9457 DAG.getConstant(MaskVal, dl, MVT::i32));
9459 // Replace the existing compare operand with the properly zero-extended one.
9460 SmallVector<SDValue, 4> Ops;
9461 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
9462 Ops.push_back(AtomicNode->getOperand(i));
9463 Ops[2] = NewCmpOp;
9464 MachineMemOperand *MMO = AtomicNode->getMemOperand();
9465 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
9466 auto NodeTy =
9467 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
9468 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
9471 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
9472 SelectionDAG &DAG) const {
9473 SDLoc dl(Op);
9474 // Create a stack slot that is 16-byte aligned.
9475 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9476 int FrameIdx = MFI.CreateStackObject(16, 16, false);
9477 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9478 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9480 // Store the input value into Value#0 of the stack slot.
9481 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
9482 MachinePointerInfo());
9483 // Load it out.
9484 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
9487 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
9488 SelectionDAG &DAG) const {
9489 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
9490 "Should only be called for ISD::INSERT_VECTOR_ELT");
9492 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
9493 // We have legal lowering for constant indices but not for variable ones.
9494 if (!C)
9495 return SDValue();
9497 EVT VT = Op.getValueType();
9498 SDLoc dl(Op);
9499 SDValue V1 = Op.getOperand(0);
9500 SDValue V2 = Op.getOperand(1);
9501 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
9502 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
9503 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
9504 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
9505 unsigned InsertAtElement = C->getZExtValue();
9506 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
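// e.g. inserting into element 5 of a v8i16 gives InsertAtByte = 5 * 2 = 10,
// which the little-endian adjustment below remaps to (16 - 2) - 10 = 4.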
9507 if (Subtarget.isLittleEndian()) {
9508 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
9510 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
9511 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9513 return Op;
9516 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
9517 SelectionDAG &DAG) const {
9518 SDLoc dl(Op);
9519 SDNode *N = Op.getNode();
9521 assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
9522 "Unknown extract_vector_elt type");
9524 SDValue Value = N->getOperand(0);
9526 // The first part of this is like the store lowering except that we don't
9527 // need to track the chain.
9529 // The values are now known to be -1 (false) or 1 (true). To convert this
9530 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
9531 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
9532 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
9534 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
9535 // understand how to form the extending load.
9536 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
9538 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
9540 // Now convert to an integer and store.
9541 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
9542 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
9543 Value);
9545 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9546 int FrameIdx = MFI.CreateStackObject(16, 16, false);
9547 MachinePointerInfo PtrInfo =
9548 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9549 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9550 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9552 SDValue StoreChain = DAG.getEntryNode();
9553 SDValue Ops[] = {StoreChain,
9554 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
9555 Value, FIdx};
9556 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
9558 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
9559 dl, VTs, Ops, MVT::v4i32, PtrInfo);
9561 // Extract the value requested.
9562 unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
9563 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
9564 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
9566 SDValue IntVal =
9567 DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));
9569 if (!Subtarget.useCRBits())
9570 return IntVal;
9572 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
9575 /// Lowering for QPX v4i1 loads
9576 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
9577 SelectionDAG &DAG) const {
9578 SDLoc dl(Op);
9579 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
9580 SDValue LoadChain = LN->getChain();
9581 SDValue BasePtr = LN->getBasePtr();
9583 if (Op.getValueType() == MVT::v4f64 ||
9584 Op.getValueType() == MVT::v4f32) {
9585 EVT MemVT = LN->getMemoryVT();
9586 unsigned Alignment = LN->getAlignment();
9588 // If this load is properly aligned, then it is legal.
9589 if (Alignment >= MemVT.getStoreSize())
9590 return Op;
9592 EVT ScalarVT = Op.getValueType().getScalarType(),
9593 ScalarMemVT = MemVT.getScalarType();
9594 unsigned Stride = ScalarMemVT.getStoreSize();
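// A sketch of the result (assuming a misaligned v4f64 load with f64
// scalars): four f64 loads at offsets 0, 8, 16 and 24, whose chains are
// joined by a TokenFactor and whose values are rebuilt into a vector below.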
9596 SDValue Vals[4], LoadChains[4];
9597 for (unsigned Idx = 0; Idx < 4; ++Idx) {
9598 SDValue Load;
9599 if (ScalarVT != ScalarMemVT)
9600 Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
9601 BasePtr,
9602 LN->getPointerInfo().getWithOffset(Idx * Stride),
9603 ScalarMemVT, MinAlign(Alignment, Idx * Stride),
9604 LN->getMemOperand()->getFlags(), LN->getAAInfo());
9605 else
9606 Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
9607 LN->getPointerInfo().getWithOffset(Idx * Stride),
9608 MinAlign(Alignment, Idx * Stride),
9609 LN->getMemOperand()->getFlags(), LN->getAAInfo());
9611 if (Idx == 0 && LN->isIndexed()) {
9612 assert(LN->getAddressingMode() == ISD::PRE_INC &&
9613 "Unknown addressing mode on vector load");
9614 Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
9615 LN->getAddressingMode());
9618 Vals[Idx] = Load;
9619 LoadChains[Idx] = Load.getValue(1);
9621 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
9622 DAG.getConstant(Stride, dl,
9623 BasePtr.getValueType()));
9626 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
9627 SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);
9629 if (LN->isIndexed()) {
9630 SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
9631 return DAG.getMergeValues(RetOps, dl);
9634 SDValue RetOps[] = { Value, TF };
9635 return DAG.getMergeValues(RetOps, dl);
9638 assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
9639 assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
9641 // To lower v4i1 from a byte array, we load the byte elements of the
9642 // vector and then reuse the BUILD_VECTOR logic.
9644 SDValue VectElmts[4], VectElmtChains[4];
9645 for (unsigned i = 0; i < 4; ++i) {
9646 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
9647 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
9649 VectElmts[i] = DAG.getExtLoad(
9650 ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
9651 LN->getPointerInfo().getWithOffset(i), MVT::i8,
9652 /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
9653 VectElmtChains[i] = VectElmts[i].getValue(1);
9656 LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
9657 SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);
9659 SDValue RVals[] = { Value, LoadChain };
9660 return DAG.getMergeValues(RVals, dl);
9663 /// Lowering for QPX v4i1 stores
9664 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
9665 SelectionDAG &DAG) const {
9666 SDLoc dl(Op);
9667 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
9668 SDValue StoreChain = SN->getChain();
9669 SDValue BasePtr = SN->getBasePtr();
9670 SDValue Value = SN->getValue();
9672 if (Value.getValueType() == MVT::v4f64 ||
9673 Value.getValueType() == MVT::v4f32) {
9674 EVT MemVT = SN->getMemoryVT();
9675 unsigned Alignment = SN->getAlignment();
9677 // If this store is properly aligned, then it is legal.
9678 if (Alignment >= MemVT.getStoreSize())
9679 return Op;
9681 EVT ScalarVT = Value.getValueType().getScalarType(),
9682 ScalarMemVT = MemVT.getScalarType();
9683 unsigned Stride = ScalarMemVT.getStoreSize();
9685 SDValue Stores[4];
9686 for (unsigned Idx = 0; Idx < 4; ++Idx) {
9687 SDValue Ex = DAG.getNode(
9688 ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
9689 DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
9690 SDValue Store;
9691 if (ScalarVT != ScalarMemVT)
9692 Store =
9693 DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
9694 SN->getPointerInfo().getWithOffset(Idx * Stride),
9695 ScalarMemVT, MinAlign(Alignment, Idx * Stride),
9696 SN->getMemOperand()->getFlags(), SN->getAAInfo());
9697 else
9698 Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
9699 SN->getPointerInfo().getWithOffset(Idx * Stride),
9700 MinAlign(Alignment, Idx * Stride),
9701 SN->getMemOperand()->getFlags(), SN->getAAInfo());
9703 if (Idx == 0 && SN->isIndexed()) {
9704 assert(SN->getAddressingMode() == ISD::PRE_INC &&
9705 "Unknown addressing mode on vector store");
9706 Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
9707 SN->getAddressingMode());
9710 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
9711 DAG.getConstant(Stride, dl,
9712 BasePtr.getValueType()));
9713 Stores[Idx] = Store;
9716 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9718 if (SN->isIndexed()) {
9719 SDValue RetOps[] = { TF, Stores[0].getValue(1) };
9720 return DAG.getMergeValues(RetOps, dl);
9723 return TF;
9726 assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
9727 assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
9729 // The values are now known to be -1 (false) or 1 (true). To convert this
9730 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
9731 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
9732 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
9734 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
9735 // understand how to form the extending load.
9736 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
9738 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
9740 // Now convert to an integer and store.
9741 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
9742 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
9743 Value);
9745 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9746 int FrameIdx = MFI.CreateStackObject(16, 16, false);
9747 MachinePointerInfo PtrInfo =
9748 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9749 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9750 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9752 SDValue Ops[] = {StoreChain,
9753 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
9754 Value, FIdx};
9755 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
9757 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
9758 dl, VTs, Ops, MVT::v4i32, PtrInfo);
9760 // Move data into the byte array.
9761 SDValue Loads[4], LoadChains[4];
9762 for (unsigned i = 0; i < 4; ++i) {
9763 unsigned Offset = 4*i;
9764 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
9765 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
9767 Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
9768 PtrInfo.getWithOffset(Offset));
9769 LoadChains[i] = Loads[i].getValue(1);
9772 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
9774 SDValue Stores[4];
9775 for (unsigned i = 0; i < 4; ++i) {
9776 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
9777 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
9779 Stores[i] = DAG.getTruncStore(
9780 StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
9781 MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
9782 SN->getAAInfo());
9785 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9787 return StoreChain;
9790 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
9791 SDLoc dl(Op);
9792 if (Op.getValueType() == MVT::v4i32) {
9793 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
9795 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
9796 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
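// A sketch of the arithmetic used below, per 32-bit lane:
//   a*b mod 2^32 = lo16(a)*lo16(b) + ((lo16(a)*hi16(b) + hi16(a)*lo16(b)) << 16)
// vmulouh produces the first term, and vmsumuhm on the rotated RHS produces
// the sum that is then shifted up by 16.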
9798 SDValue RHSSwap = // = vrlw RHS, 16
9799 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
9801 // Shrinkify inputs to v8i16.
9802 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
9803 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
9804 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
9806 // Low parts multiplied together, generating 32-bit results (we ignore the
9807 // top parts).
9808 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
9809 LHS, RHS, DAG, dl, MVT::v4i32);
9811 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
9812 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
9813 // Shift the high parts up 16 bits.
9814 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
9815 Neg16, DAG, dl);
9816 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
9817 } else if (Op.getValueType() == MVT::v8i16) {
9818 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
9820 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
9822 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
9823 LHS, RHS, Zero, DAG, dl);
9824 } else if (Op.getValueType() == MVT::v16i8) {
9825 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
9826 bool isLittleEndian = Subtarget.isLittleEndian();
9828 // Multiply the even 8-bit parts, producing 16-bit products.
9829 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
9830 LHS, RHS, DAG, dl, MVT::v8i16);
9831 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
9833 // Multiply the odd 8-bit parts, producing 16-bit products.
9834 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
9835 LHS, RHS, DAG, dl, MVT::v8i16);
9836 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
9838 // Merge the results together. Because vmuleub and vmuloub are
9839 // instructions with a big-endian bias, we must reverse the
9840 // element numbering and reverse the meaning of "odd" and "even"
9841 // when generating little endian code.
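// For example, on little endian the shuffle mask built below is
// {0, 16, 2, 18, ..., 14, 30}, interleaving the low bytes of the odd and
// even 16-bit products.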
9842 int Ops[16];
9843 for (unsigned i = 0; i != 8; ++i) {
9844 if (isLittleEndian) {
9845 Ops[i*2 ] = 2*i;
9846 Ops[i*2+1] = 2*i+16;
9847 } else {
9848 Ops[i*2 ] = 2*i+1;
9849 Ops[i*2+1] = 2*i+1+16;
9852 if (isLittleEndian)
9853 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
9854 else
9855 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
9856 } else {
9857 llvm_unreachable("Unknown mul to lower!");
9861 SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
9863 assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
9865 EVT VT = Op.getValueType();
9866 assert(VT.isVector() &&
9867 "Only set vector abs as custom, scalar abs shouldn't reach here!");
9868 assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
9869 VT == MVT::v16i8) &&
9870 "Unexpected vector element type!");
9871 assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
9872 "Current subtarget doesn't support smax v2i64!");
9874 // For vector abs, it can be lowered to:
9875 // abs x
9876 // ==>
9877 // y = -x
9878 // smax(x, y)
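// (This identity also holds for INT_MIN: x and -x are both INT_MIN there,
// so smax returns INT_MIN, matching two's-complement abs.)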
9880 SDLoc dl(Op);
9881 SDValue X = Op.getOperand(0);
9882 SDValue Zero = DAG.getConstant(0, dl, VT);
9883 SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
9885 // The SMAX patch (https://reviews.llvm.org/D47332)
9886 // hasn't landed yet, so use the intrinsics for now.
9887 // TODO: use ISD::SMAX directly once the SMAX patch has landed.
9888 Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
9889 if (VT == MVT::v2i64)
9890 BifID = Intrinsic::ppc_altivec_vmaxsd;
9891 else if (VT == MVT::v8i16)
9892 BifID = Intrinsic::ppc_altivec_vmaxsh;
9893 else if (VT == MVT::v16i8)
9894 BifID = Intrinsic::ppc_altivec_vmaxsb;
9896 return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
9899 // Custom lowering for fpext v2f32 to v2f64
9900 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
9902 assert(Op.getOpcode() == ISD::FP_EXTEND &&
9903 "Should only be called for ISD::FP_EXTEND");
9905 // We only want to custom lower an extend from v2f32 to v2f64.
9906 if (Op.getValueType() != MVT::v2f64 ||
9907 Op.getOperand(0).getValueType() != MVT::v2f32)
9908 return SDValue();
9910 SDLoc dl(Op);
9911 SDValue Op0 = Op.getOperand(0);
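// A sketch of the rewrite performed below (assuming both inputs are loads):
//   fpext (fadd (load A), (load B))
// becomes
//   FP_EXTEND_LH (fadd (LD_VSX_LH A), (LD_VSX_LH B))
// i.e. the loads are widened to v4f32, the operation is done there, and the
// extend keeps only the two lanes of interest.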
9913 switch (Op0.getOpcode()) {
9914 default:
9915 return SDValue();
9916 case ISD::FADD:
9917 case ISD::FMUL:
9918 case ISD::FSUB: {
9919 SDValue NewLoad[2];
9920 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
9921 // Ensure both inputs are loads.
9922 SDValue LdOp = Op0.getOperand(i);
9923 if (LdOp.getOpcode() != ISD::LOAD)
9924 return SDValue();
9925 // Generate new load node.
9926 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
9927 SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
9928 NewLoad[i] =
9929 DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
9930 DAG.getVTList(MVT::v4f32, MVT::Other),
9931 LoadOps, LD->getMemoryVT(),
9932 LD->getMemOperand());
9934 SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32,
9935 NewLoad[0], NewLoad[1],
9936 Op0.getNode()->getFlags());
9937 return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewOp);
9939 case ISD::LOAD: {
9940 LoadSDNode *LD = cast<LoadSDNode>(Op0);
9941 SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
9942 SDValue NewLd =
9943 DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
9944 DAG.getVTList(MVT::v4f32, MVT::Other),
9945 LoadOps, LD->getMemoryVT(), LD->getMemOperand());
9946 return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewLd);
9949 llvm_unreachable("ERROR: Should return for all cases within switch.");
9952 /// LowerOperation - Provide custom lowering hooks for some operations.
9954 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
9955 switch (Op.getOpcode()) {
9956 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
9957 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
9958 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
9959 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
9960 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
9961 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
9962 case ISD::SETCC: return LowerSETCC(Op, DAG);
9963 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
9964 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
9966 // Variable argument lowering.
9967 case ISD::VASTART: return LowerVASTART(Op, DAG);
9968 case ISD::VAARG: return LowerVAARG(Op, DAG);
9969 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
9971 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
9972 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
9973 case ISD::GET_DYNAMIC_AREA_OFFSET:
9974 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
9976 // Exception handling lowering.
9977 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
9978 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
9979 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
9981 case ISD::LOAD: return LowerLOAD(Op, DAG);
9982 case ISD::STORE: return LowerSTORE(Op, DAG);
9983 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
9984 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
9985 case ISD::FP_TO_UINT:
9986 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
9987 case ISD::UINT_TO_FP:
9988 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
9989 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
9991 // Lower 64-bit shifts.
9992 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
9993 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
9994 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
9996 // Vector-related lowering.
9997 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
9998 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
9999 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
10000 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
10001 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
10002 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10003 case ISD::MUL: return LowerMUL(Op, DAG);
10004 case ISD::ABS: return LowerABS(Op, DAG);
10005 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10007 // For counter-based loop handling.
10008 case ISD::INTRINSIC_W_CHAIN: return SDValue();
10010 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
10012 // Frame & Return address.
10013 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10014 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10016 case ISD::INTRINSIC_VOID:
10017 return LowerINTRINSIC_VOID(Op, DAG);
10018 case ISD::SREM:
10019 case ISD::UREM:
10020 return LowerREM(Op, DAG);
10021 case ISD::BSWAP:
10022 return LowerBSWAP(Op, DAG);
10023 case ISD::ATOMIC_CMP_SWAP:
10024 return LowerATOMIC_CMP_SWAP(Op, DAG);
10028 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
10029 SmallVectorImpl<SDValue>&Results,
10030 SelectionDAG &DAG) const {
10031 SDLoc dl(N);
10032 switch (N->getOpcode()) {
10033 default:
10034 llvm_unreachable("Do not know how to custom type legalize this operation!");
10035 case ISD::READCYCLECOUNTER: {
10036 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
10037 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
10039 Results.push_back(RTB);
10040 Results.push_back(RTB.getValue(1));
10041 Results.push_back(RTB.getValue(2));
10042 break;
10044 case ISD::INTRINSIC_W_CHAIN: {
10045 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
10046 Intrinsic::loop_decrement)
10047 break;
10049 assert(N->getValueType(0) == MVT::i1 &&
10050 "Unexpected result type for CTR decrement intrinsic");
10051 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
10052 N->getValueType(0));
10053 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
10054 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
10055 N->getOperand(1));
10057 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
10058 Results.push_back(NewInt.getValue(1));
10059 break;
10061 case ISD::VAARG: {
10062 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
10063 return;
10065 EVT VT = N->getValueType(0);
10067 if (VT == MVT::i64) {
10068 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
10070 Results.push_back(NewNode);
10071 Results.push_back(NewNode.getValue(1));
10073 return;
10075 case ISD::FP_TO_SINT:
10076 case ISD::FP_TO_UINT:
10077 // LowerFP_TO_INT() can only handle f32 and f64.
10078 if (N->getOperand(0).getValueType() == MVT::ppcf128)
10079 return;
10080 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
10081 return;
10082 case ISD::TRUNCATE: {
10083 EVT TrgVT = N->getValueType(0);
10084 EVT OpVT = N->getOperand(0).getValueType();
10085 if (TrgVT.isVector() &&
10086 isOperationCustom(N->getOpcode(), TrgVT) &&
10087 OpVT.getSizeInBits() <= 128 &&
10088 isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits()))
10089 Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
10090 return;
10092 case ISD::BITCAST:
10093 // Don't handle bitcast here.
10094 return;
10098 //===----------------------------------------------------------------------===//
10099 // Other Lowering Code
10100 //===----------------------------------------------------------------------===//
10102 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
10103 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10104 Function *Func = Intrinsic::getDeclaration(M, Id);
10105 return Builder.CreateCall(Func, {});
10108 // The mappings for emitLeadingFence/emitTrailingFence are taken from
10109 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
10110 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
10111 Instruction *Inst,
10112 AtomicOrdering Ord) const {
10113 if (Ord == AtomicOrdering::SequentiallyConsistent)
10114 return callIntrinsic(Builder, Intrinsic::ppc_sync);
10115 if (isReleaseOrStronger(Ord))
10116 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10117 return nullptr;
10120 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
10121 Instruction *Inst,
10122 AtomicOrdering Ord) const {
10123 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
10124 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
10125 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
10126 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
10127 if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
10128 return Builder.CreateCall(
10129 Intrinsic::getDeclaration(
10130 Builder.GetInsertBlock()->getParent()->getParent(),
10131 Intrinsic::ppc_cfence, {Inst->getType()}),
10132 {Inst});
10133 // FIXME: Can use isync for rmw operation.
10134 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10136 return nullptr;
10139 MachineBasicBlock *
10140 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
10141 unsigned AtomicSize,
10142 unsigned BinOpcode,
10143 unsigned CmpOpcode,
10144 unsigned CmpPred) const {
10145 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10146 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10148 auto LoadMnemonic = PPC::LDARX;
10149 auto StoreMnemonic = PPC::STDCX;
10150 switch (AtomicSize) {
10151 default:
10152 llvm_unreachable("Unexpected size of atomic entity");
10153 case 1:
10154 LoadMnemonic = PPC::LBARX;
10155 StoreMnemonic = PPC::STBCX;
10156 assert(Subtarget.hasPartwordAtomics() && "Partword atomics are required for AtomicSize < 4");
10157 break;
10158 case 2:
10159 LoadMnemonic = PPC::LHARX;
10160 StoreMnemonic = PPC::STHCX;
10161 assert(Subtarget.hasPartwordAtomics() && "Partword atomics are required for AtomicSize < 4");
10162 break;
10163 case 4:
10164 LoadMnemonic = PPC::LWARX;
10165 StoreMnemonic = PPC::STWCX;
10166 break;
10167 case 8:
10168 LoadMnemonic = PPC::LDARX;
10169 StoreMnemonic = PPC::STDCX;
10170 break;
10173 const BasicBlock *LLVM_BB = BB->getBasicBlock();
10174 MachineFunction *F = BB->getParent();
10175 MachineFunction::iterator It = ++BB->getIterator();
10177 Register dest = MI.getOperand(0).getReg();
10178 Register ptrA = MI.getOperand(1).getReg();
10179 Register ptrB = MI.getOperand(2).getReg();
10180 Register incr = MI.getOperand(3).getReg();
10181 DebugLoc dl = MI.getDebugLoc();
10183 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10184 MachineBasicBlock *loop2MBB =
10185 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10186 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10187 F->insert(It, loopMBB);
10188 if (CmpOpcode)
10189 F->insert(It, loop2MBB);
10190 F->insert(It, exitMBB);
10191 exitMBB->splice(exitMBB->begin(), BB,
10192 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10193 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10195 MachineRegisterInfo &RegInfo = F->getRegInfo();
10196 Register TmpReg = (!BinOpcode) ? incr :
10197 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
10198 : &PPC::GPRCRegClass);
10200 // thisMBB:
10201 // ...
10202 // fallthrough --> loopMBB
10203 BB->addSuccessor(loopMBB);
10205 // loopMBB:
10206 // l[wd]arx dest, ptr
10207 // add r0, dest, incr
10208 // st[wd]cx. r0, ptr
10209 // bne- loopMBB
10210 // fallthrough --> exitMBB
10212 // For max/min...
10213 // loopMBB:
10214 // l[wd]arx dest, ptr
10215 // cmpl?[wd] incr, dest
10216 // bgt exitMBB
10217 // loop2MBB:
10218 // st[wd]cx. dest, ptr
10219 // bne- loopMBB
10220 // fallthrough --> exitMBB
10222 BB = loopMBB;
10223 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
10224 .addReg(ptrA).addReg(ptrB);
10225 if (BinOpcode)
10226 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
10227 if (CmpOpcode) {
10228 // Signed comparisons of byte or halfword values must be sign-extended.
10229 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
10230 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10231 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
10232 ExtReg).addReg(dest);
10233 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10234 .addReg(incr).addReg(ExtReg);
10235 } else
10236 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10237 .addReg(incr).addReg(dest);
10239 BuildMI(BB, dl, TII->get(PPC::BCC))
10240 .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
10241 BB->addSuccessor(loop2MBB);
10242 BB->addSuccessor(exitMBB);
10243 BB = loop2MBB;
10245 BuildMI(BB, dl, TII->get(StoreMnemonic))
10246 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
10247 BuildMI(BB, dl, TII->get(PPC::BCC))
10248 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
10249 BB->addSuccessor(loopMBB);
10250 BB->addSuccessor(exitMBB);
10252 // exitMBB:
10253 // ...
10254 BB = exitMBB;
10255 return BB;
10258 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
10259 MachineInstr &MI, MachineBasicBlock *BB,
10260 bool is8bit, // operation
10261 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
10262 // If we support part-word atomic mnemonics, just use them.
10263 if (Subtarget.hasPartwordAtomics())
10264 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
10265 CmpPred);
10267 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10268 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10269 // In 64-bit mode we have to use 64-bit registers for addresses, even though
10270 // the lwarx/stwcx. instructions operate on 32-bit values. With the 32-bit
10271 // atomics we can use the address registers without caring whether they're
10272 // 32 or 64 bits, but here we're doing actual arithmetic on the addresses.
10273 bool is64bit = Subtarget.isPPC64();
10274 bool isLittleEndian = Subtarget.isLittleEndian();
10275 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
10277 const BasicBlock *LLVM_BB = BB->getBasicBlock();
10278 MachineFunction *F = BB->getParent();
10279 MachineFunction::iterator It = ++BB->getIterator();
10281 Register dest = MI.getOperand(0).getReg();
10282 Register ptrA = MI.getOperand(1).getReg();
10283 Register ptrB = MI.getOperand(2).getReg();
10284 Register incr = MI.getOperand(3).getReg();
10285 DebugLoc dl = MI.getDebugLoc();
10287 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10288 MachineBasicBlock *loop2MBB =
10289 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10290 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10291 F->insert(It, loopMBB);
10292 if (CmpOpcode)
10293 F->insert(It, loop2MBB);
10294 F->insert(It, exitMBB);
10295 exitMBB->splice(exitMBB->begin(), BB,
10296 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10297 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10299 MachineRegisterInfo &RegInfo = F->getRegInfo();
10300 const TargetRegisterClass *RC =
10301 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
10302 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
10304 Register PtrReg = RegInfo.createVirtualRegister(RC);
10305 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
10306 Register ShiftReg =
10307 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
10308 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
10309 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
10310 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
10311 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
10312 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
10313 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
10314 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
10315 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
10316 Register Ptr1Reg;
10317 Register TmpReg =
10318 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
10320 // thisMBB:
10321 // ...
10322 // fallthrough --> loopMBB
10323 BB->addSuccessor(loopMBB);
10325 // The 4-byte load must be aligned, while a char or short may be
10326 // anywhere in the word. Hence all this nasty bookkeeping code.
10327 // add ptr1, ptrA, ptrB [copy if ptrA==0]
10328 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
10329 // xori shift, shift1, 24 [16]
10330 // rlwinm ptr, ptr1, 0, 0, 29
10331 // slw incr2, incr, shift
10332 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
10333 // slw mask, mask2, shift
10334 // loopMBB:
10335 // lwarx tmpDest, ptr
10336 // add tmp, tmpDest, incr2
10337 // andc tmp2, tmpDest, mask
10338 // and tmp3, tmp, mask
10339 // or tmp4, tmp3, tmp2
10340 // stwcx. tmp4, ptr
10341 // bne- loopMBB
10342 // fallthrough --> exitMBB
10343 // srw dest, tmpDest, shift
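// A worked illustration (not emitted code): for a byte at an address with
// (addr & 3) == 1, the rlwinm yields shift1 = 8. On little-endian targets
// shift == shift1, selecting bits 8-15 of the aligned word; on big-endian
// targets the xori flips it to shift = 16, selecting bits 16-23, which is
// where byte offset 1 lives in a big-endian word.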
10344 if (ptrA != ZeroReg) {
10345 Ptr1Reg = RegInfo.createVirtualRegister(RC);
10346 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
10347 .addReg(ptrA)
10348 .addReg(ptrB);
10349 } else {
10350 Ptr1Reg = ptrB;
10351 }
10352 // We need to use a 32-bit subregister here to avoid a register-class
10353 // mismatch in 64-bit mode.
10354 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
10355 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
10356 .addImm(3)
10357 .addImm(27)
10358 .addImm(is8bit ? 28 : 27);
10359 if (!isLittleEndian)
10360 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
10361 .addReg(Shift1Reg)
10362 .addImm(is8bit ? 24 : 16);
10363 if (is64bit)
10364 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
10365 .addReg(Ptr1Reg)
10366 .addImm(0)
10367 .addImm(61);
10368 else
10369 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
10370 .addReg(Ptr1Reg)
10371 .addImm(0)
10372 .addImm(0)
10373 .addImm(29);
10374 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
10375 if (is8bit)
10376 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
10377 else {
10378 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
10379 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
10380 .addReg(Mask3Reg)
10381 .addImm(65535);
10382 }
10383 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
10384 .addReg(Mask2Reg)
10385 .addReg(ShiftReg);
10387 BB = loopMBB;
10388 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
10389 .addReg(ZeroReg)
10390 .addReg(PtrReg);
10391 if (BinOpcode)
10392 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
10393 .addReg(Incr2Reg)
10394 .addReg(TmpDestReg);
10395 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
10396 .addReg(TmpDestReg)
10397 .addReg(MaskReg);
10398 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
10399 if (CmpOpcode) {
10400 // For unsigned comparisons, we can directly compare the shifted values.
10401 // For signed comparisons we shift and sign extend.
10402 Register SReg = RegInfo.createVirtualRegister(GPRC);
10403 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
10404 .addReg(TmpDestReg)
10405 .addReg(MaskReg);
10406 unsigned ValueReg = SReg;
10407 unsigned CmpReg = Incr2Reg;
10408 if (CmpOpcode == PPC::CMPW) {
10409 ValueReg = RegInfo.createVirtualRegister(GPRC);
10410 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
10411 .addReg(SReg)
10412 .addReg(ShiftReg);
10413 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
10414 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
10415 .addReg(ValueReg);
10416 ValueReg = ValueSReg;
10417 CmpReg = incr;
10418 }
10419 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10420 .addReg(CmpReg)
10421 .addReg(ValueReg);
10422 BuildMI(BB, dl, TII->get(PPC::BCC))
10423 .addImm(CmpPred)
10424 .addReg(PPC::CR0)
10425 .addMBB(exitMBB);
10426 BB->addSuccessor(loop2MBB);
10427 BB->addSuccessor(exitMBB);
10428 BB = loop2MBB;
10429 }
10430 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
10431 BuildMI(BB, dl, TII->get(PPC::STWCX))
10432 .addReg(Tmp4Reg)
10433 .addReg(ZeroReg)
10434 .addReg(PtrReg);
10435 BuildMI(BB, dl, TII->get(PPC::BCC))
10436 .addImm(PPC::PRED_NE)
10437 .addReg(PPC::CR0)
10438 .addMBB(loopMBB);
10439 BB->addSuccessor(loopMBB);
10440 BB->addSuccessor(exitMBB);
10442 // exitMBB:
10443 // ...
10444 BB = exitMBB;
10445 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
10446 .addReg(TmpDestReg)
10447 .addReg(ShiftReg);
10448 return BB;
10449 }
10451 llvm::MachineBasicBlock *
10452 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
10453 MachineBasicBlock *MBB) const {
10454 DebugLoc DL = MI.getDebugLoc();
10455 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10456 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
10458 MachineFunction *MF = MBB->getParent();
10459 MachineRegisterInfo &MRI = MF->getRegInfo();
10461 const BasicBlock *BB = MBB->getBasicBlock();
10462 MachineFunction::iterator I = ++MBB->getIterator();
10464 Register DstReg = MI.getOperand(0).getReg();
10465 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
10466 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
10467 Register mainDstReg = MRI.createVirtualRegister(RC);
10468 Register restoreDstReg = MRI.createVirtualRegister(RC);
10470 MVT PVT = getPointerTy(MF->getDataLayout());
10471 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
10472 "Invalid Pointer Size!");
10473 // For v = setjmp(buf), we generate
10475 // thisMBB:
10476 // SjLjSetup mainMBB
10477 // bl mainMBB
10478 // v_restore = 1
10479 // b sinkMBB
10481 // mainMBB:
10482 // buf[LabelOffset] = LR
10483 // v_main = 0
10485 // sinkMBB:
10486 // v = phi(main, restore)
10489 MachineBasicBlock *thisMBB = MBB;
10490 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
10491 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
10492 MF->insert(I, mainMBB);
10493 MF->insert(I, sinkMBB);
10495 MachineInstrBuilder MIB;
10497 // Transfer the remainder of BB and its successor edges to sinkMBB.
10498 sinkMBB->splice(sinkMBB->begin(), MBB,
10499 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
10500 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
10502 // Note that the structure of the jmp_buf used here is not compatible
10503 // with that used by libc, and is not designed to be. Specifically, it
10504 // stores only those 'reserved' registers that LLVM does not otherwise
10505 // understand how to spill. Also, by convention, by the time this
10506 // intrinsic is called, Clang has already stored the frame address in the
10507 // first slot of the buffer and stack address in the third. Following the
10508 // X86 target code, we'll store the jump address in the second slot. We also
10509 // need to save the TOC pointer (R2) to handle jumps between shared
10510 // libraries, and that will be stored in the fourth slot. The thread
10511 // identifier (R13) is not affected.
10513 // thisMBB:
10514 const int64_t LabelOffset = 1 * PVT.getStoreSize();
10515 const int64_t TOCOffset = 3 * PVT.getStoreSize();
10516 const int64_t BPOffset = 4 * PVT.getStoreSize();
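// On a 64-bit target (PVT store size of 8) the buffer layout is thus:
// offset 0 = frame address, 8 = jump address (IP), 16 = stack address,
// 24 = TOC pointer (r2), 32 = base pointer.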
10518 // Prepare the IP (the address to jump back to) in a register.
10519 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
10520 Register LabelReg = MRI.createVirtualRegister(PtrRC);
10521 Register BufReg = MI.getOperand(1).getReg();
10523 if (Subtarget.is64BitELFABI()) {
10524 setUsesTOCBasePtr(*MBB->getParent());
10525 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
10526 .addReg(PPC::X2)
10527 .addImm(TOCOffset)
10528 .addReg(BufReg)
10529 .cloneMemRefs(MI);
10530 }
10532 // Naked functions never have a base pointer, and so we use r1. For all
10533 // other functions, this decision must be deferred until PEI.
10534 unsigned BaseReg;
10535 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
10536 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
10537 else
10538 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
10540 MIB = BuildMI(*thisMBB, MI, DL,
10541 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
10542 .addReg(BaseReg)
10543 .addImm(BPOffset)
10544 .addReg(BufReg)
10545 .cloneMemRefs(MI);
10547 // Setup
10548 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
10549 MIB.addRegMask(TRI->getNoPreservedMask());
10551 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
10553 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
10554 .addMBB(mainMBB);
10555 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
10557 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
10558 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
10560 // mainMBB:
10561 // mainDstReg = 0
10562 MIB =
10563 BuildMI(mainMBB, DL,
10564 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
10566 // Store IP
10567 if (Subtarget.isPPC64()) {
10568 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
10569 .addReg(LabelReg)
10570 .addImm(LabelOffset)
10571 .addReg(BufReg);
10572 } else {
10573 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
10574 .addReg(LabelReg)
10575 .addImm(LabelOffset)
10576 .addReg(BufReg);
10577 }
10578 MIB.cloneMemRefs(MI);
10580 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
10581 mainMBB->addSuccessor(sinkMBB);
10583 // sinkMBB:
10584 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
10585 TII->get(PPC::PHI), DstReg)
10586 .addReg(mainDstReg).addMBB(mainMBB)
10587 .addReg(restoreDstReg).addMBB(thisMBB);
10589 MI.eraseFromParent();
10590 return sinkMBB;
10591 }
10593 MachineBasicBlock *
10594 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
10595 MachineBasicBlock *MBB) const {
10596 DebugLoc DL = MI.getDebugLoc();
10597 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10599 MachineFunction *MF = MBB->getParent();
10600 MachineRegisterInfo &MRI = MF->getRegInfo();
10602 MVT PVT = getPointerTy(MF->getDataLayout());
10603 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
10604 "Invalid Pointer Size!");
10606 const TargetRegisterClass *RC =
10607 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
10608 Register Tmp = MRI.createVirtualRegister(RC);
10609 // Since FP is only updated here but NOT referenced, it's treated as a GPR.
10610 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
10611 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
10612 unsigned BP =
10613 (PVT == MVT::i64)
10614 ? PPC::X30
10615 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
10616 : PPC::R30);
10618 MachineInstrBuilder MIB;
10620 const int64_t LabelOffset = 1 * PVT.getStoreSize();
10621 const int64_t SPOffset = 2 * PVT.getStoreSize();
10622 const int64_t TOCOffset = 3 * PVT.getStoreSize();
10623 const int64_t BPOffset = 4 * PVT.getStoreSize();
10625 Register BufReg = MI.getOperand(0).getReg();
10627 // Reload FP (the jumped-to function may not have had a
10628 // frame pointer, in which case its r31 will be restored
10629 // as necessary).
10630 if (PVT == MVT::i64) {
10631 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
10632 .addImm(0)
10633 .addReg(BufReg);
10634 } else {
10635 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
10636 .addImm(0)
10637 .addReg(BufReg);
10638 }
10639 MIB.cloneMemRefs(MI);
10641 // Reload IP
10642 if (PVT == MVT::i64) {
10643 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
10644 .addImm(LabelOffset)
10645 .addReg(BufReg);
10646 } else {
10647 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
10648 .addImm(LabelOffset)
10649 .addReg(BufReg);
10650 }
10651 MIB.cloneMemRefs(MI);
10653 // Reload SP
10654 if (PVT == MVT::i64) {
10655 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
10656 .addImm(SPOffset)
10657 .addReg(BufReg);
10658 } else {
10659 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
10660 .addImm(SPOffset)
10661 .addReg(BufReg);
10662 }
10663 MIB.cloneMemRefs(MI);
10665 // Reload BP
10666 if (PVT == MVT::i64) {
10667 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
10668 .addImm(BPOffset)
10669 .addReg(BufReg);
10670 } else {
10671 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
10672 .addImm(BPOffset)
10673 .addReg(BufReg);
10674 }
10675 MIB.cloneMemRefs(MI);
10677 // Reload TOC
10678 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
10679 setUsesTOCBasePtr(*MBB->getParent());
10680 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
10681 .addImm(TOCOffset)
10682 .addReg(BufReg)
10683 .cloneMemRefs(MI);
10684 }
10686 // Jump
10687 BuildMI(*MBB, MI, DL,
10688 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
10689 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
10691 MI.eraseFromParent();
10692 return MBB;
10693 }
10695 MachineBasicBlock *
10696 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
10697 MachineBasicBlock *BB) const {
10698 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
10699 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
10700 if (Subtarget.is64BitELFABI() &&
10701 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
10702 // Call lowering should have added an r2 operand to indicate a dependence
10703 // on the TOC base pointer value. It can't, however, because there is no
10704 // way to mark the dependence as implicit there, and so the stackmap code
10705 // will confuse it with a regular operand. Instead, add the dependence
10706 // here.
10707 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
10708 }
10710 return emitPatchPoint(MI, BB);
10711 }
10713 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
10714 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
10715 return emitEHSjLjSetJmp(MI, BB);
10716 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
10717 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
10718 return emitEHSjLjLongJmp(MI, BB);
10719 }
10721 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10723 // To "insert" these instructions we actually have to insert their
10724 // control-flow patterns.
10725 const BasicBlock *LLVM_BB = BB->getBasicBlock();
10726 MachineFunction::iterator It = ++BB->getIterator();
10728 MachineFunction *F = BB->getParent();
10730 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10731 MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
10732 MI.getOpcode() == PPC::SELECT_I8) {
10733 SmallVector<MachineOperand, 2> Cond;
10734 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10735 MI.getOpcode() == PPC::SELECT_CC_I8)
10736 Cond.push_back(MI.getOperand(4));
10737 else
10738 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
10739 Cond.push_back(MI.getOperand(1));
10741 DebugLoc dl = MI.getDebugLoc();
10742 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
10743 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
10744 } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10745 MI.getOpcode() == PPC::SELECT_CC_I8 ||
10746 MI.getOpcode() == PPC::SELECT_CC_F4 ||
10747 MI.getOpcode() == PPC::SELECT_CC_F8 ||
10748 MI.getOpcode() == PPC::SELECT_CC_F16 ||
10749 MI.getOpcode() == PPC::SELECT_CC_QFRC ||
10750 MI.getOpcode() == PPC::SELECT_CC_QSRC ||
10751 MI.getOpcode() == PPC::SELECT_CC_QBRC ||
10752 MI.getOpcode() == PPC::SELECT_CC_VRRC ||
10753 MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
10754 MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
10755 MI.getOpcode() == PPC::SELECT_CC_VSRC ||
10756 MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
10757 MI.getOpcode() == PPC::SELECT_CC_SPE ||
10758 MI.getOpcode() == PPC::SELECT_I4 ||
10759 MI.getOpcode() == PPC::SELECT_I8 ||
10760 MI.getOpcode() == PPC::SELECT_F4 ||
10761 MI.getOpcode() == PPC::SELECT_F8 ||
10762 MI.getOpcode() == PPC::SELECT_F16 ||
10763 MI.getOpcode() == PPC::SELECT_QFRC ||
10764 MI.getOpcode() == PPC::SELECT_QSRC ||
10765 MI.getOpcode() == PPC::SELECT_QBRC ||
10766 MI.getOpcode() == PPC::SELECT_SPE ||
10767 MI.getOpcode() == PPC::SELECT_SPE4 ||
10768 MI.getOpcode() == PPC::SELECT_VRRC ||
10769 MI.getOpcode() == PPC::SELECT_VSFRC ||
10770 MI.getOpcode() == PPC::SELECT_VSSRC ||
10771 MI.getOpcode() == PPC::SELECT_VSRC) {
10772 // The incoming instruction knows the destination vreg to set, the
10773 // condition code register to branch on, the true/false values to
10774 // select between, and a branch opcode to use.
10776 // thisMBB:
10777 // ...
10778 // TrueVal = ...
10779 // cmpTY ccX, r1, r2
10780 // bCC copy1MBB
10781 // fallthrough --> copy0MBB
10782 MachineBasicBlock *thisMBB = BB;
10783 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
10784 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
10785 DebugLoc dl = MI.getDebugLoc();
10786 F->insert(It, copy0MBB);
10787 F->insert(It, sinkMBB);
10789 // Transfer the remainder of BB and its successor edges to sinkMBB.
10790 sinkMBB->splice(sinkMBB->begin(), BB,
10791 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10792 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
10794 // Next, add the true and fallthrough blocks as its successors.
10795 BB->addSuccessor(copy0MBB);
10796 BB->addSuccessor(sinkMBB);
10798 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
10799 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
10800 MI.getOpcode() == PPC::SELECT_F16 ||
10801 MI.getOpcode() == PPC::SELECT_SPE4 ||
10802 MI.getOpcode() == PPC::SELECT_SPE ||
10803 MI.getOpcode() == PPC::SELECT_QFRC ||
10804 MI.getOpcode() == PPC::SELECT_QSRC ||
10805 MI.getOpcode() == PPC::SELECT_QBRC ||
10806 MI.getOpcode() == PPC::SELECT_VRRC ||
10807 MI.getOpcode() == PPC::SELECT_VSFRC ||
10808 MI.getOpcode() == PPC::SELECT_VSSRC ||
10809 MI.getOpcode() == PPC::SELECT_VSRC) {
10810 BuildMI(BB, dl, TII->get(PPC::BC))
10811 .addReg(MI.getOperand(1).getReg())
10812 .addMBB(sinkMBB);
10813 } else {
10814 unsigned SelectPred = MI.getOperand(4).getImm();
10815 BuildMI(BB, dl, TII->get(PPC::BCC))
10816 .addImm(SelectPred)
10817 .addReg(MI.getOperand(1).getReg())
10818 .addMBB(sinkMBB);
10819 }
10821 // copy0MBB:
10822 // %FalseValue = ...
10823 // # fallthrough to sinkMBB
10824 BB = copy0MBB;
10826 // Update machine-CFG edges
10827 BB->addSuccessor(sinkMBB);
10829 // sinkMBB:
10830 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
10831 // ...
10832 BB = sinkMBB;
10833 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
10834 .addReg(MI.getOperand(3).getReg())
10835 .addMBB(copy0MBB)
10836 .addReg(MI.getOperand(2).getReg())
10837 .addMBB(thisMBB);
10838 } else if (MI.getOpcode() == PPC::ReadTB) {
10839 // To read the 64-bit time-base register on a 32-bit target, we read the
10840 // two halves. Should the counter have wrapped while it was being read, we
10841 // need to try again.
10842 // ...
10843 // readLoop:
10844 // mfspr Rx,TBU # load from TBU
10845 // mfspr Ry,TB # load from TB
10846 // mfspr Rz,TBU # load from TBU
10847 // cmpw crX,Rx,Rz # check if 'old'='new'
10848 // bne readLoop # branch if they're not equal
10849 // ...
10851 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
10852 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
10853 DebugLoc dl = MI.getDebugLoc();
10854 F->insert(It, readMBB);
10855 F->insert(It, sinkMBB);
10857 // Transfer the remainder of BB and its successor edges to sinkMBB.
10858 sinkMBB->splice(sinkMBB->begin(), BB,
10859 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10860 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
10862 BB->addSuccessor(readMBB);
10863 BB = readMBB;
10865 MachineRegisterInfo &RegInfo = F->getRegInfo();
10866 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10867 Register LoReg = MI.getOperand(0).getReg();
10868 Register HiReg = MI.getOperand(1).getReg();
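// SPR 269 is TBU (the upper half of the time base) and SPR 268 is TBL
// (the lower half), so the three mfspr instructions below implement the
// upper/lower/upper read sequence sketched above.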
10870 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
10871 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
10872 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
10874 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
10876 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
10877 .addReg(HiReg)
10878 .addReg(ReadAgainReg);
10879 BuildMI(BB, dl, TII->get(PPC::BCC))
10880 .addImm(PPC::PRED_NE)
10881 .addReg(CmpReg)
10882 .addMBB(readMBB);
10884 BB->addSuccessor(readMBB);
10885 BB->addSuccessor(sinkMBB);
10886 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
10887 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
10888 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
10889 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
10890 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
10891 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
10892 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
10893 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
10895 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
10896 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
10897 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
10898 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
10899 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
10900 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
10901 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
10902 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
10904 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
10905 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
10906 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
10907 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
10908 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
10909 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
10910 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
10911 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
10913 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
10914 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
10915 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
10916 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
10917 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
10918 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
10919 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
10920 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
10922 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
10923 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
10924 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
10925 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
10926 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
10927 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
10928 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
10929 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
10931 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
10932 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
10933 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
10934 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
10935 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
10936 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
10937 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
10938 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
10940 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
10941 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
10942 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
10943 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
10944 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
10945 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
10946 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
10947 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
10949 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
10950 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
10951 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
10952 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
10953 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
10954 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
10955 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
10956 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
10958 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
10959 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
10960 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
10961 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
10962 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
10963 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
10964 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
10965 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
10967 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
10968 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
10969 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
10970 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
10971 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
10972 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
10973 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
10974 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
10976 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
10977 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
10978 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
10979 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
10980 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
10981 BB = EmitAtomicBinary(MI, BB, 4, 0);
10982 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
10983 BB = EmitAtomicBinary(MI, BB, 8, 0);
10984 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
10985 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
10986 (Subtarget.hasPartwordAtomics() &&
10987 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
10988 (Subtarget.hasPartwordAtomics() &&
10989 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
10990 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
10992 auto LoadMnemonic = PPC::LDARX;
10993 auto StoreMnemonic = PPC::STDCX;
10994 switch (MI.getOpcode()) {
10995 default:
10996 llvm_unreachable("Compare and swap of unknown size");
10997 case PPC::ATOMIC_CMP_SWAP_I8:
10998 LoadMnemonic = PPC::LBARX;
10999 StoreMnemonic = PPC::STBCX;
11000 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
11001 break;
11002 case PPC::ATOMIC_CMP_SWAP_I16:
11003 LoadMnemonic = PPC::LHARX;
11004 StoreMnemonic = PPC::STHCX;
11005 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
11006 break;
11007 case PPC::ATOMIC_CMP_SWAP_I32:
11008 LoadMnemonic = PPC::LWARX;
11009 StoreMnemonic = PPC::STWCX;
11010 break;
11011 case PPC::ATOMIC_CMP_SWAP_I64:
11012 LoadMnemonic = PPC::LDARX;
11013 StoreMnemonic = PPC::STDCX;
11014 break;
11015 }
11016 Register dest = MI.getOperand(0).getReg();
11017 Register ptrA = MI.getOperand(1).getReg();
11018 Register ptrB = MI.getOperand(2).getReg();
11019 Register oldval = MI.getOperand(3).getReg();
11020 Register newval = MI.getOperand(4).getReg();
11021 DebugLoc dl = MI.getDebugLoc();
11023 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11024 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11025 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11026 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11027 F->insert(It, loop1MBB);
11028 F->insert(It, loop2MBB);
11029 F->insert(It, midMBB);
11030 F->insert(It, exitMBB);
11031 exitMBB->splice(exitMBB->begin(), BB,
11032 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11033 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11035 // thisMBB:
11036 // ...
11037 // fallthrough --> loopMBB
11038 BB->addSuccessor(loop1MBB);
11040 // loop1MBB:
11041 // l[bhwd]arx dest, ptr
11042 // cmp[wd] dest, oldval
11043 // bne- midMBB
11044 // loop2MBB:
11045 // st[bhwd]cx. newval, ptr
11046 // bne- loopMBB
11047 // b exitBB
11048 // midMBB:
11049 // st[bhwd]cx. dest, ptr
11050 // exitBB:
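// Note that the store in midMBB is not needed for the value itself: it
// writes back the data just loaded, purely so the st[bhwd]cx. clears the
// reservation established by the load-and-reserve on the failure path.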
11051 BB = loop1MBB;
11052 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
11053 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
11054 .addReg(oldval)
11055 .addReg(dest);
11056 BuildMI(BB, dl, TII->get(PPC::BCC))
11057 .addImm(PPC::PRED_NE)
11058 .addReg(PPC::CR0)
11059 .addMBB(midMBB);
11060 BB->addSuccessor(loop2MBB);
11061 BB->addSuccessor(midMBB);
11063 BB = loop2MBB;
11064 BuildMI(BB, dl, TII->get(StoreMnemonic))
11065 .addReg(newval)
11066 .addReg(ptrA)
11067 .addReg(ptrB);
11068 BuildMI(BB, dl, TII->get(PPC::BCC))
11069 .addImm(PPC::PRED_NE)
11070 .addReg(PPC::CR0)
11071 .addMBB(loop1MBB);
11072 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11073 BB->addSuccessor(loop1MBB);
11074 BB->addSuccessor(exitMBB);
11076 BB = midMBB;
11077 BuildMI(BB, dl, TII->get(StoreMnemonic))
11078 .addReg(dest)
11079 .addReg(ptrA)
11080 .addReg(ptrB);
11081 BB->addSuccessor(exitMBB);
11083 // exitMBB:
11084 // ...
11085 BB = exitMBB;
11086 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
11087 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
11088 // We must use 64-bit registers for addresses when targeting 64-bit,
11089 // since we're actually doing arithmetic on them. Other registers
11090 // can be 32-bit.
11091 bool is64bit = Subtarget.isPPC64();
11092 bool isLittleEndian = Subtarget.isLittleEndian();
11093 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
11095 Register dest = MI.getOperand(0).getReg();
11096 Register ptrA = MI.getOperand(1).getReg();
11097 Register ptrB = MI.getOperand(2).getReg();
11098 Register oldval = MI.getOperand(3).getReg();
11099 Register newval = MI.getOperand(4).getReg();
11100 DebugLoc dl = MI.getDebugLoc();
11102 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11103 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11104 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11105 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11106 F->insert(It, loop1MBB);
11107 F->insert(It, loop2MBB);
11108 F->insert(It, midMBB);
11109 F->insert(It, exitMBB);
11110 exitMBB->splice(exitMBB->begin(), BB,
11111 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11112 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11114 MachineRegisterInfo &RegInfo = F->getRegInfo();
11115 const TargetRegisterClass *RC =
11116 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11117 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11119 Register PtrReg = RegInfo.createVirtualRegister(RC);
11120 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11121 Register ShiftReg =
11122 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11123 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
11124 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
11125 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
11126 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
11127 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11128 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11129 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11130 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11131 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11132 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11133 Register Ptr1Reg;
11134 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
11135 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11136 // thisMBB:
11137 // ...
11138 // fallthrough --> loopMBB
11139 BB->addSuccessor(loop1MBB);
11141 // The 4-byte load must be aligned, while a char or short may be
11142 // anywhere in the word. Hence all this nasty bookkeeping code.
11143 // add ptr1, ptrA, ptrB [copy if ptrA==0]
11144 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11145 // xori shift, shift1, 24 [16]
11146 // rlwinm ptr, ptr1, 0, 0, 29
11147 // slw newval2, newval, shift
11148 // slw oldval2, oldval, shift
11149 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11150 // slw mask, mask2, shift
11151 // and newval3, newval2, mask
11152 // and oldval3, oldval2, mask
11153 // loop1MBB:
11154 // lwarx tmpDest, ptr
11155 // and tmp, tmpDest, mask
11156 // cmpw tmp, oldval3
11157 // bne- midMBB
11158 // loop2MBB:
11159 // andc tmp2, tmpDest, mask
11160 // or tmp4, tmp2, newval3
11161 // stwcx. tmp4, ptr
11162 // bne- loop1MBB
11163 // b exitBB
11164 // midMBB:
11165 // stwcx. tmpDest, ptr
11166 // exitBB:
11167 // srw dest, tmpDest, shift
11168 if (ptrA != ZeroReg) {
11169 Ptr1Reg = RegInfo.createVirtualRegister(RC);
11170 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11171 .addReg(ptrA)
11172 .addReg(ptrB);
11173 } else {
11174 Ptr1Reg = ptrB;
11175 }
11177 // We need to use a 32-bit subregister here to avoid a register-class
11178 // mismatch in 64-bit mode.
11179 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11180 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11181 .addImm(3)
11182 .addImm(27)
11183 .addImm(is8bit ? 28 : 27);
11184 if (!isLittleEndian)
11185 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11186 .addReg(Shift1Reg)
11187 .addImm(is8bit ? 24 : 16);
11188 if (is64bit)
11189 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11190 .addReg(Ptr1Reg)
11191 .addImm(0)
11192 .addImm(61);
11193 else
11194 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11195 .addReg(Ptr1Reg)
11196 .addImm(0)
11197 .addImm(0)
11198 .addImm(29);
11199 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
11200 .addReg(newval)
11201 .addReg(ShiftReg);
11202 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
11203 .addReg(oldval)
11204 .addReg(ShiftReg);
11205 if (is8bit)
11206 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11207 else {
11208 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11209 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11210 .addReg(Mask3Reg)
11211 .addImm(65535);
11212 }
11213 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11214 .addReg(Mask2Reg)
11215 .addReg(ShiftReg);
11216 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
11217 .addReg(NewVal2Reg)
11218 .addReg(MaskReg);
11219 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
11220 .addReg(OldVal2Reg)
11221 .addReg(MaskReg);
11223 BB = loop1MBB;
11224 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11225 .addReg(ZeroReg)
11226 .addReg(PtrReg);
11227 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
11228 .addReg(TmpDestReg)
11229 .addReg(MaskReg);
11230 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
11231 .addReg(TmpReg)
11232 .addReg(OldVal3Reg);
11233 BuildMI(BB, dl, TII->get(PPC::BCC))
11234 .addImm(PPC::PRED_NE)
11235 .addReg(PPC::CR0)
11236 .addMBB(midMBB);
11237 BB->addSuccessor(loop2MBB);
11238 BB->addSuccessor(midMBB);
11240 BB = loop2MBB;
11241 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11242 .addReg(TmpDestReg)
11243 .addReg(MaskReg);
11244 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
11245 .addReg(Tmp2Reg)
11246 .addReg(NewVal3Reg);
11247 BuildMI(BB, dl, TII->get(PPC::STWCX))
11248 .addReg(Tmp4Reg)
11249 .addReg(ZeroReg)
11250 .addReg(PtrReg);
11251 BuildMI(BB, dl, TII->get(PPC::BCC))
11252 .addImm(PPC::PRED_NE)
11253 .addReg(PPC::CR0)
11254 .addMBB(loop1MBB);
11255 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11256 BB->addSuccessor(loop1MBB);
11257 BB->addSuccessor(exitMBB);
11259 BB = midMBB;
11260 BuildMI(BB, dl, TII->get(PPC::STWCX))
11261 .addReg(TmpDestReg)
11262 .addReg(ZeroReg)
11263 .addReg(PtrReg);
11264 BB->addSuccessor(exitMBB);
11266 // exitMBB:
11267 // ...
11268 BB = exitMBB;
11269 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11270 .addReg(TmpReg)
11271 .addReg(ShiftReg);
11272 } else if (MI.getOpcode() == PPC::FADDrtz) {
11273 // This pseudo performs an FADD with rounding mode temporarily forced
11274 // to round-to-zero. We emit this via custom inserter since the FPSCR
11275 // is not modeled at the SelectionDAG level.
11276 Register Dest = MI.getOperand(0).getReg();
11277 Register Src1 = MI.getOperand(1).getReg();
11278 Register Src2 = MI.getOperand(2).getReg();
11279 DebugLoc dl = MI.getDebugLoc();
11281 MachineRegisterInfo &RegInfo = F->getRegInfo();
11282 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
11284 // Save FPSCR value.
11285 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
11287 // Set rounding mode to round-to-zero.
11288 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
11289 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
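// Setting bit 31 and clearing bit 30 leaves the FPSCR rounding-control
// field RN = 0b01, i.e. round toward zero.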
11291 // Perform addition.
11292 BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
11294 // Restore FPSCR value.
11295 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
11296 } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
11297 MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
11298 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
11299 MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
11300 unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
11301 MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
11302 ? PPC::ANDIo8
11303 : PPC::ANDIo;
11304 bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
11305 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
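// andi. computes (src & 1) and records the result in CR0: EQ is set when
// the low bit is clear, GT when it is set. Copying CR0EQ or CR0GT below
// therefore materializes the (possibly complemented) tested bit.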
11307 MachineRegisterInfo &RegInfo = F->getRegInfo();
11308 Register Dest = RegInfo.createVirtualRegister(
11309 Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
11311 DebugLoc dl = MI.getDebugLoc();
11312 BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
11313 .addReg(MI.getOperand(1).getReg())
11314 .addImm(1);
11315 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
11316 MI.getOperand(0).getReg())
11317 .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
11318 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
11319 DebugLoc Dl = MI.getDebugLoc();
11320 MachineRegisterInfo &RegInfo = F->getRegInfo();
11321 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11322 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
11323 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
11324 MI.getOperand(0).getReg())
11325 .addReg(CRReg);
11326 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
11327 DebugLoc Dl = MI.getDebugLoc();
11328 unsigned Imm = MI.getOperand(1).getImm();
11329 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
11330 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
11331 MI.getOperand(0).getReg())
11332 .addReg(PPC::CR0EQ);
11333 } else if (MI.getOpcode() == PPC::SETRNDi) {
11334 DebugLoc dl = MI.getDebugLoc();
11335 Register OldFPSCRReg = MI.getOperand(0).getReg();
11337 // Save FPSCR value.
11338 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
11340 // The floating-point rounding mode is in bits 62:63 of the FPSCR, and has
11341 // the following settings:
11342 // 00 Round to nearest
11343 // 01 Round to 0
11344 // 10 Round to +inf
11345 // 11 Round to -inf
11347 // When the operand is an immediate, use its two least significant bits to
11348 // set bits 62:63 of the FPSCR.
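// For example, an immediate of 1 (round to 0) emits mtfsb1 31 followed by
// mtfsb0 30, while an immediate of 3 (round to -inf) sets both bits.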
11349 unsigned Mode = MI.getOperand(1).getImm();
11350 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
11351 .addImm(31);
11353 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
11354 .addImm(30);
11355 } else if (MI.getOpcode() == PPC::SETRND) {
11356 DebugLoc dl = MI.getDebugLoc();
11358 // Copy a register from F8RCRegClass (SrcReg) to G8RCRegClass (DestReg),
11359 // or from G8RCRegClass (SrcReg) to F8RCRegClass (DestReg).
11360 // If the target doesn't have DirectMove, we have to go through the stack,
11361 // because the target lacks instructions such as mtvsrd and mfvsrd that
11362 // could do the conversion directly.
11363 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
11364 if (Subtarget.hasDirectMove()) {
11365 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
11366 .addReg(SrcReg);
11367 } else {
11368 // Use stack to do the register copy.
11369 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
11370 MachineRegisterInfo &RegInfo = F->getRegInfo();
11371 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
11372 if (RC == &PPC::F8RCRegClass) {
11373 // Copy register from F8RCRegClass to G8RCRegclass.
11374 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
11375 "Unsupported RegClass.");
11377 StoreOp = PPC::STFD;
11378 LoadOp = PPC::LD;
11379 } else {
11380 // Copy register from G8RCRegClass to F8RCRegclass.
11381 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
11382 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
11383 "Unsupported RegClass.");
11386 MachineFrameInfo &MFI = F->getFrameInfo();
11387 int FrameIdx = MFI.CreateStackObject(8, 8, false);
11389 MachineMemOperand *MMOStore = F->getMachineMemOperand(
11390 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
11391 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
11392 MFI.getObjectAlignment(FrameIdx));
11394 // Store the SrcReg into the stack.
11395 BuildMI(*BB, MI, dl, TII->get(StoreOp))
11396 .addReg(SrcReg)
11397 .addImm(0)
11398 .addFrameIndex(FrameIdx)
11399 .addMemOperand(MMOStore);
11401 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
11402 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
11403 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
11404 MFI.getObjectAlignment(FrameIdx));
11406 // Load from the stack where SrcReg is stored, and save to DestReg,
11407 // so we have done the RegClass conversion from RegClass::SrcReg to
11408 // RegClass::DestReg.
11409 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
11410 .addImm(0)
11411 .addFrameIndex(FrameIdx)
11412 .addMemOperand(MMOLoad);
11413 }
11414 };
11416 Register OldFPSCRReg = MI.getOperand(0).getReg();
11418 // Save FPSCR value.
11419 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
11421 // When the operand is a GPRC register, use its two least significant bits
11422 // together with the mtfsf instruction to set bits 62:63 of the FPSCR.
11424 // copy OldFPSCRTmpReg, OldFPSCRReg
11425 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
11426 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
11427 // copy NewFPSCRReg, NewFPSCRTmpReg
11428 // mtfsf 255, NewFPSCRReg
11429 MachineOperand SrcOp = MI.getOperand(1);
11430 MachineRegisterInfo &RegInfo = F->getRegInfo();
11431 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11433 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
11435 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11436 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11438 // The first operand of INSERT_SUBREG should be a register that has
11439 // subregisters; since we only care about its RegClass, we use an
11440 // IMPLICIT_DEF register.
11441 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
11442 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
11443 .addReg(ImDefReg)
11444 .add(SrcOp)
11445 .addImm(1);
11447 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11448 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
11449 .addReg(OldFPSCRTmpReg)
11450 .addReg(ExtSrcReg)
11451 .addImm(0)
11452 .addImm(62);
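// With SH = 0 and MB = 62 the rldimi insertion mask covers only bits
// 62:63, so NewFPSCRTmpReg is the old FPSCR image with just the
// rounding-mode field replaced by the low two bits of ExtSrcReg.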
11454 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
11455 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
11457 // The mask 255 means that bits 32:63 of NewFPSCRReg are written into bits
11458 // 32:63 of the FPSCR.
11459 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
11460 .addImm(255)
11461 .addReg(NewFPSCRReg)
11462 .addImm(0)
11463 .addImm(0);
11464 } else {
11465 llvm_unreachable("Unexpected instr type to insert");
11466 }
11468 MI.eraseFromParent(); // The pseudo instruction is gone now.
11469 return BB;
11470 }
11472 //===----------------------------------------------------------------------===//
11473 // Target Optimization Hooks
11474 //===----------------------------------------------------------------------===//
11476 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
11477 // For the estimates, convergence is quadratic, so we essentially double the
11478 // number of digits correct after every iteration. For both FRE and FRSQRTE,
11479 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
11480 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
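// For example, starting from the baseline 2^-5 estimate, three steps give
// roughly 2^-10, 2^-20, then 2^-40, enough for f32; f64 takes the extra
// step added below. With hasRecipPrec(), one step already reaches ~2^-28.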
11481 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
11482 if (VT.getScalarType() == MVT::f64)
11483 RefinementSteps++;
11484 return RefinementSteps;
11485 }
11487 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
11488 int Enabled, int &RefinementSteps,
11489 bool &UseOneConstNR,
11490 bool Reciprocal) const {
11491 EVT VT = Operand.getValueType();
11492 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
11493 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
11494 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
11495 (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
11496 (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
11497 (VT == MVT::v4f64 && Subtarget.hasQPX())) {
11498 if (RefinementSteps == ReciprocalEstimate::Unspecified)
11499 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
11501 // The Newton-Raphson computation with a single constant does not provide
11502 // enough accuracy on some CPUs.
11503 UseOneConstNR = !Subtarget.needsTwoConstNR();
11504 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
11505 }
11506 return SDValue();
11507 }
11509 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
11510 int Enabled,
11511 int &RefinementSteps) const {
11512 EVT VT = Operand.getValueType();
11513 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
11514 (VT == MVT::f64 && Subtarget.hasFRE()) ||
11515 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
11516 (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
11517 (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
11518 (VT == MVT::v4f64 && Subtarget.hasQPX())) {
11519 if (RefinementSteps == ReciprocalEstimate::Unspecified)
11520 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
11521 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
11522 }
11523 return SDValue();
11524 }
11526 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
11527 // Note: This functionality is used only when unsafe-fp-math is enabled, and
11528 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
11529 // enabled for division), this functionality is redundant with the default
11530 // combiner logic (once the division -> reciprocal/multiply transformation
11531 // has taken place). As a result, this matters more for older cores than for
11532 // newer ones.
11534 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11535 // reciprocal if there are two or more FDIVs (for embedded cores with only
11536 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
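// As an illustration (under the fast-math conditions mentioned above),
// the pair "x/d; y/d" becomes "t = 1.0/d; x*t; y*t" once the number of
// divisions by d reaches the threshold returned here.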
11537 switch (Subtarget.getDarwinDirective()) {
11538 default:
11539 return 3;
11540 case PPC::DIR_440:
11541 case PPC::DIR_A2:
11542 case PPC::DIR_E500:
11543 case PPC::DIR_E500mc:
11544 case PPC::DIR_E5500:
11545 return 2;
11546 }
11547 }
11549 // isConsecutiveLSLoc needs to work even if all adds have not yet been
11550 // collapsed, and so we need to look through chains of them.
11551 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
11552 int64_t& Offset, SelectionDAG &DAG) {
11553 if (DAG.isBaseWithConstantOffset(Loc)) {
11554 Base = Loc.getOperand(0);
11555 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
11557 // The base might itself be a base plus an offset, and if so, accumulate
11558 // that as well.
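// For example, a chain like (add (add FI, 8), 16) ends up with Base = FI
// and Offset = 24 once the recursion below bottoms out.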
11559 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
11560 }
11561 }
11563 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
11564 unsigned Bytes, int Dist,
11565 SelectionDAG &DAG) {
11566 if (VT.getSizeInBits() / 8 != Bytes)
11567 return false;
11569 SDValue BaseLoc = Base->getBasePtr();
11570 if (Loc.getOpcode() == ISD::FrameIndex) {
11571 if (BaseLoc.getOpcode() != ISD::FrameIndex)
11572 return false;
11573 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11574 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
11575 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
11576 int FS = MFI.getObjectSize(FI);
11577 int BFS = MFI.getObjectSize(BFI);
11578 if (FS != BFS || FS != (int)Bytes) return false;
11579 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
11580 }
11582 SDValue Base1 = Loc, Base2 = BaseLoc;
11583 int64_t Offset1 = 0, Offset2 = 0;
11584 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
11585 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
11586 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
11587 return true;
11589 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11590 const GlobalValue *GV1 = nullptr;
11591 const GlobalValue *GV2 = nullptr;
11592 Offset1 = 0;
11593 Offset2 = 0;
11594 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
11595 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
11596 if (isGA1 && isGA2 && GV1 == GV2)
11597 return Offset1 == (Offset2 + Dist*Bytes);
11598 return false;
11599 }
11601 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
11602 // not enforce equality of the chain operands.
11603 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
11604 unsigned Bytes, int Dist,
11605 SelectionDAG &DAG) {
11606 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
11607 EVT VT = LS->getMemoryVT();
11608 SDValue Loc = LS->getBasePtr();
11609 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
11610 }
11612 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
11613 EVT VT;
11614 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11615 default: return false;
11616 case Intrinsic::ppc_qpx_qvlfd:
11617 case Intrinsic::ppc_qpx_qvlfda:
11618 VT = MVT::v4f64;
11619 break;
11620 case Intrinsic::ppc_qpx_qvlfs:
11621 case Intrinsic::ppc_qpx_qvlfsa:
11622 VT = MVT::v4f32;
11623 break;
11624 case Intrinsic::ppc_qpx_qvlfcd:
11625 case Intrinsic::ppc_qpx_qvlfcda:
11626 VT = MVT::v2f64;
11627 break;
11628 case Intrinsic::ppc_qpx_qvlfcs:
11629 case Intrinsic::ppc_qpx_qvlfcsa:
11630 VT = MVT::v2f32;
11631 break;
11632 case Intrinsic::ppc_qpx_qvlfiwa:
11633 case Intrinsic::ppc_qpx_qvlfiwz:
11634 case Intrinsic::ppc_altivec_lvx:
11635 case Intrinsic::ppc_altivec_lvxl:
11636 case Intrinsic::ppc_vsx_lxvw4x:
11637 case Intrinsic::ppc_vsx_lxvw4x_be:
11638 VT = MVT::v4i32;
11639 break;
11640 case Intrinsic::ppc_vsx_lxvd2x:
11641 case Intrinsic::ppc_vsx_lxvd2x_be:
11642 VT = MVT::v2f64;
11643 break;
11644 case Intrinsic::ppc_altivec_lvebx:
11645 VT = MVT::i8;
11646 break;
11647 case Intrinsic::ppc_altivec_lvehx:
11648 VT = MVT::i16;
11649 break;
11650 case Intrinsic::ppc_altivec_lvewx:
11651 VT = MVT::i32;
11652 break;
11653 }
11655 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
11656 }
11658 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
11659 EVT VT;
11660 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11661 default: return false;
11662 case Intrinsic::ppc_qpx_qvstfd:
11663 case Intrinsic::ppc_qpx_qvstfda:
11664 VT = MVT::v4f64;
11665 break;
11666 case Intrinsic::ppc_qpx_qvstfs:
11667 case Intrinsic::ppc_qpx_qvstfsa:
11668 VT = MVT::v4f32;
11669 break;
11670 case Intrinsic::ppc_qpx_qvstfcd:
11671 case Intrinsic::ppc_qpx_qvstfcda:
11672 VT = MVT::v2f64;
11673 break;
11674 case Intrinsic::ppc_qpx_qvstfcs:
11675 case Intrinsic::ppc_qpx_qvstfcsa:
11676 VT = MVT::v2f32;
11677 break;
11678 case Intrinsic::ppc_qpx_qvstfiw:
11679 case Intrinsic::ppc_qpx_qvstfiwa:
11680 case Intrinsic::ppc_altivec_stvx:
11681 case Intrinsic::ppc_altivec_stvxl:
11682 case Intrinsic::ppc_vsx_stxvw4x:
11683 VT = MVT::v4i32;
11684 break;
11685 case Intrinsic::ppc_vsx_stxvd2x:
11686 VT = MVT::v2f64;
11687 break;
11688 case Intrinsic::ppc_vsx_stxvw4x_be:
11689 VT = MVT::v4i32;
11690 break;
11691 case Intrinsic::ppc_vsx_stxvd2x_be:
11692 VT = MVT::v2f64;
11693 break;
11694 case Intrinsic::ppc_altivec_stvebx:
11695 VT = MVT::i8;
11696 break;
11697 case Intrinsic::ppc_altivec_stvehx:
11698 VT = MVT::i16;
11699 break;
11700 case Intrinsic::ppc_altivec_stvewx:
11701 VT = MVT::i32;
11702 break;
11703 }
11705 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
11706 }
11708 return false;
11709 }
11711 // Return true if there is a nearby consecutive load to the one provided
11712 // (regardless of alignment). We search up and down the chain, looking through
11713 // token factors and other loads (but nothing else). As a result, a true result
11714 // indicates that it is safe to create a new consecutive load adjacent to the
11715 // load provided.
11716 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
11717 SDValue Chain = LD->getChain();
11718 EVT VT = LD->getMemoryVT();
11720 SmallSet<SDNode *, 16> LoadRoots;
11721 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
11722 SmallSet<SDNode *, 16> Visited;
11724 // First, search up the chain, branching to follow all token-factor operands.
11725 // If we find a consecutive load, then we're done, otherwise, record all
11726 // nodes just above the top-level loads and token factors.
11727 while (!Queue.empty()) {
11728 SDNode *ChainNext = Queue.pop_back_val();
11729 if (!Visited.insert(ChainNext).second)
11730 continue;
11732 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
11733 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
11734 return true;
11736 if (!Visited.count(ChainLD->getChain().getNode()))
11737 Queue.push_back(ChainLD->getChain().getNode());
11738 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
11739 for (const SDUse &O : ChainNext->ops())
11740 if (!Visited.count(O.getNode()))
11741 Queue.push_back(O.getNode());
11742 } else
11743 LoadRoots.insert(ChainNext);
11744 }
11746 // Second, search down the chain, starting from the top-level nodes recorded
11747 // in the first phase. These top-level nodes are the nodes just above all
11748 // loads and token factors. Starting with their uses, recursively look through
11749 // all loads (just the chain uses) and token factors to find a consecutive
11750 // load.
11751 Visited.clear();
11752 Queue.clear();
11754 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
11755 IE = LoadRoots.end(); I != IE; ++I) {
11756 Queue.push_back(*I);
11758 while (!Queue.empty()) {
11759 SDNode *LoadRoot = Queue.pop_back_val();
11760 if (!Visited.insert(LoadRoot).second)
11761 continue;
11763 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
11764 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
11765 return true;
11767 for (SDNode::use_iterator UI = LoadRoot->use_begin(),
11768 UE = LoadRoot->use_end(); UI != UE; ++UI)
11769 if (((isa<MemSDNode>(*UI) &&
11770 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
11771 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
11772 Queue.push_back(*UI);
11776 return false;
11779 /// This function is called when we have proved that a SETCC node can be replaced
11780 /// by subtraction (and other supporting instructions) so that the result of the
11781 /// comparison is kept in a GPR instead of a CR field. This function is purely for
11782 /// codegen purposes and has some flags to guide the codegen process.
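///
/// For example (a sketch, assuming i32 operands on a 64-bit target, so
/// Size == 64): for (setult x, y) both operands are zero extended to i64,
/// so zext(x) - zext(y) is negative exactly when x <u y, and shifting the
/// difference right by 63 leaves that borrow bit in bit 0. SETULE is then
/// computed as the complement of (setult y, x), hence the Swap and
/// Complement flags.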
11783 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
11784 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
11785 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
11787 // Zero extend the operands to the largest legal integer. The original
11788 // operands must be of a strictly smaller size.
11789 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
11790 DAG.getConstant(Size, DL, MVT::i32));
11791 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
11792 DAG.getConstant(Size, DL, MVT::i32));
11794 // Swap if needed. Depends on the condition code.
11795 if (Swap)
11796 std::swap(Op0, Op1);
11798 // Subtract extended integers.
11799 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
11801 // Move the sign bit to the least significant position and zero out the rest.
11802 // Now the least significant bit carries the result of original comparison.
11803 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
11804 DAG.getConstant(Size - 1, DL, MVT::i32));
11805 auto Final = Shifted;
11807 // Complement the result if needed. Based on the condition code.
11808 if (Complement)
11809 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
11810 DAG.getConstant(1, DL, MVT::i64));
11812 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
11815 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
11816 DAGCombinerInfo &DCI) const {
11817 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
11819 SelectionDAG &DAG = DCI.DAG;
11820 SDLoc DL(N);
11822 // The size of the integers being compared plays a critical role in the
11823 // following analysis, so we prefer to do this when all types are legal.
11824 if (!DCI.isAfterLegalizeDAG())
11825 return SDValue();
11827 // If all users of the SETCC extend its value to a legal integer type,
11828 // then we replace the SETCC with a subtraction.
11829 for (SDNode::use_iterator UI = N->use_begin(),
11830 UE = N->use_end(); UI != UE; ++UI) {
11831 if (UI->getOpcode() != ISD::ZERO_EXTEND)
11832 return SDValue();
11835 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
11836 auto OpSize = N->getOperand(0).getValueSizeInBits();
11838 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
11840 if (OpSize < Size) {
11841 switch (CC) {
11842 default: break;
11843 case ISD::SETULT:
11844 return generateEquivalentSub(N, Size, false, false, DL, DAG);
11845 case ISD::SETULE:
11846 return generateEquivalentSub(N, Size, true, true, DL, DAG);
11847 case ISD::SETUGT:
11848 return generateEquivalentSub(N, Size, false, true, DL, DAG);
11849 case ISD::SETUGE:
11850 return generateEquivalentSub(N, Size, true, false, DL, DAG);
11854 return SDValue();
11857 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
11858 DAGCombinerInfo &DCI) const {
11859 SelectionDAG &DAG = DCI.DAG;
11860 SDLoc dl(N);
11862 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
11863 // If we're tracking CR bits, we need to be careful that we don't have:
11864 // trunc(binary-ops(zext(x), zext(y)))
11865 // or
11866 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
11867 // such that we're unnecessarily moving things into GPRs when it would be
11868 // better to keep them in CR bits.
11870 // Note that trunc here can be an actual i1 trunc, or can be the effective
11871 // truncation that comes from a setcc or select_cc.
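// For example (roughly): with CR-bit tracking, an i1 value computed as
// trunc(xor(zext(a), zext(b))) can be kept as a single CR-bit crxor of a
// and b instead of extending into GPRs, XORing there, and truncating back.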
11872 if (N->getOpcode() == ISD::TRUNCATE &&
11873 N->getValueType(0) != MVT::i1)
11874 return SDValue();
11876 if (N->getOperand(0).getValueType() != MVT::i32 &&
11877 N->getOperand(0).getValueType() != MVT::i64)
11878 return SDValue();
11880 if (N->getOpcode() == ISD::SETCC ||
11881 N->getOpcode() == ISD::SELECT_CC) {
11882 // If we're looking at a comparison, then we need to make sure that the
11883 // high bits (all except for the first) don't affect the result.
11884 ISD::CondCode CC =
11885 cast<CondCodeSDNode>(N->getOperand(
11886 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
11887 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
11889 if (ISD::isSignedIntSetCC(CC)) {
11890 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
11891 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
11892 return SDValue();
11893 } else if (ISD::isUnsignedIntSetCC(CC)) {
11894 if (!DAG.MaskedValueIsZero(N->getOperand(0),
11895 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
11896 !DAG.MaskedValueIsZero(N->getOperand(1),
11897 APInt::getHighBitsSet(OpBits, OpBits-1)))
11898 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
11899 : SDValue());
11900 } else {
11901 // This is neither a signed nor an unsigned comparison; just make sure
11902 // that the high bits are equal.
11903 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
11904 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
11906 // We don't really care about what is known about the first bit (if
11907 // anything), so clear it in all masks prior to comparing them.
11908 Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
11909 Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
11911 if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
11912 return SDValue();
11916 // We now know that the higher-order bits are irrelevant; we just need to
11917 // make sure that all of the intermediate operations are bit operations, and
11918 // all inputs are extensions.
11919 if (N->getOperand(0).getOpcode() != ISD::AND &&
11920 N->getOperand(0).getOpcode() != ISD::OR &&
11921 N->getOperand(0).getOpcode() != ISD::XOR &&
11922 N->getOperand(0).getOpcode() != ISD::SELECT &&
11923 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
11924 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
11925 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
11926 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
11927 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
11928 return SDValue();
11930 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
11931 N->getOperand(1).getOpcode() != ISD::AND &&
11932 N->getOperand(1).getOpcode() != ISD::OR &&
11933 N->getOperand(1).getOpcode() != ISD::XOR &&
11934 N->getOperand(1).getOpcode() != ISD::SELECT &&
11935 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
11936 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
11937 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
11938 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
11939 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
11940 return SDValue();
11942 SmallVector<SDValue, 4> Inputs;
11943 SmallVector<SDValue, 8> BinOps, PromOps;
11944 SmallPtrSet<SDNode *, 16> Visited;
11946 for (unsigned i = 0; i < 2; ++i) {
11947 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
11948 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
11949 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
11950 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
11951 isa<ConstantSDNode>(N->getOperand(i)))
11952 Inputs.push_back(N->getOperand(i));
11953 else
11954 BinOps.push_back(N->getOperand(i));
11956 if (N->getOpcode() == ISD::TRUNCATE)
11957 break;
11960 // Visit all inputs, collect all binary operations (and, or, xor and
11961 // select) that are all fed by extensions.
11962 while (!BinOps.empty()) {
11963 SDValue BinOp = BinOps.back();
11964 BinOps.pop_back();
11966 if (!Visited.insert(BinOp.getNode()).second)
11967 continue;
11969 PromOps.push_back(BinOp);
11971 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
11972 // The condition of the select is not promoted.
11973 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
11974 continue;
11975 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
11976 continue;
11978 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
11979 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
11980 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
11981 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
11982 isa<ConstantSDNode>(BinOp.getOperand(i))) {
11983 Inputs.push_back(BinOp.getOperand(i));
11984 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
11985 BinOp.getOperand(i).getOpcode() == ISD::OR ||
11986 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
11987 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
11988 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
11989 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
11990 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
11991 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
11992 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
11993 BinOps.push_back(BinOp.getOperand(i));
11994 } else {
11995 // We have an input that is not an extension or another binary
11996 // operation; we'll abort this transformation.
11997 return SDValue();
12002 // Make sure that this is a self-contained cluster of operations (which
12003 // is not quite the same thing as saying that everything has only one
12004 // use).
12005 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12006 if (isa<ConstantSDNode>(Inputs[i]))
12007 continue;
12009 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
12010 UE = Inputs[i].getNode()->use_end();
12011 UI != UE; ++UI) {
12012 SDNode *User = *UI;
12013 if (User != N && !Visited.count(User))
12014 return SDValue();
12016 // Make sure that we're not going to promote the non-output-value
12017 // operand(s) or SELECT or SELECT_CC.
12018 // FIXME: Although we could sometimes handle this, and it does occur in
12019 // practice that one of the condition inputs to the select is also one of
12020 // the outputs, we currently can't deal with this.
12021 if (User->getOpcode() == ISD::SELECT) {
12022 if (User->getOperand(0) == Inputs[i])
12023 return SDValue();
12024 } else if (User->getOpcode() == ISD::SELECT_CC) {
12025 if (User->getOperand(0) == Inputs[i] ||
12026 User->getOperand(1) == Inputs[i])
12027 return SDValue();
12032 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
12033 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
12034 UE = PromOps[i].getNode()->use_end();
12035 UI != UE; ++UI) {
12036 SDNode *User = *UI;
12037 if (User != N && !Visited.count(User))
12038 return SDValue();
12040 // Make sure that we're not going to promote the non-output-value
12041 // operand(s) or SELECT or SELECT_CC.
12042 // FIXME: Although we could sometimes handle this, and it does occur in
12043 // practice that one of the condition inputs to the select is also one of
12044 // the outputs, we currently can't deal with this.
12045 if (User->getOpcode() == ISD::SELECT) {
12046 if (User->getOperand(0) == PromOps[i])
12047 return SDValue();
12048 } else if (User->getOpcode() == ISD::SELECT_CC) {
12049 if (User->getOperand(0) == PromOps[i] ||
12050 User->getOperand(1) == PromOps[i])
12051 return SDValue();
12056 // Replace all inputs with the extension operand.
12057 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12058 // Constants may have users outside the cluster of to-be-promoted nodes,
12059 // and so we need to replace those as we do the promotions.
12060 if (isa<ConstantSDNode>(Inputs[i]))
12061 continue;
12062 else
12063 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
12066 std::list<HandleSDNode> PromOpHandles;
12067 for (auto &PromOp : PromOps)
12068 PromOpHandles.emplace_back(PromOp);
12070 // Replace all operations (these are all the same, but have a different
12071 // (i1) return type). DAG.getNode will validate that the types of
12072 // a binary operator match, so go through the list in reverse so that
12073 // we've likely promoted both operands first. Any intermediate truncations or
12074 // extensions disappear.
12075 while (!PromOpHandles.empty()) {
12076 SDValue PromOp = PromOpHandles.back().getValue();
12077 PromOpHandles.pop_back();
12079 if (PromOp.getOpcode() == ISD::TRUNCATE ||
12080 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
12081 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
12082 PromOp.getOpcode() == ISD::ANY_EXTEND) {
12083 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
12084 PromOp.getOperand(0).getValueType() != MVT::i1) {
12085 // The operand is not yet ready (see comment below).
12086 PromOpHandles.emplace_front(PromOp);
12087 continue;
12090 SDValue RepValue = PromOp.getOperand(0);
12091 if (isa<ConstantSDNode>(RepValue))
12092 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
12094 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
12095 continue;
12098 unsigned C;
12099 switch (PromOp.getOpcode()) {
12100 default: C = 0; break;
12101 case ISD::SELECT: C = 1; break;
12102 case ISD::SELECT_CC: C = 2; break;
12105 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
12106 PromOp.getOperand(C).getValueType() != MVT::i1) ||
12107 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
12108 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
12109 // The to-be-promoted operands of this node have not yet been
12110 // promoted (this should be rare because we're going through the
12111 // list backward, but if one of the operands has several users in
12112 // this cluster of to-be-promoted nodes, it is possible).
12113 PromOpHandles.emplace_front(PromOp);
12114 continue;
12117 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
12118 PromOp.getNode()->op_end());
12120 // If there are any constant inputs, make sure they're replaced now.
12121 for (unsigned i = 0; i < 2; ++i)
12122 if (isa<ConstantSDNode>(Ops[C+i]))
12123 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
12125 DAG.ReplaceAllUsesOfValueWith(PromOp,
12126 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
12129 // Now we're left with the initial truncation itself.
12130 if (N->getOpcode() == ISD::TRUNCATE)
12131 return N->getOperand(0);
12133 // Otherwise, this is a comparison. The operands to be compared have just
12134 // changed type (to i1), but everything else is the same.
12135 return SDValue(N, 0);
12138 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
12139 DAGCombinerInfo &DCI) const {
12140 SelectionDAG &DAG = DCI.DAG;
12141 SDLoc dl(N);
12143 // If we're tracking CR bits, we need to be careful that we don't have:
12144 // zext(binary-ops(trunc(x), trunc(y)))
12145 // or
12146 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
12147 // such that we're unnecessarily moving things into CR bits that can more
12148 // efficiently stay in GPRs. Note that if we're not certain that the high
12149 // bits are set as required by the final extension, we still may need to do
12150 // some masking to get the proper behavior.
12152 // This same functionality is important on PPC64 when dealing with
12153 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
12154 // the return values of functions. Because it is so similar, it is handled
12155 // here as well.
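// For example (a sketch): (zext (and (trunc i64:$x), (trunc i64:$y))) to
// i64 can become (and $x, $y), with a final mask of the high bits added
// only if the extension really requires it (see ReallyNeedsExt below).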
12157 if (N->getValueType(0) != MVT::i32 &&
12158 N->getValueType(0) != MVT::i64)
12159 return SDValue();
12161 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
12162 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
12163 return SDValue();
12165 if (N->getOperand(0).getOpcode() != ISD::AND &&
12166 N->getOperand(0).getOpcode() != ISD::OR &&
12167 N->getOperand(0).getOpcode() != ISD::XOR &&
12168 N->getOperand(0).getOpcode() != ISD::SELECT &&
12169 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
12170 return SDValue();
12172 SmallVector<SDValue, 4> Inputs;
12173 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
12174 SmallPtrSet<SDNode *, 16> Visited;
12176 // Visit all inputs, collect all binary operations (and, or, xor and
12177 // select) that are all fed by truncations.
12178 while (!BinOps.empty()) {
12179 SDValue BinOp = BinOps.back();
12180 BinOps.pop_back();
12182 if (!Visited.insert(BinOp.getNode()).second)
12183 continue;
12185 PromOps.push_back(BinOp);
12187 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
12188 // The condition of the select is not promoted.
12189 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
12190 continue;
12191 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
12192 continue;
12194 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
12195 isa<ConstantSDNode>(BinOp.getOperand(i))) {
12196 Inputs.push_back(BinOp.getOperand(i));
12197 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
12198 BinOp.getOperand(i).getOpcode() == ISD::OR ||
12199 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
12200 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
12201 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
12202 BinOps.push_back(BinOp.getOperand(i));
12203 } else {
12204 // We have an input that is not a truncation or another binary
12205 // operation; we'll abort this transformation.
12206 return SDValue();
12211 // The operands of a select that must be truncated when the select is
12212 // promoted, because the operand is actually part of the to-be-promoted set.
12213 DenseMap<SDNode *, EVT> SelectTruncOp[2];
12215 // Make sure that this is a self-contained cluster of operations (which
12216 // is not quite the same thing as saying that everything has only one
12217 // use).
12218 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12219 if (isa<ConstantSDNode>(Inputs[i]))
12220 continue;
12222 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
12223 UE = Inputs[i].getNode()->use_end();
12224 UI != UE; ++UI) {
12225 SDNode *User = *UI;
12226 if (User != N && !Visited.count(User))
12227 return SDValue();
12229 // If we're going to promote the non-output-value operand(s) or SELECT or
12230 // SELECT_CC, record them for truncation.
12231 if (User->getOpcode() == ISD::SELECT) {
12232 if (User->getOperand(0) == Inputs[i])
12233 SelectTruncOp[0].insert(std::make_pair(User,
12234 User->getOperand(0).getValueType()));
12235 } else if (User->getOpcode() == ISD::SELECT_CC) {
12236 if (User->getOperand(0) == Inputs[i])
12237 SelectTruncOp[0].insert(std::make_pair(User,
12238 User->getOperand(0).getValueType()));
12239 if (User->getOperand(1) == Inputs[i])
12240 SelectTruncOp[1].insert(std::make_pair(User,
12241 User->getOperand(1).getValueType()));
12246 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
12247 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
12248 UE = PromOps[i].getNode()->use_end();
12249 UI != UE; ++UI) {
12250 SDNode *User = *UI;
12251 if (User != N && !Visited.count(User))
12252 return SDValue();
12254 // If we're going to promote the non-output-value operand(s) or SELECT or
12255 // SELECT_CC, record them for truncation.
12256 if (User->getOpcode() == ISD::SELECT) {
12257 if (User->getOperand(0) == PromOps[i])
12258 SelectTruncOp[0].insert(std::make_pair(User,
12259 User->getOperand(0).getValueType()));
12260 } else if (User->getOpcode() == ISD::SELECT_CC) {
12261 if (User->getOperand(0) == PromOps[i])
12262 SelectTruncOp[0].insert(std::make_pair(User,
12263 User->getOperand(0).getValueType()));
12264 if (User->getOperand(1) == PromOps[i])
12265 SelectTruncOp[1].insert(std::make_pair(User,
12266 User->getOperand(1).getValueType()));
12271 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
12272 bool ReallyNeedsExt = false;
12273 if (N->getOpcode() != ISD::ANY_EXTEND) {
12274 // If any of the inputs is not already appropriately sign/zero extended,
12275 // we'll still need to do that extension at the end.
12276 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12277 if (isa<ConstantSDNode>(Inputs[i]))
12278 continue;
12280 unsigned OpBits =
12281 Inputs[i].getOperand(0).getValueSizeInBits();
12282 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
12284 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
12285 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
12286 APInt::getHighBitsSet(OpBits,
12287 OpBits-PromBits))) ||
12288 (N->getOpcode() == ISD::SIGN_EXTEND &&
12289 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
12290 (OpBits-(PromBits-1)))) {
12291 ReallyNeedsExt = true;
12292 break;
12297 // Replace all inputs, either with the truncation operand, or a
12298 // truncation or extension to the final output type.
12299 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12300 // Constant inputs need to be replaced with the to-be-promoted nodes that
12301 // use them because they might have users outside of the cluster of
12302 // promoted nodes.
12303 if (isa<ConstantSDNode>(Inputs[i]))
12304 continue;
12306 SDValue InSrc = Inputs[i].getOperand(0);
12307 if (Inputs[i].getValueType() == N->getValueType(0))
12308 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
12309 else if (N->getOpcode() == ISD::SIGN_EXTEND)
12310 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
12311 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
12312 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12313 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
12314 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
12315 else
12316 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
12317 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
12320 std::list<HandleSDNode> PromOpHandles;
12321 for (auto &PromOp : PromOps)
12322 PromOpHandles.emplace_back(PromOp);
12324 // Replace all operations (these are all the same, but have a different
12325 // (promoted) return type). DAG.getNode will validate that the types of
12326 // a binary operator match, so go through the list in reverse so that
12327 // we've likely promoted both operands first.
12328 while (!PromOpHandles.empty()) {
12329 SDValue PromOp = PromOpHandles.back().getValue();
12330 PromOpHandles.pop_back();
12332 unsigned C;
12333 switch (PromOp.getOpcode()) {
12334 default: C = 0; break;
12335 case ISD::SELECT: C = 1; break;
12336 case ISD::SELECT_CC: C = 2; break;
12339 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
12340 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
12341 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
12342 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
12343 // The to-be-promoted operands of this node have not yet been
12344 // promoted (this should be rare because we're going through the
12345 // list backward, but if one of the operands has several users in
12346 // this cluster of to-be-promoted nodes, it is possible).
12347 PromOpHandles.emplace_front(PromOp);
12348 continue;
12351 // For SELECT and SELECT_CC nodes, we do a similar check for any
12352 // to-be-promoted comparison inputs.
12353 if (PromOp.getOpcode() == ISD::SELECT ||
12354 PromOp.getOpcode() == ISD::SELECT_CC) {
12355 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
12356 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
12357 (SelectTruncOp[1].count(PromOp.getNode()) &&
12358 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
12359 PromOpHandles.emplace_front(PromOp);
12360 continue;
12364 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
12365 PromOp.getNode()->op_end());
12367 // If this node has constant inputs, then they'll need to be promoted here.
12368 for (unsigned i = 0; i < 2; ++i) {
12369 if (!isa<ConstantSDNode>(Ops[C+i]))
12370 continue;
12371 if (Ops[C+i].getValueType() == N->getValueType(0))
12372 continue;
12374 if (N->getOpcode() == ISD::SIGN_EXTEND)
12375 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
12376 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12377 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
12378 else
12379 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
12382 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
12383 // truncate them again to the original value type.
12384 if (PromOp.getOpcode() == ISD::SELECT ||
12385 PromOp.getOpcode() == ISD::SELECT_CC) {
12386 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
12387 if (SI0 != SelectTruncOp[0].end())
12388 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
12389 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
12390 if (SI1 != SelectTruncOp[1].end())
12391 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
12394 DAG.ReplaceAllUsesOfValueWith(PromOp,
12395 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
12398 // Now we're left with the initial extension itself.
12399 if (!ReallyNeedsExt)
12400 return N->getOperand(0);
12402 // To zero extend, just mask off everything except for the first bit (in the
12403 // i1 case).
12404 if (N->getOpcode() == ISD::ZERO_EXTEND)
12405 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
12406 DAG.getConstant(APInt::getLowBitsSet(
12407 N->getValueSizeInBits(0), PromBits),
12408 dl, N->getValueType(0)));
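// To sign extend, shift the promoted bits to the top of the register with a
// SHL and then arithmetic-shift them back down with an SRA.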
12410 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
12411 "Invalid extension type");
12412 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
12413 SDValue ShiftCst =
12414 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
12415 return DAG.getNode(
12416 ISD::SRA, dl, N->getValueType(0),
12417 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
12418 ShiftCst);
12421 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
12422 DAGCombinerInfo &DCI) const {
12423 assert(N->getOpcode() == ISD::SETCC &&
12424 "Should be called with a SETCC node");
12426 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
12427 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
12428 SDValue LHS = N->getOperand(0);
12429 SDValue RHS = N->getOperand(1);
12431 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
12432 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
12433 LHS.hasOneUse())
12434 std::swap(LHS, RHS);
12436 // x == 0-y --> x+y == 0
12437 // x != 0-y --> x+y != 0
12438 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
12439 RHS.hasOneUse()) {
12440 SDLoc DL(N);
12441 SelectionDAG &DAG = DCI.DAG;
12442 EVT VT = N->getValueType(0);
12443 EVT OpVT = LHS.getValueType();
12444 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
12445 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
12449 return DAGCombineTruncBoolExt(N, DCI);
12452 // Is this an extending load from an f32 to an f64?
12453 static bool isFPExtLoad(SDValue Op) {
12454 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
12455 return LD->getExtensionType() == ISD::EXTLOAD &&
12456 Op.getValueType() == MVT::f64;
12457 return false;
12460 /// Reduces the number of fp-to-int conversions when building a vector.
12462 /// If this vector is built out of floating to integer conversions,
12463 /// transform it to a vector built out of floating point values followed by a
12464 /// single floating to integer conversion of the vector.
12465 /// Namely (build_vector (fptosi $A), (fptosi $B), ...)
12466 /// becomes (fptosi (build_vector ($A, $B, ...)))
12467 SDValue PPCTargetLowering::
12468 combineElementTruncationToVectorTruncation(SDNode *N,
12469 DAGCombinerInfo &DCI) const {
12470 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
12471 "Should be called with a BUILD_VECTOR node");
12473 SelectionDAG &DAG = DCI.DAG;
12474 SDLoc dl(N);
12476 SDValue FirstInput = N->getOperand(0);
12477 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
12478 "The input operand must be an fp-to-int conversion.");
12480 // This combine happens after legalization so the fp_to_[su]i nodes are
12481 // already converted to PPCISD nodes.
12482 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
12483 if (FirstConversion == PPCISD::FCTIDZ ||
12484 FirstConversion == PPCISD::FCTIDUZ ||
12485 FirstConversion == PPCISD::FCTIWZ ||
12486 FirstConversion == PPCISD::FCTIWUZ) {
12487 bool IsSplat = true;
12488 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
12489 FirstConversion == PPCISD::FCTIWUZ;
12490 EVT SrcVT = FirstInput.getOperand(0).getValueType();
12491 SmallVector<SDValue, 4> Ops;
12492 EVT TargetVT = N->getValueType(0);
12493 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
12494 SDValue NextOp = N->getOperand(i);
12495 if (NextOp.getOpcode() != PPCISD::MFVSR)
12496 return SDValue();
12497 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
12498 if (NextConversion != FirstConversion)
12499 return SDValue();
12500 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
12501 // This is not valid if the input was originally double precision. It is
12502 // also not profitable unless the input is an extending load, in which
12503 // case doing this combine allows us to combine consecutive loads.
12504 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
12505 return SDValue();
12506 if (N->getOperand(i) != FirstInput)
12507 IsSplat = false;
12510 // If this is a splat, we leave it as-is since there will be only a single
12511 // fp-to-int conversion followed by a splat of the integer. This is better
12512 // for 32-bit and smaller ints and neutral for 64-bit ints.
12513 if (IsSplat)
12514 return SDValue();
12516 // Now that we know we have the right type of node, get its operands
12517 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
12518 SDValue In = N->getOperand(i).getOperand(0);
12519 if (Is32Bit) {
12520 // For 32-bit values, we need to add an FP_ROUND node (if we made it
12521 // here, we know that all inputs are extending loads so this is safe).
12522 if (In.isUndef())
12523 Ops.push_back(DAG.getUNDEF(SrcVT));
12524 else {
12525 SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
12526 MVT::f32, In.getOperand(0),
12527 DAG.getIntPtrConstant(1, dl));
12528 Ops.push_back(Trunc);
12530 } else
12531 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
12534 unsigned Opcode;
12535 if (FirstConversion == PPCISD::FCTIDZ ||
12536 FirstConversion == PPCISD::FCTIWZ)
12537 Opcode = ISD::FP_TO_SINT;
12538 else
12539 Opcode = ISD::FP_TO_UINT;
12541 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
12542 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
12543 return DAG.getNode(Opcode, dl, TargetVT, BV);
12545 return SDValue();
12548 /// Reduce the number of loads when building a vector.
12550 /// Building a vector out of multiple loads can be converted to a load
12551 /// of the vector type if the loads are consecutive. If the loads are
12552 /// consecutive but in descending order, a shuffle is added at the end
12553 /// to reorder the vector.
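///
/// For example (a sketch): (build_vector (load a), (load a+4), (load a+8),
/// (load a+12)) with i32 elements becomes a single v4i32 load from a; if
/// the loads instead run from a+12 down to a, the vector is loaded and then
/// reversed with a vector_shuffle.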
12554 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
12555 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
12556 "Should be called with a BUILD_VECTOR node");
12558 SDLoc dl(N);
12560 // Return early for non-byte-sized types, as they can't be consecutive.
12561 if (!N->getValueType(0).getVectorElementType().isByteSized())
12562 return SDValue();
12564 bool InputsAreConsecutiveLoads = true;
12565 bool InputsAreReverseConsecutive = true;
12566 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
12567 SDValue FirstInput = N->getOperand(0);
12568 bool IsRoundOfExtLoad = false;
12570 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
12571 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
12572 LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
12573 IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
12575 // Not a build vector of (possibly fp_rounded) loads.
12576 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
12577 N->getNumOperands() == 1)
12578 return SDValue();
12580 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
12581 // If any inputs are fp_round(extload), they all must be.
12582 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
12583 return SDValue();
12585 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
12586 N->getOperand(i);
12587 if (NextInput.getOpcode() != ISD::LOAD)
12588 return SDValue();
12590 SDValue PreviousInput =
12591 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
12592 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
12593 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
12595 // If any inputs are fp_round(extload), they all must be.
12596 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
12597 return SDValue();
12599 if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
12600 InputsAreConsecutiveLoads = false;
12601 if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
12602 InputsAreReverseConsecutive = false;
12604 // Exit early if the loads are neither consecutive nor reverse consecutive.
12605 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
12606 return SDValue();
12609 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
12610 "The loads cannot be both consecutive and reverse consecutive.");
12612 SDValue FirstLoadOp =
12613 IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
12614 SDValue LastLoadOp =
12615 IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
12616 N->getOperand(N->getNumOperands()-1);
12618 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
12619 LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
12620 if (InputsAreConsecutiveLoads) {
12621 assert(LD1 && "Input needs to be a LoadSDNode.");
12622 return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
12623 LD1->getBasePtr(), LD1->getPointerInfo(),
12624 LD1->getAlignment());
12626 if (InputsAreReverseConsecutive) {
12627 assert(LDL && "Input needs to be a LoadSDNode.");
12628 SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
12629 LDL->getBasePtr(), LDL->getPointerInfo(),
12630 LDL->getAlignment());
12631 SmallVector<int, 16> Ops;
12632 for (int i = N->getNumOperands() - 1; i >= 0; i--)
12633 Ops.push_back(i);
12635 return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
12636 DAG.getUNDEF(N->getValueType(0)), Ops);
12638 return SDValue();
12641 // This function adds the vector_shuffle needed to get
12642 // the elements of the vector extract into the positions
12643 // specified by the CorrectElems encoding.
12644 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
12645 SDValue Input, uint64_t Elems,
12646 uint64_t CorrectElems) {
12647 SDLoc dl(N);
12649 unsigned NumElems = Input.getValueType().getVectorNumElements();
12650 SmallVector<int, 16> ShuffleMask(NumElems, -1);
12652 // Knowing the element indices being extracted from the original
12653 // vector and the order in which they're being inserted, just put
12654 // them at element indices required for the instruction.
12655 for (unsigned i = 0; i < N->getNumOperands(); i++) {
12656 if (DAG.getDataLayout().isLittleEndian())
12657 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
12658 else
12659 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
12660 CorrectElems = CorrectElems >> 8;
12661 Elems = Elems >> 8;
12664 SDValue Shuffle =
12665 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
12666 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
12668 EVT Ty = N->getValueType(0);
12669 SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
12670 return BV;
12673 // Look for build vector patterns where input operands come from sign
12674 // extended vector_extract elements of specific indices. If the correct indices
12675 // aren't used, add a vector shuffle to fix up the indices and create a new
12676 // PPCISD::SExtVElems node, which selects the vector sign extend instructions
12677 // during instruction selection.
12678 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
12679 // This array encodes the indices that the vector sign extend instructions
12680 // extract from when extending from one type to another for both BE and LE.
12681 // The right nibble of each byte corresponds to the LE indices,
12682 // and the left nibble of each byte corresponds to the BE indices.
12683 // For example: 0x3074B8FC byte->word
12684 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
12685 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
12686 // For example: 0x000070F8 byte->double word
12687 // For LE: the allowed indices are: 0x0,0x8
12688 // For BE: the allowed indices are: 0x7,0xF
12689 uint64_t TargetElems[] = {
12690 0x3074B8FC, // b->w
12691 0x000070F8, // b->d
12692 0x10325476, // h->w
12693 0x00003074, // h->d
12694 0x00001032, // w->d
12697 uint64_t Elems = 0;
12698 int Index;
12699 SDValue Input;
12701 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
12702 if (!Op)
12703 return false;
12704 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
12705 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
12706 return false;
12708 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
12709 // of the right width.
12710 SDValue Extract = Op.getOperand(0);
12711 if (Extract.getOpcode() == ISD::ANY_EXTEND)
12712 Extract = Extract.getOperand(0);
12713 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12714 return false;
12716 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
12717 if (!ExtOp)
12718 return false;
12720 Index = ExtOp->getZExtValue();
12721 if (Input && Input != Extract.getOperand(0))
12722 return false;
12724 if (!Input)
12725 Input = Extract.getOperand(0);
12727 Elems = Elems << 8;
12728 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
12729 Elems |= Index;
12731 return true;
12734 // If the build vector operands aren't sign-extended vector extracts
12735 // of the same input vector, then return.
12736 for (unsigned i = 0; i < N->getNumOperands(); i++) {
12737 if (!isSExtOfVecExtract(N->getOperand(i))) {
12738 return SDValue();
12742 // If the vector extract indices are not correct, add the appropriate
12743 // vector_shuffle.
12744 int TgtElemArrayIdx;
12745 int InputSize = Input.getValueType().getScalarSizeInBits();
12746 int OutputSize = N->getValueType(0).getScalarSizeInBits();
12747 if (InputSize + OutputSize == 40)
12748 TgtElemArrayIdx = 0;
12749 else if (InputSize + OutputSize == 72)
12750 TgtElemArrayIdx = 1;
12751 else if (InputSize + OutputSize == 48)
12752 TgtElemArrayIdx = 2;
12753 else if (InputSize + OutputSize == 80)
12754 TgtElemArrayIdx = 3;
12755 else if (InputSize + OutputSize == 96)
12756 TgtElemArrayIdx = 4;
12757 else
12758 return SDValue();
12760 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
12761 CorrectElems = DAG.getDataLayout().isLittleEndian()
12762 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
12763 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
12764 if (Elems != CorrectElems) {
12765 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
12768 // Regular lowering will catch cases where a shuffle is not needed.
12769 return SDValue();
12772 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
12773 DAGCombinerInfo &DCI) const {
12774 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
12775 "Should be called with a BUILD_VECTOR node");
12777 SelectionDAG &DAG = DCI.DAG;
12778 SDLoc dl(N);
12780 if (!Subtarget.hasVSX())
12781 return SDValue();
12783 // The target independent DAG combiner will leave a build_vector of
12784 // float-to-int conversions intact. We can generate MUCH better code for
12785 // a float-to-int conversion of a vector of floats.
12786 SDValue FirstInput = N->getOperand(0);
12787 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
12788 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
12789 if (Reduced)
12790 return Reduced;
12793 // If we're building a vector out of consecutive loads, just load that
12794 // vector type.
12795 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
12796 if (Reduced)
12797 return Reduced;
12799 // If we're building a vector out of extended elements from another vector
12800 // we have P9 vector integer extend instructions. The code assumes legal
12801 // input types (i.e. it can't handle things like v4i16) so do not run before
12802 // legalization.
12803 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
12804 Reduced = combineBVOfVecSExt(N, DAG);
12805 if (Reduced)
12806 return Reduced;
12810 if (N->getValueType(0) != MVT::v2f64)
12811 return SDValue();
12813 // Looking for:
12814 // (build_vector ([su]int_to_fp (extractelt 0)), ([su]int_to_fp (extractelt 1)))
12815 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
12816 FirstInput.getOpcode() != ISD::UINT_TO_FP)
12817 return SDValue();
12818 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
12819 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
12820 return SDValue();
12821 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
12822 return SDValue();
12824 SDValue Ext1 = FirstInput.getOperand(0);
12825 SDValue Ext2 = N->getOperand(1).getOperand(0);
12826 if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12827 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12828 return SDValue();
12830 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
12831 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
12832 if (!Ext1Op || !Ext2Op)
12833 return SDValue();
12834 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
12835 Ext1.getOperand(0) != Ext2.getOperand(0))
12836 return SDValue();
12838 int FirstElem = Ext1Op->getZExtValue();
12839 int SecondElem = Ext2Op->getZExtValue();
12840 int SubvecIdx;
12841 if (FirstElem == 0 && SecondElem == 1)
12842 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
12843 else if (FirstElem == 2 && SecondElem == 3)
12844 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
12845 else
12846 return SDValue();
12848 SDValue SrcVec = Ext1.getOperand(0);
12849 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
12850 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
12851 return DAG.getNode(NodeType, dl, MVT::v2f64,
12852 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
12855 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
12856 DAGCombinerInfo &DCI) const {
12857 assert((N->getOpcode() == ISD::SINT_TO_FP ||
12858 N->getOpcode() == ISD::UINT_TO_FP) &&
12859 "Need an int -> FP conversion node here");
12861 if (useSoftFloat() || !Subtarget.has64BitSupport())
12862 return SDValue();
12864 SelectionDAG &DAG = DCI.DAG;
12865 SDLoc dl(N);
12866 SDValue Op(N, 0);
12868 // Don't handle ppc_fp128 here, or integer widths the hardware cannot
12869 // convert directly.
12870 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
12871 return SDValue();
12872 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
12873 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
12874 return SDValue();
12876 SDValue FirstOperand(Op.getOperand(0));
12877 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
12878 (FirstOperand.getValueType() == MVT::i8 ||
12879 FirstOperand.getValueType() == MVT::i16);
12880 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
12881 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
12882 bool DstDouble = Op.getValueType() == MVT::f64;
12883 unsigned ConvOp = Signed ?
12884 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
12885 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
12886 SDValue WidthConst =
12887 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
12888 dl, false);
12889 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
12890 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
12891 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
12892 DAG.getVTList(MVT::f64, MVT::Other),
12893 Ops, MVT::i8, LDN->getMemOperand());
12895 // For signed conversion, we need to sign-extend the value in the VSR
12896 if (Signed) {
12897 SDValue ExtOps[] = { Ld, WidthConst };
12898 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
12899 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
12900 } else
12901 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
12905 // For i32 intermediate values, unfortunately, the conversion functions
12906 // leave the upper 32 bits of the value undefined. Within the set of
12907 // scalar instructions, we have no method for zero- or sign-extending the
12908 // value. Thus, we cannot handle i32 intermediate values here.
12909 if (Op.getOperand(0).getValueType() == MVT::i32)
12910 return SDValue();
12912 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
12913 "UINT_TO_FP is supported only with FPCVT");
12915 // If we have FCFIDS, then use it when converting to single-precision.
12916 // Otherwise, convert to double-precision and then round.
12917 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
12918 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
12919 : PPCISD::FCFIDS)
12920 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
12921 : PPCISD::FCFID);
12922 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
12923 ? MVT::f32
12924 : MVT::f64;
12926 // If we're converting from a float to an int and back to a float again,
12927 // then we don't need the store/load pair at all.
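// For example (a sketch): f64 d = (double)(long long)src, where src is
// already an f64, round-trips on the FP side as fctidz followed by fcfid,
// with no store/load of the intermediate i64.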
12928 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
12929 Subtarget.hasFPCVT()) ||
12930 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
12931 SDValue Src = Op.getOperand(0).getOperand(0);
12932 if (Src.getValueType() == MVT::f32) {
12933 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
12934 DCI.AddToWorklist(Src.getNode());
12935 } else if (Src.getValueType() != MVT::f64) {
12936 // Make sure that we don't pick up a ppc_fp128 source value.
12937 return SDValue();
12940 unsigned FCTOp =
12941 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
12942 PPCISD::FCTIDUZ;
12944 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
12945 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
12947 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
12948 FP = DAG.getNode(ISD::FP_ROUND, dl,
12949 MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
12950 DCI.AddToWorklist(FP.getNode());
12953 return FP;
12956 return SDValue();
12959 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
12960 // builtins) into loads with swaps.
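// On little-endian subtargets that need swaps, the expansion is roughly:
//   lxvd2x vs0, 0, r3    ; load with the two doublewords element-reversed
//   xxswapd vs0, vs0     ; restore the expected element order
// (a sketch; actual registers are chosen later by selection/allocation).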
12961 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
12962 DAGCombinerInfo &DCI) const {
12963 SelectionDAG &DAG = DCI.DAG;
12964 SDLoc dl(N);
12965 SDValue Chain;
12966 SDValue Base;
12967 MachineMemOperand *MMO;
12969 switch (N->getOpcode()) {
12970 default:
12971 llvm_unreachable("Unexpected opcode for little endian VSX load");
12972 case ISD::LOAD: {
12973 LoadSDNode *LD = cast<LoadSDNode>(N);
12974 Chain = LD->getChain();
12975 Base = LD->getBasePtr();
12976 MMO = LD->getMemOperand();
12977 // If the MMO suggests this isn't a load of a full vector, leave
12978 // things alone. For a built-in, we have to make the change for
12979 // correctness, so if there is a size problem, that is a bug.
12980 if (MMO->getSize() < 16)
12981 return SDValue();
12982 break;
12984 case ISD::INTRINSIC_W_CHAIN: {
12985 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
12986 Chain = Intrin->getChain();
12987 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
12988 // us what we want. Get operand 2 instead.
12989 Base = Intrin->getOperand(2);
12990 MMO = Intrin->getMemOperand();
12991 break;
12995 MVT VecTy = N->getValueType(0).getSimpleVT();
12997 // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
12998 // 16-byte aligned and the type is a vector with elements up to 4 bytes.
12999 if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment() % 16) &&
13000     VecTy.getScalarSizeInBits() <= 32) {
13001 return SDValue();
13004 SDValue LoadOps[] = { Chain, Base };
13005 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
13006 DAG.getVTList(MVT::v2f64, MVT::Other),
13007 LoadOps, MVT::v2f64, MMO);
13009 DCI.AddToWorklist(Load.getNode());
13010 Chain = Load.getValue(1);
13011 SDValue Swap = DAG.getNode(
13012 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
13013 DCI.AddToWorklist(Swap.getNode());
13015 // Add a bitcast if the resulting load type doesn't match v2f64.
13016 if (VecTy != MVT::v2f64) {
13017 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
13018 DCI.AddToWorklist(N.getNode());
13019 // Package {bitcast value, swap's chain} to match Load's shape.
13020 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
13021 N, Swap.getValue(1));
13024 return Swap;
13027 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
13028 // builtins) into stores with swaps.
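// This mirrors the load case above: the value is permuted with xxswapd
// first and then stored with stxvd2x, roughly:
//   xxswapd vs0, vs1
//   stxvd2x vs0, 0, r3
// (a sketch only; see expandVSXLoadForLE).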
13029 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
13030 DAGCombinerInfo &DCI) const {
13031 SelectionDAG &DAG = DCI.DAG;
13032 SDLoc dl(N);
13033 SDValue Chain;
13034 SDValue Base;
13035 unsigned SrcOpnd;
13036 MachineMemOperand *MMO;
13038 switch (N->getOpcode()) {
13039 default:
13040 llvm_unreachable("Unexpected opcode for little endian VSX store");
13041 case ISD::STORE: {
13042 StoreSDNode *ST = cast<StoreSDNode>(N);
13043 Chain = ST->getChain();
13044 Base = ST->getBasePtr();
13045 MMO = ST->getMemOperand();
13046 SrcOpnd = 1;
13047 // If the MMO suggests this isn't a store of a full vector, leave
13048 // things alone. For a built-in, we have to make the change for
13049 // correctness, so if there is a size problem, that is a bug.
13050 if (MMO->getSize() < 16)
13051 return SDValue();
13052 break;
13054 case ISD::INTRINSIC_VOID: {
13055 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
13056 Chain = Intrin->getChain();
13057 // Intrin->getBasePtr() oddly does not get what we want.
13058 Base = Intrin->getOperand(3);
13059 MMO = Intrin->getMemOperand();
13060 SrcOpnd = 2;
13061 break;
13065 SDValue Src = N->getOperand(SrcOpnd);
13066 MVT VecTy = Src.getValueType().getSimpleVT();
13068 // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
13069 // 16-byte aligned and the type is a vector with elements up to 4 bytes.
13070 if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment() % 16) &&
13071     VecTy.getScalarSizeInBits() <= 32) {
13072 return SDValue();
13075 // All stores are done as v2f64 and possible bit cast.
13076 if (VecTy != MVT::v2f64) {
13077 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
13078 DCI.AddToWorklist(Src.getNode());
13081 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
13082 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
13083 DCI.AddToWorklist(Swap.getNode());
13084 Chain = Swap.getValue(1);
13085 SDValue StoreOps[] = { Chain, Swap, Base };
13086 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
13087 DAG.getVTList(MVT::Other),
13088 StoreOps, VecTy, MMO);
13089 DCI.AddToWorklist(Store.getNode());
13090 return Store;
13093 // Handle DAG combine for STORE (FP_TO_INT F).
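// The conversion result is kept in a VSR and stored directly: e.g. a
// (store (fp_to_sint f64:$f)) becomes PPCISD::FP_TO_SINT_IN_VSR feeding
// PPCISD::ST_VSR_SCAL_INT, avoiding a round trip through a GPR (a sketch;
// the instructions selected depend on the subtarget and store width).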
13094 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
13095 DAGCombinerInfo &DCI) const {
13097 SelectionDAG &DAG = DCI.DAG;
13098 SDLoc dl(N);
13099 unsigned Opcode = N->getOperand(1).getOpcode();
13101 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
13102 && "Not a FP_TO_INT Instruction!");
13104 SDValue Val = N->getOperand(1).getOperand(0);
13105 EVT Op1VT = N->getOperand(1).getValueType();
13106 EVT ResVT = Val.getValueType();
13108 // Floating point types smaller than 32 bits are not legal on Power.
13109 if (ResVT.getScalarSizeInBits() < 32)
13110 return SDValue();
13112 // Only perform the combine for conversions to i64/i32, or Power9 i16/i8.
13113 bool ValidTypeForStoreFltAsInt =
13114 (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
13115 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
13117 if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Altivec() ||
13118 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
13119 return SDValue();
13121 // Extend f32 values to f64
13122 if (ResVT.getScalarSizeInBits() == 32) {
13123 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
13124 DCI.AddToWorklist(Val.getNode());
13127 // Set signed or unsigned conversion opcode.
13128 unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
13129 PPCISD::FP_TO_SINT_IN_VSR :
13130 PPCISD::FP_TO_UINT_IN_VSR;
13132 Val = DAG.getNode(ConvOpcode,
13133 dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
13134 DCI.AddToWorklist(Val.getNode());
13136 // Set number of bytes being converted.
13137 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
13138 SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
13139 DAG.getIntPtrConstant(ByteSize, dl, false),
13140 DAG.getValueType(Op1VT) };
13142 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
13143 DAG.getVTList(MVT::Other), Ops,
13144 cast<StoreSDNode>(N)->getMemoryVT(),
13145 cast<StoreSDNode>(N)->getMemOperand());
13147 DCI.AddToWorklist(Val.getNode());
13148 return Val;
13151 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
13152 LSBaseSDNode *LSBase,
13153 DAGCombinerInfo &DCI) const {
13154 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
13155 "Not a reverse memop pattern!");
13157 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
13158 auto Mask = SVN->getMask();
13159 int i = 0;
13160 auto I = Mask.rbegin();
13161 auto E = Mask.rend();
13163 for (; I != E; ++I) {
13164 if (*I != i)
13165 return false;
13166 i++;
13168 return true;
13171 SelectionDAG &DAG = DCI.DAG;
13172 EVT VT = SVN->getValueType(0);
13174 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
13175 return SDValue();
13177 // Before P9, the PPCVSXSwapRemoval pass rewrites the element order.
13178 // This combine would conflict with that pass, so don't do it here;
13179 // see the comment in PPCVSXSwapRemoval.cpp.
13180 if (!Subtarget.hasP9Vector())
13181 return SDValue();
13183 if (!IsElementReverse(SVN))
13184 return SDValue();
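// At this point the shuffle reverses all elements of a normal load or
// store, so it can be folded into a single element-reversed memory op:
// e.g. (v16i8 (vector_shuffle <15,14,...,0>, (load $ptr), undef)) becomes
// a single PPCISD::LOAD_VEC_BE of $ptr (a sketch of the LE P9 case below).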
13186 if (LSBase->getOpcode() == ISD::LOAD) {
13187 SDLoc dl(SVN);
13188 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
13189 return DAG.getMemIntrinsicNode(
13190 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
13191 LSBase->getMemoryVT(), LSBase->getMemOperand());
13194 if (LSBase->getOpcode() == ISD::STORE) {
13195 SDLoc dl(LSBase);
13196 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
13197 LSBase->getBasePtr()};
13198 return DAG.getMemIntrinsicNode(
13199 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
13200 LSBase->getMemoryVT(), LSBase->getMemOperand());
13203 llvm_unreachable("Expected a load or store node here");
13206 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
13207 DAGCombinerInfo &DCI) const {
13208 SelectionDAG &DAG = DCI.DAG;
13209 SDLoc dl(N);
13210 switch (N->getOpcode()) {
13211 default: break;
13212 case ISD::ADD:
13213 return combineADD(N, DCI);
13214 case ISD::SHL:
13215 return combineSHL(N, DCI);
13216 case ISD::SRA:
13217 return combineSRA(N, DCI);
13218 case ISD::SRL:
13219 return combineSRL(N, DCI);
13220 case ISD::MUL:
13221 return combineMUL(N, DCI);
13222 case PPCISD::SHL:
13223 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
13224 return N->getOperand(0);
13225 break;
13226 case PPCISD::SRL:
13227 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
13228 return N->getOperand(0);
13229 break;
13230 case PPCISD::SRA:
13231 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
13232 if (C->isNullValue() || // 0 >>s V -> 0.
13233 C->isAllOnesValue()) // -1 >>s V -> -1.
13234 return N->getOperand(0);
13236 break;
13237 case ISD::SIGN_EXTEND:
13238 case ISD::ZERO_EXTEND:
13239 case ISD::ANY_EXTEND:
13240 return DAGCombineExtBoolTrunc(N, DCI);
13241 case ISD::TRUNCATE:
13242 return combineTRUNCATE(N, DCI);
13243 case ISD::SETCC:
13244 if (SDValue CSCC = combineSetCC(N, DCI))
13245 return CSCC;
13246 LLVM_FALLTHROUGH;
13247 case ISD::SELECT_CC:
13248 return DAGCombineTruncBoolExt(N, DCI);
13249 case ISD::SINT_TO_FP:
13250 case ISD::UINT_TO_FP:
13251 return combineFPToIntToFP(N, DCI);
13252 case ISD::VECTOR_SHUFFLE:
13253 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
13254 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
13255 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
13257 break;
13258 case ISD::STORE: {
13260 EVT Op1VT = N->getOperand(1).getValueType();
13261 unsigned Opcode = N->getOperand(1).getOpcode();
13263 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
13264 SDValue Val = combineStoreFPToInt(N, DCI);
13265 if (Val)
13266 return Val;
13269 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
13270 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
13271 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
13272 if (Val)
13273 return Val;
    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {

      // STBRX can only handle simple types and it makes no sense to store
      // fewer than two bytes in byte-reversed order.
      EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
      if (mVT.isExtended() || mVT.getSizeInBits() < 16)
        break;

      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

      // If the type of the BSWAP operand is wider than the stored memory
      // width, it needs to be shifted right before STBRX.
      if (Op1VT.bitsGT(mVT)) {
        int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
        BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
                              DAG.getConstant(Shift, dl, MVT::i32));
        // Need to truncate if this is a bswap of i64 stored as i32/i16.
        if (Op1VT == MVT::i64)
          BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
      }

      SDValue Ops[] = {
        N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
      };
      return
        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
    }
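    // For instance (a sketch, not real DAG output): (store (bswap i32 %v),
    // %ptr) is emitted as a single stwbrx of %v to %ptr instead of a byte
    // swap followed by a normal store.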
    // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
    // This increases the chance of CSE for constant construction.
    if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
        isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
      // Need to sign-extend to 64 bits to handle negative values.
      EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
      uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
                                    MemVT.getSizeInBits());
      SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);

      // DAG.getTruncStore() can't be used here because it doesn't accept
      // the general (base + offset) addressing mode.
      // So we use UpdateNodeOperands and setTruncatingStore instead.
      DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
                             N->getOperand(3));
      cast<StoreSDNode>(N)->setTruncatingStore(true);
      return SDValue(N, 0);
    }

    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
    if (Op1VT.isSimple()) {
      MVT StoreVT = Op1VT.getSimpleVT();
      if (Subtarget.needsSwapsForVSXMemOps() &&
          (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
           StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
        return expandVSXStoreForLE(N, DCI);
    }
    break;
  }
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT VT = LD->getValueType(0);

    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
    if (VT.isSimple()) {
      MVT LoadVT = VT.getSimpleVT();
      if (Subtarget.needsSwapsForVSXMemOps() &&
          (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
           LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
        return expandVSXLoadForLE(N, DCI);
    }

    // We sometimes end up with a 64-bit integer load, from which we extract
    // two single-precision floating-point numbers. This happens with
    // std::complex<float>, and other similar structures, because of the way we
    // canonicalize structure copies. However, if we lack direct moves,
    // then the final bitcasts from the extracted integer values to the
    // floating-point numbers turn into store/load pairs. Even with direct
    // moves, just loading the two floating-point numbers is likely better.
    auto ReplaceTwoFloatLoad = [&]() {
      if (VT != MVT::i64)
        return false;

      if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
          LD->isVolatile())
        return false;

      // We're looking for a sequence like this:
      // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
      //   t16: i64 = srl t13, Constant:i32<32>
      //   t17: i32 = truncate t16
      //   t18: f32 = bitcast t17
      //   t19: i32 = truncate t13
      //   t20: f32 = bitcast t19
      if (!LD->hasNUsesOfValue(2, 0))
        return false;

      auto UI = LD->use_begin();
      while (UI.getUse().getResNo() != 0) ++UI;
      SDNode *Trunc = *UI++;
      while (UI.getUse().getResNo() != 0) ++UI;
      SDNode *RightShift = *UI;
      if (Trunc->getOpcode() != ISD::TRUNCATE)
        std::swap(Trunc, RightShift);

      if (Trunc->getOpcode() != ISD::TRUNCATE ||
          Trunc->getValueType(0) != MVT::i32 ||
          !Trunc->hasOneUse())
        return false;
      if (RightShift->getOpcode() != ISD::SRL ||
          !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
          RightShift->getConstantOperandVal(1) != 32 ||
          !RightShift->hasOneUse())
        return false;

      SDNode *Trunc2 = *RightShift->use_begin();
      if (Trunc2->getOpcode() != ISD::TRUNCATE ||
          Trunc2->getValueType(0) != MVT::i32 ||
          !Trunc2->hasOneUse())
        return false;

      SDNode *Bitcast = *Trunc->use_begin();
      SDNode *Bitcast2 = *Trunc2->use_begin();

      if (Bitcast->getOpcode() != ISD::BITCAST ||
          Bitcast->getValueType(0) != MVT::f32)
        return false;
      if (Bitcast2->getOpcode() != ISD::BITCAST ||
          Bitcast2->getValueType(0) != MVT::f32)
        return false;

      if (Subtarget.isLittleEndian())
        std::swap(Bitcast, Bitcast2);

      // Bitcast has the second float (in memory-layout order) and Bitcast2
      // has the first one.

      SDValue BasePtr = LD->getBasePtr();
      if (LD->isIndexed()) {
        assert(LD->getAddressingMode() == ISD::PRE_INC &&
               "Non-pre-inc AM on PPC?");
        BasePtr =
          DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                      LD->getOffset());
      }

      auto MMOFlags =
          LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
      SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
                                      LD->getPointerInfo(), LD->getAlignment(),
                                      MMOFlags, LD->getAAInfo());
      SDValue AddPtr =
        DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
                    BasePtr, DAG.getIntPtrConstant(4, dl));
      SDValue FloatLoad2 = DAG.getLoad(
          MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
          LD->getPointerInfo().getWithOffset(4),
          MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());

      if (LD->isIndexed()) {
        // Note that DAGCombine should re-form any pre-increment load(s) from
        // what is produced here if that makes sense.
        DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
      }

      DCI.CombineTo(Bitcast2, FloatLoad);
      DCI.CombineTo(Bitcast, FloatLoad2);

      DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
                                    SDValue(FloatLoad2.getNode(), 1));
      return true;
    };
    if (ReplaceTwoFloatLoad())
      return SDValue(N, 0);

    EVT MemVT = LD->getMemoryVT();
    Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
    unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
    Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
    unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
    if (LD->isUnindexed() && VT.isVector() &&
        ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
          // P8 and later hardware should just use LOAD.
          !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
                                       VT == MVT::v4i32 || VT == MVT::v4f32)) ||
         (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
          LD->getAlignment() >= ScalarABIAlignment)) &&
        LD->getAlignment() < ABIAlignment) {
      // This is a type-legal unaligned Altivec or QPX load.
      SDValue Chain = LD->getChain();
      SDValue Ptr = LD->getBasePtr();
      bool isLittleEndian = Subtarget.isLittleEndian();

      // This implements the loading of unaligned vectors as described in
      // the venerable Apple Velocity Engine overview. Specifically:
      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html

      // The general idea is to expand a sequence of one or more unaligned
      // loads into an alignment-based permutation-control instruction (lvsl
      // or lvsr), a series of regular vector loads (which always truncate
      // their input address to an aligned address), and a series of
      // permutations. The results of these permutations are the requested
      // loaded values. The trick is that the last "extra" load is not taken
      // from the address you might suspect (sizeof(vector) bytes after the
      // last requested load), but rather sizeof(vector) - 1 bytes after the
      // last requested vector. The point of this is to avoid a page fault if
      // the base address happened to be aligned. This works because if the
      // base address is aligned, then adding less than a full vector length
      // will cause the last vector in the sequence to be (re)loaded.
      // Otherwise, the next vector will be fetched as you might suspect was
      // necessary.
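      // A concrete sketch (the address value here is an assumption for
      // illustration): with 16-byte vectors and a base address A where
      // A % 16 == 4, the expansion behaves like
      //   PermCntl = lvsl(A)      ; lvsr on little-endian
      //   V0 = lvx(A)             ; loads from A & ~15
      //   V1 = lvx(A + 15)        ; loads from (A + 15) & ~15
      //   Result = vperm(V0, V1, PermCntl)
      // Had A been 16-byte aligned, A + 15 would still land in the same
      // 16-byte block, so the "extra" load cannot touch a new page.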
      // We might be able to reuse the permutation generation from
      // a different base address offset from this one by an aligned amount.
      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
      // optimization later.
      Intrinsic::ID Intr, IntrLD, IntrPerm;
      MVT PermCntlTy, PermTy, LDTy;
      if (Subtarget.hasAltivec()) {
        Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
                                Intrinsic::ppc_altivec_lvsl;
        IntrLD = Intrinsic::ppc_altivec_lvx;
        IntrPerm = Intrinsic::ppc_altivec_vperm;
        PermCntlTy = MVT::v16i8;
        PermTy = MVT::v4i32;
        LDTy = MVT::v4i32;
      } else {
        Intr =   MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
                                       Intrinsic::ppc_qpx_qvlpcls;
        IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
                                       Intrinsic::ppc_qpx_qvlfs;
        IntrPerm = Intrinsic::ppc_qpx_qvfperm;
        PermCntlTy = MVT::v4f64;
        PermTy = MVT::v4f64;
        LDTy = MemVT.getSimpleVT();
      }

      SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);

      // Create the new MMO for the new base load. It is like the original MMO,
      // but represents an area in memory almost twice the vector size centered
      // on the original address. If the address is unaligned, we might start
      // reading up to (sizeof(vector)-1) bytes below the address of the
      // original unaligned load.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineMemOperand *BaseMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                -(long)MemVT.getStoreSize()+1,
                                2*MemVT.getStoreSize()-1);

      // Create the new base load.
      SDValue LDXIntID =
          DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
      SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue BaseLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                BaseLoadOps, LDTy, BaseMMO);

      // Note that the value of IncOffset (which is provided to the next
      // load's pointer info offset value, and thus used to calculate the
      // alignment), and the value of IncValue (which is actually used to
      // increment the pointer value) are different! This is because we
      // require the next load to appear to be aligned, even though it
      // is actually offset from the base pointer by a lesser amount.
      int IncOffset = VT.getSizeInBits() / 8;
      int IncValue = IncOffset;

      // Walk (both up and down) the chain looking for another load at the real
      // (aligned) offset (the alignment of the other load does not matter in
      // this case). If found, then do not use the offset reduction trick, as
      // that will prevent the loads from being later combined (as they would
      // otherwise be duplicates).
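      // For example (illustrative): for a 16-byte v4i32 load, IncOffset is 16,
      // and IncValue drops to 15 when no consecutive load is found, so the
      // extra load reads from (Ptr + 15) & ~15 rather than Ptr + 16.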
      if (!findConsecutiveLoad(LD, DAG))
        --IncValue;

      SDValue Increment =
          DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);

      MachineMemOperand *ExtraMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                1, 2*MemVT.getStoreSize()-1);
      SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue ExtraLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                ExtraLoadOps, LDTy, ExtraMMO);

      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                               BaseLoad.getValue(1), ExtraLoad.getValue(1));

      // Because vperm has a big-endian bias, we must reverse the order
      // of the input vectors and complement the permute control vector
      // when generating little endian code. We have already handled the
      // latter by using lvsr instead of lvsl, so just reverse BaseLoad
      // and ExtraLoad here.
      SDValue Perm;
      if (isLittleEndian)
        Perm = BuildIntrinsicOp(IntrPerm,
                                ExtraLoad, BaseLoad, PermCntl, DAG, dl);
      else
        Perm = BuildIntrinsicOp(IntrPerm,
                                BaseLoad, ExtraLoad, PermCntl, DAG, dl);

      if (VT != PermTy)
        Perm = Subtarget.hasAltivec() ?
                 DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
                 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
                             DAG.getTargetConstant(1, dl, MVT::i64));
                             // second argument is 1 because this rounding
                             // is always exact.

      // The output of the permutation is our loaded result, the TokenFactor is
      // our new chain.
      DCI.CombineTo(N, Perm, TF);
      return SDValue(N, 0);
    }
    }
    break;
  case ISD::INTRINSIC_WO_CHAIN: {
    bool isLittleEndian = Subtarget.isLittleEndian();
    unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
                                         : Intrinsic::ppc_altivec_lvsl);
    if ((IID == Intr ||
         IID == Intrinsic::ppc_qpx_qvlpcld ||
         IID == Intrinsic::ppc_qpx_qvlpcls) &&
        N->getOperand(1)->getOpcode() == ISD::ADD) {
      SDValue Add = N->getOperand(1);

      int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
                 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;

      if (DAG.MaskedValueIsZero(Add->getOperand(1),
                                APInt::getAllOnesValue(Bits /* alignment */)
                                    .zext(Add.getScalarValueSizeInBits()))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode::use_iterator UI = BasePtr->use_begin(),
                                  UE = BasePtr->use_end();
             UI != UE; ++UI) {
          if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
            // We've found another LVSL/LVSR, and this address is an aligned
            // multiple of that one. The results will be the same, so use the
            // one we've just found instead.
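            // For example (illustrative): lvsl(Base + 32) yields the same
            // permute control as lvsl(Base), because adding a multiple of 16
            // leaves the low four address bits unchanged.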
            return SDValue(*UI, 0);
          }
        }
      }

      if (isa<ConstantSDNode>(Add->getOperand(1))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode::use_iterator UI = BasePtr->use_begin(),
             UE = BasePtr->use_end(); UI != UE; ++UI) {
          if (UI->getOpcode() == ISD::ADD &&
              isa<ConstantSDNode>(UI->getOperand(1)) &&
              (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
               cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
              (1ULL << Bits) == 0) {
            SDNode *OtherAdd = *UI;
            for (SDNode::use_iterator VI = OtherAdd->use_begin(),
                 VE = OtherAdd->use_end(); VI != VE; ++VI) {
              if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
                  cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() ==
                    IID) {
                return SDValue(*VI, 0);
              }
            }
          }
        }
      }
    }

    // Combine vmaxsw/h/b(a, a's negation) to abs(a)
    // to expose the vabsduw/h/b opportunity downstream.
    if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
        (IID == Intrinsic::ppc_altivec_vmaxsw ||
         IID == Intrinsic::ppc_altivec_vmaxsh ||
         IID == Intrinsic::ppc_altivec_vmaxsb)) {
      SDValue V1 = N->getOperand(1);
      SDValue V2 = N->getOperand(2);
      if ((V1.getSimpleValueType() == MVT::v4i32 ||
           V1.getSimpleValueType() == MVT::v8i16 ||
           V1.getSimpleValueType() == MVT::v16i8) &&
          V1.getSimpleValueType() == V2.getSimpleValueType()) {
        // (0-a, a)
        if (V1.getOpcode() == ISD::SUB &&
            ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
            V1.getOperand(1) == V2) {
          return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
        }
        // (a, 0-a)
        if (V2.getOpcode() == ISD::SUB &&
            ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
            V2.getOperand(1) == V1) {
          return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
        }
        // (x-y, y-x)
        if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
            V1.getOperand(0) == V2.getOperand(1) &&
            V1.getOperand(1) == V2.getOperand(0)) {
          return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
        }
      }
    }
  }
    break;
  case ISD::INTRINSIC_W_CHAIN:
    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
    if (Subtarget.needsSwapsForVSXMemOps()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_lxvw4x:
      case Intrinsic::ppc_vsx_lxvd2x:
        return expandVSXLoadForLE(N, DCI);
      }
    }
    break;
  case ISD::INTRINSIC_VOID:
    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
    if (Subtarget.needsSwapsForVSXMemOps()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_stxvw4x:
      case Intrinsic::ppc_vsx_stxvd2x:
        return expandVSXStoreForLE(N, DCI);
      }
    }
    break;
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
          N->getValueType(0) == MVT::i64))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr(),  // Ptr
        DAG.getValueType(N->getValueType(0)) // VT
      };
      SDValue BSLoad =
        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
                                              MVT::i64 : MVT::i32, MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // If this is an i16 load, insert the truncate.
      SDValue ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);

      // First, combine the bswap away. This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away; we give it a bogus result value but a
      // real chain result. The result value is dead because the bswap is dead.
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }
    break;
  case PPCISD::VCMP:
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = nullptr;

      SDNode *LHSN = N->getOperand(0).getNode();
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if (UI->getOpcode() == PPCISD::VCMPo &&
            UI->getOperand(1) == N->getOperand(1) &&
            UI->getOperand(2) == N->getOperand(2) &&
            UI->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there is no VCMPo node, or if the flag value has a single use,
      // don't transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value. If it has a
      // chain, this transformation is more complex. Note that multiple things
      // could use the value result, which we should ignore.
      SDNode *FlagUser = nullptr;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == nullptr; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is a MFOCRF instruction, we know this is safe.
      // Otherwise we give up for now.
      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
        return SDValue(VCMPoNode, 0);
    }
    break;
  case ISD::BRCOND: {
    SDValue Cond = N->getOperand(1);
    SDValue Target = N->getOperand(2);

    if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
          Intrinsic::loop_decrement) {

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
      assert(Cond.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
                         N->getOperand(0), Target);
    }
  }
  break;
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);

    // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
    // value. If so, pass-through the AND to get to the intrinsic.
    if (LHS.getOpcode() == ISD::AND &&
        LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
          Intrinsic::loop_decrement &&
        isa<ConstantSDNode>(LHS.getOperand(1)) &&
        !isNullConstant(LHS.getOperand(1)))
      LHS = LHS.getOperand(0);

    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
          Intrinsic::loop_decrement &&
        isa<ConstantSDNode>(RHS)) {
      assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
             "Counter decrement comparison is not EQ or NE");

      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      bool isBDNZ = (CC == ISD::SETEQ && Val) ||
                    (CC == ISD::SETNE && !Val);

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
      assert(LHS.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
                         N->getOperand(0), N->getOperand(4));
    }

    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, dl, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
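      // e.g. branching on (pred == 1) or on (pred != 0) both mean "branch
      // when the predicate is true"; the XOR above folds all four CC/Val
      // combinations into this one flag.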

      // Create the PPCISD altivec 'dot' comparison node.
      SDValue Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, dl, MVT::i32)
      };
      EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

      // Unpack the result based on how the target uses it.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, dl, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  case ISD::BUILD_VECTOR:
    return DAGCombineBuildVector(N, DCI);
  case ISD::ABS:
    return combineABS(N, DCI);
  case ISD::VSELECT:
    return combineVSelect(N, DCI);
  }

  return SDValue();
}

SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 SmallVectorImpl<SDNode *> &Created) const {
  // fold (sdiv X, pow2)
  EVT VT = N->getValueType(0);
  if (VT == MVT::i64 && !Subtarget.isPPC64())
    return SDValue();
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);

  bool IsNegPow2 = (-Divisor).isPowerOf2();
  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
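  // A sketch of the intended expansion (illustrative): (sdiv X, 4) becomes
  // SRA_ADDZE(X, 2), i.e. an arithmetic shift right whose carry is added
  // back by addze so the result rounds toward zero; for (sdiv X, -4) the
  // IsNegPow2 path below additionally negates that result.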

  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
  Created.push_back(Op.getNode());

  if (IsNegPow2) {
    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
    Created.push_back(Op.getNode());
  }

  return Op;
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      KnownBits &Known,
                                                      const APInt &DemandedElts,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  Known.resetAll();
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      Known.Zero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpequd_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtsd_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
    case Intrinsic::ppc_altivec_vcmpgtud_p:
      Known.Zero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}

unsigned PPCTargetLowering::getPrefLoopLogAlignment(MachineLoop *ML) const {
  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9: {
    if (!ML)
      break;

    if (!DisableInnermostLoopAlign32) {
      // If the nested loop is an innermost loop, prefer a 32-byte alignment
      // so that we can decrease cache misses and branch-prediction misses.
      // Actual alignment of the loop will depend on the hotness check and
      // other logic in alignBlocks.
      if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
        return 5;
    }

    const PPCInstrInfo *TII = Subtarget.getInstrInfo();

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line.
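    // Note that the value returned here is the log2 of the alignment in
    // bytes, so returning 5 requests 2^5 = 32-byte alignment.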
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
        LoopSize += TII->getInstSizeInBytes(*J);
        if (LoopSize > 32)
          break;
      }

    if (LoopSize > 16 && LoopSize <= 32)
      return 5;

    break;
  }
  }

  return TargetLowering::getPrefLoopLogAlignment(ML);
}

/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'd':
    case 'v':
    case 'y':
      return C_RegisterClass;
    case 'Z':
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
      // suboptimal.
      return C_Memory;
    }
  } else if (Constraint == "wc") { // individual CR bits.
    return C_RegisterClass;
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "ws" ||
             Constraint == "wi" || Constraint == "ww") {
    return C_RegisterClass; // VSX registers.
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();

  // Look at the constraint type.
  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
    return CW_Register; // an individual CR bit.
  else if ((StringRef(constraint) == "wa" ||
            StringRef(constraint) == "wd" ||
            StringRef(constraint) == "wf") &&
           type->isVectorTy())
    return CW_Register;
  else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
    return CW_Register; // "wi" registers just hold 64-bit integer data.
  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
    return CW_Register;
  else if (StringRef(constraint) == "ww" && type->isFloatTy())
    return CW_Register;

  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'b':
    if (type->isIntegerTy())
      weight = CW_Register;
    break;
  case 'f':
    if (type->isFloatTy())
      weight = CW_Register;
    break;
  case 'd':
    if (type->isDoubleTy())
      weight = CW_Register;
    break;
  case 'v':
    if (type->isVectorTy())
      weight = CW_Register;
    break;
  case 'y':
    weight = CW_Register;
    break;
  case 'Z':
    weight = CW_Memory;
    break;
  }
  return weight;
}

std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r':   // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. We don't
    // really care overly much here so just give them all the same reg classes.
    case 'd':
    case 'f':
      if (Subtarget.hasSPE()) {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::SPE4RCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::SPERCRegClass);
      } else {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::F4RCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::F8RCRegClass);
        if (VT == MVT::v4f64 && Subtarget.hasQPX())
          return std::make_pair(0U, &PPC::QFRCRegClass);
        if (VT == MVT::v4f32 && Subtarget.hasQPX())
          return std::make_pair(0U, &PPC::QSRCRegClass);
      }
      break;
    case 'v':
      if (VT == MVT::v4f64 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QFRCRegClass);
      if (VT == MVT::v4f32 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QSRCRegClass);
      if (Subtarget.hasAltivec())
        return std::make_pair(0U, &PPC::VRRCRegClass);
      break;
    case 'y':   // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
    // An individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if ((Constraint == "wa" || Constraint == "wd" ||
              Constraint == "wf" || Constraint == "wi") &&
             Subtarget.hasVSX()) {
    return std::make_pair(0U, &PPC::VSRCRegClass);
  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    else
      return std::make_pair(0U, &PPC::VSFRCRegClass);
  }

  std::pair<unsigned, const TargetRegisterClass *> R =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // register.
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
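  // For example (an illustrative case): requesting {r4} for an i64 value on
  // PPC64 first yields the 32-bit R4, which the code below upgrades to its
  // 64-bit super-register X4.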
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(R.first))
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
                          PPC::sub_32, &PPC::G8RCRegClass),
                          &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }

  return R;
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length 1 constraints.
  if (Constraint.length() > 1) return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    SDLoc dl(Op);
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I': // "I" is a signed 16-bit constant.
      if (isInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
      if (isShiftedUInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
      if (isShiftedInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'M': // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'N': // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'O': // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(-Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
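// For example (an illustrative summary): "lwz r4, 8(r3)" uses an r+i mode,
// "lwzx r4, r3, r5" uses r+r, and a 2*r address is accepted below only by
// treating it as r+r with the same register used twice.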
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS,
                                              Instruction *I) const {
  // PPC does not allow r+i addressing modes for vectors!
  if (Ty->isVectorTy() && AM.BaseOffs != 0)
    return false;

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only supports r+r addressing.
  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}

SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = Subtarget.isPPC64();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  if (Depth > 0) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
                        isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
                     MachinePointerInfo());
}

SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned FrameReg;
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                         PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo());
  return FrameAddr;
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
                                              SelectionDAG &DAG) const {
  bool isPPC64 = Subtarget.isPPC64();
  bool isDarwinABI = Subtarget.isDarwinABI();

  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
      (!isPPC64 && VT != MVT::i32))
    report_fatal_error("Invalid register global variable type");

  bool is64Bit = isPPC64 && VT == MVT::i64;
  unsigned Reg = StringSwitch<unsigned>(RegName)
                   .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
                   .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
                   .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
                                (is64Bit ? PPC::X13 : PPC::R13))
                   .Default(0);

  if (Reg)
    return Reg;
  report_fatal_error("Invalid register name global variable");
}

bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
  // The 32-bit SVR4 ABI accesses everything as got-indirect.
  if (Subtarget.is32BitELFABI())
    return true;

  // AIX accesses everything indirectly through the TOC, which is similar to
  // the GOT.
  if (Subtarget.isAIXABI())
    return true;

  CodeModel::Model CModel = getTargetMachine().getCodeModel();
  // If it is small or large code model, module locals are accessed
  // indirectly by loading their address from .toc/.got.
  if (CModel == CodeModel::Small || CModel == CodeModel::Large)
    return true;

  // JumpTable and BlockAddress are accessed as got-indirect.
  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
    return true;

  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
    const GlobalValue *GV = G->getGlobal();
    unsigned char GVFlags = Subtarget.classifyGlobalReference(GV);
    // The NLP flag indicates that a global access has to use an
    // extra indirection.
    if (GVFlags & PPCII::MO_NLP_FLAG)
      return true;
  }

  return false;
}

bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}

bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           MachineFunction &MF,
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::ppc_qpx_qvlfd:
  case Intrinsic::ppc_qpx_qvlfs:
  case Intrinsic::ppc_qpx_qvlfcd:
  case Intrinsic::ppc_qpx_qvlfcs:
  case Intrinsic::ppc_qpx_qvlfiwa:
  case Intrinsic::ppc_qpx_qvlfiwz:
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
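    // These loads truncate their address to an aligned boundary, so we
    // conservatively describe a (2 * size - 1)-byte window centered on the
    // pointer: the access may begin up to (size - 1) bytes below it.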
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = Align(1);
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::ppc_qpx_qvlfda:
  case Intrinsic::ppc_qpx_qvlfsa:
  case Intrinsic::ppc_qpx_qvlfcda:
  case Intrinsic::ppc_qpx_qvlfcsa:
  case Intrinsic::ppc_qpx_qvlfiwaa:
  case Intrinsic::ppc_qpx_qvlfiwza: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = Align(1);
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfd:
  case Intrinsic::ppc_qpx_qvstfs:
  case Intrinsic::ppc_qpx_qvstfcd:
  case Intrinsic::ppc_qpx_qvstfcs:
  case Intrinsic::ppc_qpx_qvstfiw:
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = Align(1);
    Info.flags = MachineMemOperand::MOStore;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfda:
  case Intrinsic::ppc_qpx_qvstfsa:
  case Intrinsic::ppc_qpx_qvstfcda:
  case Intrinsic::ppc_qpx_qvstfcsa:
  case Intrinsic::ppc_qpx_qvstfiwa: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = Align(1);
    Info.flags = MachineMemOperand::MOStore;
    return true;
  }
  default:
    break;
  }

  return false;
}

/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, that means it is safe to assume the
/// destination alignment can satisfy any constraint. Similarly, if SrcAlign
/// is zero it means there isn't a need to check it against the alignment
/// requirement, probably because the source does not need to be loaded. If
/// 'IsMemset' is true, that means it's expanding a memset. If 'ZeroMemset'
/// is true, that means it's a memset of zero. 'MemcpyStrSrc' indicates
/// whether the memcpy source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(
    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
    bool ZeroMemset, bool MemcpyStrSrc,
    const AttributeList &FuncAttributes) const {
  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
    // When expanding a memset, require at least two QPX instructions to cover
    // the cost of loading the value to be stored from the constant pool.
    if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
        !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
      return MVT::v4f64;
    }

    // We should use Altivec/VSX loads and stores when available. For unaligned
    // addresses, unaligned VSX loads are only fast starting with the P8.
    if (Subtarget.hasAltivec() && Size >= 16 &&
        (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
         ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
      return MVT::v4i32;
  }

  if (Subtarget.isPPC64()) {
    return MVT::i64;
  }

  return MVT::i32;
}

/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                          Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  return !(BitSize == 0 || BitSize > 64);
}

bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Generally speaking, zexts are not free, but they are free when they can be
  // folded with other operations.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  // FIXME: Add other cases...
  //  - 32-bit shifts with a zext to i64
  //  - zext after ctlz, bswap, etc.
  //  - zext after and by a constant mask

  return TargetLowering::isZExtFree(Val, VT2);
}

bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
  assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
         "invalid fpext types");
  // Extending to float128 is not free.
  if (DestVT == MVT::f128)
    return false;
  return true;
}

bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                       unsigned,
                                                       unsigned,
                                                       MachineMemOperand::Flags,
                                                       bool *Fast) const {
  if (DisablePPCUnaligned)
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and generally only traps for software emulation when crossing page
  // boundaries.

  if (!VT.isSimple())
    return false;

  if (VT.getSimpleVT().isVector()) {
    if (Subtarget.hasVSX()) {
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
          VT != MVT::v4f32 && VT != MVT::v4i32)
        return false;
    } else {
      return false;
    }
  }

  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = true;

  return true;
}

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return (EnableQuadPrecision && Subtarget.hasP9Vector());
  default:
    break;
  }

  return false;
}
14813 const MCPhysReg *
14814 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
14815 // LR is a callee-save register, but we must treat it as clobbered by any call
14816 // site. Hence we include LR in the scratch registers, which are in turn added
14817 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
14818 // to CTR, which is used by any indirect call.
14819 static const MCPhysReg ScratchRegs[] = {
14820 PPC::X12, PPC::LR8, PPC::CTR8, 0
14823 return ScratchRegs;
14826 unsigned PPCTargetLowering::getExceptionPointerRegister(
14827 const Constant *PersonalityFn) const {
14828 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
14831 unsigned PPCTargetLowering::getExceptionSelectorRegister(
14832 const Constant *PersonalityFn) const {
14833 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;

bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  if (VT == MVT::v2i64)
    return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves

  if (Subtarget.hasVSX() || Subtarget.hasQPX())
    return true;

  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}

Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  if (DisableILPPref || Subtarget.enableMachineScheduler())
    return TargetLowering::getSchedulingPreference(N);

  return Sched::ILP;
}

// Create a fast isel object.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}

void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
  if (Subtarget.isDarwinABI()) return;
  if (!Subtarget.isPPC64()) return;

  // Update IsSplitCSR in PPCFunctionInfo.
  PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
  PFI->setIsSplitCSR(true);
}

void PPCTargetLowering::insertCopiesSplitCSR(
    MachineBasicBlock *Entry,
    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
  if (!IStart)
    return;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
  MachineBasicBlock::iterator MBBI = Entry->begin();
  for (const MCPhysReg *I = IStart; *I; ++I) {
    const TargetRegisterClass *RC = nullptr;
    if (PPC::G8RCRegClass.contains(*I))
      RC = &PPC::G8RCRegClass;
    else if (PPC::F8RCRegClass.contains(*I))
      RC = &PPC::F8RCRegClass;
    else if (PPC::CRRCRegClass.contains(*I))
      RC = &PPC::CRRCRegClass;
    else if (PPC::VRRCRegClass.contains(*I))
      RC = &PPC::VRRCRegClass;
    else
      llvm_unreachable("Unexpected register class in CSRsViaCopy!");

    Register NewVR = MRI->createVirtualRegister(RC);
    // Create copy from CSR to a virtual register.
    // FIXME: this currently does not emit CFI pseudo-instructions; it works
    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
    // nounwind. If we want to generalize this later, we may need to emit
    // CFI pseudo-instructions.
    assert(Entry->getParent()->getFunction().hasFnAttribute(
             Attribute::NoUnwind) &&
           "Function should be nounwind in insertCopiesSplitCSR!");
    Entry->addLiveIn(*I);
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
      .addReg(*I);

    // Insert the copy-back instructions right before the terminator.
    for (auto *Exit : Exits)
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
              TII->get(TargetOpcode::COPY), *I)
        .addReg(NewVR);
  }
}
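
// Illustration (register chosen arbitrarily): for a CSR such as X14 this
// emits, in the entry block,
//   %vreg = COPY $x14
// and, right before each exit block's terminator,
//   $x14 = COPY %vreg
// which lets the register allocator place the save/restore more flexibly
// than a fixed prologue/epilogue spill would.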

// Override to enable LOAD_STACK_GUARD lowering on Linux.
bool PPCTargetLowering::useLoadStackGuardNode() const {
  if (!Subtarget.isTargetLinux())
    return TargetLowering::useLoadStackGuardNode();
  return true;
}

// Override to skip the global stack-guard variable declarations on Linux,
// where the guard is loaded via LOAD_STACK_GUARD instead.
void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
  if (!Subtarget.isTargetLinux())
    return TargetLowering::insertSSPDeclarations(M);
}

bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                     bool ForCodeSize) const {
  if (!VT.isSimple() || !Subtarget.hasVSX())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    // For FP types that are currently not supported by the PPC backend, return
    // false. Examples: f16, f80.
    return false;
  case MVT::f32:
  case MVT::f64:
  case MVT::ppcf128:
    return Imm.isPosZero();
  }
}
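
// Illustration: with VSX, +0.0 is trivial to materialize (e.g. by XOR-ing a
// register with itself via xxlxor), which is why it is the only immediate
// accepted above; other FP constants still come from the constant pool.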

// For vector shift operation op, fold
// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
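// This is sound because the PPC vector shift instructions (e.g. vslw, vsrw,
// vsraw) already interpret each shift amount modulo the element width, so
// the explicit AND mask is redundant (mnemonics given for illustration).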
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
                                  SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
  unsigned Opcode = N->getOpcode();
  unsigned TargetOpcode;

  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected shift operation");
  case ISD::SHL:
    TargetOpcode = PPCISD::SHL;
    break;
  case ISD::SRL:
    TargetOpcode = PPCISD::SRL;
    break;
  case ISD::SRA:
    TargetOpcode = PPCISD::SRA;
    break;
  }

  if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
      N1->getOpcode() == ISD::AND)
    if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
      if (Mask->getZExtValue() == OpSizeInBits - 1)
        return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));

  return SDValue();
}
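
// On ISA 3.0 targets, combineSHL below also matches (shl (sext i32), c) and
// rewrites it to PPCISD::EXTSWSLI, i.e. the single extswsli (Extend Sign
// Word and Shift Left Immediate) instruction instead of extsw plus a
// separate shift.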
SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  SDValue N0 = N->getOperand(0);
  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Subtarget.isISA3_0() ||
      N0.getOpcode() != ISD::SIGN_EXTEND ||
      N0.getOperand(0).getValueType() != MVT::i32 ||
      CN1 == nullptr || N->getValueType(0) != MVT::i64)
    return SDValue();

  // We can't save an operation here if the value is already extended, and
  // the existing shift is easier to combine.
  SDValue ExtsSrc = N0.getOperand(0);
  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
      ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
    return SDValue();

  SDLoc DL(N0);
  SDValue ShiftBy = SDValue(CN1, 0);
  // We want the shift amount to be i32 on the extswsli, but the shift amount
  // could be an i64.
  if (ShiftBy.getValueType() == MVT::i64)
    ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);

  return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
                         ShiftBy);
}

SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  return SDValue();
}

SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  return SDValue();
}

// Transform (add X, (zext (setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
// Transform (add X, (zext (seteq Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
// When C is zero, the expression (addi Z, -C) simplifies to Z.
// Requirement: -C is in [-32768, 32767], and X and Z are MVT::i64 types.
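// For example, with C == 0 the SETNE form lowers to just two instructions
// (registers chosen for illustration):
//   addic r0, rZ, -1    # CA = (Z != 0)
//   addze rX, rX        # X += CA
// instead of a compare-and-select sequence.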
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
                                 const PPCSubtarget &Subtarget) {
  if (!Subtarget.isPPC64())
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  auto isZextOfCompareWithConstant = [](SDValue Op) {
    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
        Op.getValueType() != MVT::i64)
      return false;

    SDValue Cmp = Op.getOperand(0);
    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
        Cmp.getOperand(0).getValueType() != MVT::i64)
      return false;

    if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
      int64_t NegConstant = 0 - Constant->getSExtValue();
      // Due to the limitations of the addi instruction,
      // -C is required to be in [-32768, 32767].
      return isInt<16>(NegConstant);
    }

    return false;
  };

  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);

  // If there is a pattern, canonicalize a zext operand to the RHS.
  if (LHSHasPattern && !RHSHasPattern)
    std::swap(LHS, RHS);
  else if (!LHSHasPattern && !RHSHasPattern)
    return SDValue();

  SDLoc DL(N);
  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
  SDValue Cmp = RHS.getOperand(0);
  SDValue Z = Cmp.getOperand(0);
  auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));

  assert(Constant && "Constant should not be a null pointer.");
  int64_t NegConstant = 0 - Constant->getSExtValue();

  switch (cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
  default: break;
  case ISD::SETNE: {
    //                                  when C == 0
    //                              --> addze X, (addic Z, -1).carry
    //                             /
    // add X, (zext(setne Z, C)) --
    //                             \   when -32768 <= -C <= 32767 && C != 0
    //                              --> addze X, (addic (addi Z, -C), -1).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                               AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Addc.getNode(), 1));
  }
  case ISD::SETEQ: {
    //                                  when C == 0
    //                              --> addze X, (subfic Z, 0).carry
    //                             /
    // add X, (zext(seteq Z, C)) --
    //                             \   when -32768 <= -C <= 32767 && C != 0
    //                              --> addze X, (subfic (addi Z, -C), 0).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                               DAG.getConstant(0, DL, MVT::i64), AddOrZ);
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Subc.getNode(), 1));
  }
  }

  return SDValue();
}

SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
    return Value;

  return SDValue();
}

// Detect TRUNCATE operations on bitcasts of float128 values.
// What we are looking for here is the situation where we extract a subset
// of bits from a 128-bit float.
// This can be of two forms:
//  1) BITCAST of f128 feeding TRUNCATE
//  2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
// The reason this is required is that we do not have a legal i128 type,
// and so we want to prevent having to store the f128 and then reload part
// of it.
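// For example, on a big-endian target (illustrative):
//   (i64 (trunc (srl (i128 (bitcast f128:x)), 64)))
// becomes
//   (i64 (extract_vector_elt (v2i64 (bitcast f128:x)), 0))
// keeping the value in a vector register instead of spilling the f128.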
SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  // If we are using CRBits then try that first.
  if (Subtarget.useCRBits()) {
    // Check if CRBits did anything and return that if it did.
    if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
      return CRTruncValue;
  }

  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);

  // Looking for a truncate of i128 to i64.
  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
    return SDValue();

  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;

  // SRL feeding TRUNCATE.
  if (Op0.getOpcode() == ISD::SRL) {
    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    // The right shift has to be by 64 bits.
    if (!ConstNode || ConstNode->getZExtValue() != 64)
      return SDValue();

    // Switch the element number to extract.
    EltToExtract = EltToExtract ? 0 : 1;
    // Update Op0 past the SRL.
    Op0 = Op0.getOperand(0);
  }

  // BITCAST feeding a TRUNCATE possibly via SRL.
  if (Op0.getOpcode() == ISD::BITCAST &&
      Op0.getValueType() == MVT::i128 &&
      Op0.getOperand(0).getValueType() == MVT::f128) {
    SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
    return DCI.DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
        DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
  }
  return SDValue();
}

SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
  if (!ConstOpOrElement)
    return SDValue();

  // An imul is usually smaller than the alternative sequence for a legal type.
  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
      isOperationLegal(ISD::MUL, N->getValueType(0)))
    return SDValue();

  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
    switch (this->Subtarget.getDarwinDirective()) {
    default:
      // TODO: enhance the condition for subtargets before pwr8
      return false;
    case PPC::DIR_PWR8:
      // type        mul     add    shl
      // scalar        4       1      1
      // vector        7       2      2
      return true;
    case PPC::DIR_PWR9:
      // type        mul     add    shl
      // scalar        5       2      2
      // vector        7       2      2

      // The cycle counts of the related operations are shown in the table
      // above: mul costs 5 (scalar) or 7 (vector), while add/sub/shl all
      // cost 2 for both scalar and vector types. For two-instruction
      // patterns, add/sub + shl costs 4, so the transform is always
      // profitable; but for the three-instruction pattern
      // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl costs 6,
      // so we should only do it for vector types.
      return IsAddOne && IsNeg ? VT.isVector() : true;
    }
  };

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
  bool IsNeg = MulAmt.isNegative();
  APInt MulAmtAbs = MulAmt.abs();

  if ((MulAmtAbs - 1).isPowerOf2()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)

    if (!IsProfitable(IsNeg, true, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
    SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);

    if (!IsNeg)
      return Res;

    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
  } else if ((MulAmtAbs + 1).isPowerOf2()) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))

    if (!IsProfitable(IsNeg, false, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));

    if (!IsNeg)
      return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
    else
      return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
  } else {
    return SDValue();
  }
}
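
// Worked example (illustrative): for (mul x, 9), |9| - 1 = 8 = 2^3, giving
// (add (shl x, 3), x); for (mul x, -7), |-7| + 1 = 8 = 2^3, giving
// (sub x, (shl x, 3)).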

bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
  if (!Subtarget.is64BitELFABI())
    return false;

  // If not a tail call then no need to proceed.
  if (!CI->isTailCall())
    return false;

  // If tail calls are disabled for the caller then we are done.
  const Function *Caller = CI->getParent()->getParent();
  auto Attr = Caller->getFnAttribute("disable-tail-calls");
  if (Attr.getValueAsString() == "true")
    return false;

  // If sibling calls have been disabled and tail-calls aren't guaranteed,
  // there is no reason to duplicate.
  auto &TM = getTargetMachine();
  if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
    return false;

  // Can't tail call a function called indirectly, or if it has variadic args.
  const Function *Callee = CI->getCalledFunction();
  if (!Callee || Callee->isVarArg())
    return false;

  // If the function is local then we have a good chance at tail-calling it.
  return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
}

bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  if (!Subtarget.hasVSX())
    return false;
  if (Subtarget.hasP9Vector() && VT == MVT::f128)
    return true;
  return VT == MVT::f32 || VT == MVT::f64 ||
         VT == MVT::v4f32 || VT == MVT::v2f64;
}

bool PPCTargetLowering::
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
  const Value *Mask = AndI.getOperand(1);
  // If the mask is suitable for andi. or andis., we should sink the and.
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
    // Can't handle constants wider than 64 bits.
    if (CI->getBitWidth() > 64)
      return false;
    int64_t ConstVal = CI->getZExtValue();
    return isUInt<16>(ConstVal) ||
           (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
  }

  // For non-constant masks, we can always use the record-form and.
  return true;
}
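
// Note (illustrative): andi. takes a 16-bit unsigned immediate and andis.
// the same immediate shifted left by 16 (exactly the two cases accepted
// above); both are record forms, so the compare against zero comes for free
// in CR0.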

// Transform (abs (sub (zext a), (zext b))) to (vabsd a, b, 0)
// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a, b, 0)
// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a, b, 0)
// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a, b, 0)
// Transform (abs (sub a, b)) to (vabsd a, b, 1) if a and b are of type v4i32
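// The final i32 operand of VABSD selects the lowering (summary for
// illustration): 0 means the operands can feed vabsdu[bhw] directly, while 1
// means the v4i32 operands are first sign-bit-flipped with xvnegsp so that
// the unsigned vabsduw still computes the signed |a - b|.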
SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  if (N->getOperand(0).getOpcode() == ISD::SUB) {
    // Even for signed integers, both operands are known to be non-negative
    // (as signed integers) because they are zero-extended, so the unsigned
    // absolute difference is the correct result.
    unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
    unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
    if ((SubOpcd0 == ISD::ZERO_EXTEND ||
         SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
        (SubOpcd1 == ISD::ZERO_EXTEND ||
         SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(0, dl, MVT::i32));
    }

    // For type v4i32, it can be optimized with xvnegsp + vabsduw.
    if (N->getOperand(0).getValueType() == MVT::v4i32 &&
        N->getOperand(0).hasOneUse()) {
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(1, dl, MVT::i32));
    }
  }

  return SDValue();
}

// For type v4i32/v8i16/v16i8, transform
// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
SDValue PPCTargetLowering::combineVSelect(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue TrueOpnd = N->getOperand(1);
  SDValue FalseOpnd = N->getOperand(2);
  EVT VT = N->getOperand(1).getValueType();

  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
      FalseOpnd.getOpcode() != ISD::SUB)
    return SDValue();

  // VABSD is only available for types v4i32/v8i16/v16i8.
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  // Require at least one single-use operand, so that the transform saves at
  // least one dependent computation.
  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
    return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

  // Only unsigned comparisons can be handled here.
  switch (CC) {
  default:
    return SDValue();
  case ISD::SETUGT:
  case ISD::SETUGE:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
    std::swap(TrueOpnd, FalseOpnd);
    break;
  }

  SDValue CmpOpnd1 = Cond.getOperand(0);
  SDValue CmpOpnd2 = Cond.getOperand(1);

  // SETCC CmpOpnd1 CmpOpnd2 cond
  // TrueOpnd = CmpOpnd1 - CmpOpnd2
  // FalseOpnd = CmpOpnd2 - CmpOpnd1
  if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
      TrueOpnd.getOperand(1) == CmpOpnd2 &&
      FalseOpnd.getOperand(0) == CmpOpnd2 &&
      FalseOpnd.getOperand(1) == CmpOpnd1) {
    return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
                       CmpOpnd1, CmpOpnd2,
                       DAG.getTargetConstant(0, dl, MVT::i32));
  }

  return SDValue();
}