llvm/lib/Target/PowerPC/PPCISelLowering.cpp

   1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements the PPCISelLowering class.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "PPCISelLowering.h"
  14 #include "MCTargetDesc/PPCMCTargetDesc.h"
  15 #include "MCTargetDesc/PPCPredicates.h"
  16 #include "PPC.h"
  17 #include "PPCCCState.h"
  18 #include "PPCCallingConv.h"
  19 #include "PPCFrameLowering.h"
  20 #include "PPCInstrInfo.h"
  21 #include "PPCMachineFunctionInfo.h"
  22 #include "PPCPerfectShuffle.h"
  23 #include "PPCRegisterInfo.h"
  24 #include "PPCSubtarget.h"
  25 #include "PPCTargetMachine.h"
  26 #include "llvm/ADT/APFloat.h"
  27 #include "llvm/ADT/APInt.h"
  28 #include "llvm/ADT/APSInt.h"
  29 #include "llvm/ADT/ArrayRef.h"
  30 #include "llvm/ADT/DenseMap.h"
  31 #include "llvm/ADT/STLExtras.h"
  32 #include "llvm/ADT/SmallPtrSet.h"
  33 #include "llvm/ADT/SmallSet.h"
  34 #include "llvm/ADT/SmallVector.h"
  35 #include "llvm/ADT/Statistic.h"
  36 #include "llvm/ADT/StringRef.h"
  37 #include "llvm/ADT/StringSwitch.h"
  38 #include "llvm/CodeGen/CallingConvLower.h"
  39 #include "llvm/CodeGen/ISDOpcodes.h"
  40 #include "llvm/CodeGen/MachineBasicBlock.h"
  41 #include "llvm/CodeGen/MachineFrameInfo.h"
  42 #include "llvm/CodeGen/MachineFunction.h"
  43 #include "llvm/CodeGen/MachineInstr.h"
  44 #include "llvm/CodeGen/MachineInstrBuilder.h"
  45 #include "llvm/CodeGen/MachineJumpTableInfo.h"
  46 #include "llvm/CodeGen/MachineLoopInfo.h"
  47 #include "llvm/CodeGen/MachineMemOperand.h"
  48 #include "llvm/CodeGen/MachineModuleInfo.h"
  49 #include "llvm/CodeGen/MachineOperand.h"
  50 #include "llvm/CodeGen/MachineRegisterInfo.h"
  51 #include "llvm/CodeGen/RuntimeLibcallUtil.h"
  52 #include "llvm/CodeGen/SelectionDAG.h"
  53 #include "llvm/CodeGen/SelectionDAGNodes.h"
  54 #include "llvm/CodeGen/TargetInstrInfo.h"
  55 #include "llvm/CodeGen/TargetLowering.h"
  56 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  57 #include "llvm/CodeGen/TargetRegisterInfo.h"
  58 #include "llvm/CodeGen/ValueTypes.h"
  59 #include "llvm/CodeGenTypes/MachineValueType.h"
  60 #include "llvm/IR/CallingConv.h"
  61 #include "llvm/IR/Constant.h"
  62 #include "llvm/IR/Constants.h"
  63 #include "llvm/IR/DataLayout.h"
  64 #include "llvm/IR/DebugLoc.h"
  65 #include "llvm/IR/DerivedTypes.h"
  66 #include "llvm/IR/Function.h"
  67 #include "llvm/IR/GlobalValue.h"
  68 #include "llvm/IR/IRBuilder.h"
  69 #include "llvm/IR/Instructions.h"
  70 #include "llvm/IR/Intrinsics.h"
  71 #include "llvm/IR/IntrinsicsPowerPC.h"
  72 #include "llvm/IR/Module.h"
  73 #include "llvm/IR/Type.h"
  74 #include "llvm/IR/Use.h"
  75 #include "llvm/IR/Value.h"
  76 #include "llvm/MC/MCContext.h"
  77 #include "llvm/MC/MCExpr.h"
  78 #include "llvm/MC/MCRegisterInfo.h"
  79 #include "llvm/MC/MCSectionXCOFF.h"
  80 #include "llvm/MC/MCSymbolXCOFF.h"
  81 #include "llvm/Support/AtomicOrdering.h"
  82 #include "llvm/Support/BranchProbability.h"
  83 #include "llvm/Support/Casting.h"
  84 #include "llvm/Support/CodeGen.h"
  85 #include "llvm/Support/CommandLine.h"
  86 #include "llvm/Support/Compiler.h"
  87 #include "llvm/Support/Debug.h"
  88 #include "llvm/Support/ErrorHandling.h"
  89 #include "llvm/Support/Format.h"
  90 #include "llvm/Support/KnownBits.h"
  91 #include "llvm/Support/MathExtras.h"
  92 #include "llvm/Support/raw_ostream.h"
  93 #include "llvm/Target/TargetMachine.h"
  94 #include "llvm/Target/TargetOptions.h"
  95 #include <algorithm>
  96 #include <cassert>
  97 #include <cstdint>
  98 #include <iterator>
  99 #include <list>
 100 #include <optional>
 101 #include <utility>
 102 #include <vector>
 103
 104 using namespace llvm;
 105
 106 #define DEBUG_TYPE "ppc-lowering"
  107
      // Hidden boolean command-line option "disable-ppc-preinc". Per its
      // description, setting it asks the backend not to generate pre-increment
      // loads/stores. No cl::init is supplied for this option.
  108 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
  109 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
  110
      // Hidden boolean command-line option "disable-ppc-ilp-pref". Per its
      // description, setting it stops the node scheduling preference from being
      // set to ILP. No cl::init is supplied for this option.
  111 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
  112 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
  113
      // Hidden boolean command-line option "disable-ppc-unaligned". Per its
      // description, setting it turns off generation of unaligned loads/stores.
      // No cl::init is supplied for this option.
  114 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
  115 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
  116
      // Hidden boolean command-line option "disable-ppc-sco" ("SCO" = sibling
      // call optimization, as spelled out in the description). No cl::init is
      // supplied for this option.
  117 static cl::opt<bool> DisableSCO("disable-ppc-sco",
  118 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
  119
      // Hidden boolean command-line option "disable-ppc-innermost-loop-align32".
      // Per its description, setting it means innermost loops are not always
      // aligned to 32 bytes. No cl::init is supplied for this option.
  120 static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
  121 cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
  122
      // Hidden boolean command-line option "ppc-use-absolute-jumptables". Unlike
      // the "disable-*" options above, this one opts in to a behaviour: using
      // absolute jump tables. No cl::init is supplied for this option.
  123 static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
  124 cl::desc("use absolute jump tables on ppc"), cl::Hidden);
  125
      // Hidden boolean command-line option "ppc-disable-perfect-shuffle".
      // Initialised to true, so the vector permute decomposition named in the
      // description is disabled unless the option is explicitly set to false.
  126 static cl::opt<bool>
  127     DisablePerfectShuffle("ppc-disable-perfect-shuffle",
  128                           cl::desc("disable vector permute decomposition"),
  129                           cl::init(true), cl::Hidden);
  130
      // Hidden boolean command-line option "disable-auto-paired-vec-st".
      // Initialised to true, so automatically generated 32-byte paired vector
      // stores are disabled unless the option is explicitly set to false.
      // NOTE(review): unlike the neighbouring options this one is not declared
      // 'static'; presumably it is referenced from another translation unit --
      // confirm before changing its linkage.
  131 cl::opt<bool> DisableAutoPairedVecSt(
  132     "disable-auto-paired-vec-st",
  133     cl::desc("disable automatically generated 32byte paired vector stores"),
  134     cl::init(true), cl::Hidden);
  135
      // Hidden unsigned command-line option "ppc-min-jump-table-entries":
      // the minimum number of entries required to use a jump table on PPC.
      // Initialised to 64.
  136 static cl::opt<unsigned> PPCMinimumJumpTableEntries(
  137     "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
  138     cl::desc("Set minimum number of entries to use a jump table on PPC"));
  139
      // Hidden unsigned command-line option "ppc-gather-alias-max-depth":
      // the maximum depth used when checking alias info in GatherAllAliases()
      // (named in the description; not defined in this part of the file).
      // Initialised to 18.
  140 static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
  141     "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
  142     cl::desc("max depth when checking alias info in GatherAllAliases()"));
  143
      // Hidden unsigned command-line option
      // "ppc-aix-shared-lib-tls-model-opt-limit": an inclusive limit on the
      // number of TLS local-dynamic accesses in a function for which
      // initial-exec is used instead. Initialised to 1.
  144 static cl::opt<unsigned> PPCAIXTLSModelOptUseIEForLDLimit(
  145     "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
  146     cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
  147              "function to use initial-exec"));
  148
      // Statistics counters declared through the STATISTIC macro; each pairs a
      // counter identifier with the description string reported for it.
      // The code that increments them is not in this part of the file.
  149 STATISTIC(NumTailCalls, "Number of tail calls");
  150 STATISTIC(NumSiblingCalls, "Number of sibling calls");
  151 STATISTIC(ShufflesHandledWithVPERM,
  152           "Number of shuffles lowered to a VPERM or XXPERM");
  153 STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
  154
      // Forward declarations of file-local (static) helpers so they can be used
      // before their definitions, which are not in this part of the file.
      // The declaration below leaves its parameters unnamed; see the definition
      // for what the unsigned and int arguments mean.
  155 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
  156
      // Takes a DAG, a vector value and a debug location and returns an SDValue.
      // NOTE(review): the name suggests it returns a widened form of Vec --
      // confirm against the definition.
  157 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
  158
      // File-local string constant holding the name "__ssp_canary_word".
      // NOTE(review): judging by the identifier this is presumably the AIX
      // stack-protector canary symbol -- confirm at the use site.
  159 static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
  160
  161 // A faster local-[exec|dynamic] TLS access sequence (enabled with the
  162 // -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
  163 // variables; consistent with the IBM XL compiler, we apply a maximum size of
  164 // slightly under 32KB (32751 = 32 * 1024 - 17).
  165 constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;
  166
  167 // FIXME: Remove this once the bug has been fixed!
      // Declared extern: the option object itself is defined outside this part
      // of the file. In the PPCTargetLowering constructor it is read, when CR
      // bits are in use, to decide whether ISD::TRUNCATE to i1 is custom lowered.
  168 extern cl::opt<bool> ANDIGlueBug;
 169
 170 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 171                                      const PPCSubtarget &STI)
 172     : TargetLowering(TM), Subtarget(STI) {
 173   // Initialize map that relates the PPC addressing modes to the computed flags
 174   // of a load/store instruction. The map is used to determine the optimal
 175   // addressing mode when selecting load and stores.
 176   initializeAddrModeMap();
 177   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
 178   // arguments are at least 4/8 bytes aligned.
 179   bool isPPC64 = Subtarget.isPPC64();
 180   setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
 181
 182   // Set up the register classes.
 183   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
 184   if (!useSoftFloat()) {
 185     if (hasSPE()) {
 186       addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
 187       // EFPU2 APU only supports f32
 188       if (!Subtarget.hasEFPU2())
 189         addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
 190     } else {
 191       addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
 192       addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
 193     }
 194   }
 195
 196   // Match BITREVERSE to customized fast code sequence in the td file.
 197   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
 198   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
 199
 200   // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
 201   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
 202
 203   // Custom lower inline assembly to check for special registers.
 204   setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
 205   setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
 206
 207   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
 208   for (MVT VT : MVT::integer_valuetypes()) {
 209     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
 210     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
 211   }
 212
 213   if (Subtarget.isISA3_0()) {
 214     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
 215     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
 216     setTruncStoreAction(MVT::f64, MVT::f16, Legal);
 217     setTruncStoreAction(MVT::f32, MVT::f16, Legal);
 218   } else {
 219     // No extending loads from f16 or HW conversions back and forth.
 220     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
 221     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
 222     setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
 223     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
 224     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
 225     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
 226     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
 227     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
 228   }
 229
 230   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 231
  232   // PowerPC has pre-increment loads and stores.
 233   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
 234   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
 235   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
 236   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
 237   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
 238   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
 239   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
 240   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
 241   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
 242   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
 243   if (!Subtarget.hasSPE()) {
 244     setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
 245     setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
 246     setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
 247     setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
 248   }
 249
 250   // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
 251   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
 252   for (MVT VT : ScalarIntVTs) {
 253     setOperationAction(ISD::ADDC, VT, Legal);
 254     setOperationAction(ISD::ADDE, VT, Legal);
 255     setOperationAction(ISD::SUBC, VT, Legal);
 256     setOperationAction(ISD::SUBE, VT, Legal);
 257   }
 258
 259   if (Subtarget.useCRBits()) {
 260     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 261
 262     if (isPPC64 || Subtarget.hasFPCVT()) {
 263       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
 264       AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
 265                         isPPC64 ? MVT::i64 : MVT::i32);
 266       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
 267       AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
 268                         isPPC64 ? MVT::i64 : MVT::i32);
 269
 270       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
 271       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
 272                          isPPC64 ? MVT::i64 : MVT::i32);
 273       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
 274       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
 275                         isPPC64 ? MVT::i64 : MVT::i32);
 276
 277       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
 278       AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1,
 279                         isPPC64 ? MVT::i64 : MVT::i32);
 280       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
 281       AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1,
 282                         isPPC64 ? MVT::i64 : MVT::i32);
 283
 284       setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
 285       AddPromotedToType(ISD::FP_TO_SINT, MVT::i1,
 286                         isPPC64 ? MVT::i64 : MVT::i32);
 287       setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
 288       AddPromotedToType(ISD::FP_TO_UINT, MVT::i1,
 289                         isPPC64 ? MVT::i64 : MVT::i32);
 290     } else {
 291       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
 292       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
 293       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
 294       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
 295     }
 296
 297     // PowerPC does not support direct load/store of condition registers.
 298     setOperationAction(ISD::LOAD, MVT::i1, Custom);
 299     setOperationAction(ISD::STORE, MVT::i1, Custom);
 300
 301     // FIXME: Remove this once the ANDI glue bug is fixed:
 302     if (ANDIGlueBug)
 303       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
 304
 305     for (MVT VT : MVT::integer_valuetypes()) {
 306       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
 307       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
 308       setTruncStoreAction(VT, MVT::i1, Expand);
 309     }
 310
 311     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
 312   }
 313
 314   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
 315   // PPC (the libcall is not available).
 316   setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
 317   setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
 318   setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
 319   setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
 320
 321   // We do not currently implement these libm ops for PowerPC.
 322   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
 323   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
 324   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
 325   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
 326   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
 327   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
 328
 329   // PowerPC has no SREM/UREM instructions unless we are on P9
 330   // On P9 we may use a hardware instruction to compute the remainder.
 331   // When the result of both the remainder and the division is required it is
 332   // more efficient to compute the remainder from the result of the division
 333   // rather than use the remainder instruction. The instructions are legalized
 334   // directly because the DivRemPairsPass performs the transformation at the IR
 335   // level.
 336   if (Subtarget.isISA3_0()) {
 337     setOperationAction(ISD::SREM, MVT::i32, Legal);
 338     setOperationAction(ISD::UREM, MVT::i32, Legal);
 339     setOperationAction(ISD::SREM, MVT::i64, Legal);
 340     setOperationAction(ISD::UREM, MVT::i64, Legal);
 341   } else {
 342     setOperationAction(ISD::SREM, MVT::i32, Expand);
 343     setOperationAction(ISD::UREM, MVT::i32, Expand);
 344     setOperationAction(ISD::SREM, MVT::i64, Expand);
 345     setOperationAction(ISD::UREM, MVT::i64, Expand);
 346   }
 347
 348   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
 349   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 350   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 351   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
 352   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 353   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
 354   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
 355   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
 356   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
 357
 358   // Handle constrained floating-point operations of scalar.
 359   // TODO: Handle SPE specific operation.
 360   setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
 361   setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
 362   setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
 363   setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
 364   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
 365
 366   setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
 367   setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
 368   setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
 369   setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
 370
 371   if (!Subtarget.hasSPE()) {
 372     setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
 373     setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
 374   }
 375
 376   if (Subtarget.hasVSX()) {
 377     setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
 378     setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
 379   }
 380
 381   if (Subtarget.hasFSQRT()) {
 382     setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
 383     setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
 384   }
 385
 386   if (Subtarget.hasFPRND()) {
 387     setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
 388     setOperationAction(ISD::STRICT_FCEIL,  MVT::f32, Legal);
 389     setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
 390     setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
 391
 392     setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
 393     setOperationAction(ISD::STRICT_FCEIL,  MVT::f64, Legal);
 394     setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
 395     setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
 396   }
 397
 398   // We don't support sin/cos/sqrt/fmod/pow
 399   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 400   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 401   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
 402   setOperationAction(ISD::FREM , MVT::f64, Expand);
 403   setOperationAction(ISD::FPOW , MVT::f64, Expand);
 404   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 405   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 406   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
 407   setOperationAction(ISD::FREM , MVT::f32, Expand);
 408   setOperationAction(ISD::FPOW , MVT::f32, Expand);
 409
 410   // MASS transformation for LLVM intrinsics with replicating fast-math flag
 411   // to be consistent to PPCGenScalarMASSEntries pass
 412   if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
 413     setOperationAction(ISD::FSIN , MVT::f64, Custom);
 414     setOperationAction(ISD::FCOS , MVT::f64, Custom);
 415     setOperationAction(ISD::FPOW , MVT::f64, Custom);
 416     setOperationAction(ISD::FLOG, MVT::f64, Custom);
 417     setOperationAction(ISD::FLOG10, MVT::f64, Custom);
 418     setOperationAction(ISD::FEXP, MVT::f64, Custom);
 419     setOperationAction(ISD::FSIN , MVT::f32, Custom);
 420     setOperationAction(ISD::FCOS , MVT::f32, Custom);
 421     setOperationAction(ISD::FPOW , MVT::f32, Custom);
 422     setOperationAction(ISD::FLOG, MVT::f32, Custom);
 423     setOperationAction(ISD::FLOG10, MVT::f32, Custom);
 424     setOperationAction(ISD::FEXP, MVT::f32, Custom);
 425   }
 426
 427   if (Subtarget.hasSPE()) {
 428     setOperationAction(ISD::FMA  , MVT::f64, Expand);
 429     setOperationAction(ISD::FMA  , MVT::f32, Expand);
 430   } else {
 431     setOperationAction(ISD::FMA  , MVT::f64, Legal);
 432     setOperationAction(ISD::FMA  , MVT::f32, Legal);
 433   }
 434
 435   if (Subtarget.hasSPE())
 436     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
 437
 438   setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
 439
 440   // If we're enabling GP optimizations, use hardware square root
 441   if (!Subtarget.hasFSQRT() &&
 442       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
 443         Subtarget.hasFRE()))
 444     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 445
 446   if (!Subtarget.hasFSQRT() &&
 447       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
 448         Subtarget.hasFRES()))
 449     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 450
 451   if (Subtarget.hasFCPSGN()) {
 452     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
 453     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
 454   } else {
 455     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 456     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 457   }
 458
 459   if (Subtarget.hasFPRND()) {
 460     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
 461     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
 462     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
 463     setOperationAction(ISD::FROUND, MVT::f64, Legal);
 464
 465     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
 466     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
 467     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
 468     setOperationAction(ISD::FROUND, MVT::f32, Legal);
 469   }
 470
 471   // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
 472   // instruction xxbrd to speed up scalar BSWAP64.
 473   if (Subtarget.isISA3_1()) {
 474     setOperationAction(ISD::BSWAP, MVT::i32, Legal);
 475     setOperationAction(ISD::BSWAP, MVT::i64, Legal);
 476   } else {
 477     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
 478     setOperationAction(
 479         ISD::BSWAP, MVT::i64,
 480         (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
 481   }
 482
 483   // CTPOP or CTTZ were introduced in P8/P9 respectively
 484   if (Subtarget.isISA3_0()) {
 485     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
 486     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
 487   } else {
 488     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
 489     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
 490   }
 491
 492   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
 493     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
 494     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
 495   } else {
 496     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
 497     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
 498   }
 499
 500   // PowerPC does not have ROTR
 501   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
 502   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
 503
 504   if (!Subtarget.useCRBits()) {
 505     // PowerPC does not have Select
 506     setOperationAction(ISD::SELECT, MVT::i32, Expand);
 507     setOperationAction(ISD::SELECT, MVT::i64, Expand);
 508     setOperationAction(ISD::SELECT, MVT::f32, Expand);
 509     setOperationAction(ISD::SELECT, MVT::f64, Expand);
 510   }
 511
 512   // PowerPC wants to turn select_cc of FP into fsel when possible.
 513   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
 514   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
 515
 516   // PowerPC wants to optimize integer setcc a bit
 517   if (!Subtarget.useCRBits())
 518     setOperationAction(ISD::SETCC, MVT::i32, Custom);
 519
 520   if (Subtarget.hasFPU()) {
 521     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
 522     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
 523     setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
 524
 525     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
 526     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
 527     setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
 528   }
 529
 530   // PowerPC does not have BRCOND which requires SetCC
 531   if (!Subtarget.useCRBits())
 532     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
 533
 534   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
 535
 536   if (Subtarget.hasSPE()) {
 537     // SPE has built-in conversions
 538     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
 539     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
 540     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
 541     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
 542     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
 543     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
 544
 545     // SPE supports signaling compare of f32/f64.
 546     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
 547     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
 548   } else {
 549     // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
 550     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
 551     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 552
 553     // PowerPC does not have [U|S]INT_TO_FP
 554     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
 555     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
 556     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
 557     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
 558   }
 559
 560   if (Subtarget.hasDirectMove() && isPPC64) {
 561     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
 562     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
 563     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
 564     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
 565     if (TM.Options.UnsafeFPMath) {
 566       setOperationAction(ISD::LRINT, MVT::f64, Legal);
 567       setOperationAction(ISD::LRINT, MVT::f32, Legal);
 568       setOperationAction(ISD::LLRINT, MVT::f64, Legal);
 569       setOperationAction(ISD::LLRINT, MVT::f32, Legal);
 570       setOperationAction(ISD::LROUND, MVT::f64, Legal);
 571       setOperationAction(ISD::LROUND, MVT::f32, Legal);
 572       setOperationAction(ISD::LLROUND, MVT::f64, Legal);
 573       setOperationAction(ISD::LLROUND, MVT::f32, Legal);
 574     }
 575   } else {
 576     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
 577     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
 578     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
 579     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
 580   }
 581
 582   // We cannot sextinreg(i1).  Expand to shifts.
 583   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 584
 585   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
 586   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  587   // support continuations, user-level threading, etc. As a result, no
 588   // other SjLj exception interfaces are implemented and please don't build
 589   // your own exception handling based on them.
 590   // LLVM/Clang supports zero-cost DWARF exception handling.
 591   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
 592   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
 593
 594   // We want to legalize GlobalAddress and ConstantPool nodes into the
 595   // appropriate instructions to materialize the address.
 596   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
 597   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
 598   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
 599   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
 600   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
 601   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
 602   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
 603   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
 604   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
 605   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
 606
 607   // TRAP is legal.
 608   setOperationAction(ISD::TRAP, MVT::Other, Legal);
 609
 610   // TRAMPOLINE is custom lowered.
 611   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
 612   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
 613
 614   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 615   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 616
 617   if (Subtarget.is64BitELFABI()) {
 618     // VAARG always uses double-word chunks, so promote anything smaller.
 619     setOperationAction(ISD::VAARG, MVT::i1, Promote);
 620     AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
 621     setOperationAction(ISD::VAARG, MVT::i8, Promote);
 622     AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
 623     setOperationAction(ISD::VAARG, MVT::i16, Promote);
 624     AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
 625     setOperationAction(ISD::VAARG, MVT::i32, Promote);
 626     AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
 627     setOperationAction(ISD::VAARG, MVT::Other, Expand);
 628   } else if (Subtarget.is32BitELFABI()) {
 629     // VAARG is custom lowered with the 32-bit SVR4 ABI.
 630     setOperationAction(ISD::VAARG, MVT::Other, Custom);
 631     setOperationAction(ISD::VAARG, MVT::i64, Custom);
 632   } else
 633     setOperationAction(ISD::VAARG, MVT::Other, Expand);
 634
 635   // VACOPY is custom lowered with the 32-bit SVR4 ABI.
 636   if (Subtarget.is32BitELFABI())
 637     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
 638   else
 639     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 640
 641   // Use the default implementation.
 642   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 643   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 644   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
 645   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
 646   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
 647   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
 648   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
 649   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
 650   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
 651
 652   // We want to custom lower some of our intrinsics.
 653   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 654   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
 655   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
 656   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
 657   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);
 658
 659   // To handle counter-based loop conditions.
 660   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
 661
 662   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
 663   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
 664   setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
 665   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
 666
 667   // Comparisons that require checking two conditions.
 668   if (Subtarget.hasSPE()) {
 669     setCondCodeAction(ISD::SETO, MVT::f32, Expand);
 670     setCondCodeAction(ISD::SETO, MVT::f64, Expand);
 671     setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
 672     setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
 673   }
 674   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
 675   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
 676   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
 677   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
 678   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
 679   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
 680   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
 681   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
 682   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
 683   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
 684   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
 685   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
 686
 687   setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
 688   setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
 689
 690   if (Subtarget.has64BitSupport()) {
 691     // They also have instructions for converting between i64 and fp.
 692     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
 693     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
 694     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
 695     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
 696     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 697     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
 698     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 699     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
 700     // This is just the low 32 bits of a (signed) fp->i64 conversion.
 701     // We cannot do this with Promote because i64 is not a legal type.
 702     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
 703     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 704
 705     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
 706       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 707       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
 708     }
 709   } else {
 710     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
 711     if (Subtarget.hasSPE()) {
 712       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
 713       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
 714     } else {
 715       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
 716       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
 717     }
 718   }
 719
 720   // With the instructions enabled under FPCVT, we can do everything.
 721   if (Subtarget.hasFPCVT()) {
 722     if (Subtarget.has64BitSupport()) {
 723       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
 724       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
 725       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
 726       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
 727       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 728       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
 729       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 730       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 731     }
 732
 733     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
 734     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
 735     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
 736     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
 737     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 738     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 739     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 740     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 741   }
 742
 743   if (Subtarget.use64BitRegs()) {
 744     // 64-bit PowerPC implementations can support i64 types directly
 745     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
 746     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 747     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 748     // 64-bit PowerPC wants to expand i128 shifts itself.
 749     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
 750     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
 751     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
 752   } else {
 753     // 32-bit PowerPC wants to expand i64 shifts itself.
 754     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
 755     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
 756     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
 757   }
 758
 759   // PowerPC has better expansions for funnel shifts than the generic
 760   // TargetLowering::expandFunnelShift.
 761   if (Subtarget.has64BitSupport()) {
 762     setOperationAction(ISD::FSHL, MVT::i64, Custom);
 763     setOperationAction(ISD::FSHR, MVT::i64, Custom);
 764   }
 765   setOperationAction(ISD::FSHL, MVT::i32, Custom);
 766   setOperationAction(ISD::FSHR, MVT::i32, Custom);
 767
 768   if (Subtarget.hasVSX()) {
 769     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
 770     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
 771     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
 772     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
 773   }
 774
 775   if (Subtarget.hasAltivec()) {
 776     for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
 777       setOperationAction(ISD::SADDSAT, VT, Legal);
 778       setOperationAction(ISD::SSUBSAT, VT, Legal);
 779       setOperationAction(ISD::UADDSAT, VT, Legal);
 780       setOperationAction(ISD::USUBSAT, VT, Legal);
 781     }
 782     // First set operation action for all vector types to expand. Then we
 783     // will selectively turn on ones that can be effectively codegen'd.
 784     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
 785       // add/sub are legal for all supported vector VT's.
 786       setOperationAction(ISD::ADD, VT, Legal);
 787       setOperationAction(ISD::SUB, VT, Legal);
 788
 789       // For v2i64, these are only valid with P8Vector. This is corrected after
 790       // the loop.
 791       if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
 792         setOperationAction(ISD::SMAX, VT, Legal);
 793         setOperationAction(ISD::SMIN, VT, Legal);
 794         setOperationAction(ISD::UMAX, VT, Legal);
 795         setOperationAction(ISD::UMIN, VT, Legal);
 796       }
 797       else {
 798         setOperationAction(ISD::SMAX, VT, Expand);
 799         setOperationAction(ISD::SMIN, VT, Expand);
 800         setOperationAction(ISD::UMAX, VT, Expand);
 801         setOperationAction(ISD::UMIN, VT, Expand);
 802       }
 803
 804       if (Subtarget.hasVSX()) {
 805         setOperationAction(ISD::FMAXNUM, VT, Legal);
 806         setOperationAction(ISD::FMINNUM, VT, Legal);
 807       }
 808
 809       // Vector instructions introduced in P8
 810       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
 811         setOperationAction(ISD::CTPOP, VT, Legal);
 812         setOperationAction(ISD::CTLZ, VT, Legal);
 813       }
 814       else {
 815         setOperationAction(ISD::CTPOP, VT, Expand);
 816         setOperationAction(ISD::CTLZ, VT, Expand);
 817       }
 818
 819       // Vector instructions introduced in P9
 820       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
 821         setOperationAction(ISD::CTTZ, VT, Legal);
 822       else
 823         setOperationAction(ISD::CTTZ, VT, Expand);
 824
 825       // We promote all shuffles to v16i8.
 826       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
 827       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
 828
 829       // We promote all non-typed operations to v4i32.
 830       setOperationAction(ISD::AND   , VT, Promote);
 831       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
 832       setOperationAction(ISD::OR    , VT, Promote);
 833       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
 834       setOperationAction(ISD::XOR   , VT, Promote);
 835       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
 836       setOperationAction(ISD::LOAD  , VT, Promote);
 837       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
 838       setOperationAction(ISD::SELECT, VT, Promote);
 839       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
 840       setOperationAction(ISD::VSELECT, VT, Legal);
 841       setOperationAction(ISD::SELECT_CC, VT, Promote);
 842       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
 843       setOperationAction(ISD::STORE, VT, Promote);
 844       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
 845
 846       // No other operations are legal.
 847       setOperationAction(ISD::MUL , VT, Expand);
 848       setOperationAction(ISD::SDIV, VT, Expand);
 849       setOperationAction(ISD::SREM, VT, Expand);
 850       setOperationAction(ISD::UDIV, VT, Expand);
 851       setOperationAction(ISD::UREM, VT, Expand);
 852       setOperationAction(ISD::FDIV, VT, Expand);
 853       setOperationAction(ISD::FREM, VT, Expand);
 854       setOperationAction(ISD::FNEG, VT, Expand);
 855       setOperationAction(ISD::FSQRT, VT, Expand);
 856       setOperationAction(ISD::FLOG, VT, Expand);
 857       setOperationAction(ISD::FLOG10, VT, Expand);
 858       setOperationAction(ISD::FLOG2, VT, Expand);
 859       setOperationAction(ISD::FEXP, VT, Expand);
 860       setOperationAction(ISD::FEXP2, VT, Expand);
 861       setOperationAction(ISD::FSIN, VT, Expand);
 862       setOperationAction(ISD::FCOS, VT, Expand);
 863       setOperationAction(ISD::FABS, VT, Expand);
 864       setOperationAction(ISD::FFLOOR, VT, Expand);
 865       setOperationAction(ISD::FCEIL,  VT, Expand);
 866       setOperationAction(ISD::FTRUNC, VT, Expand);
 867       setOperationAction(ISD::FRINT,  VT, Expand);
 868       setOperationAction(ISD::FLDEXP, VT, Expand);
 869       setOperationAction(ISD::FNEARBYINT, VT, Expand);
 870       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
 871       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
 872       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
 873       setOperationAction(ISD::MULHU, VT, Expand);
 874       setOperationAction(ISD::MULHS, VT, Expand);
 875       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 876       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 877       setOperationAction(ISD::UDIVREM, VT, Expand);
 878       setOperationAction(ISD::SDIVREM, VT, Expand);
 879       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
 880       setOperationAction(ISD::FPOW, VT, Expand);
 881       setOperationAction(ISD::BSWAP, VT, Expand);
 882       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
 883       setOperationAction(ISD::ROTL, VT, Expand);
 884       setOperationAction(ISD::ROTR, VT, Expand);
 885
 886       for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
 887         setTruncStoreAction(VT, InnerVT, Expand);
 888         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
 889         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
 890         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
 891       }
 892     }
 893     setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
 894     if (!Subtarget.hasP8Vector()) {
 895       setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
 896       setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
 897       setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
 898       setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
 899     }
 900
 901     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
 902     // with merges, splats, etc.
 903     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
 904
 905     // Vector truncates to sub-word integer that fit in an Altivec/VSX register
 906     // are cheap, so handle them before they get expanded to scalar.
 907     setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
 908     setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
 909     setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
 910     setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
 911     setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
 912
 913     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
 914     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
 915     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
 916     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
 917     setOperationAction(ISD::SELECT, MVT::v4i32,
 918                        Subtarget.useCRBits() ? Legal : Expand);
 919     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
 920     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
 921     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
 922     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
 923     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
 924     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
 925     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
 926     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
 927     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
 928     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
 929     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
 930     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
 931     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
 932
 933     // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
 934     setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
 935     // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
 936     if (Subtarget.hasAltivec())
 937       for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
 938         setOperationAction(ISD::ROTL, VT, Legal);
 939     // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
 940     if (Subtarget.hasP8Altivec())
 941       setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
 942
 943     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
 944     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
 945     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
 946     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
 947
 948     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
 949     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
 950
 951     if (Subtarget.hasVSX()) {
 952       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 953       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
 954       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
 955     }
 956
 957     if (Subtarget.hasP8Altivec())
 958       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
 959     else
 960       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
 961
 962     if (Subtarget.isISA3_1()) {
 963       setOperationAction(ISD::MUL, MVT::v2i64, Legal);
 964       setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
 965       setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
 966       setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
 967       setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
 968       setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
 969       setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
 970       setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
 971       setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
 972       setOperationAction(ISD::UREM, MVT::v2i64, Legal);
 973       setOperationAction(ISD::SREM, MVT::v2i64, Legal);
 974       setOperationAction(ISD::UREM, MVT::v4i32, Legal);
 975       setOperationAction(ISD::SREM, MVT::v4i32, Legal);
 976       setOperationAction(ISD::UREM, MVT::v1i128, Legal);
 977       setOperationAction(ISD::SREM, MVT::v1i128, Legal);
 978       setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
 979       setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
 980       setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
 981     }
 982
 983     setOperationAction(ISD::MUL, MVT::v8i16, Legal);
 984     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 985
 986     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 987     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
 988
 989     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
 990     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
 991     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
 992     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
 993
 994     // Altivec does not contain unordered floating-point compare instructions
 995     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
 996     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
 997     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
 998     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
 999
1000     if (Subtarget.hasVSX()) {
1001       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
1002       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
1003       if (Subtarget.hasP8Vector()) {
1004         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
1005         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
1006       }
1007       if (Subtarget.hasDirectMove() && isPPC64) {
1008         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
1009         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
1010         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
1011         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
1012         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
1013         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
1014         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
1015         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
1016       }
1017       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
1018
1019       // The nearbyint variants are not allowed to raise the inexact exception
1020       // so we can only code-gen them with unsafe math.
1021       if (TM.Options.UnsafeFPMath) {
1022         setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1023         setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1024       }
1025
1026       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1027       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1028       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1029       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
1030       setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1031       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1032       setOperationAction(ISD::FROUND, MVT::f64, Legal);
1033       setOperationAction(ISD::FRINT, MVT::f64, Legal);
1034
1035       setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
1036       setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1037       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1038       setOperationAction(ISD::FROUND, MVT::f32, Legal);
1039       setOperationAction(ISD::FRINT, MVT::f32, Legal);
1040
1041       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1042       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1043
1044       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1045       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1046
1047       // Share the Altivec comparison restrictions.
1048       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1049       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1050       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
1051       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1052
1053       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1054       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1055
1056       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
1057
1058       if (Subtarget.hasP8Vector())
1059         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1060
1061       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1062
1063       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1064       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1065       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1066
1067       if (Subtarget.hasP8Altivec()) {
1068         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1069         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1070         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1071
1072         // 128 bit shifts can be accomplished via 3 instructions for SHL and
1073         // SRL, but not for SRA because of the instructions available:
1074         // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1075         // doing
1076         setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1077         setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1078         setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1079
1080         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1081       }
1082       else {
1083         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1084         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1085         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1086
1087         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1088
1089         // VSX v2i64 only supports non-arithmetic operations.
1090         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1091         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1092       }
1093
1094       if (Subtarget.isISA3_1())
1095         setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1096       else
1097         setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1098
1099       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1100       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1101       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1102       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1103
1104       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
1105
1106       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
1107       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
1108       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
1109       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
1110       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1111       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1112       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1113       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1114
1115       // Custom handling for partial vectors of integers converted to
1116       // floating point. We already have optimal handling for v2i32 through
1117       // the DAG combine, so those aren't necessary.
1118       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
1119       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
1120       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
1121       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
1122       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
1123       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
1124       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
1125       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
1126       setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
1127       setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
1128       setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
1129       setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1130       setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
1131       setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
1132       setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
1133       setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1134
1135       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1136       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1137       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1138       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1139       setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1140       setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
1141
1142       setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1143       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1144
1145       // Handle constrained floating-point operations of vector.
1146       // The predictor is `hasVSX` because altivec instruction has
1147       // no exception but VSX vector instruction has.
1148       setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1149       setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1150       setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1151       setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1152       setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
1153       setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1154       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
1155       setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
1156       setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
1157       setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
1158       setOperationAction(ISD::STRICT_FCEIL,  MVT::v4f32, Legal);
1159       setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
1160       setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
1161
1162       setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1163       setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1164       setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1165       setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1166       setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
1167       setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1168       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
1169       setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
1170       setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
1171       setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
1172       setOperationAction(ISD::STRICT_FCEIL,  MVT::v2f64, Legal);
1173       setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
1174       setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
1175
1176       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1177       addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1178
1179       for (MVT FPT : MVT::fp_valuetypes())
1180         setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1181
1182       // Expand the SELECT to SELECT_CC
1183       setOperationAction(ISD::SELECT, MVT::f128, Expand);
1184
1185       setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1186       setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1187
1188       // No implementation for these ops for PowerPC.
1189       setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
1190       setOperationAction(ISD::FSIN, MVT::f128, Expand);
1191       setOperationAction(ISD::FCOS, MVT::f128, Expand);
1192       setOperationAction(ISD::FPOW, MVT::f128, Expand);
1193       setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1194       setOperationAction(ISD::FREM, MVT::f128, Expand);
1195     }
1196
1197     if (Subtarget.hasP8Altivec()) {
1198       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1199       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1200     }
1201
1202     if (Subtarget.hasP9Vector()) {
1203       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1204       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1205
1206       // Test data class instructions store results in CR bits.
1207       if (Subtarget.useCRBits()) {
1208         setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
1209         setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
1210         setOperationAction(ISD::IS_FPCLASS, MVT::f128, Custom);
1211       }
1212
1213       // 128 bit shifts can be accomplished via 3 instructions for SHL and
1214       // SRL, but not for SRA because of the instructions available:
1215       // VS{RL} and VS{RL}O.
1216       setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1217       setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1218       setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1219
1220       setOperationAction(ISD::FADD, MVT::f128, Legal);
1221       setOperationAction(ISD::FSUB, MVT::f128, Legal);
1222       setOperationAction(ISD::FDIV, MVT::f128, Legal);
1223       setOperationAction(ISD::FMUL, MVT::f128, Legal);
1224       setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1225
1226       setOperationAction(ISD::FMA, MVT::f128, Legal);
1227       setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
1228       setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
1229       setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
1230       setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
1231       setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
1232       setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
1233
1234       setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1235       setOperationAction(ISD::FRINT, MVT::f128, Legal);
1236       setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1237       setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1238       setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1239       setOperationAction(ISD::FROUND, MVT::f128, Legal);
1240
1241       setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1242       setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1243       setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1244
1245       // Handle constrained floating-point operations of fp128
1246       setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1247       setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1248       setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1249       setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1250       setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1251       setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1252       setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
1253       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
1254       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
1255       setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
1256       setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
1257       setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
1258       setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
1259       setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
1260       setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
1261       setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1262       setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1263       setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1264       setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1265       setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1266     } else if (Subtarget.hasVSX()) {
1267       setOperationAction(ISD::LOAD, MVT::f128, Promote);
1268       setOperationAction(ISD::STORE, MVT::f128, Promote);
1269
1270       AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1271       AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1272
1273       // Set FADD/FSUB as libcall to avoid the legalizer to expand the
1274       // fp_to_uint and int_to_fp.
1275       setOperationAction(ISD::FADD, MVT::f128, LibCall);
1276       setOperationAction(ISD::FSUB, MVT::f128, LibCall);
1277
1278       setOperationAction(ISD::FMUL, MVT::f128, Expand);
1279       setOperationAction(ISD::FDIV, MVT::f128, Expand);
1280       setOperationAction(ISD::FNEG, MVT::f128, Expand);
1281       setOperationAction(ISD::FABS, MVT::f128, Expand);
1282       setOperationAction(ISD::FSQRT, MVT::f128, Expand);
1283       setOperationAction(ISD::FMA, MVT::f128, Expand);
1284       setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
1285
1286       // Expand the fp_extend if the target type is fp128.
1287       setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
1288       setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand);
1289
1290       // Expand the fp_round if the source type is fp128.
1291       for (MVT VT : {MVT::f32, MVT::f64}) {
1292         setOperationAction(ISD::FP_ROUND, VT, Custom);
1293         setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
1294       }
1295
1296       setOperationAction(ISD::SETCC, MVT::f128, Custom);
1297       setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
1298       setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
1299       setOperationAction(ISD::BR_CC, MVT::f128, Expand);
1300
1301       // Lower following f128 select_cc pattern:
1302       // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1303       setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
1304
1305       // We need to handle f128 SELECT_CC with integer result type.
1306       setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
1307       setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1308     }
1309
1310     if (Subtarget.hasP9Altivec()) {
1311       if (Subtarget.isISA3_1()) {
1312         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
1313         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal);
1314         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal);
1315         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
1316       } else {
1317         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1318         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1319       }
1320       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8,  Legal);
1321       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1322       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1323       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8,  Legal);
1324       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1325       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1326       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1327
1328       setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1329       setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1330       setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1331       setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1332     }
1333
1334     if (Subtarget.hasP10Vector()) {
1335       setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
1336     }
1337   }
1338
1339   if (Subtarget.pairedVectorMemops()) {
1340     addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1341     setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1342     setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1343   }
1344   if (Subtarget.hasMMA()) {
1345     if (Subtarget.isISAFuture())
1346       addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1347     else
1348       addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1349     setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1350     setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1351     setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
1352   }
1353
1354   if (Subtarget.has64BitSupport())
1355     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1356
1357   if (Subtarget.isISA3_1())
1358     setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1359
1360   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1361
1362   if (!isPPC64) {
1363     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
1364     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1365   }
1366
1367   if (shouldInlineQuadwordAtomics()) {
1368     setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
1369     setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
1370     setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom);
1371   }
1372
1373   setBooleanContents(ZeroOrOneBooleanContent);
1374
1375   if (Subtarget.hasAltivec()) {
1376     // Altivec instructions set fields to all zeros or all ones.
1377     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1378   }
1379
1380   if (shouldInlineQuadwordAtomics())
1381     setMaxAtomicSizeInBitsSupported(128);
1382   else if (isPPC64)
1383     setMaxAtomicSizeInBitsSupported(64);
1384   else
1385     setMaxAtomicSizeInBitsSupported(32);
1386
1387   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1388
1389   // We have target-specific dag combine patterns for the following nodes:
1390   setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL,
1391                        ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
1392   if (Subtarget.hasFPCVT())
1393     setTargetDAGCombine(ISD::UINT_TO_FP);
1394   setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1395   if (Subtarget.useCRBits())
1396     setTargetDAGCombine(ISD::BRCOND);
1397   setTargetDAGCombine({ISD::BSWAP, ISD::INTRINSIC_WO_CHAIN,
1398                        ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID});
1399
1400   setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, ISD::ANY_EXTEND});
1401
1402   setTargetDAGCombine({ISD::TRUNCATE, ISD::VECTOR_SHUFFLE});
1403
1404   if (Subtarget.useCRBits()) {
1405     setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC});
1406   }
1407
1408   setLibcallName(RTLIB::LOG_F128, "logf128");
1409   setLibcallName(RTLIB::LOG2_F128, "log2f128");
1410   setLibcallName(RTLIB::LOG10_F128, "log10f128");
1411   setLibcallName(RTLIB::EXP_F128, "expf128");
1412   setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1413   setLibcallName(RTLIB::SIN_F128, "sinf128");
1414   setLibcallName(RTLIB::COS_F128, "cosf128");
1415   setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
1416   setLibcallName(RTLIB::POW_F128, "powf128");
1417   setLibcallName(RTLIB::FMIN_F128, "fminf128");
1418   setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1419   setLibcallName(RTLIB::REM_F128, "fmodf128");
1420   setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1421   setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1422   setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1423   setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1424   setLibcallName(RTLIB::ROUND_F128, "roundf128");
1425   setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1426   setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1427   setLibcallName(RTLIB::RINT_F128, "rintf128");
1428   setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1429   setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1430   setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1431   setLibcallName(RTLIB::FMA_F128, "fmaf128");
1432   setLibcallName(RTLIB::FREXP_F128, "frexpf128");
1433
1434   if (Subtarget.isAIXABI()) {
1435     setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1436     setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1437     setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1438     setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1439   }
1440
1441   // With 32 condition bits, we don't need to sink (and duplicate) compares
1442   // aggressively in CodeGenPrep.
1443   if (Subtarget.useCRBits()) {
1444     setHasMultipleConditionRegisters();
1445     setJumpIsExpensive();
1446   }
1447
1448   // TODO: The default entry number is set to 64. This stops most jump table
1449   // generation on PPC. But it is good for current PPC HWs because the indirect
1450   // branch instruction mtctr to the jump table may lead to bad branch predict.
1451   // Re-evaluate this value on future HWs that can do better with mtctr.
1452   setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
1453
1454   setMinFunctionAlignment(Align(4));
1455
1456   switch (Subtarget.getCPUDirective()) {
1457   default: break;
1458   case PPC::DIR_970:
1459   case PPC::DIR_A2:
1460   case PPC::DIR_E500:
1461   case PPC::DIR_E500mc:
1462   case PPC::DIR_E5500:
1463   case PPC::DIR_PWR4:
1464   case PPC::DIR_PWR5:
1465   case PPC::DIR_PWR5X:
1466   case PPC::DIR_PWR6:
1467   case PPC::DIR_PWR6X:
1468   case PPC::DIR_PWR7:
1469   case PPC::DIR_PWR8:
1470   case PPC::DIR_PWR9:
1471   case PPC::DIR_PWR10:
1472   case PPC::DIR_PWR11:
1473   case PPC::DIR_PWR_FUTURE:
1474     setPrefLoopAlignment(Align(16));
1475     setPrefFunctionAlignment(Align(16));
1476     break;
1477   }
1478
1479   if (Subtarget.enableMachineScheduler())
1480     setSchedulingPreference(Sched::Source);
1481   else
1482     setSchedulingPreference(Sched::Hybrid);
1483
1484   computeRegisterProperties(STI.getRegisterInfo());
1485
1486   // The Freescale cores do better with aggressive inlining of memcpy and
1487   // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1488   if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1489       Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1490     MaxStoresPerMemset = 32;
1491     MaxStoresPerMemsetOptSize = 16;
1492     MaxStoresPerMemcpy = 32;
1493     MaxStoresPerMemcpyOptSize = 8;
1494     MaxStoresPerMemmove = 32;
1495     MaxStoresPerMemmoveOptSize = 8;
1496   } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1497     // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
1499     // over one hundred cycles.
1500     MaxStoresPerMemset = 128;
1501     MaxStoresPerMemcpy = 128;
1502     MaxStoresPerMemmove = 128;
1503     MaxLoadsPerMemcmp = 128;
1504   } else {
1505     MaxLoadsPerMemcmp = 8;
1506     MaxLoadsPerMemcmpOptSize = 4;
1507   }
1508
1509   IsStrictFPEnabled = true;
1510
1511   // Let the subtarget (CPU) decide if a predictable select is more expensive
1512   // than the corresponding branch. This information is used in CGP to decide
1513   // when to convert selects into branches.
1514   PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1515
1516   GatherAllAliasesMaxDepth = PPCGatherAllAliasesMaxDepth;
1517 }
1518
1519 // *********************************** NOTE ************************************
1520 // For selecting load and store instructions, the addressing modes are defined
1521 // as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
// patterns to match the load and store instructions.
1523 //
1524 // The TD definitions for the addressing modes correspond to their respective
1525 // Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1526 // on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1527 // address mode flags of a particular node. Afterwards, the computed address
1528 // flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1529 // addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1530 // accordingly, based on the preferred addressing mode.
1531 //
1532 // Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1533 // MemOpFlags contains all the possible flags that can be used to compute the
1534 // optimal addressing mode for load and store instructions.
1535 // AddrMode contains all the possible load and store addressing modes available
1536 // on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1537 //
1538 // When adding new load and store instructions, it is possible that new address
1539 // flags may need to be added into MemOpFlags, and a new addressing mode will
1540 // need to be added to AddrMode. An entry of the new addressing mode (consisting
1541 // of the minimal and main distinguishing address flags for the new load/store
1542 // instructions) will need to be added into initializeAddrModeMap() below.
1543 // Finally, when adding new addressing modes, the getAddrModeForFlags() will
1544 // need to be updated to account for selecting the optimal addressing mode.
1545 // *****************************************************************************
1546 /// Initialize the map that relates the different addressing modes of the load
1547 /// and store instructions to a set of flags. This ensures the load/store
1548 /// instruction is correctly matched during instruction selection.
1549 void PPCTargetLowering::initializeAddrModeMap() {
1550   AddrModesMap[PPC::AM_DForm] = {
1551       // LWZ, STW
1552       PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt,
1553       PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_WordInt,
1554       PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
1555       PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
1556       // LBZ, LHZ, STB, STH
1557       PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
1558       PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
1559       PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
1560       PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
1561       // LHA
1562       PPC::MOF_SExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
1563       PPC::MOF_SExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
1564       PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
1565       PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
1566       // LFS, LFD, STFS, STFD
1567       PPC::MOF_RPlusSImm16 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1568       PPC::MOF_RPlusLo | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1569       PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1570       PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1571   };
1572   AddrModesMap[PPC::AM_DSForm] = {
1573       // LWA
1574       PPC::MOF_SExt | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_WordInt,
1575       PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
1576       PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
1577       // LD, STD
1578       PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_DoubleWordInt,
1579       PPC::MOF_NotAddNorCst | PPC::MOF_DoubleWordInt,
1580       PPC::MOF_AddrIsSImm32 | PPC::MOF_DoubleWordInt,
1581       // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1582       PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1583       PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1584       PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1585   };
1586   AddrModesMap[PPC::AM_DQForm] = {
1587       // LXV, STXV
1588       PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1589       PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1590       PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1591   };
1592   AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1593                                        PPC::MOF_SubtargetP10};
1594   // TODO: Add mapping for quadword load/store.
1595 }
1596
1597 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1598 /// the desired ByVal argument alignment.
1599 static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1600   if (MaxAlign == MaxMaxAlign)
1601     return;
1602   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1603     if (MaxMaxAlign >= 32 &&
1604         VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1605       MaxAlign = Align(32);
1606     else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1607              MaxAlign < 16)
1608       MaxAlign = Align(16);
1609   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1610     Align EltAlign;
1611     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1612     if (EltAlign > MaxAlign)
1613       MaxAlign = EltAlign;
1614   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1615     for (auto *EltTy : STy->elements()) {
1616       Align EltAlign;
1617       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1618       if (EltAlign > MaxAlign)
1619         MaxAlign = EltAlign;
1620       if (MaxAlign == MaxMaxAlign)
1621         break;
1622     }
1623   }
1624 }
1625
1626 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1627 /// function arguments in the caller parameter area.
1628 uint64_t PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1629                                                   const DataLayout &DL) const {
1630   // 16byte and wider vectors are passed on 16byte boundary.
1631   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1632   Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1633   if (Subtarget.hasAltivec())
1634     getMaxByValAlign(Ty, Alignment, Align(16));
1635   return Alignment.value();
1636 }
1637
1638 bool PPCTargetLowering::useSoftFloat() const {
1639   return Subtarget.useSoftFloat();
1640 }
1641
1642 bool PPCTargetLowering::hasSPE() const {
1643   return Subtarget.hasSPE();
1644 }
1645
1646 bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1647   return VT.isScalarInteger();
1648 }
1649
1650 bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
1651     Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1652   if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1653     return false;
1654
1655   if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1656     if (VTy->getScalarType()->isIntegerTy()) {
1657       // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1658       if (ElemSizeInBits == 32) {
1659         Index = Subtarget.isLittleEndian() ? 2 : 1;
1660         return true;
1661       }
1662       if (ElemSizeInBits == 64) {
1663         Index = Subtarget.isLittleEndian() ? 1 : 0;
1664         return true;
1665       }
1666     }
1667   }
1668   return false;
1669 }
1670
1671 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1672   switch ((PPCISD::NodeType)Opcode) {
1673   case PPCISD::FIRST_NUMBER:    break;
1674   case PPCISD::FSEL:            return "PPCISD::FSEL";
1675   case PPCISD::XSMAXC:          return "PPCISD::XSMAXC";
1676   case PPCISD::XSMINC:          return "PPCISD::XSMINC";
1677   case PPCISD::FCFID:           return "PPCISD::FCFID";
1678   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1679   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1680   case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1681   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1682   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1683   case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1684   case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1685   case PPCISD::FRE:             return "PPCISD::FRE";
1686   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1687   case PPCISD::FTSQRT:
1688     return "PPCISD::FTSQRT";
1689   case PPCISD::FSQRT:
1690     return "PPCISD::FSQRT";
1691   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1692   case PPCISD::VPERM:           return "PPCISD::VPERM";
1693   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1694   case PPCISD::XXSPLTI_SP_TO_DP:
1695     return "PPCISD::XXSPLTI_SP_TO_DP";
1696   case PPCISD::XXSPLTI32DX:
1697     return "PPCISD::XXSPLTI32DX";
1698   case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
1699   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
1700   case PPCISD::XXPERM:
1701     return "PPCISD::XXPERM";
1702   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1703   case PPCISD::CMPB:            return "PPCISD::CMPB";
1704   case PPCISD::Hi:              return "PPCISD::Hi";
1705   case PPCISD::Lo:              return "PPCISD::Lo";
1706   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1707   case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1708   case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1709   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1710   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1711   case PPCISD::PROBED_ALLOCA:   return "PPCISD::PROBED_ALLOCA";
1712   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1713   case PPCISD::SRL:             return "PPCISD::SRL";
1714   case PPCISD::SRA:             return "PPCISD::SRA";
1715   case PPCISD::SHL:             return "PPCISD::SHL";
1716   case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1717   case PPCISD::CALL:            return "PPCISD::CALL";
1718   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1719   case PPCISD::CALL_NOTOC:      return "PPCISD::CALL_NOTOC";
1720   case PPCISD::CALL_RM:
1721     return "PPCISD::CALL_RM";
1722   case PPCISD::CALL_NOP_RM:
1723     return "PPCISD::CALL_NOP_RM";
1724   case PPCISD::CALL_NOTOC_RM:
1725     return "PPCISD::CALL_NOTOC_RM";
1726   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1727   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1728   case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1729   case PPCISD::BCTRL_RM:
1730     return "PPCISD::BCTRL_RM";
1731   case PPCISD::BCTRL_LOAD_TOC_RM:
1732     return "PPCISD::BCTRL_LOAD_TOC_RM";
1733   case PPCISD::RET_GLUE:        return "PPCISD::RET_GLUE";
1734   case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1735   case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1736   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1737   case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1738   case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1739   case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1740   case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1741   case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1742   case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1743   case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1744     return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1745   case PPCISD::ANDI_rec_1_EQ_BIT:
1746     return "PPCISD::ANDI_rec_1_EQ_BIT";
1747   case PPCISD::ANDI_rec_1_GT_BIT:
1748     return "PPCISD::ANDI_rec_1_GT_BIT";
1749   case PPCISD::VCMP:            return "PPCISD::VCMP";
1750   case PPCISD::VCMP_rec:        return "PPCISD::VCMP_rec";
1751   case PPCISD::LBRX:            return "PPCISD::LBRX";
1752   case PPCISD::STBRX:           return "PPCISD::STBRX";
1753   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1754   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1755   case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1756   case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1757   case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1758   case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1759   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1760   case PPCISD::LOAD_VEC_BE:     return "PPCISD::LOAD_VEC_BE";
1761   case PPCISD::STORE_VEC_BE:    return "PPCISD::STORE_VEC_BE";
1762   case PPCISD::ST_VSR_SCAL_INT:
1763                                 return "PPCISD::ST_VSR_SCAL_INT";
1764   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1765   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1766   case PPCISD::BDZ:             return "PPCISD::BDZ";
1767   case PPCISD::MFFS:            return "PPCISD::MFFS";
1768   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1769   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1770   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1771   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1772   case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1773   case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1774   case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1775   case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1776   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1777   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1778   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1779   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1780   case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1781   case PPCISD::GET_TPOINTER:    return "PPCISD::GET_TPOINTER";
1782   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1783   case PPCISD::TLSGD_AIX:       return "PPCISD::TLSGD_AIX";
1784   case PPCISD::TLSLD_AIX:       return "PPCISD::TLSLD_AIX";
1785   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1786   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1787   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1788   case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1789   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1790   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1791   case PPCISD::PADDI_DTPREL:
1792     return "PPCISD::PADDI_DTPREL";
1793   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1794   case PPCISD::SC:              return "PPCISD::SC";
1795   case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1796   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1797   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1798   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1799   case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1800   case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
1801   case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
1802   case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
1803   case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
1804   case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
1805   case PPCISD::FP_EXTEND_HALF:  return "PPCISD::FP_EXTEND_HALF";
1806   case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
1807   case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1808     return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1809   case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1810     return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1811   case PPCISD::ACC_BUILD:       return "PPCISD::ACC_BUILD";
1812   case PPCISD::PAIR_BUILD:      return "PPCISD::PAIR_BUILD";
1813   case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1814   case PPCISD::XXMFACC:         return "PPCISD::XXMFACC";
1815   case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
1816   case PPCISD::ZEXT_LD_SPLAT:   return "PPCISD::ZEXT_LD_SPLAT";
1817   case PPCISD::SEXT_LD_SPLAT:   return "PPCISD::SEXT_LD_SPLAT";
1818   case PPCISD::FNMSUB:          return "PPCISD::FNMSUB";
1819   case PPCISD::STRICT_FADDRTZ:
1820     return "PPCISD::STRICT_FADDRTZ";
1821   case PPCISD::STRICT_FCTIDZ:
1822     return "PPCISD::STRICT_FCTIDZ";
1823   case PPCISD::STRICT_FCTIWZ:
1824     return "PPCISD::STRICT_FCTIWZ";
1825   case PPCISD::STRICT_FCTIDUZ:
1826     return "PPCISD::STRICT_FCTIDUZ";
1827   case PPCISD::STRICT_FCTIWUZ:
1828     return "PPCISD::STRICT_FCTIWUZ";
1829   case PPCISD::STRICT_FCFID:
1830     return "PPCISD::STRICT_FCFID";
1831   case PPCISD::STRICT_FCFIDU:
1832     return "PPCISD::STRICT_FCFIDU";
1833   case PPCISD::STRICT_FCFIDS:
1834     return "PPCISD::STRICT_FCFIDS";
1835   case PPCISD::STRICT_FCFIDUS:
1836     return "PPCISD::STRICT_FCFIDUS";
1837   case PPCISD::LXVRZX:          return "PPCISD::LXVRZX";
1838   case PPCISD::STORE_COND:
1839     return "PPCISD::STORE_COND";
1840   }
1841   return nullptr;
1842 }
1843
1844 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1845                                           EVT VT) const {
1846   if (!VT.isVector())
1847     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1848
1849   return VT.changeVectorElementTypeToInteger();
1850 }
1851
1852 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1853   assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1854   return true;
1855 }
1856
1857 //===----------------------------------------------------------------------===//
1858 // Node matching predicates, for use by the tblgen matching code.
1859 //===----------------------------------------------------------------------===//
1860
1861 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1862 static bool isFloatingPointZero(SDValue Op) {
1863   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1864     return CFP->getValueAPF().isZero();
1865   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1866     // Maybe this has already been legalized into the constant pool?
1867     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1868       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1869         return CFP->getValueAPF().isZero();
1870   }
1871   return false;
1872 }
1873
/// isConstantOrUndef - \p Op is a shuffle-mask entry, where a negative value
/// stands for undef. Return true if \p Op is undef or equals \p Val.
static bool isConstantOrUndef(int Op, int Val) {
  if (Op < 0)
    return true;
  return Op == Val;
}
1879
1880 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1881 /// VPKUHUM instruction.
1882 /// The ShuffleKind distinguishes between big-endian operations with
1883 /// two different inputs (0), either-endian operations with two identical
1884 /// inputs (1), and little-endian operations with two different inputs (2).
1885 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1886 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1887                                SelectionDAG &DAG) {
1888   bool IsLE = DAG.getDataLayout().isLittleEndian();
1889   if (ShuffleKind == 0) {
1890     if (IsLE)
1891       return false;
1892     for (unsigned i = 0; i != 16; ++i)
1893       if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1894         return false;
1895   } else if (ShuffleKind == 2) {
1896     if (!IsLE)
1897       return false;
1898     for (unsigned i = 0; i != 16; ++i)
1899       if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1900         return false;
1901   } else if (ShuffleKind == 1) {
1902     unsigned j = IsLE ? 0 : 1;
1903     for (unsigned i = 0; i != 8; ++i)
1904       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1905           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1906         return false;
1907   }
1908   return true;
1909 }
1910
1911 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1912 /// VPKUWUM instruction.
1913 /// The ShuffleKind distinguishes between big-endian operations with
1914 /// two different inputs (0), either-endian operations with two identical
1915 /// inputs (1), and little-endian operations with two different inputs (2).
1916 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1917 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1918                                SelectionDAG &DAG) {
1919   bool IsLE = DAG.getDataLayout().isLittleEndian();
1920   if (ShuffleKind == 0) {
1921     if (IsLE)
1922       return false;
1923     for (unsigned i = 0; i != 16; i += 2)
1924       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
1925           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1926         return false;
1927   } else if (ShuffleKind == 2) {
1928     if (!IsLE)
1929       return false;
1930     for (unsigned i = 0; i != 16; i += 2)
1931       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1932           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1933         return false;
1934   } else if (ShuffleKind == 1) {
1935     unsigned j = IsLE ? 0 : 2;
1936     for (unsigned i = 0; i != 8; i += 2)
1937       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1938           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1939           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1940           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1941         return false;
1942   }
1943   return true;
1944 }
1945
1946 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1947 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1948 /// current subtarget.
1949 ///
1950 /// The ShuffleKind distinguishes between big-endian operations with
1951 /// two different inputs (0), either-endian operations with two identical
1952 /// inputs (1), and little-endian operations with two different inputs (2).
1953 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1954 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1955                                SelectionDAG &DAG) {
1956   const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1957   if (!Subtarget.hasP8Vector())
1958     return false;
1959
1960   bool IsLE = DAG.getDataLayout().isLittleEndian();
1961   if (ShuffleKind == 0) {
1962     if (IsLE)
1963       return false;
1964     for (unsigned i = 0; i != 16; i += 4)
1965       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
1966           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
1967           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
1968           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
1969         return false;
1970   } else if (ShuffleKind == 2) {
1971     if (!IsLE)
1972       return false;
1973     for (unsigned i = 0; i != 16; i += 4)
1974       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1975           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
1976           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
1977           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
1978         return false;
1979   } else if (ShuffleKind == 1) {
1980     unsigned j = IsLE ? 0 : 4;
1981     for (unsigned i = 0; i != 8; i += 4)
1982       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1983           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1984           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
1985           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
1986           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1987           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
1988           !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1989           !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1990         return false;
1991   }
1992   return true;
1993 }
1994
1995 /// isVMerge - Common function, used to match vmrg* shuffles.
1996 ///
1997 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1998                      unsigned LHSStart, unsigned RHSStart) {
1999   if (N->getValueType(0) != MVT::v16i8)
2000     return false;
2001   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2002          "Unsupported merge size!");
2003
2004   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
2005     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
2006       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2007                              LHSStart+j+i*UnitSize) ||
2008           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2009                              RHSStart+j+i*UnitSize))
2010         return false;
2011     }
2012   return true;
2013 }
2014
2015 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2016 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2017 /// The ShuffleKind distinguishes between big-endian merges with two
2018 /// different inputs (0), either-endian merges with two identical inputs (1),
2019 /// and little-endian merges with two different inputs (2).  For the latter,
2020 /// the input operands are swapped (see PPCInstrAltivec.td).
2021 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2022                              unsigned ShuffleKind, SelectionDAG &DAG) {
2023   if (DAG.getDataLayout().isLittleEndian()) {
2024     if (ShuffleKind == 1) // unary
2025       return isVMerge(N, UnitSize, 0, 0);
2026     else if (ShuffleKind == 2) // swapped
2027       return isVMerge(N, UnitSize, 0, 16);
2028     else
2029       return false;
2030   } else {
2031     if (ShuffleKind == 1) // unary
2032       return isVMerge(N, UnitSize, 8, 8);
2033     else if (ShuffleKind == 0) // normal
2034       return isVMerge(N, UnitSize, 8, 24);
2035     else
2036       return false;
2037   }
2038 }
2039
2040 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2041 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2042 /// The ShuffleKind distinguishes between big-endian merges with two
2043 /// different inputs (0), either-endian merges with two identical inputs (1),
2044 /// and little-endian merges with two different inputs (2).  For the latter,
2045 /// the input operands are swapped (see PPCInstrAltivec.td).
2046 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2047                              unsigned ShuffleKind, SelectionDAG &DAG) {
2048   if (DAG.getDataLayout().isLittleEndian()) {
2049     if (ShuffleKind == 1) // unary
2050       return isVMerge(N, UnitSize, 8, 8);
2051     else if (ShuffleKind == 2) // swapped
2052       return isVMerge(N, UnitSize, 8, 24);
2053     else
2054       return false;
2055   } else {
2056     if (ShuffleKind == 1) // unary
2057       return isVMerge(N, UnitSize, 0, 0);
2058     else if (ShuffleKind == 0) // normal
2059       return isVMerge(N, UnitSize, 0, 16);
2060     else
2061       return false;
2062   }
2063 }
2064
2065 /**
2066  * Common function used to match vmrgew and vmrgow shuffles
2067  *
2068  * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
 * machine.
2071  *   - Little Endian:
2072  *     - Use offset of 0 to check for odd elements
2073  *     - Use offset of 4 to check for even elements
2074  *   - Big Endian:
2075  *     - Use offset of 0 to check for even elements
2076  *     - Use offset of 4 to check for odd elements
2077  * A detailed description of the vector element ordering for little endian and
2078  * big endian can be found at
2079  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2080  * Targeting your applications - what little endian and big endian IBM XL C/C++
2081  * compiler differences mean to you
2082  *
2083  * The mask to the shuffle vector instruction specifies the indices of the
2084  * elements from the two input vectors to place in the result. The elements are
2085  * numbered in array-access order, starting with the first vector. These vectors
2086  * are always of type v16i8, thus each vector will contain 16 elements of size
2087  * 8. More info on the shuffle vector can be found in the
2088  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2089  * Language Reference.
2090  *
2091  * The RHSStartValue indicates whether the same input vectors are used (unary)
2092  * or two different input vectors are used, based on the following:
2093  *   - If the instruction uses the same vector for both inputs, the range of the
2094  *     indices will be 0 to 15. In this case, the RHSStart value passed should
2095  *     be 0.
2096  *   - If the instruction has two different vectors then the range of the
2097  *     indices will be 0 to 31. In this case, the RHSStart value passed should
2098  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
2099  *     to 31 specify elements in the second vector).
2100  *
2101  * \param[in] N The shuffle vector SD Node to analyze
2102  * \param[in] IndexOffset Specifies whether to look for even or odd elements
2103  * \param[in] RHSStartValue Specifies the starting index for the righthand input
2104  * vector to the shuffle_vector instruction
2105  * \return true iff this shuffle vector represents an even or odd word merge
2106  */
2107 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2108                      unsigned RHSStartValue) {
2109   if (N->getValueType(0) != MVT::v16i8)
2110     return false;
2111
2112   for (unsigned i = 0; i < 2; ++i)
2113     for (unsigned j = 0; j < 4; ++j)
2114       if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2115                              i*RHSStartValue+j+IndexOffset) ||
2116           !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2117                              i*RHSStartValue+j+IndexOffset+8))
2118         return false;
2119   return true;
2120 }
2121
2122 /**
2123  * Determine if the specified shuffle mask is suitable for the vmrgew or
2124  * vmrgow instructions.
2125  *
2126  * \param[in] N The shuffle vector SD Node to analyze
2127  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2128  * \param[in] ShuffleKind Identify the type of merge:
2129  *   - 0 = big-endian merge with two different inputs;
2130  *   - 1 = either-endian merge with two identical inputs;
2131  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
2132  *     little-endian merges).
2133  * \param[in] DAG The current SelectionDAG
2134  * \return true iff this shuffle mask
2135  */
2136 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
2137                               unsigned ShuffleKind, SelectionDAG &DAG) {
2138   if (DAG.getDataLayout().isLittleEndian()) {
2139     unsigned indexOffset = CheckEven ? 4 : 0;
2140     if (ShuffleKind == 1) // Unary
2141       return isVMerge(N, indexOffset, 0);
2142     else if (ShuffleKind == 2) // swapped
2143       return isVMerge(N, indexOffset, 16);
2144     else
2145       return false;
2146   }
2147   else {
2148     unsigned indexOffset = CheckEven ? 0 : 4;
2149     if (ShuffleKind == 1) // Unary
2150       return isVMerge(N, indexOffset, 0);
2151     else if (ShuffleKind == 0) // Normal
2152       return isVMerge(N, indexOffset, 16);
2153     else
2154       return false;
2155   }
2156   return false;
2157 }
2158
2159 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2160 /// amount, otherwise return -1.
2161 /// The ShuffleKind distinguishes between big-endian operations with two
2162 /// different inputs (0), either-endian operations with two identical inputs
2163 /// (1), and little-endian operations with two different inputs (2).  For the
2164 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
2165 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2166                              SelectionDAG &DAG) {
2167   if (N->getValueType(0) != MVT::v16i8)
2168     return -1;
2169
2170   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2171
2172   // Find the first non-undef value in the shuffle mask.
2173   unsigned i;
2174   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2175     /*search*/;
2176
2177   if (i == 16) return -1;  // all undef.
2178
2179   // Otherwise, check to see if the rest of the elements are consecutively
2180   // numbered from this value.
2181   unsigned ShiftAmt = SVOp->getMaskElt(i);
2182   if (ShiftAmt < i) return -1;
2183
2184   ShiftAmt -= i;
2185   bool isLE = DAG.getDataLayout().isLittleEndian();
2186
2187   if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2188     // Check the rest of the elements to see if they are consecutive.
2189     for (++i; i != 16; ++i)
2190       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2191         return -1;
2192   } else if (ShuffleKind == 1) {
2193     // Check the rest of the elements to see if they are consecutive.
2194     for (++i; i != 16; ++i)
2195       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2196         return -1;
2197   } else
2198     return -1;
2199
2200   if (isLE)
2201     ShiftAmt = 16 - ShiftAmt;
2202
2203   return ShiftAmt;
2204 }
2205
2206 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2207 /// specifies a splat of a single element that is suitable for input to
2208 /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2209 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2210   EVT VT = N->getValueType(0);
2211   if (VT == MVT::v2i64 || VT == MVT::v2f64)
2212     return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2213
2214   assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2215          EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2216
2217   // The consecutive indices need to specify an element, not part of two
2218   // different elements.  So abandon ship early if this isn't the case.
2219   if (N->getMaskElt(0) % EltSize != 0)
2220     return false;
2221
2222   // This is a splat operation if each element of the permute is the same, and
2223   // if the value doesn't reference the second vector.
2224   unsigned ElementBase = N->getMaskElt(0);
2225
2226   // FIXME: Handle UNDEF elements too!
2227   if (ElementBase >= 16)
2228     return false;
2229
2230   // Check that the indices are consecutive, in the case of a multi-byte element
2231   // splatted with a v16i8 mask.
2232   for (unsigned i = 1; i != EltSize; ++i)
2233     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2234       return false;
2235
2236   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2237     if (N->getMaskElt(i) < 0) continue;
2238     for (unsigned j = 0; j != EltSize; ++j)
2239       if (N->getMaskElt(i+j) != N->getMaskElt(j))
2240         return false;
2241   }
2242   return true;
2243 }
2244
2245 /// Check that the mask is shuffling N byte elements. Within each N byte
2246 /// element of the mask, the indices could be either in increasing or
2247 /// decreasing order as long as they are consecutive.
2248 /// \param[in] N the shuffle vector SD Node to analyze
2249 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2250 /// Word/DoubleWord/QuadWord).
2251 /// \param[in] StepLen the delta indices number among the N byte element, if
2252 /// the mask is in increasing/decreasing order then it is 1/-1.
2253 /// \return true iff the mask is shuffling N byte elements.
2254 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2255                                    int StepLen) {
2256   assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2257          "Unexpected element width.");
2258   assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
2259
2260   unsigned NumOfElem = 16 / Width;
2261   unsigned MaskVal[16]; //  Width is never greater than 16
2262   for (unsigned i = 0; i < NumOfElem; ++i) {
2263     MaskVal[0] = N->getMaskElt(i * Width);
2264     if ((StepLen == 1) && (MaskVal[0] % Width)) {
2265       return false;
2266     } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2267       return false;
2268     }
2269
2270     for (unsigned int j = 1; j < Width; ++j) {
2271       MaskVal[j] = N->getMaskElt(i * Width + j);
2272       if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2273         return false;
2274       }
2275     }
2276   }
2277
2278   return true;
2279 }
2280
2281 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2282                           unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2283   if (!isNByteElemShuffleMask(N, 4, 1))
2284     return false;
2285
2286   // Now we look at mask elements 0,4,8,12
2287   unsigned M0 = N->getMaskElt(0) / 4;
2288   unsigned M1 = N->getMaskElt(4) / 4;
2289   unsigned M2 = N->getMaskElt(8) / 4;
2290   unsigned M3 = N->getMaskElt(12) / 4;
2291   unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2292   unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2293
2294   // Below, let H and L be arbitrary elements of the shuffle mask
2295   // where H is in the range [4,7] and L is in the range [0,3].
2296   // H, 1, 2, 3 or L, 5, 6, 7
2297   if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2298       (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2299     ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2300     InsertAtByte = IsLE ? 12 : 0;
2301     Swap = M0 < 4;
2302     return true;
2303   }
2304   // 0, H, 2, 3 or 4, L, 6, 7
2305   if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2306       (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2307     ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2308     InsertAtByte = IsLE ? 8 : 4;
2309     Swap = M1 < 4;
2310     return true;
2311   }
2312   // 0, 1, H, 3 or 4, 5, L, 7
2313   if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2314       (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2315     ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2316     InsertAtByte = IsLE ? 4 : 8;
2317     Swap = M2 < 4;
2318     return true;
2319   }
2320   // 0, 1, 2, H or 4, 5, 6, L
2321   if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2322       (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2323     ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2324     InsertAtByte = IsLE ? 0 : 12;
2325     Swap = M3 < 4;
2326     return true;
2327   }
2328
2329   // If both vector operands for the shuffle are the same vector, the mask will
2330   // contain only elements from the first one and the second one will be undef.
2331   if (N->getOperand(1).isUndef()) {
2332     ShiftElts = 0;
2333     Swap = true;
2334     unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2335     if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2336       InsertAtByte = IsLE ? 12 : 0;
2337       return true;
2338     }
2339     if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2340       InsertAtByte = IsLE ? 8 : 4;
2341       return true;
2342     }
2343     if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2344       InsertAtByte = IsLE ? 4 : 8;
2345       return true;
2346     }
2347     if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2348       InsertAtByte = IsLE ? 0 : 12;
2349       return true;
2350     }
2351   }
2352
2353   return false;
2354 }
2355
2356 bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2357                                bool &Swap, bool IsLE) {
2358   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2359   // Ensure each byte index of the word is consecutive.
2360   if (!isNByteElemShuffleMask(N, 4, 1))
2361     return false;
2362
2363   // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2364   unsigned M0 = N->getMaskElt(0) / 4;
2365   unsigned M1 = N->getMaskElt(4) / 4;
2366   unsigned M2 = N->getMaskElt(8) / 4;
2367   unsigned M3 = N->getMaskElt(12) / 4;
2368
2369   // If both vector operands for the shuffle are the same vector, the mask will
2370   // contain only elements from the first one and the second one will be undef.
2371   if (N->getOperand(1).isUndef()) {
2372     assert(M0 < 4 && "Indexing into an undef vector?");
2373     if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2374       return false;
2375
2376     ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2377     Swap = false;
2378     return true;
2379   }
2380
2381   // Ensure each word index of the ShuffleVector Mask is consecutive.
2382   if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2383     return false;
2384
2385   if (IsLE) {
2386     if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2387       // Input vectors don't need to be swapped if the leading element
2388       // of the result is one of the 3 left elements of the second vector
2389       // (or if there is no shift to be done at all).
2390       Swap = false;
2391       ShiftElts = (8 - M0) % 8;
2392     } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2393       // Input vectors need to be swapped if the leading element
2394       // of the result is one of the 3 left elements of the first vector
2395       // (or if we're shifting by 4 - thereby simply swapping the vectors).
2396       Swap = true;
2397       ShiftElts = (4 - M0) % 4;
2398     }
2399
2400     return true;
2401   } else {                                          // BE
2402     if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2403       // Input vectors don't need to be swapped if the leading element
2404       // of the result is one of the 4 elements of the first vector.
2405       Swap = false;
2406       ShiftElts = M0;
2407     } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2408       // Input vectors need to be swapped if the leading element
2409       // of the result is one of the 4 elements of the right vector.
2410       Swap = true;
2411       ShiftElts = M0 - 4;
2412     }
2413
2414     return true;
2415   }
2416 }
2417
2418 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2419   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2420
2421   if (!isNByteElemShuffleMask(N, Width, -1))
2422     return false;
2423
2424   for (int i = 0; i < 16; i += Width)
2425     if (N->getMaskElt(i) != i + Width - 1)
2426       return false;
2427
2428   return true;
2429 }
2430
2431 bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2432   return isXXBRShuffleMaskHelper(N, 2);
2433 }
2434
2435 bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2436   return isXXBRShuffleMaskHelper(N, 4);
2437 }
2438
2439 bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2440   return isXXBRShuffleMaskHelper(N, 8);
2441 }
2442
2443 bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2444   return isXXBRShuffleMaskHelper(N, 16);
2445 }
2446
2447 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2448 /// if the inputs to the instruction should be swapped and set \p DM to the
2449 /// value for the immediate.
2450 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2451 /// AND element 0 of the result comes from the first input (LE) or second input
2452 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2453 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2454 /// mask.
2455 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2456                                bool &Swap, bool IsLE) {
2457   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2458
2459   // Ensure each byte index of the double word is consecutive.
2460   if (!isNByteElemShuffleMask(N, 8, 1))
2461     return false;
2462
2463   unsigned M0 = N->getMaskElt(0) / 8;
2464   unsigned M1 = N->getMaskElt(8) / 8;
2465   assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2466
2467   // If both vector operands for the shuffle are the same vector, the mask will
2468   // contain only elements from the first one and the second one will be undef.
2469   if (N->getOperand(1).isUndef()) {
2470     if ((M0 | M1) < 2) {
2471       DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2472       Swap = false;
2473       return true;
2474     } else
2475       return false;
2476   }
2477
2478   if (IsLE) {
2479     if (M0 > 1 && M1 < 2) {
2480       Swap = false;
2481     } else if (M0 < 2 && M1 > 1) {
2482       M0 = (M0 + 2) % 4;
2483       M1 = (M1 + 2) % 4;
2484       Swap = true;
2485     } else
2486       return false;
2487
2488     // Note: if control flow comes here that means Swap is already set above
2489     DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2490     return true;
2491   } else { // BE
2492     if (M0 < 2 && M1 > 1) {
2493       Swap = false;
2494     } else if (M0 > 1 && M1 < 2) {
2495       M0 = (M0 + 2) % 4;
2496       M1 = (M1 + 2) % 4;
2497       Swap = true;
2498     } else
2499       return false;
2500
2501     // Note: if control flow comes here that means Swap is already set above
2502     DM = (M0 << 1) + (M1 & 1);
2503     return true;
2504   }
2505 }
2506
2507
2508 /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2509 /// appropriate for PPC mnemonics (which have a big endian bias - namely
2510 /// elements are counted from the left of the vector register).
2511 unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2512                                          SelectionDAG &DAG) {
2513   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2514   assert(isSplatShuffleMask(SVOp, EltSize));
2515   EVT VT = SVOp->getValueType(0);
2516
2517   if (VT == MVT::v2i64 || VT == MVT::v2f64)
2518     return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2519                                                 : SVOp->getMaskElt(0);
2520
2521   if (DAG.getDataLayout().isLittleEndian())
2522     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2523   else
2524     return SVOp->getMaskElt(0) / EltSize;
2525 }
2526
2527 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2528 /// by using a vspltis[bhw] instruction of the specified element size, return
2529 /// the constant being splatted.  The ByteSize field indicates the number of
2530 /// bytes of each element [124] -> [bhw].
2531 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2532   SDValue OpVal;
2533
2534   // If ByteSize of the splat is bigger than the element size of the
2535   // build_vector, then we have a case where we are checking for a splat where
2536   // multiple elements of the buildvector are folded together into a single
2537   // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2538   unsigned EltSize = 16/N->getNumOperands();
2539   if (EltSize < ByteSize) {
2540     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
2541     SDValue UniquedVals[4];
2542     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2543
2544     // See if all of the elements in the buildvector agree across.
2545     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2546       if (N->getOperand(i).isUndef()) continue;
2547       // If the element isn't a constant, bail fully out.
2548       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2549
2550       if (!UniquedVals[i&(Multiple-1)].getNode())
2551         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2552       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2553         return SDValue();  // no match.
2554     }
2555
2556     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2557     // either constant or undef values that are identical for each chunk.  See
2558     // if these chunks can form into a larger vspltis*.
2559
2560     // Check to see if all of the leading entries are either 0 or -1.  If
2561     // neither, then this won't fit into the immediate field.
2562     bool LeadingZero = true;
2563     bool LeadingOnes = true;
2564     for (unsigned i = 0; i != Multiple-1; ++i) {
2565       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
2566
2567       LeadingZero &= isNullConstant(UniquedVals[i]);
2568       LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2569     }
2570     // Finally, check the least significant entry.
2571     if (LeadingZero) {
2572       if (!UniquedVals[Multiple-1].getNode())
2573         return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
2574       int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2575       if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
2576         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2577     }
2578     if (LeadingOnes) {
2579       if (!UniquedVals[Multiple-1].getNode())
2580         return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2581       int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2582       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
2583         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2584     }
2585
2586     return SDValue();
2587   }
2588
2589   // Check to see if this buildvec has a single non-undef value in its elements.
2590   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2591     if (N->getOperand(i).isUndef()) continue;
2592     if (!OpVal.getNode())
2593       OpVal = N->getOperand(i);
2594     else if (OpVal != N->getOperand(i))
2595       return SDValue();
2596   }
2597
2598   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
2599
2600   unsigned ValSizeInBytes = EltSize;
2601   uint64_t Value = 0;
2602   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2603     Value = CN->getZExtValue();
2604   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2605     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2606     Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2607   }
2608
2609   // If the splat value is larger than the element value, then we can never do
2610   // this splat.  The only case that we could fit the replicated bits into our
2611   // immediate field for would be zero, and we prefer to use vxor for it.
2612   if (ValSizeInBytes < ByteSize) return SDValue();
2613
2614   // If the element value is larger than the splat value, check if it consists
2615   // of a repeated bit pattern of size ByteSize.
2616   if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2617     return SDValue();
2618
2619   // Properly sign extend the value.
2620   int MaskVal = SignExtend32(Value, ByteSize * 8);
2621
2622   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2623   if (MaskVal == 0) return SDValue();
2624
2625   // Finally, if this value fits in a 5 bit sext field, return it
2626   if (SignExtend32<5>(MaskVal) == MaskVal)
2627     return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2628   return SDValue();
2629 }
2630
2631 //===----------------------------------------------------------------------===//
2632 //  Addressing Mode Selection
2633 //===----------------------------------------------------------------------===//
2634
2635 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2636 /// or 64-bit immediate, and if the value can be accurately represented as a
2637 /// sign extension from a 16-bit value.  If so, this returns true and the
2638 /// immediate.
2639 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2640   if (!isa<ConstantSDNode>(N))
2641     return false;
2642
2643   Imm = (int16_t)N->getAsZExtVal();
2644   if (N->getValueType(0) == MVT::i32)
2645     return Imm == (int32_t)N->getAsZExtVal();
2646   else
2647     return Imm == (int64_t)N->getAsZExtVal();
2648 }
2649 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2650   return isIntS16Immediate(Op.getNode(), Imm);
2651 }
2652
2653 /// Used when computing address flags for selecting loads and stores.
2654 /// If we have an OR, check if the LHS and RHS are provably disjoint.
2655 /// An OR of two provably disjoint values is equivalent to an ADD.
2656 /// Most PPC load/store instructions compute the effective address as a sum,
2657 /// so doing this conversion is useful.
2658 static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2659   if (N.getOpcode() != ISD::OR)
2660     return false;
2661   KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2662   if (!LHSKnown.Zero.getBoolValue())
2663     return false;
2664   KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2665   return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2666 }
2667
2668 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2669 /// be represented as an indexed [r+r] operation.
2670 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2671                                                SDValue &Index,
2672                                                SelectionDAG &DAG) const {
2673   for (SDNode *U : N->uses()) {
2674     if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2675       if (Memop->getMemoryVT() == MVT::f64) {
2676           Base = N.getOperand(0);
2677           Index = N.getOperand(1);
2678           return true;
2679       }
2680     }
2681   }
2682   return false;
2683 }
2684
2685 /// isIntS34Immediate - This method tests if value of node given can be
2686 /// accurately represented as a sign extension from a 34-bit value.  If so,
2687 /// this returns true and the immediate.
2688 bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2689   if (!isa<ConstantSDNode>(N))
2690     return false;
2691
2692   Imm = (int64_t)N->getAsZExtVal();
2693   return isInt<34>(Imm);
2694 }
2695 bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2696   return isIntS34Immediate(Op.getNode(), Imm);
2697 }
2698
2699 /// SelectAddressRegReg - Given the specified addressed, check to see if it
2700 /// can be represented as an indexed [r+r] operation.  Returns false if it
2701 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2702 /// non-zero and N can be represented by a base register plus a signed 16-bit
2703 /// displacement, make a more precise judgement by checking (displacement % \p
2704 /// EncodingAlignment).
2705 bool PPCTargetLowering::SelectAddressRegReg(
2706     SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2707     MaybeAlign EncodingAlignment) const {
2708   // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2709   // a [pc+imm].
2710   if (SelectAddressPCRel(N, Base))
2711     return false;
2712
2713   int16_t Imm = 0;
2714   if (N.getOpcode() == ISD::ADD) {
2715     // Is there any SPE load/store (f64), which can't handle 16bit offset?
2716     // SPE load/store can only handle 8-bit offsets.
2717     if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2718         return true;
2719     if (isIntS16Immediate(N.getOperand(1), Imm) &&
2720         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2721       return false; // r+i
2722     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2723       return false;    // r+i
2724
2725     Base = N.getOperand(0);
2726     Index = N.getOperand(1);
2727     return true;
2728   } else if (N.getOpcode() == ISD::OR) {
2729     if (isIntS16Immediate(N.getOperand(1), Imm) &&
2730         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2731       return false; // r+i can fold it if we can.
2732
2733     // If this is an or of disjoint bitfields, we can codegen this as an add
2734     // (for better address arithmetic) if the LHS and RHS of the OR are provably
2735     // disjoint.
2736     KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2737
2738     if (LHSKnown.Zero.getBoolValue()) {
2739       KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2740       // If all of the bits are known zero on the LHS or RHS, the add won't
2741       // carry.
2742       if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2743         Base = N.getOperand(0);
2744         Index = N.getOperand(1);
2745         return true;
2746       }
2747     }
2748   }
2749
2750   return false;
2751 }
2752
2753 // If we happen to be doing an i64 load or store into a stack slot that has
2754 // less than a 4-byte alignment, then the frame-index elimination may need to
2755 // use an indexed load or store instruction (because the offset may not be a
2756 // multiple of 4). The extra register needed to hold the offset comes from the
2757 // register scavenger, and it is possible that the scavenger will need to use
2758 // an emergency spill slot. As a result, we need to make sure that a spill slot
2759 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2760 // stack slot.
2761 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2762   // FIXME: This does not handle the LWA case.
2763   if (VT != MVT::i64)
2764     return;
2765
2766   // NOTE: We'll exclude negative FIs here, which come from argument
2767   // lowering, because there are no known test cases triggering this problem
2768   // using packed structures (or similar). We can remove this exclusion if
2769   // we find such a test case. The reason why this is so test-case driven is
2770   // because this entire 'fixup' is only to prevent crashes (from the
2771   // register scavenger) on not-really-valid inputs. For example, if we have:
2772   //   %a = alloca i1
2773   //   %b = bitcast i1* %a to i64*
2774   //   store i64* a, i64 b
2775   // then the store should really be marked as 'align 1', but is not. If it
2776   // were marked as 'align 1' then the indexed form would have been
2777   // instruction-selected initially, and the problem this 'fixup' is preventing
2778   // won't happen regardless.
2779   if (FrameIdx < 0)
2780     return;
2781
2782   MachineFunction &MF = DAG.getMachineFunction();
2783   MachineFrameInfo &MFI = MF.getFrameInfo();
2784
2785   if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2786     return;
2787
2788   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2789   FuncInfo->setHasNonRISpills();
2790 }
2791
2792 /// Returns true if the address N can be represented by a base register plus
2793 /// a signed 16-bit displacement [r+imm], and if it is not better
2794 /// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
2795 /// displacements that are multiples of that value.
2796 bool PPCTargetLowering::SelectAddressRegImm(
2797     SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2798     MaybeAlign EncodingAlignment) const {
2799   // FIXME dl should come from parent load or store, not from address
2800   SDLoc dl(N);
2801
2802   // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2803   // a [pc+imm].
2804   if (SelectAddressPCRel(N, Base))
2805     return false;
2806
2807   // If this can be more profitably realized as r+r, fail.
2808   if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2809     return false;
2810
2811   if (N.getOpcode() == ISD::ADD) {
2812     int16_t imm = 0;
2813     if (isIntS16Immediate(N.getOperand(1), imm) &&
2814         (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2815       Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2816       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2817         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2818         fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2819       } else {
2820         Base = N.getOperand(0);
2821       }
2822       return true; // [r+i]
2823     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2824       // Match LOAD (ADD (X, Lo(G))).
2825       assert(!N.getOperand(1).getConstantOperandVal(1) &&
2826              "Cannot handle constant offsets yet!");
2827       Disp = N.getOperand(1).getOperand(0);  // The global address.
2828       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2829              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2830              Disp.getOpcode() == ISD::TargetConstantPool ||
2831              Disp.getOpcode() == ISD::TargetJumpTable);
2832       Base = N.getOperand(0);
2833       return true;  // [&g+r]
2834     }
2835   } else if (N.getOpcode() == ISD::OR) {
2836     int16_t imm = 0;
2837     if (isIntS16Immediate(N.getOperand(1), imm) &&
2838         (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2839       // If this is an or of disjoint bitfields, we can codegen this as an add
2840       // (for better address arithmetic) if the LHS and RHS of the OR are
2841       // provably disjoint.
2842       KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2843
2844       if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2845         // If all of the bits are known zero on the LHS or RHS, the add won't
2846         // carry.
2847         if (FrameIndexSDNode *FI =
2848               dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2849           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2850           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2851         } else {
2852           Base = N.getOperand(0);
2853         }
2854         Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2855         return true;
2856       }
2857     }
2858   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2859     // Loading from a constant address.
2860
2861     // If this address fits entirely in a 16-bit sext immediate field, codegen
2862     // this as "d, 0"
2863     int16_t Imm;
2864     if (isIntS16Immediate(CN, Imm) &&
2865         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2866       Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2867       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2868                              CN->getValueType(0));
2869       return true;
2870     }
2871
2872     // Handle 32-bit sext immediates with LIS + addr mode.
2873     if ((CN->getValueType(0) == MVT::i32 ||
2874          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2875         (!EncodingAlignment ||
2876          isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2877       int Addr = (int)CN->getZExtValue();
2878
2879       // Otherwise, break this down into an LIS + disp.
2880       Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2881
2882       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2883                                    MVT::i32);
2884       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2885       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2886       return true;
2887     }
2888   }
2889
2890   Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2891   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2892     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2893     fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2894   } else
2895     Base = N;
2896   return true;      // [r+0]
2897 }
2898
2899 /// Similar to the 16-bit case but for instructions that take a 34-bit
2900 /// displacement field (prefixed loads/stores).
2901 bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2902                                               SDValue &Base,
2903                                               SelectionDAG &DAG) const {
2904   // Only on 64-bit targets.
2905   if (N.getValueType() != MVT::i64)
2906     return false;
2907
2908   SDLoc dl(N);
2909   int64_t Imm = 0;
2910
2911   if (N.getOpcode() == ISD::ADD) {
2912     if (!isIntS34Immediate(N.getOperand(1), Imm))
2913       return false;
2914     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2915     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2916       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2917     else
2918       Base = N.getOperand(0);
2919     return true;
2920   }
2921
2922   if (N.getOpcode() == ISD::OR) {
2923     if (!isIntS34Immediate(N.getOperand(1), Imm))
2924       return false;
2925     // If this is an or of disjoint bitfields, we can codegen this as an add
2926     // (for better address arithmetic) if the LHS and RHS of the OR are
2927     // provably disjoint.
2928     KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2929     if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2930       return false;
2931     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2932       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2933     else
2934       Base = N.getOperand(0);
2935     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2936     return true;
2937   }
2938
2939   if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2940     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2941     Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2942     return true;
2943   }
2944
2945   return false;
2946 }
2947
2948 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2949 /// represented as an indexed [r+r] operation.
2950 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2951                                                 SDValue &Index,
2952                                                 SelectionDAG &DAG) const {
2953   // Check to see if we can easily represent this as an [r+r] address.  This
2954   // will fail if it thinks that the address is more profitably represented as
2955   // reg+imm, e.g. where imm = 0.
2956   if (SelectAddressRegReg(N, Base, Index, DAG))
2957     return true;
2958
2959   // If the address is the result of an add, we will utilize the fact that the
2960   // address calculation includes an implicit add.  However, we can reduce
2961   // register pressure if we do not materialize a constant just for use as the
2962   // index register.  We only get rid of the add if it is not an add of a
2963   // value and a 16-bit signed constant and both have a single use.
2964   int16_t imm = 0;
2965   if (N.getOpcode() == ISD::ADD &&
2966       (!isIntS16Immediate(N.getOperand(1), imm) ||
2967        !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2968     Base = N.getOperand(0);
2969     Index = N.getOperand(1);
2970     return true;
2971   }
2972
2973   // Otherwise, do it the hard way, using R0 as the base register.
2974   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2975                          N.getValueType());
2976   Index = N;
2977   return true;
2978 }
2979
2980 template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2981   Ty *PCRelCand = dyn_cast<Ty>(N);
2982   return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2983 }
2984
2985 /// Returns true if this address is a PC Relative address.
2986 /// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2987 /// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2988 bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2989   // This is a materialize PC Relative node. Always select this as PC Relative.
2990   Base = N;
2991   if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2992     return true;
2993   if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2994       isValidPCRelNode<GlobalAddressSDNode>(N) ||
2995       isValidPCRelNode<JumpTableSDNode>(N) ||
2996       isValidPCRelNode<BlockAddressSDNode>(N))
2997     return true;
2998   return false;
2999 }
3000
3001 /// Returns true if we should use a direct load into vector instruction
3002 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3003 static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3004
3005   // If there are any other uses other than scalar to vector, then we should
3006   // keep it as a scalar load -> direct move pattern to prevent multiple
3007   // loads.
3008   LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3009   if (!LD)
3010     return false;
3011
3012   EVT MemVT = LD->getMemoryVT();
3013   if (!MemVT.isSimple())
3014     return false;
3015   switch(MemVT.getSimpleVT().SimpleTy) {
3016   case MVT::i64:
3017     break;
3018   case MVT::i32:
3019     if (!ST.hasP8Vector())
3020       return false;
3021     break;
3022   case MVT::i16:
3023   case MVT::i8:
3024     if (!ST.hasP9Vector())
3025       return false;
3026     break;
3027   default:
3028     return false;
3029   }
3030
3031   SDValue LoadedVal(N, 0);
3032   if (!LoadedVal.hasOneUse())
3033     return false;
3034
3035   for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
3036        UI != UE; ++UI)
3037     if (UI.getUse().get().getResNo() == 0 &&
3038         UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3039         UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3040       return false;
3041
3042   return true;
3043 }
3044
3045 /// getPreIndexedAddressParts - returns true by value, base pointer and
3046 /// offset pointer and addressing mode by reference if the node's address
3047 /// can be legally represented as pre-indexed load / store address.
3048 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3049                                                   SDValue &Offset,
3050                                                   ISD::MemIndexedMode &AM,
3051                                                   SelectionDAG &DAG) const {
3052   if (DisablePPCPreinc) return false;
3053
3054   bool isLoad = true;
3055   SDValue Ptr;
3056   EVT VT;
3057   Align Alignment;
3058   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3059     Ptr = LD->getBasePtr();
3060     VT = LD->getMemoryVT();
3061     Alignment = LD->getAlign();
3062   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3063     Ptr = ST->getBasePtr();
3064     VT  = ST->getMemoryVT();
3065     Alignment = ST->getAlign();
3066     isLoad = false;
3067   } else
3068     return false;
3069
3070   // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3071   // instructions because we can fold these into a more efficient instruction
3072   // instead, (such as LXSD).
3073   if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3074     return false;
3075   }
3076
3077   // PowerPC doesn't have preinc load/store instructions for vectors
3078   if (VT.isVector())
3079     return false;
3080
3081   if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3082     // Common code will reject creating a pre-inc form if the base pointer
3083     // is a frame index, or if N is a store and the base pointer is either
3084     // the same as or a predecessor of the value being stored.  Check for
3085     // those situations here, and try with swapped Base/Offset instead.
3086     bool Swap = false;
3087
3088     if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3089       Swap = true;
3090     else if (!isLoad) {
3091       SDValue Val = cast<StoreSDNode>(N)->getValue();
3092       if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3093         Swap = true;
3094     }
3095
3096     if (Swap)
3097       std::swap(Base, Offset);
3098
3099     AM = ISD::PRE_INC;
3100     return true;
3101   }
3102
3103   // LDU/STU can only handle immediates that are a multiple of 4.
3104   if (VT != MVT::i64) {
3105     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3106       return false;
3107   } else {
3108     // LDU/STU need an address with at least 4-byte alignment.
3109     if (Alignment < Align(4))
3110       return false;
3111
3112     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3113       return false;
3114   }
3115
3116   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3117     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
3118     // sext i32 to i64 when addr mode is r+i.
3119     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3120         LD->getExtensionType() == ISD::SEXTLOAD &&
3121         isa<ConstantSDNode>(Offset))
3122       return false;
3123   }
3124
3125   AM = ISD::PRE_INC;
3126   return true;
3127 }
3128
3129 //===----------------------------------------------------------------------===//
3130 //  LowerOperation implementation
3131 //===----------------------------------------------------------------------===//
3132
3133 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
3134 /// and LoOpFlags to the target MO flags.
3135 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3136                                unsigned &HiOpFlags, unsigned &LoOpFlags,
3137                                const GlobalValue *GV = nullptr) {
3138   HiOpFlags = PPCII::MO_HA;
3139   LoOpFlags = PPCII::MO_LO;
3140
3141   // Don't use the pic base if not in PIC relocation model.
3142   if (IsPIC) {
3143     HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3144     LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3145   }
3146 }
3147
3148 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3149                              SelectionDAG &DAG) {
3150   SDLoc DL(HiPart);
3151   EVT PtrVT = HiPart.getValueType();
3152   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3153
3154   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3155   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3156
3157   // With PIC, the first instruction is actually "GR+hi(&G)".
3158   if (isPIC)
3159     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3160                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3161
3162   // Generate non-pic code that has direct accesses to the constant pool.
3163   // The address of the global is just (hi(&g)+lo(&g)).
3164   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3165 }
3166
3167 static void setUsesTOCBasePtr(MachineFunction &MF) {
3168   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3169   FuncInfo->setUsesTOCBasePtr();
3170 }
3171
3172 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3173   setUsesTOCBasePtr(DAG.getMachineFunction());
3174 }
3175
3176 SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3177                                        SDValue GA) const {
3178   const bool Is64Bit = Subtarget.isPPC64();
3179   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
3180   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
3181                         : Subtarget.isAIXABI()
3182                               ? DAG.getRegister(PPC::R2, VT)
3183                               : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3184   SDValue Ops[] = { GA, Reg };
3185   return DAG.getMemIntrinsicNode(
3186       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3187       MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
3188       MachineMemOperand::MOLoad);
3189 }
3190
3191 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3192                                              SelectionDAG &DAG) const {
3193   EVT PtrVT = Op.getValueType();
3194   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3195   const Constant *C = CP->getConstVal();
3196
3197   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3198   // The actual address of the GlobalValue is stored in the TOC.
3199   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3200     if (Subtarget.isUsingPCRelativeCalls()) {
3201       SDLoc DL(CP);
3202       EVT Ty = getPointerTy(DAG.getDataLayout());
3203       SDValue ConstPool = DAG.getTargetConstantPool(
3204           C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3205       return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3206     }
3207     setUsesTOCBasePtr(DAG);
3208     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3209     return getTOCEntry(DAG, SDLoc(CP), GA);
3210   }
3211
3212   unsigned MOHiFlag, MOLoFlag;
3213   bool IsPIC = isPositionIndependent();
3214   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3215
3216   if (IsPIC && Subtarget.isSVR4ABI()) {
3217     SDValue GA =
3218         DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3219     return getTOCEntry(DAG, SDLoc(CP), GA);
3220   }
3221
3222   SDValue CPIHi =
3223       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3224   SDValue CPILo =
3225       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3226   return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3227 }
3228
3229 // For 64-bit PowerPC, prefer the more compact relative encodings.
3230 // This trades 32 bits per jump table entry for one or two instructions
3231 // on the jump site.
3232 unsigned PPCTargetLowering::getJumpTableEncoding() const {
3233   if (isJumpTableRelative())
3234     return MachineJumpTableInfo::EK_LabelDifference32;
3235
3236   return TargetLowering::getJumpTableEncoding();
3237 }
3238
3239 bool PPCTargetLowering::isJumpTableRelative() const {
3240   if (UseAbsoluteJumpTables)
3241     return false;
3242   if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3243     return true;
3244   return TargetLowering::isJumpTableRelative();
3245 }
3246
3247 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3248                                                     SelectionDAG &DAG) const {
3249   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3250     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3251
3252   switch (getTargetMachine().getCodeModel()) {
3253   case CodeModel::Small:
3254   case CodeModel::Medium:
3255     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3256   default:
3257     return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3258                        getPointerTy(DAG.getDataLayout()));
3259   }
3260 }
3261
3262 const MCExpr *
3263 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3264                                                 unsigned JTI,
3265                                                 MCContext &Ctx) const {
3266   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3267     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3268
3269   switch (getTargetMachine().getCodeModel()) {
3270   case CodeModel::Small:
3271   case CodeModel::Medium:
3272     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3273   default:
3274     return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3275   }
3276 }
3277
3278 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3279   EVT PtrVT = Op.getValueType();
3280   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3281
3282   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3283   if (Subtarget.isUsingPCRelativeCalls()) {
3284     SDLoc DL(JT);
3285     EVT Ty = getPointerTy(DAG.getDataLayout());
3286     SDValue GA =
3287         DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3288     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3289     return MatAddr;
3290   }
3291
3292   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3293   // The actual address of the GlobalValue is stored in the TOC.
3294   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3295     setUsesTOCBasePtr(DAG);
3296     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3297     return getTOCEntry(DAG, SDLoc(JT), GA);
3298   }
3299
3300   unsigned MOHiFlag, MOLoFlag;
3301   bool IsPIC = isPositionIndependent();
3302   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3303
3304   if (IsPIC && Subtarget.isSVR4ABI()) {
3305     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3306                                         PPCII::MO_PIC_FLAG);
3307     return getTOCEntry(DAG, SDLoc(GA), GA);
3308   }
3309
3310   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3311   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3312   return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3313 }
3314
3315 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3316                                              SelectionDAG &DAG) const {
3317   EVT PtrVT = Op.getValueType();
3318   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3319   const BlockAddress *BA = BASDN->getBlockAddress();
3320
3321   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3322   if (Subtarget.isUsingPCRelativeCalls()) {
3323     SDLoc DL(BASDN);
3324     EVT Ty = getPointerTy(DAG.getDataLayout());
3325     SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3326                                            PPCII::MO_PCREL_FLAG);
3327     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3328     return MatAddr;
3329   }
3330
3331   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3332   // The actual BlockAddress is stored in the TOC.
3333   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3334     setUsesTOCBasePtr(DAG);
3335     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3336     return getTOCEntry(DAG, SDLoc(BASDN), GA);
3337   }
3338
3339   // 32-bit position-independent ELF stores the BlockAddress in the .got.
3340   if (Subtarget.is32BitELFABI() && isPositionIndependent())
3341     return getTOCEntry(
3342         DAG, SDLoc(BASDN),
3343         DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3344
3345   unsigned MOHiFlag, MOLoFlag;
3346   bool IsPIC = isPositionIndependent();
3347   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3348   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3349   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3350   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3351 }
3352
3353 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3354                                               SelectionDAG &DAG) const {
3355   if (Subtarget.isAIXABI())
3356     return LowerGlobalTLSAddressAIX(Op, DAG);
3357
3358   return LowerGlobalTLSAddressLinux(Op, DAG);
3359 }
3360
3361 /// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3362 /// and then apply the update.
3363 static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3364                                          SelectionDAG &DAG,
3365                                          const TargetMachine &TM) {
3366   // Initialize TLS model opt setting lazily:
3367   // (1) Use initial-exec for single TLS var references within current function.
3368   // (2) Use local-dynamic for multiple TLS var references within current
3369   // function.
3370   PPCFunctionInfo *FuncInfo =
3371       DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3372   if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3373     SmallPtrSet<const GlobalValue *, 8> TLSGV;
3374     // Iterate over all instructions within current function, collect all TLS
3375     // global variables (global variables taken as the first parameter to
3376     // Intrinsic::threadlocal_address).
3377     const Function &Func = DAG.getMachineFunction().getFunction();
3378     for (Function::const_iterator BI = Func.begin(), BE = Func.end(); BI != BE;
3379          ++BI)
3380       for (BasicBlock::const_iterator II = BI->begin(), IE = BI->end();
3381            II != IE; ++II)
3382         if (II->getOpcode() == Instruction::Call)
3383           if (const CallInst *CI = dyn_cast<const CallInst>(&*II))
3384             if (Function *CF = CI->getCalledFunction())
3385               if (CF->isDeclaration() &&
3386                   CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3387                 if (const GlobalValue *GV =
3388                         dyn_cast<GlobalValue>(II->getOperand(0))) {
3389                   TLSModel::Model GVModel = TM.getTLSModel(GV);
3390                   if (GVModel == TLSModel::LocalDynamic)
3391                     TLSGV.insert(GV);
3392                 }
3393
3394     unsigned TLSGVCnt = TLSGV.size();
3395     LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3396     if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3397       FuncInfo->setAIXFuncUseTLSIEForLD();
3398     FuncInfo->setAIXFuncTLSModelOptInitDone();
3399   }
3400
3401   if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3402     LLVM_DEBUG(
3403         dbgs() << DAG.getMachineFunction().getName()
3404                << " function is using the TLS-IE model for TLS-LD access.\n");
3405     Model = TLSModel::InitialExec;
3406   }
3407 }
3408
3409 SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3410                                                     SelectionDAG &DAG) const {
3411   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3412
3413   if (DAG.getTarget().useEmulatedTLS())
3414     report_fatal_error("Emulated TLS is not yet supported on AIX");
3415
3416   SDLoc dl(GA);
3417   const GlobalValue *GV = GA->getGlobal();
3418   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3419   bool Is64Bit = Subtarget.isPPC64();
3420   TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3421
3422   // Apply update to the TLS model.
3423   if (Subtarget.hasAIXShLibTLSModelOpt())
3424     updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());
3425
3426   bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3427
3428   if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3429     bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3430     bool HasAIXSmallTLSGlobalAttr = false;
3431     SDValue VariableOffsetTGA =
3432         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3433     SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3434     SDValue TLSReg;
3435
3436     if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3437       if (GVar->hasAttribute("aix-small-tls"))
3438         HasAIXSmallTLSGlobalAttr = true;
3439
3440     if (Is64Bit) {
3441       // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3442       // involves a load of the variable offset (from the TOC), followed by an
3443       // add of the loaded variable offset to R13 (the thread pointer).
3444       // This code sequence looks like:
3445       //    ld reg1,var[TC](2)
3446       //    add reg2, reg1, r13     // r13 contains the thread pointer
3447       TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3448
3449       // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3450       // global variable attribute, produce a faster access sequence for
3451       // local-exec TLS variables where the offset from the TLS base is encoded
3452       // as an immediate operand.
3453       //
3454       // We only utilize the faster local-exec access sequence when the TLS
3455       // variable has a size within the policy limit. We treat types that are
3456       // not sized or are empty as being over the policy size limit.
3457       if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3458           IsTLSLocalExecModel) {
3459         Type *GVType = GV->getValueType();
3460         if (GVType->isSized() && !GVType->isEmptyTy() &&
3461             GV->getDataLayout().getTypeAllocSize(GVType) <=
3462                 AIXSmallTlsPolicySizeLimit)
3463           return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3464       }
3465     } else {
3466       // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3467       // involves loading the variable offset from the TOC, generating a call to
3468       // .__get_tpointer to get the thread pointer (which will be in R3), and
3469       // adding the two together:
3470       //    lwz reg1,var[TC](2)
3471       //    bla .__get_tpointer
3472       //    add reg2, reg1, r3
3473       TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3474
3475       // We do not implement the 32-bit version of the faster access sequence
3476       // for local-exec that is controlled by the -maix-small-local-exec-tls
3477       // option, or the "aix-small-tls" global variable attribute.
3478       if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3479         report_fatal_error("The small-local-exec TLS access sequence is "
3480                            "currently only supported on AIX (64-bit mode).");
3481     }
3482     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3483   }
3484
3485   if (Model == TLSModel::LocalDynamic) {
3486     bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3487
3488     // We do not implement the 32-bit version of the faster access sequence
3489     // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3490     if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3491       report_fatal_error("The small-local-dynamic TLS access sequence is "
3492                          "currently only supported on AIX (64-bit mode).");
3493
3494     // For local-dynamic on AIX, we need to generate one TOC entry for each
3495     // variable offset, and a single module-handle TOC entry for the entire
3496     // file.
3497
3498     SDValue VariableOffsetTGA =
3499         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3500     SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3501
3502     Module *M = DAG.getMachineFunction().getFunction().getParent();
3503     GlobalVariable *TLSGV =
3504         dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3505             StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3506     TLSGV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
3507     assert(TLSGV && "Not able to create GV for _$TLSML.");
3508     SDValue ModuleHandleTGA =
3509         DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3510     SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3511     SDValue ModuleHandle =
3512         DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3513
3514     // With the -maix-small-local-dynamic-tls option, produce a faster access
3515     // sequence for local-dynamic TLS variables where the offset from the
3516     // module-handle is encoded as an immediate operand.
3517     //
3518     // We only utilize the faster local-dynamic access sequence when the TLS
3519     // variable has a size within the policy limit. We treat types that are
3520     // not sized or are empty as being over the policy size limit.
3521     if (HasAIXSmallLocalDynamicTLS) {
3522       Type *GVType = GV->getValueType();
3523       if (GVType->isSized() && !GVType->isEmptyTy() &&
3524           GV->getDataLayout().getTypeAllocSize(GVType) <=
3525               AIXSmallTlsPolicySizeLimit)
3526         return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3527                            ModuleHandle);
3528     }
3529
3530     return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3531   }
3532
3533   // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3534   // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3535   // need to generate two TOC entries, one for the variable offset, one for the
3536   // region handle. The global address for the TOC entry of the region handle is
3537   // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3538   // entry of the variable offset is created with MO_TLSGD_FLAG.
3539   SDValue VariableOffsetTGA =
3540       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3541   SDValue RegionHandleTGA =
3542       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3543   SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3544   SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3545   return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3546                      RegionHandle);
3547 }
3548
3549 SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3550                                                       SelectionDAG &DAG) const {
3551   // FIXME: TLS addresses currently use medium model code sequences,
3552   // which is the most useful form.  Eventually support for small and
3553   // large models could be added if users need it, at the cost of
3554   // additional complexity.
3555   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3556   if (DAG.getTarget().useEmulatedTLS())
3557     return LowerToTLSEmulatedModel(GA, DAG);
3558
3559   SDLoc dl(GA);
3560   const GlobalValue *GV = GA->getGlobal();
3561   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3562   bool is64bit = Subtarget.isPPC64();
3563   const Module *M = DAG.getMachineFunction().getFunction().getParent();
3564   PICLevel::Level picLevel = M->getPICLevel();
3565
3566   const TargetMachine &TM = getTargetMachine();
3567   TLSModel::Model Model = TM.getTLSModel(GV);
3568
3569   if (Model == TLSModel::LocalExec) {
3570     if (Subtarget.isUsingPCRelativeCalls()) {
3571       SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3572       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3573                                                PPCII::MO_TPREL_PCREL_FLAG);
3574       SDValue MatAddr =
3575           DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3576       return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3577     }
3578
3579     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3580                                                PPCII::MO_TPREL_HA);
3581     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3582                                                PPCII::MO_TPREL_LO);
3583     SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3584                              : DAG.getRegister(PPC::R2, MVT::i32);
3585
3586     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3587     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3588   }
3589
3590   if (Model == TLSModel::InitialExec) {
3591     bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3592     SDValue TGA = DAG.getTargetGlobalAddress(
3593         GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3594     SDValue TGATLS = DAG.getTargetGlobalAddress(
3595         GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3596     SDValue TPOffset;
3597     if (IsPCRel) {
3598       SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3599       TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3600                              MachinePointerInfo());
3601     } else {
3602       SDValue GOTPtr;
3603       if (is64bit) {
3604         setUsesTOCBasePtr(DAG);
3605         SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3606         GOTPtr =
3607             DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3608       } else {
3609         if (!TM.isPositionIndependent())
3610           GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3611         else if (picLevel == PICLevel::SmallPIC)
3612           GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3613         else
3614           GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3615       }
3616       TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3617     }
3618     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3619   }
3620
3621   if (Model == TLSModel::GeneralDynamic) {
3622     if (Subtarget.isUsingPCRelativeCalls()) {
3623       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3624                                                PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3625       return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3626     }
3627
3628     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3629     SDValue GOTPtr;
3630     if (is64bit) {
3631       setUsesTOCBasePtr(DAG);
3632       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3633       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3634                                    GOTReg, TGA);
3635     } else {
3636       if (picLevel == PICLevel::SmallPIC)
3637         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3638       else
3639         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3640     }
3641     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3642                        GOTPtr, TGA, TGA);
3643   }
3644
3645   if (Model == TLSModel::LocalDynamic) {
3646     if (Subtarget.isUsingPCRelativeCalls()) {
3647       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3648                                                PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3649       SDValue MatPCRel =
3650           DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3651       return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3652     }
3653
3654     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3655     SDValue GOTPtr;
3656     if (is64bit) {
3657       setUsesTOCBasePtr(DAG);
3658       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3659       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3660                            GOTReg, TGA);
3661     } else {
3662       if (picLevel == PICLevel::SmallPIC)
3663         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3664       else
3665         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3666     }
3667     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3668                                   PtrVT, GOTPtr, TGA, TGA);
3669     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3670                                       PtrVT, TLSAddr, TGA);
3671     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3672   }
3673
3674   llvm_unreachable("Unknown TLS model!");
3675 }
3676
3677 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3678                                               SelectionDAG &DAG) const {
3679   EVT PtrVT = Op.getValueType();
3680   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3681   SDLoc DL(GSDN);
3682   const GlobalValue *GV = GSDN->getGlobal();
3683
3684   // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3685   // The actual address of the GlobalValue is stored in the TOC.
3686   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3687     if (Subtarget.isUsingPCRelativeCalls()) {
3688       EVT Ty = getPointerTy(DAG.getDataLayout());
3689       if (isAccessedAsGotIndirect(Op)) {
3690         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3691                                                 PPCII::MO_GOT_PCREL_FLAG);
3692         SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3693         SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3694                                    MachinePointerInfo());
3695         return Load;
3696       } else {
3697         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3698                                                 PPCII::MO_PCREL_FLAG);
3699         return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3700       }
3701     }
3702     setUsesTOCBasePtr(DAG);
3703     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3704     return getTOCEntry(DAG, DL, GA);
3705   }
3706
3707   unsigned MOHiFlag, MOLoFlag;
3708   bool IsPIC = isPositionIndependent();
3709   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3710
3711   if (IsPIC && Subtarget.isSVR4ABI()) {
3712     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3713                                             GSDN->getOffset(),
3714                                             PPCII::MO_PIC_FLAG);
3715     return getTOCEntry(DAG, DL, GA);
3716   }
3717
3718   SDValue GAHi =
3719     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3720   SDValue GALo =
3721     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3722
3723   return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3724 }
3725
3726 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3727   bool IsStrict = Op->isStrictFPOpcode();
3728   ISD::CondCode CC =
3729       cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3730   SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3731   SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3732   SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3733   EVT LHSVT = LHS.getValueType();
3734   SDLoc dl(Op);
3735
3736   // Soften the setcc with libcall if it is fp128.
3737   if (LHSVT == MVT::f128) {
3738     assert(!Subtarget.hasP9Vector() &&
3739            "SETCC for f128 is already legal under Power9!");
3740     softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3741                         Op->getOpcode() == ISD::STRICT_FSETCCS);
3742     if (RHS.getNode())
3743       LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3744                         DAG.getCondCode(CC));
3745     if (IsStrict)
3746       return DAG.getMergeValues({LHS, Chain}, dl);
3747     return LHS;
3748   }
3749
3750   assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3751
3752   if (Op.getValueType() == MVT::v2i64) {
3753     // When the operands themselves are v2i64 values, we need to do something
3754     // special because VSX has no underlying comparison operations for these.
3755     if (LHS.getValueType() == MVT::v2i64) {
3756       // Equality can be handled by casting to the legal type for Altivec
3757       // comparisons, everything else needs to be expanded.
3758       if (CC != ISD::SETEQ && CC != ISD::SETNE)
3759         return SDValue();
3760       SDValue SetCC32 = DAG.getSetCC(
3761           dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3762           DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3763       int ShuffV[] = {1, 0, 3, 2};
3764       SDValue Shuff =
3765           DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3766       return DAG.getBitcast(MVT::v2i64,
3767                             DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3768                                         dl, MVT::v4i32, Shuff, SetCC32));
3769     }
3770
3771     // We handle most of these in the usual way.
3772     return Op;
3773   }
3774
3775   // If we're comparing for equality to zero, expose the fact that this is
3776   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3777   // fold the new nodes.
3778   if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3779     return V;
3780
3781   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3782     // Leave comparisons against 0 and -1 alone for now, since they're usually
3783     // optimized.  FIXME: revisit this when we can custom lower all setcc
3784     // optimizations.
3785     if (C->isAllOnes() || C->isZero())
3786       return SDValue();
3787   }
3788
3789   // If we have an integer seteq/setne, turn it into a compare against zero
3790   // by xor'ing the rhs with the lhs, which is faster than setting a
3791   // condition register, reading it back out, and masking the correct bit.  The
3792   // normal approach here uses sub to do this instead of xor.  Using xor exposes
3793   // the result to other bit-twiddling opportunities.
3794   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3795     EVT VT = Op.getValueType();
3796     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3797     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3798   }
3799   return SDValue();
3800 }
3801
3802 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3803   SDNode *Node = Op.getNode();
3804   EVT VT = Node->getValueType(0);
3805   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3806   SDValue InChain = Node->getOperand(0);
3807   SDValue VAListPtr = Node->getOperand(1);
3808   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3809   SDLoc dl(Node);
3810
3811   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3812
3813   // gpr_index
3814   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3815                                     VAListPtr, MachinePointerInfo(SV), MVT::i8);
3816   InChain = GprIndex.getValue(1);
3817
3818   if (VT == MVT::i64) {
3819     // Check if GprIndex is even
3820     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3821                                  DAG.getConstant(1, dl, MVT::i32));
3822     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3823                                 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3824     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3825                                           DAG.getConstant(1, dl, MVT::i32));
3826     // Align GprIndex to be even if it isn't
3827     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3828                            GprIndex);
3829   }
3830
3831   // fpr index is 1 byte after gpr
3832   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3833                                DAG.getConstant(1, dl, MVT::i32));
3834
3835   // fpr
3836   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3837                                     FprPtr, MachinePointerInfo(SV), MVT::i8);
3838   InChain = FprIndex.getValue(1);
3839
3840   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3841                                        DAG.getConstant(8, dl, MVT::i32));
3842
3843   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3844                                         DAG.getConstant(4, dl, MVT::i32));
3845
3846   // areas
3847   SDValue OverflowArea =
3848       DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3849   InChain = OverflowArea.getValue(1);
3850
3851   SDValue RegSaveArea =
3852       DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3853   InChain = RegSaveArea.getValue(1);
3854
3855   // select overflow_area if index > 8
3856   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3857                             DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3858
3859   // adjustment constant gpr_index * 4/8
3860   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3861                                     VT.isInteger() ? GprIndex : FprIndex,
3862                                     DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3863                                                     MVT::i32));
3864
3865   // OurReg = RegSaveArea + RegConstant
3866   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3867                                RegConstant);
3868
3869   // Floating types are 32 bytes into RegSaveArea
3870   if (VT.isFloatingPoint())
3871     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3872                          DAG.getConstant(32, dl, MVT::i32));
3873
3874   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3875   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3876                                    VT.isInteger() ? GprIndex : FprIndex,
3877                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3878                                                    MVT::i32));
3879
3880   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3881                               VT.isInteger() ? VAListPtr : FprPtr,
3882                               MachinePointerInfo(SV), MVT::i8);
3883
3884   // determine if we should load from reg_save_area or overflow_area
3885   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3886
3887   // increase overflow_area by 4/8 if gpr/fpr > 8
3888   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3889                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
3890                                           dl, MVT::i32));
3891
3892   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3893                              OverflowAreaPlusN);
3894
3895   InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3896                               MachinePointerInfo(), MVT::i32);
3897
3898   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3899 }
3900
3901 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3902   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3903
3904   // We have to copy the entire va_list struct:
3905   // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3906   return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3907                        DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3908                        false, true, /*CI=*/nullptr, std::nullopt,
3909                        MachinePointerInfo(), MachinePointerInfo());
3910 }
3911
3912 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3913                                                   SelectionDAG &DAG) const {
3914   if (Subtarget.isAIXABI())
3915     report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3916
3917   return Op.getOperand(0);
3918 }
3919
3920 SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3921   MachineFunction &MF = DAG.getMachineFunction();
3922   PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3923
3924   assert((Op.getOpcode() == ISD::INLINEASM ||
3925           Op.getOpcode() == ISD::INLINEASM_BR) &&
3926          "Expecting Inline ASM node.");
3927
3928   // If an LR store is already known to be required then there is not point in
3929   // checking this ASM as well.
3930   if (MFI.isLRStoreRequired())
3931     return Op;
3932
3933   // Inline ASM nodes have an optional last operand that is an incoming Flag of
3934   // type MVT::Glue. We want to ignore this last operand if that is the case.
3935   unsigned NumOps = Op.getNumOperands();
3936   if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3937     --NumOps;
3938
3939   // Check all operands that may contain the LR.
3940   for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3941     const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3942     unsigned NumVals = Flags.getNumOperandRegisters();
3943     ++i; // Skip the ID value.
3944
3945     switch (Flags.getKind()) {
3946     default:
3947       llvm_unreachable("Bad flags!");
3948     case InlineAsm::Kind::RegUse:
3949     case InlineAsm::Kind::Imm:
3950     case InlineAsm::Kind::Mem:
3951       i += NumVals;
3952       break;
3953     case InlineAsm::Kind::Clobber:
3954     case InlineAsm::Kind::RegDef:
3955     case InlineAsm::Kind::RegDefEarlyClobber: {
3956       for (; NumVals; --NumVals, ++i) {
3957         Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3958         if (Reg != PPC::LR && Reg != PPC::LR8)
3959           continue;
3960         MFI.setLRStoreRequired();
3961         return Op;
3962       }
3963       break;
3964     }
3965     }
3966   }
3967
3968   return Op;
3969 }
3970
3971 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3972                                                 SelectionDAG &DAG) const {
3973   if (Subtarget.isAIXABI())
3974     report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3975
3976   SDValue Chain = Op.getOperand(0);
3977   SDValue Trmp = Op.getOperand(1); // trampoline
3978   SDValue FPtr = Op.getOperand(2); // nested function
3979   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3980   SDLoc dl(Op);
3981
3982   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3983   bool isPPC64 = (PtrVT == MVT::i64);
3984   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3985
3986   TargetLowering::ArgListTy Args;
3987   TargetLowering::ArgListEntry Entry;
3988
3989   Entry.Ty = IntPtrTy;
3990   Entry.Node = Trmp; Args.push_back(Entry);
3991
3992   // TrampSize == (isPPC64 ? 48 : 40);
3993   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3994                                isPPC64 ? MVT::i64 : MVT::i32);
3995   Args.push_back(Entry);
3996
3997   Entry.Node = FPtr; Args.push_back(Entry);
3998   Entry.Node = Nest; Args.push_back(Entry);
3999
4000   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4001   TargetLowering::CallLoweringInfo CLI(DAG);
4002   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4003       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
4004       DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
4005
4006   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4007   return CallResult.second;
4008 }
4009
4010 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4011   MachineFunction &MF = DAG.getMachineFunction();
4012   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4013   EVT PtrVT = getPointerTy(MF.getDataLayout());
4014
4015   SDLoc dl(Op);
4016
4017   if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4018     // vastart just stores the address of the VarArgsFrameIndex slot into the
4019     // memory location argument.
4020     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4021     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4022     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4023                         MachinePointerInfo(SV));
4024   }
4025
4026   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4027   // We suppose the given va_list is already allocated.
4028   //
4029   // typedef struct {
4030   //  char gpr;     /* index into the array of 8 GPRs
4031   //                 * stored in the register save area
4032   //                 * gpr=0 corresponds to r3,
4033   //                 * gpr=1 to r4, etc.
4034   //                 */
4035   //  char fpr;     /* index into the array of 8 FPRs
4036   //                 * stored in the register save area
4037   //                 * fpr=0 corresponds to f1,
4038   //                 * fpr=1 to f2, etc.
4039   //                 */
4040   //  char *overflow_arg_area;
4041   //                /* location on stack that holds
4042   //                 * the next overflow argument
4043   //                 */
4044   //  char *reg_save_area;
4045   //               /* where r3:r10 and f1:f8 (if saved)
4046   //                * are stored
4047   //                */
4048   // } va_list[1];
4049
4050   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4051   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4052   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4053                                             PtrVT);
4054   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4055                                  PtrVT);
4056
4057   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4058   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4059
4060   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4061   SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4062
4063   uint64_t FPROffset = 1;
4064   SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4065
4066   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4067
4068   // Store first byte : number of int regs
4069   SDValue firstStore =
4070       DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4071                         MachinePointerInfo(SV), MVT::i8);
4072   uint64_t nextOffset = FPROffset;
4073   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4074                                   ConstFPROffset);
4075
4076   // Store second byte : number of float regs
4077   SDValue secondStore =
4078       DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4079                         MachinePointerInfo(SV, nextOffset), MVT::i8);
4080   nextOffset += StackOffset;
4081   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4082
4083   // Store second word : arguments given on stack
4084   SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4085                                     MachinePointerInfo(SV, nextOffset));
4086   nextOffset += FrameOffset;
4087   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4088
4089   // Store third word : arguments given in registers
4090   return DAG.getStore(thirdStore, dl, FR, nextPtr,
4091                       MachinePointerInfo(SV, nextOffset));
4092 }
4093
4094 /// FPR - The set of FP registers that should be allocated for arguments
4095 /// on Darwin and AIX.
4096 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
4097                                 PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
4098                                 PPC::F11, PPC::F12, PPC::F13};
4099
4100 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
4101 /// the stack.
4102 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4103                                        unsigned PtrByteSize) {
4104   unsigned ArgSize = ArgVT.getStoreSize();
4105   if (Flags.isByVal())
4106     ArgSize = Flags.getByValSize();
4107
4108   // Round up to multiples of the pointer size, except for array members,
4109   // which are always packed.
4110   if (!Flags.isInConsecutiveRegs())
4111     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4112
4113   return ArgSize;
4114 }
4115
4116 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
4117 /// on the stack.
4118 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4119                                          ISD::ArgFlagsTy Flags,
4120                                          unsigned PtrByteSize) {
4121   Align Alignment(PtrByteSize);
4122
4123   // Altivec parameters are padded to a 16 byte boundary.
4124   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4125       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4126       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4127       ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4128     Alignment = Align(16);
4129
4130   // ByVal parameters are aligned as requested.
4131   if (Flags.isByVal()) {
4132     auto BVAlign = Flags.getNonZeroByValAlign();
4133     if (BVAlign > PtrByteSize) {
4134       if (BVAlign.value() % PtrByteSize != 0)
4135         llvm_unreachable(
4136             "ByVal alignment is not a multiple of the pointer size");
4137
4138       Alignment = BVAlign;
4139     }
4140   }
4141
4142   // Array members are always packed to their original alignment.
4143   if (Flags.isInConsecutiveRegs()) {
4144     // If the array member was split into multiple registers, the first
4145     // needs to be aligned to the size of the full type.  (Except for
4146     // ppcf128, which is only aligned as its f64 components.)
4147     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4148       Alignment = Align(OrigVT.getStoreSize());
4149     else
4150       Alignment = Align(ArgVT.getStoreSize());
4151   }
4152
4153   return Alignment;
4154 }
4155
4156 /// CalculateStackSlotUsed - Return whether this argument will use its
4157 /// stack slot (instead of being passed in registers).  ArgOffset,
4158 /// AvailableFPRs, and AvailableVRs must hold the current argument
4159 /// position, and will be updated to account for this argument.
4160 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4161                                    unsigned PtrByteSize, unsigned LinkageSize,
4162                                    unsigned ParamAreaSize, unsigned &ArgOffset,
4163                                    unsigned &AvailableFPRs,
4164                                    unsigned &AvailableVRs) {
4165   bool UseMemory = false;
4166
4167   // Respect alignment of argument on the stack.
4168   Align Alignment =
4169       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4170   ArgOffset = alignTo(ArgOffset, Alignment);
4171   // If there's no space left in the argument save area, we must
4172   // use memory (this check also catches zero-sized arguments).
4173   if (ArgOffset >= LinkageSize + ParamAreaSize)
4174     UseMemory = true;
4175
4176   // Allocate argument on the stack.
4177   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4178   if (Flags.isInConsecutiveRegsLast())
4179     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4180   // If we overran the argument save area, we must use memory
4181   // (this check catches arguments passed partially in memory)
4182   if (ArgOffset > LinkageSize + ParamAreaSize)
4183     UseMemory = true;
4184
4185   // However, if the argument is actually passed in an FPR or a VR,
4186   // we don't use memory after all.
4187   if (!Flags.isByVal()) {
4188     if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4189       if (AvailableFPRs > 0) {
4190         --AvailableFPRs;
4191         return false;
4192       }
4193     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4194         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4195         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4196         ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4197       if (AvailableVRs > 0) {
4198         --AvailableVRs;
4199         return false;
4200       }
4201   }
4202
4203   return UseMemory;
4204 }
4205
4206 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
4207 /// ensure minimum alignment required for target.
4208 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4209                                      unsigned NumBytes) {
4210   return alignTo(NumBytes, Lowering->getStackAlign());
4211 }
4212
4213 SDValue PPCTargetLowering::LowerFormalArguments(
4214     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4215     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4216     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4217   if (Subtarget.isAIXABI())
4218     return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4219                                     InVals);
4220   if (Subtarget.is64BitELFABI())
4221     return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4222                                        InVals);
4223   assert(Subtarget.is32BitELFABI());
4224   return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4225                                      InVals);
4226 }
4227
4228 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4229     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4230     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4231     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
       // Lowers the incoming formal arguments described by Ins for the 32-bit
       // SVR4 ABI.
       //
       // Locations are assigned with CC_PPC32_SVR4.  For every location a value
       // is appended to InVals: a CopyFromReg of a new live-in virtual register
       // for register locations, or a load from a fixed stack object for memory
       // locations.  With SPE, an f64 consumes two consecutive register
       // locations and yields a single BUILD_SPE64 value.
       //
       // For variadic functions the GPR argument registers (and the FPR argument
       // registers, unless soft-float or SPE is in use) are additionally stored
       // to a freshly created stack object, and the frame indices / register
       // counts needed to expand va_start are recorded in PPCFunctionInfo.
       //
       // Returns the incoming Chain, or a TokenFactor over the register-save
       // stores when any were emitted.
4232
4233   // 32-bit SVR4 ABI Stack Frame Layout:
4234   //              +-----------------------------------+
4235   //        +-->  |            Back chain             |
4236   //        |     +-----------------------------------+
4237   //        |     | Floating-point register save area |
4238   //        |     +-----------------------------------+
4239   //        |     |    General register save area     |
4240   //        |     +-----------------------------------+
4241   //        |     |          CR save word             |
4242   //        |     +-----------------------------------+
4243   //        |     |         VRSAVE save word          |
4244   //        |     +-----------------------------------+
4245   //        |     |         Alignment padding         |
4246   //        |     +-----------------------------------+
4247   //        |     |     Vector register save area     |
4248   //        |     +-----------------------------------+
4249   //        |     |       Local variable space        |
4250   //        |     +-----------------------------------+
4251   //        |     |        Parameter list area        |
4252   //        |     +-----------------------------------+
4253   //        |     |           LR save word            |
4254   //        |     +-----------------------------------+
4255   // SP-->  +---  |            Back chain             |
4256   //              +-----------------------------------+
4257   //
4258   // Specifications:
4259   //   System V Application Binary Interface PowerPC Processor Supplement
4260   //   AltiVec Technology Programming Interface Manual
4261
4262   MachineFunction &MF = DAG.getMachineFunction();
4263   MachineFrameInfo &MFI = MF.getFrameInfo();
4264   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4265
4266   EVT PtrVT = getPointerTy(MF.getDataLayout());
4267   // Potential tail calls could cause overwriting of argument stack slots.
       // The incoming-argument stack objects are therefore only marked immutable
       // when this is not a fastcc function compiled with GuaranteedTailCallOpt.
4268   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4269                        (CallConv == CallingConv::Fast));
4270   const Align PtrAlign(4);
4271
4272   // Assign locations to all of the incoming arguments.
4273   SmallVector<CCValAssign, 16> ArgLocs;
4274   PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4275                  *DAG.getContext());
4276
4277   // Reserve space for the linkage area on the stack.
4278   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4279   CCInfo.AllocateStack(LinkageSize, PtrAlign);
       // NOTE(review): presumably the pre-analysis records which arguments were
       // originally ppcf128 so the soft-float calling convention can treat them
       // specially; that state is reset by clearWasPPCF128() below.  Confirm
       // against PPCCCState.
4280   if (useSoftFloat())
4281     CCInfo.PreAnalyzeFormalArguments(Ins);
4282
4283   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4284   CCInfo.clearWasPPCF128();
4285
       // Materialize one value per assigned location.  The loop index is bumped
       // an extra time inside the body when an SPE f64 uses two locations.
4286   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4287     CCValAssign &VA = ArgLocs[i];
4288
4289     // Arguments stored in registers.
4290     if (VA.isRegLoc()) {
4291       const TargetRegisterClass *RC;
4292       EVT ValVT = VA.getValVT();
4293
           // Choose the register class of the live-in virtual register from the
           // value type and the subtarget features.
4294       switch (ValVT.getSimpleVT().SimpleTy) {
4295         default:
4296           llvm_unreachable("ValVT not supported by formal arguments Lowering");
4297         case MVT::i1:
4298         case MVT::i32:
4299           RC = &PPC::GPRCRegClass;
4300           break;
4301         case MVT::f32:
4302           if (Subtarget.hasP8Vector())
4303             RC = &PPC::VSSRCRegClass;
4304           else if (Subtarget.hasSPE())
4305             RC = &PPC::GPRCRegClass;
4306           else
4307             RC = &PPC::F4RCRegClass;
4308           break;
4309         case MVT::f64:
4310           if (Subtarget.hasVSX())
4311             RC = &PPC::VSFRCRegClass;
4312           else if (Subtarget.hasSPE())
4313             // SPE passes doubles in GPR pairs.
4314             RC = &PPC::GPRCRegClass;
4315           else
4316             RC = &PPC::F8RCRegClass;
4317           break;
             // All vector value types handled here use the VRRC register class.
4318         case MVT::v16i8:
4319         case MVT::v8i16:
4320         case MVT::v4i32:
4321           RC = &PPC::VRRCRegClass;
4322           break;
4323         case MVT::v4f32:
4324           RC = &PPC::VRRCRegClass;
4325           break;
4326         case MVT::v2f64:
4327         case MVT::v2i64:
4328           RC = &PPC::VRRCRegClass;
4329           break;
4330       }
4331
4332       SDValue ArgValue;
4333       // Transform the arguments stored in physical registers into
4334       // virtual ones.
4335       if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
             // SPE f64: the value occupies this location and the next one.  Copy
             // both GPRs out as i32 and combine them with BUILD_SPE64; ++i below
             // consumes the second location.
4336         assert(i + 1 < e && "No second half of double precision argument");
4337         Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4338         Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4339         SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4340         SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
             // On big-endian subtargets the two halves arrive in the opposite
             // order, so exchange them before building the f64.
4341         if (!Subtarget.isLittleEndian())
4342           std::swap (ArgValueLo, ArgValueHi);
4343         ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4344                                ArgValueHi);
4345       } else {
             // Ordinary case: a single register.  An i1 is copied out as i32 and
             // then truncated back to i1.
4346         Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4347         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4348                                       ValVT == MVT::i1 ? MVT::i32 : ValVT);
4349         if (ValVT == MVT::i1)
4350           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4351       }
4352
4353       InVals.push_back(ArgValue);
4354     } else {
4355       // Argument stored in memory.
4356       assert(VA.isMemLoc());
4357
4358       // Get the extended size of the argument type in stack
4359       unsigned ArgSize = VA.getLocVT().getStoreSize();
4360       // Get the actual size of the argument type
4361       unsigned ObjSize = VA.getValVT().getStoreSize();
4362       unsigned ArgOffset = VA.getLocMemOffset();
4363       // Stack objects in PPC32 are right justified.
           // Skip the padding in front of the value when the location type is
           // wider than the value type.
4364       ArgOffset += ArgSize - ObjSize;
4365       int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4366
4367       // Create load nodes to retrieve arguments from the stack.
4368       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4369       InVals.push_back(
4370           DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4371     }
4372   }
4373
4374   // Assign locations to all of the incoming aggregate by value arguments.
4375   // Aggregates passed by value are stored in the local variable space of the
4376   // caller's stack frame, right above the parameter list area.
       // Only the resulting stack size is consumed in this function; the
       // individual ByValArgLocs entries are not read here.
4377   SmallVector<CCValAssign, 16> ByValArgLocs;
4378   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4379                       ByValArgLocs, *DAG.getContext());
4380
4381   // Reserve stack space for the allocations in CCInfo.
4382   CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4383
4384   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4385
4386   // Area that is at least reserved in the caller of this function.
       // This is never smaller than the linkage area.
4387   unsigned MinReservedArea = CCByValInfo.getStackSize();
4388   MinReservedArea = std::max(MinReservedArea, LinkageSize);
4389
4390   // Set the size that is at least reserved in caller of this function.  Tail
4391   // call optimized function's reserved stack space needs to be aligned so that
4392   // taking the difference between two stack areas will result in an aligned
4393   // stack.
4394   MinReservedArea =
4395       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4396   FuncInfo->setMinReservedArea(MinReservedArea);
4397
       // Stores that spill argument registers for va_start; they are merged into
       // the returned chain at the end.
4398   SmallVector<SDValue, 8> MemOps;
4399
4400   // If the function takes variable number of arguments, make a frame index for
4401   // the start of the first vararg value... for expansion of llvm.va_start.
4402   if (isVarArg) {
4403     static const MCPhysReg GPArgRegs[] = {
4404       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4405       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4406     };
4407     const unsigned NumGPArgRegs = std::size(GPArgRegs);
4408
4409     static const MCPhysReg FPArgRegs[] = {
4410       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4411       PPC::F8
4412     };
4413     unsigned NumFPArgRegs = std::size(FPArgRegs);
4414
         // With soft-float or SPE no FPR argument registers are saved: the save
         // area below gets no FPR space and the FPR store loop does not run.
4415     if (useSoftFloat() || hasSPE())
4416        NumFPArgRegs = 0;
4417
         // Record how many GPR / FPR argument registers the fixed arguments have
         // already used (the index of the first unallocated register in each
         // list).
4418     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4419     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4420
4421     // Make room for NumGPArgRegs and NumFPArgRegs.
         // Depth is the save-area size in bytes: pointer-sized slots for the GPRs
         // followed by f64-sized slots for the FPRs.
4422     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4423                 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4424
         // Pointer-sized fixed object placed at the end of the stack space
         // assigned by CCInfo.  LowerVASTART stores its frame index as the va_list
         // word for arguments given on the stack.
4425     FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4426         PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4427
         // Register save area, allocated as an 8-byte-aligned stack object.
         // LowerVASTART stores its frame index as the va_list word for arguments
         // given in registers.
4428     FuncInfo->setVarArgsFrameIndex(
4429         MFI.CreateStackObject(Depth, Align(8), false));
4430     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4431
4432     // The fixed integer arguments of a variadic function are stored to the
4433     // VarArgsFrameIndex on the stack so that they may be loaded by
4434     // dereferencing the result of va_next.
         // Every GPR argument register is stored, in GPArgRegs order, at
         // consecutive pointer-sized offsets starting at FIN.
4435     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4436       // Get an existing live-in vreg, or add a new one.
4437       Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4438       if (!VReg)
4439         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4440
           // Each store is chained on its own CopyFromReg (Val.getValue(1)), so
           // the stores do not depend on one another.
4441       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4442       SDValue Store =
4443           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4444       MemOps.push_back(Store);
4445       // Increment the address by four for the next argument to store
4446       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4447       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4448     }
4449
4450     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4451     // is set.
4452     // The double arguments are stored to the VarArgsFrameIndex
4453     // on the stack.
         // FIN continues from where the GPR loop left off, so the FPRs follow the
         // GPRs in the save area.
4454     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4455       // Get an existing live-in vreg, or add a new one.
4456       Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4457       if (!VReg)
4458         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4459
4460       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4461       SDValue Store =
4462           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4463       MemOps.push_back(Store);
4464       // Increment the address by eight for the next argument to store
4465       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4466                                          PtrVT);
4467       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4468     }
4469   }
4470
       // Join all register-save stores into a single chain result.
4471   if (!MemOps.empty())
4472     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4473
4474   return Chain;
4475 }
4476
4477 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4478 // value to MVT::i64 and then truncate to the correct register size.
4479 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4480                                              EVT ObjectVT, SelectionDAG &DAG,
4481                                              SDValue ArgVal,
4482                                              const SDLoc &dl) const {
4483   if (Flags.isSExt())
4484     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4485                          DAG.getValueType(ObjectVT));
4486   else if (Flags.isZExt())
4487     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4488                          DAG.getValueType(ObjectVT));
4489
4490   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4491 }
4492
4493 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4494     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4495     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4496     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4497   // TODO: add description of PPC stack frame format, or at least some docs.
4498   //
4499   bool isELFv2ABI = Subtarget.isELFv2ABI();
4500   bool isLittleEndian = Subtarget.isLittleEndian();
4501   MachineFunction &MF = DAG.getMachineFunction();
4502   MachineFrameInfo &MFI = MF.getFrameInfo();
4503   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4504
4505   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4506          "fastcc not supported on varargs functions");
4507
4508   EVT PtrVT = getPointerTy(MF.getDataLayout());
4509   // Potential tail calls could cause overwriting of argument stack slots.
4510   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4511                        (CallConv == CallingConv::Fast));
4512   unsigned PtrByteSize = 8;
4513   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4514
4515   static const MCPhysReg GPR[] = {
4516     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4517     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4518   };
4519   static const MCPhysReg VR[] = {
4520     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4521     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4522   };
4523
4524   const unsigned Num_GPR_Regs = std::size(GPR);
4525   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4526   const unsigned Num_VR_Regs = std::size(VR);
4527
4528   // Do a first pass over the arguments to determine whether the ABI
4529   // guarantees that our caller has allocated the parameter save area
4530   // on its stack frame.  In the ELFv1 ABI, this is always the case;
4531   // in the ELFv2 ABI, it is true if this is a vararg function or if
4532   // any parameter is located in a stack slot.
4533
4534   bool HasParameterArea = !isELFv2ABI || isVarArg;
4535   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4536   unsigned NumBytes = LinkageSize;
4537   unsigned AvailableFPRs = Num_FPR_Regs;
4538   unsigned AvailableVRs = Num_VR_Regs;
4539   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4540     if (Ins[i].Flags.isNest())
4541       continue;
4542
4543     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4544                                PtrByteSize, LinkageSize, ParamAreaSize,
4545                                NumBytes, AvailableFPRs, AvailableVRs))
4546       HasParameterArea = true;
4547   }
4548
4549   // Add DAG nodes to load the arguments or copy them out of registers.  On
4550   // entry to a function on PPC, the arguments start after the linkage area,
4551   // although the first ones are often in registers.
4552
4553   unsigned ArgOffset = LinkageSize;
4554   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4555   SmallVector<SDValue, 8> MemOps;
4556   Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4557   unsigned CurArgIdx = 0;
4558   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4559     SDValue ArgVal;
4560     bool needsLoad = false;
4561     EVT ObjectVT = Ins[ArgNo].VT;
4562     EVT OrigVT = Ins[ArgNo].ArgVT;
4563     unsigned ObjSize = ObjectVT.getStoreSize();
4564     unsigned ArgSize = ObjSize;
4565     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4566     if (Ins[ArgNo].isOrigArg()) {
4567       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4568       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4569     }
4570     // We re-align the argument offset for each argument, except when using the
4571     // fast calling convention, when we need to make sure we do that only when
4572     // we'll actually use a stack slot.
4573     unsigned CurArgOffset;
4574     Align Alignment;
4575     auto ComputeArgOffset = [&]() {
4576       /* Respect alignment of argument on the stack.  */
4577       Alignment =
4578           CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4579       ArgOffset = alignTo(ArgOffset, Alignment);
4580       CurArgOffset = ArgOffset;
4581     };
4582
4583     if (CallConv != CallingConv::Fast) {
4584       ComputeArgOffset();
4585
4586       /* Compute GPR index associated with argument offset.  */
4587       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4588       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4589     }
4590
4591     // FIXME the codegen can be much improved in some cases.
4592     // We do not have to keep everything in memory.
4593     if (Flags.isByVal()) {
4594       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4595
4596       if (CallConv == CallingConv::Fast)
4597         ComputeArgOffset();
4598
4599       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4600       ObjSize = Flags.getByValSize();
4601       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4602       // Empty aggregate parameters do not take up registers.  Examples:
4603       //   struct { } a;
4604       //   union  { } b;
4605       //   int c[0];
4606       // etc.  However, we have to provide a place-holder in InVals, so
4607       // pretend we have an 8-byte item at the current address for that
4608       // purpose.
4609       if (!ObjSize) {
4610         int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4611         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4612         InVals.push_back(FIN);
4613         continue;
4614       }
4615
4616       // Create a stack object covering all stack doublewords occupied
4617       // by the argument.  If the argument is (fully or partially) on
4618       // the stack, or if the argument is fully in registers but the
4619       // caller has allocated the parameter save anyway, we can refer
4620       // directly to the caller's stack frame.  Otherwise, create a
4621       // local copy in our own frame.
4622       int FI;
4623       if (HasParameterArea ||
4624           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4625         FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4626       else
4627         FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4628       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4629
4630       // Handle aggregates smaller than 8 bytes.
4631       if (ObjSize < PtrByteSize) {
4632         // The value of the object is its address, which differs from the
4633         // address of the enclosing doubleword on big-endian systems.
4634         SDValue Arg = FIN;
4635         if (!isLittleEndian) {
4636           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4637           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4638         }
4639         InVals.push_back(Arg);
4640
4641         if (GPR_idx != Num_GPR_Regs) {
4642           Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4643           FuncInfo->addLiveInAttr(VReg, Flags);
4644           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4645           EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4646           SDValue Store =
4647               DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4648                                 MachinePointerInfo(&*FuncArg), ObjType);
4649           MemOps.push_back(Store);
4650         }
4651         // Whether we copied from a register or not, advance the offset
4652         // into the parameter save area by a full doubleword.
4653         ArgOffset += PtrByteSize;
4654         continue;
4655       }
4656
4657       // The value of the object is its address, which is the address of
4658       // its first stack doubleword.
4659       InVals.push_back(FIN);
4660
4661       // Store whatever pieces of the object are in registers to memory.
4662       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4663         if (GPR_idx == Num_GPR_Regs)
4664           break;
4665
4666         Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4667         FuncInfo->addLiveInAttr(VReg, Flags);
4668         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4669         SDValue Addr = FIN;
4670         if (j) {
4671           SDValue Off = DAG.getConstant(j, dl, PtrVT);
4672           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4673         }
4674         unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4675         EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4676         SDValue Store =
4677             DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4678                               MachinePointerInfo(&*FuncArg, j), ObjType);
4679         MemOps.push_back(Store);
4680         ++GPR_idx;
4681       }
4682       ArgOffset += ArgSize;
4683       continue;
4684     }
4685
4686     switch (ObjectVT.getSimpleVT().SimpleTy) {
4687     default: llvm_unreachable("Unhandled argument type!");
4688     case MVT::i1:
4689     case MVT::i32:
4690     case MVT::i64:
4691       if (Flags.isNest()) {
4692         // The 'nest' parameter, if any, is passed in R11.
4693         Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4694         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4695
4696         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4697           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4698
4699         break;
4700       }
4701
4702       // These can be scalar arguments or elements of an integer array type
4703       // passed directly.  Clang may use those instead of "byval" aggregate
4704       // types to avoid forcing arguments to memory unnecessarily.
4705       if (GPR_idx != Num_GPR_Regs) {
4706         Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4707         FuncInfo->addLiveInAttr(VReg, Flags);
4708         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4709
4710         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4711           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4712           // value to MVT::i64 and then truncate to the correct register size.
4713           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4714       } else {
4715         if (CallConv == CallingConv::Fast)
4716           ComputeArgOffset();
4717
4718         needsLoad = true;
4719         ArgSize = PtrByteSize;
4720       }
4721       if (CallConv != CallingConv::Fast || needsLoad)
4722         ArgOffset += 8;
4723       break;
4724
4725     case MVT::f32:
4726     case MVT::f64:
4727       // These can be scalar arguments or elements of a float array type
4728       // passed directly.  The latter are used to implement ELFv2 homogenous
4729       // float aggregates.
4730       if (FPR_idx != Num_FPR_Regs) {
4731         unsigned VReg;
4732
4733         if (ObjectVT == MVT::f32)
4734           VReg = MF.addLiveIn(FPR[FPR_idx],
4735                               Subtarget.hasP8Vector()
4736                                   ? &PPC::VSSRCRegClass
4737                                   : &PPC::F4RCRegClass);
4738         else
4739           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4740                                                 ? &PPC::VSFRCRegClass
4741                                                 : &PPC::F8RCRegClass);
4742
4743         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4744         ++FPR_idx;
4745       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4746         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4747         // once we support fp <-> gpr moves.
4748
4749         // This can only ever happen in the presence of f32 array types,
4750         // since otherwise we never run out of FPRs before running out
4751         // of GPRs.
4752         Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4753         FuncInfo->addLiveInAttr(VReg, Flags);
4754         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4755
4756         if (ObjectVT == MVT::f32) {
4757           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4758             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4759                                  DAG.getConstant(32, dl, MVT::i32));
4760           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4761         }
4762
4763         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4764       } else {
4765         if (CallConv == CallingConv::Fast)
4766           ComputeArgOffset();
4767
4768         needsLoad = true;
4769       }
4770
4771       // When passing an array of floats, the array occupies consecutive
4772       // space in the argument area; only round up to the next doubleword
4773       // at the end of the array.  Otherwise, each float takes 8 bytes.
4774       if (CallConv != CallingConv::Fast || needsLoad) {
4775         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4776         ArgOffset += ArgSize;
4777         if (Flags.isInConsecutiveRegsLast())
4778           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4779       }
4780       break;
4781     case MVT::v4f32:
4782     case MVT::v4i32:
4783     case MVT::v8i16:
4784     case MVT::v16i8:
4785     case MVT::v2f64:
4786     case MVT::v2i64:
4787     case MVT::v1i128:
4788     case MVT::f128:
4789       // These can be scalar arguments or elements of a vector array type
4790       // passed directly.  The latter are used to implement ELFv2 homogenous
4791       // vector aggregates.
4792       if (VR_idx != Num_VR_Regs) {
4793         Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4794         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4795         ++VR_idx;
4796       } else {
4797         if (CallConv == CallingConv::Fast)
4798           ComputeArgOffset();
4799         needsLoad = true;
4800       }
4801       if (CallConv != CallingConv::Fast || needsLoad)
4802         ArgOffset += 16;
4803       break;
4804     }
4805
4806     // We need to load the argument to a virtual register if we determined
4807     // above that we ran out of physical registers of the appropriate type.
4808     if (needsLoad) {
4809       if (ObjSize < ArgSize && !isLittleEndian)
4810         CurArgOffset += ArgSize - ObjSize;
4811       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4812       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4813       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4814     }
4815
4816     InVals.push_back(ArgVal);
4817   }
4818
4819   // Area that is at least reserved in the caller of this function.
4820   unsigned MinReservedArea;
4821   if (HasParameterArea)
4822     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4823   else
4824     MinReservedArea = LinkageSize;
4825
4826   // Set the size that is at least reserved in caller of this function.  Tail
4827   // call optimized functions' reserved stack space needs to be aligned so that
4828   // taking the difference between two stack areas will result in an aligned
4829   // stack.
4830   MinReservedArea =
4831       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4832   FuncInfo->setMinReservedArea(MinReservedArea);
4833
4834   // If the function takes variable number of arguments, make a frame index for
4835   // the start of the first vararg value... for expansion of llvm.va_start.
4836   // On ELFv2ABI spec, it writes:
4837   // C programs that are intended to be *portable* across different compilers
4838   // and architectures must use the header file <stdarg.h> to deal with variable
4839   // argument lists.
4840   if (isVarArg && MFI.hasVAStart()) {
4841     int Depth = ArgOffset;
4842
4843     FuncInfo->setVarArgsFrameIndex(
4844       MFI.CreateFixedObject(PtrByteSize, Depth, true));
4845     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4846
4847     // If this function is vararg, store any remaining integer argument regs
4848     // to their spots on the stack so that they may be loaded by dereferencing
4849     // the result of va_next.
4850     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4851          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4852       Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4853       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4854       SDValue Store =
4855           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4856       MemOps.push_back(Store);
4857       // Increment the address by four for the next argument to store
4858       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4859       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4860     }
4861   }
4862
4863   if (!MemOps.empty())
4864     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4865
4866   return Chain;
4867 }
4868
4869 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4870 /// adjusted to accommodate the arguments for the tailcall.
4871 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4872                                    unsigned ParamSize) {
4873
4874   if (!isTailCall) return 0;
4875
4876   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4877   unsigned CallerMinReservedArea = FI->getMinReservedArea();
4878   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4879   // Remember only if the new adjustment is bigger.
4880   if (SPDiff < FI->getTailCallSPDelta())
4881     FI->setTailCallSPDelta(SPDiff);
4882
4883   return SPDiff;
4884 }
4885
4886 static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4887
4888 static bool callsShareTOCBase(const Function *Caller,
4889                               const GlobalValue *CalleeGV,
4890                               const TargetMachine &TM) {
4891   // It does not make sense to call callsShareTOCBase() with a caller that
4892   // is PC Relative since PC Relative callers do not have a TOC.
4893 #ifndef NDEBUG
4894   const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4895   assert(!STICaller->isUsingPCRelativeCalls() &&
4896          "PC Relative callers do not have a TOC and cannot share a TOC Base");
4897 #endif
4898
4899   // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4900   // don't have enough information to determine if the caller and callee share
4901   // the same  TOC base, so we have to pessimistically assume they don't for
4902   // correctness.
4903   if (!CalleeGV)
4904     return false;
4905
4906   // If the callee is preemptable, then the static linker will use a plt-stub
4907   // which saves the toc to the stack, and needs a nop after the call
4908   // instruction to convert to a toc-restore.
4909   if (!TM.shouldAssumeDSOLocal(CalleeGV))
4910     return false;
4911
4912   // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4913   // We may need a TOC restore in the situation where the caller requires a
4914   // valid TOC but the callee is PC Relative and does not.
4915   const Function *F = dyn_cast<Function>(CalleeGV);
4916   const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4917
4918   // If we have an Alias we can try to get the function from there.
4919   if (Alias) {
4920     const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4921     F = dyn_cast<Function>(GlobalObj);
4922   }
4923
4924   // If we still have no valid function pointer we do not have enough
4925   // information to determine if the callee uses PC Relative calls so we must
4926   // assume that it does.
4927   if (!F)
4928     return false;
4929
4930   // If the callee uses PC Relative we cannot guarantee that the callee won't
4931   // clobber the TOC of the caller and so we must assume that the two
4932   // functions do not share a TOC base.
4933   const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4934   if (STICallee->isUsingPCRelativeCalls())
4935     return false;
4936
4937   // If the GV is not a strong definition then we need to assume it can be
4938   // replaced by another function at link time. The function that replaces
4939   // it may not share the same TOC as the caller since the callee may be
4940   // replaced by a PC Relative version of the same function.
4941   if (!CalleeGV->isStrongDefinitionForLinker())
4942     return false;
4943
4944   // The medium and large code models are expected to provide a sufficiently
4945   // large TOC to provide all data addressing needs of a module with a
4946   // single TOC.
4947   if (CodeModel::Medium == TM.getCodeModel() ||
4948       CodeModel::Large == TM.getCodeModel())
4949     return true;
4950
4951   // Any explicitly-specified sections and section prefixes must also match.
4952   // Also, if we're using -ffunction-sections, then each function is always in
4953   // a different section (the same is true for COMDAT functions).
4954   if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4955       Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4956     return false;
4957   if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4958     if (F->getSectionPrefix() != Caller->getSectionPrefix())
4959       return false;
4960   }
4961
4962   return true;
4963 }
4964
4965 static bool
4966 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4967                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
4968   assert(Subtarget.is64BitELFABI());
4969
4970   const unsigned PtrByteSize = 8;
4971   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4972
4973   static const MCPhysReg GPR[] = {
4974     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4975     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4976   };
4977   static const MCPhysReg VR[] = {
4978     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4979     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4980   };
4981
4982   const unsigned NumGPRs = std::size(GPR);
4983   const unsigned NumFPRs = 13;
4984   const unsigned NumVRs = std::size(VR);
4985   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4986
4987   unsigned NumBytes = LinkageSize;
4988   unsigned AvailableFPRs = NumFPRs;
4989   unsigned AvailableVRs = NumVRs;
4990
4991   for (const ISD::OutputArg& Param : Outs) {
4992     if (Param.Flags.isNest()) continue;
4993
4994     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4995                                LinkageSize, ParamAreaSize, NumBytes,
4996                                AvailableFPRs, AvailableVRs))
4997       return true;
4998   }
4999   return false;
5000 }
5001
5002 static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5003   if (CB.arg_size() != CallerFn->arg_size())
5004     return false;
5005
5006   auto CalleeArgIter = CB.arg_begin();
5007   auto CalleeArgEnd = CB.arg_end();
5008   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5009
5010   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5011     const Value* CalleeArg = *CalleeArgIter;
5012     const Value* CallerArg = &(*CallerArgIter);
5013     if (CalleeArg == CallerArg)
5014       continue;
5015
5016     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5017     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
5018     //      }
5019     // 1st argument of callee is undef and has the same type as caller.
5020     if (CalleeArg->getType() == CallerArg->getType() &&
5021         isa<UndefValue>(CalleeArg))
5022       continue;
5023
5024     return false;
5025   }
5026
5027   return true;
5028 }
5029
5030 // Returns true if TCO is possible between the callers and callees
5031 // calling conventions.
5032 static bool
5033 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5034                                     CallingConv::ID CalleeCC) {
5035   // Tail calls are possible with fastcc and ccc.
5036   auto isTailCallableCC  = [] (CallingConv::ID CC){
5037       return  CC == CallingConv::C || CC == CallingConv::Fast;
5038   };
5039   if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5040     return false;
5041
5042   // We can safely tail call both fastcc and ccc callees from a c calling
5043   // convention caller. If the caller is fastcc, we may have less stack space
5044   // than a non-fastcc caller with the same signature so disable tail-calls in
5045   // that case.
5046   return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5047 }
5048
5049 bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5050     const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5051     CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5052     const SmallVectorImpl<ISD::OutputArg> &Outs,
5053     const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5054     bool isCalleeExternalSymbol) const {
5055   bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5056
5057   if (DisableSCO && !TailCallOpt) return false;
5058
5059   // Variadic argument functions are not supported.
5060   if (isVarArg) return false;
5061
5062   // Check that the calling conventions are compatible for tco.
5063   if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5064     return false;
5065
5066   // Caller contains any byval parameter is not supported.
5067   if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5068     return false;
5069
5070   // Callee contains any byval parameter is not supported, too.
5071   // Note: This is a quick work around, because in some cases, e.g.
5072   // caller's stack size > callee's stack size, we are still able to apply
5073   // sibling call optimization. For example, gcc is able to do SCO for caller1
5074   // in the following example, but not for caller2.
5075   //   struct test {
5076   //     long int a;
5077   //     char ary[56];
5078   //   } gTest;
5079   //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
5080   //     b->a = v.a;
5081   //     return 0;
5082   //   }
5083   //   void caller1(struct test a, struct test c, struct test *b) {
5084   //     callee(gTest, b); }
5085   //   void caller2(struct test *b) { callee(gTest, b); }
5086   if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5087     return false;
5088
5089   // If callee and caller use different calling conventions, we cannot pass
5090   // parameters on stack since offsets for the parameter area may be different.
5091   if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5092     return false;
5093
5094   // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5095   // the caller and callee share the same TOC for TCO/SCO. If the caller and
5096   // callee potentially have different TOC bases then we cannot tail call since
5097   // we need to restore the TOC pointer after the call.
5098   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5099   // We cannot guarantee this for indirect calls or calls to external functions.
5100   // When PC-Relative addressing is used, the concept of the TOC is no longer
5101   // applicable so this check is not required.
5102   // Check first for indirect calls.
5103   if (!Subtarget.isUsingPCRelativeCalls() &&
5104       !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5105     return false;
5106
5107   // Check if we share the TOC base.
5108   if (!Subtarget.isUsingPCRelativeCalls() &&
5109       !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5110     return false;
5111
5112   // TCO allows altering callee ABI, so we don't have to check further.
5113   if (CalleeCC == CallingConv::Fast && TailCallOpt)
5114     return true;
5115
5116   if (DisableSCO) return false;
5117
5118   // If callee use the same argument list that caller is using, then we can
5119   // apply SCO on this case. If it is not, then we need to check if callee needs
5120   // stack for passing arguments.
5121   // PC Relative tail calls may not have a CallBase.
5122   // If there is no CallBase we cannot verify if we have the same argument
5123   // list so assume that we don't have the same argument list.
5124   if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5125       needStackSlotPassParameters(Subtarget, Outs))
5126     return false;
5127   else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5128     return false;
5129
5130   return true;
5131 }
5132
5133 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
5134 /// for tail call optimization. Targets which want to do tail call
5135 /// optimization should implement this function.
5136 bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5137     const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5138     CallingConv::ID CallerCC, bool isVarArg,
5139     const SmallVectorImpl<ISD::InputArg> &Ins) const {
5140   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5141     return false;
5142
5143   // Variable argument functions are not supported.
5144   if (isVarArg)
5145     return false;
5146
5147   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5148     // Functions containing by val parameters are not supported.
5149     if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5150       return false;
5151
5152     // Non-PIC/GOT tail calls are supported.
5153     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5154       return true;
5155
5156     // At the moment we can only do local tail calls (in same module, hidden
5157     // or protected) if we are generating PIC.
5158     if (CalleeGV)
5159       return CalleeGV->hasHiddenVisibility() ||
5160              CalleeGV->hasProtectedVisibility();
5161   }
5162
5163   return false;
5164 }
5165
5166 /// isCallCompatibleAddress - Return the immediate to use if the specified
5167 /// 32-bit value is representable in the immediate field of a BxA instruction.
5168 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5169   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5170   if (!C) return nullptr;
5171
5172   int Addr = C->getZExtValue();
5173   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
5174       SignExtend32<26>(Addr) != Addr)
5175     return nullptr;  // Top 6 bits have to be sext of immediate.
5176
5177   return DAG
5178       .getConstant(
5179           (int)C->getZExtValue() >> 2, SDLoc(Op),
5180           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5181       .getNode();
5182 }
5183
5184 namespace {
5185
5186 struct TailCallArgumentInfo {
5187   SDValue Arg;
5188   SDValue FrameIdxOp;
5189   int FrameIdx = 0;
5190
5191   TailCallArgumentInfo() = default;
5192 };
5193
5194 } // end anonymous namespace
5195
5196 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5197 static void StoreTailCallArgumentsToStackSlot(
5198     SelectionDAG &DAG, SDValue Chain,
5199     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5200     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5201   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5202     SDValue Arg = TailCallArgs[i].Arg;
5203     SDValue FIN = TailCallArgs[i].FrameIdxOp;
5204     int FI = TailCallArgs[i].FrameIdx;
5205     // Store relative to framepointer.
5206     MemOpChains.push_back(DAG.getStore(
5207         Chain, dl, Arg, FIN,
5208         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5209   }
5210 }
5211
5212 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5213 /// the appropriate stack slot for the tail call optimized function call.
5214 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5215                                              SDValue OldRetAddr, SDValue OldFP,
5216                                              int SPDiff, const SDLoc &dl) {
5217   if (SPDiff) {
5218     // Calculate the new stack slot for the return address.
5219     MachineFunction &MF = DAG.getMachineFunction();
5220     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5221     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5222     bool isPPC64 = Subtarget.isPPC64();
5223     int SlotSize = isPPC64 ? 8 : 4;
5224     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5225     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5226                                                          NewRetAddrLoc, true);
5227     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5228     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5229     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5230                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5231   }
5232   return Chain;
5233 }
5234
5235 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5236 /// the position of the argument.
5237 static void
5238 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5239                          SDValue Arg, int SPDiff, unsigned ArgOffset,
5240                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5241   int Offset = ArgOffset + SPDiff;
5242   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5243   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5244   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5245   SDValue FIN = DAG.getFrameIndex(FI, VT);
5246   TailCallArgumentInfo Info;
5247   Info.Arg = Arg;
5248   Info.FrameIdxOp = FIN;
5249   Info.FrameIdx = FI;
5250   TailCallArguments.push_back(Info);
5251 }
5252
5253 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5254 /// stack slot. Returns the chain as result and the loaded frame pointers in
5255 /// LROpOut/FPOpout. Used when tail calling.
5256 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5257     SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5258     SDValue &FPOpOut, const SDLoc &dl) const {
5259   if (SPDiff) {
5260     // Load the LR and FP stack slot for later adjusting.
5261     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5262     LROpOut = getReturnAddrFrameIndex(DAG);
5263     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5264     Chain = SDValue(LROpOut.getNode(), 1);
5265   }
5266   return Chain;
5267 }
5268
5269 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5270 /// by "Src" to address "Dst" of size "Size".  Alignment information is
5271 /// specified by the specific parameter attribute. The copy will be passed as
5272 /// a byval function parameter.
5273 /// Sometimes what we are copying is the end of a larger object, the part that
5274 /// does not fit in registers.
5275 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5276                                          SDValue Chain, ISD::ArgFlagsTy Flags,
5277                                          SelectionDAG &DAG, const SDLoc &dl) {
5278   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5279   return DAG.getMemcpy(
5280       Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5281       /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5282 }
5283
5284 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5285 /// tail calls.
5286 static void LowerMemOpCallTo(
5287     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5288     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5289     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5290     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5291   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5292   if (!isTailCall) {
5293     if (isVector) {
5294       SDValue StackPtr;
5295       if (isPPC64)
5296         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5297       else
5298         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5299       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5300                            DAG.getConstant(ArgOffset, dl, PtrVT));
5301     }
5302     MemOpChains.push_back(
5303         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5304     // Calculate and remember argument location.
5305   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5306                                   TailCallArguments);
5307 }
5308
5309 static void
5310 PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5311                 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5312                 SDValue FPOp,
5313                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5314   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5315   // might overwrite each other in case of tail call optimization.
5316   SmallVector<SDValue, 8> MemOpChains2;
5317   // Do not flag preceding copytoreg stuff together with the following stuff.
5318   InGlue = SDValue();
5319   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5320                                     MemOpChains2, dl);
5321   if (!MemOpChains2.empty())
5322     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5323
5324   // Store the return address to the appropriate stack slot.
5325   Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5326
5327   // Emit callseq_end just before tailcall node.
5328   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5329   InGlue = Chain.getValue(1);
5330 }
5331
5332 // Is this global address that of a function that can be called by name? (as
5333 // opposed to something that must hold a descriptor for an indirect call).
5334 static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5335   if (GV) {
5336     if (GV->isThreadLocal())
5337       return false;
5338
5339     return GV->getValueType()->isFunctionTy();
5340   }
5341
5342   return false;
5343 }
5344
5345 SDValue PPCTargetLowering::LowerCallResult(
5346     SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5347     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5348     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5349   SmallVector<CCValAssign, 16> RVLocs;
5350   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5351                     *DAG.getContext());
5352
5353   CCRetInfo.AnalyzeCallResult(
5354       Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5355                ? RetCC_PPC_Cold
5356                : RetCC_PPC);
5357
5358   // Copy all of the result registers out of their specified physreg.
5359   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5360     CCValAssign &VA = RVLocs[i];
5361     assert(VA.isRegLoc() && "Can only return in registers!");
5362
5363     SDValue Val;
5364
5365     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5366       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5367                                       InGlue);
5368       Chain = Lo.getValue(1);
5369       InGlue = Lo.getValue(2);
5370       VA = RVLocs[++i]; // skip ahead to next loc
5371       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5372                                       InGlue);
5373       Chain = Hi.getValue(1);
5374       InGlue = Hi.getValue(2);
5375       if (!Subtarget.isLittleEndian())
5376         std::swap (Lo, Hi);
5377       Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5378     } else {
5379       Val = DAG.getCopyFromReg(Chain, dl,
5380                                VA.getLocReg(), VA.getLocVT(), InGlue);
5381       Chain = Val.getValue(1);
5382       InGlue = Val.getValue(2);
5383     }
5384
5385     switch (VA.getLocInfo()) {
5386     default: llvm_unreachable("Unknown loc info!");
5387     case CCValAssign::Full: break;
5388     case CCValAssign::AExt:
5389       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5390       break;
5391     case CCValAssign::ZExt:
5392       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5393                         DAG.getValueType(VA.getValVT()));
5394       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5395       break;
5396     case CCValAssign::SExt:
5397       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5398                         DAG.getValueType(VA.getValVT()));
5399       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5400       break;
5401     }
5402
5403     InVals.push_back(Val);
5404   }
5405
5406   return Chain;
5407 }
5408
5409 static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5410                            const PPCSubtarget &Subtarget, bool isPatchPoint) {
5411   auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5412   const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5413
5414   // PatchPoint calls are not indirect.
5415   if (isPatchPoint)
5416     return false;
5417
5418   if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5419     return false;
5420
5421   // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
5422   // becuase the immediate function pointer points to a descriptor instead of
5423   // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5424   // pointer immediate points to the global entry point, while the BLA would
5425   // need to jump to the local entry point (see rL211174).
5426   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5427       isBLACompatibleAddress(Callee, DAG))
5428     return false;
5429
5430   return true;
5431 }
5432
5433 // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5434 static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5435   return Subtarget.isAIXABI() ||
5436          (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5437 }
5438
5439 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5440                               const Function &Caller, const SDValue &Callee,
5441                               const PPCSubtarget &Subtarget,
5442                               const TargetMachine &TM,
5443                               bool IsStrictFPCall = false) {
5444   if (CFlags.IsTailCall)
5445     return PPCISD::TC_RETURN;
5446
5447   unsigned RetOpc = 0;
5448   // This is a call through a function pointer.
5449   if (CFlags.IsIndirect) {
5450     // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
5451     // indirect calls. The save of the caller's TOC pointer to the stack will be
5452     // inserted into the DAG as part of call lowering. The restore of the TOC
5453     // pointer is modeled by using a pseudo instruction for the call opcode that
5454     // represents the 2 instruction sequence of an indirect branch and link,
5455     // immediately followed by a load of the TOC pointer from the stack save
5456     // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5457     // as it is not saved or used.
5458     RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5459                                                  : PPCISD::BCTRL;
5460   } else if (Subtarget.isUsingPCRelativeCalls()) {
5461     assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5462     RetOpc = PPCISD::CALL_NOTOC;
5463   } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5464     // The ABIs that maintain a TOC pointer accross calls need to have a nop
5465     // immediately following the call instruction if the caller and callee may
5466     // have different TOC bases. At link time if the linker determines the calls
5467     // may not share a TOC base, the call is redirected to a trampoline inserted
5468     // by the linker. The trampoline will (among other things) save the callers
5469     // TOC pointer at an ABI designated offset in the linkage area and the
5470     // linker will rewrite the nop to be a load of the TOC pointer from the
5471     // linkage area into gpr2.
5472     auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5473     const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5474     RetOpc =
5475         callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5476   } else
5477     RetOpc = PPCISD::CALL;
5478   if (IsStrictFPCall) {
5479     switch (RetOpc) {
5480     default:
5481       llvm_unreachable("Unknown call opcode");
5482     case PPCISD::BCTRL_LOAD_TOC:
5483       RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5484       break;
5485     case PPCISD::BCTRL:
5486       RetOpc = PPCISD::BCTRL_RM;
5487       break;
5488     case PPCISD::CALL_NOTOC:
5489       RetOpc = PPCISD::CALL_NOTOC_RM;
5490       break;
5491     case PPCISD::CALL:
5492       RetOpc = PPCISD::CALL_RM;
5493       break;
5494     case PPCISD::CALL_NOP:
5495       RetOpc = PPCISD::CALL_NOP_RM;
5496       break;
5497     }
5498   }
5499   return RetOpc;
5500 }
5501
5502 static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5503                                const SDLoc &dl, const PPCSubtarget &Subtarget) {
5504   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5505     if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5506       return SDValue(Dest, 0);
5507
5508   // Returns true if the callee is local, and false otherwise.
5509   auto isLocalCallee = [&]() {
5510     const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5511     const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5512
5513     return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5514            !isa_and_nonnull<GlobalIFunc>(GV);
5515   };
5516
5517   // The PLT is only used in 32-bit ELF PIC mode.  Attempting to use the PLT in
5518   // a static relocation model causes some versions of GNU LD (2.17.50, at
5519   // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5520   // built with secure-PLT.
5521   bool UsePlt =
5522       Subtarget.is32BitELFABI() && !isLocalCallee() &&
5523       Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5524
5525   const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5526     const TargetMachine &TM = Subtarget.getTargetMachine();
5527     const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5528     MCSymbolXCOFF *S =
5529         cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5530
5531     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5532     return DAG.getMCSymbol(S, PtrVT);
5533   };
5534
5535   auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5536   const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5537   if (isFunctionGlobalAddress(GV)) {
5538     const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5539
5540     if (Subtarget.isAIXABI()) {
5541       assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5542       return getAIXFuncEntryPointSymbolSDNode(GV);
5543     }
5544     return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5545                                       UsePlt ? PPCII::MO_PLT : 0);
5546   }
5547
5548   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5549     const char *SymName = S->getSymbol();
5550     if (Subtarget.isAIXABI()) {
5551       // If there exists a user-declared function whose name is the same as the
5552       // ExternalSymbol's, then we pick up the user-declared version.
5553       const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5554       if (const Function *F =
5555               dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5556         return getAIXFuncEntryPointSymbolSDNode(F);
5557
5558       // On AIX, direct function calls reference the symbol for the function's
5559       // entry point, which is named by prepending a "." before the function's
5560       // C-linkage name. A Qualname is returned here because an external
5561       // function entry point is a csect with XTY_ER property.
5562       const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5563         auto &Context = DAG.getMachineFunction().getContext();
5564         MCSectionXCOFF *Sec = Context.getXCOFFSection(
5565             (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5566             XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5567         return Sec->getQualNameSymbol();
5568       };
5569
5570       SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5571     }
5572     return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5573                                        UsePlt ? PPCII::MO_PLT : 0);
5574   }
5575
5576   // No transformation needed.
5577   assert(Callee.getNode() && "What no callee?");
5578   return Callee;
5579 }
5580
5581 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5582   assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5583          "Expected a CALLSEQ_STARTSDNode.");
5584
5585   // The last operand is the chain, except when the node has glue. If the node
5586   // has glue, then the last operand is the glue, and the chain is the second
5587   // last operand.
5588   SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5589   if (LastValue.getValueType() != MVT::Glue)
5590     return LastValue;
5591
5592   return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5593 }
5594
5595 // Creates the node that moves a functions address into the count register
5596 // to prepare for an indirect call instruction.
5597 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5598                                 SDValue &Glue, SDValue &Chain,
5599                                 const SDLoc &dl) {
5600   SDValue MTCTROps[] = {Chain, Callee, Glue};
5601   EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5602   Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5603                       ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5604   // The glue is the second value produced.
5605   Glue = Chain.getValue(1);
5606 }
5607
5608 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5609                                           SDValue &Glue, SDValue &Chain,
5610                                           SDValue CallSeqStart,
5611                                           const CallBase *CB, const SDLoc &dl,
5612                                           bool hasNest,
5613                                           const PPCSubtarget &Subtarget) {
5614   // Function pointers in the 64-bit SVR4 ABI do not point to the function
5615   // entry point, but to the function descriptor (the function entry point
5616   // address is part of the function descriptor though).
5617   // The function descriptor is a three doubleword structure with the
5618   // following fields: function entry point, TOC base address and
5619   // environment pointer.
5620   // Thus for a call through a function pointer, the following actions need
5621   // to be performed:
5622   //   1. Save the TOC of the caller in the TOC save area of its stack
5623   //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5624   //   2. Load the address of the function entry point from the function
5625   //      descriptor.
5626   //   3. Load the TOC of the callee from the function descriptor into r2.
5627   //   4. Load the environment pointer from the function descriptor into
5628   //      r11.
5629   //   5. Branch to the function entry point address.
5630   //   6. On return of the callee, the TOC of the caller needs to be
5631   //      restored (this is done in FinishCall()).
5632   //
5633   // The loads are scheduled at the beginning of the call sequence, and the
5634   // register copies are flagged together to ensure that no other
5635   // operations can be scheduled in between. E.g. without flagging the
5636   // copies together, a TOC access in the caller could be scheduled between
5637   // the assignment of the callee TOC and the branch to the callee, which leads
5638   // to incorrect code.
5639
5640   // Start by loading the function address from the descriptor.
5641   SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5642   auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5643                       ? (MachineMemOperand::MODereferenceable |
5644                          MachineMemOperand::MOInvariant)
5645                       : MachineMemOperand::MONone;
5646
5647   MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5648
5649   // Registers used in building the DAG.
5650   const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5651   const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5652
5653   // Offsets of descriptor members.
5654   const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5655   const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5656
5657   const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5658   const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5659
5660   // One load for the functions entry point address.
5661   SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5662                                     Alignment, MMOFlags);
5663
5664   // One for loading the TOC anchor for the module that contains the called
5665   // function.
5666   SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5667   SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5668   SDValue TOCPtr =
5669       DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5670                   MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5671
5672   // One for loading the environment pointer.
5673   SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5674   SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5675   SDValue LoadEnvPtr =
5676       DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5677                   MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5678
5679
5680   // Then copy the newly loaded TOC anchor to the TOC pointer.
5681   SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5682   Chain = TOCVal.getValue(0);
5683   Glue = TOCVal.getValue(1);
5684
5685   // If the function call has an explicit 'nest' parameter, it takes the
5686   // place of the environment pointer.
5687   assert((!hasNest || !Subtarget.isAIXABI()) &&
5688          "Nest parameter is not supported on AIX.");
5689   if (!hasNest) {
5690     SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5691     Chain = EnvVal.getValue(0);
5692     Glue = EnvVal.getValue(1);
5693   }
5694
5695   // The rest of the indirect call sequence is the same as the non-descriptor
5696   // DAG.
5697   prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5698 }
5699
5700 static void
5701 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5702                   PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5703                   SelectionDAG &DAG,
5704                   SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5705                   SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5706                   const PPCSubtarget &Subtarget) {
5707   const bool IsPPC64 = Subtarget.isPPC64();
5708   // MVT for a general purpose register.
5709   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5710
5711   // First operand is always the chain.
5712   Ops.push_back(Chain);
5713
5714   // If it's a direct call pass the callee as the second operand.
5715   if (!CFlags.IsIndirect)
5716     Ops.push_back(Callee);
5717   else {
5718     assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5719
5720     // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5721     // on the stack (this would have been done in `LowerCall_64SVR4` or
5722     // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5723     // represents both the indirect branch and a load that restores the TOC
5724     // pointer from the linkage area. The operand for the TOC restore is an add
5725     // of the TOC save offset to the stack pointer. This must be the second
5726     // operand: after the chain input but before any other variadic arguments.
5727     // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5728     // saved or used.
5729     if (isTOCSaveRestoreRequired(Subtarget)) {
5730       const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5731
5732       SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5733       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5734       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5735       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5736       Ops.push_back(AddTOC);
5737     }
5738
5739     // Add the register used for the environment pointer.
5740     if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5741       Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5742                                     RegVT));
5743
5744
5745     // Add CTR register as callee so a bctr can be emitted later.
5746     if (CFlags.IsTailCall)
5747       Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5748   }
5749
5750   // If this is a tail call add stack pointer delta.
5751   if (CFlags.IsTailCall)
5752     Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5753
5754   // Add argument registers to the end of the list so that they are known live
5755   // into the call.
5756   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5757     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5758                                   RegsToPass[i].second.getValueType()));
5759
5760   // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5761   // no way to mark dependencies as implicit here.
5762   // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5763   if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5764        !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5765     Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5766
5767   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5768   if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5769     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5770
5771   // Add a register mask operand representing the call-preserved registers.
5772   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5773   const uint32_t *Mask =
5774       TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5775   assert(Mask && "Missing call preserved mask for calling convention");
5776   Ops.push_back(DAG.getRegisterMask(Mask));
5777
5778   // If the glue is valid, it is the last operand.
5779   if (Glue.getNode())
5780     Ops.push_back(Glue);
5781 }
5782
5783 SDValue PPCTargetLowering::FinishCall(
5784     CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5785     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5786     SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5787     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5788     SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5789
5790   if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5791       Subtarget.isAIXABI())
5792     setUsesTOCBasePtr(DAG);
5793
5794   unsigned CallOpc =
5795       getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5796                     Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5797
5798   if (!CFlags.IsIndirect)
5799     Callee = transformCallee(Callee, DAG, dl, Subtarget);
5800   else if (Subtarget.usesFunctionDescriptors())
5801     prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5802                                   dl, CFlags.HasNest, Subtarget);
5803   else
5804     prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5805
5806   // Build the operand list for the call instruction.
5807   SmallVector<SDValue, 8> Ops;
5808   buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5809                     SPDiff, Subtarget);
5810
5811   // Emit tail call.
5812   if (CFlags.IsTailCall) {
5813     // Indirect tail call when using PC Relative calls do not have the same
5814     // constraints.
5815     assert(((Callee.getOpcode() == ISD::Register &&
5816              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5817             Callee.getOpcode() == ISD::TargetExternalSymbol ||
5818             Callee.getOpcode() == ISD::TargetGlobalAddress ||
5819             isa<ConstantSDNode>(Callee) ||
5820             (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5821            "Expecting a global address, external symbol, absolute value, "
5822            "register or an indirect tail call when PC Relative calls are "
5823            "used.");
5824     // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5825     assert(CallOpc == PPCISD::TC_RETURN &&
5826            "Unexpected call opcode for a tail call.");
5827     DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5828     SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5829     DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5830     return Ret;
5831   }
5832
5833   std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5834   Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5835   DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5836   Glue = Chain.getValue(1);
5837
5838   // When performing tail call optimization the callee pops its arguments off
5839   // the stack. Account for this here so these bytes can be pushed back on in
5840   // PPCFrameLowering::eliminateCallFramePseudoInstr.
5841   int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5842                          getTargetMachine().Options.GuaranteedTailCallOpt)
5843                             ? NumBytes
5844                             : 0;
5845
5846   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5847   Glue = Chain.getValue(1);
5848
5849   return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5850                          DAG, InVals);
5851 }
5852
5853 bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5854   CallingConv::ID CalleeCC = CB->getCallingConv();
5855   const Function *CallerFunc = CB->getCaller();
5856   CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5857   const Function *CalleeFunc = CB->getCalledFunction();
5858   if (!CalleeFunc)
5859     return false;
5860   const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5861
5862   SmallVector<ISD::OutputArg, 2> Outs;
5863   SmallVector<ISD::InputArg, 2> Ins;
5864
5865   GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5866                 CalleeFunc->getAttributes(), Outs, *this,
5867                 CalleeFunc->getDataLayout());
5868
5869   return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5870                           CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5871                           false /*isCalleeExternalSymbol*/);
5872 }
5873
5874 bool PPCTargetLowering::isEligibleForTCO(
5875     const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5876     CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5877     const SmallVectorImpl<ISD::OutputArg> &Outs,
5878     const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5879     bool isCalleeExternalSymbol) const {
5880   if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5881     return false;
5882
5883   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5884     return IsEligibleForTailCallOptimization_64SVR4(
5885         CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5886         isCalleeExternalSymbol);
5887   else
5888     return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5889                                              isVarArg, Ins);
5890 }
5891
5892 SDValue
5893 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5894                              SmallVectorImpl<SDValue> &InVals) const {
5895   SelectionDAG &DAG                     = CLI.DAG;
5896   SDLoc &dl                             = CLI.DL;
5897   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5898   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
5899   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
5900   SDValue Chain                         = CLI.Chain;
5901   SDValue Callee                        = CLI.Callee;
5902   bool &isTailCall                      = CLI.IsTailCall;
5903   CallingConv::ID CallConv              = CLI.CallConv;
5904   bool isVarArg                         = CLI.IsVarArg;
5905   bool isPatchPoint                     = CLI.IsPatchPoint;
5906   const CallBase *CB                    = CLI.CB;
5907
5908   if (isTailCall) {
5909     MachineFunction &MF = DAG.getMachineFunction();
5910     CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5911     auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5912     const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5913     bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5914
5915     isTailCall =
5916         isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5917                          &(MF.getFunction()), IsCalleeExternalSymbol);
5918     if (isTailCall) {
5919       ++NumTailCalls;
5920       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5921         ++NumSiblingCalls;
5922
5923       // PC Relative calls no longer guarantee that the callee is a Global
5924       // Address Node. The callee could be an indirect tail call in which
5925       // case the SDValue for the callee could be a load (to load the address
5926       // of a function pointer) or it may be a register copy (to move the
5927       // address of the callee from a function parameter into a virtual
5928       // register). It may also be an ExternalSymbolSDNode (ex memcopy).
5929       assert((Subtarget.isUsingPCRelativeCalls() ||
5930               isa<GlobalAddressSDNode>(Callee)) &&
5931              "Callee should be an llvm::Function object.");
5932
5933       LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5934                         << "\nTCO callee: ");
5935       LLVM_DEBUG(Callee.dump());
5936     }
5937   }
5938
5939   if (!isTailCall && CB && CB->isMustTailCall())
5940     report_fatal_error("failed to perform tail call elimination on a call "
5941                        "site marked musttail");
5942
5943   // When long calls (i.e. indirect calls) are always used, calls are always
5944   // made via function pointer. If we have a function name, first translate it
5945   // into a pointer.
5946   if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5947       !isTailCall)
5948     Callee = LowerGlobalAddress(Callee, DAG);
5949
5950   CallFlags CFlags(
5951       CallConv, isTailCall, isVarArg, isPatchPoint,
5952       isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5953       // hasNest
5954       Subtarget.is64BitELFABI() &&
5955           any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5956       CLI.NoMerge);
5957
5958   if (Subtarget.isAIXABI())
5959     return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5960                          InVals, CB);
5961
5962   assert(Subtarget.isSVR4ABI());
5963   if (Subtarget.isPPC64())
5964     return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5965                             InVals, CB);
5966   return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5967                           InVals, CB);
5968 }
5969
5970 SDValue PPCTargetLowering::LowerCall_32SVR4(
5971     SDValue Chain, SDValue Callee, CallFlags CFlags,
5972     const SmallVectorImpl<ISD::OutputArg> &Outs,
5973     const SmallVectorImpl<SDValue> &OutVals,
5974     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5975     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5976     const CallBase *CB) const {
5977   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5978   // of the 32-bit SVR4 ABI stack frame layout.
5979
5980   const CallingConv::ID CallConv = CFlags.CallConv;
5981   const bool IsVarArg = CFlags.IsVarArg;
5982   const bool IsTailCall = CFlags.IsTailCall;
5983
5984   assert((CallConv == CallingConv::C ||
5985           CallConv == CallingConv::Cold ||
5986           CallConv == CallingConv::Fast) && "Unknown calling convention!");
5987
5988   const Align PtrAlign(4);
5989
5990   MachineFunction &MF = DAG.getMachineFunction();
5991
5992   // Mark this function as potentially containing a function that contains a
5993   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5994   // and restoring the callers stack pointer in this functions epilog. This is
5995   // done because by tail calling the called function might overwrite the value
5996   // in this function's (MF) stack pointer stack slot 0(SP).
5997   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5998       CallConv == CallingConv::Fast)
5999     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6000
6001   // Count how many bytes are to be pushed on the stack, including the linkage
6002   // area, parameter list area and the part of the local variable space which
6003   // contains copies of aggregates which are passed by value.
6004
6005   // Assign locations to all of the outgoing arguments.
6006   SmallVector<CCValAssign, 16> ArgLocs;
6007   PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6008
6009   // Reserve space for the linkage area on the stack.
6010   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6011                        PtrAlign);
6012   if (useSoftFloat())
6013     CCInfo.PreAnalyzeCallOperands(Outs);
6014
6015   if (IsVarArg) {
6016     // Handle fixed and variable vector arguments differently.
6017     // Fixed vector arguments go into registers as long as registers are
6018     // available. Variable vector arguments always go into memory.
6019     unsigned NumArgs = Outs.size();
6020
6021     for (unsigned i = 0; i != NumArgs; ++i) {
6022       MVT ArgVT = Outs[i].VT;
6023       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6024       bool Result;
6025
6026       if (Outs[i].IsFixed) {
6027         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6028                                CCInfo);
6029       } else {
6030         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
6031                                       ArgFlags, CCInfo);
6032       }
6033
6034       if (Result) {
6035 #ifndef NDEBUG
6036         errs() << "Call operand #" << i << " has unhandled type "
6037                << ArgVT << "\n";
6038 #endif
6039         llvm_unreachable(nullptr);
6040       }
6041     }
6042   } else {
6043     // All arguments are treated the same.
6044     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6045   }
6046   CCInfo.clearWasPPCF128();
6047
6048   // Assign locations to all of the outgoing aggregate by value arguments.
6049   SmallVector<CCValAssign, 16> ByValArgLocs;
6050   CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6051
6052   // Reserve stack space for the allocations in CCInfo.
6053   CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6054
6055   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6056
6057   // Size of the linkage area, parameter list area and the part of the local
6058   // space variable where copies of aggregates which are passed by value are
6059   // stored.
6060   unsigned NumBytes = CCByValInfo.getStackSize();
6061
6062   // Calculate by how many bytes the stack has to be adjusted in case of tail
6063   // call optimization.
6064   int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6065
6066   // Adjust the stack pointer for the new arguments...
6067   // These operations are automatically eliminated by the prolog/epilog pass
6068   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6069   SDValue CallSeqStart = Chain;
6070
6071   // Load the return address and frame pointer so it can be moved somewhere else
6072   // later.
6073   SDValue LROp, FPOp;
6074   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6075
6076   // Set up a copy of the stack pointer for use loading and storing any
6077   // arguments that may not fit in the registers available for argument
6078   // passing.
6079   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6080
6081   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6082   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6083   SmallVector<SDValue, 8> MemOpChains;
6084
6085   bool seenFloatArg = false;
6086   // Walk the register/memloc assignments, inserting copies/loads.
6087   // i - Tracks the index into the list of registers allocated for the call
6088   // RealArgIdx - Tracks the index into the list of actual function arguments
6089   // j - Tracks the index into the list of byval arguments
6090   for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6091        i != e;
6092        ++i, ++RealArgIdx) {
6093     CCValAssign &VA = ArgLocs[i];
6094     SDValue Arg = OutVals[RealArgIdx];
6095     ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6096
6097     if (Flags.isByVal()) {
6098       // Argument is an aggregate which is passed by value, thus we need to
6099       // create a copy of it in the local variable space of the current stack
6100       // frame (which is the stack frame of the caller) and pass the address of
6101       // this copy to the callee.
6102       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6103       CCValAssign &ByValVA = ByValArgLocs[j++];
6104       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6105
6106       // Memory reserved in the local variable space of the callers stack frame.
6107       unsigned LocMemOffset = ByValVA.getLocMemOffset();
6108
6109       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6110       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6111                            StackPtr, PtrOff);
6112
6113       // Create a copy of the argument in the local area of the current
6114       // stack frame.
6115       SDValue MemcpyCall =
6116         CreateCopyOfByValArgument(Arg, PtrOff,
6117                                   CallSeqStart.getNode()->getOperand(0),
6118                                   Flags, DAG, dl);
6119
6120       // This must go outside the CALLSEQ_START..END.
6121       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6122                                                      SDLoc(MemcpyCall));
6123       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6124                              NewCallSeqStart.getNode());
6125       Chain = CallSeqStart = NewCallSeqStart;
6126
6127       // Pass the address of the aggregate copy on the stack either in a
6128       // physical register or in the parameter list area of the current stack
6129       // frame to the callee.
6130       Arg = PtrOff;
6131     }
6132
6133     // When useCRBits() is true, there can be i1 arguments.
6134     // It is because getRegisterType(MVT::i1) => MVT::i1,
6135     // and for other integer types getRegisterType() => MVT::i32.
6136     // Extend i1 and ensure callee will get i32.
6137     if (Arg.getValueType() == MVT::i1)
6138       Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6139                         dl, MVT::i32, Arg);
6140
6141     if (VA.isRegLoc()) {
6142       seenFloatArg |= VA.getLocVT().isFloatingPoint();
6143       // Put argument in a physical register.
6144       if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6145         bool IsLE = Subtarget.isLittleEndian();
6146         SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6147                         DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6148         RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6149         SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6150                            DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6151         RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6152                              SVal.getValue(0)));
6153       } else
6154         RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6155     } else {
6156       // Put argument in the parameter list area of the current stack frame.
6157       assert(VA.isMemLoc());
6158       unsigned LocMemOffset = VA.getLocMemOffset();
6159
6160       if (!IsTailCall) {
6161         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6162         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6163                              StackPtr, PtrOff);
6164
6165         MemOpChains.push_back(
6166             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6167       } else {
6168         // Calculate and remember argument location.
6169         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6170                                  TailCallArguments);
6171       }
6172     }
6173   }
6174
6175   if (!MemOpChains.empty())
6176     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6177
6178   // Build a sequence of copy-to-reg nodes chained together with token chain
6179   // and flag operands which copy the outgoing args into the appropriate regs.
6180   SDValue InGlue;
6181   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6182     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6183                              RegsToPass[i].second, InGlue);
6184     InGlue = Chain.getValue(1);
6185   }
6186
6187   // Set CR bit 6 to true if this is a vararg call with floating args passed in
6188   // registers.
6189   if (IsVarArg) {
6190     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6191     SDValue Ops[] = { Chain, InGlue };
6192
6193     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6194                         VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6195
6196     InGlue = Chain.getValue(1);
6197   }
6198
6199   if (IsTailCall)
6200     PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6201                     TailCallArguments);
6202
6203   return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6204                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6205 }
6206
6207 // Copy an argument into memory, being careful to do this outside the
6208 // call sequence for the call to which the argument belongs.
6209 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6210     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6211     SelectionDAG &DAG, const SDLoc &dl) const {
6212   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6213                         CallSeqStart.getNode()->getOperand(0),
6214                         Flags, DAG, dl);
6215   // The MEMCPY must go outside the CALLSEQ_START..END.
6216   int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6217   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6218                                                  SDLoc(MemcpyCall));
6219   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6220                          NewCallSeqStart.getNode());
6221   return NewCallSeqStart;
6222 }
6223
// Lower an outgoing call for the 64-bit SVR4 ABIs (ELFv1 and ELFv2).
//
// The lowering proceeds in these steps:
//   1. Decide whether a parameter save area is needed and compute the number
//      of stack bytes (NumBytes) the call needs, starting from the linkage
//      area size.
//   2. Emit CALLSEQ_START (skipped for sibling calls).
//   3. Walk the outgoing arguments and assign each one to a GPR (X3..X10),
//      an FPR, a VR (V2..V13) and/or a stack slot addressed off X1,
//      collecting register copies in RegsToPass and memory operations in
//      MemOpChains.
//   4. For indirect calls, save the TOC pointer when required and, on ELFv2,
//      pass the callee address in X12.
//   5. Glue the register copies together and hand over to FinishCall.
//
// Outs and OutVals are indexed in parallel, one entry per outgoing argument.
// Ins, InVals and CB are only forwarded to FinishCall here.
SDValue PPCTargetLowering::LowerCall_64SVR4(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned NumOps = Outs.size();
  bool IsSibCall = false;
  bool IsFastCall = CFlags.CallConv == CallingConv::Fast;

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // Size in bytes of one doubleword slot / GPR shadow in the parameter area.
  unsigned PtrByteSize = 8;

  MachineFunction &MF = DAG.getMachineFunction();

  // A tail call without guaranteed tail-call optimization is treated as a
  // sibling call: no CALLSEQ_START is emitted, SPDiff stays 0 and
  // PrepareTailCall is skipped below.
  if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
    IsSibCall = true;

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the caller's stack pointer in this function's epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  assert(!(IsFastCall && CFlags.IsVarArg) &&
         "fastcc not supported on varargs functions");

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;
  // Next free index into the GPR, FPR and VR argument register arrays.
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = std::size(GPR);
  // With soft-float no FPRs are available for argument passing.
  const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
  const unsigned NumVRs = std::size(VR);

  // On ELFv2, we can avoid allocating the parameter area if all the arguments
  // can be passed to the callee in registers.
  // For the fast calling convention, there is another check below.
  // Note: We should keep consistent with LowerFormalArguments_64SVR4()
  bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
  if (!HasParameterArea) {
    // Only reached for non-vararg, non-fastcc ELFv2 calls: any argument that
    // CalculateStackSlotUsed reports as using a stack slot forces the
    // parameter area to exist.
    unsigned ParamAreaSize = NumGPRs * PtrByteSize;
    unsigned AvailableFPRs = NumFPRs;
    unsigned AvailableVRs = NumVRs;
    unsigned NumBytesTmp = NumBytes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (Outs[i].Flags.isNest()) continue;
      if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
                                 PtrByteSize, LinkageSize, ParamAreaSize,
                                 NumBytesTmp, AvailableFPRs, AvailableVRs))
        HasParameterArea = true;
    }
  }

  // When using the fast calling convention, we don't provide backing for
  // arguments that will be in registers.
  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;

  // Avoid allocating parameter area for fastcc functions if all the arguments
  // can be passed in the registers.
  if (IsFastCall)
    HasParameterArea = false;

  // Add up all the space actually used.
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    if (Flags.isNest())
      continue;

    if (IsFastCall) {
      if (Flags.isByVal()) {
        // A by-value aggregate consumes one GPR per (rounded-up) doubleword.
        NumGPRsUsed += (Flags.getByValSize()+7)/8;
        if (NumGPRsUsed > NumGPRs)
          HasParameterArea = true;
      } else {
        // An argument that still fits in its register class takes no stack
        // space under fastcc: 'continue' skips the size accounting below.
        // Falling out of the switch means the register class is exhausted.
        switch (ArgVT.getSimpleVT().SimpleTy) {
        default: llvm_unreachable("Unexpected ValueType for argument!");
        case MVT::i1:
        case MVT::i32:
        case MVT::i64:
          if (++NumGPRsUsed <= NumGPRs)
            continue;
          break;
        case MVT::v4i32:
        case MVT::v8i16:
        case MVT::v16i8:
        case MVT::v2f64:
        case MVT::v2i64:
        case MVT::v1i128:
        case MVT::f128:
          if (++NumVRsUsed <= NumVRs)
            continue;
          break;
        case MVT::v4f32:
          if (++NumVRsUsed <= NumVRs)
            continue;
          break;
        case MVT::f32:
        case MVT::f64:
          if (++NumFPRsUsed <= NumFPRs)
            continue;
          break;
        }
        HasParameterArea = true;
      }
    }

    /* Respect alignment of argument on the stack.  */
    auto Alignement =
        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
    NumBytes = alignTo(NumBytes, Alignement);

    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
    // Round up to a whole doubleword after the last element of an argument
    // split across consecutive registers.
    if (Flags.isInConsecutiveRegsLast())
      NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  }

  // Remembered only for the consistency assertion after the argument loop.
  unsigned NumBytesActuallyUsed = NumBytes;

  // In the old ELFv1 ABI,
  // the prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  // In the ELFv2 ABI, we allocate the parameter area iff a callee
  // really requires memory operands, e.g. a vararg function.
  if (HasParameterArea)
    NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
  else
    NumBytes = LinkageSize;

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  int SPDiff = 0;

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  if (!IsSibCall)
    SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (CFlags.IsTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  // For a sibling call this is just the incoming chain, since no
  // CALLSEQ_START node was created above.
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;

  // (physical register, value) pairs turned into CopyToReg nodes at the end.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  // Chains of the loads/stores created for arguments; merged with a
  // TokenFactor once all arguments have been processed.
  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    auto ComputePtrOff = [&]() {
      /* Respect alignment of argument on the stack.  */
      auto Alignment =
          CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = alignTo(ArgOffset, Alignment);

      PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

      // PtrOff = X1 + (aligned) ArgOffset.
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    };

    if (!IsFastCall) {
      ComputePtrOff();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      // Offsets past the eight GPR shadows mean "no GPR left".
      GPR_idx = std::min(GPR_idx, NumGPRs);
    }

    // Promote integers to 64-bit values.
    if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      // Note: Size includes alignment padding, so
      //   struct x { short a; char b; }
      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
      // These are the proper values we need for right-justifying the
      // aggregate in a parameter register.
      unsigned Size = Flags.getByValSize();

      // An empty aggregate parameter takes up no storage and no
      // registers.
      if (Size == 0)
        continue;

      if (IsFastCall)
        ComputePtrOff();

      // All aggregates smaller than 8 bytes must be passed right-justified.
      // For power-of-two sizes with a GPR available, a single extending load
      // of the aggregate is enough.
      if (Size==1 || Size==2 || Size==4) {
        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
          continue;
        }
      }

      // Small aggregate with no GPR left: copy it into its stack slot,
      // offset to the end of the doubleword on big-endian targets.
      if (GPR_idx == NumGPRs && Size < 8) {
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);
        ArgOffset += PtrByteSize;
        continue;
      }
      // Copy the object to parameter save area if it can not be entirely passed
      // by registers.
      // FIXME: we only need to copy the parts which need to be passed in
      // parameter save area. For the parts passed by registers, we don't need
      // to copy them to the stack although we need to allocate space for them
      // in parameter save area.
      if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

      // When a register is available, pass a small aggregate right-justified.
      if (Size < 8 && GPR_idx != NumGPRs) {
        // The easiest way to get this right-justified in a register
        // is to copy the structure into the rightmost portion of a
        // local variable slot, then load the whole slot into the
        // register.
        // FIXME: The memcpy seems to produce pretty awful code for
        // small aggregates, particularly for packed ones.
        // FIXME: It would be preferable to use the slot in the
        // parameter save area instead of a new local variable.
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

        // Load the slot into the register.
        SDValue Load =
            DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

        // Done with this argument.
        ArgOffset += PtrByteSize;
        continue;
      }

      // For aggregates larger than PtrByteSize, copy the pieces of the
      // object that fit into registers from the parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          // The last piece may be narrower than a full doubleword.
          unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
          EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
                                        MachinePointerInfo(), ObjType);

          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: account for the remaining doublewords in one step
          // and stop.
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        if (IsFastCall)
          ComputePtrOff();

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);
        // Under fastcc only arguments that actually went to memory advance
        // ArgOffset.
        if (IsFastCall)
          ArgOffset += PtrByteSize;
      }
      if (!IsFastCall)
        ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64: {
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // float aggregates.

      // Named arguments go into FPRs first, and once they overflow, the
      // remaining arguments go into GPRs and then the parameter save area.
      // Unnamed arguments for vararg functions always go to GPRs and
      // then the parameter save area.  For now, put all arguments to vararg
      // routines always in both locations (FPR *and* GPR or stack slot).
      bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
      // Set when the value is passed through a stack slot.
      bool NeededLoad = false;

      // First load the argument into the next available FPR.
      if (FPR_idx != NumFPRs)
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

      // Next, load the argument into GPR or stack slot if needed.
      if (!NeedGPROrStack)
        ;
      else if (GPR_idx != NumGPRs && !IsFastCall) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // In the non-vararg case, this can only ever happen in the
        // presence of f32 array types, since otherwise we never run
        // out of FPRs before running out of GPRs.
        SDValue ArgVal;

        // Double values are always passed in a single GPR.
        if (Arg.getValueType() != MVT::f32) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);

        // Non-array float values are extended and passed in a GPR.
        } else if (!Flags.isInConsecutiveRegs()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);

        // If we have an array of floats, we collect every odd element
        // together with its predecessor into one GPR.
        } else if (ArgOffset % PtrByteSize != 0) {
          SDValue Lo, Hi;
          Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
          Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          if (!isLittleEndian)
            std::swap(Lo, Hi);
          ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);

        // The final element, if even, goes into the first half of a GPR.
        } else if (Flags.isInConsecutiveRegsLast()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
          if (!isLittleEndian)
            ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));

        // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.
        } else
          ArgVal = SDValue();

        // A null ArgVal means this element is deferred to the next iteration.
        if (ArgVal.getNode())
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
      } else {
        if (IsFastCall)
          ComputePtrOff();

        // Single-precision floating-point values are mapped to the
        // second (rightmost) word of the stack doubleword.
        if (Arg.getValueType() == MVT::f32 &&
            !isLittleEndian && !Flags.isInConsecutiveRegs()) {
          SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
        }

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);

        NeededLoad = true;
      }
      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (!IsFastCall || NeededLoad) {
        ArgOffset += (Arg.getValueType() == MVT::f32 &&
                      Flags.isInConsecutiveRegs()) ? 4 : 8;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    }
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // vector aggregates.

      // For a varargs call, named arguments go into VRs or on the stack as
      // usual; unnamed arguments always go to the stack or the corresponding
      // GPRs when within range.  For now, we always put the value in both
      // locations (or even all three).
      if (CFlags.IsVarArg) {
        assert(HasParameterArea &&
               "Parameter area must exist if we have a varargs call.");
        // fastcc is asserted above never to be varargs, so ComputePtrOff()
        // has already run for this argument and PtrOff is valid here.
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          // Reload the stored value (as v4f32) for the vector register.
          SDValue Load =
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        // Also load each doubleword of the stored vector into the remaining
        // GPRs.  Note: this 'i' shadows the outer argument index within the
        // loop.
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params go into VRs or on the stack.
      if (VR_idx != NumVRs) {
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else {
        if (IsFastCall)
          ComputePtrOff();

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, CFlags.IsTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        if (IsFastCall)
          ArgOffset += 16;
      }

      if (!IsFastCall)
        ArgOffset += 16;
      break;
    }
  }

  // The size computed by the sizing loop must agree with the offsets
  // actually handed out above whenever a parameter area exists.
  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
         "mismatch in size of parameter area");
  (void)NumBytesActuallyUsed;

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Check if this is an indirect call (MTCTR/BCTRL).
  // See prepareDescriptorIndirectCall and buildCallOperands for more
  // information about calls through function pointers in the 64-bit SVR4 ABI.
  if (CFlags.IsIndirect) {
    // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
    // caller in the TOC save area.
    if (isTOCSaveRestoreRequired(Subtarget)) {
      assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
      // Load r2 into a virtual register and store it to the TOC save area.
      setUsesTOCBasePtr(DAG);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
      // TOC save area offset.
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
                           MachinePointerInfo::getStack(
                               DAG.getMachineFunction(), TOCSaveOffset));
    }
    // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
    // This does not mean the MTCTR instruction must use R12; it's easier
    // to model this as an extra parameter, so do that.
    if (isELFv2ABI && !CFlags.IsPatchPoint)
      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InGlue;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // Sibling calls skip the tail-call preparation.
  if (CFlags.IsTailCall && !IsSibCall)
    PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
6808
6809 // Returns true when the shadow of a general purpose argument register
6810 // in the parameter save area is aligned to at least 'RequiredAlign'.
6811 static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6812   assert(RequiredAlign.value() <= 16 &&
6813          "Required alignment greater than stack alignment.");
6814   switch (Reg) {
6815   default:
6816     report_fatal_error("called on invalid register.");
6817   case PPC::R5:
6818   case PPC::R9:
6819   case PPC::X3:
6820   case PPC::X5:
6821   case PPC::X7:
6822   case PPC::X9:
6823     // These registers are 16 byte aligned which is the most strict aligment
6824     // we can support.
6825     return true;
6826   case PPC::R3:
6827   case PPC::R7:
6828   case PPC::X4:
6829   case PPC::X6:
6830   case PPC::X8:
6831   case PPC::X10:
6832     // The shadow of these registers in the PSA is 8 byte aligned.
6833     return RequiredAlign <= 8;
6834   case PPC::R4:
6835   case PPC::R6:
6836   case PPC::R8:
6837   case PPC::R10:
6838     return RequiredAlign <= 4;
6839   }
6840 }
6841
/// Calling-convention assignment routine for AIX.
///
/// Decides where a single argument lives (GPRs, FPRs, VRs and/or the
/// parameter save area) and records the resulting CCValAssign locations in
/// the calling-convention state.
///
/// \param ValNo    Index of the argument being assigned.
/// \param ValVT    Value type of the argument.
/// \param LocVT    Location type proposed for the argument.
/// \param LocInfo  Extension/conversion info; overwritten with SExt/ZExt for
///                 integers narrower than a register.
/// \param ArgFlags Argument flags (byval, nest, sext, ...).
/// \param S        Calling-convention state; it is cast to AIXCCState, so the
///                 caller must pass an AIXCCState.
///
/// Summary of the handling below:
///  - f128 and nest arguments are fatal errors.
///  - ByVal aggregates: underaligned GPRs (and their stack shadow) are
///    skipped, stack is reserved for the whole object, then one RegLoc is
///    emitted per pointer-sized piece until GPRs run out, followed by a
///    single MemLoc. A zero-sized aggregate only gets a MemLoc.
///  - i1/i32/i64: one pointer-sized stack slot is reserved; the value goes in
///    a GPR if one is free, otherwise in that slot.
///  - f32/f64: stack is reserved, an FPR is used if available, and shadowing
///    GPRs are consumed (and receive custom RegLocs only for vararg calls).
///  - Vectors: see the detailed comments in the vector case.
///
/// \returns false on every path that records a location. The final
/// `return true` follows a switch in which every case returns or aborts.
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
                   CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                   CCState &S) {
  AIXCCState &State = static_cast<AIXCCState &>(S);
  const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
      State.getMachineFunction().getSubtarget());
  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned PtrSize = IsPPC64 ? 8 : 4;
  const Align PtrAlign(PtrSize);
  const Align StackAlign(16);
  // Integer pieces are always assigned in full register width.
  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

  if (ValVT == MVT::f128)
    report_fatal_error("f128 is unimplemented on AIX.");

  if (ArgFlags.isNest())
    report_fatal_error("Nest arguments are unimplemented.");

  static const MCPhysReg GPR_32[] = {// 32-bit registers.
                                     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
                                     PPC::R7, PPC::R8, PPC::R9, PPC::R10};
  static const MCPhysReg GPR_64[] = {// 64-bit registers.
                                     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                     PPC::X7, PPC::X8, PPC::X9, PPC::X10};

  static const MCPhysReg VR[] = {// Vector registers.
                                 PPC::V2,  PPC::V3,  PPC::V4,  PPC::V5,
                                 PPC::V6,  PPC::V7,  PPC::V8,  PPC::V9,
                                 PPC::V10, PPC::V11, PPC::V12, PPC::V13};

  const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;

  if (ArgFlags.isByVal()) {
    const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
    if (ByValAlign > StackAlign)
      report_fatal_error("Pass-by-value arguments with alignment greater than "
                         "16 are not supported.");

    const unsigned ByValSize = ArgFlags.getByValSize();
    // The object is never aligned to less than a pointer.
    const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;

    // An empty aggregate parameter takes up no storage and no registers,
    // but needs a MemLoc for a stack slot for the formal arguments side.
    if (ByValSize == 0) {
      State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
                                       State.getStackSize(), RegVT, LocInfo));
      return false;
    }

    // Shadow allocate any registers that are not properly aligned.
    unsigned NextReg = State.getFirstUnallocated(GPRs);
    while (NextReg != GPRs.size() &&
           !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
      // Shadow allocate the next register since its alignment is not strict
      // enough.
      unsigned Reg = State.AllocateReg(GPRs);
      // Allocate the stack space shadowed by said register.
      State.AllocateStack(PtrSize, PtrAlign);
      assert(Reg && "Alocating register unexpectedly failed.");
      (void)Reg;
      NextReg = State.getFirstUnallocated(GPRs);
    }

    // Reserve stack for the whole (padded) object, then hand out one GPR per
    // pointer-sized piece. The first piece that does not get a GPR is
    // recorded as a MemLoc at its offset and ends the loop.
    const unsigned StackSize = alignTo(ByValSize, ObjAlign);
    unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
    for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
      if (unsigned Reg = State.AllocateReg(GPRs))
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
      else {
        State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
                                         Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
                                         LocInfo));
        break;
      }
    }
    return false;
  }

  // Arguments always reserve parameter save area.
  switch (ValVT.SimpleTy) {
  default:
    report_fatal_error("Unhandled value type for argument.");
  case MVT::i64:
    // i64 arguments should have been split to i32 for PPC32.
    assert(IsPPC64 && "PPC32 should have split i64 values.");
    [[fallthrough]];
  case MVT::i1:
  case MVT::i32: {
    // The stack slot is reserved before the register is requested, so it is
    // consumed even when the value ends up in a GPR.
    const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
    // AIX integer arguments are always passed in register width.
    if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
      LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
                                  : CCValAssign::LocInfo::ZExt;
    if (unsigned Reg = State.AllocateReg(GPRs))
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
    else
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));

    return false;
  }
  case MVT::f32:
  case MVT::f64: {
    // Parameter save area (PSA) is reserved even if the float passes in fpr.
    const unsigned StoreSize = LocVT.getStoreSize();
    // Floats are always 4-byte aligned in the PSA on AIX.
    // This includes f64 in 64-bit mode for ABI compatibility.
    const unsigned Offset =
        State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
    // FPR is the floating point argument register list defined elsewhere in
    // this file.
    unsigned FReg = State.AllocateReg(FPR);
    if (FReg)
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));

    // Reserve and initialize GPRs or initialize the PSA as required.
    for (unsigned I = 0; I < StoreSize; I += PtrSize) {
      if (unsigned Reg = State.AllocateReg(GPRs)) {
        assert(FReg && "An FPR should be available when a GPR is reserved.");
        if (State.isVarArg()) {
          // Successfully reserved GPRs are only initialized for vararg calls.
          // Custom handling is required for:
          //   f64 in PPC32 needs to be split into 2 GPRs.
          //   f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
          State.addLoc(
              CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
        }
      } else {
        // If there are insufficient GPRs, the PSA needs to be initialized.
        // Initialization occurs even if an FPR was initialized for
        // compatibility with the AIX XL compiler. The full memory for the
        // argument will be initialized even if a prior word is saved in GPR.
        // A custom memLoc is used when the argument also passes in FPR so
        // that the callee handling can skip over it easily.
        State.addLoc(
            FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
                                             LocInfo)
                 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
        break;
      }
    }

    return false;
  }
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
  case MVT::v2i64:
  case MVT::v2f64:
  case MVT::v1i128: {
    const unsigned VecSize = 16;
    const Align VecAlign(VecSize);

    // Non-vararg functions: a VR if one is free, otherwise the stack.
    if (!State.isVarArg()) {
      // If there are vector registers remaining we don't consume any stack
      // space.
      if (unsigned VReg = State.AllocateReg(VR)) {
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
        return false;
      }
      // Vectors passed on the stack do not shadow GPRs or FPRs even though they
      // might be allocated in the portion of the PSA that is shadowed by the
      // GPRs.
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
      return false;
    }

    // Everything below applies to vararg functions only.
    unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
    // Burn any underaligned registers and their shadowed stack space until
    // we reach the required alignment.
    while (NextRegIndex != GPRs.size() &&
           !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
      // Shadow allocate register and its stack shadow.
      unsigned Reg = State.AllocateReg(GPRs);
      State.AllocateStack(PtrSize, PtrAlign);
      assert(Reg && "Allocating register unexpectedly failed.");
      (void)Reg;
      NextRegIndex = State.getFirstUnallocated(GPRs);
    }

    // Vectors that are passed as fixed arguments are handled differently.
    // They are passed in VRs if any are available (unlike arguments passed
    // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
    // functions)
    if (State.isFixed(ValNo)) {
      if (unsigned VReg = State.AllocateReg(VR)) {
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
        // Shadow allocate GPRs and stack space even though we pass in a VR.
        for (unsigned I = 0; I != VecSize; I += PtrSize)
          State.AllocateReg(GPRs);
        State.AllocateStack(VecSize, VecAlign);
        return false;
      }
      // No vector registers remain so pass on the stack.
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
      return false;
    }

    // If all GPRS are consumed then we pass the argument fully on the stack.
    if (NextRegIndex == GPRs.size()) {
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
      return false;
    }

    // Corner case for 32-bit codegen. We have 2 registers to pass the first
    // half of the argument, and then need to pass the remaining half on the
    // stack.
    if (GPRs[NextRegIndex] == PPC::R9) {
      // The custom MemLoc is recorded first, followed by the custom RegLocs.
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
      State.addLoc(
          CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));

      const unsigned FirstReg = State.AllocateReg(PPC::R9);
      const unsigned SecondReg = State.AllocateReg(PPC::R10);
      assert(FirstReg && SecondReg &&
             "Allocating R9 or R10 unexpectedly failed.");
      State.addLoc(
          CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
      State.addLoc(
          CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
      return false;
    }

    // We have enough GPRs to fully pass the vector argument, and we have
    // already consumed any underaligned registers. Start with the custom
    // MemLoc and then the custom RegLocs.
    const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
    State.addLoc(
        CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
    // One custom RegLoc per pointer-sized piece of the 16 byte vector.
    for (unsigned I = 0; I != VecSize; I += PtrSize) {
      const unsigned Reg = State.AllocateReg(GPRs);
      assert(Reg && "Failed to allocated register for vararg vector argument");
      State.addLoc(
          CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
    }
    return false;
  }
  }
  return true;
}
7082
7083 // So far, this function is only used by LowerFormalArguments_AIX()
7084 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7085                                                     bool IsPPC64,
7086                                                     bool HasP8Vector,
7087                                                     bool HasVSX) {
7088   assert((IsPPC64 || SVT != MVT::i64) &&
7089          "i64 should have been split for 32-bit codegen.");
7090
7091   switch (SVT) {
7092   default:
7093     report_fatal_error("Unexpected value type for formal argument");
7094   case MVT::i1:
7095   case MVT::i32:
7096   case MVT::i64:
7097     return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7098   case MVT::f32:
7099     return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7100   case MVT::f64:
7101     return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7102   case MVT::v4f32:
7103   case MVT::v4i32:
7104   case MVT::v8i16:
7105   case MVT::v16i8:
7106   case MVT::v2i64:
7107   case MVT::v2f64:
7108   case MVT::v1i128:
7109     return &PPC::VRRCRegClass;
7110   }
7111 }
7112
7113 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7114                                         SelectionDAG &DAG, SDValue ArgValue,
7115                                         MVT LocVT, const SDLoc &dl) {
7116   assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7117   assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7118
7119   if (Flags.isSExt())
7120     ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7121                            DAG.getValueType(ValVT));
7122   else if (Flags.isZExt())
7123     ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7124                            DAG.getValueType(ValVT));
7125
7126   return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7127 }
7128
7129 static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7130   const unsigned LASize = FL->getLinkageSize();
7131
7132   if (PPC::GPRCRegClass.contains(Reg)) {
7133     assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7134            "Reg must be a valid argument register!");
7135     return LASize + 4 * (Reg - PPC::R3);
7136   }
7137
7138   if (PPC::G8RCRegClass.contains(Reg)) {
7139     assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7140            "Reg must be a valid argument register!");
7141     return LASize + 8 * (Reg - PPC::X3);
7142   }
7143
7144   llvm_unreachable("Only general purpose registers expected.");
7145 }
7146
7147 //   AIX ABI Stack Frame Layout:
7148 //
7149 //   Low Memory +--------------------------------------------+
7150 //   SP   +---> | Back chain                                 | ---+
7151 //        |     +--------------------------------------------+    |
7152 //        |     | Saved Condition Register                   |    |
7153 //        |     +--------------------------------------------+    |
7154 //        |     | Saved Linkage Register                     |    |
7155 //        |     +--------------------------------------------+    | Linkage Area
7156 //        |     | Reserved for compilers                     |    |
7157 //        |     +--------------------------------------------+    |
7158 //        |     | Reserved for binders                       |    |
7159 //        |     +--------------------------------------------+    |
7160 //        |     | Saved TOC pointer                          | ---+
7161 //        |     +--------------------------------------------+
7162 //        |     | Parameter save area                        |
7163 //        |     +--------------------------------------------+
7164 //        |     | Alloca space                               |
7165 //        |     +--------------------------------------------+
7166 //        |     | Local variable space                       |
7167 //        |     +--------------------------------------------+
7168 //        |     | Float/int conversion temporary             |
7169 //        |     +--------------------------------------------+
7170 //        |     | Save area for AltiVec registers            |
7171 //        |     +--------------------------------------------+
7172 //        |     | AltiVec alignment padding                  |
7173 //        |     +--------------------------------------------+
7174 //        |     | Save area for VRSAVE register              |
7175 //        |     +--------------------------------------------+
7176 //        |     | Save area for General Purpose registers    |
7177 //        |     +--------------------------------------------+
7178 //        |     | Save area for Floating Point registers     |
7179 //        |     +--------------------------------------------+
7180 //        +---- | Back chain                                 |
7181 // High Memory  +--------------------------------------------+
7182 //
7183 //  Specifications:
7184 //  AIX 7.2 Assembler Language Reference
7185 //  Subroutine linkage convention
7186
/// Lowers the incoming (formal) arguments of a function for the AIX ABI.
///
/// Locations are computed with CC_AIX after reserving the linkage area; each
/// location is then turned into a register copy, a load from a fixed stack
/// object, or (for byval aggregates) a frame index whose register-passed
/// pieces are stored back to the stack.
///
/// \param Chain    Incoming chain.
/// \param CallConv Calling convention; only C, Cold and Fast are accepted.
/// \param isVarArg Whether the function is variadic. If so, the remaining
///                 argument GPRs are spilled starting at the varargs frame
///                 index.
/// \param Ins      Descriptions of the incoming arguments.
/// \param dl       Debug location for created nodes.
/// \param DAG      SelectionDAG being built.
/// \param InVals   Output: one SDValue is appended per lowered argument.
/// \returns The incoming chain, or a TokenFactor over the stores emitted for
///          byval pieces and vararg GPR spills when there are any.
///
/// GuaranteedTailCallOpt and soft float are reported as fatal errors.
SDValue PPCTargetLowering::LowerFormalArguments_AIX(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) &&
         "Unexpected calling convention!");

  if (getTargetMachine().Options.GuaranteedTailCallOpt)
    report_fatal_error("Tail call support is unimplemented on AIX.");

  if (useSoftFloat())
    report_fatal_error("Soft float support is unimplemented on AIX.");

  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();

  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  const EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Reserve space for the linkage area on the stack.
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);

  // Stores created while lowering (byval register pieces, vararg GPR spills);
  // they are joined into a TokenFactor at the end.
  SmallVector<SDValue, 8> MemOps;

  // One argument may own several consecutive ArgLocs, so the index is advanced
  // manually inside the loop body.
  for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
    CCValAssign &VA = ArgLocs[I++];
    MVT LocVT = VA.getLocVT();
    MVT ValVT = VA.getValVT();
    ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
    // For compatibility with the AIX XL compiler, the float args in the
    // parameter save area are initialized even if the argument is available
    // in register.  The caller is required to initialize both the register
    // and memory, however, the callee can choose to expect it in either.
    // The memloc is dismissed here because the argument is retrieved from
    // the register.
    // NOTE(review): this test runs before the custom vector MemLoc handling
    // below; if MVT::isFloatingPoint() is also true for FP vector types, such
    // custom MemLocs would be skipped here as well -- confirm.
    if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
      continue;

    // Loads the argument described by VA from a fixed stack object and
    // appends the loaded value to InVals.
    auto HandleMemLoc = [&]() {
      const unsigned LocSize = LocVT.getStoreSize();
      const unsigned ValSize = ValVT.getStoreSize();
      assert((ValSize <= LocSize) &&
             "Object size is larger than size of MemLoc");
      int CurArgOffset = VA.getLocMemOffset();
      // Objects are right-justified because AIX is big-endian.
      if (LocSize > ValSize)
        CurArgOffset += LocSize - ValSize;
      // Potential tail calls could cause overwriting of argument stack slots.
      const bool IsImmutable =
          !(getTargetMachine().Options.GuaranteedTailCallOpt &&
            (CallConv == CallingConv::Fast));
      int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      SDValue ArgValue =
          DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
      InVals.push_back(ArgValue);
    };

    // Vector arguments to VaArg functions are passed both on the stack, and
    // in any available GPRs. Load the value from the stack and add the GPRs
    // as live ins.
    if (VA.isMemLoc() && VA.needsCustom()) {
      assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
      assert(isVarArg && "Only use custom memloc for vararg.");
      // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
      // matching custom RegLocs.
      const unsigned OriginalValNo = VA.getValNo();
      (void)OriginalValNo;

      // Consumes the next ArgLoc, which must be a custom RegLoc of the same
      // argument, and marks its register as a live in.
      auto HandleCustomVecRegLoc = [&]() {
        assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
               "Missing custom RegLoc.");
        // NOTE(review): VA is a reference into ArgLocs, so this assignment
        // copies the RegLoc over the previously referenced entry rather than
        // rebinding VA.
        VA = ArgLocs[I++];
        assert(VA.getValVT().isVector() &&
               "Unexpected Val type for custom RegLoc.");
        assert(VA.getValNo() == OriginalValNo &&
               "ValNo mismatch between custom MemLoc and RegLoc.");
        MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
        MF.addLiveIn(VA.getLocReg(),
                     getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
                                       Subtarget.hasVSX()));
      };

      HandleMemLoc();
      // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
      // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
      // R10.
      HandleCustomVecRegLoc();
      HandleCustomVecRegLoc();

      // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
      // we passed the vector in R5, R6, R7 and R8.
      if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
        assert(!IsPPC64 &&
               "Only 2 custom RegLocs expected for 64-bit codegen.");
        HandleCustomVecRegLoc();
        HandleCustomVecRegLoc();
      }

      continue;
    }

    // Record the parameter's type category in the function info for every
    // argument whose first location is a register.
    if (VA.isRegLoc()) {
      if (VA.getValVT().isScalarInteger())
        FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
      else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
        switch (VA.getValVT().SimpleTy) {
        default:
          report_fatal_error("Unhandled value type for argument.");
        case MVT::f32:
          FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
          break;
        case MVT::f64:
          FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
          break;
        }
      } else if (VA.getValVT().isVector()) {
        switch (VA.getValVT().SimpleTy) {
        default:
          report_fatal_error("Unhandled value type for argument.");
        case MVT::v16i8:
          FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
          break;
        case MVT::v8i16:
          FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
          break;
        case MVT::v4i32:
        case MVT::v2i64:
        case MVT::v1i128:
          FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
          break;
        case MVT::v4f32:
        case MVT::v2f64:
          FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
          break;
        }
      }
    }

    // ByVal aggregate whose first location is in memory: expose the caller's
    // stack slot directly through a fixed object. A zero-sized aggregate is
    // given a pointer-sized object.
    if (Flags.isByVal() && VA.isMemLoc()) {
      const unsigned Size =
          alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
                  PtrByteSize);
      const int FI = MF.getFrameInfo().CreateFixedObject(
          Size, VA.getLocMemOffset(), /* IsImmutable */ false,
          /* IsAliased */ true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);

      continue;
    }

    // ByVal aggregate that starts in a GPR: create the fixed object at the
    // offset mapArgRegToOffsetAIX() computes for the first register, then
    // store every register-passed piece into it.
    if (Flags.isByVal()) {
      assert(VA.isRegLoc() && "MemLocs should already be handled.");

      const MCPhysReg ArgReg = VA.getLocReg();
      const PPCFrameLowering *FL = Subtarget.getFrameLowering();

      const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
      const int FI = MF.getFrameInfo().CreateFixedObject(
          StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
          /* IsAliased */ true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);

      // Add live ins for all the RegLocs for the same ByVal.
      const TargetRegisterClass *RegClass =
          IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;

      // Marks PhysReg live in and stores its value at 'Offset' bytes into the
      // fixed object; the store is queued in MemOps.
      auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
                                               unsigned Offset) {
        const Register VReg = MF.addLiveIn(PhysReg, RegClass);
        // Since the callers side has left justified the aggregate in the
        // register, we can simply store the entire register into the stack
        // slot.
        SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
        // The store to the fixedstack object is needed because accessing a
        // field of the ByVal will use a gep and load. Ideally we will optimize
        // to extracting the value from the register directly, and elide the
        // stores when the arguments address is not taken, but that will need to
        // be future work.
        SDValue Store = DAG.getStore(
            CopyFrom.getValue(1), dl, CopyFrom,
            DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
            MachinePointerInfo::getFixedStack(MF, FI, Offset));

        MemOps.push_back(Store);
      };

      unsigned Offset = 0;
      HandleRegLoc(VA.getLocReg(), Offset);
      Offset += PtrByteSize;
      // Consume the following RegLocs of the same aggregate, one pointer-sized
      // piece at a time.
      for (; Offset != StackSize && ArgLocs[I].isRegLoc();
           Offset += PtrByteSize) {
        assert(ArgLocs[I].getValNo() == VA.getValNo() &&
               "RegLocs should be for ByVal argument.");

        const CCValAssign RL = ArgLocs[I++];
        HandleRegLoc(RL.getLocReg(), Offset);
        FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
      }

      if (Offset != StackSize) {
        assert(ArgLocs[I].getValNo() == VA.getValNo() &&
               "Expected MemLoc for remaining bytes.");
        assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
        // Consume the MemLoc. The InVal has already been emitted, so nothing
        // more needs to be done.
        ++I;
      }

      continue;
    }

    // Plain register argument: copy it out of the live-in register and, for
    // integers narrower than the location type, truncate to the value type.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      MVT::SimpleValueType SVT = ValVT.SimpleTy;
      Register VReg =
          MF.addLiveIn(VA.getLocReg(),
                       getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
                                         Subtarget.hasVSX()));
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
      if (ValVT.isScalarInteger() &&
          (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
        ArgValue =
            truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
      }
      InVals.push_back(ArgValue);
      continue;
    }
    // Plain stack argument.
    if (VA.isMemLoc()) {
      HandleMemLoc();
      continue;
    }
  }

  // On AIX a minimum of 8 words is saved to the parameter save area.
  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
  // Area that is at least reserved in the caller of this function.
  unsigned CallerReservedArea = std::max<unsigned>(
      CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized function's reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack.
  CallerReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
  FuncInfo->setMinReservedArea(CallerReservedArea);

  if (isVarArg) {
    // The varargs area starts right after the stack space assigned to the
    // named arguments.
    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
                                       PPC::R7, PPC::R8, PPC::R9, PPC::R10};

    static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                       PPC::X7, PPC::X8, PPC::X9, PPC::X10};
    const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    // The first GPR to spill is derived from how much parameter save area the
    // named arguments consumed.
    for (unsigned GPRIndex =
             (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
         GPRIndex < NumGPArgRegs; ++GPRIndex) {

      const Register VReg =
          IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
                  : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address for the next argument to store.
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Make the returned chain depend on every store emitted above.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
7485
7486 SDValue PPCTargetLowering::LowerCall_AIX(
7487     SDValue Chain, SDValue Callee, CallFlags CFlags,
7488     const SmallVectorImpl<ISD::OutputArg> &Outs,
7489     const SmallVectorImpl<SDValue> &OutVals,
7490     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7491     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7492     const CallBase *CB) const {
7493   // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7494   // AIX ABI stack frame layout.
7495
7496   assert((CFlags.CallConv == CallingConv::C ||
7497           CFlags.CallConv == CallingConv::Cold ||
7498           CFlags.CallConv == CallingConv::Fast) &&
7499          "Unexpected calling convention!");
7500
7501   if (CFlags.IsPatchPoint)
7502     report_fatal_error("This call type is unimplemented on AIX.");
7503
7504   const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7505
7506   MachineFunction &MF = DAG.getMachineFunction();
7507   SmallVector<CCValAssign, 16> ArgLocs;
7508   AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7509                     *DAG.getContext());
7510
7511   // Reserve space for the linkage save area (LSA) on the stack.
7512   // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7513   //   [SP][CR][LR][2 x reserved][TOC].
7514   // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7515   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7516   const bool IsPPC64 = Subtarget.isPPC64();
7517   const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7518   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7519   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7520   CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7521
7522   // The prolog code of the callee may store up to 8 GPR argument registers to
7523   // the stack, allowing va_start to index over them in memory if the callee
7524   // is variadic.
7525   // Because we cannot tell if this is needed on the caller side, we have to
7526   // conservatively assume that it is needed.  As such, make sure we have at
7527   // least enough stack space for the caller to store the 8 GPRs.
7528   const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7529   const unsigned NumBytes = std::max<unsigned>(
7530       LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7531
7532   // Adjust the stack pointer for the new arguments...
7533   // These operations are automatically eliminated by the prolog/epilog pass.
7534   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7535   SDValue CallSeqStart = Chain;
7536
7537   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7538   SmallVector<SDValue, 8> MemOpChains;
7539
7540   // Set up a copy of the stack pointer for loading and storing any
7541   // arguments that may not fit in the registers available for argument
7542   // passing.
7543   const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7544                                    : DAG.getRegister(PPC::R1, MVT::i32);
7545
7546   for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7547     const unsigned ValNo = ArgLocs[I].getValNo();
7548     SDValue Arg = OutVals[ValNo];
7549     ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7550
7551     if (Flags.isByVal()) {
7552       const unsigned ByValSize = Flags.getByValSize();
7553
7554       // Nothing to do for zero-sized ByVals on the caller side.
7555       if (!ByValSize) {
7556         ++I;
7557         continue;
7558       }
7559
7560       auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7561         return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7562                               (LoadOffset != 0)
7563                                   ? DAG.getObjectPtrOffset(
7564                                         dl, Arg, TypeSize::getFixed(LoadOffset))
7565                                   : Arg,
7566                               MachinePointerInfo(), VT);
7567       };
7568
7569       unsigned LoadOffset = 0;
7570
7571       // Initialize registers, which are fully occupied by the by-val argument.
7572       while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7573         SDValue Load = GetLoad(PtrVT, LoadOffset);
7574         MemOpChains.push_back(Load.getValue(1));
7575         LoadOffset += PtrByteSize;
7576         const CCValAssign &ByValVA = ArgLocs[I++];
7577         assert(ByValVA.getValNo() == ValNo &&
7578                "Unexpected location for pass-by-value argument.");
7579         RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7580       }
7581
7582       if (LoadOffset == ByValSize)
7583         continue;
7584
7585       // There must be one more loc to handle the remainder.
7586       assert(ArgLocs[I].getValNo() == ValNo &&
7587              "Expected additional location for by-value argument.");
7588
7589       if (ArgLocs[I].isMemLoc()) {
7590         assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7591         const CCValAssign &ByValVA = ArgLocs[I++];
7592         ISD::ArgFlagsTy MemcpyFlags = Flags;
7593         // Only memcpy the bytes that don't pass in register.
7594         MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7595         Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7596             (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7597                                     dl, Arg, TypeSize::getFixed(LoadOffset))
7598                               : Arg,
7599             DAG.getObjectPtrOffset(
7600                 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7601             CallSeqStart, MemcpyFlags, DAG, dl);
7602         continue;
7603       }
7604
7605       // Initialize the final register residue.
7606       // Any residue that occupies the final by-val arg register must be
7607       // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7608       // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7609       // 2 and 1 byte loads.
7610       const unsigned ResidueBytes = ByValSize % PtrByteSize;
7611       assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7612              "Unexpected register residue for by-value argument.");
7613       SDValue ResidueVal;
7614       for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7615         const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7616         const MVT VT =
7617             N == 1 ? MVT::i8
7618                    : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7619         SDValue Load = GetLoad(VT, LoadOffset);
7620         MemOpChains.push_back(Load.getValue(1));
7621         LoadOffset += N;
7622         Bytes += N;
7623
7624         // By-val arguments are passed left-justfied in register.
7625         // Every load here needs to be shifted, otherwise a full register load
7626         // should have been used.
7627         assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7628                "Unexpected load emitted during handling of pass-by-value "
7629                "argument.");
7630         unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7631         EVT ShiftAmountTy =
7632             getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7633         SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7634         SDValue ShiftedLoad =
7635             DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7636         ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7637                                               ShiftedLoad)
7638                                 : ShiftedLoad;
7639       }
7640
7641       const CCValAssign &ByValVA = ArgLocs[I++];
7642       RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7643       continue;
7644     }
7645
7646     CCValAssign &VA = ArgLocs[I++];
7647     const MVT LocVT = VA.getLocVT();
7648     const MVT ValVT = VA.getValVT();
7649
7650     switch (VA.getLocInfo()) {
7651     default:
7652       report_fatal_error("Unexpected argument extension type.");
7653     case CCValAssign::Full:
7654       break;
7655     case CCValAssign::ZExt:
7656       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7657       break;
7658     case CCValAssign::SExt:
7659       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7660       break;
7661     }
7662
7663     if (VA.isRegLoc() && !VA.needsCustom()) {
7664       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7665       continue;
7666     }
7667
7668     // Vector arguments passed to VarArg functions need custom handling when
7669     // they are passed (at least partially) in GPRs.
7670     if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7671       assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7672       // Store value to its stack slot.
7673       SDValue PtrOff =
7674           DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7675       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7676       SDValue Store =
7677           DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7678       MemOpChains.push_back(Store);
7679       const unsigned OriginalValNo = VA.getValNo();
7680       // Then load the GPRs from the stack
7681       unsigned LoadOffset = 0;
7682       auto HandleCustomVecRegLoc = [&]() {
7683         assert(I != E && "Unexpected end of CCvalAssigns.");
7684         assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7685                "Expected custom RegLoc.");
7686         CCValAssign RegVA = ArgLocs[I++];
7687         assert(RegVA.getValNo() == OriginalValNo &&
7688                "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7689         SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7690                                   DAG.getConstant(LoadOffset, dl, PtrVT));
7691         SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7692         MemOpChains.push_back(Load.getValue(1));
7693         RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7694         LoadOffset += PtrByteSize;
7695       };
7696
7697       // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7698       // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7699       // R10.
7700       HandleCustomVecRegLoc();
7701       HandleCustomVecRegLoc();
7702
7703       if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7704           ArgLocs[I].getValNo() == OriginalValNo) {
7705         assert(!IsPPC64 &&
7706                "Only 2 custom RegLocs expected for 64-bit codegen.");
7707         HandleCustomVecRegLoc();
7708         HandleCustomVecRegLoc();
7709       }
7710
7711       continue;
7712     }
7713
7714     if (VA.isMemLoc()) {
7715       SDValue PtrOff =
7716           DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7717       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7718       MemOpChains.push_back(
7719           DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7720
7721       continue;
7722     }
7723
7724     if (!ValVT.isFloatingPoint())
7725       report_fatal_error(
7726           "Unexpected register handling for calling convention.");
7727
7728     // Custom handling is used for GPR initializations for vararg float
7729     // arguments.
7730     assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7731            LocVT.isInteger() &&
7732            "Custom register handling only expected for VarArg.");
7733
7734     SDValue ArgAsInt =
7735         DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7736
7737     if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7738       // f32 in 32-bit GPR
7739       // f64 in 64-bit GPR
7740       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7741     else if (Arg.getValueType().getFixedSizeInBits() <
7742              LocVT.getFixedSizeInBits())
7743       // f32 in 64-bit GPR.
7744       RegsToPass.push_back(std::make_pair(
7745           VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7746     else {
7747       // f64 in two 32-bit GPRs
7748       // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7749       assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7750              "Unexpected custom register for argument!");
7751       CCValAssign &GPR1 = VA;
7752       SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7753                                      DAG.getConstant(32, dl, MVT::i8));
7754       RegsToPass.push_back(std::make_pair(
7755           GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7756
7757       if (I != E) {
7758         // If only 1 GPR was available, there will only be one custom GPR and
7759         // the argument will also pass in memory.
7760         CCValAssign &PeekArg = ArgLocs[I];
7761         if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7762           assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7763           CCValAssign &GPR2 = ArgLocs[I++];
7764           RegsToPass.push_back(std::make_pair(
7765               GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7766         }
7767       }
7768     }
7769   }
7770
7771   if (!MemOpChains.empty())
7772     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7773
7774   // For indirect calls, we need to save the TOC base to the stack for
7775   // restoration after the call.
7776   if (CFlags.IsIndirect) {
7777     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7778     const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7779     const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7780     const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7781     const unsigned TOCSaveOffset =
7782         Subtarget.getFrameLowering()->getTOCSaveOffset();
7783
7784     setUsesTOCBasePtr(DAG);
7785     SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7786     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7787     SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7788     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7789     Chain = DAG.getStore(
7790         Val.getValue(1), dl, Val, AddPtr,
7791         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7792   }
7793
7794   // Build a sequence of copy-to-reg nodes chained together with token chain
7795   // and flag operands which copy the outgoing args into the appropriate regs.
7796   SDValue InGlue;
7797   for (auto Reg : RegsToPass) {
7798     Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7799     InGlue = Chain.getValue(1);
7800   }
7801
7802   const int SPDiff = 0;
7803   return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7804                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
7805 }
7806
7807 bool
7808 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7809                                   MachineFunction &MF, bool isVarArg,
7810                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
7811                                   LLVMContext &Context) const {
7812   SmallVector<CCValAssign, 16> RVLocs;
7813   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7814   return CCInfo.CheckReturn(
7815       Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7816                 ? RetCC_PPC_Cold
7817                 : RetCC_PPC);
7818 }
7819
7820 SDValue
7821 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7822                                bool isVarArg,
7823                                const SmallVectorImpl<ISD::OutputArg> &Outs,
7824                                const SmallVectorImpl<SDValue> &OutVals,
7825                                const SDLoc &dl, SelectionDAG &DAG) const {
7826   SmallVector<CCValAssign, 16> RVLocs;
7827   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7828                  *DAG.getContext());
7829   CCInfo.AnalyzeReturn(Outs,
7830                        (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7831                            ? RetCC_PPC_Cold
7832                            : RetCC_PPC);
7833
7834   SDValue Glue;
7835   SmallVector<SDValue, 4> RetOps(1, Chain);
7836
7837   // Copy the result values into the output registers.
7838   for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7839     CCValAssign &VA = RVLocs[i];
7840     assert(VA.isRegLoc() && "Can only return in registers!");
7841
7842     SDValue Arg = OutVals[RealResIdx];
7843
7844     switch (VA.getLocInfo()) {
7845     default: llvm_unreachable("Unknown loc info!");
7846     case CCValAssign::Full: break;
7847     case CCValAssign::AExt:
7848       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7849       break;
7850     case CCValAssign::ZExt:
7851       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7852       break;
7853     case CCValAssign::SExt:
7854       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7855       break;
7856     }
7857     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7858       bool isLittleEndian = Subtarget.isLittleEndian();
7859       // Legalize ret f64 -> ret 2 x i32.
7860       SDValue SVal =
7861           DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7862                       DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7863       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7864       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7865       SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7866                          DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7867       Glue = Chain.getValue(1);
7868       VA = RVLocs[++i]; // skip ahead to next loc
7869       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7870     } else
7871       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7872     Glue = Chain.getValue(1);
7873     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7874   }
7875
7876   RetOps[0] = Chain;  // Update chain.
7877
7878   // Add the glue if we have it.
7879   if (Glue.getNode())
7880     RetOps.push_back(Glue);
7881
7882   return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7883 }
7884
7885 SDValue
7886 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7887                                                 SelectionDAG &DAG) const {
7888   SDLoc dl(Op);
7889
7890   // Get the correct type for integers.
7891   EVT IntVT = Op.getValueType();
7892
7893   // Get the inputs.
7894   SDValue Chain = Op.getOperand(0);
7895   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7896   // Build a DYNAREAOFFSET node.
7897   SDValue Ops[2] = {Chain, FPSIdx};
7898   SDVTList VTs = DAG.getVTList(IntVT);
7899   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7900 }
7901
7902 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7903                                              SelectionDAG &DAG) const {
7904   // When we pop the dynamic allocation we need to restore the SP link.
7905   SDLoc dl(Op);
7906
7907   // Get the correct type for pointers.
7908   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7909
7910   // Construct the stack pointer operand.
7911   bool isPPC64 = Subtarget.isPPC64();
7912   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7913   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7914
7915   // Get the operands for the STACKRESTORE.
7916   SDValue Chain = Op.getOperand(0);
7917   SDValue SaveSP = Op.getOperand(1);
7918
7919   // Load the old link SP.
7920   SDValue LoadLinkSP =
7921       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7922
7923   // Restore the stack pointer.
7924   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7925
7926   // Store the old link SP.
7927   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7928 }
7929
7930 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7931   MachineFunction &MF = DAG.getMachineFunction();
7932   bool isPPC64 = Subtarget.isPPC64();
7933   EVT PtrVT = getPointerTy(MF.getDataLayout());
7934
7935   // Get current frame pointer save index.  The users of this index will be
7936   // primarily DYNALLOC instructions.
7937   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7938   int RASI = FI->getReturnAddrSaveIndex();
7939
7940   // If the frame pointer save index hasn't been defined yet.
7941   if (!RASI) {
7942     // Find out what the fix offset of the frame pointer save area.
7943     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7944     // Allocate the frame index for frame pointer save area.
7945     RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7946     // Save the result.
7947     FI->setReturnAddrSaveIndex(RASI);
7948   }
7949   return DAG.getFrameIndex(RASI, PtrVT);
7950 }
7951
7952 SDValue
7953 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7954   MachineFunction &MF = DAG.getMachineFunction();
7955   bool isPPC64 = Subtarget.isPPC64();
7956   EVT PtrVT = getPointerTy(MF.getDataLayout());
7957
7958   // Get current frame pointer save index.  The users of this index will be
7959   // primarily DYNALLOC instructions.
7960   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7961   int FPSI = FI->getFramePointerSaveIndex();
7962
7963   // If the frame pointer save index hasn't been defined yet.
7964   if (!FPSI) {
7965     // Find out what the fix offset of the frame pointer save area.
7966     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7967     // Allocate the frame index for frame pointer save area.
7968     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7969     // Save the result.
7970     FI->setFramePointerSaveIndex(FPSI);
7971   }
7972   return DAG.getFrameIndex(FPSI, PtrVT);
7973 }
7974
7975 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7976                                                    SelectionDAG &DAG) const {
7977   MachineFunction &MF = DAG.getMachineFunction();
7978   // Get the inputs.
7979   SDValue Chain = Op.getOperand(0);
7980   SDValue Size  = Op.getOperand(1);
7981   SDLoc dl(Op);
7982
7983   // Get the correct type for pointers.
7984   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7985   // Negate the size.
7986   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7987                                 DAG.getConstant(0, dl, PtrVT), Size);
7988   // Construct a node for the frame pointer save index.
7989   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7990   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7991   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7992   if (hasInlineStackProbe(MF))
7993     return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7994   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7995 }
7996
7997 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7998                                                      SelectionDAG &DAG) const {
7999   MachineFunction &MF = DAG.getMachineFunction();
8000
8001   bool isPPC64 = Subtarget.isPPC64();
8002   EVT PtrVT = getPointerTy(DAG.getDataLayout());
8003
8004   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8005   return DAG.getFrameIndex(FI, PtrVT);
8006 }
8007
8008 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8009                                                SelectionDAG &DAG) const {
8010   SDLoc DL(Op);
8011   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8012                      DAG.getVTList(MVT::i32, MVT::Other),
8013                      Op.getOperand(0), Op.getOperand(1));
8014 }
8015
8016 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8017                                                 SelectionDAG &DAG) const {
8018   SDLoc DL(Op);
8019   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8020                      Op.getOperand(0), Op.getOperand(1));
8021 }
8022
8023 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8024   if (Op.getValueType().isVector())
8025     return LowerVectorLoad(Op, DAG);
8026
8027   assert(Op.getValueType() == MVT::i1 &&
8028          "Custom lowering only for i1 loads");
8029
8030   // First, load 8 bits into 32 bits, then truncate to 1 bit.
8031
8032   SDLoc dl(Op);
8033   LoadSDNode *LD = cast<LoadSDNode>(Op);
8034
8035   SDValue Chain = LD->getChain();
8036   SDValue BasePtr = LD->getBasePtr();
8037   MachineMemOperand *MMO = LD->getMemOperand();
8038
8039   SDValue NewLD =
8040       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8041                      BasePtr, MVT::i8, MMO);
8042   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8043
8044   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8045   return DAG.getMergeValues(Ops, dl);
8046 }
8047
8048 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8049   if (Op.getOperand(1).getValueType().isVector())
8050     return LowerVectorStore(Op, DAG);
8051
8052   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8053          "Custom lowering only for i1 stores");
8054
8055   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8056
8057   SDLoc dl(Op);
8058   StoreSDNode *ST = cast<StoreSDNode>(Op);
8059
8060   SDValue Chain = ST->getChain();
8061   SDValue BasePtr = ST->getBasePtr();
8062   SDValue Value = ST->getValue();
8063   MachineMemOperand *MMO = ST->getMemOperand();
8064
8065   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8066                       Value);
8067   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8068 }
8069
8070 // FIXME: Remove this once the ANDI glue bug is fixed:
8071 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8072   assert(Op.getValueType() == MVT::i1 &&
8073          "Custom lowering only for i1 results");
8074
8075   SDLoc DL(Op);
8076   return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8077 }
8078
8079 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8080                                                SelectionDAG &DAG) const {
8081
8082   // Implements a vector truncate that fits in a vector register as a shuffle.
8083   // We want to legalize vector truncates down to where the source fits in
8084   // a vector register (and target is therefore smaller than vector register
8085   // size).  At that point legalization will try to custom lower the sub-legal
8086   // result and get here - where we can contain the truncate as a single target
8087   // operation.
8088
8089   // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8090   //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8091   //
8092   // We will implement it for big-endian ordering as this (where x denotes
8093   // undefined):
8094   //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8095   //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8096   //
8097   // The same operation in little-endian ordering will be:
8098   //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8099   //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8100
8101   EVT TrgVT = Op.getValueType();
8102   assert(TrgVT.isVector() && "Vector type expected.");
8103   unsigned TrgNumElts = TrgVT.getVectorNumElements();
8104   EVT EltVT = TrgVT.getVectorElementType();
8105   if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8106       TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8107       !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8108     return SDValue();
8109
8110   SDValue N1 = Op.getOperand(0);
8111   EVT SrcVT = N1.getValueType();
8112   unsigned SrcSize = SrcVT.getSizeInBits();
8113   if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8114       !llvm::has_single_bit<uint32_t>(
8115           SrcVT.getVectorElementType().getSizeInBits()))
8116     return SDValue();
8117   if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8118     return SDValue();
8119
8120   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8121   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8122
8123   SDLoc DL(Op);
8124   SDValue Op1, Op2;
8125   if (SrcSize == 256) {
8126     EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8127     EVT SplitVT =
8128         N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
8129     unsigned SplitNumElts = SplitVT.getVectorNumElements();
8130     Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8131                       DAG.getConstant(0, DL, VecIdxTy));
8132     Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8133                       DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8134   }
8135   else {
8136     Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8137     Op2 = DAG.getUNDEF(WideVT);
8138   }
8139
8140   // First list the elements we want to keep.
8141   unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8142   SmallVector<int, 16> ShuffV;
8143   if (Subtarget.isLittleEndian())
8144     for (unsigned i = 0; i < TrgNumElts; ++i)
8145       ShuffV.push_back(i * SizeMult);
8146   else
8147     for (unsigned i = 1; i <= TrgNumElts; ++i)
8148       ShuffV.push_back(i * SizeMult - 1);
8149
8150   // Populate the remaining elements with undefs.
8151   for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8152     // ShuffV.push_back(i + WideNumElts);
8153     ShuffV.push_back(WideNumElts + 1);
8154
8155   Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8156   Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8157   return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8158 }
8159
8160 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8161 /// possible.
8162 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8163   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8164   EVT ResVT = Op.getValueType();
8165   EVT CmpVT = Op.getOperand(0).getValueType();
8166   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8167   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
8168   SDLoc dl(Op);
8169
8170   // Without power9-vector, we don't have native instruction for f128 comparison.
8171   // Following transformation to libcall is needed for setcc:
8172   // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8173   if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8174     SDValue Z = DAG.getSetCC(
8175         dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8176         LHS, RHS, CC);
8177     SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8178     return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8179   }
8180
8181   // Not FP, or using SPE? Not a fsel.
8182   if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8183       Subtarget.hasSPE())
8184     return Op;
8185
8186   SDNodeFlags Flags = Op.getNode()->getFlags();
8187
8188   // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8189   // presence of infinities.
8190   if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8191     switch (CC) {
8192     default:
8193       break;
8194     case ISD::SETOGT:
8195     case ISD::SETGT:
8196       return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8197     case ISD::SETOLT:
8198     case ISD::SETLT:
8199       return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8200     }
8201   }
8202
8203   // We might be able to do better than this under some circumstances, but in
8204   // general, fsel-based lowering of select is a finite-math-only optimization.
8205   // For more information, see section F.3 of the 2.06 ISA specification.
8206   // With ISA 3.0
8207   if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8208       (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8209       ResVT == MVT::f128)
8210     return Op;
8211
8212   // If the RHS of the comparison is a 0.0, we don't need to do the
8213   // subtraction at all.
8214   SDValue Sel1;
8215   if (isFloatingPointZero(RHS))
8216     switch (CC) {
8217     default: break;       // SETUO etc aren't handled by fsel.
8218     case ISD::SETNE:
8219       std::swap(TV, FV);
8220       [[fallthrough]];
8221     case ISD::SETEQ:
8222       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8223         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8224       Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8225       if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
8226         Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8227       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8228                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8229     case ISD::SETULT:
8230     case ISD::SETLT:
8231       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
8232       [[fallthrough]];
8233     case ISD::SETOGE:
8234     case ISD::SETGE:
8235       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8236         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8237       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8238     case ISD::SETUGT:
8239     case ISD::SETGT:
8240       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
8241       [[fallthrough]];
8242     case ISD::SETOLE:
8243     case ISD::SETLE:
8244       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8245         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8246       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8247                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8248     }
8249
8250   SDValue Cmp;
8251   switch (CC) {
8252   default: break;       // SETUO etc aren't handled by fsel.
8253   case ISD::SETNE:
8254     std::swap(TV, FV);
8255     [[fallthrough]];
8256   case ISD::SETEQ:
8257     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8258     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8259       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8260     Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8261     if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
8262       Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8263     return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8264                        DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8265   case ISD::SETULT:
8266   case ISD::SETLT:
8267     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8268     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8269       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8270     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8271   case ISD::SETOGE:
8272   case ISD::SETGE:
8273     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8274     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8275       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8276     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8277   case ISD::SETUGT:
8278   case ISD::SETGT:
8279     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8280     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8281       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8282     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8283   case ISD::SETOLE:
8284   case ISD::SETLE:
8285     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8286     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8287       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8288     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8289   }
8290   return Op;
8291 }
8292
8293 static unsigned getPPCStrictOpcode(unsigned Opc) {
8294   switch (Opc) {
8295   default:
8296     llvm_unreachable("No strict version of this opcode!");
8297   case PPCISD::FCTIDZ:
8298     return PPCISD::STRICT_FCTIDZ;
8299   case PPCISD::FCTIWZ:
8300     return PPCISD::STRICT_FCTIWZ;
8301   case PPCISD::FCTIDUZ:
8302     return PPCISD::STRICT_FCTIDUZ;
8303   case PPCISD::FCTIWUZ:
8304     return PPCISD::STRICT_FCTIWUZ;
8305   case PPCISD::FCFID:
8306     return PPCISD::STRICT_FCFID;
8307   case PPCISD::FCFIDU:
8308     return PPCISD::STRICT_FCFIDU;
8309   case PPCISD::FCFIDS:
8310     return PPCISD::STRICT_FCFIDS;
8311   case PPCISD::FCFIDUS:
8312     return PPCISD::STRICT_FCFIDUS;
8313   }
8314 }
8315
8316 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8317                               const PPCSubtarget &Subtarget) {
8318   SDLoc dl(Op);
8319   bool IsStrict = Op->isStrictFPOpcode();
8320   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8321                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8322
8323   // TODO: Any other flags to propagate?
8324   SDNodeFlags Flags;
8325   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8326
8327   // For strict nodes, source is the second operand.
8328   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8329   SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8330   MVT DestTy = Op.getSimpleValueType();
8331   assert(Src.getValueType().isFloatingPoint() &&
8332          (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8333           DestTy == MVT::i64) &&
8334          "Invalid FP_TO_INT types");
8335   if (Src.getValueType() == MVT::f32) {
8336     if (IsStrict) {
8337       Src =
8338           DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8339                       DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8340       Chain = Src.getValue(1);
8341     } else
8342       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8343   }
8344   if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8345     DestTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
8346   unsigned Opc = ISD::DELETED_NODE;
8347   switch (DestTy.SimpleTy) {
8348   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8349   case MVT::i32:
8350     Opc = IsSigned ? PPCISD::FCTIWZ
8351                    : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8352     break;
8353   case MVT::i64:
8354     assert((IsSigned || Subtarget.hasFPCVT()) &&
8355            "i64 FP_TO_UINT is supported only with FPCVT");
8356     Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8357   }
8358   EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8359   SDValue Conv;
8360   if (IsStrict) {
8361     Opc = getPPCStrictOpcode(Opc);
8362     Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8363                        Flags);
8364   } else {
8365     Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8366   }
8367   return Conv;
8368 }
8369
8370 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8371                                                SelectionDAG &DAG,
8372                                                const SDLoc &dl) const {
8373   SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8374   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8375                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8376   bool IsStrict = Op->isStrictFPOpcode();
8377
8378   // Convert the FP value to an int value through memory.
8379   bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8380                   (IsSigned || Subtarget.hasFPCVT());
8381   SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8382   int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8383   MachinePointerInfo MPI =
8384       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8385
8386   // Emit a store to the stack slot.
8387   SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8388   Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8389   if (i32Stack) {
8390     MachineFunction &MF = DAG.getMachineFunction();
8391     Alignment = Align(4);
8392     MachineMemOperand *MMO =
8393         MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8394     SDValue Ops[] = { Chain, Tmp, FIPtr };
8395     Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8396               DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8397   } else
8398     Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8399
8400   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
8401   // add in a bias on big endian.
8402   if (Op.getValueType() == MVT::i32 && !i32Stack) {
8403     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8404                         DAG.getConstant(4, dl, FIPtr.getValueType()));
8405     MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8406   }
8407
8408   RLI.Chain = Chain;
8409   RLI.Ptr = FIPtr;
8410   RLI.MPI = MPI;
8411   RLI.Alignment = Alignment;
8412 }
8413
8414 /// Custom lowers floating point to integer conversions to use
8415 /// the direct move instructions available in ISA 2.07 to avoid the
8416 /// need for load/store combinations.
8417 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8418                                                     SelectionDAG &DAG,
8419                                                     const SDLoc &dl) const {
8420   SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8421   SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8422   if (Op->isStrictFPOpcode())
8423     return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8424   else
8425     return Mov;
8426 }
8427
8428 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8429                                           const SDLoc &dl) const {
8430   bool IsStrict = Op->isStrictFPOpcode();
8431   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8432                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8433   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8434   EVT SrcVT = Src.getValueType();
8435   EVT DstVT = Op.getValueType();
8436
8437   // FP to INT conversions are legal for f128.
8438   if (SrcVT == MVT::f128)
8439     return Subtarget.hasP9Vector() ? Op : SDValue();
8440
8441   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8442   // PPC (the libcall is not available).
8443   if (SrcVT == MVT::ppcf128) {
8444     if (DstVT == MVT::i32) {
8445       // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8446       // set other fast-math flags to FP operations in both strict and
8447       // non-strict cases. (FP_TO_SINT, FSUB)
8448       SDNodeFlags Flags;
8449       Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8450
8451       if (IsSigned) {
8452         SDValue Lo, Hi;
8453         std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8454
8455         // Add the two halves of the long double in round-to-zero mode, and use
8456         // a smaller FP_TO_SINT.
8457         if (IsStrict) {
8458           SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8459                                     DAG.getVTList(MVT::f64, MVT::Other),
8460                                     {Op.getOperand(0), Lo, Hi}, Flags);
8461           return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8462                              DAG.getVTList(MVT::i32, MVT::Other),
8463                              {Res.getValue(1), Res}, Flags);
8464         } else {
8465           SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8466           return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8467         }
8468       } else {
8469         const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8470         APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8471         SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8472         SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8473         if (IsStrict) {
8474           // Sel = Src < 0x80000000
8475           // FltOfs = select Sel, 0.0, 0x80000000
8476           // IntOfs = select Sel, 0, 0x80000000
8477           // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8478           SDValue Chain = Op.getOperand(0);
8479           EVT SetCCVT =
8480               getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8481           EVT DstSetCCVT =
8482               getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8483           SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8484                                      Chain, true);
8485           Chain = Sel.getValue(1);
8486
8487           SDValue FltOfs = DAG.getSelect(
8488               dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8489           Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8490
8491           SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8492                                     DAG.getVTList(SrcVT, MVT::Other),
8493                                     {Chain, Src, FltOfs}, Flags);
8494           Chain = Val.getValue(1);
8495           SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8496                                      DAG.getVTList(DstVT, MVT::Other),
8497                                      {Chain, Val}, Flags);
8498           Chain = SInt.getValue(1);
8499           SDValue IntOfs = DAG.getSelect(
8500               dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8501           SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8502           return DAG.getMergeValues({Result, Chain}, dl);
8503         } else {
8504           // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8505           // FIXME: generated code sucks.
8506           SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8507           True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8508           True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8509           SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8510           return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8511         }
8512       }
8513     }
8514
8515     return SDValue();
8516   }
8517
8518   if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8519     return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8520
8521   ReuseLoadInfo RLI;
8522   LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8523
8524   return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8525                      RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8526 }
8527
8528 // We're trying to insert a regular store, S, and then a load, L. If the
8529 // incoming value, O, is a load, we might just be able to have our load use the
8530 // address used by O. However, we don't know if anything else will store to
8531 // that address before we can load from it. To prevent this situation, we need
8532 // to insert our load, L, into the chain as a peer of O. To do this, we give L
8533 // the same chain operand as O, we create a token factor from the chain results
8534 // of O and L, and we replace all uses of O's chain result with that token
8535 // factor (see spliceIntoChain below for this last part).
8536 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8537                                             ReuseLoadInfo &RLI,
8538                                             SelectionDAG &DAG,
8539                                             ISD::LoadExtType ET) const {
8540   // Conservatively skip reusing for constrained FP nodes.
8541   if (Op->isStrictFPOpcode())
8542     return false;
8543
8544   SDLoc dl(Op);
8545   bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8546                        (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8547   if (ET == ISD::NON_EXTLOAD &&
8548       (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8549       isOperationLegalOrCustom(Op.getOpcode(),
8550                                Op.getOperand(0).getValueType())) {
8551
8552     LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8553     return true;
8554   }
8555
8556   LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8557   if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8558       LD->isNonTemporal())
8559     return false;
8560   if (LD->getMemoryVT() != MemVT)
8561     return false;
8562
8563   // If the result of the load is an illegal type, then we can't build a
8564   // valid chain for reuse since the legalised loads and token factor node that
8565   // ties the legalised loads together uses a different output chain then the
8566   // illegal load.
8567   if (!isTypeLegal(LD->getValueType(0)))
8568     return false;
8569
8570   RLI.Ptr = LD->getBasePtr();
8571   if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8572     assert(LD->getAddressingMode() == ISD::PRE_INC &&
8573            "Non-pre-inc AM on PPC?");
8574     RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8575                           LD->getOffset());
8576   }
8577
8578   RLI.Chain = LD->getChain();
8579   RLI.MPI = LD->getPointerInfo();
8580   RLI.IsDereferenceable = LD->isDereferenceable();
8581   RLI.IsInvariant = LD->isInvariant();
8582   RLI.Alignment = LD->getAlign();
8583   RLI.AAInfo = LD->getAAInfo();
8584   RLI.Ranges = LD->getRanges();
8585
8586   RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8587   return true;
8588 }
8589
8590 // Given the head of the old chain, ResChain, insert a token factor containing
8591 // it and NewResChain, and make users of ResChain now be users of that token
8592 // factor.
8593 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8594 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8595                                         SDValue NewResChain,
8596                                         SelectionDAG &DAG) const {
8597   if (!ResChain)
8598     return;
8599
8600   SDLoc dl(NewResChain);
8601
8602   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8603                            NewResChain, DAG.getUNDEF(MVT::Other));
8604   assert(TF.getNode() != NewResChain.getNode() &&
8605          "A new TF really is required here");
8606
8607   DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8608   DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8609 }
8610
8611 /// Analyze profitability of direct move
8612 /// prefer float load to int load plus direct move
8613 /// when there is no integer use of int load
8614 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8615   SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8616   if (Origin->getOpcode() != ISD::LOAD)
8617     return true;
8618
8619   // If there is no LXSIBZX/LXSIHZX, like Power8,
8620   // prefer direct move if the memory size is 1 or 2 bytes.
8621   MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8622   if (!Subtarget.hasP9Vector() &&
8623       (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8624     return true;
8625
8626   for (SDNode::use_iterator UI = Origin->use_begin(),
8627                             UE = Origin->use_end();
8628        UI != UE; ++UI) {
8629
8630     // Only look at the users of the loaded value.
8631     if (UI.getUse().get().getResNo() != 0)
8632       continue;
8633
8634     if (UI->getOpcode() != ISD::SINT_TO_FP &&
8635         UI->getOpcode() != ISD::UINT_TO_FP &&
8636         UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8637         UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8638       return true;
8639   }
8640
8641   return false;
8642 }
8643
8644 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8645                               const PPCSubtarget &Subtarget,
8646                               SDValue Chain = SDValue()) {
8647   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8648                   Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8649   SDLoc dl(Op);
8650
8651   // TODO: Any other flags to propagate?
8652   SDNodeFlags Flags;
8653   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8654
8655   // If we have FCFIDS, then use it when converting to single-precision.
8656   // Otherwise, convert to double-precision and then round.
8657   bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8658   unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8659                               : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8660   EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8661   if (Op->isStrictFPOpcode()) {
8662     if (!Chain)
8663       Chain = Op.getOperand(0);
8664     return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8665                        DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8666   } else
8667     return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8668 }
8669
8670 /// Custom lowers integer to floating point conversions to use
8671 /// the direct move instructions available in ISA 2.07 to avoid the
8672 /// need for load/store combinations.
8673 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8674                                                     SelectionDAG &DAG,
8675                                                     const SDLoc &dl) const {
8676   assert((Op.getValueType() == MVT::f32 ||
8677           Op.getValueType() == MVT::f64) &&
8678          "Invalid floating point type as target of conversion");
8679   assert(Subtarget.hasFPCVT() &&
8680          "Int to FP conversions with direct moves require FPCVT");
8681   SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8682   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8683   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8684                 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8685   unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8686   SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8687   return convertIntToFP(Op, Mov, DAG, Subtarget);
8688 }
8689
8690 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8691
8692   EVT VecVT = Vec.getValueType();
8693   assert(VecVT.isVector() && "Expected a vector type.");
8694   assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8695
8696   EVT EltVT = VecVT.getVectorElementType();
8697   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8698   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8699
8700   unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8701   SmallVector<SDValue, 16> Ops(NumConcat);
8702   Ops[0] = Vec;
8703   SDValue UndefVec = DAG.getUNDEF(VecVT);
8704   for (unsigned i = 1; i < NumConcat; ++i)
8705     Ops[i] = UndefVec;
8706
8707   return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8708 }
8709
8710 SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8711                                                 const SDLoc &dl) const {
8712   bool IsStrict = Op->isStrictFPOpcode();
8713   unsigned Opc = Op.getOpcode();
8714   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8715   assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8716           Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8717          "Unexpected conversion type");
8718   assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8719          "Supports conversions to v2f64/v4f32 only.");
8720
8721   // TODO: Any other flags to propagate?
8722   SDNodeFlags Flags;
8723   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8724
8725   bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8726   bool FourEltRes = Op.getValueType() == MVT::v4f32;
8727
8728   SDValue Wide = widenVec(DAG, Src, dl);
8729   EVT WideVT = Wide.getValueType();
8730   unsigned WideNumElts = WideVT.getVectorNumElements();
8731   MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8732
8733   SmallVector<int, 16> ShuffV;
8734   for (unsigned i = 0; i < WideNumElts; ++i)
8735     ShuffV.push_back(i + WideNumElts);
8736
8737   int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8738   int SaveElts = FourEltRes ? 4 : 2;
8739   if (Subtarget.isLittleEndian())
8740     for (int i = 0; i < SaveElts; i++)
8741       ShuffV[i * Stride] = i;
8742   else
8743     for (int i = 1; i <= SaveElts; i++)
8744       ShuffV[i * Stride - 1] = i - 1;
8745
8746   SDValue ShuffleSrc2 =
8747       SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8748   SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8749
8750   SDValue Extend;
8751   if (SignedConv) {
8752     Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8753     EVT ExtVT = Src.getValueType();
8754     if (Subtarget.hasP9Altivec())
8755       ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8756                                IntermediateVT.getVectorNumElements());
8757
8758     Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8759                          DAG.getValueType(ExtVT));
8760   } else
8761     Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8762
8763   if (IsStrict)
8764     return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8765                        {Op.getOperand(0), Extend}, Flags);
8766
8767   return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8768 }
8769
8770 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8771                                           SelectionDAG &DAG) const {
8772   SDLoc dl(Op);
8773   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8774                   Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8775   bool IsStrict = Op->isStrictFPOpcode();
8776   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8777   SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8778
8779   // TODO: Any other flags to propagate?
8780   SDNodeFlags Flags;
8781   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8782
8783   EVT InVT = Src.getValueType();
8784   EVT OutVT = Op.getValueType();
8785   if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8786       isOperationCustom(Op.getOpcode(), InVT))
8787     return LowerINT_TO_FPVector(Op, DAG, dl);
8788
8789   // Conversions to f128 are legal.
8790   if (Op.getValueType() == MVT::f128)
8791     return Subtarget.hasP9Vector() ? Op : SDValue();
8792
8793   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8794   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8795     return SDValue();
8796
8797   if (Src.getValueType() == MVT::i1) {
8798     SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8799                               DAG.getConstantFP(1.0, dl, Op.getValueType()),
8800                               DAG.getConstantFP(0.0, dl, Op.getValueType()));
8801     if (IsStrict)
8802       return DAG.getMergeValues({Sel, Chain}, dl);
8803     else
8804       return Sel;
8805   }
8806
8807   // If we have direct moves, we can do all the conversion, skip the store/load
8808   // however, without FPCVT we can't do most conversions.
8809   if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8810       Subtarget.isPPC64() && Subtarget.hasFPCVT())
8811     return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8812
8813   assert((IsSigned || Subtarget.hasFPCVT()) &&
8814          "UINT_TO_FP is supported only with FPCVT");
8815
8816   if (Src.getValueType() == MVT::i64) {
8817     SDValue SINT = Src;
8818     // When converting to single-precision, we actually need to convert
8819     // to double-precision first and then round to single-precision.
8820     // To avoid double-rounding effects during that operation, we have
8821     // to prepare the input operand.  Bits that might be truncated when
8822     // converting to double-precision are replaced by a bit that won't
8823     // be lost at this stage, but is below the single-precision rounding
8824     // position.
8825     //
8826     // However, if -enable-unsafe-fp-math is in effect, accept double
8827     // rounding to avoid the extra overhead.
8828     if (Op.getValueType() == MVT::f32 &&
8829         !Subtarget.hasFPCVT() &&
8830         !DAG.getTarget().Options.UnsafeFPMath) {
8831
8832       // Twiddle input to make sure the low 11 bits are zero.  (If this
8833       // is the case, we are guaranteed the value will fit into the 53 bit
8834       // mantissa of an IEEE double-precision value without rounding.)
8835       // If any of those low 11 bits were not zero originally, make sure
8836       // bit 12 (value 2048) is set instead, so that the final rounding
8837       // to single-precision gets the correct result.
8838       SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8839                                   SINT, DAG.getConstant(2047, dl, MVT::i64));
8840       Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8841                           Round, DAG.getConstant(2047, dl, MVT::i64));
8842       Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8843       Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8844                           Round, DAG.getConstant(-2048, dl, MVT::i64));
8845
8846       // However, we cannot use that value unconditionally: if the magnitude
8847       // of the input value is small, the bit-twiddling we did above might
8848       // end up visibly changing the output.  Fortunately, in that case, we
8849       // don't need to twiddle bits since the original input will convert
8850       // exactly to double-precision floating-point already.  Therefore,
8851       // construct a conditional to use the original value if the top 11
8852       // bits are all sign-bit copies, and use the rounded value computed
8853       // above otherwise.
8854       SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8855                                  SINT, DAG.getConstant(53, dl, MVT::i32));
8856       Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8857                          Cond, DAG.getConstant(1, dl, MVT::i64));
8858       Cond = DAG.getSetCC(
8859           dl,
8860           getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8861           Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8862
8863       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8864     }
8865
8866     ReuseLoadInfo RLI;
8867     SDValue Bits;
8868
8869     MachineFunction &MF = DAG.getMachineFunction();
8870     if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8871       Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8872                          RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8873       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8874     } else if (Subtarget.hasLFIWAX() &&
8875                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8876       MachineMemOperand *MMO =
8877         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8878                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8879       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8880       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8881                                      DAG.getVTList(MVT::f64, MVT::Other),
8882                                      Ops, MVT::i32, MMO);
8883       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8884     } else if (Subtarget.hasFPCVT() &&
8885                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8886       MachineMemOperand *MMO =
8887         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8888                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8889       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8890       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8891                                      DAG.getVTList(MVT::f64, MVT::Other),
8892                                      Ops, MVT::i32, MMO);
8893       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8894     } else if (((Subtarget.hasLFIWAX() &&
8895                  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8896                 (Subtarget.hasFPCVT() &&
8897                  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8898                SINT.getOperand(0).getValueType() == MVT::i32) {
8899       MachineFrameInfo &MFI = MF.getFrameInfo();
8900       EVT PtrVT = getPointerTy(DAG.getDataLayout());
8901
8902       int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8903       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8904
8905       SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8906                                    MachinePointerInfo::getFixedStack(
8907                                        DAG.getMachineFunction(), FrameIdx));
8908       Chain = Store;
8909
8910       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8911              "Expected an i32 store");
8912
8913       RLI.Ptr = FIdx;
8914       RLI.Chain = Chain;
8915       RLI.MPI =
8916           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8917       RLI.Alignment = Align(4);
8918
8919       MachineMemOperand *MMO =
8920         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8921                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8922       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8923       Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8924                                      PPCISD::LFIWZX : PPCISD::LFIWAX,
8925                                      dl, DAG.getVTList(MVT::f64, MVT::Other),
8926                                      Ops, MVT::i32, MMO);
8927       Chain = Bits.getValue(1);
8928     } else
8929       Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8930
8931     SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8932     if (IsStrict)
8933       Chain = FP.getValue(1);
8934
8935     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8936       if (IsStrict)
8937         FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8938                          DAG.getVTList(MVT::f32, MVT::Other),
8939                          {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8940       else
8941         FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8942                          DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8943     }
8944     return FP;
8945   }
8946
8947   assert(Src.getValueType() == MVT::i32 &&
8948          "Unhandled INT_TO_FP type in custom expander!");
8949   // Since we only generate this in 64-bit mode, we can take advantage of
8950   // 64-bit registers.  In particular, sign extend the input value into the
8951   // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8952   // then lfd it and fcfid it.
8953   MachineFunction &MF = DAG.getMachineFunction();
8954   MachineFrameInfo &MFI = MF.getFrameInfo();
8955   EVT PtrVT = getPointerTy(MF.getDataLayout());
8956
8957   SDValue Ld;
8958   if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8959     ReuseLoadInfo RLI;
8960     bool ReusingLoad;
8961     if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8962       int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8963       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8964
8965       SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8966                                    MachinePointerInfo::getFixedStack(
8967                                        DAG.getMachineFunction(), FrameIdx));
8968       Chain = Store;
8969
8970       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8971              "Expected an i32 store");
8972
8973       RLI.Ptr = FIdx;
8974       RLI.Chain = Chain;
8975       RLI.MPI =
8976           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8977       RLI.Alignment = Align(4);
8978     }
8979
8980     MachineMemOperand *MMO =
8981       MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8982                               RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8983     SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8984     Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8985                                  DAG.getVTList(MVT::f64, MVT::Other), Ops,
8986                                  MVT::i32, MMO);
8987     Chain = Ld.getValue(1);
8988     if (ReusingLoad)
8989       spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8990   } else {
8991     assert(Subtarget.isPPC64() &&
8992            "i32->FP without LFIWAX supported only on PPC64");
8993
8994     int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8995     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8996
8997     SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8998
8999     // STD the extended value into the stack slot.
9000     SDValue Store = DAG.getStore(
9001         Chain, dl, Ext64, FIdx,
9002         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9003     Chain = Store;
9004
9005     // Load the value as a double.
9006     Ld = DAG.getLoad(
9007         MVT::f64, dl, Chain, FIdx,
9008         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9009     Chain = Ld.getValue(1);
9010   }
9011
9012   // FCFID it and return it.
9013   SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9014   if (IsStrict)
9015     Chain = FP.getValue(1);
9016   if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9017     if (IsStrict)
9018       FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
9019                        DAG.getVTList(MVT::f32, MVT::Other),
9020                        {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
9021     else
9022       FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9023                        DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9024   }
9025   return FP;
9026 }
9027
9028 SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9029                                              SelectionDAG &DAG) const {
9030   SDLoc dl(Op);
9031   /*
9032    The rounding mode is in bits 30:31 of FPSR, and has the following
9033    settings:
9034      00 Round to nearest
9035      01 Round to 0
9036      10 Round to +inf
9037      11 Round to -inf
9038
9039   GET_ROUNDING, on the other hand, expects the following:
9040     -1 Undefined
9041      0 Round to 0
9042      1 Round to nearest
9043      2 Round to +inf
9044      3 Round to -inf
9045
9046   To perform the conversion, we do:
9047     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9048   */
9049
9050   MachineFunction &MF = DAG.getMachineFunction();
9051   EVT VT = Op.getValueType();
9052   EVT PtrVT = getPointerTy(MF.getDataLayout());
9053
9054   // Save FP Control Word to register
9055   SDValue Chain = Op.getOperand(0);
9056   SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9057   Chain = MFFS.getValue(1);
9058
9059   SDValue CWD;
9060   if (isTypeLegal(MVT::i64)) {
9061     CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9062                       DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9063   } else {
9064     // Save FP register to stack slot
9065     int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9066     SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9067     Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9068
9069     // Load FP Control Word from low 32 bits of stack slot.
9070     assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
9071            "Stack slot adjustment is valid only on big endian subtargets!");
9072     SDValue Four = DAG.getConstant(4, dl, PtrVT);
9073     SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9074     CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9075     Chain = CWD.getValue(1);
9076   }
9077
9078   // Transform as necessary
9079   SDValue CWD1 =
9080     DAG.getNode(ISD::AND, dl, MVT::i32,
9081                 CWD, DAG.getConstant(3, dl, MVT::i32));
9082   SDValue CWD2 =
9083     DAG.getNode(ISD::SRL, dl, MVT::i32,
9084                 DAG.getNode(ISD::AND, dl, MVT::i32,
9085                             DAG.getNode(ISD::XOR, dl, MVT::i32,
9086                                         CWD, DAG.getConstant(3, dl, MVT::i32)),
9087                             DAG.getConstant(3, dl, MVT::i32)),
9088                 DAG.getConstant(1, dl, MVT::i32));
9089
9090   SDValue RetVal =
9091     DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9092
9093   RetVal =
9094       DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9095                   dl, VT, RetVal);
9096
9097   return DAG.getMergeValues({RetVal, Chain}, dl);
9098 }
9099
9100 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9101   EVT VT = Op.getValueType();
9102   unsigned BitWidth = VT.getSizeInBits();
9103   SDLoc dl(Op);
9104   assert(Op.getNumOperands() == 3 &&
9105          VT == Op.getOperand(1).getValueType() &&
9106          "Unexpected SHL!");
9107
9108   // Expand into a bunch of logical ops.  Note that these ops
9109   // depend on the PPC behavior for oversized shift amounts.
9110   SDValue Lo = Op.getOperand(0);
9111   SDValue Hi = Op.getOperand(1);
9112   SDValue Amt = Op.getOperand(2);
9113   EVT AmtVT = Amt.getValueType();
9114
9115   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9116                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9117   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9118   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9119   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9120   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9121                              DAG.getConstant(-BitWidth, dl, AmtVT));
9122   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9123   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9124   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9125   SDValue OutOps[] = { OutLo, OutHi };
9126   return DAG.getMergeValues(OutOps, dl);
9127 }
9128
9129 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9130   EVT VT = Op.getValueType();
9131   SDLoc dl(Op);
9132   unsigned BitWidth = VT.getSizeInBits();
9133   assert(Op.getNumOperands() == 3 &&
9134          VT == Op.getOperand(1).getValueType() &&
9135          "Unexpected SRL!");
9136
9137   // Expand into a bunch of logical ops.  Note that these ops
9138   // depend on the PPC behavior for oversized shift amounts.
9139   SDValue Lo = Op.getOperand(0);
9140   SDValue Hi = Op.getOperand(1);
9141   SDValue Amt = Op.getOperand(2);
9142   EVT AmtVT = Amt.getValueType();
9143
9144   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9145                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9146   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9147   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9148   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9149   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9150                              DAG.getConstant(-BitWidth, dl, AmtVT));
9151   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9152   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9153   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9154   SDValue OutOps[] = { OutLo, OutHi };
9155   return DAG.getMergeValues(OutOps, dl);
9156 }
9157
9158 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9159   SDLoc dl(Op);
9160   EVT VT = Op.getValueType();
9161   unsigned BitWidth = VT.getSizeInBits();
9162   assert(Op.getNumOperands() == 3 &&
9163          VT == Op.getOperand(1).getValueType() &&
9164          "Unexpected SRA!");
9165
9166   // Expand into a bunch of logical ops, followed by a select_cc.
9167   SDValue Lo = Op.getOperand(0);
9168   SDValue Hi = Op.getOperand(1);
9169   SDValue Amt = Op.getOperand(2);
9170   EVT AmtVT = Amt.getValueType();
9171
9172   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9173                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9174   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9175   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9176   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9177   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9178                              DAG.getConstant(-BitWidth, dl, AmtVT));
9179   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9180   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9181   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9182                                   Tmp4, Tmp6, ISD::SETLE);
9183   SDValue OutOps[] = { OutLo, OutHi };
9184   return DAG.getMergeValues(OutOps, dl);
9185 }
9186
9187 SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9188                                             SelectionDAG &DAG) const {
9189   SDLoc dl(Op);
9190   EVT VT = Op.getValueType();
9191   unsigned BitWidth = VT.getSizeInBits();
9192
9193   bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9194   SDValue X = Op.getOperand(0);
9195   SDValue Y = Op.getOperand(1);
9196   SDValue Z = Op.getOperand(2);
9197   EVT AmtVT = Z.getValueType();
9198
9199   // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9200   // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9201   // This is simpler than TargetLowering::expandFunnelShift because we can rely
9202   // on PowerPC shift by BW being well defined.
9203   Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9204                   DAG.getConstant(BitWidth - 1, dl, AmtVT));
9205   SDValue SubZ =
9206       DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9207   X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9208   Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9209   return DAG.getNode(ISD::OR, dl, VT, X, Y);
9210 }
9211
9212 //===----------------------------------------------------------------------===//
9213 // Vector related lowering.
9214 //
9215
9216 /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9217 /// element size of SplatSize. Cast the result to VT.
9218 static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9219                                       SelectionDAG &DAG, const SDLoc &dl) {
9220   static const MVT VTys[] = { // canonical VT to use for each size.
9221     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9222   };
9223
9224   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9225
9226   // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9227   if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9228     SplatSize = 1;
9229     Val = 0xFF;
9230   }
9231
9232   EVT CanonicalVT = VTys[SplatSize-1];
9233
9234   // Build a canonical splat for this value.
9235   return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9236 }
9237
9238 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9239 /// specified intrinsic ID.
9240 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9241                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
9242   if (DestVT == MVT::Other) DestVT = Op.getValueType();
9243   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9244                      DAG.getConstant(IID, dl, MVT::i32), Op);
9245 }
9246
9247 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9248 /// specified intrinsic ID.
9249 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9250                                 SelectionDAG &DAG, const SDLoc &dl,
9251                                 EVT DestVT = MVT::Other) {
9252   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9253   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9254                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9255 }
9256
9257 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9258 /// specified intrinsic ID.
9259 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9260                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9261                                 EVT DestVT = MVT::Other) {
9262   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9263   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9264                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9265 }
9266
9267 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9268 /// amount.  The result has the specified value type.
9269 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9270                            SelectionDAG &DAG, const SDLoc &dl) {
9271   // Force LHS/RHS to be the right type.
9272   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9273   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9274
9275   int Ops[16];
9276   for (unsigned i = 0; i != 16; ++i)
9277     Ops[i] = i + Amt;
9278   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9279   return DAG.getNode(ISD::BITCAST, dl, VT, T);
9280 }
9281
9282 /// Do we have an efficient pattern in a .td file for this node?
9283 ///
9284 /// \param V - pointer to the BuildVectorSDNode being matched
9285 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9286 ///
9287 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9288 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9289 /// the opposite is true (expansion is beneficial) are:
9290 /// - The node builds a vector out of integers that are not 32 or 64-bits
9291 /// - The node builds a vector out of constants
9292 /// - The node is a "load-and-splat"
9293 /// In all other cases, we will choose to keep the BUILD_VECTOR.
9294 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9295                                             bool HasDirectMove,
9296                                             bool HasP8Vector) {
9297   EVT VecVT = V->getValueType(0);
9298   bool RightType = VecVT == MVT::v2f64 ||
9299     (HasP8Vector && VecVT == MVT::v4f32) ||
9300     (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9301   if (!RightType)
9302     return false;
9303
9304   bool IsSplat = true;
9305   bool IsLoad = false;
9306   SDValue Op0 = V->getOperand(0);
9307
9308   // This function is called in a block that confirms the node is not a constant
9309   // splat. So a constant BUILD_VECTOR here means the vector is built out of
9310   // different constants.
9311   if (V->isConstant())
9312     return false;
9313   for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9314     if (V->getOperand(i).isUndef())
9315       return false;
9316     // We want to expand nodes that represent load-and-splat even if the
9317     // loaded value is a floating point truncation or conversion to int.
9318     if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9319         (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9320          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9321         (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9322          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9323         (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9324          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9325       IsLoad = true;
9326     // If the operands are different or the input is not a load and has more
9327     // uses than just this BV node, then it isn't a splat.
9328     if (V->getOperand(i) != Op0 ||
9329         (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9330       IsSplat = false;
9331   }
9332   return !(IsSplat && IsLoad);
9333 }
9334
9335 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9336 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9337
9338   SDLoc dl(Op);
9339   SDValue Op0 = Op->getOperand(0);
9340
9341   if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9342       (Op.getValueType() != MVT::f128))
9343     return SDValue();
9344
9345   SDValue Lo = Op0.getOperand(0);
9346   SDValue Hi = Op0.getOperand(1);
9347   if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9348     return SDValue();
9349
9350   if (!Subtarget.isLittleEndian())
9351     std::swap(Lo, Hi);
9352
9353   return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9354 }
9355
9356 static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9357   const SDValue *InputLoad = &Op;
9358   while (InputLoad->getOpcode() == ISD::BITCAST)
9359     InputLoad = &InputLoad->getOperand(0);
9360   if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9361       InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9362     IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9363     InputLoad = &InputLoad->getOperand(0);
9364   }
9365   if (InputLoad->getOpcode() != ISD::LOAD)
9366     return nullptr;
9367   LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9368   return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9369 }
9370
9371 // Convert the argument APFloat to a single precision APFloat if there is no
9372 // loss in information during the conversion to single precision APFloat and the
9373 // resulting number is not a denormal number. Return true if successful.
9374 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9375   APFloat APFloatToConvert = ArgAPFloat;
9376   bool LosesInfo = true;
9377   APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9378                            &LosesInfo);
9379   bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9380   if (Success)
9381     ArgAPFloat = APFloatToConvert;
9382   return Success;
9383 }
9384
9385 // Bitcast the argument APInt to a double and convert it to a single precision
9386 // APFloat, bitcast the APFloat to an APInt and assign it to the original
9387 // argument if there is no loss in information during the conversion from
9388 // double to single precision APFloat and the resulting number is not a denormal
9389 // number. Return true if successful.
9390 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9391   double DpValue = ArgAPInt.bitsToDouble();
9392   APFloat APFloatDp(DpValue);
9393   bool Success = convertToNonDenormSingle(APFloatDp);
9394   if (Success)
9395     ArgAPInt = APFloatDp.bitcastToAPInt();
9396   return Success;
9397 }
9398
9399 // Nondestructive check for convertTonNonDenormSingle.
9400 bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9401   // Only convert if it loses info, since XXSPLTIDP should
9402   // handle the other case.
9403   APFloat APFloatToConvert = ArgAPFloat;
9404   bool LosesInfo = true;
9405   APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9406                            &LosesInfo);
9407
9408   return (!LosesInfo && !APFloatToConvert.isDenormal());
9409 }
9410
9411 static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9412                              unsigned &Opcode) {
9413   LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9414   if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9415     return false;
9416
9417   EVT Ty = Op->getValueType(0);
9418   // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9419   // as we cannot handle extending loads for these types.
9420   if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9421       ISD::isNON_EXTLoad(InputNode))
9422     return true;
9423
9424   EVT MemVT = InputNode->getMemoryVT();
9425   // For v8i16 and v16i8 types, extending loads can be handled as long as the
9426   // memory VT is the same vector element VT type.
9427   // The loads feeding into the v8i16 and v16i8 types will be extending because
9428   // scalar i8/i16 are not legal types.
9429   if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9430       (MemVT == Ty.getVectorElementType()))
9431     return true;
9432
9433   if (Ty == MVT::v2i64) {
9434     // Check the extend type, when the input type is i32, and the output vector
9435     // type is v2i64.
9436     if (MemVT == MVT::i32) {
9437       if (ISD::isZEXTLoad(InputNode))
9438         Opcode = PPCISD::ZEXT_LD_SPLAT;
9439       if (ISD::isSEXTLoad(InputNode))
9440         Opcode = PPCISD::SEXT_LD_SPLAT;
9441     }
9442     return true;
9443   }
9444   return false;
9445 }
9446
9447 // If this is a case we can't handle, return null and let the default
9448 // expansion code take care of it.  If we CAN select this case, and if it
9449 // selects to a single instruction, return Op.  Otherwise, if we can codegen
9450 // this case more efficiently than a constant pool load, lower it to the
9451 // sequence of ops that should be used.
9452 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9453                                              SelectionDAG &DAG) const {
9454   SDLoc dl(Op);
9455   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9456   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9457
9458   // Check if this is a splat of a constant value.
9459   APInt APSplatBits, APSplatUndef;
9460   unsigned SplatBitSize;
9461   bool HasAnyUndefs;
9462   bool BVNIsConstantSplat =
9463       BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9464                            HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9465
9466   // If it is a splat of a double, check if we can shrink it to a 32 bit
9467   // non-denormal float which when converted back to double gives us the same
9468   // double. This is to exploit the XXSPLTIDP instruction.
9469   // If we lose precision, we use XXSPLTI32DX.
9470   if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9471       Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9472     // Check the type first to short-circuit so we don't modify APSplatBits if
9473     // this block isn't executed.
9474     if ((Op->getValueType(0) == MVT::v2f64) &&
9475         convertToNonDenormSingle(APSplatBits)) {
9476       SDValue SplatNode = DAG.getNode(
9477           PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9478           DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9479       return DAG.getBitcast(Op.getValueType(), SplatNode);
9480     } else {
9481       // We may lose precision, so we have to use XXSPLTI32DX.
9482
9483       uint32_t Hi =
9484           (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
9485       uint32_t Lo =
9486           (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
9487       SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9488
9489       if (!Hi || !Lo)
9490         // If either load is 0, then we should generate XXLXOR to set to 0.
9491         SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9492
9493       if (Hi)
9494         SplatNode = DAG.getNode(
9495             PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9496             DAG.getTargetConstant(0, dl, MVT::i32),
9497             DAG.getTargetConstant(Hi, dl, MVT::i32));
9498
9499       if (Lo)
9500         SplatNode =
9501             DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9502                         DAG.getTargetConstant(1, dl, MVT::i32),
9503                         DAG.getTargetConstant(Lo, dl, MVT::i32));
9504
9505       return DAG.getBitcast(Op.getValueType(), SplatNode);
9506     }
9507   }
9508
9509   if (!BVNIsConstantSplat || SplatBitSize > 32) {
9510     unsigned NewOpcode = PPCISD::LD_SPLAT;
9511
9512     // Handle load-and-splat patterns as we have instructions that will do this
9513     // in one go.
9514     if (DAG.isSplatValue(Op, true) &&
9515         isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9516       const SDValue *InputLoad = &Op.getOperand(0);
9517       LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9518
9519       // If the input load is an extending load, it will be an i32 -> i64
9520       // extending load and isValidSplatLoad() will update NewOpcode.
9521       unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9522       unsigned ElementSize =
9523           MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9524
9525       assert(((ElementSize == 2 * MemorySize)
9526                   ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9527                      NewOpcode == PPCISD::SEXT_LD_SPLAT)
9528                   : (NewOpcode == PPCISD::LD_SPLAT)) &&
9529              "Unmatched element size and opcode!\n");
9530
9531       // Checking for a single use of this load, we have to check for vector
9532       // width (128 bits) / ElementSize uses (since each operand of the
9533       // BUILD_VECTOR is a separate use of the value.
9534       unsigned NumUsesOfInputLD = 128 / ElementSize;
9535       for (SDValue BVInOp : Op->ops())
9536         if (BVInOp.isUndef())
9537           NumUsesOfInputLD--;
9538
9539       // Exclude somes case where LD_SPLAT is worse than scalar_to_vector:
9540       // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9541       // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9542       // 15", but function IsValidSplatLoad() now will only return true when
9543       // the data at index 0 is not nullptr. So we will not get into trouble for
9544       // these cases.
9545       //
9546       // case 1 - lfiwzx/lfiwax
9547       // 1.1: load result is i32 and is sign/zero extend to i64;
9548       // 1.2: build a v2i64 vector type with above loaded value;
9549       // 1.3: the vector has only one value at index 0, others are all undef;
9550       // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9551       if (NumUsesOfInputLD == 1 &&
9552           (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9553            !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9554            Subtarget.hasLFIWAX()))
9555         return SDValue();
9556
9557       // case 2 - lxvr[hb]x
9558       // 2.1: load result is at most i16;
9559       // 2.2: build a vector with above loaded value;
9560       // 2.3: the vector has only one value at index 0, others are all undef;
9561       // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9562       if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9563           Subtarget.isISA3_1() && ElementSize <= 16)
9564         return SDValue();
9565
9566       assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9567       if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9568           Subtarget.hasVSX()) {
9569         SDValue Ops[] = {
9570           LD->getChain(),    // Chain
9571           LD->getBasePtr(),  // Ptr
9572           DAG.getValueType(Op.getValueType()) // VT
9573         };
9574         SDValue LdSplt = DAG.getMemIntrinsicNode(
9575             NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9576             LD->getMemoryVT(), LD->getMemOperand());
9577         // Replace all uses of the output chain of the original load with the
9578         // output chain of the new load.
9579         DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9580                                       LdSplt.getValue(1));
9581         return LdSplt;
9582       }
9583     }
9584
9585     // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9586     // 32-bits can be lowered to VSX instructions under certain conditions.
9587     // Without VSX, there is no pattern more efficient than expanding the node.
9588     if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9589         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9590                                         Subtarget.hasP8Vector()))
9591       return Op;
9592     return SDValue();
9593   }
9594
9595   uint64_t SplatBits = APSplatBits.getZExtValue();
9596   uint64_t SplatUndef = APSplatUndef.getZExtValue();
9597   unsigned SplatSize = SplatBitSize / 8;
9598
9599   // First, handle single instruction cases.
9600
9601   // All zeros?
9602   if (SplatBits == 0) {
9603     // Canonicalize all zero vectors to be v4i32.
9604     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9605       SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9606       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9607     }
9608     return Op;
9609   }
9610
9611   // We have XXSPLTIW for constant splats four bytes wide.
9612   // Given vector length is a multiple of 4, 2-byte splats can be replaced
9613   // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9614   // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9615   // turned into a 4-byte splat of 0xABABABAB.
9616   if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9617     return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9618                                   Op.getValueType(), DAG, dl);
9619
9620   if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9621     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9622                                   dl);
9623
9624   // We have XXSPLTIB for constant splats one byte wide.
9625   if (Subtarget.hasP9Vector() && SplatSize == 1)
9626     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9627                                   dl);
9628
9629   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9630   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9631                     (32-SplatBitSize));
9632   if (SextVal >= -16 && SextVal <= 15)
9633     return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9634                                   dl);
9635
9636   // Two instruction sequences.
9637
9638   // If this value is in the range [-32,30] and is even, use:
9639   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9640   // If this value is in the range [17,31] and is odd, use:
9641   //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9642   // If this value is in the range [-31,-17] and is odd, use:
9643   //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9644   // Note the last two are three-instruction sequences.
9645   if (SextVal >= -32 && SextVal <= 31) {
9646     // To avoid having these optimizations undone by constant folding,
9647     // we convert to a pseudo that will be expanded later into one of
9648     // the above forms.
9649     SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9650     EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9651               (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9652     SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9653     SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9654     if (VT == Op.getValueType())
9655       return RetVal;
9656     else
9657       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9658   }
9659
9660   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
9661   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
9662   // for fneg/fabs.
9663   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9664     // Make -1 and vspltisw -1:
9665     SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9666
9667     // Make the VSLW intrinsic, computing 0x8000_0000.
9668     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9669                                    OnesV, DAG, dl);
9670
9671     // xor by OnesV to invert it.
9672     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9673     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9674   }
9675
9676   // Check to see if this is a wide variety of vsplti*, binop self cases.
9677   static const signed char SplatCsts[] = {
9678     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9679     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9680   };
9681
9682   for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9683     // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9684     // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
9685     int i = SplatCsts[idx];
9686
9687     // Figure out what shift amount will be used by altivec if shifted by i in
9688     // this splat size.
9689     unsigned TypeShiftAmt = i & (SplatBitSize-1);
9690
9691     // vsplti + shl self.
9692     if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9693       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9694       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9695         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9696         Intrinsic::ppc_altivec_vslw
9697       };
9698       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9699       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9700     }
9701
9702     // vsplti + srl self.
9703     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9704       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9705       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9706         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9707         Intrinsic::ppc_altivec_vsrw
9708       };
9709       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9710       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9711     }
9712
9713     // vsplti + rol self.
9714     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9715                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9716       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9717       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9718         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9719         Intrinsic::ppc_altivec_vrlw
9720       };
9721       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9722       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9723     }
9724
9725     // t = vsplti c, result = vsldoi t, t, 1
9726     if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9727       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9728       unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9729       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9730     }
9731     // t = vsplti c, result = vsldoi t, t, 2
9732     if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9733       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9734       unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9735       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9736     }
9737     // t = vsplti c, result = vsldoi t, t, 3
9738     if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9739       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9740       unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9741       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9742     }
9743   }
9744
9745   return SDValue();
9746 }
9747
9748 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9749 /// the specified operations to build the shuffle.
9750 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9751                                       SDValue RHS, SelectionDAG &DAG,
9752                                       const SDLoc &dl) {
9753   unsigned OpNum = (PFEntry >> 26) & 0x0F;
9754   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9755   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
9756
9757   enum {
9758     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9759     OP_VMRGHW,
9760     OP_VMRGLW,
9761     OP_VSPLTISW0,
9762     OP_VSPLTISW1,
9763     OP_VSPLTISW2,
9764     OP_VSPLTISW3,
9765     OP_VSLDOI4,
9766     OP_VSLDOI8,
9767     OP_VSLDOI12
9768   };
9769
9770   if (OpNum == OP_COPY) {
9771     if (LHSID == (1*9+2)*9+3) return LHS;
9772     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9773     return RHS;
9774   }
9775
9776   SDValue OpLHS, OpRHS;
9777   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9778   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9779
9780   int ShufIdxs[16];
9781   switch (OpNum) {
9782   default: llvm_unreachable("Unknown i32 permute!");
9783   case OP_VMRGHW:
9784     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
9785     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9786     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
9787     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9788     break;
9789   case OP_VMRGLW:
9790     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9791     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9792     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9793     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9794     break;
9795   case OP_VSPLTISW0:
9796     for (unsigned i = 0; i != 16; ++i)
9797       ShufIdxs[i] = (i&3)+0;
9798     break;
9799   case OP_VSPLTISW1:
9800     for (unsigned i = 0; i != 16; ++i)
9801       ShufIdxs[i] = (i&3)+4;
9802     break;
9803   case OP_VSPLTISW2:
9804     for (unsigned i = 0; i != 16; ++i)
9805       ShufIdxs[i] = (i&3)+8;
9806     break;
9807   case OP_VSPLTISW3:
9808     for (unsigned i = 0; i != 16; ++i)
9809       ShufIdxs[i] = (i&3)+12;
9810     break;
9811   case OP_VSLDOI4:
9812     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9813   case OP_VSLDOI8:
9814     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9815   case OP_VSLDOI12:
9816     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9817   }
9818   EVT VT = OpLHS.getValueType();
9819   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9820   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9821   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9822   return DAG.getNode(ISD::BITCAST, dl, VT, T);
9823 }
9824
9825 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9826 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9827 /// SDValue.
9828 SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9829                                            SelectionDAG &DAG) const {
9830   const unsigned BytesInVector = 16;
9831   bool IsLE = Subtarget.isLittleEndian();
9832   SDLoc dl(N);
9833   SDValue V1 = N->getOperand(0);
9834   SDValue V2 = N->getOperand(1);
9835   unsigned ShiftElts = 0, InsertAtByte = 0;
9836   bool Swap = false;
9837
9838   // Shifts required to get the byte we want at element 7.
9839   unsigned LittleEndianShifts[] = {8, 7,  6,  5,  4,  3,  2,  1,
9840                                    0, 15, 14, 13, 12, 11, 10, 9};
9841   unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9842                                 1, 2,  3,  4,  5,  6,  7,  8};
9843
9844   ArrayRef<int> Mask = N->getMask();
9845   int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9846
9847   // For each mask element, find out if we're just inserting something
9848   // from V2 into V1 or vice versa.
9849   // Possible permutations inserting an element from V2 into V1:
9850   //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9851   //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9852   //   ...
9853   //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9854   // Inserting from V1 into V2 will be similar, except mask range will be
9855   // [16,31].
9856
9857   bool FoundCandidate = false;
9858   // If both vector operands for the shuffle are the same vector, the mask
9859   // will contain only elements from the first one and the second one will be
9860   // undef.
9861   unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9862   // Go through the mask of half-words to find an element that's being moved
9863   // from one vector to the other.
9864   for (unsigned i = 0; i < BytesInVector; ++i) {
9865     unsigned CurrentElement = Mask[i];
9866     // If 2nd operand is undefined, we should only look for element 7 in the
9867     // Mask.
9868     if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9869       continue;
9870
9871     bool OtherElementsInOrder = true;
9872     // Examine the other elements in the Mask to see if they're in original
9873     // order.
9874     for (unsigned j = 0; j < BytesInVector; ++j) {
9875       if (j == i)
9876         continue;
9877       // If CurrentElement is from V1 [0,15], then we the rest of the Mask to be
9878       // from V2 [16,31] and vice versa.  Unless the 2nd operand is undefined,
9879       // in which we always assume we're always picking from the 1st operand.
9880       int MaskOffset =
9881           (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9882       if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9883         OtherElementsInOrder = false;
9884         break;
9885       }
9886     }
9887     // If other elements are in original order, we record the number of shifts
9888     // we need to get the element we want into element 7. Also record which byte
9889     // in the vector we should insert into.
9890     if (OtherElementsInOrder) {
9891       // If 2nd operand is undefined, we assume no shifts and no swapping.
9892       if (V2.isUndef()) {
9893         ShiftElts = 0;
9894         Swap = false;
9895       } else {
9896         // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
9897         ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9898                          : BigEndianShifts[CurrentElement & 0xF];
9899         Swap = CurrentElement < BytesInVector;
9900       }
9901       InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9902       FoundCandidate = true;
9903       break;
9904     }
9905   }
9906
9907   if (!FoundCandidate)
9908     return SDValue();
9909
9910   // Candidate found, construct the proper SDAG sequence with VINSERTB,
9911   // optionally with VECSHL if shift is required.
9912   if (Swap)
9913     std::swap(V1, V2);
9914   if (V2.isUndef())
9915     V2 = V1;
9916   if (ShiftElts) {
9917     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9918                               DAG.getConstant(ShiftElts, dl, MVT::i32));
9919     return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9920                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
9921   }
9922   return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9923                      DAG.getConstant(InsertAtByte, dl, MVT::i32));
9924 }
9925
9926 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9927 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9928 /// SDValue.
9929 SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9930                                            SelectionDAG &DAG) const {
9931   const unsigned NumHalfWords = 8;
9932   const unsigned BytesInVector = NumHalfWords * 2;
9933   // Check that the shuffle is on half-words.
9934   if (!isNByteElemShuffleMask(N, 2, 1))
9935     return SDValue();
9936
9937   bool IsLE = Subtarget.isLittleEndian();
9938   SDLoc dl(N);
9939   SDValue V1 = N->getOperand(0);
9940   SDValue V2 = N->getOperand(1);
9941   unsigned ShiftElts = 0, InsertAtByte = 0;
9942   bool Swap = false;
9943
9944   // Shifts required to get the half-word we want at element 3.
9945   unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9946   unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9947
9948   uint32_t Mask = 0;
9949   uint32_t OriginalOrderLow = 0x1234567;
9950   uint32_t OriginalOrderHigh = 0x89ABCDEF;
9951   // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
9952   // 32-bit space, only need 4-bit nibbles per element.
9953   for (unsigned i = 0; i < NumHalfWords; ++i) {
9954     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9955     Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9956   }
9957
9958   // For each mask element, find out if we're just inserting something
9959   // from V2 into V1 or vice versa.  Possible permutations inserting an element
9960   // from V2 into V1:
9961   //   X, 1, 2, 3, 4, 5, 6, 7
9962   //   0, X, 2, 3, 4, 5, 6, 7
9963   //   0, 1, X, 3, 4, 5, 6, 7
9964   //   0, 1, 2, X, 4, 5, 6, 7
9965   //   0, 1, 2, 3, X, 5, 6, 7
9966   //   0, 1, 2, 3, 4, X, 6, 7
9967   //   0, 1, 2, 3, 4, 5, X, 7
9968   //   0, 1, 2, 3, 4, 5, 6, X
9969   // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9970
9971   bool FoundCandidate = false;
9972   // Go through the mask of half-words to find an element that's being moved
9973   // from one vector to the other.
9974   for (unsigned i = 0; i < NumHalfWords; ++i) {
9975     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9976     uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9977     uint32_t MaskOtherElts = ~(0xF << MaskShift);
9978     uint32_t TargetOrder = 0x0;
9979
9980     // If both vector operands for the shuffle are the same vector, the mask
9981     // will contain only elements from the first one and the second one will be
9982     // undef.
9983     if (V2.isUndef()) {
9984       ShiftElts = 0;
9985       unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9986       TargetOrder = OriginalOrderLow;
9987       Swap = false;
9988       // Skip if not the correct element or mask of other elements don't equal
9989       // to our expected order.
9990       if (MaskOneElt == VINSERTHSrcElem &&
9991           (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9992         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9993         FoundCandidate = true;
9994         break;
9995       }
9996     } else { // If both operands are defined.
9997       // Target order is [8,15] if the current mask is between [0,7].
9998       TargetOrder =
9999           (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10000       // Skip if mask of other elements don't equal our expected order.
10001       if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10002         // We only need the last 3 bits for the number of shifts.
10003         ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10004                          : BigEndianShifts[MaskOneElt & 0x7];
10005         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10006         Swap = MaskOneElt < NumHalfWords;
10007         FoundCandidate = true;
10008         break;
10009       }
10010     }
10011   }
10012
10013   if (!FoundCandidate)
10014     return SDValue();
10015
10016   // Candidate found, construct the proper SDAG sequence with VINSERTH,
10017   // optionally with VECSHL if shift is required.
10018   if (Swap)
10019     std::swap(V1, V2);
10020   if (V2.isUndef())
10021     V2 = V1;
10022   SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10023   if (ShiftElts) {
10024     // Double ShiftElts because we're left shifting on v16i8 type.
10025     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10026                               DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10027     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10028     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10029                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
10030     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10031   }
10032   SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10033   SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10034                             DAG.getConstant(InsertAtByte, dl, MVT::i32));
10035   return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10036 }
10037
10038 /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10039 /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10040 /// return the default SDValue.
10041 SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10042                                               SelectionDAG &DAG) const {
10043   // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10044   // to v16i8. Peek through the bitcasts to get the actual operands.
10045   SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10046   SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10047
10048   auto ShuffleMask = SVN->getMask();
10049   SDValue VecShuffle(SVN, 0);
10050   SDLoc DL(SVN);
10051
10052   // Check that we have a four byte shuffle.
10053   if (!isNByteElemShuffleMask(SVN, 4, 1))
10054     return SDValue();
10055
10056   // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10057   if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10058     std::swap(LHS, RHS);
10059     VecShuffle = peekThroughBitcasts(DAG.getCommutedVectorShuffle(*SVN));
10060     ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10061     if (!CommutedSV)
10062       return SDValue();
10063     ShuffleMask = CommutedSV->getMask();
10064   }
10065
10066   // Ensure that the RHS is a vector of constants.
10067   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10068   if (!BVN)
10069     return SDValue();
10070
10071   // Check if RHS is a splat of 4-bytes (or smaller).
10072   APInt APSplatValue, APSplatUndef;
10073   unsigned SplatBitSize;
10074   bool HasAnyUndefs;
10075   if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10076                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10077       SplatBitSize > 32)
10078     return SDValue();
10079
10080   // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10081   // The instruction splats a constant C into two words of the source vector
10082   // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10083   // Thus we check that the shuffle mask is the equivalent  of
10084   // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10085   // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10086   // within each word are consecutive, so we only need to check the first byte.
10087   SDValue Index;
10088   bool IsLE = Subtarget.isLittleEndian();
10089   if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10090       (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10091        ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10092     Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10093   else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10094            (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10095             ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10096     Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10097   else
10098     return SDValue();
10099
10100   // If the splat is narrower than 32-bits, we need to get the 32-bit value
10101   // for XXSPLTI32DX.
10102   unsigned SplatVal = APSplatValue.getZExtValue();
10103   for (; SplatBitSize < 32; SplatBitSize <<= 1)
10104     SplatVal |= (SplatVal << SplatBitSize);
10105
10106   SDValue SplatNode = DAG.getNode(
10107       PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10108       Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10109   return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10110 }
10111
10112 /// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10113 /// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10114 /// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10115 /// i.e (or (shl x, C1), (srl x, 128-C1)).
10116 SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10117   assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10118   assert(Op.getValueType() == MVT::v1i128 &&
10119          "Only set v1i128 as custom, other type shouldn't reach here!");
10120   SDLoc dl(Op);
10121   SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10122   SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10123   unsigned SHLAmt = N1.getConstantOperandVal(0);
10124   if (SHLAmt % 8 == 0) {
10125     std::array<int, 16> Mask;
10126     std::iota(Mask.begin(), Mask.end(), 0);
10127     std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10128     if (SDValue Shuffle =
10129             DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10130                                  DAG.getUNDEF(MVT::v16i8), Mask))
10131       return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10132   }
10133   SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10134   SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10135                               DAG.getConstant(SHLAmt, dl, MVT::i32));
10136   SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10137                               DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10138   SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10139   return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10140 }
10141
10142 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
10143 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
10144 /// return the code it can be lowered into.  Worst case, it can always be
10145 /// lowered into a vperm.
10146 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10147                                                SelectionDAG &DAG) const {
10148   SDLoc dl(Op);
10149   SDValue V1 = Op.getOperand(0);
10150   SDValue V2 = Op.getOperand(1);
10151   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10152
10153   // Any nodes that were combined in the target-independent combiner prior
10154   // to vector legalization will not be sent to the target combine. Try to
10155   // combine it here.
10156   if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10157     if (!isa<ShuffleVectorSDNode>(NewShuffle))
10158       return NewShuffle;
10159     Op = NewShuffle;
10160     SVOp = cast<ShuffleVectorSDNode>(Op);
10161     V1 = Op.getOperand(0);
10162     V2 = Op.getOperand(1);
10163   }
10164   EVT VT = Op.getValueType();
10165   bool isLittleEndian = Subtarget.isLittleEndian();
10166
10167   unsigned ShiftElts, InsertAtByte;
10168   bool Swap = false;
10169
10170   // If this is a load-and-splat, we can do that with a single instruction
10171   // in some cases. However if the load has multiple uses, we don't want to
10172   // combine it because that will just produce multiple loads.
10173   bool IsPermutedLoad = false;
10174   const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10175   if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10176       (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10177       InputLoad->hasOneUse()) {
10178     bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10179     int SplatIdx =
10180       PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10181
10182     // The splat index for permuted loads will be in the left half of the vector
10183     // which is strictly wider than the loaded value by 8 bytes. So we need to
10184     // adjust the splat index to point to the correct address in memory.
10185     if (IsPermutedLoad) {
10186       assert((isLittleEndian || IsFourByte) &&
10187              "Unexpected size for permuted load on big endian target");
10188       SplatIdx += IsFourByte ? 2 : 1;
10189       assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10190              "Splat of a value outside of the loaded memory");
10191     }
10192
10193     LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10194     // For 4-byte load-and-splat, we need Power9.
10195     if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10196       uint64_t Offset = 0;
10197       if (IsFourByte)
10198         Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10199       else
10200         Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10201
10202       // If the width of the load is the same as the width of the splat,
10203       // loading with an offset would load the wrong memory.
10204       if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10205         Offset = 0;
10206
10207       SDValue BasePtr = LD->getBasePtr();
10208       if (Offset != 0)
10209         BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10210                               BasePtr, DAG.getIntPtrConstant(Offset, dl));
10211       SDValue Ops[] = {
10212         LD->getChain(),    // Chain
10213         BasePtr,           // BasePtr
10214         DAG.getValueType(Op.getValueType()) // VT
10215       };
10216       SDVTList VTL =
10217         DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10218       SDValue LdSplt =
10219         DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10220                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
10221       DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10222       if (LdSplt.getValueType() != SVOp->getValueType(0))
10223         LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10224       return LdSplt;
10225     }
10226   }
10227
10228   // All v2i64 and v2f64 shuffles are legal
10229   if (VT == MVT::v2i64 || VT == MVT::v2f64)
10230     return Op;
10231
10232   if (Subtarget.hasP9Vector() &&
10233       PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10234                            isLittleEndian)) {
10235     if (V2.isUndef())
10236       V2 = V1;
10237     else if (Swap)
10238       std::swap(V1, V2);
10239     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10240     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10241     if (ShiftElts) {
10242       SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10243                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
10244       SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10245                                 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10246       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10247     }
10248     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10249                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
10250     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10251   }
10252
10253   if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10254     SDValue SplatInsertNode;
10255     if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10256       return SplatInsertNode;
10257   }
10258
10259   if (Subtarget.hasP9Altivec()) {
10260     SDValue NewISDNode;
10261     if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10262       return NewISDNode;
10263
10264     if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10265       return NewISDNode;
10266   }
10267
10268   if (Subtarget.hasVSX() &&
10269       PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10270     if (Swap)
10271       std::swap(V1, V2);
10272     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10273     SDValue Conv2 =
10274         DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10275
10276     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10277                               DAG.getConstant(ShiftElts, dl, MVT::i32));
10278     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10279   }
10280
10281   if (Subtarget.hasVSX() &&
10282     PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10283     if (Swap)
10284       std::swap(V1, V2);
10285     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10286     SDValue Conv2 =
10287         DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10288
10289     SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10290                               DAG.getConstant(ShiftElts, dl, MVT::i32));
10291     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10292   }
10293
10294   if (Subtarget.hasP9Vector()) {
10295      if (PPC::isXXBRHShuffleMask(SVOp)) {
10296       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10297       SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10298       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10299     } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10300       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10301       SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10302       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10303     } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10304       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10305       SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10306       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10307     } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10308       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10309       SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10310       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10311     }
10312   }
10313
10314   if (Subtarget.hasVSX()) {
10315     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10316       int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10317
10318       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10319       SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10320                                   DAG.getConstant(SplatIdx, dl, MVT::i32));
10321       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10322     }
10323
10324     // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10325     if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10326       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10327       SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10328       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10329     }
10330   }
10331
10332   // Cases that are handled by instructions that take permute immediates
10333   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10334   // selected by the instruction selector.
10335   if (V2.isUndef()) {
10336     if (PPC::isSplatShuffleMask(SVOp, 1) ||
10337         PPC::isSplatShuffleMask(SVOp, 2) ||
10338         PPC::isSplatShuffleMask(SVOp, 4) ||
10339         PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10340         PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10341         PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10342         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10343         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10344         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10345         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10346         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10347         PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10348         (Subtarget.hasP8Altivec() && (
10349          PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10350          PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10351          PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10352       return Op;
10353     }
10354   }
10355
10356   // Altivec has a variety of "shuffle immediates" that take two vector inputs
10357   // and produce a fixed permutation.  If any of these match, do not lower to
10358   // VPERM.
10359   unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10360   if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10361       PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10362       PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10363       PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10364       PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10365       PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10366       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10367       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10368       PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10369       (Subtarget.hasP8Altivec() && (
10370        PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10371        PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10372        PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10373     return Op;
10374
10375   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
10376   // perfect shuffle table to emit an optimal matching sequence.
10377   ArrayRef<int> PermMask = SVOp->getMask();
10378
10379   if (!DisablePerfectShuffle && !isLittleEndian) {
10380     unsigned PFIndexes[4];
10381     bool isFourElementShuffle = true;
10382     for (unsigned i = 0; i != 4 && isFourElementShuffle;
10383          ++i) {                           // Element number
10384       unsigned EltNo = 8;                 // Start out undef.
10385       for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10386         if (PermMask[i * 4 + j] < 0)
10387           continue; // Undef, ignore it.
10388
10389         unsigned ByteSource = PermMask[i * 4 + j];
10390         if ((ByteSource & 3) != j) {
10391           isFourElementShuffle = false;
10392           break;
10393         }
10394
10395         if (EltNo == 8) {
10396           EltNo = ByteSource / 4;
10397         } else if (EltNo != ByteSource / 4) {
10398           isFourElementShuffle = false;
10399           break;
10400         }
10401       }
10402       PFIndexes[i] = EltNo;
10403     }
10404
10405     // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10406     // perfect shuffle vector to determine if it is cost effective to do this as
10407     // discrete instructions, or whether we should use a vperm.
10408     // For now, we skip this for little endian until such time as we have a
10409     // little-endian perfect shuffle table.
10410     if (isFourElementShuffle) {
10411       // Compute the index in the perfect shuffle table.
10412       unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10413                               PFIndexes[2] * 9 + PFIndexes[3];
10414
10415       unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10416       unsigned Cost = (PFEntry >> 30);
10417
10418       // Determining when to avoid vperm is tricky.  Many things affect the cost
10419       // of vperm, particularly how many times the perm mask needs to be
10420       // computed. For example, if the perm mask can be hoisted out of a loop or
10421       // is already used (perhaps because there are multiple permutes with the
10422       // same shuffle mask?) the vperm has a cost of 1.  OTOH, hoisting the
10423       // permute mask out of the loop requires an extra register.
10424       //
10425       // As a compromise, we only emit discrete instructions if the shuffle can
10426       // be generated in 3 or fewer operations.  When we have loop information
10427       // available, if this block is within a loop, we should avoid using vperm
10428       // for 3-operation perms and use a constant pool load instead.
10429       if (Cost < 3)
10430         return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10431     }
10432   }
10433
10434   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10435   // vector that will get spilled to the constant pool.
10436   if (V2.isUndef()) V2 = V1;
10437
10438   return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10439 }
10440
10441 SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10442                                       ArrayRef<int> PermMask, EVT VT,
10443                                       SDValue V1, SDValue V2) const {
10444   unsigned Opcode = PPCISD::VPERM;
10445   EVT ValType = V1.getValueType();
10446   SDLoc dl(Op);
10447   bool NeedSwap = false;
10448   bool isLittleEndian = Subtarget.isLittleEndian();
10449   bool isPPC64 = Subtarget.isPPC64();
10450
10451   if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10452       (V1->hasOneUse() || V2->hasOneUse())) {
10453     LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10454                          "XXPERM instead\n");
10455     Opcode = PPCISD::XXPERM;
10456
10457     // The second input to XXPERM is also an output so if the second input has
10458     // multiple uses then copying is necessary, as a result we want the
10459     // single-use operand to be used as the second input to prevent copying.
10460     if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10461         (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10462       std::swap(V1, V2);
10463       NeedSwap = !NeedSwap;
10464     }
10465   }
10466
10467   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10468   // that it is in input element units, not in bytes.  Convert now.
10469
10470   // For little endian, the order of the input vectors is reversed, and
10471   // the permutation mask is complemented with respect to 31.  This is
10472   // necessary to produce proper semantics with the big-endian-based vperm
10473   // instruction.
10474   EVT EltVT = V1.getValueType().getVectorElementType();
10475   unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10476
10477   bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10478   bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10479
10480   /*
10481   Vectors will be appended like so: [ V1 | v2 ]
10482   XXSWAPD on V1:
10483   [   A   |   B   |   C   |   D   ] -> [   C   |   D   |   A   |   B   ]
10484      0-3     4-7     8-11   12-15         0-3     4-7     8-11   12-15
10485   i.e.  index of A, B += 8, and index of C, D -= 8.
10486   XXSWAPD on V2:
10487   [   E   |   F   |   G   |   H   ] -> [   G   |   H   |   E   |   F   ]
10488     16-19   20-23   24-27   28-31        16-19   20-23   24-27   28-31
10489   i.e.  index of E, F += 8, index of G, H -= 8
10490   Swap V1 and V2:
10491   [   V1   |   V2  ] -> [   V2   |   V1   ]
10492      0-15     16-31        0-15     16-31
10493   i.e.  index of V1 += 16, index of V2 -= 16
10494   */
10495
10496   SmallVector<SDValue, 16> ResultMask;
10497   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10498     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10499
10500     if (V1HasXXSWAPD) {
10501       if (SrcElt < 8)
10502         SrcElt += 8;
10503       else if (SrcElt < 16)
10504         SrcElt -= 8;
10505     }
10506     if (V2HasXXSWAPD) {
10507       if (SrcElt > 23)
10508         SrcElt -= 8;
10509       else if (SrcElt > 15)
10510         SrcElt += 8;
10511     }
10512     if (NeedSwap) {
10513       if (SrcElt < 16)
10514         SrcElt += 16;
10515       else
10516         SrcElt -= 16;
10517     }
10518     for (unsigned j = 0; j != BytesPerElement; ++j)
10519       if (isLittleEndian)
10520         ResultMask.push_back(
10521             DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10522       else
10523         ResultMask.push_back(
10524             DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10525   }
10526
10527   if (V1HasXXSWAPD) {
10528     dl = SDLoc(V1->getOperand(0));
10529     V1 = V1->getOperand(0)->getOperand(1);
10530   }
10531   if (V2HasXXSWAPD) {
10532     dl = SDLoc(V2->getOperand(0));
10533     V2 = V2->getOperand(0)->getOperand(1);
10534   }
10535
10536   if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10537     if (ValType != MVT::v2f64)
10538       V1 = DAG.getBitcast(MVT::v2f64, V1);
10539     if (V2.getValueType() != MVT::v2f64)
10540       V2 = DAG.getBitcast(MVT::v2f64, V2);
10541   }
10542
10543   ShufflesHandledWithVPERM++;
10544   SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10545   LLVM_DEBUG({
10546     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10547     if (Opcode == PPCISD::XXPERM) {
10548       dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10549     } else {
10550       dbgs() << "Emitting a VPERM for the following shuffle:\n";
10551     }
10552     SVOp->dump();
10553     dbgs() << "With the following permute control vector:\n";
10554     VPermMask.dump();
10555   });
10556
10557   if (Opcode == PPCISD::XXPERM)
10558     VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10559
10560   // Only need to place items backwards in LE,
10561   // the mask was properly calculated.
10562   if (isLittleEndian)
10563     std::swap(V1, V2);
10564
10565   SDValue VPERMNode =
10566       DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10567
10568   VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10569   return VPERMNode;
10570 }
10571
10572 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10573 /// vector comparison.  If it is, return true and fill in Opc/isDot with
10574 /// information about the intrinsic.
10575 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10576                                  bool &isDot, const PPCSubtarget &Subtarget) {
10577   unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10578   CompareOpc = -1;
10579   isDot = false;
10580   switch (IntrinsicID) {
10581   default:
10582     return false;
10583   // Comparison predicates.
10584   case Intrinsic::ppc_altivec_vcmpbfp_p:
10585     CompareOpc = 966;
10586     isDot = true;
10587     break;
10588   case Intrinsic::ppc_altivec_vcmpeqfp_p:
10589     CompareOpc = 198;
10590     isDot = true;
10591     break;
10592   case Intrinsic::ppc_altivec_vcmpequb_p:
10593     CompareOpc = 6;
10594     isDot = true;
10595     break;
10596   case Intrinsic::ppc_altivec_vcmpequh_p:
10597     CompareOpc = 70;
10598     isDot = true;
10599     break;
10600   case Intrinsic::ppc_altivec_vcmpequw_p:
10601     CompareOpc = 134;
10602     isDot = true;
10603     break;
10604   case Intrinsic::ppc_altivec_vcmpequd_p:
10605     if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10606       CompareOpc = 199;
10607       isDot = true;
10608     } else
10609       return false;
10610     break;
10611   case Intrinsic::ppc_altivec_vcmpneb_p:
10612   case Intrinsic::ppc_altivec_vcmpneh_p:
10613   case Intrinsic::ppc_altivec_vcmpnew_p:
10614   case Intrinsic::ppc_altivec_vcmpnezb_p:
10615   case Intrinsic::ppc_altivec_vcmpnezh_p:
10616   case Intrinsic::ppc_altivec_vcmpnezw_p:
10617     if (Subtarget.hasP9Altivec()) {
10618       switch (IntrinsicID) {
10619       default:
10620         llvm_unreachable("Unknown comparison intrinsic.");
10621       case Intrinsic::ppc_altivec_vcmpneb_p:
10622         CompareOpc = 7;
10623         break;
10624       case Intrinsic::ppc_altivec_vcmpneh_p:
10625         CompareOpc = 71;
10626         break;
10627       case Intrinsic::ppc_altivec_vcmpnew_p:
10628         CompareOpc = 135;
10629         break;
10630       case Intrinsic::ppc_altivec_vcmpnezb_p:
10631         CompareOpc = 263;
10632         break;
10633       case Intrinsic::ppc_altivec_vcmpnezh_p:
10634         CompareOpc = 327;
10635         break;
10636       case Intrinsic::ppc_altivec_vcmpnezw_p:
10637         CompareOpc = 391;
10638         break;
10639       }
10640       isDot = true;
10641     } else
10642       return false;
10643     break;
10644   case Intrinsic::ppc_altivec_vcmpgefp_p:
10645     CompareOpc = 454;
10646     isDot = true;
10647     break;
10648   case Intrinsic::ppc_altivec_vcmpgtfp_p:
10649     CompareOpc = 710;
10650     isDot = true;
10651     break;
10652   case Intrinsic::ppc_altivec_vcmpgtsb_p:
10653     CompareOpc = 774;
10654     isDot = true;
10655     break;
10656   case Intrinsic::ppc_altivec_vcmpgtsh_p:
10657     CompareOpc = 838;
10658     isDot = true;
10659     break;
10660   case Intrinsic::ppc_altivec_vcmpgtsw_p:
10661     CompareOpc = 902;
10662     isDot = true;
10663     break;
10664   case Intrinsic::ppc_altivec_vcmpgtsd_p:
10665     if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10666       CompareOpc = 967;
10667       isDot = true;
10668     } else
10669       return false;
10670     break;
10671   case Intrinsic::ppc_altivec_vcmpgtub_p:
10672     CompareOpc = 518;
10673     isDot = true;
10674     break;
10675   case Intrinsic::ppc_altivec_vcmpgtuh_p:
10676     CompareOpc = 582;
10677     isDot = true;
10678     break;
10679   case Intrinsic::ppc_altivec_vcmpgtuw_p:
10680     CompareOpc = 646;
10681     isDot = true;
10682     break;
10683   case Intrinsic::ppc_altivec_vcmpgtud_p:
10684     if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10685       CompareOpc = 711;
10686       isDot = true;
10687     } else
10688       return false;
10689     break;
10690
10691   case Intrinsic::ppc_altivec_vcmpequq:
10692   case Intrinsic::ppc_altivec_vcmpgtsq:
10693   case Intrinsic::ppc_altivec_vcmpgtuq:
10694     if (!Subtarget.isISA3_1())
10695       return false;
10696     switch (IntrinsicID) {
10697     default:
10698       llvm_unreachable("Unknown comparison intrinsic.");
10699     case Intrinsic::ppc_altivec_vcmpequq:
10700       CompareOpc = 455;
10701       break;
10702     case Intrinsic::ppc_altivec_vcmpgtsq:
10703       CompareOpc = 903;
10704       break;
10705     case Intrinsic::ppc_altivec_vcmpgtuq:
10706       CompareOpc = 647;
10707       break;
10708     }
10709     break;
10710
10711   // VSX predicate comparisons use the same infrastructure
10712   case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10713   case Intrinsic::ppc_vsx_xvcmpgedp_p:
10714   case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10715   case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10716   case Intrinsic::ppc_vsx_xvcmpgesp_p:
10717   case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10718     if (Subtarget.hasVSX()) {
10719       switch (IntrinsicID) {
10720       case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10721         CompareOpc = 99;
10722         break;
10723       case Intrinsic::ppc_vsx_xvcmpgedp_p:
10724         CompareOpc = 115;
10725         break;
10726       case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10727         CompareOpc = 107;
10728         break;
10729       case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10730         CompareOpc = 67;
10731         break;
10732       case Intrinsic::ppc_vsx_xvcmpgesp_p:
10733         CompareOpc = 83;
10734         break;
10735       case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10736         CompareOpc = 75;
10737         break;
10738       }
10739       isDot = true;
10740     } else
10741       return false;
10742     break;
10743
10744   // Normal Comparisons.
10745   case Intrinsic::ppc_altivec_vcmpbfp:
10746     CompareOpc = 966;
10747     break;
10748   case Intrinsic::ppc_altivec_vcmpeqfp:
10749     CompareOpc = 198;
10750     break;
10751   case Intrinsic::ppc_altivec_vcmpequb:
10752     CompareOpc = 6;
10753     break;
10754   case Intrinsic::ppc_altivec_vcmpequh:
10755     CompareOpc = 70;
10756     break;
10757   case Intrinsic::ppc_altivec_vcmpequw:
10758     CompareOpc = 134;
10759     break;
10760   case Intrinsic::ppc_altivec_vcmpequd:
10761     if (Subtarget.hasP8Altivec())
10762       CompareOpc = 199;
10763     else
10764       return false;
10765     break;
10766   case Intrinsic::ppc_altivec_vcmpneb:
10767   case Intrinsic::ppc_altivec_vcmpneh:
10768   case Intrinsic::ppc_altivec_vcmpnew:
10769   case Intrinsic::ppc_altivec_vcmpnezb:
10770   case Intrinsic::ppc_altivec_vcmpnezh:
10771   case Intrinsic::ppc_altivec_vcmpnezw:
10772     if (Subtarget.hasP9Altivec())
10773       switch (IntrinsicID) {
10774       default:
10775         llvm_unreachable("Unknown comparison intrinsic.");
10776       case Intrinsic::ppc_altivec_vcmpneb:
10777         CompareOpc = 7;
10778         break;
10779       case Intrinsic::ppc_altivec_vcmpneh:
10780         CompareOpc = 71;
10781         break;
10782       case Intrinsic::ppc_altivec_vcmpnew:
10783         CompareOpc = 135;
10784         break;
10785       case Intrinsic::ppc_altivec_vcmpnezb:
10786         CompareOpc = 263;
10787         break;
10788       case Intrinsic::ppc_altivec_vcmpnezh:
10789         CompareOpc = 327;
10790         break;
10791       case Intrinsic::ppc_altivec_vcmpnezw:
10792         CompareOpc = 391;
10793         break;
10794       }
10795     else
10796       return false;
10797     break;
10798   case Intrinsic::ppc_altivec_vcmpgefp:
10799     CompareOpc = 454;
10800     break;
10801   case Intrinsic::ppc_altivec_vcmpgtfp:
10802     CompareOpc = 710;
10803     break;
10804   case Intrinsic::ppc_altivec_vcmpgtsb:
10805     CompareOpc = 774;
10806     break;
10807   case Intrinsic::ppc_altivec_vcmpgtsh:
10808     CompareOpc = 838;
10809     break;
10810   case Intrinsic::ppc_altivec_vcmpgtsw:
10811     CompareOpc = 902;
10812     break;
10813   case Intrinsic::ppc_altivec_vcmpgtsd:
10814     if (Subtarget.hasP8Altivec())
10815       CompareOpc = 967;
10816     else
10817       return false;
10818     break;
10819   case Intrinsic::ppc_altivec_vcmpgtub:
10820     CompareOpc = 518;
10821     break;
10822   case Intrinsic::ppc_altivec_vcmpgtuh:
10823     CompareOpc = 582;
10824     break;
10825   case Intrinsic::ppc_altivec_vcmpgtuw:
10826     CompareOpc = 646;
10827     break;
10828   case Intrinsic::ppc_altivec_vcmpgtud:
10829     if (Subtarget.hasP8Altivec())
10830       CompareOpc = 711;
10831     else
10832       return false;
10833     break;
10834   case Intrinsic::ppc_altivec_vcmpequq_p:
10835   case Intrinsic::ppc_altivec_vcmpgtsq_p:
10836   case Intrinsic::ppc_altivec_vcmpgtuq_p:
10837     if (!Subtarget.isISA3_1())
10838       return false;
10839     switch (IntrinsicID) {
10840     default:
10841       llvm_unreachable("Unknown comparison intrinsic.");
10842     case Intrinsic::ppc_altivec_vcmpequq_p:
10843       CompareOpc = 455;
10844       break;
10845     case Intrinsic::ppc_altivec_vcmpgtsq_p:
10846       CompareOpc = 903;
10847       break;
10848     case Intrinsic::ppc_altivec_vcmpgtuq_p:
10849       CompareOpc = 647;
10850       break;
10851     }
10852     isDot = true;
10853     break;
10854   }
10855   return true;
10856 }
10857
10858 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10859 /// lower, do it, otherwise return null.
10860 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10861                                                    SelectionDAG &DAG) const {
10862   unsigned IntrinsicID = Op.getConstantOperandVal(0);
10863
10864   SDLoc dl(Op);
10865
10866   switch (IntrinsicID) {
10867   case Intrinsic::thread_pointer:
10868     // Reads the thread pointer register, used for __builtin_thread_pointer.
10869     if (Subtarget.isPPC64())
10870       return DAG.getRegister(PPC::X13, MVT::i64);
10871     return DAG.getRegister(PPC::R2, MVT::i32);
10872
10873   case Intrinsic::ppc_rldimi: {
10874     assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
10875     SDValue Src = Op.getOperand(1);
10876     APInt Mask = Op.getConstantOperandAPInt(4);
10877     if (Mask.isZero())
10878       return Op.getOperand(2);
10879     if (Mask.isAllOnes())
10880       return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
10881     uint64_t SH = Op.getConstantOperandVal(3);
10882     unsigned MB = 0, ME = 0;
10883     if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
10884       report_fatal_error("invalid rldimi mask!");
10885     // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
10886     if (ME < 63 - SH) {
10887       Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10888                         DAG.getConstant(ME + SH + 1, dl, MVT::i32));
10889     } else if (ME > 63 - SH) {
10890       Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10891                         DAG.getConstant(ME + SH - 63, dl, MVT::i32));
10892     }
10893     return SDValue(
10894         DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
10895                            {Op.getOperand(2), Src,
10896                             DAG.getTargetConstant(63 - ME, dl, MVT::i32),
10897                             DAG.getTargetConstant(MB, dl, MVT::i32)}),
10898         0);
10899   }
10900
10901   case Intrinsic::ppc_rlwimi: {
10902     APInt Mask = Op.getConstantOperandAPInt(4);
10903     if (Mask.isZero())
10904       return Op.getOperand(2);
10905     if (Mask.isAllOnes())
10906       return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
10907                          Op.getOperand(3));
10908     unsigned MB = 0, ME = 0;
10909     if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
10910       report_fatal_error("invalid rlwimi mask!");
10911     return SDValue(DAG.getMachineNode(
10912                        PPC::RLWIMI, dl, MVT::i32,
10913                        {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
10914                         DAG.getTargetConstant(MB, dl, MVT::i32),
10915                         DAG.getTargetConstant(ME, dl, MVT::i32)}),
10916                    0);
10917   }
10918
10919   case Intrinsic::ppc_rlwnm: {
10920     if (Op.getConstantOperandVal(3) == 0)
10921       return DAG.getConstant(0, dl, MVT::i32);
10922     unsigned MB = 0, ME = 0;
10923     if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
10924       report_fatal_error("invalid rlwnm mask!");
10925     return SDValue(
10926         DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
10927                            {Op.getOperand(1), Op.getOperand(2),
10928                             DAG.getTargetConstant(MB, dl, MVT::i32),
10929                             DAG.getTargetConstant(ME, dl, MVT::i32)}),
10930         0);
10931   }
10932
10933   case Intrinsic::ppc_mma_disassemble_acc: {
10934     if (Subtarget.isISAFuture()) {
10935       EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
10936       SDValue WideVec =
10937           SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
10938                                      Op.getOperand(1)),
10939                   0);
10940       SmallVector<SDValue, 4> RetOps;
10941       SDValue Value = SDValue(WideVec.getNode(), 0);
10942       SDValue Value2 = SDValue(WideVec.getNode(), 1);
10943
10944       SDValue Extract;
10945       Extract = DAG.getNode(
10946           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10947           Subtarget.isLittleEndian() ? Value2 : Value,
10948           DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10949                           dl, getPointerTy(DAG.getDataLayout())));
10950       RetOps.push_back(Extract);
10951       Extract = DAG.getNode(
10952           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10953           Subtarget.isLittleEndian() ? Value2 : Value,
10954           DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10955                           dl, getPointerTy(DAG.getDataLayout())));
10956       RetOps.push_back(Extract);
10957       Extract = DAG.getNode(
10958           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10959           Subtarget.isLittleEndian() ? Value : Value2,
10960           DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10961                           dl, getPointerTy(DAG.getDataLayout())));
10962       RetOps.push_back(Extract);
10963       Extract = DAG.getNode(
10964           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10965           Subtarget.isLittleEndian() ? Value : Value2,
10966           DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10967                           dl, getPointerTy(DAG.getDataLayout())));
10968       RetOps.push_back(Extract);
10969       return DAG.getMergeValues(RetOps, dl);
10970     }
10971     [[fallthrough]];
10972   }
10973   case Intrinsic::ppc_vsx_disassemble_pair: {
10974     int NumVecs = 2;
10975     SDValue WideVec = Op.getOperand(1);
10976     if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10977       NumVecs = 4;
10978       WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10979     }
10980     SmallVector<SDValue, 4> RetOps;
10981     for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10982       SDValue Extract = DAG.getNode(
10983           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10984           DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10985                                                      : VecNo,
10986                           dl, getPointerTy(DAG.getDataLayout())));
10987       RetOps.push_back(Extract);
10988     }
10989     return DAG.getMergeValues(RetOps, dl);
10990   }
10991
10992   case Intrinsic::ppc_mma_xxmfacc:
10993   case Intrinsic::ppc_mma_xxmtacc: {
10994     // Allow pre-isa-future subtargets to lower as normal.
10995     if (!Subtarget.isISAFuture())
10996       return SDValue();
10997     // The intrinsics for xxmtacc and xxmfacc take one argument of
10998     // type v512i1, for future cpu the corresponding wacc instruction
10999     // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
11000     // the need to produce the xxm[t|f]acc.
11001     SDValue WideVec = Op.getOperand(1);
11002     DAG.ReplaceAllUsesWith(Op, WideVec);
11003     return SDValue();
11004   }
11005
11006   case Intrinsic::ppc_unpack_longdouble: {
11007     auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11008     assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11009            "Argument of long double unpack must be 0 or 1!");
11010     return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11011                        DAG.getConstant(!!(Idx->getSExtValue()), dl,
11012                                        Idx->getValueType(0)));
11013   }
11014
11015   case Intrinsic::ppc_compare_exp_lt:
11016   case Intrinsic::ppc_compare_exp_gt:
11017   case Intrinsic::ppc_compare_exp_eq:
11018   case Intrinsic::ppc_compare_exp_uo: {
11019     unsigned Pred;
11020     switch (IntrinsicID) {
11021     case Intrinsic::ppc_compare_exp_lt:
11022       Pred = PPC::PRED_LT;
11023       break;
11024     case Intrinsic::ppc_compare_exp_gt:
11025       Pred = PPC::PRED_GT;
11026       break;
11027     case Intrinsic::ppc_compare_exp_eq:
11028       Pred = PPC::PRED_EQ;
11029       break;
11030     case Intrinsic::ppc_compare_exp_uo:
11031       Pred = PPC::PRED_UN;
11032       break;
11033     }
11034     return SDValue(
11035         DAG.getMachineNode(
11036             PPC::SELECT_CC_I4, dl, MVT::i32,
11037             {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11038                                         Op.getOperand(1), Op.getOperand(2)),
11039                      0),
11040              DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11041              DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11042         0);
11043   }
11044   case Intrinsic::ppc_test_data_class: {
11045     EVT OpVT = Op.getOperand(1).getValueType();
11046     unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11047                                          : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11048                                                              : PPC::XSTSTDCSP);
11049     return SDValue(
11050         DAG.getMachineNode(
11051             PPC::SELECT_CC_I4, dl, MVT::i32,
11052             {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11053                                         Op.getOperand(1)),
11054                      0),
11055              DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11056              DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11057         0);
11058   }
11059   case Intrinsic::ppc_fnmsub: {
11060     EVT VT = Op.getOperand(1).getValueType();
11061     if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11062       return DAG.getNode(
11063           ISD::FNEG, dl, VT,
11064           DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11065                       DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11066     return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11067                        Op.getOperand(2), Op.getOperand(3));
11068   }
11069   case Intrinsic::ppc_convert_f128_to_ppcf128:
11070   case Intrinsic::ppc_convert_ppcf128_to_f128: {
11071     RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11072                             ? RTLIB::CONVERT_PPCF128_F128
11073                             : RTLIB::CONVERT_F128_PPCF128;
11074     MakeLibCallOptions CallOptions;
11075     std::pair<SDValue, SDValue> Result =
11076         makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11077                     dl, SDValue());
11078     return Result.first;
11079   }
11080   case Intrinsic::ppc_maxfe:
11081   case Intrinsic::ppc_maxfl:
11082   case Intrinsic::ppc_maxfs:
11083   case Intrinsic::ppc_minfe:
11084   case Intrinsic::ppc_minfl:
11085   case Intrinsic::ppc_minfs: {
11086     EVT VT = Op.getValueType();
11087     assert(
11088         all_of(Op->ops().drop_front(4),
11089                [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11090         "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11091     (void)VT;
11092     ISD::CondCode CC = ISD::SETGT;
11093     if (IntrinsicID == Intrinsic::ppc_minfe ||
11094         IntrinsicID == Intrinsic::ppc_minfl ||
11095         IntrinsicID == Intrinsic::ppc_minfs)
11096       CC = ISD::SETLT;
11097     unsigned I = Op.getNumOperands() - 2, Cnt = I;
11098     SDValue Res = Op.getOperand(I);
11099     for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11100       Res =
11101           DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11102     }
11103     return Res;
11104   }
11105   }
11106
11107   // If this is a lowered altivec predicate compare, CompareOpc is set to the
11108   // opcode number of the comparison.
11109   int CompareOpc;
11110   bool isDot;
11111   if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11112     return SDValue();    // Don't custom lower most intrinsics.
11113
11114   // If this is a non-dot comparison, make the VCMP node and we are done.
11115   if (!isDot) {
11116     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11117                               Op.getOperand(1), Op.getOperand(2),
11118                               DAG.getConstant(CompareOpc, dl, MVT::i32));
11119     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11120   }
11121
11122   // Create the PPCISD altivec 'dot' comparison node.
11123   SDValue Ops[] = {
11124     Op.getOperand(2),  // LHS
11125     Op.getOperand(3),  // RHS
11126     DAG.getConstant(CompareOpc, dl, MVT::i32)
11127   };
11128   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11129   SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11130
11131   // Now that we have the comparison, emit a copy from the CR to a GPR.
11132   // This is flagged to the above dot comparison.
11133   SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11134                                 DAG.getRegister(PPC::CR6, MVT::i32),
11135                                 CompNode.getValue(1));
11136
11137   // Unpack the result based on how the target uses it.
11138   unsigned BitNo;   // Bit # of CR6.
11139   bool InvertBit;   // Invert result?
11140   switch (Op.getConstantOperandVal(1)) {
11141   default:  // Can't happen, don't crash on invalid number though.
11142   case 0:   // Return the value of the EQ bit of CR6.
11143     BitNo = 0; InvertBit = false;
11144     break;
11145   case 1:   // Return the inverted value of the EQ bit of CR6.
11146     BitNo = 0; InvertBit = true;
11147     break;
11148   case 2:   // Return the value of the LT bit of CR6.
11149     BitNo = 2; InvertBit = false;
11150     break;
11151   case 3:   // Return the inverted value of the LT bit of CR6.
11152     BitNo = 2; InvertBit = true;
11153     break;
11154   }
11155
11156   // Shift the bit into the low position.
11157   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11158                       DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11159   // Isolate the bit.
11160   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11161                       DAG.getConstant(1, dl, MVT::i32));
11162
11163   // If we are supposed to, toggle the bit.
11164   if (InvertBit)
11165     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11166                         DAG.getConstant(1, dl, MVT::i32));
11167   return Flags;
11168 }
11169
11170 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11171                                                SelectionDAG &DAG) const {
11172   // Custom lowering for void intrinsics. Only llvm.ppc.cfence is handled; an
11173   // empty SDValue is returned for every other intrinsic ID.
11174   // SelectionDAGBuilder::visitTargetIntrinsic may have prepended a chain, in
11175   // which case the intrinsic ID is operand 1 rather than operand 0.
11176   const bool HasChain = !isa<ConstantSDNode>(Op.getOperand(0));
11177   const int IDIdx = HasChain ? 1 : 0;
11178   SDLoc DL(Op);
11179   if (Op.getConstantOperandVal(IDIdx) != Intrinsic::ppc_cfence)
11180     return SDValue();
11181
11182   assert(IDIdx == 1 && "llvm.ppc.cfence must carry a chain argument.");
11183   SDValue Fenced = Op.getOperand(IDIdx + 1);
11184   // FIXME: Testing one of two paired registers is sufficient to guarantee
11185   // ordering?
11186   if (Fenced.getValueType() == MVT::i128)
11187     Fenced = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Fenced);
11188
11189   // The pseudo opcode and the width the operand is extended to both depend
11190   // on whether the subtarget is 64-bit.
11191   const bool Is64Bit = Subtarget.isPPC64();
11192   const unsigned FenceOpc = Is64Bit ? PPC::CFENCE8 : PPC::CFENCE;
11193   const EVT FenceVT = Is64Bit ? MVT::i64 : MVT::i32;
11194   SDValue Widened = DAG.getNode(ISD::ANY_EXTEND, DL, FenceVT, Fenced);
11195   return SDValue(DAG.getMachineNode(FenceOpc, DL, MVT::Other, Widened,
11196                                     Op.getOperand(0)),
11197                  0);
11198 }
11199
11200 // Lower a scalar BSWAP64 through the vector byte-reverse xxbrd.
11201 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11202   SDLoc dl(Op);
11203   // Subtargets that are not 64-bit get the node back unchanged.
11204   if (!Subtarget.isPPC64())
11205     return Op;
11206
11207   // MTVSRDD: build a v2i64 holding the scalar in both elements.
11208   SDValue Scalar = Op.getOperand(0);
11209   SDValue Pair =
11210       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Scalar, Scalar);
11211   // XXBRD: byte-swap the vector.
11212   SDValue Swapped = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Pair);
11213   // MFVSRD: take element 1 on little-endian subtargets, element 0 otherwise.
11214   const int EltNo = Subtarget.isLittleEndian() ? 1 : 0;
11215   SDValue EltIdx = DAG.getTargetConstant(EltNo, dl, MVT::i32);
11216   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Swapped, EltIdx);
11217 }
11218
11219 // For i8/i16, ATOMIC_CMP_SWAP must present a zero-extended compare value: it
11220 // is compared against an atomically loaded value (atomic loads zero-extend).
11221 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11222                                                 SelectionDAG &DAG) const {
11223   assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11224          "Expecting an atomic compare-and-swap here.");
11225   SDLoc dl(Op);
11226   auto *CAS = cast<AtomicSDNode>(Op.getNode());
11227   const EVT MemVT = CAS->getMemoryVT();
11228   const unsigned MemBits = MemVT.getSizeInBits();
11229   // Accesses of 32 bits or more are returned unchanged.
11230   if (MemBits >= 32)
11231     return Op;
11232
11233   // Likewise when the bits above the memory width are already known zero.
11234   SDValue Expected = Op.getOperand(2);
11235   const APInt UpperBits = APInt::getHighBitsSet(32, 32 - MemBits);
11236   if (DAG.MaskedValueIsZero(Expected, UpperBits))
11237     return Op;
11238
11239   // Otherwise mask the compare operand down to the memory width.
11240   const unsigned LowMask = (1 << MemBits) - 1;
11241   SDValue Masked = DAG.getNode(ISD::AND, dl, MVT::i32, Expected,
11242                                DAG.getConstant(LowMask, dl, MVT::i32));
11243
11244   // Copy the operand list, swap the masked value into slot 2, and emit the
11245   // byte- or halfword-sized PPC compare-and-swap node.
11246   SmallVector<SDValue, 4> NewOps(CAS->op_begin(), CAS->op_end());
11247   NewOps[2] = Masked;
11248   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11249   const unsigned NodeTy = MemVT == MVT::i8 ? PPCISD::ATOMIC_CMP_SWAP_8
11250                                            : PPCISD::ATOMIC_CMP_SWAP_16;
11251   return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, NewOps, MemVT,
11252                                  CAS->getMemOperand());
11253 }
11254
11255 SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11256                                                   SelectionDAG &DAG) const {
11257   // Rewrites quadword (i128) atomic loads and stores as the
11258   // ppc_atomic_{load,store}_i128 intrinsics, which use two i64 halves.
11259   auto *Atomic = cast<AtomicSDNode>(Op.getNode());
11260   const EVT MemVT = Atomic->getMemoryVT();
11261   assert(MemVT.getSimpleVT() == MVT::i128 &&
11262          "Expect quadword atomic operations");
11263   SDLoc dl(Atomic);
11264   MachineMemOperand *MMO = Atomic->getMemOperand();
11265   SDValue Chain = Atomic->getOperand(0);
11266   switch (Atomic->getOpcode()) {
11267   case ISD::ATOMIC_LOAD: {
11268     // Operands: chain, intrinsic ID, then the load's remaining operands.
11269     SmallVector<SDValue, 4> Ops{
11270         Chain, DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11271     Ops.append(Atomic->op_begin() + 1, Atomic->op_end());
11272     SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11273     SDValue Halves = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11274                                              Ops, MemVT, MMO);
11275
11276     // Result 0 supplies the low 64 bits and result 1 the high 64 bits; widen
11277     // both to i128, shift the high half into place and OR them together.
11278     SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, Halves);
11279     SDValue Hi =
11280         DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, Halves.getValue(1));
11281     SDValue ShAmt = DAG.getConstant(64, dl, MVT::i32);
11282     Hi = DAG.getNode(ISD::SHL, dl, MVT::i128, Hi, ShAmt);
11283     SDValue Whole =
11284         DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {Lo, Hi});
11285     return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11286                        {Whole, Halves.getValue(2)});
11287   }
11288   case ISD::ATOMIC_STORE: {
11289     // Operand 1 is the i128 value: split it into low and high i64 halves.
11290     SDValue Whole = Atomic->getOperand(1);
11291     SDValue IntrinsicID =
11292         DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32);
11293     SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Whole);
11294     SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i128, Whole,
11295                              DAG.getConstant(64, dl, MVT::i32));
11296     Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Hi);
11297     // Operands: chain, intrinsic ID, low half, high half, operand 2.
11298     SmallVector<SDValue, 4> Ops{Chain, IntrinsicID, Lo, Hi,
11299                                 Atomic->getOperand(2)};
11300     return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl,
11301                                    DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11302   }
11303   default:
11304     llvm_unreachable("Unexpected atomic opcode");
11305   }
11306 }
11307 // Returns an i1 that tests Op against the FP class set Mask via XSTSTDC*.
11308 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11309                                 SelectionDAG &DAG,
11310                                 const PPCSubtarget &Subtarget) {
11311   assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11312   // Bits OR'ed together to form the immediate operand of the test instruction.
11313   enum DataClassMask {
11314     DC_NAN = 1 << 6,
11315     DC_NEG_INF = 1 << 4,
11316     DC_POS_INF = 1 << 5,
11317     DC_NEG_ZERO = 1 << 2,
11318     DC_POS_ZERO = 1 << 3,
11319     DC_NEG_SUBNORM = 1,
11320     DC_POS_SUBNORM = 1 << 1,
11321   };
11322
11323   EVT VT = Op.getValueType();
11324   // Test opcode by type: f128 -> QP, f64 -> DP, anything else -> SP.
11325   unsigned TestOp = VT == MVT::f128  ? PPC::XSTSTDCQP
11326                     : VT == MVT::f64 ? PPC::XSTSTDCDP
11327                                      : PPC::XSTSTDCSP;
11328   // An all-classes or an empty mask folds to a boolean constant.
11329   if (Mask == fcAllFlags)
11330     return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11331   if (Mask == 0)
11332     return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11333
11334   // When it's cheaper or necessary, test the complementary mask and negate it.
11335   if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11336     SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11337     return DAG.getNOT(Dl, Rev, MVT::i1);
11338   }
11339
11340   // Power doesn't support testing whether a value is 'normal'. Test the rest
11341   // first, and test if it's 'not not-normal' with expected sign.
11342   if (Mask & fcNormal) {
11343     SDValue Rev(DAG.getMachineNode(
11344                     TestOp, Dl, MVT::i32,
11345                     DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11346                                               DC_NEG_ZERO | DC_POS_ZERO |
11347                                               DC_NEG_SUBNORM | DC_POS_SUBNORM,
11348                                           Dl, MVT::i32),
11349                     Op),
11350                 0);
11351     // The sign is stored in CR bit 0 and the test result in CR bit 2.
11352     SDValue Sign(
11353         DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11354                            DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11355         0);
11356     SDValue Normal(DAG.getNOT(
11357         Dl,
11358         SDValue(DAG.getMachineNode(
11359                     TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11360                     DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11361                 0),
11362         MVT::i1));
11363     if (Mask & fcPosNormal) // Positive normal requested: invert the sign bit.
11364       Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11365     SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11366     if (Mask == fcPosNormal || Mask == fcNegNormal)
11367       return Result;
11368     // More classes were requested: OR in the test of the non-normal ones.
11369     return DAG.getNode(
11370         ISD::OR, Dl, MVT::i1,
11371         getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11372   }
11373
11374   // The instruction doesn't differentiate between signaling or quiet NaN. Test
11375   // the rest first, and test if it 'is NaN and is signaling/quiet'.
11376   if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11377     bool IsQuiet = Mask & fcQNan;
11378     SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11379     // Quietness is determined by the first bit in fraction field. HighWord is
11380     // set to the 32-bit word holding that bit and QuietMask to the bit itself.
11381     uint64_t QuietMask = 0;
11382     SDValue HighWord;
11383     if (VT == MVT::f128) {
11384       HighWord = DAG.getNode(
11385           ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11386           DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11387       QuietMask = 0x8000;
11388     } else if (VT == MVT::f64) {
11389       if (Subtarget.isPPC64()) {
11390         HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11391                                DAG.getBitcast(MVT::i64, Op),
11392                                DAG.getConstant(1, Dl, MVT::i32));
11393       } else {
11394         SDValue Vec = DAG.getBitcast(
11395             MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11396         HighWord = DAG.getNode(
11397             ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11398             DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11399       }
11400       QuietMask = 0x80000;
11401     } else if (VT == MVT::f32) {
11402       HighWord = DAG.getBitcast(MVT::i32, Op);
11403       QuietMask = 0x400000;
11404     }
11405     SDValue NanRes = DAG.getSetCC(
11406         Dl, MVT::i1,
11407         DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11408                     DAG.getConstant(QuietMask, Dl, MVT::i32)),
11409         DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11410     NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11411     if (Mask == fcQNan || Mask == fcSNan)
11412       return NanRes;
11413     // More classes were requested: OR in the test of the non-NaN ones.
11414     return DAG.getNode(ISD::OR, Dl, MVT::i1,
11415                        getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11416                        NanRes);
11417   }
11418   // Everything left maps directly onto the instruction's class bits.
11419   unsigned NativeMask = 0;
11420   if ((Mask & fcNan) == fcNan)
11421     NativeMask |= DC_NAN;
11422   if (Mask & fcNegInf)
11423     NativeMask |= DC_NEG_INF;
11424   if (Mask & fcPosInf)
11425     NativeMask |= DC_POS_INF;
11426   if (Mask & fcNegZero)
11427     NativeMask |= DC_NEG_ZERO;
11428   if (Mask & fcPosZero)
11429     NativeMask |= DC_POS_ZERO;
11430   if (Mask & fcNegSubnormal)
11431     NativeMask |= DC_NEG_SUBNORM;
11432   if (Mask & fcPosSubnormal)
11433     NativeMask |= DC_POS_SUBNORM;
11434   return SDValue(
11435       DAG.getMachineNode(
11436           TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11437           SDValue(DAG.getMachineNode(
11438                       TestOp, Dl, MVT::i32,
11439                       DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11440                   0),
11441           DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11442       0);
11443 }
11444
11445 SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11446                                            SelectionDAG &DAG) const {
11447   // Operand 0 is the value to classify; operand 1 is a constant that is
11448   // reinterpreted as the FPClassTest mask handed to getDataClassTest.
11449   assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11450   SDLoc Dl(Op);
11451   const auto Classes = static_cast<FPClassTest>(Op.getConstantOperandVal(1));
11452   return getDataClassTest(Op.getOperand(0), Classes, Dl, DAG, Subtarget);
11453 }
11454
11455 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11456                                                  SelectionDAG &DAG) const {
11457   // Goes through memory: spill the scalar to a fresh 16-byte, 16-byte-aligned
11458   // stack slot and reload the slot as the result vector type.
11459   SDLoc dl(Op);
11460   MachineFunction &MF = DAG.getMachineFunction();
11461   const int Slot = MF.getFrameInfo().CreateStackObject(16, Align(16), false);
11462   SDValue SlotPtr = DAG.getFrameIndex(Slot, getPointerTy(DAG.getDataLayout()));
11463   // Store the input value at the start of the slot, off the entry node.
11464   SDValue Spill = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
11465                                SlotPtr, MachinePointerInfo());
11466   // Load it back out, chained after the store.
11467   return DAG.getLoad(Op.getValueType(), dl, Spill, SlotPtr,
11468                      MachinePointerInfo());
11469 }
11470
11471 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11472                                                   SelectionDAG &DAG) const {
11473   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11474          "Should only be called for ISD::INSERT_VECTOR_ELT");
11475
11476   // Operands: vector, inserted element, index (ConstIdx is null if variable).
11477   SDLoc dl(Op);
11478   const EVT VT = Op.getValueType();
11479   SDValue Vec = Op.getOperand(0);
11480   SDValue Elt = Op.getOperand(1);
11481   SDValue Idx = Op.getOperand(2);
11482   auto *ConstIdx = dyn_cast<ConstantSDNode>(Idx);
11483
11484   // A constant-index insert into v2f64 is returned as is.
11485   if (ConstIdx && VT == MVT::v2f64)
11486     return Op;
11487
11488   // With P9 vector support, an f32 load inserted into a v4f32 is redone as an
11489   // i32 insert into the v4i32 bitcast of the vector. On P10 this lets the
11490   // pattern use the refactored load and store infrastructure to exploit
11491   // prefixed loads. With inexpensive direct moves (Power9 and up) an integer
11492   // load is always better: a single precision load converts to double
11493   // precision on the load and then back to single precision.
11494   const bool IsF32LoadIntoV4F32 = VT == MVT::v4f32 &&
11495                                   Elt.getValueType() == MVT::f32 &&
11496                                   isa<LoadSDNode>(Elt);
11497   if (Subtarget.hasP9Vector() && IsF32LoadIntoV4F32) {
11498     SDValue IntVec = DAG.getBitcast(MVT::v4i32, Vec);
11499     SDValue IntElt = DAG.getBitcast(MVT::i32, Elt);
11500     SDValue IntIns = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32,
11501                                  IntVec, IntElt, Idx);
11502     return DAG.getBitcast(MVT::v4f32, IntIns);
11503   }
11504
11505   if (Subtarget.isISA3_1()) {
11506     // v2i64/v2f64 on a non-64-bit subtarget: return an empty SDValue.
11507     const bool Has64BitElts = VT == MVT::v2i64 || VT == MVT::v2f64;
11508     if (Has64BitElts && !Subtarget.isPPC64())
11509       return SDValue();
11510     // On P10, we have legal lowering for constant and variable indices for
11511     // all vectors.
11512     if (Has64BitElts || VT == MVT::v16i8 || VT == MVT::v8i16 ||
11513         VT == MVT::v4i32 || VT == MVT::v4f32)
11514       return Op;
11515   }
11516
11517   // Before P10, we have legal lowering for constant indices but not for
11518   // variable ones.
11519   if (!ConstIdx)
11520     return SDValue();
11521
11522   // Only v8i16 and v16i8 are rewritten; they use MTVSRZ + VECINSERT.
11523   if (VT != MVT::v8i16 && VT != MVT::v16i8)
11524     return Op;
11525   SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, Elt);
11526   const unsigned EltBytes = VT.getVectorElementType().getSizeInBits() / 8;
11527   const unsigned EltNo = ConstIdx->getZExtValue();
11528   // The byte offset is counted from the other end on little-endian subtargets.
11529   const unsigned ByteOff = Subtarget.isLittleEndian()
11530                                ? (16 - EltBytes) - EltNo * EltBytes
11531                                : EltNo * EltBytes;
11532   return DAG.getNode(PPCISD::VECINSERT, dl, VT, Vec, Mtvsrz,
11533                      DAG.getConstant(ByteOff, dl, MVT::i32));
11534 }
11535
11536 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11537                                            SelectionDAG &DAG) const {
11538   SDLoc dl(Op);
11539   auto *Load = cast<LoadSDNode>(Op.getNode());
11540   const EVT VT = Op.getValueType();
11541   // Only v256i1 (pairs) and v512i1 (accumulators) are split here, into 2 or 4
11542   // v16i8 loads feeding 2 or 4 vsx registers; other types are returned as is.
11543   if (VT != MVT::v256i1 && VT != MVT::v512i1)
11544     return Op;
11545
11546   assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11547          "Type unsupported without MMA");
11548   assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11549          "Type unsupported without paired vector support");
11550   const Align BaseAlign = Load->getAlign();
11551   const unsigned NumVecs = VT.getSizeInBits() / 128;
11552   SDValue InChain = Load->getChain();
11553   SDValue Addr = Load->getBasePtr();
11554   const EVT AddrVT = Addr.getValueType();
11555   SmallVector<SDValue, 4> Parts, PartChains;
11556   // Load 16 bytes per step, bumping the address by 16 after each load.
11557   for (unsigned Idx = 0; Idx != NumVecs; ++Idx) {
11558     const unsigned Offset = Idx * 16;
11559     SDValue Part = DAG.getLoad(
11560         MVT::v16i8, dl, InChain, Addr,
11561         Load->getPointerInfo().getWithOffset(Offset),
11562         commonAlignment(BaseAlign, Offset),
11563         Load->getMemOperand()->getFlags(), Load->getAAInfo());
11564     Addr = DAG.getNode(ISD::ADD, dl, AddrVT, Addr,
11565                        DAG.getConstant(16, dl, AddrVT));
11566     Parts.push_back(Part);
11567     PartChains.push_back(Part.getValue(1));
11568   }
11569   // On little-endian subtargets both lists are reversed.
11570   if (Subtarget.isLittleEndian()) {
11571     std::reverse(Parts.begin(), Parts.end());
11572     std::reverse(PartChains.begin(), PartChains.end());
11573   }
11574   SDValue OutChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, PartChains);
11575   const unsigned BuildOpc =
11576       VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD;
11577   SDValue Built = DAG.getNode(BuildOpc, dl, VT, Parts);
11578   return DAG.getMergeValues({Built, OutChain}, dl);
11579 }
11580
11581 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11582                                             SelectionDAG &DAG) const {
11583   SDLoc dl(Op);
11584   auto *Store = cast<StoreSDNode>(Op.getNode());
11585   // Hi is only read for pieces 2-3 under ISA Future, where an accumulator is
11586   // first split into two v256i1 values; otherwise every piece comes from Lo.
11587   SDValue Lo = Store->getValue();
11588   SDValue Hi = Store->getValue();
11589   const EVT StoreVT = Lo.getValueType();
11590   if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11591     return Op;
11592
11593   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11594   // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
11595   // underlying registers individually.
11596   assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11597          "Type unsupported without MMA");
11598   assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11599          "Type unsupported without paired vector support");
11600   const bool IsFuture = Subtarget.isISAFuture();
11601   const bool IsLE = Subtarget.isLittleEndian();
11602   const bool IsAcc = StoreVT == MVT::v512i1;
11603   const unsigned NumVecs = IsAcc ? 4 : 2;
11604   if (IsAcc && IsFuture) {
11605     EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11606     MachineSDNode *ExtNode = DAG.getMachineNode(
11607         PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
11608     Lo = SDValue(ExtNode, 0);
11609     Hi = SDValue(ExtNode, 1);
11610   } else if (IsAcc) {
11611     Lo = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Lo);
11612   }
11613   const Align BaseAlign = Store->getAlign();
11614   const EVT PtrVT = getPointerTy(DAG.getDataLayout());
11615   SDValue InChain = Store->getChain();
11616   SDValue Addr = Store->getBasePtr();
11617   SmallVector<SDValue, 4> Stores;
11618   for (unsigned Idx = 0; Idx != NumVecs; ++Idx) {
11619     // Register to extract: under ISA Future the index is taken within a pair
11620     // (Idx % 2), otherwise within the whole value; reversed on little-endian.
11621     SDValue Src = (IsFuture && Idx > 1) ? Hi : Lo;
11622     unsigned VecNum;
11623     if (IsFuture)
11624       VecNum = IsLE ? 1 - (Idx % 2) : (Idx % 2);
11625     else
11626       VecNum = IsLE ? NumVecs - 1 - Idx : Idx;
11627     SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Src,
11628                               DAG.getConstant(VecNum, dl, PtrVT));
11629     SDValue Piece =
11630         DAG.getStore(InChain, dl, Elt, Addr,
11631                      Store->getPointerInfo().getWithOffset(Idx * 16),
11632                      commonAlignment(BaseAlign, Idx * 16),
11633                      Store->getMemOperand()->getFlags(), Store->getAAInfo());
11634     Addr = DAG.getNode(ISD::ADD, dl, Addr.getValueType(), Addr,
11635                        DAG.getConstant(16, dl, Addr.getValueType()));
11636     Stores.push_back(Piece);
11637   }
11638   return DAG.getTokenFactor(dl, Stores);
11639 }
11640
11641 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11642   // Custom lowering of vector multiplies for v4i32 and v16i8 via AltiVec
11643   // intrinsics; any other type is not expected here.
11644   SDLoc dl(Op);
11645   const EVT VT = Op.getValueType();
11646   if (VT == MVT::v4i32) {
11647     SDValue A = Op.getOperand(0);
11648     SDValue B = Op.getOperand(1);
11649     SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11650     // +16 as shift amt.
11651     SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11652     // BRot = vrlw B, 16
11653     SDValue BRot =
11654         BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, B, Neg16, DAG, dl);
11655
11656     // Shrinkify inputs to v8i16.
11657     A = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, A);
11658     B = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, B);
11659     BRot = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, BRot);
11660
11661     // Low parts multiplied together, generating 32-bit results (we ignore the
11662     // top parts).
11663     SDValue Low = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, A, B, DAG,
11664                                    dl, MVT::v4i32);
11665     SDValue High = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, A, BRot,
11666                                     Zero, DAG, dl, MVT::v4i32);
11667     // Shift the high parts up 16 bits.
11668     High = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, High, Neg16, DAG, dl);
11669     return DAG.getNode(ISD::ADD, dl, MVT::v4i32, Low, High);
11670   }
11671
11672   if (VT == MVT::v16i8) {
11673     SDValue A = Op.getOperand(0);
11674     SDValue B = Op.getOperand(1);
11675     const bool IsLE = Subtarget.isLittleEndian();
11676
11677     // Multiply the even 8-bit parts, producing 16-bit sums.
11678     SDValue Even = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, A, B, DAG,
11679                                     dl, MVT::v8i16);
11680     Even = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Even);
11681
11682     // Multiply the odd 8-bit parts, producing 16-bit sums.
11683     SDValue Odd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, A, B, DAG,
11684                                    dl, MVT::v8i16);
11685     Odd = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Odd);
11686
11687     // Merge the results together.  Because vmuleub and vmuloub are
11688     // instructions with a big-endian bias, we must reverse the
11689     // element numbering and reverse the meaning of "odd" and "even"
11690     // when generating little endian code.
11691     const int Start = IsLE ? 0 : 1;
11692     int Mask[16];
11693     for (int Pair = 0; Pair != 8; ++Pair) {
11694       Mask[2 * Pair] = 2 * Pair + Start;
11695       Mask[2 * Pair + 1] = 2 * Pair + Start + 16;
11696     }
11697     SDValue First = IsLE ? Odd : Even;
11698     SDValue Second = IsLE ? Even : Odd;
11699     return DAG.getVectorShuffle(MVT::v16i8, dl, First, Second, Mask);
11700   }
11701
11702   llvm_unreachable("Unknown mul to lower!");
11703 }
11704
11705 SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11706   // For strict FP opcodes the source value is operand 1 rather than operand 0.
11707   const unsigned SrcIdx = Op->isStrictFPOpcode() ? 1 : 0;
11708   const bool FromF128 = Op.getOperand(SrcIdx).getValueType() == MVT::f128;
11709   // An f128 source without P9 vector support yields an empty SDValue;
11710   // everything else keeps the node as is.
11711   return (FromF128 && !Subtarget.hasP9Vector()) ? SDValue() : Op;
11712 }
11713
11714 // Custom lowering for fpext v2f32 to v2f64
11715 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11716
11717   assert(Op.getOpcode() == ISD::FP_EXTEND &&
11718          "Should only be called for ISD::FP_EXTEND");
11719
11720   // FIXME: handle extends from half precision float vectors on P9.
11721   // We only want to custom lower an extend from v2f32 to v2f64.
11722   SDValue Src = Op.getOperand(0);
11723   if (Op.getValueType() != MVT::v2f64 || Src.getValueType() != MVT::v2f32)
11724     return SDValue();
11725
11726   SDLoc dl(Op);
11727
11728   // Re-issue a load as a PPCISD::LD_VSX_LH node producing a v4f32, reusing
11729   // the original chain, base pointer, memory VT and memory operand.
11730   auto ReloadAsV4F32 = [&](LoadSDNode *LD) {
11731     SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11732     return DAG.getMemIntrinsicNode(
11733         PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11734         LD->getMemoryVT(), LD->getMemOperand());
11735   };
11736
11737   // Wrap a v4f32 value in PPCISD::FP_EXTEND_HALF with the given doubleword
11738   // selector, producing the v2f64 result.
11739   auto ExtendHalf = [&](SDValue V4F32, int DWord) {
11740     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, V4F32,
11741                        DAG.getConstant(DWord, dl, MVT::i32));
11742   };
11743
11744   switch (Src.getOpcode()) {
11745   default:
11746     return SDValue();
11747   case ISD::EXTRACT_SUBVECTOR: {
11748     assert(Src.getNumOperands() == 2 &&
11749            isa<ConstantSDNode>(Src->getOperand(1)) &&
11750            "Node should have 2 operands with second one being a constant!");
11751
11752     if (Src.getOperand(0).getValueType() != MVT::v4f32)
11753       return SDValue();
11754
11755     // Custom lower is only done for high or low doubleword.
11756     const int Idx = Src.getConstantOperandVal(1);
11757     if (Idx % 2 != 0)
11758       return SDValue();
11759
11760     // Since input is v4f32, at this point Idx is either 0 or 2.
11761     // Shift to get the doubleword position we want.
11762     int DWord = Idx >> 1;
11763     // High and low word positions are different on little endian.
11764     if (Subtarget.isLittleEndian())
11765       DWord ^= 0x1;
11766     return ExtendHalf(Src.getOperand(0), DWord);
11767   }
11768   case ISD::FADD:
11769   case ISD::FMUL:
11770   case ISD::FSUB: {
11771     // Both inputs must be loads; each is replaced by a v4f32 load and the
11772     // arithmetic is redone on v4f32 before extending doubleword 0.
11773     SDValue NewLoad[2];
11774     for (unsigned i = 0, ie = Src.getNumOperands(); i != ie; ++i) {
11775       SDValue LdOp = Src.getOperand(i);
11776       if (LdOp.getOpcode() != ISD::LOAD)
11777         return SDValue();
11778       NewLoad[i] = ReloadAsV4F32(cast<LoadSDNode>(LdOp));
11779     }
11780     SDValue NewOp =
11781         DAG.getNode(Src.getOpcode(), SDLoc(Src), MVT::v4f32, NewLoad[0],
11782                     NewLoad[1], Src.getNode()->getFlags());
11783     return ExtendHalf(NewOp, 0);
11784   }
11785   case ISD::LOAD:
11786     return ExtendHalf(ReloadAsV4F32(cast<LoadSDNode>(Src)), 0);
11787   }
11788   llvm_unreachable("ERROR:Should return for all cases within swtich.");
11789 }
11790
11791 /// LowerOperation - Provide custom lowering hooks for some operations.
11792 ///
11793 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
11794   switch (Op.getOpcode()) {
11795   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11796   case ISD::FPOW:               return lowerPow(Op, DAG);
11797   case ISD::FSIN:               return lowerSin(Op, DAG);
11798   case ISD::FCOS:               return lowerCos(Op, DAG);
11799   case ISD::FLOG:               return lowerLog(Op, DAG);
11800   case ISD::FLOG10:             return lowerLog10(Op, DAG);
11801   case ISD::FEXP:               return lowerExp(Op, DAG);
11802   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
11803   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
11804   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
11805   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
11806   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
11807   case ISD::STRICT_FSETCC:
11808   case ISD::STRICT_FSETCCS:
11809   case ISD::SETCC:              return LowerSETCC(Op, DAG);
11810   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
11811   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
11812
11813   case ISD::INLINEASM:
11814   case ISD::INLINEASM_BR:       return LowerINLINEASM(Op, DAG);
11815   // Variable argument lowering.
11816   case ISD::VASTART:            return LowerVASTART(Op, DAG);
11817   case ISD::VAARG:              return LowerVAARG(Op, DAG);
11818   case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
11819
11820   case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
11821   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11822   case ISD::GET_DYNAMIC_AREA_OFFSET:
11823     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11824
11825   // Exception handling lowering.
11826   case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
11827   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
11828   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
11829
11830   case ISD::LOAD:               return LowerLOAD(Op, DAG);
11831   case ISD::STORE:              return LowerSTORE(Op, DAG);
11832   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
11833   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
11834   case ISD::STRICT_FP_TO_UINT:
11835   case ISD::STRICT_FP_TO_SINT:
11836   case ISD::FP_TO_UINT:
11837   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11838   case ISD::STRICT_UINT_TO_FP:
11839   case ISD::STRICT_SINT_TO_FP:
11840   case ISD::UINT_TO_FP:
11841   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
11842   case ISD::GET_ROUNDING:       return LowerGET_ROUNDING(Op, DAG);
11843
11844   // Lower 64-bit shifts.
11845   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
11846   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
11847   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
11848
11849   case ISD::FSHL:               return LowerFunnelShift(Op, DAG);
11850   case ISD::FSHR:               return LowerFunnelShift(Op, DAG);
11851
11852   // Vector-related lowering.
11853   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
11854   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
11855   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11856   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
11857   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
11858   case ISD::MUL:                return LowerMUL(Op, DAG);
11859   case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
11860   case ISD::STRICT_FP_ROUND:
11861   case ISD::FP_ROUND:
11862     return LowerFP_ROUND(Op, DAG);
11863   case ISD::ROTL:               return LowerROTL(Op, DAG);
11864
11865   // For counter-based loop handling.
11866   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
11867
11868   case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
11869
11870   // Frame & Return address.
11871   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
11872   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
11873
11874   case ISD::INTRINSIC_VOID:
11875     return LowerINTRINSIC_VOID(Op, DAG);
11876   case ISD::BSWAP:
11877     return LowerBSWAP(Op, DAG);
11878   case ISD::ATOMIC_CMP_SWAP:
11879     return LowerATOMIC_CMP_SWAP(Op, DAG);
11880   case ISD::ATOMIC_STORE:
11881     return LowerATOMIC_LOAD_STORE(Op, DAG);
11882   case ISD::IS_FPCLASS:
11883     return LowerIS_FPCLASS(Op, DAG);
11884   }
11885 }
11886
11887 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11888                                            SmallVectorImpl<SDValue>&Results,
11889                                            SelectionDAG &DAG) const {
11890   SDLoc dl(N);
11891   switch (N->getOpcode()) {
11892   default:
11893     llvm_unreachable("Do not know how to custom type legalize this operation!");
11894   case ISD::ATOMIC_LOAD: {
11895     SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
11896     Results.push_back(Res);
11897     Results.push_back(Res.getValue(1));
11898     break;
11899   }
11900   case ISD::READCYCLECOUNTER: {
11901     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11902     SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11903
11904     Results.push_back(
11905         DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11906     Results.push_back(RTB.getValue(2));
11907     break;
11908   }
11909   case ISD::INTRINSIC_W_CHAIN: {
11910     if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
11911       break;
11912
11913     assert(N->getValueType(0) == MVT::i1 &&
11914            "Unexpected result type for CTR decrement intrinsic");
11915     EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11916                                  N->getValueType(0));
11917     SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11918     SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11919                                  N->getOperand(1));
11920
11921     Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11922     Results.push_back(NewInt.getValue(1));
11923     break;
11924   }
11925   case ISD::INTRINSIC_WO_CHAIN: {
11926     switch (N->getConstantOperandVal(0)) {
11927     case Intrinsic::ppc_pack_longdouble:
11928       Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
11929                                     N->getOperand(2), N->getOperand(1)));
11930       break;
11931     case Intrinsic::ppc_maxfe:
11932     case Intrinsic::ppc_minfe:
11933     case Intrinsic::ppc_fnmsub:
11934     case Intrinsic::ppc_convert_f128_to_ppcf128:
11935       Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11936       break;
11937     }
11938     break;
11939   }
11940   case ISD::VAARG: {
11941     if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11942       return;
11943
11944     EVT VT = N->getValueType(0);
11945
11946     if (VT == MVT::i64) {
11947       SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11948
11949       Results.push_back(NewNode);
11950       Results.push_back(NewNode.getValue(1));
11951     }
11952     return;
11953   }
11954   case ISD::STRICT_FP_TO_SINT:
11955   case ISD::STRICT_FP_TO_UINT:
11956   case ISD::FP_TO_SINT:
11957   case ISD::FP_TO_UINT: {
11958     // LowerFP_TO_INT() can only handle f32 and f64.
11959     if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11960         MVT::ppcf128)
11961       return;
11962     SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
11963     Results.push_back(LoweredValue);
11964     if (N->isStrictFPOpcode())
11965       Results.push_back(LoweredValue.getValue(1));
11966     return;
11967   }
11968   case ISD::TRUNCATE: {
11969     if (!N->getValueType(0).isVector())
11970       return;
11971     SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11972     if (Lowered)
11973       Results.push_back(Lowered);
11974     return;
11975   }
11976   case ISD::FSHL:
11977   case ISD::FSHR:
11978     // Don't handle funnel shifts here.
11979     return;
11980   case ISD::BITCAST:
11981     // Don't handle bitcast here.
11982     return;
11983   case ISD::FP_EXTEND:
11984     SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11985     if (Lowered)
11986       Results.push_back(Lowered);
11987     return;
11988   }
11989 }
11990
11991 //===----------------------------------------------------------------------===//
11992 //  Other Lowering Code
11993 //===----------------------------------------------------------------------===//
11994
11995 static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
11996   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11997   Function *Func = Intrinsic::getDeclaration(M, Id);
11998   return Builder.CreateCall(Func, {});
11999 }
12000
12001 // The mappings for emitLeading/TrailingFence is taken from
12002 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12003 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12004                                                  Instruction *Inst,
12005                                                  AtomicOrdering Ord) const {
12006   if (Ord == AtomicOrdering::SequentiallyConsistent)
12007     return callIntrinsic(Builder, Intrinsic::ppc_sync);
12008   if (isReleaseOrStronger(Ord))
12009     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12010   return nullptr;
12011 }
12012
12013 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12014                                                   Instruction *Inst,
12015                                                   AtomicOrdering Ord) const {
12016   if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12017     // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12018     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12019     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12020     if (isa<LoadInst>(Inst))
12021       return Builder.CreateCall(
12022           Intrinsic::getDeclaration(
12023               Builder.GetInsertBlock()->getParent()->getParent(),
12024               Intrinsic::ppc_cfence, {Inst->getType()}),
12025           {Inst});
12026     // FIXME: Can use isync for rmw operation.
12027     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12028   }
12029   return nullptr;
12030 }
12031
12032 MachineBasicBlock *
12033 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
12034                                     unsigned AtomicSize,
12035                                     unsigned BinOpcode,
12036                                     unsigned CmpOpcode,
12037                                     unsigned CmpPred) const {
12038   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12039   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12040
12041   auto LoadMnemonic = PPC::LDARX;
12042   auto StoreMnemonic = PPC::STDCX;
12043   switch (AtomicSize) {
12044   default:
12045     llvm_unreachable("Unexpected size of atomic entity");
12046   case 1:
12047     LoadMnemonic = PPC::LBARX;
12048     StoreMnemonic = PPC::STBCX;
12049     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12050     break;
12051   case 2:
12052     LoadMnemonic = PPC::LHARX;
12053     StoreMnemonic = PPC::STHCX;
12054     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12055     break;
12056   case 4:
12057     LoadMnemonic = PPC::LWARX;
12058     StoreMnemonic = PPC::STWCX;
12059     break;
12060   case 8:
12061     LoadMnemonic = PPC::LDARX;
12062     StoreMnemonic = PPC::STDCX;
12063     break;
12064   }
12065
12066   const BasicBlock *LLVM_BB = BB->getBasicBlock();
12067   MachineFunction *F = BB->getParent();
12068   MachineFunction::iterator It = ++BB->getIterator();
12069
12070   Register dest = MI.getOperand(0).getReg();
12071   Register ptrA = MI.getOperand(1).getReg();
12072   Register ptrB = MI.getOperand(2).getReg();
12073   Register incr = MI.getOperand(3).getReg();
12074   DebugLoc dl = MI.getDebugLoc();
12075
12076   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12077   MachineBasicBlock *loop2MBB =
12078     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12079   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12080   F->insert(It, loopMBB);
12081   if (CmpOpcode)
12082     F->insert(It, loop2MBB);
12083   F->insert(It, exitMBB);
12084   exitMBB->splice(exitMBB->begin(), BB,
12085                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
12086   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12087
12088   MachineRegisterInfo &RegInfo = F->getRegInfo();
12089   Register TmpReg = (!BinOpcode) ? incr :
12090     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
12091                                            : &PPC::GPRCRegClass);
12092
12093   //  thisMBB:
12094   //   ...
12095   //   fallthrough --> loopMBB
12096   BB->addSuccessor(loopMBB);
12097
12098   //  loopMBB:
12099   //   l[wd]arx dest, ptr
12100   //   add r0, dest, incr
12101   //   st[wd]cx. r0, ptr
12102   //   bne- loopMBB
12103   //   fallthrough --> exitMBB
12104
12105   // For max/min...
12106   //  loopMBB:
12107   //   l[wd]arx dest, ptr
12108   //   cmpl?[wd] dest, incr
12109   //   bgt exitMBB
12110   //  loop2MBB:
12111   //   st[wd]cx. dest, ptr
12112   //   bne- loopMBB
12113   //   fallthrough --> exitMBB
12114
12115   BB = loopMBB;
12116   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
12117     .addReg(ptrA).addReg(ptrB);
12118   if (BinOpcode)
12119     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
12120   if (CmpOpcode) {
12121     Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12122     // Signed comparisons of byte or halfword values must be sign-extended.
12123     if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
12124       Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12125       BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
12126               ExtReg).addReg(dest);
12127       BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
12128     } else
12129       BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
12130
12131     BuildMI(BB, dl, TII->get(PPC::BCC))
12132         .addImm(CmpPred)
12133         .addReg(CrReg)
12134         .addMBB(exitMBB);
12135     BB->addSuccessor(loop2MBB);
12136     BB->addSuccessor(exitMBB);
12137     BB = loop2MBB;
12138   }
12139   BuildMI(BB, dl, TII->get(StoreMnemonic))
12140     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
12141   BuildMI(BB, dl, TII->get(PPC::BCC))
12142     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
12143   BB->addSuccessor(loopMBB);
12144   BB->addSuccessor(exitMBB);
12145
12146   //  exitMBB:
12147   //   ...
12148   BB = exitMBB;
12149   return BB;
12150 }
12151
12152 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
12153   switch(MI.getOpcode()) {
12154   default:
12155     return false;
12156   case PPC::COPY:
12157     return TII->isSignExtended(MI.getOperand(1).getReg(),
12158                                &MI.getMF()->getRegInfo());
12159   case PPC::LHA:
12160   case PPC::LHA8:
12161   case PPC::LHAU:
12162   case PPC::LHAU8:
12163   case PPC::LHAUX:
12164   case PPC::LHAUX8:
12165   case PPC::LHAX:
12166   case PPC::LHAX8:
12167   case PPC::LWA:
12168   case PPC::LWAUX:
12169   case PPC::LWAX:
12170   case PPC::LWAX_32:
12171   case PPC::LWA_32:
12172   case PPC::PLHA:
12173   case PPC::PLHA8:
12174   case PPC::PLHA8pc:
12175   case PPC::PLHApc:
12176   case PPC::PLWA:
12177   case PPC::PLWA8:
12178   case PPC::PLWA8pc:
12179   case PPC::PLWApc:
12180   case PPC::EXTSB:
12181   case PPC::EXTSB8:
12182   case PPC::EXTSB8_32_64:
12183   case PPC::EXTSB8_rec:
12184   case PPC::EXTSB_rec:
12185   case PPC::EXTSH:
12186   case PPC::EXTSH8:
12187   case PPC::EXTSH8_32_64:
12188   case PPC::EXTSH8_rec:
12189   case PPC::EXTSH_rec:
12190   case PPC::EXTSW:
12191   case PPC::EXTSWSLI:
12192   case PPC::EXTSWSLI_32_64:
12193   case PPC::EXTSWSLI_32_64_rec:
12194   case PPC::EXTSWSLI_rec:
12195   case PPC::EXTSW_32:
12196   case PPC::EXTSW_32_64:
12197   case PPC::EXTSW_32_64_rec:
12198   case PPC::EXTSW_rec:
12199   case PPC::SRAW:
12200   case PPC::SRAWI:
12201   case PPC::SRAWI_rec:
12202   case PPC::SRAW_rec:
12203     return true;
12204   }
12205   return false;
12206 }
12207
/// Expand an 8- or 16-bit atomic read-modify-write pseudo instruction \p MI.
///
/// \p MI supplies the result register (operand 0), the two address registers
/// (operands 1 and 2) and the incoming value (operand 3). \p is8bit selects
/// byte versus halfword width. \p BinOpcode is the machine opcode applied to
/// the incoming and loaded values (0 means ATOMIC_SWAP). A non-zero
/// \p CmpOpcode adds a compare that branches to the exit block on \p CmpPred
/// without storing.
///
/// When the subtarget has partword atomics this defers to EmitAtomicBinary.
/// Otherwise it builds an lwarx/stwcx. loop on the aligned word containing the
/// value, using a shift amount and mask derived from the address. Returns the
/// block that received the instructions that followed \p MI.
MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
    MachineInstr &MI, MachineBasicBlock *BB,
    bool is8bit, // operation width: true for byte, false for halfword
    unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const PPCInstrInfo *TII = Subtarget.getInstrInfo();

  // If this is a signed comparison and the value being compared is not known
  // to be sign extended, sign extend it here.
  DebugLoc dl = MI.getDebugLoc();
  MachineFunction *F = BB->getParent();
  MachineRegisterInfo &RegInfo = F->getRegInfo();
  Register incr = MI.getOperand(3).getReg();
  // Only a virtual register's defining instruction is inspected; anything
  // else is treated as not sign extended.
  bool IsSignExtended =
      incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);

  if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
    // Insert the extension before MI and rewrite MI's operand 3 so the
    // EmitAtomicBinary path below sees the extended value as well.
    Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
    BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
        .addReg(MI.getOperand(3).getReg());
    MI.getOperand(3).setReg(ValueReg);
    incr = ValueReg;
  }
  // If we support part-word atomic mnemonics, just use them
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
                            CmpPred);

  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  Register dest = MI.getOperand(0).getReg();
  Register ptrA = MI.getOperand(1).getReg();
  Register ptrB = MI.getOperand(2).getReg();

  // loop2MBB is only created when a compare is requested.
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  // Move everything after MI, and BB's successor edges, to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  // RC is the pointer-width register class; GPRC is always the 32-bit class.
  const TargetRegisterClass *RC =
      is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  Register PtrReg = RegInfo.createVirtualRegister(RC);
  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
  // On little endian the shift amount is used as computed; on big endian a
  // separate register receives the XORI-adjusted amount (see below).
  Register ShiftReg =
      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
  Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
  Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
  Register Ptr1Reg;
  // Without a binary operation (swap) the shifted incoming value is what gets
  // merged into the word.
  Register TmpReg =
      (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  // (Bracketed values are the halfword variants.)
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]      (big endian only)
  //   rlwinm ptr, ptr1, 0, 0, 29       (rldicr ptr, ptr1, 0, 61 in 64-bit mode)
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //  exitMBB:
  //   srw SrwDest, tmpDest, shift
  //   rlwinm dest, SrwDest, 0, 24 [16], 31
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA)
        .addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }
  // We need use 32-bit subregister to avoid mismatch register class in 64-bit
  // mode.
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
      .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
      .addImm(3)
      .addImm(27)
      .addImm(is8bit ? 28 : 27);
  if (!isLittleEndian)
    BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
        .addReg(Shift1Reg)
        .addImm(is8bit ? 24 : 16);
  // Clear the low two address bits to get the aligned word address.
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(0)
        .addImm(29);
  // Shift the incoming value and the width mask into position in the word.
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg)
        .addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg)
      .addReg(ShiftReg);

  // From here on, instructions are appended to the loop block.
  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
        .addReg(Incr2Reg)
        .addReg(TmpDestReg);
  // Tmp2 = loaded word with the target field cleared; Tmp3 = new field value.
  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
      .addReg(TmpDestReg)
      .addReg(MaskReg);
  BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
  if (CmpOpcode) {
    // For unsigned comparisons, we can directly compare the shifted values.
    // For signed comparisons we shift and sign extend.
    Register SReg = RegInfo.createVirtualRegister(GPRC);
    Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
    BuildMI(BB, dl, TII->get(PPC::AND), SReg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    unsigned ValueReg = SReg;
    unsigned CmpReg = Incr2Reg;
    if (CmpOpcode == PPC::CMPW) {
      // Shift the loaded field back down, sign extend it, and compare against
      // the unshifted incoming value instead.
      ValueReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
          .addReg(SReg)
          .addReg(ShiftReg);
      Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
          .addReg(ValueReg);
      ValueReg = ValueSReg;
      CmpReg = incr;
    }
    BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(CrReg)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  // Merge the new field into the word, store conditionally, and retry from
  // loopMBB when CR0 reports "not equal".
  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
      .addReg(Tmp4Reg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE)
      .addReg(PPC::CR0)
      .addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  // Since the shift amount is not a constant, we need to clear
  // the upper bits with a separate RLWINM.
  // Both instructions are inserted at BB->begin(), so the SRW built second
  // ends up ahead of the RLWINM that reads SrwDestReg.
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
      .addReg(SrwDestReg)
      .addImm(0)
      .addImm(is8bit ? 24 : 16)
      .addImm(31);
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
      .addReg(TmpDestReg)
      .addReg(ShiftReg);
  return BB;
}
12421
/// Expand the SjLj setjmp pseudo instruction \p MI found in \p MBB.
///
/// Operand 0 of \p MI is the result register and operand 1 the register
/// holding the jump buffer address. The expansion stores the TOC pointer
/// (64-bit ELF only), the base pointer and the link register into the buffer,
/// and produces 0 in the result on the path through mainMBB and 1 on the
/// direct path from thisMBB. \p MI is erased; the returned block is sinkMBB,
/// which receives the instructions that followed \p MI.
llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  // One result register per incoming path; they are merged by the PHI in
  // sinkMBB.
  Register mainDstReg = MRI.createVirtualRegister(RC);
  Register restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate (in emission order)
  //
  // thisMBB:
  //  buf[TOCOffset] = X2        (64-bit ELF ABI only)
  //  buf[BPOffset] = base pointer
  //  bcl mainMBB
  //  v_restore = 1
  //  SjLjSetup mainMBB
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  // Buffer offsets are multiples of the pointer store size.
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  // Prepare a pointer-sized register to receive the label address (read from
  // LR in mainMBB).
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  Register LabelReg = MRI.createVirtualRegister(PtrRC);
  Register BufReg = MI.getOperand(1).getReg();

  // Save the TOC pointer on the 64-bit ELF ABI.
  if (Subtarget.is64BitELFABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
              .addReg(PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg)
              .cloneMemRefs(MI);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned BaseReg;
  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  // Save the base pointer into the buffer.
  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
            .addReg(BaseReg)
            .addImm(BPOffset)
            .addReg(BufReg)
            .cloneMemRefs(MI);

  // Setup
  // Branch-and-link to mainMBB; the register mask is the "no preserved
  // registers" mask.
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  MIB.addRegMask(TRI->getNoPreservedMask());

  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
          .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());

  // mainMBB:
  //  LabelReg = LR
  //  buf[LabelOffset] = LabelReg
  //  mainDstReg = 0
  MIB =
      BuildMI(mainMBB, DL,
              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  // Merge the two result values at the top of the sink block.
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(thisMBB);

  MI.eraseFromParent();
  return sinkMBB;
}
12563
12564 MachineBasicBlock *
12565 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
12566                                      MachineBasicBlock *MBB) const {
12567   DebugLoc DL = MI.getDebugLoc();
12568   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12569
12570   MachineFunction *MF = MBB->getParent();
12571   MachineRegisterInfo &MRI = MF->getRegInfo();
12572
12573   MVT PVT = getPointerTy(MF->getDataLayout());
12574   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12575          "Invalid Pointer Size!");
12576
12577   const TargetRegisterClass *RC =
12578     (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12579   Register Tmp = MRI.createVirtualRegister(RC);
12580   // Since FP is only updated here but NOT referenced, it's treated as GPR.
12581   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
12582   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
12583   unsigned BP =
12584       (PVT == MVT::i64)
12585           ? PPC::X30
12586           : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
12587                                                               : PPC::R30);
12588
12589   MachineInstrBuilder MIB;
12590
12591   const int64_t LabelOffset = 1 * PVT.getStoreSize();
12592   const int64_t SPOffset    = 2 * PVT.getStoreSize();
12593   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
12594   const int64_t BPOffset    = 4 * PVT.getStoreSize();
12595
12596   Register BufReg = MI.getOperand(0).getReg();
12597
12598   // Reload FP (the jumped-to function may not have had a
12599   // frame pointer, and if so, then its r31 will be restored
12600   // as necessary).
12601   if (PVT == MVT::i64) {
12602     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
12603             .addImm(0)
12604             .addReg(BufReg);
12605   } else {
12606     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
12607             .addImm(0)
12608             .addReg(BufReg);
12609   }
12610   MIB.cloneMemRefs(MI);
12611
12612   // Reload IP
12613   if (PVT == MVT::i64) {
12614     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
12615             .addImm(LabelOffset)
12616             .addReg(BufReg);
12617   } else {
12618     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
12619             .addImm(LabelOffset)
12620             .addReg(BufReg);
12621   }
12622   MIB.cloneMemRefs(MI);
12623
12624   // Reload SP
12625   if (PVT == MVT::i64) {
12626     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
12627             .addImm(SPOffset)
12628             .addReg(BufReg);
12629   } else {
12630     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
12631             .addImm(SPOffset)
12632             .addReg(BufReg);
12633   }
12634   MIB.cloneMemRefs(MI);
12635
12636   // Reload BP
12637   if (PVT == MVT::i64) {
12638     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
12639             .addImm(BPOffset)
12640             .addReg(BufReg);
12641   } else {
12642     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
12643             .addImm(BPOffset)
12644             .addReg(BufReg);
12645   }
12646   MIB.cloneMemRefs(MI);
12647
12648   // Reload TOC
12649   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
12650     setUsesTOCBasePtr(*MBB->getParent());
12651     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
12652               .addImm(TOCOffset)
12653               .addReg(BufReg)
12654               .cloneMemRefs(MI);
12655   }
12656
12657   // Jump
12658   BuildMI(*MBB, MI, DL,
12659           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
12660   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
12661
12662   MI.eraseFromParent();
12663   return MBB;
12664 }
12665
12666 bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
12667   // If the function specifically requests inline stack probes, emit them.
12668   if (MF.getFunction().hasFnAttribute("probe-stack"))
12669     return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12670            "inline-asm";
12671   return false;
12672 }
12673
12674 unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
12675   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12676   unsigned StackAlign = TFI->getStackAlignment();
12677   assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12678          "Unexpected stack alignment");
12679   // The default stack probe size is 4096 if the function has no
12680   // stack-probe-size attribute.
12681   const Function &Fn = MF.getFunction();
12682   unsigned StackProbeSize =
12683       Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12684   // Round down to the stack alignment.
12685   StackProbeSize &= ~(StackAlign - 1);
12686   return StackProbeSize ? StackProbeSize : StackAlign;
12687 }
12688
12689 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is splitted
12690 // into three phases. In the first phase, it uses pseudo instruction
12691 // PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
12692 // FinalStackPtr. In the second phase, it generates a loop for probing blocks.
12693 // At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
12694 // MaxCallFrameSize so that it can calculate correct data area pointer.
12695 MachineBasicBlock *
12696 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
12697                                     MachineBasicBlock *MBB) const {
12698   const bool isPPC64 = Subtarget.isPPC64();
12699   MachineFunction *MF = MBB->getParent();
12700   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12701   DebugLoc DL = MI.getDebugLoc();
12702   const unsigned ProbeSize = getStackProbeSize(*MF);
12703   const BasicBlock *ProbedBB = MBB->getBasicBlock();
12704   MachineRegisterInfo &MRI = MF->getRegInfo();
12705   // The CFG of probing stack looks as
12706   //         +-----+
12707   //         | MBB |
12708   //         +--+--+
12709   //            |
12710   //       +----v----+
12711   //  +--->+ TestMBB +---+
12712   //  |    +----+----+   |
12713   //  |         |        |
12714   //  |   +-----v----+   |
12715   //  +---+ BlockMBB |   |
12716   //      +----------+   |
12717   //                     |
12718   //       +---------+   |
12719   //       | TailMBB +<--+
12720   //       +---------+
12721   // In MBB, calculate previous frame pointer and final stack pointer.
12722   // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
12723   // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
12724   // TailMBB is spliced via \p MI.
12725   MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12726   MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12727   MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12728
12729   MachineFunction::iterator MBBIter = ++MBB->getIterator();
12730   MF->insert(MBBIter, TestMBB);
12731   MF->insert(MBBIter, BlockMBB);
12732   MF->insert(MBBIter, TailMBB);
12733
12734   const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12735   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12736
12737   Register DstReg = MI.getOperand(0).getReg();
12738   Register NegSizeReg = MI.getOperand(1).getReg();
12739   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12740   Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12741   Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12742   Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12743
12744   // Since value of NegSizeReg might be realigned in prologepilog, insert a
12745   // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
12746   // NegSize.
12747   unsigned ProbeOpc;
12748   if (!MRI.hasOneNonDBGUse(NegSizeReg))
12749     ProbeOpc =
12750         isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12751   else
12752     // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
12753     // and NegSizeReg will be allocated in the same phyreg to avoid
12754     // redundant copy when NegSizeReg has only one use which is current MI and
12755     // will be replaced by PREPARE_PROBED_ALLOCA then.
12756     ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12757                        : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12758   BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12759       .addDef(ActualNegSizeReg)
12760       .addReg(NegSizeReg)
12761       .add(MI.getOperand(2))
12762       .add(MI.getOperand(3));
12763
12764   // Calculate final stack pointer, which equals to SP + ActualNegSize.
12765   BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12766           FinalStackPtr)
12767       .addReg(SPReg)
12768       .addReg(ActualNegSizeReg);
12769
12770   // Materialize a scratch register for update.
12771   int64_t NegProbeSize = -(int64_t)ProbeSize;
12772   assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12773   Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12774   if (!isInt<16>(NegProbeSize)) {
12775     Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12776     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12777         .addImm(NegProbeSize >> 16);
12778     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12779             ScratchReg)
12780         .addReg(TempReg)
12781         .addImm(NegProbeSize & 0xFFFF);
12782   } else
12783     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12784         .addImm(NegProbeSize);
12785
12786   {
12787     // Probing leading residual part.
12788     Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12789     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12790         .addReg(ActualNegSizeReg)
12791         .addReg(ScratchReg);
12792     Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12793     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12794         .addReg(Div)
12795         .addReg(ScratchReg);
12796     Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12797     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12798         .addReg(Mul)
12799         .addReg(ActualNegSizeReg);
12800     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12801         .addReg(FramePointer)
12802         .addReg(SPReg)
12803         .addReg(NegMod);
12804   }
12805
12806   {
12807     // Remaining part should be multiple of ProbeSize.
12808     Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12809     BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12810         .addReg(SPReg)
12811         .addReg(FinalStackPtr);
12812     BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12813         .addImm(PPC::PRED_EQ)
12814         .addReg(CmpResult)
12815         .addMBB(TailMBB);
12816     TestMBB->addSuccessor(BlockMBB);
12817     TestMBB->addSuccessor(TailMBB);
12818   }
12819
12820   {
12821     // Touch the block.
12822     // |P...|P...|P...
12823     BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12824         .addReg(FramePointer)
12825         .addReg(SPReg)
12826         .addReg(ScratchReg);
12827     BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12828     BlockMBB->addSuccessor(TestMBB);
12829   }
12830
12831   // Calculation of MaxCallFrameSize is deferred to prologepilog, use
12832   // DYNAREAOFFSET pseudo instruction to get the future result.
12833   Register MaxCallFrameSizeReg =
12834       MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12835   BuildMI(TailMBB, DL,
12836           TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12837           MaxCallFrameSizeReg)
12838       .add(MI.getOperand(2))
12839       .add(MI.getOperand(3));
12840   BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12841       .addReg(SPReg)
12842       .addReg(MaxCallFrameSizeReg);
12843
12844   // Splice instructions after MI to TailMBB.
12845   TailMBB->splice(TailMBB->end(), MBB,
12846                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12847   TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
12848   MBB->addSuccessor(TestMBB);
12849
12850   // Delete the pseudo instruction.
12851   MI.eraseFromParent();
12852
12853   ++NumDynamicAllocaProbed;
12854   return TailMBB;
12855 }
12856
12857 static bool IsSelectCC(MachineInstr &MI) {
12858   switch (MI.getOpcode()) {
12859   case PPC::SELECT_CC_I4:
12860   case PPC::SELECT_CC_I8:
12861   case PPC::SELECT_CC_F4:
12862   case PPC::SELECT_CC_F8:
12863   case PPC::SELECT_CC_F16:
12864   case PPC::SELECT_CC_VRRC:
12865   case PPC::SELECT_CC_VSFRC:
12866   case PPC::SELECT_CC_VSSRC:
12867   case PPC::SELECT_CC_VSRC:
12868   case PPC::SELECT_CC_SPE4:
12869   case PPC::SELECT_CC_SPE:
12870     return true;
12871   default:
12872     return false;
12873   }
12874 }
12875
12876 static bool IsSelect(MachineInstr &MI) {
12877   switch (MI.getOpcode()) {
12878   case PPC::SELECT_I4:
12879   case PPC::SELECT_I8:
12880   case PPC::SELECT_F4:
12881   case PPC::SELECT_F8:
12882   case PPC::SELECT_F16:
12883   case PPC::SELECT_SPE:
12884   case PPC::SELECT_SPE4:
12885   case PPC::SELECT_VRRC:
12886   case PPC::SELECT_VSFRC:
12887   case PPC::SELECT_VSSRC:
12888   case PPC::SELECT_VSRC:
12889     return true;
12890   default:
12891     return false;
12892   }
12893 }
12894
12895 MachineBasicBlock *
12896 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12897                                                MachineBasicBlock *BB) const {
12898   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12899       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12900     if (Subtarget.is64BitELFABI() &&
12901         MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12902         !Subtarget.isUsingPCRelativeCalls()) {
12903       // Call lowering should have added an r2 operand to indicate a dependence
12904       // on the TOC base pointer value. It can't however, because there is no
12905       // way to mark the dependence as implicit there, and so the stackmap code
12906       // will confuse it with a regular operand. Instead, add the dependence
12907       // here.
12908       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12909     }
12910
12911     return emitPatchPoint(MI, BB);
12912   }
12913
12914   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12915       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12916     return emitEHSjLjSetJmp(MI, BB);
12917   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12918              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12919     return emitEHSjLjLongJmp(MI, BB);
12920   }
12921
12922   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12923
12924   // To "insert" these instructions we actually have to insert their
12925   // control-flow patterns.
12926   const BasicBlock *LLVM_BB = BB->getBasicBlock();
12927   MachineFunction::iterator It = ++BB->getIterator();
12928
12929   MachineFunction *F = BB->getParent();
12930   MachineRegisterInfo &MRI = F->getRegInfo();
12931
12932   if (Subtarget.hasISEL() &&
12933       (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12934        MI.getOpcode() == PPC::SELECT_CC_I8 ||
12935        MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
12936     SmallVector<MachineOperand, 2> Cond;
12937     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12938         MI.getOpcode() == PPC::SELECT_CC_I8)
12939       Cond.push_back(MI.getOperand(4));
12940     else
12941       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
12942     Cond.push_back(MI.getOperand(1));
12943
12944     DebugLoc dl = MI.getDebugLoc();
12945     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12946                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12947   } else if (IsSelectCC(MI) || IsSelect(MI)) {
12948     // The incoming instruction knows the destination vreg to set, the
12949     // condition code register to branch on, the true/false values to
12950     // select between, and a branch opcode to use.
12951
12952     //  thisMBB:
12953     //  ...
12954     //   TrueVal = ...
12955     //   cmpTY ccX, r1, r2
12956     //   bCC sinkMBB
12957     //   fallthrough --> copy0MBB
12958     MachineBasicBlock *thisMBB = BB;
12959     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12960     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12961     DebugLoc dl = MI.getDebugLoc();
12962     F->insert(It, copy0MBB);
12963     F->insert(It, sinkMBB);
12964
12965     // Set the call frame size on entry to the new basic blocks.
12966     // See https://reviews.llvm.org/D156113.
12967     unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12968     copy0MBB->setCallFrameSize(CallFrameSize);
12969     sinkMBB->setCallFrameSize(CallFrameSize);
12970
12971     // Transfer the remainder of BB and its successor edges to sinkMBB.
12972     sinkMBB->splice(sinkMBB->begin(), BB,
12973                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12974     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12975
12976     // Next, add the true and fallthrough blocks as its successors.
12977     BB->addSuccessor(copy0MBB);
12978     BB->addSuccessor(sinkMBB);
12979
12980     if (IsSelect(MI)) {
12981       BuildMI(BB, dl, TII->get(PPC::BC))
12982           .addReg(MI.getOperand(1).getReg())
12983           .addMBB(sinkMBB);
12984     } else {
12985       unsigned SelectPred = MI.getOperand(4).getImm();
12986       BuildMI(BB, dl, TII->get(PPC::BCC))
12987           .addImm(SelectPred)
12988           .addReg(MI.getOperand(1).getReg())
12989           .addMBB(sinkMBB);
12990     }
12991
12992     //  copy0MBB:
12993     //   %FalseValue = ...
12994     //   # fallthrough to sinkMBB
12995     BB = copy0MBB;
12996
12997     // Update machine-CFG edges
12998     BB->addSuccessor(sinkMBB);
12999
13000     //  sinkMBB:
13001     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13002     //  ...
13003     BB = sinkMBB;
13004     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13005         .addReg(MI.getOperand(3).getReg())
13006         .addMBB(copy0MBB)
13007         .addReg(MI.getOperand(2).getReg())
13008         .addMBB(thisMBB);
13009   } else if (MI.getOpcode() == PPC::ReadTB) {
13010     // To read the 64-bit time-base register on a 32-bit target, we read the
13011     // two halves. Should the counter have wrapped while it was being read, we
13012     // need to try again.
13013     // ...
13014     // readLoop:
13015     // mfspr Rx,TBU # load from TBU
13016     // mfspr Ry,TB  # load from TB
13017     // mfspr Rz,TBU # load from TBU
13018     // cmpw crX,Rx,Rz # check if 'old'='new'
13019     // bne readLoop   # branch if they're not equal
13020     // ...
13021
13022     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13023     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13024     DebugLoc dl = MI.getDebugLoc();
13025     F->insert(It, readMBB);
13026     F->insert(It, sinkMBB);
13027
13028     // Transfer the remainder of BB and its successor edges to sinkMBB.
13029     sinkMBB->splice(sinkMBB->begin(), BB,
13030                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
13031     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13032
13033     BB->addSuccessor(readMBB);
13034     BB = readMBB;
13035
13036     MachineRegisterInfo &RegInfo = F->getRegInfo();
13037     Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13038     Register LoReg = MI.getOperand(0).getReg();
13039     Register HiReg = MI.getOperand(1).getReg();
13040
13041     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13042     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13043     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13044
13045     Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13046
13047     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13048         .addReg(HiReg)
13049         .addReg(ReadAgainReg);
13050     BuildMI(BB, dl, TII->get(PPC::BCC))
13051         .addImm(PPC::PRED_NE)
13052         .addReg(CmpReg)
13053         .addMBB(readMBB);
13054
13055     BB->addSuccessor(readMBB);
13056     BB->addSuccessor(sinkMBB);
13057   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13058     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13059   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13060     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13061   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13062     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13063   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13064     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13065
13066   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13067     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13068   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13069     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13070   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13071     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13072   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13073     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13074
13075   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13076     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13077   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13078     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13079   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13080     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13081   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13082     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13083
13084   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13085     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13086   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13087     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
13088   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13089     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
13090   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13091     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
13092
13093   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13094     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
13095   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13096     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
13097   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13098     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
13099   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13100     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
13101
13102   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13103     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
13104   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13105     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
13106   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13107     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
13108   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
13109     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
13110
13111   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
13112     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
13113   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
13114     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
13115   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
13116     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
13117   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
13118     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
13119
13120   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
13121     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
13122   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
13123     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
13124   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
13125     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
13126   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
13127     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
13128
13129   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
13130     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
13131   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
13132     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
13133   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
13134     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
13135   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
13136     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
13137
13138   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
13139     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
13140   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
13141     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
13142   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
13143     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
13144   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
13145     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
13146
13147   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
13148     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
13149   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
13150     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
13151   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
13152     BB = EmitAtomicBinary(MI, BB, 4, 0);
13153   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
13154     BB = EmitAtomicBinary(MI, BB, 8, 0);
13155   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
13156            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
13157            (Subtarget.hasPartwordAtomics() &&
13158             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
13159            (Subtarget.hasPartwordAtomics() &&
13160             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
13161     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
13162
13163     auto LoadMnemonic = PPC::LDARX;
13164     auto StoreMnemonic = PPC::STDCX;
13165     switch (MI.getOpcode()) {
13166     default:
13167       llvm_unreachable("Compare and swap of unknown size");
13168     case PPC::ATOMIC_CMP_SWAP_I8:
13169       LoadMnemonic = PPC::LBARX;
13170       StoreMnemonic = PPC::STBCX;
13171       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13172       break;
13173     case PPC::ATOMIC_CMP_SWAP_I16:
13174       LoadMnemonic = PPC::LHARX;
13175       StoreMnemonic = PPC::STHCX;
13176       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13177       break;
13178     case PPC::ATOMIC_CMP_SWAP_I32:
13179       LoadMnemonic = PPC::LWARX;
13180       StoreMnemonic = PPC::STWCX;
13181       break;
13182     case PPC::ATOMIC_CMP_SWAP_I64:
13183       LoadMnemonic = PPC::LDARX;
13184       StoreMnemonic = PPC::STDCX;
13185       break;
13186     }
13187     MachineRegisterInfo &RegInfo = F->getRegInfo();
13188     Register dest = MI.getOperand(0).getReg();
13189     Register ptrA = MI.getOperand(1).getReg();
13190     Register ptrB = MI.getOperand(2).getReg();
13191     Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13192     Register oldval = MI.getOperand(3).getReg();
13193     Register newval = MI.getOperand(4).getReg();
13194     DebugLoc dl = MI.getDebugLoc();
13195
13196     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13197     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13198     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13199     F->insert(It, loop1MBB);
13200     F->insert(It, loop2MBB);
13201     F->insert(It, exitMBB);
13202     exitMBB->splice(exitMBB->begin(), BB,
13203                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
13204     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13205
13206     //  thisMBB:
13207     //   ...
13208     //   fallthrough --> loopMBB
13209     BB->addSuccessor(loop1MBB);
13210
13211     // loop1MBB:
13212     //   l[bhwd]arx dest, ptr
13213     //   cmp[wd] dest, oldval
13214     //   bne- exitBB
13215     // loop2MBB:
13216     //   st[bhwd]cx. newval, ptr
13217     //   bne- loopMBB
13218     //   b exitBB
13219     // exitBB:
13220     BB = loop1MBB;
13221     BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
13222     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
13223         .addReg(dest)
13224         .addReg(oldval);
13225     BuildMI(BB, dl, TII->get(PPC::BCC))
13226         .addImm(PPC::PRED_NE)
13227         .addReg(CrReg)
13228         .addMBB(exitMBB);
13229     BB->addSuccessor(loop2MBB);
13230     BB->addSuccessor(exitMBB);
13231
13232     BB = loop2MBB;
13233     BuildMI(BB, dl, TII->get(StoreMnemonic))
13234         .addReg(newval)
13235         .addReg(ptrA)
13236         .addReg(ptrB);
13237     BuildMI(BB, dl, TII->get(PPC::BCC))
13238         .addImm(PPC::PRED_NE)
13239         .addReg(PPC::CR0)
13240         .addMBB(loop1MBB);
13241     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13242     BB->addSuccessor(loop1MBB);
13243     BB->addSuccessor(exitMBB);
13244
13245     //  exitMBB:
13246     //   ...
13247     BB = exitMBB;
13248   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
13249              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
13250     // We must use 64-bit registers for addresses when targeting 64-bit,
13251     // since we're actually doing arithmetic on them.  Other registers
13252     // can be 32-bit.
13253     bool is64bit = Subtarget.isPPC64();
13254     bool isLittleEndian = Subtarget.isLittleEndian();
13255     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
13256
13257     Register dest = MI.getOperand(0).getReg();
13258     Register ptrA = MI.getOperand(1).getReg();
13259     Register ptrB = MI.getOperand(2).getReg();
13260     Register oldval = MI.getOperand(3).getReg();
13261     Register newval = MI.getOperand(4).getReg();
13262     DebugLoc dl = MI.getDebugLoc();
13263
13264     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13265     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13266     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13267     F->insert(It, loop1MBB);
13268     F->insert(It, loop2MBB);
13269     F->insert(It, exitMBB);
13270     exitMBB->splice(exitMBB->begin(), BB,
13271                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
13272     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13273
13274     MachineRegisterInfo &RegInfo = F->getRegInfo();
13275     const TargetRegisterClass *RC =
13276         is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13277     const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13278
13279     Register PtrReg = RegInfo.createVirtualRegister(RC);
13280     Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13281     Register ShiftReg =
13282         isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13283     Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13284     Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13285     Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13286     Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13287     Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13288     Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13289     Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13290     Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13291     Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13292     Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13293     Register Ptr1Reg;
13294     Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13295     Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13296     Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13297     //  thisMBB:
13298     //   ...
13299     //   fallthrough --> loopMBB
13300     BB->addSuccessor(loop1MBB);
13301
13302     // The 4-byte load must be aligned, while a char or short may be
13303     // anywhere in the word.  Hence all this nasty bookkeeping code.
13304     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
13305     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13306     //   xori shift, shift1, 24 [16]
13307     //   rlwinm ptr, ptr1, 0, 0, 29
13308     //   slw newval2, newval, shift
13309     //   slw oldval2, oldval,shift
13310     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13311     //   slw mask, mask2, shift
13312     //   and newval3, newval2, mask
13313     //   and oldval3, oldval2, mask
13314     // loop1MBB:
13315     //   lwarx tmpDest, ptr
13316     //   and tmp, tmpDest, mask
13317     //   cmpw tmp, oldval3
13318     //   bne- exitBB
13319     // loop2MBB:
13320     //   andc tmp2, tmpDest, mask
13321     //   or tmp4, tmp2, newval3
13322     //   stwcx. tmp4, ptr
13323     //   bne- loop1MBB
13324     //   b exitBB
13325     // exitBB:
13326     //   srw dest, tmpDest, shift
13327     if (ptrA != ZeroReg) {
13328       Ptr1Reg = RegInfo.createVirtualRegister(RC);
13329       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13330           .addReg(ptrA)
13331           .addReg(ptrB);
13332     } else {
13333       Ptr1Reg = ptrB;
13334     }
13335
13336     // We need use 32-bit subregister to avoid mismatch register class in 64-bit
13337     // mode.
13338     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13339         .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13340         .addImm(3)
13341         .addImm(27)
13342         .addImm(is8bit ? 28 : 27);
13343     if (!isLittleEndian)
13344       BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13345           .addReg(Shift1Reg)
13346           .addImm(is8bit ? 24 : 16);
13347     if (is64bit)
13348       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13349           .addReg(Ptr1Reg)
13350           .addImm(0)
13351           .addImm(61);
13352     else
13353       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13354           .addReg(Ptr1Reg)
13355           .addImm(0)
13356           .addImm(0)
13357           .addImm(29);
13358     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13359         .addReg(newval)
13360         .addReg(ShiftReg);
13361     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13362         .addReg(oldval)
13363         .addReg(ShiftReg);
13364     if (is8bit)
13365       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13366     else {
13367       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13368       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13369           .addReg(Mask3Reg)
13370           .addImm(65535);
13371     }
13372     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13373         .addReg(Mask2Reg)
13374         .addReg(ShiftReg);
13375     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13376         .addReg(NewVal2Reg)
13377         .addReg(MaskReg);
13378     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13379         .addReg(OldVal2Reg)
13380         .addReg(MaskReg);
13381
13382     BB = loop1MBB;
13383     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13384         .addReg(ZeroReg)
13385         .addReg(PtrReg);
13386     BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13387         .addReg(TmpDestReg)
13388         .addReg(MaskReg);
13389     BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13390         .addReg(TmpReg)
13391         .addReg(OldVal3Reg);
13392     BuildMI(BB, dl, TII->get(PPC::BCC))
13393         .addImm(PPC::PRED_NE)
13394         .addReg(CrReg)
13395         .addMBB(exitMBB);
13396     BB->addSuccessor(loop2MBB);
13397     BB->addSuccessor(exitMBB);
13398
13399     BB = loop2MBB;
13400     BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13401         .addReg(TmpDestReg)
13402         .addReg(MaskReg);
13403     BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13404         .addReg(Tmp2Reg)
13405         .addReg(NewVal3Reg);
13406     BuildMI(BB, dl, TII->get(PPC::STWCX))
13407         .addReg(Tmp4Reg)
13408         .addReg(ZeroReg)
13409         .addReg(PtrReg);
13410     BuildMI(BB, dl, TII->get(PPC::BCC))
13411         .addImm(PPC::PRED_NE)
13412         .addReg(PPC::CR0)
13413         .addMBB(loop1MBB);
13414     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13415     BB->addSuccessor(loop1MBB);
13416     BB->addSuccessor(exitMBB);
13417
13418     //  exitMBB:
13419     //   ...
13420     BB = exitMBB;
13421     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13422         .addReg(TmpReg)
13423         .addReg(ShiftReg);
13424   } else if (MI.getOpcode() == PPC::FADDrtz) {
13425     // This pseudo performs an FADD with rounding mode temporarily forced
13426     // to round-to-zero.  We emit this via custom inserter since the FPSCR
13427     // is not modeled at the SelectionDAG level.
13428     Register Dest = MI.getOperand(0).getReg();
13429     Register Src1 = MI.getOperand(1).getReg();
13430     Register Src2 = MI.getOperand(2).getReg();
13431     DebugLoc dl = MI.getDebugLoc();
13432
13433     MachineRegisterInfo &RegInfo = F->getRegInfo();
13434     Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13435
13436     // Save FPSCR value.
13437     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13438
13439     // Set rounding mode to round-to-zero.
13440     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13441         .addImm(31)
13442         .addReg(PPC::RM, RegState::ImplicitDefine);
13443
13444     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13445         .addImm(30)
13446         .addReg(PPC::RM, RegState::ImplicitDefine);
13447
13448     // Perform addition.
13449     auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13450                    .addReg(Src1)
13451                    .addReg(Src2);
13452     if (MI.getFlag(MachineInstr::NoFPExcept))
13453       MIB.setMIFlag(MachineInstr::NoFPExcept);
13454
13455     // Restore FPSCR value.
13456     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
13457   } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13458              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13459              MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13460              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
13461     unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13462                        MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13463                           ? PPC::ANDI8_rec
13464                           : PPC::ANDI_rec;
13465     bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13466                  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13467
13468     MachineRegisterInfo &RegInfo = F->getRegInfo();
13469     Register Dest = RegInfo.createVirtualRegister(
13470         Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13471
13472     DebugLoc Dl = MI.getDebugLoc();
13473     BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13474         .addReg(MI.getOperand(1).getReg())
13475         .addImm(1);
13476     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13477             MI.getOperand(0).getReg())
13478         .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13479   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
13480     DebugLoc Dl = MI.getDebugLoc();
13481     MachineRegisterInfo &RegInfo = F->getRegInfo();
13482     Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13483     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13484     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13485             MI.getOperand(0).getReg())
13486         .addReg(CRReg);
13487   } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
13488     DebugLoc Dl = MI.getDebugLoc();
13489     unsigned Imm = MI.getOperand(1).getImm();
13490     BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13491     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13492             MI.getOperand(0).getReg())
13493         .addReg(PPC::CR0EQ);
13494   } else if (MI.getOpcode() == PPC::SETRNDi) {
13495     DebugLoc dl = MI.getDebugLoc();
13496     Register OldFPSCRReg = MI.getOperand(0).getReg();
13497
13498     // Save FPSCR value.
13499     if (MRI.use_empty(OldFPSCRReg))
13500       BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13501     else
13502       BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13503
13504     // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
13505     // the following settings:
13506     //   00 Round to nearest
13507     //   01 Round to 0
13508     //   10 Round to +inf
13509     //   11 Round to -inf
13510
13511     // When the operand is immediate, using the two least significant bits of
13512     // the immediate to set the bits 62:63 of FPSCR.
13513     unsigned Mode = MI.getOperand(1).getImm();
13514     BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13515         .addImm(31)
13516         .addReg(PPC::RM, RegState::ImplicitDefine);
13517
13518     BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13519         .addImm(30)
13520         .addReg(PPC::RM, RegState::ImplicitDefine);
13521   } else if (MI.getOpcode() == PPC::SETRND) {
13522     DebugLoc dl = MI.getDebugLoc();
13523
13524     // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
13525     // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13526     // If the target doesn't have DirectMove, we should use stack to do the
13527     // conversion, because the target doesn't have the instructions like mtvsrd
13528     // or mfvsrd to do this conversion directly.
13529     auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13530       if (Subtarget.hasDirectMove()) {
13531         BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13532           .addReg(SrcReg);
13533       } else {
13534         // Use stack to do the register copy.
13535         unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13536         MachineRegisterInfo &RegInfo = F->getRegInfo();
13537         const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13538         if (RC == &PPC::F8RCRegClass) {
13539           // Copy register from F8RCRegClass to G8RCRegclass.
13540           assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13541                  "Unsupported RegClass.");
13542
13543           StoreOp = PPC::STFD;
13544           LoadOp = PPC::LD;
13545         } else {
13546           // Copy register from G8RCRegClass to F8RCRegclass.
13547           assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13548                  (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13549                  "Unsupported RegClass.");
13550         }
13551
13552         MachineFrameInfo &MFI = F->getFrameInfo();
13553         int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13554
13555         MachineMemOperand *MMOStore = F->getMachineMemOperand(
13556             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13557             MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
13558             MFI.getObjectAlign(FrameIdx));
13559
13560         // Store the SrcReg into the stack.
13561         BuildMI(*BB, MI, dl, TII->get(StoreOp))
13562           .addReg(SrcReg)
13563           .addImm(0)
13564           .addFrameIndex(FrameIdx)
13565           .addMemOperand(MMOStore);
13566
13567         MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13568             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13569             MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
13570             MFI.getObjectAlign(FrameIdx));
13571
13572         // Load from the stack where SrcReg is stored, and save to DestReg,
13573         // so we have done the RegClass conversion from RegClass::SrcReg to
13574         // RegClass::DestReg.
13575         BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13576           .addImm(0)
13577           .addFrameIndex(FrameIdx)
13578           .addMemOperand(MMOLoad);
13579       }
13580     };
13581
13582     Register OldFPSCRReg = MI.getOperand(0).getReg();
13583
13584     // Save FPSCR value.
13585     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13586
13587     // When the operand is gprc register, use two least significant bits of the
13588     // register and mtfsf instruction to set the bits 62:63 of FPSCR.
13589     //
13590     // copy OldFPSCRTmpReg, OldFPSCRReg
13591     // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13592     // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13593     // copy NewFPSCRReg, NewFPSCRTmpReg
13594     // mtfsf 255, NewFPSCRReg
13595     MachineOperand SrcOp = MI.getOperand(1);
13596     MachineRegisterInfo &RegInfo = F->getRegInfo();
13597     Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13598
13599     copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13600
13601     Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13602     Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13603
13604     // The first operand of INSERT_SUBREG should be a register which has
13605     // subregisters, we only care about its RegClass, so we should use an
13606     // IMPLICIT_DEF register.
13607     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13608     BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13609       .addReg(ImDefReg)
13610       .add(SrcOp)
13611       .addImm(1);
13612
13613     Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13614     BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13615       .addReg(OldFPSCRTmpReg)
13616       .addReg(ExtSrcReg)
13617       .addImm(0)
13618       .addImm(62);
13619
13620     Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13621     copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13622
13623     // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
13624     // bits of FPSCR.
13625     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13626       .addImm(255)
13627       .addReg(NewFPSCRReg)
13628       .addImm(0)
13629       .addImm(0);
13630   } else if (MI.getOpcode() == PPC::SETFLM) {
13631     DebugLoc Dl = MI.getDebugLoc();
13632
13633     // Result of setflm is previous FPSCR content, so we need to save it first.
13634     Register OldFPSCRReg = MI.getOperand(0).getReg();
13635     if (MRI.use_empty(OldFPSCRReg))
13636       BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13637     else
13638       BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13639
13640     // Put bits in 32:63 to FPSCR.
13641     Register NewFPSCRReg = MI.getOperand(1).getReg();
13642     BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13643         .addImm(255)
13644         .addReg(NewFPSCRReg)
13645         .addImm(0)
13646         .addImm(0);
13647   } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13648              MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13649     return emitProbedAlloca(MI, BB);
13650   } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13651     DebugLoc DL = MI.getDebugLoc();
13652     Register Src = MI.getOperand(2).getReg();
13653     Register Lo = MI.getOperand(0).getReg();
13654     Register Hi = MI.getOperand(1).getReg();
13655     BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13656         .addDef(Lo)
13657         .addUse(Src, 0, PPC::sub_gp8_x1);
13658     BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13659         .addDef(Hi)
13660         .addUse(Src, 0, PPC::sub_gp8_x0);
13661   } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13662              MI.getOpcode() == PPC::STQX_PSEUDO) {
13663     DebugLoc DL = MI.getDebugLoc();
13664     // Ptr is used as the ptr_rc_no_r0 part
13665     // of LQ/STQ's memory operand and adding result of RA and RB,
13666     // so it has to be g8rc_and_g8rc_nox0.
13667     Register Ptr =
13668         F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13669     Register Val = MI.getOperand(0).getReg();
13670     Register RA = MI.getOperand(1).getReg();
13671     Register RB = MI.getOperand(2).getReg();
13672     BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13673     BuildMI(*BB, MI, DL,
13674             MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13675                                               : TII->get(PPC::STQ))
13676         .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13677         .addImm(0)
13678         .addReg(Ptr);
13679   } else {
13680     llvm_unreachable("Unexpected instr type to insert");
13681   }
13682
13683   MI.eraseFromParent(); // The pseudo instruction is gone now.
13684   return BB;
13685 }
13686
13687 //===----------------------------------------------------------------------===//
13688 // Target Optimization Hooks
13689 //===----------------------------------------------------------------------===//
13690
13691 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13692   // For the estimates, convergence is quadratic, so we essentially double the
13693   // number of digits correct after every iteration. For both FRE and FRSQRTE,
13694   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13695   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
13696   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13697   if (VT.getScalarType() == MVT::f64)
13698     RefinementSteps++;
13699   return RefinementSteps;
13700 }
13701
13702 SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13703                                             const DenormalMode &Mode) const {
13704   // We only have VSX Vector Test for software Square Root.
13705   EVT VT = Op.getValueType();
13706   if (!isTypeLegal(MVT::i1) ||
13707       (VT != MVT::f64 &&
13708        ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
13709     return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
13710
13711   SDLoc DL(Op);
13712   // The output register of FTSQRT is CR field.
13713   SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
13714   // ftsqrt BF,FRB
13715   // Let e_b be the unbiased exponent of the double-precision
13716   // floating-point operand in register FRB.
13717   // fe_flag is set to 1 if either of the following conditions occurs.
13718   //   - The double-precision floating-point operand in register FRB is a zero,
13719   //     a NaN, or an infinity, or a negative value.
13720   //   - e_b is less than or equal to -970.
13721   // Otherwise fe_flag is set to 0.
13722   // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
13723   // not eligible for iteration. (zero/negative/infinity/nan or unbiased
13724   // exponent is less than -970)
13725   SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
13726   return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
13727                                     FTSQRT, SRIdxVal),
13728                  0);
13729 }
13730
13731 SDValue
13732 PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
13733                                                SelectionDAG &DAG) const {
13734   // We only have VSX Vector Square Root.
13735   EVT VT = Op.getValueType();
13736   if (VT != MVT::f64 &&
13737       ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
13738     return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
13739
13740   return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
13741 }
13742
13743 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13744                                            int Enabled, int &RefinementSteps,
13745                                            bool &UseOneConstNR,
13746                                            bool Reciprocal) const {
13747   EVT VT = Operand.getValueType();
13748   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13749       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13750       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13751       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13752     if (RefinementSteps == ReciprocalEstimate::Unspecified)
13753       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13754
13755     // The Newton-Raphson computation with a single constant does not provide
13756     // enough accuracy on some CPUs.
13757     UseOneConstNR = !Subtarget.needsTwoConstNR();
13758     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13759   }
13760   return SDValue();
13761 }
13762
13763 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13764                                             int Enabled,
13765                                             int &RefinementSteps) const {
13766   EVT VT = Operand.getValueType();
13767   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13768       (VT == MVT::f64 && Subtarget.hasFRE()) ||
13769       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13770       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13771     if (RefinementSteps == ReciprocalEstimate::Unspecified)
13772       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13773     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13774   }
13775   return SDValue();
13776 }
13777
13778 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13779   // Note: This functionality is used only when unsafe-fp-math is enabled, and
13780   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13781   // enabled for division), this functionality is redundant with the default
13782   // combiner logic (once the division -> reciprocal/multiply transformation
13783   // has taken place). As a result, this matters more for older cores than for
13784   // newer ones.
13785
13786   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13787   // reciprocal if there are two or more FDIVs (for embedded cores with only
13788   // one FP pipeline) for three or more FDIVs (for generic OOO cores).
13789   switch (Subtarget.getCPUDirective()) {
13790   default:
13791     return 3;
13792   case PPC::DIR_440:
13793   case PPC::DIR_A2:
13794   case PPC::DIR_E500:
13795   case PPC::DIR_E500mc:
13796   case PPC::DIR_E5500:
13797     return 2;
13798   }
13799 }
13800
13801 // isConsecutiveLSLoc needs to work even if all adds have not yet been
13802 // collapsed, and so we need to look through chains of them.
13803 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
13804                                      int64_t& Offset, SelectionDAG &DAG) {
13805   if (DAG.isBaseWithConstantOffset(Loc)) {
13806     Base = Loc.getOperand(0);
13807     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13808
13809     // The base might itself be a base plus an offset, and if so, accumulate
13810     // that as well.
13811     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
13812   }
13813 }
13814
13815 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
13816                             unsigned Bytes, int Dist,
13817                             SelectionDAG &DAG) {
13818   if (VT.getSizeInBits() / 8 != Bytes)
13819     return false;
13820
13821   SDValue BaseLoc = Base->getBasePtr();
13822   if (Loc.getOpcode() == ISD::FrameIndex) {
13823     if (BaseLoc.getOpcode() != ISD::FrameIndex)
13824       return false;
13825     const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
13826     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
13827     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13828     int FS  = MFI.getObjectSize(FI);
13829     int BFS = MFI.getObjectSize(BFI);
13830     if (FS != BFS || FS != (int)Bytes) return false;
13831     return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
13832   }
13833
13834   SDValue Base1 = Loc, Base2 = BaseLoc;
13835   int64_t Offset1 = 0, Offset2 = 0;
13836   getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
13837   getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
13838   if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13839     return true;
13840
13841   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13842   const GlobalValue *GV1 = nullptr;
13843   const GlobalValue *GV2 = nullptr;
13844   Offset1 = 0;
13845   Offset2 = 0;
13846   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
13847   bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
13848   if (isGA1 && isGA2 && GV1 == GV2)
13849     return Offset1 == (Offset2 + Dist*Bytes);
13850   return false;
13851 }
13852
13853 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
13854 // not enforce equality of the chain operands.
13855 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
13856                             unsigned Bytes, int Dist,
13857                             SelectionDAG &DAG) {
13858   if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
13859     EVT VT = LS->getMemoryVT();
13860     SDValue Loc = LS->getBasePtr();
13861     return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
13862   }
13863
13864   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
13865     EVT VT;
13866     switch (N->getConstantOperandVal(1)) {
13867     default: return false;
13868     case Intrinsic::ppc_altivec_lvx:
13869     case Intrinsic::ppc_altivec_lvxl:
13870     case Intrinsic::ppc_vsx_lxvw4x:
13871     case Intrinsic::ppc_vsx_lxvw4x_be:
13872       VT = MVT::v4i32;
13873       break;
13874     case Intrinsic::ppc_vsx_lxvd2x:
13875     case Intrinsic::ppc_vsx_lxvd2x_be:
13876       VT = MVT::v2f64;
13877       break;
13878     case Intrinsic::ppc_altivec_lvebx:
13879       VT = MVT::i8;
13880       break;
13881     case Intrinsic::ppc_altivec_lvehx:
13882       VT = MVT::i16;
13883       break;
13884     case Intrinsic::ppc_altivec_lvewx:
13885       VT = MVT::i32;
13886       break;
13887     }
13888
13889     return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13890   }
13891
13892   if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13893     EVT VT;
13894     switch (N->getConstantOperandVal(1)) {
13895     default: return false;
13896     case Intrinsic::ppc_altivec_stvx:
13897     case Intrinsic::ppc_altivec_stvxl:
13898     case Intrinsic::ppc_vsx_stxvw4x:
13899       VT = MVT::v4i32;
13900       break;
13901     case Intrinsic::ppc_vsx_stxvd2x:
13902       VT = MVT::v2f64;
13903       break;
13904     case Intrinsic::ppc_vsx_stxvw4x_be:
13905       VT = MVT::v4i32;
13906       break;
13907     case Intrinsic::ppc_vsx_stxvd2x_be:
13908       VT = MVT::v2f64;
13909       break;
13910     case Intrinsic::ppc_altivec_stvebx:
13911       VT = MVT::i8;
13912       break;
13913     case Intrinsic::ppc_altivec_stvehx:
13914       VT = MVT::i16;
13915       break;
13916     case Intrinsic::ppc_altivec_stvewx:
13917       VT = MVT::i32;
13918       break;
13919     }
13920
13921     return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13922   }
13923
13924   return false;
13925 }
13926
13927 // Return true is there is a nearyby consecutive load to the one provided
13928 // (regardless of alignment). We search up and down the chain, looking though
13929 // token factors and other loads (but nothing else). As a result, a true result
13930 // indicates that it is safe to create a new consecutive load adjacent to the
13931 // load provided.
13932 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
13933   SDValue Chain = LD->getChain();
13934   EVT VT = LD->getMemoryVT();
13935
13936   SmallSet<SDNode *, 16> LoadRoots;
13937   SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13938   SmallSet<SDNode *, 16> Visited;
13939
13940   // First, search up the chain, branching to follow all token-factor operands.
13941   // If we find a consecutive load, then we're done, otherwise, record all
13942   // nodes just above the top-level loads and token factors.
13943   while (!Queue.empty()) {
13944     SDNode *ChainNext = Queue.pop_back_val();
13945     if (!Visited.insert(ChainNext).second)
13946       continue;
13947
13948     if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
13949       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13950         return true;
13951
13952       if (!Visited.count(ChainLD->getChain().getNode()))
13953         Queue.push_back(ChainLD->getChain().getNode());
13954     } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13955       for (const SDUse &O : ChainNext->ops())
13956         if (!Visited.count(O.getNode()))
13957           Queue.push_back(O.getNode());
13958     } else
13959       LoadRoots.insert(ChainNext);
13960   }
13961
13962   // Second, search down the chain, starting from the top-level nodes recorded
13963   // in the first phase. These top-level nodes are the nodes just above all
13964   // loads and token factors. Starting with their uses, recursively look though
13965   // all loads (just the chain uses) and token factors to find a consecutive
13966   // load.
13967   Visited.clear();
13968   Queue.clear();
13969
13970   for (SDNode *I : LoadRoots) {
13971     Queue.push_back(I);
13972
13973     while (!Queue.empty()) {
13974       SDNode *LoadRoot = Queue.pop_back_val();
13975       if (!Visited.insert(LoadRoot).second)
13976         continue;
13977
13978       if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13979         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13980           return true;
13981
13982       for (SDNode *U : LoadRoot->uses())
13983         if (((isa<MemSDNode>(U) &&
13984               cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
13985              U->getOpcode() == ISD::TokenFactor) &&
13986             !Visited.count(U))
13987           Queue.push_back(U);
13988     }
13989   }
13990
13991   return false;
13992 }
13993
13994 /// This function is called when we have proved that a SETCC node can be replaced
13995 /// by subtraction (and other supporting instructions) so that the result of
13996 /// comparison is kept in a GPR instead of CR. This function is purely for
13997 /// codegen purposes and has some flags to guide the codegen process.
13998 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
13999                                      bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14000   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14001
14002   // Zero extend the operands to the largest legal integer. Originally, they
14003   // must be of a strictly smaller size.
14004   auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14005                          DAG.getConstant(Size, DL, MVT::i32));
14006   auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14007                          DAG.getConstant(Size, DL, MVT::i32));
14008
14009   // Swap if needed. Depends on the condition code.
14010   if (Swap)
14011     std::swap(Op0, Op1);
14012
14013   // Subtract extended integers.
14014   auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14015
14016   // Move the sign bit to the least significant position and zero out the rest.
14017   // Now the least significant bit carries the result of original comparison.
14018   auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14019                              DAG.getConstant(Size - 1, DL, MVT::i32));
14020   auto Final = Shifted;
14021
14022   // Complement the result if needed. Based on the condition code.
14023   if (Complement)
14024     Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14025                         DAG.getConstant(1, DL, MVT::i64));
14026
14027   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14028 }
14029
14030 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14031                                                   DAGCombinerInfo &DCI) const {
14032   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14033
14034   SelectionDAG &DAG = DCI.DAG;
14035   SDLoc DL(N);
14036
14037   // Size of integers being compared has a critical role in the following
14038   // analysis, so we prefer to do this when all types are legal.
14039   if (!DCI.isAfterLegalizeDAG())
14040     return SDValue();
14041
14042   // If all users of SETCC extend its value to a legal integer type
14043   // then we replace SETCC with a subtraction
14044   for (const SDNode *U : N->uses())
14045     if (U->getOpcode() != ISD::ZERO_EXTEND)
14046       return SDValue();
14047
14048   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14049   auto OpSize = N->getOperand(0).getValueSizeInBits();
14050
14051   unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
14052
14053   if (OpSize < Size) {
14054     switch (CC) {
14055     default: break;
14056     case ISD::SETULT:
14057       return generateEquivalentSub(N, Size, false, false, DL, DAG);
14058     case ISD::SETULE:
14059       return generateEquivalentSub(N, Size, true, true, DL, DAG);
14060     case ISD::SETUGT:
14061       return generateEquivalentSub(N, Size, false, true, DL, DAG);
14062     case ISD::SETUGE:
14063       return generateEquivalentSub(N, Size, true, false, DL, DAG);
14064     }
14065   }
14066
14067   return SDValue();
14068 }
14069
14070 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14071                                                   DAGCombinerInfo &DCI) const {
14072   SelectionDAG &DAG = DCI.DAG;
14073   SDLoc dl(N);
14074
14075   assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14076   // If we're tracking CR bits, we need to be careful that we don't have:
14077   //   trunc(binary-ops(zext(x), zext(y)))
14078   // or
14079   //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
14080   // such that we're unnecessarily moving things into GPRs when it would be
14081   // better to keep them in CR bits.
14082
14083   // Note that trunc here can be an actual i1 trunc, or can be the effective
14084   // truncation that comes from a setcc or select_cc.
14085   if (N->getOpcode() == ISD::TRUNCATE &&
14086       N->getValueType(0) != MVT::i1)
14087     return SDValue();
14088
14089   if (N->getOperand(0).getValueType() != MVT::i32 &&
14090       N->getOperand(0).getValueType() != MVT::i64)
14091     return SDValue();
14092
14093   if (N->getOpcode() == ISD::SETCC ||
14094       N->getOpcode() == ISD::SELECT_CC) {
14095     // If we're looking at a comparison, then we need to make sure that the
14096     // high bits (all except for the first) don't matter the result.
14097     ISD::CondCode CC =
14098       cast<CondCodeSDNode>(N->getOperand(
14099         N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
14100     unsigned OpBits = N->getOperand(0).getValueSizeInBits();
14101
14102     if (ISD::isSignedIntSetCC(CC)) {
14103       if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
14104           DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
14105         return SDValue();
14106     } else if (ISD::isUnsignedIntSetCC(CC)) {
14107       if (!DAG.MaskedValueIsZero(N->getOperand(0),
14108                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
14109           !DAG.MaskedValueIsZero(N->getOperand(1),
14110                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
14111         return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
14112                                              : SDValue());
14113     } else {
14114       // This is neither a signed nor an unsigned comparison, just make sure
14115       // that the high bits are equal.
14116       KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
14117       KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
14118
14119       // We don't really care about what is known about the first bit (if
14120       // anything), so pretend that it is known zero for both to ensure they can
14121       // be compared as constants.
14122       Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
14123       Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
14124
14125       if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
14126           Op1Known.getConstant() != Op2Known.getConstant())
14127         return SDValue();
14128     }
14129   }
14130
14131   // We now know that the higher-order bits are irrelevant, we just need to
14132   // make sure that all of the intermediate operations are bit operations, and
14133   // all inputs are extensions.
14134   if (N->getOperand(0).getOpcode() != ISD::AND &&
14135       N->getOperand(0).getOpcode() != ISD::OR  &&
14136       N->getOperand(0).getOpcode() != ISD::XOR &&
14137       N->getOperand(0).getOpcode() != ISD::SELECT &&
14138       N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
14139       N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
14140       N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
14141       N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
14142       N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
14143     return SDValue();
14144
14145   if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
14146       N->getOperand(1).getOpcode() != ISD::AND &&
14147       N->getOperand(1).getOpcode() != ISD::OR  &&
14148       N->getOperand(1).getOpcode() != ISD::XOR &&
14149       N->getOperand(1).getOpcode() != ISD::SELECT &&
14150       N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
14151       N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
14152       N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
14153       N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
14154       N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
14155     return SDValue();
14156
14157   SmallVector<SDValue, 4> Inputs;
14158   SmallVector<SDValue, 8> BinOps, PromOps;
14159   SmallPtrSet<SDNode *, 16> Visited;
14160
14161   for (unsigned i = 0; i < 2; ++i) {
14162     if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14163           N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14164           N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14165           N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14166         isa<ConstantSDNode>(N->getOperand(i)))
14167       Inputs.push_back(N->getOperand(i));
14168     else
14169       BinOps.push_back(N->getOperand(i));
14170
14171     if (N->getOpcode() == ISD::TRUNCATE)
14172       break;
14173   }
14174
14175   // Visit all inputs, collect all binary operations (and, or, xor and
14176   // select) that are all fed by extensions.
14177   while (!BinOps.empty()) {
14178     SDValue BinOp = BinOps.pop_back_val();
14179
14180     if (!Visited.insert(BinOp.getNode()).second)
14181       continue;
14182
14183     PromOps.push_back(BinOp);
14184
14185     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14186       // The condition of the select is not promoted.
14187       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14188         continue;
14189       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14190         continue;
14191
14192       if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14193             BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14194             BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14195            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14196           isa<ConstantSDNode>(BinOp.getOperand(i))) {
14197         Inputs.push_back(BinOp.getOperand(i));
14198       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14199                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
14200                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14201                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14202                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
14203                  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14204                  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14205                  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14206                  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
14207         BinOps.push_back(BinOp.getOperand(i));
14208       } else {
14209         // We have an input that is not an extension or another binary
14210         // operation; we'll abort this transformation.
14211         return SDValue();
14212       }
14213     }
14214   }
14215
14216   // Make sure that this is a self-contained cluster of operations (which
14217   // is not quite the same thing as saying that everything has only one
14218   // use).
14219   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14220     if (isa<ConstantSDNode>(Inputs[i]))
14221       continue;
14222
14223     for (const SDNode *User : Inputs[i].getNode()->uses()) {
14224       if (User != N && !Visited.count(User))
14225         return SDValue();
14226
14227       // Make sure that we're not going to promote the non-output-value
14228       // operand(s) or SELECT or SELECT_CC.
14229       // FIXME: Although we could sometimes handle this, and it does occur in
14230       // practice that one of the condition inputs to the select is also one of
14231       // the outputs, we currently can't deal with this.
14232       if (User->getOpcode() == ISD::SELECT) {
14233         if (User->getOperand(0) == Inputs[i])
14234           return SDValue();
14235       } else if (User->getOpcode() == ISD::SELECT_CC) {
14236         if (User->getOperand(0) == Inputs[i] ||
14237             User->getOperand(1) == Inputs[i])
14238           return SDValue();
14239       }
14240     }
14241   }
14242
14243   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14244     for (const SDNode *User : PromOps[i].getNode()->uses()) {
14245       if (User != N && !Visited.count(User))
14246         return SDValue();
14247
14248       // Make sure that we're not going to promote the non-output-value
14249       // operand(s) or SELECT or SELECT_CC.
14250       // FIXME: Although we could sometimes handle this, and it does occur in
14251       // practice that one of the condition inputs to the select is also one of
14252       // the outputs, we currently can't deal with this.
14253       if (User->getOpcode() == ISD::SELECT) {
14254         if (User->getOperand(0) == PromOps[i])
14255           return SDValue();
14256       } else if (User->getOpcode() == ISD::SELECT_CC) {
14257         if (User->getOperand(0) == PromOps[i] ||
14258             User->getOperand(1) == PromOps[i])
14259           return SDValue();
14260       }
14261     }
14262   }
14263
14264   // Replace all inputs with the extension operand.
14265   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14266     // Constants may have users outside the cluster of to-be-promoted nodes,
14267     // and so we need to replace those as we do the promotions.
14268     if (isa<ConstantSDNode>(Inputs[i]))
14269       continue;
14270     else
14271       DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
14272   }
14273
14274   std::list<HandleSDNode> PromOpHandles;
14275   for (auto &PromOp : PromOps)
14276     PromOpHandles.emplace_back(PromOp);
14277
14278   // Replace all operations (these are all the same, but have a different
14279   // (i1) return type). DAG.getNode will validate that the types of
14280   // a binary operator match, so go through the list in reverse so that
14281   // we've likely promoted both operands first. Any intermediate truncations or
14282   // extensions disappear.
14283   while (!PromOpHandles.empty()) {
14284     SDValue PromOp = PromOpHandles.back().getValue();
14285     PromOpHandles.pop_back();
14286
14287     if (PromOp.getOpcode() == ISD::TRUNCATE ||
14288         PromOp.getOpcode() == ISD::SIGN_EXTEND ||
14289         PromOp.getOpcode() == ISD::ZERO_EXTEND ||
14290         PromOp.getOpcode() == ISD::ANY_EXTEND) {
14291       if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
14292           PromOp.getOperand(0).getValueType() != MVT::i1) {
14293         // The operand is not yet ready (see comment below).
14294         PromOpHandles.emplace_front(PromOp);
14295         continue;
14296       }
14297
14298       SDValue RepValue = PromOp.getOperand(0);
14299       if (isa<ConstantSDNode>(RepValue))
14300         RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
14301
14302       DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
14303       continue;
14304     }
14305
14306     unsigned C;
14307     switch (PromOp.getOpcode()) {
14308     default:             C = 0; break;
14309     case ISD::SELECT:    C = 1; break;
14310     case ISD::SELECT_CC: C = 2; break;
14311     }
14312
14313     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14314          PromOp.getOperand(C).getValueType() != MVT::i1) ||
14315         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14316          PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
14317       // The to-be-promoted operands of this node have not yet been
14318       // promoted (this should be rare because we're going through the
14319       // list backward, but if one of the operands has several users in
14320       // this cluster of to-be-promoted nodes, it is possible).
14321       PromOpHandles.emplace_front(PromOp);
14322       continue;
14323     }
14324
14325     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14326                                 PromOp.getNode()->op_end());
14327
14328     // If there are any constant inputs, make sure they're replaced now.
14329     for (unsigned i = 0; i < 2; ++i)
14330       if (isa<ConstantSDNode>(Ops[C+i]))
14331         Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
14332
14333     DAG.ReplaceAllUsesOfValueWith(PromOp,
14334       DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
14335   }
14336
14337   // Now we're left with the initial truncation itself.
14338   if (N->getOpcode() == ISD::TRUNCATE)
14339     return N->getOperand(0);
14340
14341   // Otherwise, this is a comparison. The operands to be compared have just
14342   // changed type (to i1), but everything else is the same.
14343   return SDValue(N, 0);
14344 }
14345
14346 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
14347                                                   DAGCombinerInfo &DCI) const {
14348   SelectionDAG &DAG = DCI.DAG;
14349   SDLoc dl(N);
14350
14351   // If we're tracking CR bits, we need to be careful that we don't have:
14352   //   zext(binary-ops(trunc(x), trunc(y)))
14353   // or
14354   //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
14355   // such that we're unnecessarily moving things into CR bits that can more
14356   // efficiently stay in GPRs. Note that if we're not certain that the high
14357   // bits are set as required by the final extension, we still may need to do
14358   // some masking to get the proper behavior.
14359
14360   // This same functionality is important on PPC64 when dealing with
14361   // 32-to-64-bit extensions; these occur often when 32-bit values are used as
14362   // the return values of functions. Because it is so similar, it is handled
14363   // here as well.
14364
14365   if (N->getValueType(0) != MVT::i32 &&
14366       N->getValueType(0) != MVT::i64)
14367     return SDValue();
14368
14369   if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
14370         (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
14371     return SDValue();
14372
14373   if (N->getOperand(0).getOpcode() != ISD::AND &&
14374       N->getOperand(0).getOpcode() != ISD::OR  &&
14375       N->getOperand(0).getOpcode() != ISD::XOR &&
14376       N->getOperand(0).getOpcode() != ISD::SELECT &&
14377       N->getOperand(0).getOpcode() != ISD::SELECT_CC)
14378     return SDValue();
14379
14380   SmallVector<SDValue, 4> Inputs;
14381   SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
14382   SmallPtrSet<SDNode *, 16> Visited;
14383
14384   // Visit all inputs, collect all binary operations (and, or, xor and
14385   // select) that are all fed by truncations.
14386   while (!BinOps.empty()) {
14387     SDValue BinOp = BinOps.pop_back_val();
14388
14389     if (!Visited.insert(BinOp.getNode()).second)
14390       continue;
14391
14392     PromOps.push_back(BinOp);
14393
14394     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14395       // The condition of the select is not promoted.
14396       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14397         continue;
14398       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14399         continue;
14400
14401       if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14402           isa<ConstantSDNode>(BinOp.getOperand(i))) {
14403         Inputs.push_back(BinOp.getOperand(i));
14404       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14405                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
14406                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14407                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14408                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
14409         BinOps.push_back(BinOp.getOperand(i));
14410       } else {
14411         // We have an input that is not a truncation or another binary
14412         // operation; we'll abort this transformation.
14413         return SDValue();
14414       }
14415     }
14416   }
14417
14418   // The operands of a select that must be truncated when the select is
14419   // promoted because the operand is actually part of the to-be-promoted set.
14420   DenseMap<SDNode *, EVT> SelectTruncOp[2];
14421
14422   // Make sure that this is a self-contained cluster of operations (which
14423   // is not quite the same thing as saying that everything has only one
14424   // use).
14425   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14426     if (isa<ConstantSDNode>(Inputs[i]))
14427       continue;
14428
14429     for (SDNode *User : Inputs[i].getNode()->uses()) {
14430       if (User != N && !Visited.count(User))
14431         return SDValue();
14432
14433       // If we're going to promote the non-output-value operand(s) or SELECT or
14434       // SELECT_CC, record them for truncation.
14435       if (User->getOpcode() == ISD::SELECT) {
14436         if (User->getOperand(0) == Inputs[i])
14437           SelectTruncOp[0].insert(std::make_pair(User,
14438                                     User->getOperand(0).getValueType()));
14439       } else if (User->getOpcode() == ISD::SELECT_CC) {
14440         if (User->getOperand(0) == Inputs[i])
14441           SelectTruncOp[0].insert(std::make_pair(User,
14442                                     User->getOperand(0).getValueType()));
14443         if (User->getOperand(1) == Inputs[i])
14444           SelectTruncOp[1].insert(std::make_pair(User,
14445                                     User->getOperand(1).getValueType()));
14446       }
14447     }
14448   }
14449
14450   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14451     for (SDNode *User : PromOps[i].getNode()->uses()) {
14452       if (User != N && !Visited.count(User))
14453         return SDValue();
14454
14455       // If we're going to promote the non-output-value operand(s) or SELECT or
14456       // SELECT_CC, record them for truncation.
14457       if (User->getOpcode() == ISD::SELECT) {
14458         if (User->getOperand(0) == PromOps[i])
14459           SelectTruncOp[0].insert(std::make_pair(User,
14460                                     User->getOperand(0).getValueType()));
14461       } else if (User->getOpcode() == ISD::SELECT_CC) {
14462         if (User->getOperand(0) == PromOps[i])
14463           SelectTruncOp[0].insert(std::make_pair(User,
14464                                     User->getOperand(0).getValueType()));
14465         if (User->getOperand(1) == PromOps[i])
14466           SelectTruncOp[1].insert(std::make_pair(User,
14467                                     User->getOperand(1).getValueType()));
14468       }
14469     }
14470   }
14471
14472   unsigned PromBits = N->getOperand(0).getValueSizeInBits();
14473   bool ReallyNeedsExt = false;
14474   if (N->getOpcode() != ISD::ANY_EXTEND) {
14475     // If all of the inputs are not already sign/zero extended, then
14476     // we'll still need to do that at the end.
14477     for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14478       if (isa<ConstantSDNode>(Inputs[i]))
14479         continue;
14480
14481       unsigned OpBits =
14482         Inputs[i].getOperand(0).getValueSizeInBits();
14483       assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
14484
14485       if ((N->getOpcode() == ISD::ZERO_EXTEND &&
14486            !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
14487                                   APInt::getHighBitsSet(OpBits,
14488                                                         OpBits-PromBits))) ||
14489           (N->getOpcode() == ISD::SIGN_EXTEND &&
14490            DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
14491              (OpBits-(PromBits-1)))) {
14492         ReallyNeedsExt = true;
14493         break;
14494       }
14495     }
14496   }
14497
14498   // Replace all inputs, either with the truncation operand, or a
14499   // truncation or extension to the final output type.
14500   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14501     // Constant inputs need to be replaced with the to-be-promoted nodes that
14502     // use them because they might have users outside of the cluster of
14503     // promoted nodes.
14504     if (isa<ConstantSDNode>(Inputs[i]))
14505       continue;
14506
14507     SDValue InSrc = Inputs[i].getOperand(0);
14508     if (Inputs[i].getValueType() == N->getValueType(0))
14509       DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
14510     else if (N->getOpcode() == ISD::SIGN_EXTEND)
14511       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14512         DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
14513     else if (N->getOpcode() == ISD::ZERO_EXTEND)
14514       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14515         DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
14516     else
14517       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14518         DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14519   }
14520
14521   std::list<HandleSDNode> PromOpHandles;
14522   for (auto &PromOp : PromOps)
14523     PromOpHandles.emplace_back(PromOp);
14524
14525   // Replace all operations (these are all the same, but have a different
14526   // (promoted) return type). DAG.getNode will validate that the types of
14527   // a binary operator match, so go through the list in reverse so that
14528   // we've likely promoted both operands first.
14529   while (!PromOpHandles.empty()) {
14530     SDValue PromOp = PromOpHandles.back().getValue();
14531     PromOpHandles.pop_back();
14532
14533     unsigned C;
14534     switch (PromOp.getOpcode()) {
14535     default:             C = 0; break;
14536     case ISD::SELECT:    C = 1; break;
14537     case ISD::SELECT_CC: C = 2; break;
14538     }
14539
14540     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14541          PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14542         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14543          PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14544       // The to-be-promoted operands of this node have not yet been
14545       // promoted (this should be rare because we're going through the
14546       // list backward, but if one of the operands has several users in
14547       // this cluster of to-be-promoted nodes, it is possible).
14548       PromOpHandles.emplace_front(PromOp);
14549       continue;
14550     }
14551
14552     // For SELECT and SELECT_CC nodes, we do a similar check for any
14553     // to-be-promoted comparison inputs.
14554     if (PromOp.getOpcode() == ISD::SELECT ||
14555         PromOp.getOpcode() == ISD::SELECT_CC) {
14556       if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14557            PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14558           (SelectTruncOp[1].count(PromOp.getNode()) &&
14559            PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14560         PromOpHandles.emplace_front(PromOp);
14561         continue;
14562       }
14563     }
14564
14565     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14566                                 PromOp.getNode()->op_end());
14567
14568     // If this node has constant inputs, then they'll need to be promoted here.
14569     for (unsigned i = 0; i < 2; ++i) {
14570       if (!isa<ConstantSDNode>(Ops[C+i]))
14571         continue;
14572       if (Ops[C+i].getValueType() == N->getValueType(0))
14573         continue;
14574
14575       if (N->getOpcode() == ISD::SIGN_EXTEND)
14576         Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14577       else if (N->getOpcode() == ISD::ZERO_EXTEND)
14578         Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14579       else
14580         Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14581     }
14582
14583     // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14584     // truncate them again to the original value type.
14585     if (PromOp.getOpcode() == ISD::SELECT ||
14586         PromOp.getOpcode() == ISD::SELECT_CC) {
14587       auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14588       if (SI0 != SelectTruncOp[0].end())
14589         Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14590       auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14591       if (SI1 != SelectTruncOp[1].end())
14592         Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14593     }
14594
14595     DAG.ReplaceAllUsesOfValueWith(PromOp,
14596       DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14597   }
14598
14599   // Now we're left with the initial extension itself.
14600   if (!ReallyNeedsExt)
14601     return N->getOperand(0);
14602
14603   // To zero extend, just mask off everything except for the first bit (in the
14604   // i1 case).
14605   if (N->getOpcode() == ISD::ZERO_EXTEND)
14606     return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14607                        DAG.getConstant(APInt::getLowBitsSet(
14608                                          N->getValueSizeInBits(0), PromBits),
14609                                        dl, N->getValueType(0)));
14610
14611   assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14612          "Invalid extension type");
14613   EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14614   SDValue ShiftCst =
14615       DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14616   return DAG.getNode(
14617       ISD::SRA, dl, N->getValueType(0),
14618       DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14619       ShiftCst);
14620 }
14621
14622 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14623                                         DAGCombinerInfo &DCI) const {
14624   assert(N->getOpcode() == ISD::SETCC &&
14625          "Should be called with a SETCC node");
14626
14627   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14628   if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14629     SDValue LHS = N->getOperand(0);
14630     SDValue RHS = N->getOperand(1);
14631
14632     // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14633     if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14634         LHS.hasOneUse())
14635       std::swap(LHS, RHS);
14636
14637     // x == 0-y --> x+y == 0
14638     // x != 0-y --> x+y != 0
14639     if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14640         RHS.hasOneUse()) {
14641       SDLoc DL(N);
14642       SelectionDAG &DAG = DCI.DAG;
14643       EVT VT = N->getValueType(0);
14644       EVT OpVT = LHS.getValueType();
14645       SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14646       return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14647     }
14648   }
14649
14650   return DAGCombineTruncBoolExt(N, DCI);
14651 }
14652
14653 // Is this an extending load from an f32 to an f64?
14654 static bool isFPExtLoad(SDValue Op) {
14655   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14656     return LD->getExtensionType() == ISD::EXTLOAD &&
14657       Op.getValueType() == MVT::f64;
14658   return false;
14659 }
14660
14661 /// Reduces the number of fp-to-int conversion when building a vector.
14662 ///
14663 /// If this vector is built out of floating to integer conversions,
14664 /// transform it to a vector built out of floating point values followed by a
14665 /// single floating to integer conversion of the vector.
14666 /// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
14667 /// becomes (fptosi (build_vector ($A, $B, ...)))
14668 SDValue PPCTargetLowering::
14669 combineElementTruncationToVectorTruncation(SDNode *N,
14670                                            DAGCombinerInfo &DCI) const {
14671   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14672          "Should be called with a BUILD_VECTOR node");
14673
14674   SelectionDAG &DAG = DCI.DAG;
14675   SDLoc dl(N);
14676
14677   SDValue FirstInput = N->getOperand(0);
14678   assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14679          "The input operand must be an fp-to-int conversion.");
14680
14681   // This combine happens after legalization so the fp_to_[su]i nodes are
14682   // already converted to PPCSISD nodes.
14683   unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14684   if (FirstConversion == PPCISD::FCTIDZ ||
14685       FirstConversion == PPCISD::FCTIDUZ ||
14686       FirstConversion == PPCISD::FCTIWZ ||
14687       FirstConversion == PPCISD::FCTIWUZ) {
14688     bool IsSplat = true;
14689     bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14690       FirstConversion == PPCISD::FCTIWUZ;
14691     EVT SrcVT = FirstInput.getOperand(0).getValueType();
14692     SmallVector<SDValue, 4> Ops;
14693     EVT TargetVT = N->getValueType(0);
14694     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14695       SDValue NextOp = N->getOperand(i);
14696       if (NextOp.getOpcode() != PPCISD::MFVSR)
14697         return SDValue();
14698       unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14699       if (NextConversion != FirstConversion)
14700         return SDValue();
14701       // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14702       // This is not valid if the input was originally double precision. It is
14703       // also not profitable to do unless this is an extending load in which
14704       // case doing this combine will allow us to combine consecutive loads.
14705       if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14706         return SDValue();
14707       if (N->getOperand(i) != FirstInput)
14708         IsSplat = false;
14709     }
14710
14711     // If this is a splat, we leave it as-is since there will be only a single
14712     // fp-to-int conversion followed by a splat of the integer. This is better
14713     // for 32-bit and smaller ints and neutral for 64-bit ints.
14714     if (IsSplat)
14715       return SDValue();
14716
14717     // Now that we know we have the right type of node, get its operands
14718     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14719       SDValue In = N->getOperand(i).getOperand(0);
14720       if (Is32Bit) {
14721         // For 32-bit values, we need to add an FP_ROUND node (if we made it
14722         // here, we know that all inputs are extending loads so this is safe).
14723         if (In.isUndef())
14724           Ops.push_back(DAG.getUNDEF(SrcVT));
14725         else {
14726           SDValue Trunc =
14727               DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
14728                           DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
14729           Ops.push_back(Trunc);
14730         }
14731       } else
14732         Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14733     }
14734
14735     unsigned Opcode;
14736     if (FirstConversion == PPCISD::FCTIDZ ||
14737         FirstConversion == PPCISD::FCTIWZ)
14738       Opcode = ISD::FP_TO_SINT;
14739     else
14740       Opcode = ISD::FP_TO_UINT;
14741
14742     EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14743     SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14744     return DAG.getNode(Opcode, dl, TargetVT, BV);
14745   }
14746   return SDValue();
14747 }
14748
14749 /// Reduce the number of loads when building a vector.
14750 ///
14751 /// Building a vector out of multiple loads can be converted to a load
14752 /// of the vector type if the loads are consecutive. If the loads are
14753 /// consecutive but in descending order, a shuffle is added at the end
14754 /// to reorder the vector.
14755 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
14756   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14757          "Should be called with a BUILD_VECTOR node");
14758
14759   SDLoc dl(N);
14760
14761   // Return early for non byte-sized type, as they can't be consecutive.
14762   if (!N->getValueType(0).getVectorElementType().isByteSized())
14763     return SDValue();
14764
14765   bool InputsAreConsecutiveLoads = true;
14766   bool InputsAreReverseConsecutive = true;
14767   unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14768   SDValue FirstInput = N->getOperand(0);
14769   bool IsRoundOfExtLoad = false;
14770   LoadSDNode *FirstLoad = nullptr;
14771
14772   if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14773       FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14774     FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
14775     IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
14776   }
14777   // Not a build vector of (possibly fp_rounded) loads.
14778   if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14779       N->getNumOperands() == 1)
14780     return SDValue();
14781
14782   if (!IsRoundOfExtLoad)
14783     FirstLoad = cast<LoadSDNode>(FirstInput);
14784
14785   SmallVector<LoadSDNode *, 4> InputLoads;
14786   InputLoads.push_back(FirstLoad);
14787   for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14788     // If any inputs are fp_round(extload), they all must be.
14789     if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14790       return SDValue();
14791
14792     SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14793       N->getOperand(i);
14794     if (NextInput.getOpcode() != ISD::LOAD)
14795       return SDValue();
14796
14797     SDValue PreviousInput =
14798       IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14799     LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
14800     LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
14801
14802     // If any inputs are fp_round(extload), they all must be.
14803     if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14804       return SDValue();
14805
14806     // We only care about regular loads. The PPC-specific load intrinsics
14807     // will not lead to a merge opportunity.
14808     if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
14809       InputsAreConsecutiveLoads = false;
14810     if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
14811       InputsAreReverseConsecutive = false;
14812
14813     // Exit early if the loads are neither consecutive nor reverse consecutive.
14814     if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14815       return SDValue();
14816     InputLoads.push_back(LD2);
14817   }
14818
14819   assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14820          "The loads cannot be both consecutive and reverse consecutive.");
14821
14822   SDValue WideLoad;
14823   SDValue ReturnSDVal;
14824   if (InputsAreConsecutiveLoads) {
14825     assert(FirstLoad && "Input needs to be a LoadSDNode.");
14826     WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
14827                            FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14828                            FirstLoad->getAlign());
14829     ReturnSDVal = WideLoad;
14830   } else if (InputsAreReverseConsecutive) {
14831     LoadSDNode *LastLoad = InputLoads.back();
14832     assert(LastLoad && "Input needs to be a LoadSDNode.");
14833     WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
14834                            LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
14835                            LastLoad->getAlign());
14836     SmallVector<int, 16> Ops;
14837     for (int i = N->getNumOperands() - 1; i >= 0; i--)
14838       Ops.push_back(i);
14839
14840     ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
14841                                        DAG.getUNDEF(N->getValueType(0)), Ops);
14842   } else
14843     return SDValue();
14844
14845   for (auto *LD : InputLoads)
14846     DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
14847   return ReturnSDVal;
14848 }
14849
14850 // This function adds the required vector_shuffle needed to get
14851 // the elements of the vector extract in the correct position
14852 // as specified by the CorrectElems encoding.
14853 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
14854                                       SDValue Input, uint64_t Elems,
14855                                       uint64_t CorrectElems) {
14856   SDLoc dl(N);
14857
14858   unsigned NumElems = Input.getValueType().getVectorNumElements();
14859   SmallVector<int, 16> ShuffleMask(NumElems, -1);
14860
14861   // Knowing the element indices being extracted from the original
14862   // vector and the order in which they're being inserted, just put
14863   // them at element indices required for the instruction.
14864   for (unsigned i = 0; i < N->getNumOperands(); i++) {
14865     if (DAG.getDataLayout().isLittleEndian())
14866       ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14867     else
14868       ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14869     CorrectElems = CorrectElems >> 8;
14870     Elems = Elems >> 8;
14871   }
14872
14873   SDValue Shuffle =
14874       DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14875                            DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14876
14877   EVT VT = N->getValueType(0);
14878   SDValue Conv = DAG.getBitcast(VT, Shuffle);
14879
14880   EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14881                                Input.getValueType().getVectorElementType(),
14882                                VT.getVectorNumElements());
14883   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14884                      DAG.getValueType(ExtVT));
14885 }
14886
14887 // Look for build vector patterns where input operands come from sign
14888 // extended vector_extract elements of specific indices. If the correct indices
14889 // aren't used, add a vector shuffle to fix up the indices and create
14890 // SIGN_EXTEND_INREG node which selects the vector sign extend instructions
14891 // during instruction selection.
14892 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
14893   // This array encodes the indices that the vector sign extend instructions
14894   // extract from when extending from one type to another for both BE and LE.
14895   // The right nibble of each byte corresponds to the LE incides.
14896   // and the left nibble of each byte corresponds to the BE incides.
14897   // For example: 0x3074B8FC  byte->word
14898   // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14899   // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14900   // For example: 0x000070F8  byte->double word
14901   // For LE: the allowed indices are: 0x0,0x8
14902   // For BE: the allowed indices are: 0x7,0xF
14903   uint64_t TargetElems[] = {
14904       0x3074B8FC, // b->w
14905       0x000070F8, // b->d
14906       0x10325476, // h->w
14907       0x00003074, // h->d
14908       0x00001032, // w->d
14909   };
14910
14911   uint64_t Elems = 0;
14912   int Index;
14913   SDValue Input;
14914
14915   auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14916     if (!Op)
14917       return false;
14918     if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14919         Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14920       return false;
14921
14922     // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14923     // of the right width.
14924     SDValue Extract = Op.getOperand(0);
14925     if (Extract.getOpcode() == ISD::ANY_EXTEND)
14926       Extract = Extract.getOperand(0);
14927     if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14928       return false;
14929
14930     ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14931     if (!ExtOp)
14932       return false;
14933
14934     Index = ExtOp->getZExtValue();
14935     if (Input && Input != Extract.getOperand(0))
14936       return false;
14937
14938     if (!Input)
14939       Input = Extract.getOperand(0);
14940
14941     Elems = Elems << 8;
14942     Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14943     Elems |= Index;
14944
14945     return true;
14946   };
14947
14948   // If the build vector operands aren't sign extended vector extracts,
14949   // of the same input vector, then return.
14950   for (unsigned i = 0; i < N->getNumOperands(); i++) {
14951     if (!isSExtOfVecExtract(N->getOperand(i))) {
14952       return SDValue();
14953     }
14954   }
14955
14956   // If the vector extract indices are not correct, add the appropriate
14957   // vector_shuffle.
14958   int TgtElemArrayIdx;
14959   int InputSize = Input.getValueType().getScalarSizeInBits();
14960   int OutputSize = N->getValueType(0).getScalarSizeInBits();
14961   if (InputSize + OutputSize == 40)
14962     TgtElemArrayIdx = 0;
14963   else if (InputSize + OutputSize == 72)
14964     TgtElemArrayIdx = 1;
14965   else if (InputSize + OutputSize == 48)
14966     TgtElemArrayIdx = 2;
14967   else if (InputSize + OutputSize == 80)
14968     TgtElemArrayIdx = 3;
14969   else if (InputSize + OutputSize == 96)
14970     TgtElemArrayIdx = 4;
14971   else
14972     return SDValue();
14973
14974   uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14975   CorrectElems = DAG.getDataLayout().isLittleEndian()
14976                      ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14977                      : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14978   if (Elems != CorrectElems) {
14979     return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14980   }
14981
14982   // Regular lowering will catch cases where a shuffle is not needed.
14983   return SDValue();
14984 }
14985
14986 // Look for the pattern of a load from a narrow width to i128, feeding
14987 // into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
14988 // (LXVRZX). This node represents a zero extending load that will be matched
14989 // to the Load VSX Vector Rightmost instructions.
14990 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
14991   SDLoc DL(N);
14992
14993   // This combine is only eligible for a BUILD_VECTOR of v1i128.
14994   if (N->getValueType(0) != MVT::v1i128)
14995     return SDValue();
14996
14997   SDValue Operand = N->getOperand(0);
14998   // Proceed with the transformation if the operand to the BUILD_VECTOR
14999   // is a load instruction.
15000   if (Operand.getOpcode() != ISD::LOAD)
15001     return SDValue();
15002
15003   auto *LD = cast<LoadSDNode>(Operand);
15004   EVT MemoryType = LD->getMemoryVT();
15005
15006   // This transformation is only valid if the we are loading either a byte,
15007   // halfword, word, or doubleword.
15008   bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15009                      MemoryType == MVT::i32 || MemoryType == MVT::i64;
15010
15011   // Ensure that the load from the narrow width is being zero extended to i128.
15012   if (!ValidLDType ||
15013       (LD->getExtensionType() != ISD::ZEXTLOAD &&
15014        LD->getExtensionType() != ISD::EXTLOAD))
15015     return SDValue();
15016
15017   SDValue LoadOps[] = {
15018       LD->getChain(), LD->getBasePtr(),
15019       DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
15020
15021   return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
15022                                  DAG.getVTList(MVT::v1i128, MVT::Other),
15023                                  LoadOps, MemoryType, LD->getMemOperand());
15024 }
15025
15026 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
15027                                                  DAGCombinerInfo &DCI) const {
15028   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15029          "Should be called with a BUILD_VECTOR node");
15030
15031   SelectionDAG &DAG = DCI.DAG;
15032   SDLoc dl(N);
15033
15034   if (!Subtarget.hasVSX())
15035     return SDValue();
15036
15037   // The target independent DAG combiner will leave a build_vector of
15038   // float-to-int conversions intact. We can generate MUCH better code for
15039   // a float-to-int conversion of a vector of floats.
15040   SDValue FirstInput = N->getOperand(0);
15041   if (FirstInput.getOpcode() == PPCISD::MFVSR) {
15042     SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
15043     if (Reduced)
15044       return Reduced;
15045   }
15046
15047   // If we're building a vector out of consecutive loads, just load that
15048   // vector type.
15049   SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
15050   if (Reduced)
15051     return Reduced;
15052
15053   // If we're building a vector out of extended elements from another vector
15054   // we have P9 vector integer extend instructions. The code assumes legal
15055   // input types (i.e. it can't handle things like v4i16) so do not run before
15056   // legalization.
15057   if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
15058     Reduced = combineBVOfVecSExt(N, DAG);
15059     if (Reduced)
15060       return Reduced;
15061   }
15062
15063   // On Power10, the Load VSX Vector Rightmost instructions can be utilized
15064   // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
15065   // is a load from <valid narrow width> to i128.
15066   if (Subtarget.isISA3_1()) {
15067     SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15068     if (BVOfZLoad)
15069       return BVOfZLoad;
15070   }
15071
15072   if (N->getValueType(0) != MVT::v2f64)
15073     return SDValue();
15074
15075   // Looking for:
15076   // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
15077   if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15078       FirstInput.getOpcode() != ISD::UINT_TO_FP)
15079     return SDValue();
15080   if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
15081       N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
15082     return SDValue();
15083   if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
15084     return SDValue();
15085
15086   SDValue Ext1 = FirstInput.getOperand(0);
15087   SDValue Ext2 = N->getOperand(1).getOperand(0);
15088   if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15089      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15090     return SDValue();
15091
15092   ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
15093   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
15094   if (!Ext1Op || !Ext2Op)
15095     return SDValue();
15096   if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
15097       Ext1.getOperand(0) != Ext2.getOperand(0))
15098     return SDValue();
15099
15100   int FirstElem = Ext1Op->getZExtValue();
15101   int SecondElem = Ext2Op->getZExtValue();
15102   int SubvecIdx;
15103   if (FirstElem == 0 && SecondElem == 1)
15104     SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
15105   else if (FirstElem == 2 && SecondElem == 3)
15106     SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
15107   else
15108     return SDValue();
15109
15110   SDValue SrcVec = Ext1.getOperand(0);
15111   auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
15112     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
15113   return DAG.getNode(NodeType, dl, MVT::v2f64,
15114                      SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
15115 }
15116
15117 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
15118                                               DAGCombinerInfo &DCI) const {
15119   assert((N->getOpcode() == ISD::SINT_TO_FP ||
15120           N->getOpcode() == ISD::UINT_TO_FP) &&
15121          "Need an int -> FP conversion node here");
15122
15123   if (useSoftFloat() || !Subtarget.has64BitSupport())
15124     return SDValue();
15125
15126   SelectionDAG &DAG = DCI.DAG;
15127   SDLoc dl(N);
15128   SDValue Op(N, 0);
15129
15130   // Don't handle ppc_fp128 here or conversions that are out-of-range capable
15131   // from the hardware.
15132   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
15133     return SDValue();
15134   if (!Op.getOperand(0).getValueType().isSimple())
15135     return SDValue();
15136   if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
15137       Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
15138     return SDValue();
15139
15140   SDValue FirstOperand(Op.getOperand(0));
15141   bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
15142     (FirstOperand.getValueType() == MVT::i8 ||
15143      FirstOperand.getValueType() == MVT::i16);
15144   if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
15145     bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
15146     bool DstDouble = Op.getValueType() == MVT::f64;
15147     unsigned ConvOp = Signed ?
15148       (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
15149       (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
15150     SDValue WidthConst =
15151       DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
15152                             dl, false);
15153     LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
15154     SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
15155     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
15156                                          DAG.getVTList(MVT::f64, MVT::Other),
15157                                          Ops, MVT::i8, LDN->getMemOperand());
15158     DAG.makeEquivalentMemoryOrdering(LDN, Ld);
15159
15160     // For signed conversion, we need to sign-extend the value in the VSR
15161     if (Signed) {
15162       SDValue ExtOps[] = { Ld, WidthConst };
15163       SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
15164       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
15165     } else
15166       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
15167   }
15168
15169
15170   // For i32 intermediate values, unfortunately, the conversion functions
15171   // leave the upper 32 bits of the value are undefined. Within the set of
15172   // scalar instructions, we have no method for zero- or sign-extending the
15173   // value. Thus, we cannot handle i32 intermediate values here.
15174   if (Op.getOperand(0).getValueType() == MVT::i32)
15175     return SDValue();
15176
15177   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
15178          "UINT_TO_FP is supported only with FPCVT");
15179
15180   // If we have FCFIDS, then use it when converting to single-precision.
15181   // Otherwise, convert to double-precision and then round.
15182   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15183                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
15184                                                             : PPCISD::FCFIDS)
15185                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
15186                                                             : PPCISD::FCFID);
15187   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15188                   ? MVT::f32
15189                   : MVT::f64;
15190
15191   // If we're converting from a float, to an int, and back to a float again,
15192   // then we don't need the store/load pair at all.
15193   if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
15194        Subtarget.hasFPCVT()) ||
15195       (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
15196     SDValue Src = Op.getOperand(0).getOperand(0);
15197     if (Src.getValueType() == MVT::f32) {
15198       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
15199       DCI.AddToWorklist(Src.getNode());
15200     } else if (Src.getValueType() != MVT::f64) {
15201       // Make sure that we don't pick up a ppc_fp128 source value.
15202       return SDValue();
15203     }
15204
15205     unsigned FCTOp =
15206       Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
15207                                                         PPCISD::FCTIDUZ;
15208
15209     SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
15210     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
15211
15212     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
15213       FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
15214                        DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
15215       DCI.AddToWorklist(FP.getNode());
15216     }
15217
15218     return FP;
15219   }
15220
15221   return SDValue();
15222 }
15223
15224 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15225 // builtins) into loads with swaps.
15226 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
15227                                               DAGCombinerInfo &DCI) const {
15228   // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15229   // load combines.
15230   if (DCI.isBeforeLegalizeOps())
15231     return SDValue();
15232
15233   SelectionDAG &DAG = DCI.DAG;
15234   SDLoc dl(N);
15235   SDValue Chain;
15236   SDValue Base;
15237   MachineMemOperand *MMO;
15238
15239   switch (N->getOpcode()) {
15240   default:
15241     llvm_unreachable("Unexpected opcode for little endian VSX load");
15242   case ISD::LOAD: {
15243     LoadSDNode *LD = cast<LoadSDNode>(N);
15244     Chain = LD->getChain();
15245     Base = LD->getBasePtr();
15246     MMO = LD->getMemOperand();
15247     // If the MMO suggests this isn't a load of a full vector, leave
15248     // things alone.  For a built-in, we have to make the change for
15249     // correctness, so if there is a size problem that will be a bug.
15250     if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15251       return SDValue();
15252     break;
15253   }
15254   case ISD::INTRINSIC_W_CHAIN: {
15255     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15256     Chain = Intrin->getChain();
15257     // Similarly to the store case below, Intrin->getBasePtr() doesn't get
15258     // us what we want. Get operand 2 instead.
15259     Base = Intrin->getOperand(2);
15260     MMO = Intrin->getMemOperand();
15261     break;
15262   }
15263   }
15264
15265   MVT VecTy = N->getValueType(0).getSimpleVT();
15266
15267   SDValue LoadOps[] = { Chain, Base };
15268   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
15269                                          DAG.getVTList(MVT::v2f64, MVT::Other),
15270                                          LoadOps, MVT::v2f64, MMO);
15271
15272   DCI.AddToWorklist(Load.getNode());
15273   Chain = Load.getValue(1);
15274   SDValue Swap = DAG.getNode(
15275       PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15276   DCI.AddToWorklist(Swap.getNode());
15277
15278   // Add a bitcast if the resulting load type doesn't match v2f64.
15279   if (VecTy != MVT::v2f64) {
15280     SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15281     DCI.AddToWorklist(N.getNode());
15282     // Package {bitcast value, swap's chain} to match Load's shape.
15283     return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15284                        N, Swap.getValue(1));
15285   }
15286
15287   return Swap;
15288 }
15289
15290 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15291 // builtins) into stores with swaps.
15292 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
15293                                                DAGCombinerInfo &DCI) const {
15294   // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15295   // store combines.
15296   if (DCI.isBeforeLegalizeOps())
15297     return SDValue();
15298
15299   SelectionDAG &DAG = DCI.DAG;
15300   SDLoc dl(N);
15301   SDValue Chain;
15302   SDValue Base;
15303   unsigned SrcOpnd;
15304   MachineMemOperand *MMO;
15305
15306   switch (N->getOpcode()) {
15307   default:
15308     llvm_unreachable("Unexpected opcode for little endian VSX store");
15309   case ISD::STORE: {
15310     StoreSDNode *ST = cast<StoreSDNode>(N);
15311     Chain = ST->getChain();
15312     Base = ST->getBasePtr();
15313     MMO = ST->getMemOperand();
15314     SrcOpnd = 1;
15315     // If the MMO suggests this isn't a store of a full vector, leave
15316     // things alone.  For a built-in, we have to make the change for
15317     // correctness, so if there is a size problem that will be a bug.
15318     if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15319       return SDValue();
15320     break;
15321   }
15322   case ISD::INTRINSIC_VOID: {
15323     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15324     Chain = Intrin->getChain();
15325     // Intrin->getBasePtr() oddly does not get what we want.
15326     Base = Intrin->getOperand(3);
15327     MMO = Intrin->getMemOperand();
15328     SrcOpnd = 2;
15329     break;
15330   }
15331   }
15332
15333   SDValue Src = N->getOperand(SrcOpnd);
15334   MVT VecTy = Src.getValueType().getSimpleVT();
15335
15336   // All stores are done as v2f64 and possible bit cast.
15337   if (VecTy != MVT::v2f64) {
15338     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15339     DCI.AddToWorklist(Src.getNode());
15340   }
15341
15342   SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15343                              DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15344   DCI.AddToWorklist(Swap.getNode());
15345   Chain = Swap.getValue(1);
15346   SDValue StoreOps[] = { Chain, Swap, Base };
15347   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
15348                                           DAG.getVTList(MVT::Other),
15349                                           StoreOps, VecTy, MMO);
15350   DCI.AddToWorklist(Store.getNode());
15351   return Store;
15352 }
15353
15354 // Handle DAG combine for STORE (FP_TO_INT F).
15355 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
15356                                                DAGCombinerInfo &DCI) const {
15357   SelectionDAG &DAG = DCI.DAG;
15358   SDLoc dl(N);
15359   unsigned Opcode = N->getOperand(1).getOpcode();
15360   (void)Opcode;
15361   bool Strict = N->getOperand(1)->isStrictFPOpcode();
15362
15363   assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15364           Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
15365          && "Not a FP_TO_INT Instruction!");
15366
15367   SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
15368   EVT Op1VT = N->getOperand(1).getValueType();
15369   EVT ResVT = Val.getValueType();
15370
15371   if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
15372     return SDValue();
15373
15374   // Only perform combine for conversion to i64/i32 or power9 i16/i8.
15375   bool ValidTypeForStoreFltAsInt =
15376         (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
15377          (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
15378
15379   // TODO: Lower conversion from f128 on all VSX targets
15380   if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
15381     return SDValue();
15382
15383   if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
15384       cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
15385     return SDValue();
15386
15387   Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
15388
15389   // Set number of bytes being converted.
15390   unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
15391   SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
15392                    DAG.getIntPtrConstant(ByteSize, dl, false),
15393                    DAG.getValueType(Op1VT)};
15394
15395   Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
15396           DAG.getVTList(MVT::Other), Ops,
15397           cast<StoreSDNode>(N)->getMemoryVT(),
15398           cast<StoreSDNode>(N)->getMemOperand());
15399
15400   return Val;
15401 }
15402
15403 static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15404   // Check that the source of the element keeps flipping
15405   // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
15406   bool PrevElemFromFirstVec = Mask[0] < NumElts;
15407   for (int i = 1, e = Mask.size(); i < e; i++) {
15408     if (PrevElemFromFirstVec && Mask[i] < NumElts)
15409       return false;
15410     if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15411       return false;
15412     PrevElemFromFirstVec = !PrevElemFromFirstVec;
15413   }
15414   return true;
15415 }
15416
15417 static bool isSplatBV(SDValue Op) {
15418   if (Op.getOpcode() != ISD::BUILD_VECTOR)
15419     return false;
15420   SDValue FirstOp;
15421
15422   // Find first non-undef input.
15423   for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
15424     FirstOp = Op.getOperand(i);
15425     if (!FirstOp.isUndef())
15426       break;
15427   }
15428
15429   // All inputs are undef or the same as the first non-undef input.
15430   for (int i = 1, e = Op.getNumOperands(); i < e; i++)
15431     if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
15432       return false;
15433   return true;
15434 }
15435
15436 static SDValue isScalarToVec(SDValue Op) {
15437   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15438     return Op;
15439   if (Op.getOpcode() != ISD::BITCAST)
15440     return SDValue();
15441   Op = Op.getOperand(0);
15442   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15443     return Op;
15444   return SDValue();
15445 }
15446
15447 // Fix up the shuffle mask to account for the fact that the result of
15448 // scalar_to_vector is not in lane zero. This just takes all values in
15449 // the ranges specified by the min/max indices and adds the number of
15450 // elements required to ensure each element comes from the respective
15451 // position in the valid lane.
15452 // On little endian, that's just the corresponding element in the other
15453 // half of the vector. On big endian, it is in the same half but right
15454 // justified rather than left justified in that half.
15455 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15456                                             int LHSMaxIdx, int RHSMinIdx,
15457                                             int RHSMaxIdx, int HalfVec,
15458                                             unsigned ValidLaneWidth,
15459                                             const PPCSubtarget &Subtarget) {
15460   for (int i = 0, e = ShuffV.size(); i < e; i++) {
15461     int Idx = ShuffV[i];
15462     if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15463       ShuffV[i] +=
15464           Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15465   }
15466 }
15467
15468 // Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15469 // the original is:
15470 // (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15471 // In such a case, just change the shuffle mask to extract the element
15472 // from the permuted index.
15473 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15474                                const PPCSubtarget &Subtarget) {
15475   SDLoc dl(OrigSToV);
15476   EVT VT = OrigSToV.getValueType();
15477   assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15478          "Expecting a SCALAR_TO_VECTOR here");
15479   SDValue Input = OrigSToV.getOperand(0);
15480
15481   if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15482     ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15483     SDValue OrigVector = Input.getOperand(0);
15484
15485     // Can't handle non-const element indices or different vector types
15486     // for the input to the extract and the output of the scalar_to_vector.
15487     if (Idx && VT == OrigVector.getValueType()) {
15488       unsigned NumElts = VT.getVectorNumElements();
15489       assert(
15490           NumElts > 1 &&
15491           "Cannot produce a permuted scalar_to_vector for one element vector");
15492       SmallVector<int, 16> NewMask(NumElts, -1);
15493       unsigned ResultInElt = NumElts / 2;
15494       ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15495       NewMask[ResultInElt] = Idx->getZExtValue();
15496       return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15497     }
15498   }
15499   return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15500                      OrigSToV.getOperand(0));
15501 }
15502
15503 // On little endian subtargets, combine shuffles such as:
15504 // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15505 // into:
15506 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15507 // because the latter can be matched to a single instruction merge.
15508 // Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15509 // to put the value into element zero. Adjust the shuffle mask so that the
15510 // vector can remain in permuted form (to prevent a swap prior to a shuffle).
15511 // On big endian targets, this is still useful for SCALAR_TO_VECTOR
15512 // nodes with elements smaller than doubleword because all the ways
15513 // of getting scalar data into a vector register put the value in the
15514 // rightmost element of the left half of the vector.
15515 SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15516                                                 SelectionDAG &DAG) const {
15517   SDValue LHS = SVN->getOperand(0);
15518   SDValue RHS = SVN->getOperand(1);
15519   auto Mask = SVN->getMask();
15520   int NumElts = LHS.getValueType().getVectorNumElements();
15521   SDValue Res(SVN, 0);
15522   SDLoc dl(SVN);
15523   bool IsLittleEndian = Subtarget.isLittleEndian();
15524
15525   // On big endian targets this is only useful for subtargets with direct moves.
15526   // On little endian targets it would be useful for all subtargets with VSX.
15527   // However adding special handling for LE subtargets without direct moves
15528   // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15529   // which includes direct moves.
15530   if (!Subtarget.hasDirectMove())
15531     return Res;
15532
15533   // If this is not a shuffle of a shuffle and the first element comes from
15534   // the second vector, canonicalize to the commuted form. This will make it
15535   // more likely to match one of the single instruction patterns.
15536   if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15537       RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15538     std::swap(LHS, RHS);
15539     Res = DAG.getCommutedVectorShuffle(*SVN);
15540     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15541   }
15542
15543   // Adjust the shuffle mask if either input vector comes from a
15544   // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15545   // form (to prevent the need for a swap).
15546   SmallVector<int, 16> ShuffV(Mask);
15547   SDValue SToVLHS = isScalarToVec(LHS);
15548   SDValue SToVRHS = isScalarToVec(RHS);
15549   if (SToVLHS || SToVRHS) {
15550     // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15551     // same type and have differing element sizes, then do not perform
15552     // the following transformation. The current transformation for
15553     // SCALAR_TO_VECTOR assumes that both input vectors have the same
15554     // element size. This will be updated in the future to account for
15555     // differing sizes of the LHS and RHS.
15556     if (SToVLHS && SToVRHS &&
15557         (SToVLHS.getValueType().getScalarSizeInBits() !=
15558          SToVRHS.getValueType().getScalarSizeInBits()))
15559       return Res;
15560
15561     int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15562                             : SToVRHS.getValueType().getVectorNumElements();
15563     int NumEltsOut = ShuffV.size();
15564     // The width of the "valid lane" (i.e. the lane that contains the value that
15565     // is vectorized) needs to be expressed in terms of the number of elements
15566     // of the shuffle. It is thereby the ratio of the values before and after
15567     // any bitcast.
15568     unsigned ValidLaneWidth =
15569         SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15570                       LHS.getValueType().getScalarSizeInBits()
15571                 : SToVRHS.getValueType().getScalarSizeInBits() /
15572                       RHS.getValueType().getScalarSizeInBits();
15573
15574     // Initially assume that neither input is permuted. These will be adjusted
15575     // accordingly if either input is.
15576     int LHSMaxIdx = -1;
15577     int RHSMinIdx = -1;
15578     int RHSMaxIdx = -1;
15579     int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15580
15581     // Get the permuted scalar to vector nodes for the source(s) that come from
15582     // ISD::SCALAR_TO_VECTOR.
15583     // On big endian systems, this only makes sense for element sizes smaller
15584     // than 64 bits since for 64-bit elements, all instructions already put
15585     // the value into element zero. Since scalar size of LHS and RHS may differ
15586     // after isScalarToVec, this should be checked using their own sizes.
15587     if (SToVLHS) {
15588       if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15589         return Res;
15590       // Set up the values for the shuffle vector fixup.
15591       LHSMaxIdx = NumEltsOut / NumEltsIn;
15592       SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15593       if (SToVLHS.getValueType() != LHS.getValueType())
15594         SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15595       LHS = SToVLHS;
15596     }
15597     if (SToVRHS) {
15598       if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15599         return Res;
15600       RHSMinIdx = NumEltsOut;
15601       RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15602       SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15603       if (SToVRHS.getValueType() != RHS.getValueType())
15604         SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15605       RHS = SToVRHS;
15606     }
15607
15608     // Fix up the shuffle mask to reflect where the desired element actually is.
15609     // The minimum and maximum indices that correspond to element zero for both
15610     // the LHS and RHS are computed and will control which shuffle mask entries
15611     // are to be changed. For example, if the RHS is permuted, any shuffle mask
15612     // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15613     fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15614                                     HalfVec, ValidLaneWidth, Subtarget);
15615     Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15616
15617     // We may have simplified away the shuffle. We won't be able to do anything
15618     // further with it here.
15619     if (!isa<ShuffleVectorSDNode>(Res))
15620       return Res;
15621     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15622   }
15623
15624   SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15625   // The common case after we commuted the shuffle is that the RHS is a splat
15626   // and we have elements coming in from the splat at indices that are not
15627   // conducive to using a merge.
15628   // Example:
15629   // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15630   if (!isSplatBV(TheSplat))
15631     return Res;
15632
15633   // We are looking for a mask such that all even elements are from
15634   // one vector and all odd elements from the other.
15635   if (!isAlternatingShuffMask(Mask, NumElts))
15636     return Res;
15637
15638   // Adjust the mask so we are pulling in the same index from the splat
15639   // as the index from the interesting vector in consecutive elements.
15640   if (IsLittleEndian) {
15641     // Example (even elements from first vector):
15642     // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15643     if (Mask[0] < NumElts)
15644       for (int i = 1, e = Mask.size(); i < e; i += 2) {
15645         if (ShuffV[i] < 0)
15646           continue;
15647         // If element from non-splat is undef, pick first element from splat.
15648         ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
15649       }
15650     // Example (odd elements from first vector):
15651     // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
15652     else
15653       for (int i = 0, e = Mask.size(); i < e; i += 2) {
15654         if (ShuffV[i] < 0)
15655           continue;
15656         // If element from non-splat is undef, pick first element from splat.
15657         ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
15658       }
15659   } else {
15660     // Example (even elements from first vector):
15661     // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
15662     if (Mask[0] < NumElts)
15663       for (int i = 0, e = Mask.size(); i < e; i += 2) {
15664         if (ShuffV[i] < 0)
15665           continue;
15666         // If element from non-splat is undef, pick first element from splat.
15667         ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
15668       }
15669     // Example (odd elements from first vector):
15670     // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
15671     else
15672       for (int i = 1, e = Mask.size(); i < e; i += 2) {
15673         if (ShuffV[i] < 0)
15674           continue;
15675         // If element from non-splat is undef, pick first element from splat.
15676         ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
15677       }
15678   }
15679
15680   // If the RHS has undefs, we need to remove them since we may have created
15681   // a shuffle that adds those instead of the splat value.
15682   SDValue SplatVal =
15683       cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
15684   TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
15685
15686   if (IsLittleEndian)
15687     RHS = TheSplat;
15688   else
15689     LHS = TheSplat;
15690   return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15691 }
15692
15693 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
15694                                                 LSBaseSDNode *LSBase,
15695                                                 DAGCombinerInfo &DCI) const {
15696   assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
15697         "Not a reverse memop pattern!");
15698
15699   auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
15700     auto Mask = SVN->getMask();
15701     int i = 0;
15702     auto I = Mask.rbegin();
15703     auto E = Mask.rend();
15704
15705     for (; I != E; ++I) {
15706       if (*I != i)
15707         return false;
15708       i++;
15709     }
15710     return true;
15711   };
15712
15713   SelectionDAG &DAG = DCI.DAG;
15714   EVT VT = SVN->getValueType(0);
15715
15716   if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
15717     return SDValue();
15718
15719   // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
15720   // See comment in PPCVSXSwapRemoval.cpp.
15721   // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
15722   if (!Subtarget.hasP9Vector())
15723     return SDValue();
15724
15725   if(!IsElementReverse(SVN))
15726     return SDValue();
15727
15728   if (LSBase->getOpcode() == ISD::LOAD) {
15729     // If the load return value 0 has more than one user except the
15730     // shufflevector instruction, it is not profitable to replace the
15731     // shufflevector with a reverse load.
15732     for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
15733          UI != UE; ++UI)
15734       if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
15735         return SDValue();
15736
15737     SDLoc dl(LSBase);
15738     SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
15739     return DAG.getMemIntrinsicNode(
15740         PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
15741         LSBase->getMemoryVT(), LSBase->getMemOperand());
15742   }
15743
15744   if (LSBase->getOpcode() == ISD::STORE) {
15745     // If there are other uses of the shuffle, the swap cannot be avoided.
15746     // Forcing the use of an X-Form (since swapped stores only have
15747     // X-Forms) without removing the swap is unprofitable.
15748     if (!SVN->hasOneUse())
15749       return SDValue();
15750
15751     SDLoc dl(LSBase);
15752     SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
15753                           LSBase->getBasePtr()};
15754     return DAG.getMemIntrinsicNode(
15755         PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
15756         LSBase->getMemoryVT(), LSBase->getMemOperand());
15757   }
15758
15759   llvm_unreachable("Expected a load or store node here");
15760 }
15761
15762 static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
15763   unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
15764   if (IntrinsicID == Intrinsic::ppc_stdcx)
15765     StoreWidth = 8;
15766   else if (IntrinsicID == Intrinsic::ppc_stwcx)
15767     StoreWidth = 4;
15768   else if (IntrinsicID == Intrinsic::ppc_sthcx)
15769     StoreWidth = 2;
15770   else if (IntrinsicID == Intrinsic::ppc_stbcx)
15771     StoreWidth = 1;
15772   else
15773     return false;
15774   return true;
15775 }
15776
15777 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
15778                                              DAGCombinerInfo &DCI) const {
15779   SelectionDAG &DAG = DCI.DAG;
15780   SDLoc dl(N);
15781   switch (N->getOpcode()) {
15782   default: break;
15783   case ISD::ADD:
15784     return combineADD(N, DCI);
15785   case ISD::AND: {
15786     // We don't want (and (zext (shift...)), C) if C fits in the width of the
15787     // original input as that will prevent us from selecting optimal rotates.
15788     // This only matters if the input to the extend is i32 widened to i64.
15789     SDValue Op1 = N->getOperand(0);
15790     SDValue Op2 = N->getOperand(1);
15791     if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
15792          Op1.getOpcode() != ISD::ANY_EXTEND) ||
15793         !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
15794         Op1.getOperand(0).getValueType() != MVT::i32)
15795       break;
15796     SDValue NarrowOp = Op1.getOperand(0);
15797     if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
15798         NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
15799       break;
15800
15801     uint64_t Imm = Op2->getAsZExtVal();
15802     // Make sure that the constant is narrow enough to fit in the narrow type.
15803     if (!isUInt<32>(Imm))
15804       break;
15805     SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
15806     SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
15807     return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
15808   }
15809   case ISD::SHL:
15810     return combineSHL(N, DCI);
15811   case ISD::SRA:
15812     return combineSRA(N, DCI);
15813   case ISD::SRL:
15814     return combineSRL(N, DCI);
15815   case ISD::MUL:
15816     return combineMUL(N, DCI);
15817   case ISD::FMA:
15818   case PPCISD::FNMSUB:
15819     return combineFMALike(N, DCI);
15820   case PPCISD::SHL:
15821     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15822         return N->getOperand(0);
15823     break;
15824   case PPCISD::SRL:
15825     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15826         return N->getOperand(0);
15827     break;
15828   case PPCISD::SRA:
15829     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15830       if (C->isZero() ||  //  0 >>s V -> 0.
15831           C->isAllOnes()) // -1 >>s V -> -1.
15832         return N->getOperand(0);
15833     }
15834     break;
15835   case ISD::SIGN_EXTEND:
15836   case ISD::ZERO_EXTEND:
15837   case ISD::ANY_EXTEND:
15838     return DAGCombineExtBoolTrunc(N, DCI);
15839   case ISD::TRUNCATE:
15840     return combineTRUNCATE(N, DCI);
15841   case ISD::SETCC:
15842     if (SDValue CSCC = combineSetCC(N, DCI))
15843       return CSCC;
15844     [[fallthrough]];
15845   case ISD::SELECT_CC:
15846     return DAGCombineTruncBoolExt(N, DCI);
15847   case ISD::SINT_TO_FP:
15848   case ISD::UINT_TO_FP:
15849     return combineFPToIntToFP(N, DCI);
15850   case ISD::VECTOR_SHUFFLE:
15851     if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15852       LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15853       return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15854     }
15855     return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15856   case ISD::STORE: {
15857
15858     EVT Op1VT = N->getOperand(1).getValueType();
15859     unsigned Opcode = N->getOperand(1).getOpcode();
15860
15861     if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15862         Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
15863       SDValue Val = combineStoreFPToInt(N, DCI);
15864       if (Val)
15865         return Val;
15866     }
15867
15868     if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
15869       ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15870       SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15871       if (Val)
15872         return Val;
15873     }
15874
15875     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
15876     if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15877         N->getOperand(1).getNode()->hasOneUse() &&
15878         (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15879          (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15880
15881       // STBRX can only handle simple types and it makes no sense to store less
15882       // two bytes in byte-reversed order.
15883       EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15884       if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15885         break;
15886
15887       SDValue BSwapOp = N->getOperand(1).getOperand(0);
15888       // Do an any-extend to 32-bits if this is a half-word input.
15889       if (BSwapOp.getValueType() == MVT::i16)
15890         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15891
15892       // If the type of BSWAP operand is wider than stored memory width
15893       // it need to be shifted to the right side before STBRX.
15894       if (Op1VT.bitsGT(mVT)) {
15895         int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15896         BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15897                               DAG.getConstant(Shift, dl, MVT::i32));
15898         // Need to truncate if this is a bswap of i64 stored as i32/i16.
15899         if (Op1VT == MVT::i64)
15900           BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15901       }
15902
15903       SDValue Ops[] = {
15904         N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15905       };
15906       return
15907         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
15908                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15909                                 cast<StoreSDNode>(N)->getMemOperand());
15910     }
15911
15912     // STORE Constant:i32<0>  ->  STORE<trunc to i32> Constant:i64<0>
15913     // So it can increase the chance of CSE constant construction.
15914     if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15915         isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15916       // Need to sign-extended to 64-bits to handle negative values.
15917       EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15918       uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15919                                     MemVT.getSizeInBits());
15920       SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15921
15922       // DAG.getTruncStore() can't be used here because it doesn't accept
15923       // the general (base + offset) addressing mode.
15924       // So we use UpdateNodeOperands and setTruncatingStore instead.
15925       DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15926                              N->getOperand(3));
15927       cast<StoreSDNode>(N)->setTruncatingStore(true);
15928       return SDValue(N, 0);
15929     }
15930
15931     // For little endian, VSX stores require generating xxswapd/lxvd2x.
15932     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15933     if (Op1VT.isSimple()) {
15934       MVT StoreVT = Op1VT.getSimpleVT();
15935       if (Subtarget.needsSwapsForVSXMemOps() &&
15936           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15937            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15938         return expandVSXStoreForLE(N, DCI);
15939     }
15940     break;
15941   }
15942   case ISD::LOAD: {
15943     LoadSDNode *LD = cast<LoadSDNode>(N);
15944     EVT VT = LD->getValueType(0);
15945
15946     // For little endian, VSX loads require generating lxvd2x/xxswapd.
15947     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15948     if (VT.isSimple()) {
15949       MVT LoadVT = VT.getSimpleVT();
15950       if (Subtarget.needsSwapsForVSXMemOps() &&
15951           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
15952            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
15953         return expandVSXLoadForLE(N, DCI);
15954     }
15955
15956     // We sometimes end up with a 64-bit integer load, from which we extract
15957     // two single-precision floating-point numbers. This happens with
15958     // std::complex<float>, and other similar structures, because of the way we
15959     // canonicalize structure copies. However, if we lack direct moves,
15960     // then the final bitcasts from the extracted integer values to the
15961     // floating-point numbers turn into store/load pairs. Even with direct moves,
15962     // just loading the two floating-point numbers is likely better.
15963     auto ReplaceTwoFloatLoad = [&]() {
15964       if (VT != MVT::i64)
15965         return false;
15966
15967       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
15968           LD->isVolatile())
15969         return false;
15970
15971       //  We're looking for a sequence like this:
15972       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
15973       //      t16: i64 = srl t13, Constant:i32<32>
15974       //    t17: i32 = truncate t16
15975       //  t18: f32 = bitcast t17
15976       //    t19: i32 = truncate t13
15977       //  t20: f32 = bitcast t19
15978
15979       if (!LD->hasNUsesOfValue(2, 0))
15980         return false;
15981
15982       auto UI = LD->use_begin();
15983       while (UI.getUse().getResNo() != 0) ++UI;
15984       SDNode *Trunc = *UI++;
15985       while (UI.getUse().getResNo() != 0) ++UI;
15986       SDNode *RightShift = *UI;
15987       if (Trunc->getOpcode() != ISD::TRUNCATE)
15988         std::swap(Trunc, RightShift);
15989
15990       if (Trunc->getOpcode() != ISD::TRUNCATE ||
15991           Trunc->getValueType(0) != MVT::i32 ||
15992           !Trunc->hasOneUse())
15993         return false;
15994       if (RightShift->getOpcode() != ISD::SRL ||
15995           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
15996           RightShift->getConstantOperandVal(1) != 32 ||
15997           !RightShift->hasOneUse())
15998         return false;
15999
16000       SDNode *Trunc2 = *RightShift->use_begin();
16001       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
16002           Trunc2->getValueType(0) != MVT::i32 ||
16003           !Trunc2->hasOneUse())
16004         return false;
16005
16006       SDNode *Bitcast = *Trunc->use_begin();
16007       SDNode *Bitcast2 = *Trunc2->use_begin();
16008
16009       if (Bitcast->getOpcode() != ISD::BITCAST ||
16010           Bitcast->getValueType(0) != MVT::f32)
16011         return false;
16012       if (Bitcast2->getOpcode() != ISD::BITCAST ||
16013           Bitcast2->getValueType(0) != MVT::f32)
16014         return false;
16015
16016       if (Subtarget.isLittleEndian())
16017         std::swap(Bitcast, Bitcast2);
16018
16019       // Bitcast has the second float (in memory-layout order) and Bitcast2
16020       // has the first one.
16021
16022       SDValue BasePtr = LD->getBasePtr();
16023       if (LD->isIndexed()) {
16024         assert(LD->getAddressingMode() == ISD::PRE_INC &&
16025                "Non-pre-inc AM on PPC?");
16026         BasePtr =
16027           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16028                       LD->getOffset());
16029       }
16030
16031       auto MMOFlags =
16032           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
16033       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
16034                                       LD->getPointerInfo(), LD->getAlign(),
16035                                       MMOFlags, LD->getAAInfo());
16036       SDValue AddPtr =
16037         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
16038                     BasePtr, DAG.getIntPtrConstant(4, dl));
16039       SDValue FloatLoad2 = DAG.getLoad(
16040           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
16041           LD->getPointerInfo().getWithOffset(4),
16042           commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
16043
16044       if (LD->isIndexed()) {
16045         // Note that DAGCombine should re-form any pre-increment load(s) from
16046         // what is produced here if that makes sense.
16047         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
16048       }
16049
16050       DCI.CombineTo(Bitcast2, FloatLoad);
16051       DCI.CombineTo(Bitcast, FloatLoad2);
16052
16053       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
16054                                     SDValue(FloatLoad2.getNode(), 1));
16055       return true;
16056     };
16057
16058     if (ReplaceTwoFloatLoad())
16059       return SDValue(N, 0);
16060
16061     EVT MemVT = LD->getMemoryVT();
16062     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
16063     Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
16064     if (LD->isUnindexed() && VT.isVector() &&
16065         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
16066           // P8 and later hardware should just use LOAD.
16067           !Subtarget.hasP8Vector() &&
16068           (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16069            VT == MVT::v4f32))) &&
16070         LD->getAlign() < ABIAlignment) {
16071       // This is a type-legal unaligned Altivec load.
16072       SDValue Chain = LD->getChain();
16073       SDValue Ptr = LD->getBasePtr();
16074       bool isLittleEndian = Subtarget.isLittleEndian();
16075
16076       // This implements the loading of unaligned vectors as described in
16077       // the venerable Apple Velocity Engine overview. Specifically:
16078       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
16079       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
16080       //
16081       // The general idea is to expand a sequence of one or more unaligned
16082       // loads into an alignment-based permutation-control instruction (lvsl
16083       // or lvsr), a series of regular vector loads (which always truncate
16084       // their input address to an aligned address), and a series of
16085       // permutations.  The results of these permutations are the requested
16086       // loaded values.  The trick is that the last "extra" load is not taken
16087       // from the address you might suspect (sizeof(vector) bytes after the
16088       // last requested load), but rather sizeof(vector) - 1 bytes after the
16089       // last requested vector. The point of this is to avoid a page fault if
16090       // the base address happened to be aligned. This works because if the
16091       // base address is aligned, then adding less than a full vector length
16092       // will cause the last vector in the sequence to be (re)loaded.
16093       // Otherwise, the next vector will be fetched as you might suspect was
16094       // necessary.
16095
16096       // We might be able to reuse the permutation generation from
16097       // a different base address offset from this one by an aligned amount.
16098       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
16099       // optimization later.
16100       Intrinsic::ID Intr, IntrLD, IntrPerm;
16101       MVT PermCntlTy, PermTy, LDTy;
16102       Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16103                             : Intrinsic::ppc_altivec_lvsl;
16104       IntrLD = Intrinsic::ppc_altivec_lvx;
16105       IntrPerm = Intrinsic::ppc_altivec_vperm;
16106       PermCntlTy = MVT::v16i8;
16107       PermTy = MVT::v4i32;
16108       LDTy = MVT::v4i32;
16109
16110       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
16111
16112       // Create the new MMO for the new base load. It is like the original MMO,
16113       // but represents an area in memory almost twice the vector size centered
16114       // on the original address. If the address is unaligned, we might start
16115       // reading up to (sizeof(vector)-1) bytes below the address of the
16116       // original unaligned load.
16117       MachineFunction &MF = DAG.getMachineFunction();
16118       MachineMemOperand *BaseMMO =
16119         MF.getMachineMemOperand(LD->getMemOperand(),
16120                                 -(int64_t)MemVT.getStoreSize()+1,
16121                                 2*MemVT.getStoreSize()-1);
16122
16123       // Create the new base load.
16124       SDValue LDXIntID =
16125           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
16126       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
16127       SDValue BaseLoad =
16128         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16129                                 DAG.getVTList(PermTy, MVT::Other),
16130                                 BaseLoadOps, LDTy, BaseMMO);
16131
16132       // Note that the value of IncOffset (which is provided to the next
16133       // load's pointer info offset value, and thus used to calculate the
16134       // alignment), and the value of IncValue (which is actually used to
16135       // increment the pointer value) are different! This is because we
16136       // require the next load to appear to be aligned, even though it
16137       // is actually offset from the base pointer by a lesser amount.
16138       int IncOffset = VT.getSizeInBits() / 8;
16139       int IncValue = IncOffset;
16140
16141       // Walk (both up and down) the chain looking for another load at the real
16142       // (aligned) offset (the alignment of the other load does not matter in
16143       // this case). If found, then do not use the offset reduction trick, as
16144       // that will prevent the loads from being later combined (as they would
16145       // otherwise be duplicates).
16146       if (!findConsecutiveLoad(LD, DAG))
16147         --IncValue;
16148
16149       SDValue Increment =
16150           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
16151       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
16152
16153       MachineMemOperand *ExtraMMO =
16154         MF.getMachineMemOperand(LD->getMemOperand(),
16155                                 1, 2*MemVT.getStoreSize()-1);
16156       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
16157       SDValue ExtraLoad =
16158         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16159                                 DAG.getVTList(PermTy, MVT::Other),
16160                                 ExtraLoadOps, LDTy, ExtraMMO);
16161
16162       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16163         BaseLoad.getValue(1), ExtraLoad.getValue(1));
16164
16165       // Because vperm has a big-endian bias, we must reverse the order
16166       // of the input vectors and complement the permute control vector
16167       // when generating little endian code.  We have already handled the
16168       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
16169       // and ExtraLoad here.
16170       SDValue Perm;
16171       if (isLittleEndian)
16172         Perm = BuildIntrinsicOp(IntrPerm,
16173                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
16174       else
16175         Perm = BuildIntrinsicOp(IntrPerm,
16176                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
16177
16178       if (VT != PermTy)
16179         Perm = Subtarget.hasAltivec()
16180                    ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
16181                    : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
16182                                  DAG.getTargetConstant(1, dl, MVT::i64));
16183                                // second argument is 1 because this rounding
16184                                // is always exact.
16185
16186       // The output of the permutation is our loaded result, the TokenFactor is
16187       // our new chain.
16188       DCI.CombineTo(N, Perm, TF);
16189       return SDValue(N, 0);
16190     }
16191     }
16192     break;
16193     case ISD::INTRINSIC_WO_CHAIN: {
16194       bool isLittleEndian = Subtarget.isLittleEndian();
16195       unsigned IID = N->getConstantOperandVal(0);
16196       Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16197                                            : Intrinsic::ppc_altivec_lvsl);
16198       if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
16199         SDValue Add = N->getOperand(1);
16200
16201         int Bits = 4 /* 16 byte alignment */;
16202
16203         if (DAG.MaskedValueIsZero(Add->getOperand(1),
16204                                   APInt::getAllOnes(Bits /* alignment */)
16205                                       .zext(Add.getScalarValueSizeInBits()))) {
16206           SDNode *BasePtr = Add->getOperand(0).getNode();
16207           for (SDNode *U : BasePtr->uses()) {
16208           if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16209               U->getConstantOperandVal(0) == IID) {
16210             // We've found another LVSL/LVSR, and this address is an aligned
16211             // multiple of that one. The results will be the same, so use the
16212             // one we've just found instead.
16213
16214             return SDValue(U, 0);
16215           }
16216           }
16217         }
16218
16219         if (isa<ConstantSDNode>(Add->getOperand(1))) {
16220           SDNode *BasePtr = Add->getOperand(0).getNode();
16221           for (SDNode *U : BasePtr->uses()) {
16222           if (U->getOpcode() == ISD::ADD &&
16223               isa<ConstantSDNode>(U->getOperand(1)) &&
16224               (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
16225                       (1ULL << Bits) ==
16226                   0) {
16227             SDNode *OtherAdd = U;
16228             for (SDNode *V : OtherAdd->uses()) {
16229               if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16230                   V->getConstantOperandVal(0) == IID) {
16231                 return SDValue(V, 0);
16232               }
16233             }
16234           }
16235           }
16236         }
16237       }
16238
16239       // Combine vmaxsw/h/b(a, a's negation) to abs(a)
16240       // Expose the vabsduw/h/b opportunity for down stream
16241       if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
16242           (IID == Intrinsic::ppc_altivec_vmaxsw ||
16243            IID == Intrinsic::ppc_altivec_vmaxsh ||
16244            IID == Intrinsic::ppc_altivec_vmaxsb)) {
16245         SDValue V1 = N->getOperand(1);
16246         SDValue V2 = N->getOperand(2);
16247         if ((V1.getSimpleValueType() == MVT::v4i32 ||
16248              V1.getSimpleValueType() == MVT::v8i16 ||
16249              V1.getSimpleValueType() == MVT::v16i8) &&
16250             V1.getSimpleValueType() == V2.getSimpleValueType()) {
16251           // (0-a, a)
16252           if (V1.getOpcode() == ISD::SUB &&
16253               ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
16254               V1.getOperand(1) == V2) {
16255             return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
16256           }
16257           // (a, 0-a)
16258           if (V2.getOpcode() == ISD::SUB &&
16259               ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
16260               V2.getOperand(1) == V1) {
16261             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16262           }
16263           // (x-y, y-x)
16264           if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
16265               V1.getOperand(0) == V2.getOperand(1) &&
16266               V1.getOperand(1) == V2.getOperand(0)) {
16267             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16268           }
16269         }
16270       }
16271     }
16272
16273     break;
16274   case ISD::INTRINSIC_W_CHAIN:
16275       switch (N->getConstantOperandVal(1)) {
16276       default:
16277         break;
16278       case Intrinsic::ppc_altivec_vsum4sbs:
16279       case Intrinsic::ppc_altivec_vsum4shs:
16280       case Intrinsic::ppc_altivec_vsum4ubs: {
16281         // These sum-across intrinsics only have a chain due to the side effect
16282         // that they may set the SAT bit. If we know the SAT bit will not be set
16283         // for some inputs, we can replace any uses of their chain with the
16284         // input chain.
16285         if (BuildVectorSDNode *BVN =
16286                 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16287           APInt APSplatBits, APSplatUndef;
16288           unsigned SplatBitSize;
16289           bool HasAnyUndefs;
16290           bool BVNIsConstantSplat = BVN->isConstantSplat(
16291               APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16292               !Subtarget.isLittleEndian());
16293           // If the constant splat vector is 0, the SAT bit will not be set.
16294           if (BVNIsConstantSplat && APSplatBits == 0)
16295             DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16296         }
16297         return SDValue();
16298       }
16299     case Intrinsic::ppc_vsx_lxvw4x:
16300     case Intrinsic::ppc_vsx_lxvd2x:
16301       // For little endian, VSX loads require generating lxvd2x/xxswapd.
16302       // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16303       if (Subtarget.needsSwapsForVSXMemOps())
16304         return expandVSXLoadForLE(N, DCI);
16305       break;
16306     }
16307     break;
16308   case ISD::INTRINSIC_VOID:
16309     // For little endian, VSX stores require generating xxswapd/stxvd2x.
16310     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16311     if (Subtarget.needsSwapsForVSXMemOps()) {
16312       switch (N->getConstantOperandVal(1)) {
16313       default:
16314         break;
16315       case Intrinsic::ppc_vsx_stxvw4x:
16316       case Intrinsic::ppc_vsx_stxvd2x:
16317         return expandVSXStoreForLE(N, DCI);
16318       }
16319     }
16320     break;
16321   case ISD::BSWAP: {
16322     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16323     // For subtargets without LDBRX, we can still do better than the default
16324     // expansion even for 64-bit BSWAP (LOAD).
16325     bool Is64BitBswapOn64BitTgt =
16326         Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16327     bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16328                                N->getOperand(0).hasOneUse();
16329     if (IsSingleUseNormalLd &&
16330         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16331          (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16332       SDValue Load = N->getOperand(0);
16333       LoadSDNode *LD = cast<LoadSDNode>(Load);
16334       // Create the byte-swapping load.
16335       SDValue Ops[] = {
16336         LD->getChain(),    // Chain
16337         LD->getBasePtr(),  // Ptr
16338         DAG.getValueType(N->getValueType(0)) // VT
16339       };
16340       SDValue BSLoad =
16341         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
16342                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16343                                               MVT::i64 : MVT::i32, MVT::Other),
16344                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
16345
16346       // If this is an i16 load, insert the truncate.
16347       SDValue ResVal = BSLoad;
16348       if (N->getValueType(0) == MVT::i16)
16349         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16350
16351       // First, combine the bswap away.  This makes the value produced by the
16352       // load dead.
16353       DCI.CombineTo(N, ResVal);
16354
16355       // Next, combine the load away, we give it a bogus result value but a real
16356       // chain result.  The result value is dead because the bswap is dead.
16357       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16358
16359       // Return N so it doesn't get rechecked!
16360       return SDValue(N, 0);
16361     }
16362     // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16363     // before legalization so that the BUILD_PAIR is handled correctly.
16364     if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16365         !IsSingleUseNormalLd)
16366       return SDValue();
16367     LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16368
16369     // Can't split volatile or atomic loads.
16370     if (!LD->isSimple())
16371       return SDValue();
16372     SDValue BasePtr = LD->getBasePtr();
16373     SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16374                              LD->getPointerInfo(), LD->getAlign());
16375     Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16376     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16377                           DAG.getIntPtrConstant(4, dl));
16378     MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
16379         LD->getMemOperand(), 4, 4);
16380     SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16381     Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16382     SDValue Res;
16383     if (Subtarget.isLittleEndian())
16384       Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16385     else
16386       Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
16387     SDValue TF =
16388         DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16389                     Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16390     DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16391     return Res;
16392   }
16393   case PPCISD::VCMP:
16394     // If a VCMP_rec node already exists with exactly the same operands as this
16395     // node, use its result instead of this node (VCMP_rec computes both a CR6
16396     // and a normal output).
16397     //
16398     if (!N->getOperand(0).hasOneUse() &&
16399         !N->getOperand(1).hasOneUse() &&
16400         !N->getOperand(2).hasOneUse()) {
16401
16402       // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16403       SDNode *VCMPrecNode = nullptr;
16404
16405       SDNode *LHSN = N->getOperand(0).getNode();
16406       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
16407            UI != E; ++UI)
16408         if (UI->getOpcode() == PPCISD::VCMP_rec &&
16409             UI->getOperand(1) == N->getOperand(1) &&
16410             UI->getOperand(2) == N->getOperand(2) &&
16411             UI->getOperand(0) == N->getOperand(0)) {
16412           VCMPrecNode = *UI;
16413           break;
16414         }
16415
16416       // If there is no VCMP_rec node, or if the flag value has a single use,
16417       // don't transform this.
16418       if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16419         break;
16420
16421       // Look at the (necessarily single) use of the flag value.  If it has a
16422       // chain, this transformation is more complex.  Note that multiple things
16423       // could use the value result, which we should ignore.
16424       SDNode *FlagUser = nullptr;
16425       for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16426            FlagUser == nullptr; ++UI) {
16427         assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16428         SDNode *User = *UI;
16429         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16430           if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16431             FlagUser = User;
16432             break;
16433           }
16434         }
16435       }
16436
16437       // If the user is a MFOCRF instruction, we know this is safe.
16438       // Otherwise we give up for right now.
16439       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16440         return SDValue(VCMPrecNode, 0);
16441     }
16442     break;
16443   case ISD::BR_CC: {
16444     // If this is a branch on an altivec predicate comparison, lower this so
16445     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
16446     // lowering is done pre-legalize, because the legalizer lowers the predicate
16447     // compare down to code that is difficult to reassemble.
16448     // This code also handles branches that depend on the result of a store
16449     // conditional.
16450     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16451     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16452
16453     int CompareOpc;
16454     bool isDot;
16455
16456     if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16457       break;
16458
16459     // Since we are doing this pre-legalize, the RHS can be a constant of
16460     // arbitrary bitwidth which may cause issues when trying to get the value
16461     // from the underlying APInt.
16462     auto RHSAPInt = RHS->getAsAPIntVal();
16463     if (!RHSAPInt.isIntN(64))
16464       break;
16465
16466     unsigned Val = RHSAPInt.getZExtValue();
16467     auto isImpossibleCompare = [&]() {
16468       // If this is a comparison against something other than 0/1, then we know
16469       // that the condition is never/always true.
16470       if (Val != 0 && Val != 1) {
16471         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
16472           return N->getOperand(0);
16473         // Always !=, turn it into an unconditional branch.
16474         return DAG.getNode(ISD::BR, dl, MVT::Other,
16475                            N->getOperand(0), N->getOperand(4));
16476       }
16477       return SDValue();
16478     };
16479     // Combine branches fed by store conditional instructions (st[bhwd]cx).
16480     unsigned StoreWidth = 0;
16481     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16482         isStoreConditional(LHS, StoreWidth)) {
16483       if (SDValue Impossible = isImpossibleCompare())
16484         return Impossible;
16485       PPC::Predicate CompOpc;
16486       // eq 0 => ne
16487       // ne 0 => eq
16488       // eq 1 => eq
16489       // ne 1 => ne
16490       if (Val == 0)
16491         CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16492       else
16493         CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16494
16495       SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16496                        DAG.getConstant(StoreWidth, dl, MVT::i32)};
16497       auto *MemNode = cast<MemSDNode>(LHS);
16498       SDValue ConstSt = DAG.getMemIntrinsicNode(
16499           PPCISD::STORE_COND, dl,
16500           DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16501           MemNode->getMemoryVT(), MemNode->getMemOperand());
16502
16503       SDValue InChain;
16504       // Unchain the branch from the original store conditional.
16505       if (N->getOperand(0) == LHS.getValue(1))
16506         InChain = LHS.getOperand(0);
16507       else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16508         SmallVector<SDValue, 4> InChains;
16509         SDValue InTF = N->getOperand(0);
16510         for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16511           if (InTF.getOperand(i) != LHS.getValue(1))
16512             InChains.push_back(InTF.getOperand(i));
16513         InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16514       }
16515
16516       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16517                          DAG.getConstant(CompOpc, dl, MVT::i32),
16518                          DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16519                          ConstSt.getValue(2));
16520     }
16521
16522     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16523         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16524       assert(isDot && "Can't compare against a vector result!");
16525
16526       if (SDValue Impossible = isImpossibleCompare())
16527         return Impossible;
16528
16529       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16530       // Create the PPCISD altivec 'dot' comparison node.
16531       SDValue Ops[] = {
16532         LHS.getOperand(2),  // LHS of compare
16533         LHS.getOperand(3),  // RHS of compare
16534         DAG.getConstant(CompareOpc, dl, MVT::i32)
16535       };
16536       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16537       SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16538
16539       // Unpack the result based on how the target uses it.
16540       PPC::Predicate CompOpc;
16541       switch (LHS.getConstantOperandVal(1)) {
16542       default:  // Can't happen, don't crash on invalid number though.
16543       case 0:   // Branch on the value of the EQ bit of CR6.
16544         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16545         break;
16546       case 1:   // Branch on the inverted value of the EQ bit of CR6.
16547         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16548         break;
16549       case 2:   // Branch on the value of the LT bit of CR6.
16550         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16551         break;
16552       case 3:   // Branch on the inverted value of the LT bit of CR6.
16553         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16554         break;
16555       }
16556
16557       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16558                          DAG.getConstant(CompOpc, dl, MVT::i32),
16559                          DAG.getRegister(PPC::CR6, MVT::i32),
16560                          N->getOperand(4), CompNode.getValue(1));
16561     }
16562     break;
16563   }
16564   case ISD::BUILD_VECTOR:
16565     return DAGCombineBuildVector(N, DCI);
16566   }
16567
16568   return SDValue();
16569 }
16570
16571 SDValue
16572 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
16573                                  SelectionDAG &DAG,
16574                                  SmallVectorImpl<SDNode *> &Created) const {
16575   // fold (sdiv X, pow2)
16576   EVT VT = N->getValueType(0);
16577   if (VT == MVT::i64 && !Subtarget.isPPC64())
16578     return SDValue();
16579   if ((VT != MVT::i32 && VT != MVT::i64) ||
16580       !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16581     return SDValue();
16582
16583   SDLoc DL(N);
16584   SDValue N0 = N->getOperand(0);
16585
16586   bool IsNegPow2 = Divisor.isNegatedPowerOf2();
16587   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
16588   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
16589
16590   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16591   Created.push_back(Op.getNode());
16592
16593   if (IsNegPow2) {
16594     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16595     Created.push_back(Op.getNode());
16596   }
16597
16598   return Op;
16599 }
16600
16601 //===----------------------------------------------------------------------===//
16602 // Inline Assembly Support
16603 //===----------------------------------------------------------------------===//
16604
16605 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16606                                                       KnownBits &Known,
16607                                                       const APInt &DemandedElts,
16608                                                       const SelectionDAG &DAG,
16609                                                       unsigned Depth) const {
16610   Known.resetAll();
16611   switch (Op.getOpcode()) {
16612   default: break;
16613   case PPCISD::LBRX: {
16614     // lhbrx is known to have the top bits cleared out.
16615     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16616       Known.Zero = 0xFFFF0000;
16617     break;
16618   }
16619   case ISD::INTRINSIC_WO_CHAIN: {
16620     switch (Op.getConstantOperandVal(0)) {
16621     default: break;
16622     case Intrinsic::ppc_altivec_vcmpbfp_p:
16623     case Intrinsic::ppc_altivec_vcmpeqfp_p:
16624     case Intrinsic::ppc_altivec_vcmpequb_p:
16625     case Intrinsic::ppc_altivec_vcmpequh_p:
16626     case Intrinsic::ppc_altivec_vcmpequw_p:
16627     case Intrinsic::ppc_altivec_vcmpequd_p:
16628     case Intrinsic::ppc_altivec_vcmpequq_p:
16629     case Intrinsic::ppc_altivec_vcmpgefp_p:
16630     case Intrinsic::ppc_altivec_vcmpgtfp_p:
16631     case Intrinsic::ppc_altivec_vcmpgtsb_p:
16632     case Intrinsic::ppc_altivec_vcmpgtsh_p:
16633     case Intrinsic::ppc_altivec_vcmpgtsw_p:
16634     case Intrinsic::ppc_altivec_vcmpgtsd_p:
16635     case Intrinsic::ppc_altivec_vcmpgtsq_p:
16636     case Intrinsic::ppc_altivec_vcmpgtub_p:
16637     case Intrinsic::ppc_altivec_vcmpgtuh_p:
16638     case Intrinsic::ppc_altivec_vcmpgtuw_p:
16639     case Intrinsic::ppc_altivec_vcmpgtud_p:
16640     case Intrinsic::ppc_altivec_vcmpgtuq_p:
16641       Known.Zero = ~1U;  // All bits but the low one are known to be zero.
16642       break;
16643     }
16644     break;
16645   }
16646   case ISD::INTRINSIC_W_CHAIN: {
16647     switch (Op.getConstantOperandVal(1)) {
16648     default:
16649       break;
16650     case Intrinsic::ppc_load2r:
16651       // Top bits are cleared for load2r (which is the same as lhbrx).
16652       Known.Zero = 0xFFFF0000;
16653       break;
16654     }
16655     break;
16656   }
16657   }
16658 }
16659
16660 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
16661   switch (Subtarget.getCPUDirective()) {
16662   default: break;
16663   case PPC::DIR_970:
16664   case PPC::DIR_PWR4:
16665   case PPC::DIR_PWR5:
16666   case PPC::DIR_PWR5X:
16667   case PPC::DIR_PWR6:
16668   case PPC::DIR_PWR6X:
16669   case PPC::DIR_PWR7:
16670   case PPC::DIR_PWR8:
16671   case PPC::DIR_PWR9:
16672   case PPC::DIR_PWR10:
16673   case PPC::DIR_PWR11:
16674   case PPC::DIR_PWR_FUTURE: {
16675     if (!ML)
16676       break;
16677
16678     if (!DisableInnermostLoopAlign32) {
16679       // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
16680       // so that we can decrease cache misses and branch-prediction misses.
16681       // Actual alignment of the loop will depend on the hotness check and other
16682       // logic in alignBlocks.
16683       if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
16684         return Align(32);
16685     }
16686
16687     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
16688
16689     // For small loops (between 5 and 8 instructions), align to a 32-byte
16690     // boundary so that the entire loop fits in one instruction-cache line.
16691     uint64_t LoopSize = 0;
16692     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
16693       for (const MachineInstr &J : **I) {
16694         LoopSize += TII->getInstSizeInBytes(J);
16695         if (LoopSize > 32)
16696           break;
16697       }
16698
16699     if (LoopSize > 16 && LoopSize <= 32)
16700       return Align(32);
16701
16702     break;
16703   }
16704   }
16705
16706   return TargetLowering::getPrefLoopAlignment(ML);
16707 }
16708
16709 /// getConstraintType - Given a constraint, return the type of
16710 /// constraint it is for this target.
16711 PPCTargetLowering::ConstraintType
16712 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
16713   if (Constraint.size() == 1) {
16714     switch (Constraint[0]) {
16715     default: break;
16716     case 'b':
16717     case 'r':
16718     case 'f':
16719     case 'd':
16720     case 'v':
16721     case 'y':
16722       return C_RegisterClass;
16723     case 'Z':
16724       // FIXME: While Z does indicate a memory constraint, it specifically
16725       // indicates an r+r address (used in conjunction with the 'y' modifier
16726       // in the replacement string). Currently, we're forcing the base
16727       // register to be r0 in the asm printer (which is interpreted as zero)
16728       // and forming the complete address in the second register. This is
16729       // suboptimal.
16730       return C_Memory;
16731     }
16732   } else if (Constraint == "wc") { // individual CR bits.
16733     return C_RegisterClass;
16734   } else if (Constraint == "wa" || Constraint == "wd" ||
16735              Constraint == "wf" || Constraint == "ws" ||
16736              Constraint == "wi" || Constraint == "ww") {
16737     return C_RegisterClass; // VSX registers.
16738   }
16739   return TargetLowering::getConstraintType(Constraint);
16740 }
16741
16742 /// Examine constraint type and operand type and determine a weight value.
16743 /// This object must already have been set up with the operand type
16744 /// and the current alternative constraint selected.
16745 TargetLowering::ConstraintWeight
16746 PPCTargetLowering::getSingleConstraintMatchWeight(
16747     AsmOperandInfo &info, const char *constraint) const {
16748   ConstraintWeight weight = CW_Invalid;
16749   Value *CallOperandVal = info.CallOperandVal;
16750     // If we don't have a value, we can't do a match,
16751     // but allow it at the lowest weight.
16752   if (!CallOperandVal)
16753     return CW_Default;
16754   Type *type = CallOperandVal->getType();
16755
16756   // Look at the constraint type.
16757   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
16758     return CW_Register; // an individual CR bit.
16759   else if ((StringRef(constraint) == "wa" ||
16760             StringRef(constraint) == "wd" ||
16761             StringRef(constraint) == "wf") &&
16762            type->isVectorTy())
16763     return CW_Register;
16764   else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16765     return CW_Register; // just hold 64-bit integers data.
16766   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16767     return CW_Register;
16768   else if (StringRef(constraint) == "ww" && type->isFloatTy())
16769     return CW_Register;
16770
16771   switch (*constraint) {
16772   default:
16773     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
16774     break;
16775   case 'b':
16776     if (type->isIntegerTy())
16777       weight = CW_Register;
16778     break;
16779   case 'f':
16780     if (type->isFloatTy())
16781       weight = CW_Register;
16782     break;
16783   case 'd':
16784     if (type->isDoubleTy())
16785       weight = CW_Register;
16786     break;
16787   case 'v':
16788     if (type->isVectorTy())
16789       weight = CW_Register;
16790     break;
16791   case 'y':
16792     weight = CW_Register;
16793     break;
16794   case 'Z':
16795     weight = CW_Memory;
16796     break;
16797   }
16798   return weight;
16799 }
16800
16801 std::pair<unsigned, const TargetRegisterClass *>
16802 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16803                                                 StringRef Constraint,
16804                                                 MVT VT) const {
16805   if (Constraint.size() == 1) {
16806     // GCC RS6000 Constraint Letters
16807     switch (Constraint[0]) {
16808     case 'b':   // R1-R31
16809       if (VT == MVT::i64 && Subtarget.isPPC64())
16810         return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16811       return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16812     case 'r':   // R0-R31
16813       if (VT == MVT::i64 && Subtarget.isPPC64())
16814         return std::make_pair(0U, &PPC::G8RCRegClass);
16815       return std::make_pair(0U, &PPC::GPRCRegClass);
16816     // 'd' and 'f' constraints are both defined to be "the floating point
16817     // registers", where one is for 32-bit and the other for 64-bit. We don't
16818     // really care overly much here so just give them all the same reg classes.
16819     case 'd':
16820     case 'f':
16821       if (Subtarget.hasSPE()) {
16822         if (VT == MVT::f32 || VT == MVT::i32)
16823           return std::make_pair(0U, &PPC::GPRCRegClass);
16824         if (VT == MVT::f64 || VT == MVT::i64)
16825           return std::make_pair(0U, &PPC::SPERCRegClass);
16826       } else {
16827         if (VT == MVT::f32 || VT == MVT::i32)
16828           return std::make_pair(0U, &PPC::F4RCRegClass);
16829         if (VT == MVT::f64 || VT == MVT::i64)
16830           return std::make_pair(0U, &PPC::F8RCRegClass);
16831       }
16832       break;
16833     case 'v':
16834       if (Subtarget.hasAltivec() && VT.isVector())
16835         return std::make_pair(0U, &PPC::VRRCRegClass);
16836       else if (Subtarget.hasVSX())
16837         // Scalars in Altivec registers only make sense with VSX.
16838         return std::make_pair(0U, &PPC::VFRCRegClass);
16839       break;
16840     case 'y':   // crrc
16841       return std::make_pair(0U, &PPC::CRRCRegClass);
16842     }
16843   } else if (Constraint == "wc" && Subtarget.useCRBits()) {
16844     // An individual CR bit.
16845     return std::make_pair(0U, &PPC::CRBITRCRegClass);
16846   } else if ((Constraint == "wa" || Constraint == "wd" ||
16847              Constraint == "wf" || Constraint == "wi") &&
16848              Subtarget.hasVSX()) {
16849     // A VSX register for either a scalar (FP) or vector. There is no
16850     // support for single precision scalars on subtargets prior to Power8.
16851     if (VT.isVector())
16852       return std::make_pair(0U, &PPC::VSRCRegClass);
16853     if (VT == MVT::f32 && Subtarget.hasP8Vector())
16854       return std::make_pair(0U, &PPC::VSSRCRegClass);
16855     return std::make_pair(0U, &PPC::VSFRCRegClass);
16856   } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
16857     if (VT == MVT::f32 && Subtarget.hasP8Vector())
16858       return std::make_pair(0U, &PPC::VSSRCRegClass);
16859     else
16860       return std::make_pair(0U, &PPC::VSFRCRegClass);
16861   } else if (Constraint == "lr") {
16862     if (VT == MVT::i64)
16863       return std::make_pair(0U, &PPC::LR8RCRegClass);
16864     else
16865       return std::make_pair(0U, &PPC::LRRCRegClass);
16866   }
16867
16868   // Handle special cases of physical registers that are not properly handled
16869   // by the base class.
16870   if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
16871     // If we name a VSX register, we can't defer to the base class because it
16872     // will not recognize the correct register (their names will be VSL{0-31}
16873     // and V{0-31} so they won't match). So we match them here.
16874     if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
16875       int VSNum = atoi(Constraint.data() + 3);
16876       assert(VSNum >= 0 && VSNum <= 63 &&
16877              "Attempted to access a vsr out of range");
16878       if (VSNum < 32)
16879         return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16880       return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16881     }
16882
16883     // For float registers, we can't defer to the base class as it will match
16884     // the SPILLTOVSRRC class.
16885     if (Constraint.size() > 3 && Constraint[1] == 'f') {
16886       int RegNum = atoi(Constraint.data() + 2);
16887       if (RegNum > 31 || RegNum < 0)
16888         report_fatal_error("Invalid floating point register number");
16889       if (VT == MVT::f32 || VT == MVT::i32)
16890         return Subtarget.hasSPE()
16891                    ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16892                    : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16893       if (VT == MVT::f64 || VT == MVT::i64)
16894         return Subtarget.hasSPE()
16895                    ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16896                    : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16897     }
16898   }
16899
16900   std::pair<unsigned, const TargetRegisterClass *> R =
16901       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16902
16903   // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
16904   // (which we call X[0-9]+). If a 64-bit value has been requested, and a
16905   // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
16906   // register.
16907   // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
16908   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
16909   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
16910       PPC::GPRCRegClass.contains(R.first))
16911     return std::make_pair(TRI->getMatchingSuperReg(R.first,
16912                             PPC::sub_32, &PPC::G8RCRegClass),
16913                           &PPC::G8RCRegClass);
16914
16915   // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
16916   if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
16917     R.first = PPC::CR0;
16918     R.second = &PPC::CRRCRegClass;
16919   }
16920   // FIXME: This warning should ideally be emitted in the front end.
16921   const auto &TM = getTargetMachine();
16922   if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
16923     if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16924          (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16925         (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16926       errs() << "warning: vector registers 20 to 32 are reserved in the "
16927                 "default AIX AltiVec ABI and cannot be used\n";
16928   }
16929
16930   return R;
16931 }
16932
16933 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16934 /// vector.  If it is invalid, don't add anything to Ops.
16935 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
16936                                                      StringRef Constraint,
16937                                                      std::vector<SDValue> &Ops,
16938                                                      SelectionDAG &DAG) const {
16939   SDValue Result;
16940
16941   // Only support length 1 constraints.
16942   if (Constraint.size() > 1)
16943     return;
16944
16945   char Letter = Constraint[0];
16946   switch (Letter) {
16947   default: break;
16948   case 'I':
16949   case 'J':
16950   case 'K':
16951   case 'L':
16952   case 'M':
16953   case 'N':
16954   case 'O':
16955   case 'P': {
16956     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
16957     if (!CST) return; // Must be an immediate to match.
16958     SDLoc dl(Op);
16959     int64_t Value = CST->getSExtValue();
16960     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
16961                          // numbers are printed as such.
16962     switch (Letter) {
16963     default: llvm_unreachable("Unknown constraint letter!");
16964     case 'I':  // "I" is a signed 16-bit constant.
16965       if (isInt<16>(Value))
16966         Result = DAG.getTargetConstant(Value, dl, TCVT);
16967       break;
16968     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
16969       if (isShiftedUInt<16, 16>(Value))
16970         Result = DAG.getTargetConstant(Value, dl, TCVT);
16971       break;
16972     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
16973       if (isShiftedInt<16, 16>(Value))
16974         Result = DAG.getTargetConstant(Value, dl, TCVT);
16975       break;
16976     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
16977       if (isUInt<16>(Value))
16978         Result = DAG.getTargetConstant(Value, dl, TCVT);
16979       break;
16980     case 'M':  // "M" is a constant that is greater than 31.
16981       if (Value > 31)
16982         Result = DAG.getTargetConstant(Value, dl, TCVT);
16983       break;
16984     case 'N':  // "N" is a positive constant that is an exact power of two.
16985       if (Value > 0 && isPowerOf2_64(Value))
16986         Result = DAG.getTargetConstant(Value, dl, TCVT);
16987       break;
16988     case 'O':  // "O" is the constant zero.
16989       if (Value == 0)
16990         Result = DAG.getTargetConstant(Value, dl, TCVT);
16991       break;
16992     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
16993       if (isInt<16>(-Value))
16994         Result = DAG.getTargetConstant(Value, dl, TCVT);
16995       break;
16996     }
16997     break;
16998   }
16999   }
17000
17001   if (Result.getNode()) {
17002     Ops.push_back(Result);
17003     return;
17004   }
17005
17006   // Handle standard constraint letters.
17007   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
17008 }
17009
17010 void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
17011                                               SmallVectorImpl<SDValue> &Ops,
17012                                               SelectionDAG &DAG) const {
17013   if (I.getNumOperands() <= 1)
17014     return;
17015   if (!isa<ConstantSDNode>(Ops[1].getNode()))
17016     return;
17017   auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
17018   if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
17019       IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
17020     return;
17021
17022   if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
17023     Ops.push_back(DAG.getMDNode(MDN));
17024 }
17025
17026 // isLegalAddressingMode - Return true if the addressing mode represented
17027 // by AM is legal for this target, for a load/store of the specified type.
17028 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
17029                                               const AddrMode &AM, Type *Ty,
17030                                               unsigned AS,
17031                                               Instruction *I) const {
17032   // Vector type r+i form is supported since power9 as DQ form. We don't check
17033   // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
17034   // imm form is preferred and the offset can be adjusted to use imm form later
17035   // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
17036   // max offset to check legal addressing mode, we should be a little aggressive
17037   // to contain other offsets for that LSRUse.
17038   if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
17039     return false;
17040
17041   // PPC allows a sign-extended 16-bit immediate field.
17042   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
17043     return false;
17044
17045   // No global is ever allowed as a base.
17046   if (AM.BaseGV)
17047     return false;
17048
17049   // PPC only support r+r,
17050   switch (AM.Scale) {
17051   case 0:  // "r+i" or just "i", depending on HasBaseReg.
17052     break;
17053   case 1:
17054     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
17055       return false;
17056     // Otherwise we have r+r or r+i.
17057     break;
17058   case 2:
17059     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
17060       return false;
17061     // Allow 2*r as r+r.
17062     break;
17063   default:
17064     // No other scales are supported.
17065     return false;
17066   }
17067
17068   return true;
17069 }
17070
17071 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
17072                                            SelectionDAG &DAG) const {
17073   MachineFunction &MF = DAG.getMachineFunction();
17074   MachineFrameInfo &MFI = MF.getFrameInfo();
17075   MFI.setReturnAddressIsTaken(true);
17076
17077   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
17078     return SDValue();
17079
17080   SDLoc dl(Op);
17081   unsigned Depth = Op.getConstantOperandVal(0);
17082
17083   // Make sure the function does not optimize away the store of the RA to
17084   // the stack.
17085   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
17086   FuncInfo->setLRStoreRequired();
17087   bool isPPC64 = Subtarget.isPPC64();
17088   auto PtrVT = getPointerTy(MF.getDataLayout());
17089
17090   if (Depth > 0) {
17091     // The link register (return address) is saved in the caller's frame
17092     // not the callee's stack frame. So we must get the caller's frame
17093     // address and load the return address at the LR offset from there.
17094     SDValue FrameAddr =
17095         DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17096                     LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
17097     SDValue Offset =
17098         DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
17099                         isPPC64 ? MVT::i64 : MVT::i32);
17100     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
17101                        DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
17102                        MachinePointerInfo());
17103   }
17104
17105   // Just load the return address off the stack.
17106   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
17107   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
17108                      MachinePointerInfo());
17109 }
17110
17111 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
17112                                           SelectionDAG &DAG) const {
17113   SDLoc dl(Op);
17114   unsigned Depth = Op.getConstantOperandVal(0);
17115
17116   MachineFunction &MF = DAG.getMachineFunction();
17117   MachineFrameInfo &MFI = MF.getFrameInfo();
17118   MFI.setFrameAddressIsTaken(true);
17119
17120   EVT PtrVT = getPointerTy(MF.getDataLayout());
17121   bool isPPC64 = PtrVT == MVT::i64;
17122
17123   // Naked functions never have a frame pointer, and so we use r1. For all
17124   // other functions, this decision must be delayed until during PEI.
17125   unsigned FrameReg;
17126   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
17127     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
17128   else
17129     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
17130
17131   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
17132                                          PtrVT);
17133   while (Depth--)
17134     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17135                             FrameAddr, MachinePointerInfo());
17136   return FrameAddr;
17137 }
17138
17139 // FIXME? Maybe this could be a TableGen attribute on some registers and
17140 // this table could be generated automatically from RegInfo.
17141 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
17142                                               const MachineFunction &MF) const {
17143   bool isPPC64 = Subtarget.isPPC64();
17144
17145   bool is64Bit = isPPC64 && VT == LLT::scalar(64);
17146   if (!is64Bit && VT != LLT::scalar(32))
17147     report_fatal_error("Invalid register global variable type");
17148
17149   Register Reg = StringSwitch<Register>(RegName)
17150                      .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
17151                      .Case("r2", isPPC64 ? Register() : PPC::R2)
17152                      .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
17153                      .Default(Register());
17154
17155   if (Reg)
17156     return Reg;
17157   report_fatal_error("Invalid register name global variable");
17158 }
17159
17160 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
17161   // 32-bit SVR4 ABI access everything as got-indirect.
17162   if (Subtarget.is32BitELFABI())
17163     return true;
17164
17165   // AIX accesses everything indirectly through the TOC, which is similar to
17166   // the GOT.
17167   if (Subtarget.isAIXABI())
17168     return true;
17169
17170   CodeModel::Model CModel = getTargetMachine().getCodeModel();
17171   // If it is small or large code model, module locals are accessed
17172   // indirectly by loading their address from .toc/.got.
17173   if (CModel == CodeModel::Small || CModel == CodeModel::Large)
17174     return true;
17175
17176   // JumpTable and BlockAddress are accessed as got-indirect.
17177   if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
17178     return true;
17179
17180   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
17181     return Subtarget.isGVIndirectSymbol(G->getGlobal());
17182
17183   return false;
17184 }
17185
17186 bool
17187 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
17188   // The PowerPC target isn't yet aware of offsets.
17189   return false;
17190 }
17191
17192 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
17193                                            const CallInst &I,
17194                                            MachineFunction &MF,
17195                                            unsigned Intrinsic) const {
17196   switch (Intrinsic) {
17197   case Intrinsic::ppc_atomicrmw_xchg_i128:
17198   case Intrinsic::ppc_atomicrmw_add_i128:
17199   case Intrinsic::ppc_atomicrmw_sub_i128:
17200   case Intrinsic::ppc_atomicrmw_nand_i128:
17201   case Intrinsic::ppc_atomicrmw_and_i128:
17202   case Intrinsic::ppc_atomicrmw_or_i128:
17203   case Intrinsic::ppc_atomicrmw_xor_i128:
17204   case Intrinsic::ppc_cmpxchg_i128:
17205     Info.opc = ISD::INTRINSIC_W_CHAIN;
17206     Info.memVT = MVT::i128;
17207     Info.ptrVal = I.getArgOperand(0);
17208     Info.offset = 0;
17209     Info.align = Align(16);
17210     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
17211                  MachineMemOperand::MOVolatile;
17212     return true;
17213   case Intrinsic::ppc_atomic_load_i128:
17214     Info.opc = ISD::INTRINSIC_W_CHAIN;
17215     Info.memVT = MVT::i128;
17216     Info.ptrVal = I.getArgOperand(0);
17217     Info.offset = 0;
17218     Info.align = Align(16);
17219     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
17220     return true;
17221   case Intrinsic::ppc_atomic_store_i128:
17222     Info.opc = ISD::INTRINSIC_VOID;
17223     Info.memVT = MVT::i128;
17224     Info.ptrVal = I.getArgOperand(2);
17225     Info.offset = 0;
17226     Info.align = Align(16);
17227     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17228     return true;
17229   case Intrinsic::ppc_altivec_lvx:
17230   case Intrinsic::ppc_altivec_lvxl:
17231   case Intrinsic::ppc_altivec_lvebx:
17232   case Intrinsic::ppc_altivec_lvehx:
17233   case Intrinsic::ppc_altivec_lvewx:
17234   case Intrinsic::ppc_vsx_lxvd2x:
17235   case Intrinsic::ppc_vsx_lxvw4x:
17236   case Intrinsic::ppc_vsx_lxvd2x_be:
17237   case Intrinsic::ppc_vsx_lxvw4x_be:
17238   case Intrinsic::ppc_vsx_lxvl:
17239   case Intrinsic::ppc_vsx_lxvll: {
17240     EVT VT;
17241     switch (Intrinsic) {
17242     case Intrinsic::ppc_altivec_lvebx:
17243       VT = MVT::i8;
17244       break;
17245     case Intrinsic::ppc_altivec_lvehx:
17246       VT = MVT::i16;
17247       break;
17248     case Intrinsic::ppc_altivec_lvewx:
17249       VT = MVT::i32;
17250       break;
17251     case Intrinsic::ppc_vsx_lxvd2x:
17252     case Intrinsic::ppc_vsx_lxvd2x_be:
17253       VT = MVT::v2f64;
17254       break;
17255     default:
17256       VT = MVT::v4i32;
17257       break;
17258     }
17259
17260     Info.opc = ISD::INTRINSIC_W_CHAIN;
17261     Info.memVT = VT;
17262     Info.ptrVal = I.getArgOperand(0);
17263     Info.offset = -VT.getStoreSize()+1;
17264     Info.size = 2*VT.getStoreSize()-1;
17265     Info.align = Align(1);
17266     Info.flags = MachineMemOperand::MOLoad;
17267     return true;
17268   }
17269   case Intrinsic::ppc_altivec_stvx:
17270   case Intrinsic::ppc_altivec_stvxl:
17271   case Intrinsic::ppc_altivec_stvebx:
17272   case Intrinsic::ppc_altivec_stvehx:
17273   case Intrinsic::ppc_altivec_stvewx:
17274   case Intrinsic::ppc_vsx_stxvd2x:
17275   case Intrinsic::ppc_vsx_stxvw4x:
17276   case Intrinsic::ppc_vsx_stxvd2x_be:
17277   case Intrinsic::ppc_vsx_stxvw4x_be:
17278   case Intrinsic::ppc_vsx_stxvl:
17279   case Intrinsic::ppc_vsx_stxvll: {
17280     EVT VT;
17281     switch (Intrinsic) {
17282     case Intrinsic::ppc_altivec_stvebx:
17283       VT = MVT::i8;
17284       break;
17285     case Intrinsic::ppc_altivec_stvehx:
17286       VT = MVT::i16;
17287       break;
17288     case Intrinsic::ppc_altivec_stvewx:
17289       VT = MVT::i32;
17290       break;
17291     case Intrinsic::ppc_vsx_stxvd2x:
17292     case Intrinsic::ppc_vsx_stxvd2x_be:
17293       VT = MVT::v2f64;
17294       break;
17295     default:
17296       VT = MVT::v4i32;
17297       break;
17298     }
17299
17300     Info.opc = ISD::INTRINSIC_VOID;
17301     Info.memVT = VT;
17302     Info.ptrVal = I.getArgOperand(1);
17303     Info.offset = -VT.getStoreSize()+1;
17304     Info.size = 2*VT.getStoreSize()-1;
17305     Info.align = Align(1);
17306     Info.flags = MachineMemOperand::MOStore;
17307     return true;
17308   }
17309   case Intrinsic::ppc_stdcx:
17310   case Intrinsic::ppc_stwcx:
17311   case Intrinsic::ppc_sthcx:
17312   case Intrinsic::ppc_stbcx: {
17313     EVT VT;
17314     auto Alignment = Align(8);
17315     switch (Intrinsic) {
17316     case Intrinsic::ppc_stdcx:
17317       VT = MVT::i64;
17318       break;
17319     case Intrinsic::ppc_stwcx:
17320       VT = MVT::i32;
17321       Alignment = Align(4);
17322       break;
17323     case Intrinsic::ppc_sthcx:
17324       VT = MVT::i16;
17325       Alignment = Align(2);
17326       break;
17327     case Intrinsic::ppc_stbcx:
17328       VT = MVT::i8;
17329       Alignment = Align(1);
17330       break;
17331     }
17332     Info.opc = ISD::INTRINSIC_W_CHAIN;
17333     Info.memVT = VT;
17334     Info.ptrVal = I.getArgOperand(0);
17335     Info.offset = 0;
17336     Info.align = Alignment;
17337     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17338     return true;
17339   }
17340   default:
17341     break;
17342   }
17343
17344   return false;
17345 }
17346
17347 /// It returns EVT::Other if the type should be determined using generic
17348 /// target-independent logic.
17349 EVT PPCTargetLowering::getOptimalMemOpType(
17350     const MemOp &Op, const AttributeList &FuncAttributes) const {
17351   if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
17352     // We should use Altivec/VSX loads and stores when available. For unaligned
17353     // addresses, unaligned VSX loads are only fast starting with the P8.
17354     if (Subtarget.hasAltivec() && Op.size() >= 16) {
17355       if (Op.isMemset() && Subtarget.hasVSX()) {
17356         uint64_t TailSize = Op.size() % 16;
17357         // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
17358         // element if vector element type matches tail store. For tail size
17359         // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
17360         if (TailSize > 2 && TailSize <= 4) {
17361           return MVT::v8i16;
17362         }
17363         return MVT::v4i32;
17364       }
17365       if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
17366         return MVT::v4i32;
17367     }
17368   }
17369
17370   if (Subtarget.isPPC64()) {
17371     return MVT::i64;
17372   }
17373
17374   return MVT::i32;
17375 }
17376
17377 /// Returns true if it is beneficial to convert a load of a constant
17378 /// to just the constant itself.
17379 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
17380                                                           Type *Ty) const {
17381   assert(Ty->isIntegerTy());
17382
17383   unsigned BitSize = Ty->getPrimitiveSizeInBits();
17384   return !(BitSize == 0 || BitSize > 64);
17385 }
17386
17387 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
17388   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17389     return false;
17390   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
17391   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
17392   return NumBits1 == 64 && NumBits2 == 32;
17393 }
17394
17395 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
17396   if (!VT1.isInteger() || !VT2.isInteger())
17397     return false;
17398   unsigned NumBits1 = VT1.getSizeInBits();
17399   unsigned NumBits2 = VT2.getSizeInBits();
17400   return NumBits1 == 64 && NumBits2 == 32;
17401 }
17402
17403 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
17404   // Generally speaking, zexts are not free, but they are free when they can be
17405   // folded with other operations.
17406   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
17407     EVT MemVT = LD->getMemoryVT();
17408     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
17409          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
17410         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
17411          LD->getExtensionType() == ISD::ZEXTLOAD))
17412       return true;
17413   }
17414
17415   // FIXME: Add other cases...
17416   //  - 32-bit shifts with a zext to i64
17417   //  - zext after ctlz, bswap, etc.
17418   //  - zext after and by a constant mask
17419
17420   return TargetLowering::isZExtFree(Val, VT2);
17421 }
17422
17423 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
17424   assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17425          "invalid fpext types");
17426   // Extending to float128 is not free.
17427   if (DestVT == MVT::f128)
17428     return false;
17429   return true;
17430 }
17431
17432 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
17433   return isInt<16>(Imm) || isUInt<16>(Imm);
17434 }
17435
17436 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
17437   return isInt<16>(Imm) || isUInt<16>(Imm);
17438 }
17439
17440 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
17441                                                        MachineMemOperand::Flags,
17442                                                        unsigned *Fast) const {
17443   if (DisablePPCUnaligned)
17444     return false;
17445
17446   // PowerPC supports unaligned memory access for simple non-vector types.
17447   // Although accessing unaligned addresses is not as efficient as accessing
17448   // aligned addresses, it is generally more efficient than manual expansion,
17449   // and generally only traps for software emulation when crossing page
17450   // boundaries.
17451
17452   if (!VT.isSimple())
17453     return false;
17454
17455   if (VT.isFloatingPoint() && !VT.isVector() &&
17456       !Subtarget.allowsUnalignedFPAccess())
17457     return false;
17458
17459   if (VT.getSimpleVT().isVector()) {
17460     if (Subtarget.hasVSX()) {
17461       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17462           VT != MVT::v4f32 && VT != MVT::v4i32)
17463         return false;
17464     } else {
17465       return false;
17466     }
17467   }
17468
17469   if (VT == MVT::ppcf128)
17470     return false;
17471
17472   if (Fast)
17473     *Fast = 1;
17474
17475   return true;
17476 }
17477
17478 bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
17479                                                SDValue C) const {
17480   // Check integral scalar types.
17481   if (!VT.isScalarInteger())
17482     return false;
17483   if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17484     if (!ConstNode->getAPIntValue().isSignedIntN(64))
17485       return false;
17486     // This transformation will generate >= 2 operations. But the following
17487     // cases will generate <= 2 instructions during ISEL. So exclude them.
17488     // 1. If the constant multiplier fits 16 bits, it can be handled by one
17489     // HW instruction, ie. MULLI
17490     // 2. If the multiplier after shifted fits 16 bits, an extra shift
17491     // instruction is needed than case 1, ie. MULLI and RLDICR
17492     int64_t Imm = ConstNode->getSExtValue();
17493     unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17494     Imm >>= Shift;
17495     if (isInt<16>(Imm))
17496       return false;
17497     uint64_t UImm = static_cast<uint64_t>(Imm);
17498     if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17499         isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17500       return true;
17501   }
17502   return false;
17503 }
17504
17505 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
17506                                                    EVT VT) const {
17507   return isFMAFasterThanFMulAndFAdd(
17508       MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
17509 }
17510
17511 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
17512                                                    Type *Ty) const {
17513   if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
17514     return false;
17515   switch (Ty->getScalarType()->getTypeID()) {
17516   case Type::FloatTyID:
17517   case Type::DoubleTyID:
17518     return true;
17519   case Type::FP128TyID:
17520     return Subtarget.hasP9Vector();
17521   default:
17522     return false;
17523   }
17524 }
17525
17526 // FIXME: add more patterns which are not profitable to hoist.
17527 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
17528   if (!I->hasOneUse())
17529     return true;
17530
17531   Instruction *User = I->user_back();
17532   assert(User && "A single use instruction with no uses.");
17533
17534   switch (I->getOpcode()) {
17535   case Instruction::FMul: {
17536     // Don't break FMA, PowerPC prefers FMA.
17537     if (User->getOpcode() != Instruction::FSub &&
17538         User->getOpcode() != Instruction::FAdd)
17539       return true;
17540
17541     const TargetOptions &Options = getTargetMachine().Options;
17542     const Function *F = I->getFunction();
17543     const DataLayout &DL = F->getDataLayout();
17544     Type *Ty = User->getOperand(0)->getType();
17545
17546     return !(
17547         isFMAFasterThanFMulAndFAdd(*F, Ty) &&
17548         isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
17549         (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17550   }
17551   case Instruction::Load: {
17552     // Don't break "store (load float*)" pattern, this pattern will be combined
17553     // to "store (load int32)" in later InstCombine pass. See function
17554     // combineLoadToOperationType. On PowerPC, loading a float point takes more
17555     // cycles than loading a 32 bit integer.
17556     LoadInst *LI = cast<LoadInst>(I);
17557     // For the loads that combineLoadToOperationType does nothing, like
17558     // ordered load, it should be profitable to hoist them.
17559     // For swifterror load, it can only be used for pointer to pointer type, so
17560     // later type check should get rid of this case.
17561     if (!LI->isUnordered())
17562       return true;
17563
17564     if (User->getOpcode() != Instruction::Store)
17565       return true;
17566
17567     if (I->getType()->getTypeID() != Type::FloatTyID)
17568       return true;
17569
17570     return false;
17571   }
17572   default:
17573     return true;
17574   }
17575   return true;
17576 }
17577
17578 const MCPhysReg *
17579 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
17580   // LR is a callee-save register, but we must treat it as clobbered by any call
17581   // site. Hence we include LR in the scratch registers, which are in turn added
17582   // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17583   // to CTR, which is used by any indirect call.
17584   static const MCPhysReg ScratchRegs[] = {
17585     PPC::X12, PPC::LR8, PPC::CTR8, 0
17586   };
17587
17588   return ScratchRegs;
17589 }
17590
17591 Register PPCTargetLowering::getExceptionPointerRegister(
17592     const Constant *PersonalityFn) const {
17593   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17594 }
17595
17596 Register PPCTargetLowering::getExceptionSelectorRegister(
17597     const Constant *PersonalityFn) const {
17598   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17599 }
17600
17601 bool
17602 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
17603                      EVT VT , unsigned DefinedValues) const {
17604   if (VT == MVT::v2i64)
17605     return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17606
17607   if (Subtarget.hasVSX())
17608     return true;
17609
17610   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
17611 }
17612
17613 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
17614   if (DisableILPPref || Subtarget.enableMachineScheduler())
17615     return TargetLowering::getSchedulingPreference(N);
17616
17617   return Sched::ILP;
17618 }
17619
17620 // Create a fast isel object.
17621 FastISel *
17622 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
17623                                   const TargetLibraryInfo *LibInfo) const {
17624   return PPC::createFastISel(FuncInfo, LibInfo);
17625 }
17626
17627 // 'Inverted' means the FMA opcode after negating one multiplicand.
17628 // For example, (fma -a b c) = (fnmsub a b c)
17629 static unsigned invertFMAOpcode(unsigned Opc) {
17630   switch (Opc) {
17631   default:
17632     llvm_unreachable("Invalid FMA opcode for PowerPC!");
17633   case ISD::FMA:
17634     return PPCISD::FNMSUB;
17635   case PPCISD::FNMSUB:
17636     return ISD::FMA;
17637   }
17638 }
17639
17640 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
17641                                                 bool LegalOps, bool OptForSize,
17642                                                 NegatibleCost &Cost,
17643                                                 unsigned Depth) const {
17644   if (Depth > SelectionDAG::MaxRecursionDepth)
17645     return SDValue();
17646
17647   unsigned Opc = Op.getOpcode();
17648   EVT VT = Op.getValueType();
17649   SDNodeFlags Flags = Op.getNode()->getFlags();
17650
17651   switch (Opc) {
17652   case PPCISD::FNMSUB:
17653     if (!Op.hasOneUse() || !isTypeLegal(VT))
17654       break;
17655
17656     const TargetOptions &Options = getTargetMachine().Options;
17657     SDValue N0 = Op.getOperand(0);
17658     SDValue N1 = Op.getOperand(1);
17659     SDValue N2 = Op.getOperand(2);
17660     SDLoc Loc(Op);
17661
17662     NegatibleCost N2Cost = NegatibleCost::Expensive;
17663     SDValue NegN2 =
17664         getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
17665
17666     if (!NegN2)
17667       return SDValue();
17668
17669     // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
17670     // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
17671     // These transformations may change sign of zeroes. For example,
17672     // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
17673     if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
17674       // Try and choose the cheaper one to negate.
17675       NegatibleCost N0Cost = NegatibleCost::Expensive;
17676       SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
17677                                            N0Cost, Depth + 1);
17678
17679       NegatibleCost N1Cost = NegatibleCost::Expensive;
17680       SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
17681                                            N1Cost, Depth + 1);
17682
17683       if (NegN0 && N0Cost <= N1Cost) {
17684         Cost = std::min(N0Cost, N2Cost);
17685         return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
17686       } else if (NegN1) {
17687         Cost = std::min(N1Cost, N2Cost);
17688         return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
17689       }
17690     }
17691
17692     // (fneg (fnmsub a b c)) => (fma a b (fneg c))
17693     if (isOperationLegal(ISD::FMA, VT)) {
17694       Cost = N2Cost;
17695       return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
17696     }
17697
17698     break;
17699   }
17700
17701   return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
17702                                               Cost, Depth);
17703 }
17704
17705 // Override to enable LOAD_STACK_GUARD lowering on Linux.
17706 bool PPCTargetLowering::useLoadStackGuardNode() const {
17707   if (!Subtarget.isTargetLinux())
17708     return TargetLowering::useLoadStackGuardNode();
17709   return true;
17710 }
17711
17712 // Override to disable global variable loading on Linux and insert AIX canary
17713 // word declaration.
17714 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
17715   if (Subtarget.isAIXABI()) {
17716     M.getOrInsertGlobal(AIXSSPCanaryWordName,
17717                         PointerType::getUnqual(M.getContext()));
17718     return;
17719   }
17720   if (!Subtarget.isTargetLinux())
17721     return TargetLowering::insertSSPDeclarations(M);
17722 }
17723
17724 Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
17725   if (Subtarget.isAIXABI())
17726     return M.getGlobalVariable(AIXSSPCanaryWordName);
17727   return TargetLowering::getSDagStackGuard(M);
17728 }
17729
17730 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
17731                                      bool ForCodeSize) const {
17732   if (!VT.isSimple() || !Subtarget.hasVSX())
17733     return false;
17734
17735   switch(VT.getSimpleVT().SimpleTy) {
17736   default:
17737     // For FP types that are currently not supported by PPC backend, return
17738     // false. Examples: f16, f80.
17739     return false;
17740   case MVT::f32:
17741   case MVT::f64: {
17742     if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
17743       // we can materialize all immediatess via XXSPLTI32DX and XXSPLTIDP.
17744       return true;
17745     }
17746     bool IsExact;
17747     APSInt IntResult(16, false);
17748     // The rounding mode doesn't really matter because we only care about floats
17749     // that can be converted to integers exactly.
17750     Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
17751     // For exact values in the range [-16, 15] we can materialize the float.
17752     if (IsExact && IntResult <= 15 && IntResult >= -16)
17753       return true;
17754     return Imm.isZero();
17755   }
17756   case MVT::ppcf128:
17757     return Imm.isPosZero();
17758   }
17759 }
17760
17761 // For vector shift operation op, fold
17762 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
17763 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
17764                                   SelectionDAG &DAG) {
17765   SDValue N0 = N->getOperand(0);
17766   SDValue N1 = N->getOperand(1);
17767   EVT VT = N0.getValueType();
17768   unsigned OpSizeInBits = VT.getScalarSizeInBits();
17769   unsigned Opcode = N->getOpcode();
17770   unsigned TargetOpcode;
17771
17772   switch (Opcode) {
17773   default:
17774     llvm_unreachable("Unexpected shift operation");
17775   case ISD::SHL:
17776     TargetOpcode = PPCISD::SHL;
17777     break;
17778   case ISD::SRL:
17779     TargetOpcode = PPCISD::SRL;
17780     break;
17781   case ISD::SRA:
17782     TargetOpcode = PPCISD::SRA;
17783     break;
17784   }
17785
17786   if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
17787       N1->getOpcode() == ISD::AND)
17788     if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
17789       if (Mask->getZExtValue() == OpSizeInBits - 1)
17790         return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
17791
17792   return SDValue();
17793 }
17794
17795 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
17796   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17797     return Value;
17798
17799   SDValue N0 = N->getOperand(0);
17800   ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17801   if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
17802       N0.getOpcode() != ISD::SIGN_EXTEND ||
17803       N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
17804       N->getValueType(0) != MVT::i64)
17805     return SDValue();
17806
17807   // We can't save an operation here if the value is already extended, and
17808   // the existing shift is easier to combine.
17809   SDValue ExtsSrc = N0.getOperand(0);
17810   if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
17811       ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
17812     return SDValue();
17813
17814   SDLoc DL(N0);
17815   SDValue ShiftBy = SDValue(CN1, 0);
17816   // We want the shift amount to be i32 on the extswli, but the shift could
17817   // have an i64.
17818   if (ShiftBy.getValueType() == MVT::i64)
17819     ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
17820
17821   return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
17822                          ShiftBy);
17823 }
17824
17825 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
17826   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17827     return Value;
17828
17829   return SDValue();
17830 }
17831
17832 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
17833   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17834     return Value;
17835
17836   return SDValue();
17837 }
17838
17839 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
17840 // Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
17841 // When C is zero, the equation (addi Z, -C) can be simplified to Z
17842 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
17843 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
17844                                  const PPCSubtarget &Subtarget) {
17845   if (!Subtarget.isPPC64())
17846     return SDValue();
17847
17848   SDValue LHS = N->getOperand(0);
17849   SDValue RHS = N->getOperand(1);
17850
17851   auto isZextOfCompareWithConstant = [](SDValue Op) {
17852     if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
17853         Op.getValueType() != MVT::i64)
17854       return false;
17855
17856     SDValue Cmp = Op.getOperand(0);
17857     if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
17858         Cmp.getOperand(0).getValueType() != MVT::i64)
17859       return false;
17860
17861     if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17862       int64_t NegConstant = 0 - Constant->getSExtValue();
17863       // Due to the limitations of the addi instruction,
17864       // -C is required to be [-32768, 32767].
17865       return isInt<16>(NegConstant);
17866     }
17867
17868     return false;
17869   };
17870
17871   bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
17872   bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
17873
17874   // If there is a pattern, canonicalize a zext operand to the RHS.
17875   if (LHSHasPattern && !RHSHasPattern)
17876     std::swap(LHS, RHS);
17877   else if (!LHSHasPattern && !RHSHasPattern)
17878     return SDValue();
17879
17880   SDLoc DL(N);
17881   SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
17882   SDValue Cmp = RHS.getOperand(0);
17883   SDValue Z = Cmp.getOperand(0);
17884   auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17885   int64_t NegConstant = 0 - Constant->getSExtValue();
17886
17887   switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17888   default: break;
17889   case ISD::SETNE: {
17890     //                                 when C == 0
17891     //                             --> addze X, (addic Z, -1).carry
17892     //                            /
17893     // add X, (zext(setne Z, C))--
17894     //                            \    when -32768 <= -C <= 32767 && C != 0
17895     //                             --> addze X, (addic (addi Z, -C), -1).carry
17896     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17897                               DAG.getConstant(NegConstant, DL, MVT::i64));
17898     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17899     SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17900                                AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
17901     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17902                        SDValue(Addc.getNode(), 1));
17903     }
17904   case ISD::SETEQ: {
17905     //                                 when C == 0
17906     //                             --> addze X, (subfic Z, 0).carry
17907     //                            /
17908     // add X, (zext(sete  Z, C))--
17909     //                            \    when -32768 <= -C <= 32767 && C != 0
17910     //                             --> addze X, (subfic (addi Z, -C), 0).carry
17911     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17912                               DAG.getConstant(NegConstant, DL, MVT::i64));
17913     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17914     SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17915                                DAG.getConstant(0, DL, MVT::i64), AddOrZ);
17916     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17917                        SDValue(Subc.getNode(), 1));
17918     }
17919   }
17920
17921   return SDValue();
17922 }
17923
17924 // Transform
17925 // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
17926 // (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
17927 // In this case both C1 and C2 must be known constants.
17928 // C1+C2 must fit into a 34 bit signed integer.
17929 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
17930                                           const PPCSubtarget &Subtarget) {
17931   if (!Subtarget.isUsingPCRelativeCalls())
17932     return SDValue();
17933
17934   // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
17935   // If we find that node try to cast the Global Address and the Constant.
17936   SDValue LHS = N->getOperand(0);
17937   SDValue RHS = N->getOperand(1);
17938
17939   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17940     std::swap(LHS, RHS);
17941
17942   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17943     return SDValue();
17944
17945   // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
17946   GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
17947   ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
17948
17949   // Check that both casts succeeded.
17950   if (!GSDN || !ConstNode)
17951     return SDValue();
17952
17953   int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
17954   SDLoc DL(GSDN);
17955
17956   // The signed int offset needs to fit in 34 bits.
17957   if (!isInt<34>(NewOffset))
17958     return SDValue();
17959
17960   // The new global address is a copy of the old global address except
17961   // that it has the updated Offset.
17962   SDValue GA =
17963       DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
17964                                  NewOffset, GSDN->getTargetFlags());
17965   SDValue MatPCRel =
17966       DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
17967   return MatPCRel;
17968 }
17969
17970 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
17971   if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
17972     return Value;
17973
17974   if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
17975     return Value;
17976
17977   return SDValue();
17978 }
17979
17980 // Detect TRUNCATE operations on bitcasts of float128 values.
17981 // What we are looking for here is the situtation where we extract a subset
17982 // of bits from a 128 bit float.
17983 // This can be of two forms:
17984 // 1) BITCAST of f128 feeding TRUNCATE
17985 // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
17986 // The reason this is required is because we do not have a legal i128 type
17987 // and so we want to prevent having to store the f128 and then reload part
17988 // of it.
17989 SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
17990                                            DAGCombinerInfo &DCI) const {
17991   // If we are using CRBits then try that first.
17992   if (Subtarget.useCRBits()) {
17993     // Check if CRBits did anything and return that if it did.
17994     if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
17995       return CRTruncValue;
17996   }
17997
17998   SDLoc dl(N);
17999   SDValue Op0 = N->getOperand(0);
18000
18001   // Looking for a truncate of i128 to i64.
18002   if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
18003     return SDValue();
18004
18005   int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
18006
18007   // SRL feeding TRUNCATE.
18008   if (Op0.getOpcode() == ISD::SRL) {
18009     ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
18010     // The right shift has to be by 64 bits.
18011     if (!ConstNode || ConstNode->getZExtValue() != 64)
18012       return SDValue();
18013
18014     // Switch the element number to extract.
18015     EltToExtract = EltToExtract ? 0 : 1;
18016     // Update Op0 past the SRL.
18017     Op0 = Op0.getOperand(0);
18018   }
18019
18020   // BITCAST feeding a TRUNCATE possibly via SRL.
18021   if (Op0.getOpcode() == ISD::BITCAST &&
18022       Op0.getValueType() == MVT::i128 &&
18023       Op0.getOperand(0).getValueType() == MVT::f128) {
18024     SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
18025     return DCI.DAG.getNode(
18026         ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
18027         DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
18028   }
18029   return SDValue();
18030 }
18031
18032 SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
18033   SelectionDAG &DAG = DCI.DAG;
18034
18035   ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
18036   if (!ConstOpOrElement)
18037     return SDValue();
18038
18039   // An imul is usually smaller than the alternative sequence for legal type.
18040   if (DAG.getMachineFunction().getFunction().hasMinSize() &&
18041       isOperationLegal(ISD::MUL, N->getValueType(0)))
18042     return SDValue();
18043
18044   auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
18045     switch (this->Subtarget.getCPUDirective()) {
18046     default:
18047       // TODO: enhance the condition for subtarget before pwr8
18048       return false;
18049     case PPC::DIR_PWR8:
18050       //  type        mul     add    shl
18051       // scalar        4       1      1
18052       // vector        7       2      2
18053       return true;
18054     case PPC::DIR_PWR9:
18055     case PPC::DIR_PWR10:
18056     case PPC::DIR_PWR11:
18057     case PPC::DIR_PWR_FUTURE:
18058       //  type        mul     add    shl
18059       // scalar        5       2      2
18060       // vector        7       2      2
18061
18062       // The cycle RATIO of related operations are showed as a table above.
18063       // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
18064       // scalar and vector type. For 2 instrs patterns, add/sub + shl
18065       // are 4, it is always profitable; but for 3 instrs patterns
18066       // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
18067       // So we should only do it for vector type.
18068       return IsAddOne && IsNeg ? VT.isVector() : true;
18069     }
18070   };
18071
18072   EVT VT = N->getValueType(0);
18073   SDLoc DL(N);
18074
18075   const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
18076   bool IsNeg = MulAmt.isNegative();
18077   APInt MulAmtAbs = MulAmt.abs();
18078
18079   if ((MulAmtAbs - 1).isPowerOf2()) {
18080     // (mul x, 2^N + 1) => (add (shl x, N), x)
18081     // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
18082
18083     if (!IsProfitable(IsNeg, true, VT))
18084       return SDValue();
18085
18086     SDValue Op0 = N->getOperand(0);
18087     SDValue Op1 =
18088         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18089                     DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
18090     SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
18091
18092     if (!IsNeg)
18093       return Res;
18094
18095     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
18096   } else if ((MulAmtAbs + 1).isPowerOf2()) {
18097     // (mul x, 2^N - 1) => (sub (shl x, N), x)
18098     // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
18099
18100     if (!IsProfitable(IsNeg, false, VT))
18101       return SDValue();
18102
18103     SDValue Op0 = N->getOperand(0);
18104     SDValue Op1 =
18105         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18106                     DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
18107
18108     if (!IsNeg)
18109       return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
18110     else
18111       return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
18112
18113   } else {
18114     return SDValue();
18115   }
18116 }
18117
18118 // Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
18119 // in combiner since we need to check SD flags and other subtarget features.
18120 SDValue PPCTargetLowering::combineFMALike(SDNode *N,
18121                                           DAGCombinerInfo &DCI) const {
18122   SDValue N0 = N->getOperand(0);
18123   SDValue N1 = N->getOperand(1);
18124   SDValue N2 = N->getOperand(2);
18125   SDNodeFlags Flags = N->getFlags();
18126   EVT VT = N->getValueType(0);
18127   SelectionDAG &DAG = DCI.DAG;
18128   const TargetOptions &Options = getTargetMachine().Options;
18129   unsigned Opc = N->getOpcode();
18130   bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
18131   bool LegalOps = !DCI.isBeforeLegalizeOps();
18132   SDLoc Loc(N);
18133
18134   if (!isOperationLegal(ISD::FMA, VT))
18135     return SDValue();
18136
18137   // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
18138   // since (fnmsub a b c)=-0 while c-ab=+0.
18139   if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
18140     return SDValue();
18141
18142   // (fma (fneg a) b c) => (fnmsub a b c)
18143   // (fnmsub (fneg a) b c) => (fma a b c)
18144   if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
18145     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
18146
18147   // (fma a (fneg b) c) => (fnmsub a b c)
18148   // (fnmsub a (fneg b) c) => (fma a b c)
18149   if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
18150     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
18151
18152   return SDValue();
18153 }
18154
18155 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
18156   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
18157   if (!Subtarget.is64BitELFABI())
18158     return false;
18159
18160   // If not a tail call then no need to proceed.
18161   if (!CI->isTailCall())
18162     return false;
18163
18164   // If sibling calls have been disabled and tail-calls aren't guaranteed
18165   // there is no reason to duplicate.
18166   auto &TM = getTargetMachine();
18167   if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
18168     return false;
18169
18170   // Can't tail call a function called indirectly, or if it has variadic args.
18171   const Function *Callee = CI->getCalledFunction();
18172   if (!Callee || Callee->isVarArg())
18173     return false;
18174
18175   // Make sure the callee and caller calling conventions are eligible for tco.
18176   const Function *Caller = CI->getParent()->getParent();
18177   if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
18178                                            CI->getCallingConv()))
18179       return false;
18180
18181   // If the function is local then we have a good chance at tail-calling it
18182   return getTargetMachine().shouldAssumeDSOLocal(Callee);
18183 }
18184
18185 bool PPCTargetLowering::
18186 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
18187   const Value *Mask = AndI.getOperand(1);
18188   // If the mask is suitable for andi. or andis. we should sink the and.
18189   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
18190     // Can't handle constants wider than 64-bits.
18191     if (CI->getBitWidth() > 64)
18192       return false;
18193     int64_t ConstVal = CI->getZExtValue();
18194     return isUInt<16>(ConstVal) ||
18195       (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
18196   }
18197
18198   // For non-constant masks, we can always use the record-form and.
18199   return true;
18200 }
18201
18202 /// getAddrModeForFlags - Based on the set of address flags, select the most
18203 /// optimal instruction format to match by.
18204 PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
18205   // This is not a node we should be handling here.
18206   if (Flags == PPC::MOF_None)
18207     return PPC::AM_None;
18208   // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
18209   for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
18210     if ((Flags & FlagSet) == FlagSet)
18211       return PPC::AM_DForm;
18212   for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
18213     if ((Flags & FlagSet) == FlagSet)
18214       return PPC::AM_DSForm;
18215   for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
18216     if ((Flags & FlagSet) == FlagSet)
18217       return PPC::AM_DQForm;
18218   for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
18219     if ((Flags & FlagSet) == FlagSet)
18220       return PPC::AM_PrefixDForm;
18221   // If no other forms are selected, return an X-Form as it is the most
18222   // general addressing mode.
18223   return PPC::AM_XForm;
18224 }
18225
18226 /// Set alignment flags based on whether or not the Frame Index is aligned.
18227 /// Utilized when computing flags for address computation when selecting
18228 /// load and store instructions.
18229 static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
18230                                SelectionDAG &DAG) {
18231   bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
18232   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
18233   if (!FI)
18234     return;
18235   const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18236   unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
18237   // If this is (add $FI, $S16Imm), the alignment flags are already set
18238   // based on the immediate. We just need to clear the alignment flags
18239   // if the FI alignment is weaker.
18240   if ((FrameIndexAlign % 4) != 0)
18241     FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
18242   if ((FrameIndexAlign % 16) != 0)
18243     FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
18244   // If the address is a plain FrameIndex, set alignment flags based on
18245   // FI alignment.
18246   if (!IsAdd) {
18247     if ((FrameIndexAlign % 4) == 0)
18248       FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18249     if ((FrameIndexAlign % 16) == 0)
18250       FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18251   }
18252 }
18253
18254 /// Given a node, compute flags that are used for address computation when
18255 /// selecting load and store instructions. The flags computed are stored in
18256 /// FlagSet. This function takes into account whether the node is a constant,
18257 /// an ADD, OR, or a constant, and computes the address flags accordingly.
18258 static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
18259                                               SelectionDAG &DAG) {
18260   // Set the alignment flags for the node depending on if the node is
18261   // 4-byte or 16-byte aligned.
18262   auto SetAlignFlagsForImm = [&](uint64_t Imm) {
18263     if ((Imm & 0x3) == 0)
18264       FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18265     if ((Imm & 0xf) == 0)
18266       FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18267   };
18268
18269   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
18270     // All 32-bit constants can be computed as LIS + Disp.
18271     const APInt &ConstImm = CN->getAPIntValue();
18272     if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
18273       FlagSet |= PPC::MOF_AddrIsSImm32;
18274       SetAlignFlagsForImm(ConstImm.getZExtValue());
18275       setAlignFlagsForFI(N, FlagSet, DAG);
18276     }
18277     if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
18278       FlagSet |= PPC::MOF_RPlusSImm34;
18279     else // Let constant materialization handle large constants.
18280       FlagSet |= PPC::MOF_NotAddNorCst;
18281   } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
18282     // This address can be represented as an addition of:
18283     // - Register + Imm16 (possibly a multiple of 4/16)
18284     // - Register + Imm34
18285     // - Register + PPCISD::Lo
18286     // - Register + Register
18287     // In any case, we won't have to match this as Base + Zero.
18288     SDValue RHS = N.getOperand(1);
18289     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
18290       const APInt &ConstImm = CN->getAPIntValue();
18291       if (ConstImm.isSignedIntN(16)) {
18292         FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18293         SetAlignFlagsForImm(ConstImm.getZExtValue());
18294         setAlignFlagsForFI(N, FlagSet, DAG);
18295       }
18296       if (ConstImm.isSignedIntN(34))
18297         FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18298       else
18299         FlagSet |= PPC::MOF_RPlusR; // Register.
18300     } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
18301       FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18302     else
18303       FlagSet |= PPC::MOF_RPlusR;
18304   } else { // The address computation is not a constant or an addition.
18305     setAlignFlagsForFI(N, FlagSet, DAG);
18306     FlagSet |= PPC::MOF_NotAddNorCst;
18307   }
18308 }
18309
18310 static bool isPCRelNode(SDValue N) {
18311   return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
18312       isValidPCRelNode<ConstantPoolSDNode>(N) ||
18313       isValidPCRelNode<GlobalAddressSDNode>(N) ||
18314       isValidPCRelNode<JumpTableSDNode>(N) ||
18315       isValidPCRelNode<BlockAddressSDNode>(N));
18316 }
18317
18318 /// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
18319 /// the address flags of the load/store instruction that is to be matched.
18320 unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18321                                            SelectionDAG &DAG) const {
18322   unsigned FlagSet = PPC::MOF_None;
18323
18324   // Compute subtarget flags.
18325   if (!Subtarget.hasP9Vector())
18326     FlagSet |= PPC::MOF_SubtargetBeforeP9;
18327   else
18328     FlagSet |= PPC::MOF_SubtargetP9;
18329
18330   if (Subtarget.hasPrefixInstrs())
18331     FlagSet |= PPC::MOF_SubtargetP10;
18332
18333   if (Subtarget.hasSPE())
18334     FlagSet |= PPC::MOF_SubtargetSPE;
18335
18336   // Check if we have a PCRel node and return early.
18337   if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18338     return FlagSet;
18339
18340   // If the node is the paired load/store intrinsics, compute flags for
18341   // address computation and return early.
18342   unsigned ParentOp = Parent->getOpcode();
18343   if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18344                                (ParentOp == ISD::INTRINSIC_VOID))) {
18345     unsigned ID = Parent->getConstantOperandVal(1);
18346     if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18347       SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18348                              ? Parent->getOperand(2)
18349                              : Parent->getOperand(3);
18350       computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18351       FlagSet |= PPC::MOF_Vector;
18352       return FlagSet;
18353     }
18354   }
18355
18356   // Mark this as something we don't want to handle here if it is atomic
18357   // or pre-increment instruction.
18358   if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18359     if (LSB->isIndexed())
18360       return PPC::MOF_None;
18361
18362   // Compute in-memory type flags. This is based on if there are scalars,
18363   // floats or vectors.
18364   const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18365   assert(MN && "Parent should be a MemSDNode!");
18366   EVT MemVT = MN->getMemoryVT();
18367   unsigned Size = MemVT.getSizeInBits();
18368   if (MemVT.isScalarInteger()) {
18369     assert(Size <= 128 &&
18370            "Not expecting scalar integers larger than 16 bytes!");
18371     if (Size < 32)
18372       FlagSet |= PPC::MOF_SubWordInt;
18373     else if (Size == 32)
18374       FlagSet |= PPC::MOF_WordInt;
18375     else
18376       FlagSet |= PPC::MOF_DoubleWordInt;
18377   } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18378     if (Size == 128)
18379       FlagSet |= PPC::MOF_Vector;
18380     else if (Size == 256) {
18381       assert(Subtarget.pairedVectorMemops() &&
18382              "256-bit vectors are only available when paired vector memops is "
18383              "enabled!");
18384       FlagSet |= PPC::MOF_Vector;
18385     } else
18386       llvm_unreachable("Not expecting illegal vectors!");
18387   } else { // Floating point type: can be scalar, f128 or vector types.
18388     if (Size == 32 || Size == 64)
18389       FlagSet |= PPC::MOF_ScalarFloat;
18390     else if (MemVT == MVT::f128 || MemVT.isVector())
18391       FlagSet |= PPC::MOF_Vector;
18392     else
18393       llvm_unreachable("Not expecting illegal scalar floats!");
18394   }
18395
18396   // Compute flags for address computation.
18397   computeFlagsForAddressComputation(N, FlagSet, DAG);
18398
18399   // Compute type extension flags.
18400   if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18401     switch (LN->getExtensionType()) {
18402     case ISD::SEXTLOAD:
18403       FlagSet |= PPC::MOF_SExt;
18404       break;
18405     case ISD::EXTLOAD:
18406     case ISD::ZEXTLOAD:
18407       FlagSet |= PPC::MOF_ZExt;
18408       break;
18409     case ISD::NON_EXTLOAD:
18410       FlagSet |= PPC::MOF_NoExt;
18411       break;
18412     }
18413   } else
18414     FlagSet |= PPC::MOF_NoExt;
18415
18416   // For integers, no extension is the same as zero extension.
18417   // We set the extension mode to zero extension so we don't have
18418   // to add separate entries in AddrModesMap for loads and stores.
18419   if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18420     FlagSet |= PPC::MOF_ZExt;
18421     FlagSet &= ~PPC::MOF_NoExt;
18422   }
18423
18424   // If we don't have prefixed instructions, 34-bit constants should be
18425   // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18426   bool IsNonP1034BitConst =
18427       ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
18428        FlagSet) == PPC::MOF_RPlusSImm34;
18429   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18430       IsNonP1034BitConst)
18431     FlagSet |= PPC::MOF_NotAddNorCst;
18432
18433   return FlagSet;
18434 }
18435
18436 /// SelectForceXFormMode - Given the specified address, force it to be
18437 /// represented as an indexed [r+r] operation (an XForm instruction).
18438 PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
18439                                                       SDValue &Base,
18440                                                       SelectionDAG &DAG) const {
18441
18442   PPC::AddrMode Mode = PPC::AM_XForm;
18443   int16_t ForceXFormImm = 0;
18444   if (provablyDisjointOr(DAG, N) &&
18445       !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18446     Disp = N.getOperand(0);
18447     Base = N.getOperand(1);
18448     return Mode;
18449   }
18450
18451   // If the address is the result of an add, we will utilize the fact that the
18452   // address calculation includes an implicit add.  However, we can reduce
18453   // register pressure if we do not materialize a constant just for use as the
18454   // index register.  We only get rid of the add if it is not an add of a
18455   // value and a 16-bit signed constant and both have a single use.
18456   if (N.getOpcode() == ISD::ADD &&
18457       (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18458        !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18459     Disp = N.getOperand(0);
18460     Base = N.getOperand(1);
18461     return Mode;
18462   }
18463
18464   // Otherwise, use R0 as the base register.
18465   Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18466                          N.getValueType());
18467   Base = N;
18468
18469   return Mode;
18470 }
18471
18472 bool PPCTargetLowering::splitValueIntoRegisterParts(
18473     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18474     unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18475   EVT ValVT = Val.getValueType();
18476   // If we are splitting a scalar integer into f64 parts (i.e. so they
18477   // can be placed into VFRC registers), we need to zero extend and
18478   // bitcast the values. This will ensure the value is placed into a
18479   // VSR using direct moves or stack operations as needed.
18480   if (PartVT == MVT::f64 &&
18481       (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18482     Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18483     Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18484     Parts[0] = Val;
18485     return true;
18486   }
18487   return false;
18488 }
18489
18490 SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18491                                           SelectionDAG &DAG) const {
18492   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18493   TargetLowering::CallLoweringInfo CLI(DAG);
18494   EVT RetVT = Op.getValueType();
18495   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18496   SDValue Callee =
18497       DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
18498   bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
18499   TargetLowering::ArgListTy Args;
18500   TargetLowering::ArgListEntry Entry;
18501   for (const SDValue &N : Op->op_values()) {
18502     EVT ArgVT = N.getValueType();
18503     Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18504     Entry.Node = N;
18505     Entry.Ty = ArgTy;
18506     Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
18507     Entry.IsZExt = !Entry.IsSExt;
18508     Args.push_back(Entry);
18509   }
18510
18511   SDValue InChain = DAG.getEntryNode();
18512   SDValue TCChain = InChain;
18513   const Function &F = DAG.getMachineFunction().getFunction();
18514   bool isTailCall =
18515       TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18516       (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18517   if (isTailCall)
18518     InChain = TCChain;
18519   CLI.setDebugLoc(SDLoc(Op))
18520       .setChain(InChain)
18521       .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18522       .setTailCall(isTailCall)
18523       .setSExtResult(SignExtend)
18524       .setZExtResult(!SignExtend)
18525       .setIsPostTypeLegalization(true);
18526   return TLI.LowerCallTo(CLI).first;
18527 }
18528
18529 SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18530     const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18531     SelectionDAG &DAG) const {
18532   if (Op.getValueType() == MVT::f32)
18533     return lowerToLibCall(LibCallFloatName, Op, DAG);
18534
18535   if (Op.getValueType() == MVT::f64)
18536     return lowerToLibCall(LibCallDoubleName, Op, DAG);
18537
18538   return SDValue();
18539 }
18540
18541 bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18542   SDNodeFlags Flags = Op.getNode()->getFlags();
18543   return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18544          Flags.hasNoNaNs() && Flags.hasNoInfs();
18545 }
18546
18547 bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18548   return Op.getNode()->getFlags().hasApproximateFuncs();
18549 }
18550
18551 bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18552   return getTargetMachine().Options.PPCGenScalarMASSEntries;
18553 }
18554
18555 SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18556                                             const char *LibCallFloatName,
18557                                             const char *LibCallDoubleNameFinite,
18558                                             const char *LibCallFloatNameFinite,
18559                                             SDValue Op,
18560                                             SelectionDAG &DAG) const {
18561   if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18562     return SDValue();
18563
18564   if (!isLowringToMASSFiniteSafe(Op))
18565     return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18566                                    DAG);
18567
18568   return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18569                                  LibCallDoubleNameFinite, Op, DAG);
18570 }
18571
18572 SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18573   return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18574                           "__xl_powf_finite", Op, DAG);
18575 }
18576
18577 SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18578   return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18579                           "__xl_sinf_finite", Op, DAG);
18580 }
18581
18582 SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18583   return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18584                           "__xl_cosf_finite", Op, DAG);
18585 }
18586
18587 SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18588   return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18589                           "__xl_logf_finite", Op, DAG);
18590 }
18591
18592 SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18593   return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18594                           "__xl_log10f_finite", Op, DAG);
18595 }
18596
18597 SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18598   return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18599                           "__xl_expf_finite", Op, DAG);
18600 }
18601
18602 // If we happen to match to an aligned D-Form, check if the Frame Index is
18603 // adequately aligned. If it is not, reset the mode to match to X-Form.
18604 static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18605                                    PPC::AddrMode &Mode) {
18606   if (!isa<FrameIndexSDNode>(N))
18607     return;
18608   if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18609       (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18610     Mode = PPC::AM_XForm;
18611 }
18612
18613 /// SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode),
18614 /// compute the address flags of the node, get the optimal address mode based
18615 /// on the flags, and set the Base and Disp based on the address mode.
18616 PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
18617                                                        SDValue N, SDValue &Disp,
18618                                                        SDValue &Base,
18619                                                        SelectionDAG &DAG,
18620                                                        MaybeAlign Align) const {
18621   SDLoc DL(Parent);
18622
18623   // Compute the address flags.
18624   unsigned Flags = computeMOFlags(Parent, N, DAG);
18625
18626   // Get the optimal address mode based on the Flags.
18627   PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18628
18629   // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18630   // Select an X-Form load if it is not.
18631   setXFormForUnalignedFI(N, Flags, Mode);
18632
18633   // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18634   if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18635     assert(Subtarget.isUsingPCRelativeCalls() &&
18636            "Must be using PC-Relative calls when a valid PC-Relative node is "
18637            "present!");
18638     Mode = PPC::AM_PCRel;
18639   }
18640
18641   // Set Base and Disp accordingly depending on the address mode.
18642   switch (Mode) {
18643   case PPC::AM_DForm:
18644   case PPC::AM_DSForm:
18645   case PPC::AM_DQForm: {
18646     // This is a register plus a 16-bit immediate. The base will be the
18647     // register and the displacement will be the immediate unless it
18648     // isn't sufficiently aligned.
18649     if (Flags & PPC::MOF_RPlusSImm16) {
18650       SDValue Op0 = N.getOperand(0);
18651       SDValue Op1 = N.getOperand(1);
18652       int16_t Imm = Op1->getAsZExtVal();
18653       if (!Align || isAligned(*Align, Imm)) {
18654         Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
18655         Base = Op0;
18656         if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
18657           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18658           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18659         }
18660         break;
18661       }
18662     }
18663     // This is a register plus the @lo relocation. The base is the register
18664     // and the displacement is the global address.
18665     else if (Flags & PPC::MOF_RPlusLo) {
18666       Disp = N.getOperand(1).getOperand(0); // The global address.
18667       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
18668              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
18669              Disp.getOpcode() == ISD::TargetConstantPool ||
18670              Disp.getOpcode() == ISD::TargetJumpTable);
18671       Base = N.getOperand(0);
18672       break;
18673     }
18674     // This is a constant address at most 32 bits. The base will be
18675     // zero or load-immediate-shifted and the displacement will be
18676     // the low 16 bits of the address.
18677     else if (Flags & PPC::MOF_AddrIsSImm32) {
18678       auto *CN = cast<ConstantSDNode>(N);
18679       EVT CNType = CN->getValueType(0);
18680       uint64_t CNImm = CN->getZExtValue();
18681       // If this address fits entirely in a 16-bit sext immediate field, codegen
18682       // this as "d, 0".
18683       int16_t Imm;
18684       if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
18685         Disp = DAG.getTargetConstant(Imm, DL, CNType);
18686         Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18687                                CNType);
18688         break;
18689       }
18690       // Handle 32-bit sext immediate with LIS + Addr mode.
18691       if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
18692           (!Align || isAligned(*Align, CNImm))) {
18693         int32_t Addr = (int32_t)CNImm;
18694         // Otherwise, break this down into LIS + Disp.
18695         Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
18696         Base =
18697             DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
18698         uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
18699         Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
18700         break;
18701       }
18702     }
18703     // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
18704     Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
18705     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
18706       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18707       fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18708     } else
18709       Base = N;
18710     break;
18711   }
18712   case PPC::AM_PrefixDForm: {
18713     int64_t Imm34 = 0;
18714     unsigned Opcode = N.getOpcode();
18715     if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
18716         (isIntS34Immediate(N.getOperand(1), Imm34))) {
18717       // N is an Add/OR Node, and it's operand is a 34-bit signed immediate.
18718       Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18719       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
18720         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18721       else
18722         Base = N.getOperand(0);
18723     } else if (isIntS34Immediate(N, Imm34)) {
18724       // The address is a 34-bit signed immediate.
18725       Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18726       Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
18727     }
18728     break;
18729   }
18730   case PPC::AM_PCRel: {
18731     // When selecting PC-Relative instructions, "Base" is not utilized as
18732     // we select the address as [PC+imm].
18733     Disp = N;
18734     break;
18735   }
18736   case PPC::AM_None:
18737     break;
18738   default: { // By default, X-Form is always available to be selected.
18739     // When a frame index is not aligned, we also match by XForm.
18740     FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
18741     Base = FI ? N : N.getOperand(1);
18742     Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18743                                 N.getValueType())
18744               : N.getOperand(0);
18745     break;
18746   }
18747   }
18748   return Mode;
18749 }
18750
18751 CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
18752                                                  bool Return,
18753                                                  bool IsVarArg) const {
18754   switch (CC) {
18755   case CallingConv::Cold:
18756     return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
18757   default:
18758     return CC_PPC64_ELF;
18759   }
18760 }
18761
18762 bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
18763   return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
18764 }
18765
18766 TargetLowering::AtomicExpansionKind
18767 PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
18768   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18769   if (shouldInlineQuadwordAtomics() && Size == 128)
18770     return AtomicExpansionKind::MaskedIntrinsic;
18771
18772   switch (AI->getOperation()) {
18773   case AtomicRMWInst::UIncWrap:
18774   case AtomicRMWInst::UDecWrap:
18775     return AtomicExpansionKind::CmpXChg;
18776   default:
18777     return TargetLowering::shouldExpandAtomicRMWInIR(AI);
18778   }
18779
18780   llvm_unreachable("unreachable atomicrmw operation");
18781 }
18782
18783 TargetLowering::AtomicExpansionKind
18784 PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
18785   unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
18786   if (shouldInlineQuadwordAtomics() && Size == 128)
18787     return AtomicExpansionKind::MaskedIntrinsic;
18788   return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
18789 }
18790
18791 static Intrinsic::ID
18792 getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
18793   switch (BinOp) {
18794   default:
18795     llvm_unreachable("Unexpected AtomicRMW BinOp");
18796   case AtomicRMWInst::Xchg:
18797     return Intrinsic::ppc_atomicrmw_xchg_i128;
18798   case AtomicRMWInst::Add:
18799     return Intrinsic::ppc_atomicrmw_add_i128;
18800   case AtomicRMWInst::Sub:
18801     return Intrinsic::ppc_atomicrmw_sub_i128;
18802   case AtomicRMWInst::And:
18803     return Intrinsic::ppc_atomicrmw_and_i128;
18804   case AtomicRMWInst::Or:
18805     return Intrinsic::ppc_atomicrmw_or_i128;
18806   case AtomicRMWInst::Xor:
18807     return Intrinsic::ppc_atomicrmw_xor_i128;
18808   case AtomicRMWInst::Nand:
18809     return Intrinsic::ppc_atomicrmw_nand_i128;
18810   }
18811 }
18812
18813 Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
18814     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18815     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18816   assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18817   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18818   Type *ValTy = Incr->getType();
18819   assert(ValTy->getPrimitiveSizeInBits() == 128);
18820   Function *RMW = Intrinsic::getDeclaration(
18821       M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
18822   Type *Int64Ty = Type::getInt64Ty(M->getContext());
18823   Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
18824   Value *IncrHi =
18825       Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
18826   Value *LoHi = Builder.CreateCall(RMW, {AlignedAddr, IncrLo, IncrHi});
18827   Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18828   Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18829   Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18830   Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18831   return Builder.CreateOr(
18832       Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18833 }
18834
18835 Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
18836     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18837     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18838   assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18839   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18840   Type *ValTy = CmpVal->getType();
18841   assert(ValTy->getPrimitiveSizeInBits() == 128);
18842   Function *IntCmpXchg =
18843       Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
18844   Type *Int64Ty = Type::getInt64Ty(M->getContext());
18845   Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
18846   Value *CmpHi =
18847       Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
18848   Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
18849   Value *NewHi =
18850       Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
18851   emitLeadingFence(Builder, CI, Ord);
18852   Value *LoHi =
18853       Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
18854   emitTrailingFence(Builder, CI, Ord);
18855   Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18856   Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18857   Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18858   Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18859   return Builder.CreateOr(
18860       Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18861 }