clang/lib/CodeGen/TargetInfo.cpp

   1 //===---- TargetInfo.cpp - Encapsulate target details -----------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // These classes wrap the information about a call or function
  10 // definition used to handle ABI compliancy.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "TargetInfo.h"
  15 #include "ABIInfo.h"
  16 #include "CGBlocks.h"
  17 #include "CGCXXABI.h"
  18 #include "CGValue.h"
  19 #include "CodeGenFunction.h"
  20 #include "clang/AST/Attr.h"
  21 #include "clang/AST/RecordLayout.h"
  22 #include "clang/Basic/Builtins.h"
  23 #include "clang/Basic/CodeGenOptions.h"
  24 #include "clang/Basic/DiagnosticFrontend.h"
  25 #include "clang/CodeGen/CGFunctionInfo.h"
  26 #include "llvm/ADT/SmallBitVector.h"
  27 #include "llvm/ADT/StringExtras.h"
  28 #include "llvm/ADT/StringSwitch.h"
  29 #include "llvm/ADT/Twine.h"
  30 #include "llvm/IR/DataLayout.h"
  31 #include "llvm/IR/IntrinsicsNVPTX.h"
  32 #include "llvm/IR/IntrinsicsS390.h"
  33 #include "llvm/IR/Type.h"
  34 #include "llvm/Support/MathExtras.h"
  35 #include "llvm/Support/raw_ostream.h"
  36 #include "llvm/TargetParser/RISCVTargetParser.h"
  37 #include "llvm/TargetParser/Triple.h"
  38 #include <algorithm>
  39
  40 using namespace clang;
  41 using namespace CodeGen;
  42
  43 // Helper for coercing an aggregate argument or return value into an integer
  44 // array of the same size (including padding) and alignment.  This alternate
  45 // coercion happens only for the RenderScript ABI and can be removed after
  46 // runtimes that rely on it are no longer supported.
  47 //
  48 // RenderScript assumes that the size of the argument / return value in the IR
  49 // is the same as the size of the corresponding qualified type. This helper
  50 // coerces the aggregate type into an array of the same size (including
  51 // padding).  This coercion is used in lieu of expansion of struct members or
  52 // other canonical coercions that return a coerced-type of larger size.
  53 //
  54 // Ty          - The argument / return value type
  55 // Context     - The associated ASTContext
  56 // LLVMContext - The associated LLVMContext
  57 static ABIArgInfo coerceToIntArray(QualType Ty,
  58                                    ASTContext &Context,
  59                                    llvm::LLVMContext &LLVMContext) {
  60   // Alignment and Size are measured in bits.
  61   const uint64_t Size = Context.getTypeSize(Ty);
  62   const uint64_t Alignment = Context.getTypeAlign(Ty);
  63   llvm::Type *IntType = llvm::Type::getIntNTy(LLVMContext, Alignment);
  64   const uint64_t NumElements = (Size + Alignment - 1) / Alignment;
  65   return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
  66 }
  67
  68 static void AssignToArrayRange(CodeGen::CGBuilderTy &Builder,
  69                                llvm::Value *Array,
  70                                llvm::Value *Value,
  71                                unsigned FirstIndex,
  72                                unsigned LastIndex) {
  73   // Alternatively, we could emit this as a loop in the source.
  74   for (unsigned I = FirstIndex; I <= LastIndex; ++I) {
  75     llvm::Value *Cell =
  76         Builder.CreateConstInBoundsGEP1_32(Builder.getInt8Ty(), Array, I);
  77     Builder.CreateAlignedStore(Value, Cell, CharUnits::One());
  78   }
  79 }
  80
  81 static bool isAggregateTypeForABI(QualType T) {
  82   return !CodeGenFunction::hasScalarEvaluationKind(T) ||
  83          T->isMemberFunctionPointerType();
  84 }
  85
  86 ABIArgInfo ABIInfo::getNaturalAlignIndirect(QualType Ty, bool ByVal,
  87                                             bool Realign,
  88                                             llvm::Type *Padding) const {
  89   return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty), ByVal,
  90                                  Realign, Padding);
  91 }
  92
  93 ABIArgInfo
  94 ABIInfo::getNaturalAlignIndirectInReg(QualType Ty, bool Realign) const {
  95   return ABIArgInfo::getIndirectInReg(getContext().getTypeAlignInChars(Ty),
  96                                       /*ByVal*/ false, Realign);
  97 }
  98
  99 Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
 100                              QualType Ty) const {
 101   return Address::invalid();
 102 }
 103
 104 static llvm::Type *getVAListElementType(CodeGenFunction &CGF) {
 105   return CGF.ConvertTypeForMem(
 106       CGF.getContext().getBuiltinVaListType()->getPointeeType());
 107 }
 108
 109 bool ABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const {
 110   if (getContext().isPromotableIntegerType(Ty))
 111     return true;
 112
 113   if (const auto *EIT = Ty->getAs<BitIntType>())
 114     if (EIT->getNumBits() < getContext().getTypeSize(getContext().IntTy))
 115       return true;
 116
 117   return false;
 118 }
 119
 120 ABIInfo::~ABIInfo() = default;
 121
 122 SwiftABIInfo::~SwiftABIInfo() = default;
 123
 124 /// Does the given lowering require more than the given number of
 125 /// registers when expanded?
 126 ///
 127 /// This is intended to be the basis of a reasonable basic implementation
 128 /// of should{Pass,Return}Indirectly.
 129 ///
 130 /// For most targets, a limit of four total registers is reasonable; this
 131 /// limits the amount of code required in order to move around the value
 132 /// in case it wasn't produced immediately prior to the call by the caller
 133 /// (or wasn't produced in exactly the right registers) or isn't used
 134 /// immediately within the callee.  But some targets may need to further
 135 /// limit the register count due to an inability to support that many
 136 /// return registers.
 137 bool SwiftABIInfo::occupiesMoreThan(ArrayRef<llvm::Type *> scalarTypes,
 138                                     unsigned maxAllRegisters) const {
 139   unsigned intCount = 0, fpCount = 0;
 140   for (llvm::Type *type : scalarTypes) {
 141     if (type->isPointerTy()) {
 142       intCount++;
 143     } else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) {
 144       auto ptrWidth = CGT.getTarget().getPointerWidth(LangAS::Default);
 145       intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth;
 146     } else {
 147       assert(type->isVectorTy() || type->isFloatingPointTy());
 148       fpCount++;
 149     }
 150   }
 151
 152   return (intCount + fpCount > maxAllRegisters);
 153 }
 154
 155 bool SwiftABIInfo::shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
 156                                         bool AsReturnValue) const {
 157   return occupiesMoreThan(ComponentTys, /*total=*/4);
 158 }
 159
 160 bool SwiftABIInfo::isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
 161                                      unsigned NumElts) const {
 162   // The default implementation of this assumes that the target guarantees
 163   // 128-bit SIMD support but nothing more.
 164   return (VectorSize.getQuantity() > 8 && VectorSize.getQuantity() <= 16);
 165 }
 166
 167 static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT,
 168                                               CGCXXABI &CXXABI) {
 169   const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl());
 170   if (!RD) {
 171     if (!RT->getDecl()->canPassInRegisters())
 172       return CGCXXABI::RAA_Indirect;
 173     return CGCXXABI::RAA_Default;
 174   }
 175   return CXXABI.getRecordArgABI(RD);
 176 }
 177
 178 static CGCXXABI::RecordArgABI getRecordArgABI(QualType T,
 179                                               CGCXXABI &CXXABI) {
 180   const RecordType *RT = T->getAs<RecordType>();
 181   if (!RT)
 182     return CGCXXABI::RAA_Default;
 183   return getRecordArgABI(RT, CXXABI);
 184 }
 185
 186 static bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI,
 187                                const ABIInfo &Info) {
 188   QualType Ty = FI.getReturnType();
 189
 190   if (const auto *RT = Ty->getAs<RecordType>())
 191     if (!isa<CXXRecordDecl>(RT->getDecl()) &&
 192         !RT->getDecl()->canPassInRegisters()) {
 193       FI.getReturnInfo() = Info.getNaturalAlignIndirect(Ty);
 194       return true;
 195     }
 196
 197   return CXXABI.classifyReturnType(FI);
 198 }
 199
 200 /// Pass transparent unions as if they were the type of the first element. Sema
 201 /// should ensure that all elements of the union have the same "machine type".
 202 static QualType useFirstFieldIfTransparentUnion(QualType Ty) {
 203   if (const RecordType *UT = Ty->getAsUnionType()) {
 204     const RecordDecl *UD = UT->getDecl();
 205     if (UD->hasAttr<TransparentUnionAttr>()) {
 206       assert(!UD->field_empty() && "sema created an empty transparent union");
 207       return UD->field_begin()->getType();
 208     }
 209   }
 210   return Ty;
 211 }
 212
 213 CGCXXABI &ABIInfo::getCXXABI() const {
 214   return CGT.getCXXABI();
 215 }
 216
 217 ASTContext &ABIInfo::getContext() const {
 218   return CGT.getContext();
 219 }
 220
 221 llvm::LLVMContext &ABIInfo::getVMContext() const {
 222   return CGT.getLLVMContext();
 223 }
 224
 225 const llvm::DataLayout &ABIInfo::getDataLayout() const {
 226   return CGT.getDataLayout();
 227 }
 228
 229 const TargetInfo &ABIInfo::getTarget() const {
 230   return CGT.getTarget();
 231 }
 232
 233 const CodeGenOptions &ABIInfo::getCodeGenOpts() const {
 234   return CGT.getCodeGenOpts();
 235 }
 236
 237 bool ABIInfo::isAndroid() const { return getTarget().getTriple().isAndroid(); }
 238
 239 bool ABIInfo::isOHOSFamily() const {
 240   return getTarget().getTriple().isOHOSFamily();
 241 }
 242
 243 bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
 244   return false;
 245 }
 246
 247 bool ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
 248                                                 uint64_t Members) const {
 249   return false;
 250 }
 251
 252 bool ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() const {
 253   // For compatibility with GCC, ignore empty bitfields in C++ mode.
 254   return getContext().getLangOpts().CPlusPlus;
 255 }
 256
 257 LLVM_DUMP_METHOD void ABIArgInfo::dump() const {
 258   raw_ostream &OS = llvm::errs();
 259   OS << "(ABIArgInfo Kind=";
 260   switch (TheKind) {
 261   case Direct:
 262     OS << "Direct Type=";
 263     if (llvm::Type *Ty = getCoerceToType())
 264       Ty->print(OS);
 265     else
 266       OS << "null";
 267     break;
 268   case Extend:
 269     OS << "Extend";
 270     break;
 271   case Ignore:
 272     OS << "Ignore";
 273     break;
 274   case InAlloca:
 275     OS << "InAlloca Offset=" << getInAllocaFieldIndex();
 276     break;
 277   case Indirect:
 278     OS << "Indirect Align=" << getIndirectAlign().getQuantity()
 279        << " ByVal=" << getIndirectByVal()
 280        << " Realign=" << getIndirectRealign();
 281     break;
 282   case IndirectAliased:
 283     OS << "Indirect Align=" << getIndirectAlign().getQuantity()
 284        << " AadrSpace=" << getIndirectAddrSpace()
 285        << " Realign=" << getIndirectRealign();
 286     break;
 287   case Expand:
 288     OS << "Expand";
 289     break;
 290   case CoerceAndExpand:
 291     OS << "CoerceAndExpand Type=";
 292     getCoerceAndExpandType()->print(OS);
 293     break;
 294   }
 295   OS << ")\n";
 296 }
 297
 298 // Dynamically round a pointer up to a multiple of the given alignment.
 299 static llvm::Value *emitRoundPointerUpToAlignment(CodeGenFunction &CGF,
 300                                                   llvm::Value *Ptr,
 301                                                   CharUnits Align) {
 302   // OverflowArgArea = (OverflowArgArea + Align - 1) & -Align;
 303   llvm::Value *RoundUp = CGF.Builder.CreateConstInBoundsGEP1_32(
 304       CGF.Builder.getInt8Ty(), Ptr, Align.getQuantity() - 1);
 305   return CGF.Builder.CreateIntrinsic(
 306       llvm::Intrinsic::ptrmask, {CGF.AllocaInt8PtrTy, CGF.IntPtrTy},
 307       {RoundUp, llvm::ConstantInt::get(CGF.IntPtrTy, -Align.getQuantity())},
 308       nullptr, Ptr->getName() + ".aligned");
 309 }
 310
 311 /// Emit va_arg for a platform using the common void* representation,
 312 /// where arguments are simply emitted in an array of slots on the stack.
 313 ///
 314 /// This version implements the core direct-value passing rules.
 315 ///
 316 /// \param SlotSize - The size and alignment of a stack slot.
 317 ///   Each argument will be allocated to a multiple of this number of
 318 ///   slots, and all the slots will be aligned to this value.
 319 /// \param AllowHigherAlign - The slot alignment is not a cap;
 320 ///   an argument type with an alignment greater than the slot size
 321 ///   will be emitted on a higher-alignment address, potentially
 322 ///   leaving one or more empty slots behind as padding.  If this
 323 ///   is false, the returned address might be less-aligned than
 324 ///   DirectAlign.
 325 /// \param ForceRightAdjust - Default is false. On big-endian platform and
 326 ///   if the argument is smaller than a slot, set this flag will force
 327 ///   right-adjust the argument in its slot irrespective of the type.
 328 static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF,
 329                                       Address VAListAddr,
 330                                       llvm::Type *DirectTy,
 331                                       CharUnits DirectSize,
 332                                       CharUnits DirectAlign,
 333                                       CharUnits SlotSize,
 334                                       bool AllowHigherAlign,
 335                                       bool ForceRightAdjust = false) {
 336   // Cast the element type to i8* if necessary.  Some platforms define
 337   // va_list as a struct containing an i8* instead of just an i8*.
 338   if (VAListAddr.getElementType() != CGF.Int8PtrTy)
 339     VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy);
 340
 341   llvm::Value *Ptr = CGF.Builder.CreateLoad(VAListAddr, "argp.cur");
 342
 343   // If the CC aligns values higher than the slot size, do so if needed.
 344   Address Addr = Address::invalid();
 345   if (AllowHigherAlign && DirectAlign > SlotSize) {
 346     Addr = Address(emitRoundPointerUpToAlignment(CGF, Ptr, DirectAlign),
 347                    CGF.Int8Ty, DirectAlign);
 348   } else {
 349     Addr = Address(Ptr, CGF.Int8Ty, SlotSize);
 350   }
 351
 352   // Advance the pointer past the argument, then store that back.
 353   CharUnits FullDirectSize = DirectSize.alignTo(SlotSize);
 354   Address NextPtr =
 355       CGF.Builder.CreateConstInBoundsByteGEP(Addr, FullDirectSize, "argp.next");
 356   CGF.Builder.CreateStore(NextPtr.getPointer(), VAListAddr);
 357
 358   // If the argument is smaller than a slot, and this is a big-endian
 359   // target, the argument will be right-adjusted in its slot.
 360   if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian() &&
 361       (!DirectTy->isStructTy() || ForceRightAdjust)) {
 362     Addr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, SlotSize - DirectSize);
 363   }
 364
 365   Addr = CGF.Builder.CreateElementBitCast(Addr, DirectTy);
 366   return Addr;
 367 }
 368
 369 /// Emit va_arg for a platform using the common void* representation,
 370 /// where arguments are simply emitted in an array of slots on the stack.
 371 ///
 372 /// \param IsIndirect - Values of this type are passed indirectly.
 373 /// \param ValueInfo - The size and alignment of this type, generally
 374 ///   computed with getContext().getTypeInfoInChars(ValueTy).
 375 /// \param SlotSizeAndAlign - The size and alignment of a stack slot.
 376 ///   Each argument will be allocated to a multiple of this number of
 377 ///   slots, and all the slots will be aligned to this value.
 378 /// \param AllowHigherAlign - The slot alignment is not a cap;
 379 ///   an argument type with an alignment greater than the slot size
 380 ///   will be emitted on a higher-alignment address, potentially
 381 ///   leaving one or more empty slots behind as padding.
 382 /// \param ForceRightAdjust - Default is false. On big-endian platform and
 383 ///   if the argument is smaller than a slot, set this flag will force
 384 ///   right-adjust the argument in its slot irrespective of the type.
 385 static Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr,
 386                                 QualType ValueTy, bool IsIndirect,
 387                                 TypeInfoChars ValueInfo,
 388                                 CharUnits SlotSizeAndAlign,
 389                                 bool AllowHigherAlign,
 390                                 bool ForceRightAdjust = false) {
 391   // The size and alignment of the value that was passed directly.
 392   CharUnits DirectSize, DirectAlign;
 393   if (IsIndirect) {
 394     DirectSize = CGF.getPointerSize();
 395     DirectAlign = CGF.getPointerAlign();
 396   } else {
 397     DirectSize = ValueInfo.Width;
 398     DirectAlign = ValueInfo.Align;
 399   }
 400
 401   // Cast the address we've calculated to the right type.
 402   llvm::Type *DirectTy = CGF.ConvertTypeForMem(ValueTy), *ElementTy = DirectTy;
 403   if (IsIndirect) {
 404     unsigned AllocaAS = CGF.CGM.getDataLayout().getAllocaAddrSpace();
 405     DirectTy = DirectTy->getPointerTo(AllocaAS);
 406   }
 407
 408   Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, DirectTy, DirectSize,
 409                                         DirectAlign, SlotSizeAndAlign,
 410                                         AllowHigherAlign, ForceRightAdjust);
 411
 412   if (IsIndirect) {
 413     Addr = Address(CGF.Builder.CreateLoad(Addr), ElementTy, ValueInfo.Align);
 414   }
 415
 416   return Addr;
 417 }
 418
 419 static Address complexTempStructure(CodeGenFunction &CGF, Address VAListAddr,
 420                                     QualType Ty, CharUnits SlotSize,
 421                                     CharUnits EltSize, const ComplexType *CTy) {
 422   Address Addr =
 423       emitVoidPtrDirectVAArg(CGF, VAListAddr, CGF.Int8Ty, SlotSize * 2,
 424                              SlotSize, SlotSize, /*AllowHigher*/ true);
 425
 426   Address RealAddr = Addr;
 427   Address ImagAddr = RealAddr;
 428   if (CGF.CGM.getDataLayout().isBigEndian()) {
 429     RealAddr =
 430         CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize - EltSize);
 431     ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(ImagAddr,
 432                                                       2 * SlotSize - EltSize);
 433   } else {
 434     ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize);
 435   }
 436
 437   llvm::Type *EltTy = CGF.ConvertTypeForMem(CTy->getElementType());
 438   RealAddr = CGF.Builder.CreateElementBitCast(RealAddr, EltTy);
 439   ImagAddr = CGF.Builder.CreateElementBitCast(ImagAddr, EltTy);
 440   llvm::Value *Real = CGF.Builder.CreateLoad(RealAddr, ".vareal");
 441   llvm::Value *Imag = CGF.Builder.CreateLoad(ImagAddr, ".vaimag");
 442
 443   Address Temp = CGF.CreateMemTemp(Ty, "vacplx");
 444   CGF.EmitStoreOfComplex({Real, Imag}, CGF.MakeAddrLValue(Temp, Ty),
 445                          /*init*/ true);
 446   return Temp;
 447 }
 448
 449 static Address emitMergePHI(CodeGenFunction &CGF,
 450                             Address Addr1, llvm::BasicBlock *Block1,
 451                             Address Addr2, llvm::BasicBlock *Block2,
 452                             const llvm::Twine &Name = "") {
 453   assert(Addr1.getType() == Addr2.getType());
 454   llvm::PHINode *PHI = CGF.Builder.CreatePHI(Addr1.getType(), 2, Name);
 455   PHI->addIncoming(Addr1.getPointer(), Block1);
 456   PHI->addIncoming(Addr2.getPointer(), Block2);
 457   CharUnits Align = std::min(Addr1.getAlignment(), Addr2.getAlignment());
 458   return Address(PHI, Addr1.getElementType(), Align);
 459 }
 460
 461 TargetCodeGenInfo::TargetCodeGenInfo(std::unique_ptr<ABIInfo> Info)
 462     : Info(std::move(Info)) {}
 463
 464 TargetCodeGenInfo::~TargetCodeGenInfo() = default;
 465
 466 // If someone can figure out a general rule for this, that would be great.
 467 // It's probably just doomed to be platform-dependent, though.
 468 unsigned TargetCodeGenInfo::getSizeOfUnwindException() const {
 469   // Verified for:
 470   //   x86-64     FreeBSD, Linux, Darwin
 471   //   x86-32     FreeBSD, Linux, Darwin
 472   //   PowerPC    Linux
 473   //   ARM        Darwin (*not* EABI)
 474   //   AArch64    Linux
 475   return 32;
 476 }
 477
 478 bool TargetCodeGenInfo::isNoProtoCallVariadic(const CallArgList &args,
 479                                      const FunctionNoProtoType *fnType) const {
 480   // The following conventions are known to require this to be false:
 481   //   x86_stdcall
 482   //   MIPS
 483   // For everything else, we just prefer false unless we opt out.
 484   return false;
 485 }
 486
 487 void
 488 TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib,
 489                                              llvm::SmallString<24> &Opt) const {
 490   // This assumes the user is passing a library name like "rt" instead of a
 491   // filename like "librt.a/so", and that they don't care whether it's static or
 492   // dynamic.
 493   Opt = "-l";
 494   Opt += Lib;
 495 }
 496
 497 unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const {
 498   // OpenCL kernels are called via an explicit runtime API with arguments
 499   // set with clSetKernelArg(), not as normal sub-functions.
 500   // Return SPIR_KERNEL by default as the kernel calling convention to
 501   // ensure the fingerprint is fixed such way that each OpenCL argument
 502   // gets one matching argument in the produced kernel function argument
 503   // list to enable feasible implementation of clSetKernelArg() with
 504   // aggregates etc. In case we would use the default C calling conv here,
 505   // clSetKernelArg() might break depending on the target-specific
 506   // conventions; different targets might split structs passed as values
 507   // to multiple function arguments etc.
 508   return llvm::CallingConv::SPIR_KERNEL;
 509 }
 510
 511 llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
 512     llvm::PointerType *T, QualType QT) const {
 513   return llvm::ConstantPointerNull::get(T);
 514 }
 515
 516 LangAS TargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
 517                                                    const VarDecl *D) const {
 518   assert(!CGM.getLangOpts().OpenCL &&
 519          !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
 520          "Address space agnostic languages only");
 521   return D ? D->getType().getAddressSpace() : LangAS::Default;
 522 }
 523
 524 llvm::Value *TargetCodeGenInfo::performAddrSpaceCast(
 525     CodeGen::CodeGenFunction &CGF, llvm::Value *Src, LangAS SrcAddr,
 526     LangAS DestAddr, llvm::Type *DestTy, bool isNonNull) const {
 527   // Since target may map different address spaces in AST to the same address
 528   // space, an address space conversion may end up as a bitcast.
 529   if (auto *C = dyn_cast<llvm::Constant>(Src))
 530     return performAddrSpaceCast(CGF.CGM, C, SrcAddr, DestAddr, DestTy);
 531   // Try to preserve the source's name to make IR more readable.
 532   return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
 533       Src, DestTy, Src->hasName() ? Src->getName() + ".ascast" : "");
 534 }
 535
 536 llvm::Constant *
 537 TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src,
 538                                         LangAS SrcAddr, LangAS DestAddr,
 539                                         llvm::Type *DestTy) const {
 540   // Since target may map different address spaces in AST to the same address
 541   // space, an address space conversion may end up as a bitcast.
 542   return llvm::ConstantExpr::getPointerCast(Src, DestTy);
 543 }
 544
 545 llvm::SyncScope::ID
 546 TargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
 547                                       SyncScope Scope,
 548                                       llvm::AtomicOrdering Ordering,
 549                                       llvm::LLVMContext &Ctx) const {
 550   return Ctx.getOrInsertSyncScopeID(""); /* default sync scope */
 551 }
 552
 553 static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays);
 554
 555 /// isEmptyField - Return true iff a the field is "empty", that is it
 556 /// is an unnamed bit-field or an (array of) empty record(s).
 557 static bool isEmptyField(ASTContext &Context, const FieldDecl *FD,
 558                          bool AllowArrays) {
 559   if (FD->isUnnamedBitfield())
 560     return true;
 561
 562   QualType FT = FD->getType();
 563
 564   // Constant arrays of empty records count as empty, strip them off.
 565   // Constant arrays of zero length always count as empty.
 566   bool WasArray = false;
 567   if (AllowArrays)
 568     while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) {
 569       if (AT->getSize() == 0)
 570         return true;
 571       FT = AT->getElementType();
 572       // The [[no_unique_address]] special case below does not apply to
 573       // arrays of C++ empty records, so we need to remember this fact.
 574       WasArray = true;
 575     }
 576
 577   const RecordType *RT = FT->getAs<RecordType>();
 578   if (!RT)
 579     return false;
 580
 581   // C++ record fields are never empty, at least in the Itanium ABI.
 582   //
 583   // FIXME: We should use a predicate for whether this behavior is true in the
 584   // current ABI.
 585   //
 586   // The exception to the above rule are fields marked with the
 587   // [[no_unique_address]] attribute (since C++20).  Those do count as empty
 588   // according to the Itanium ABI.  The exception applies only to records,
 589   // not arrays of records, so we must also check whether we stripped off an
 590   // array type above.
 591   if (isa<CXXRecordDecl>(RT->getDecl()) &&
 592       (WasArray || !FD->hasAttr<NoUniqueAddressAttr>()))
 593     return false;
 594
 595   return isEmptyRecord(Context, FT, AllowArrays);
 596 }
 597
 598 /// isEmptyRecord - Return true iff a structure contains only empty
 599 /// fields. Note that a structure with a flexible array member is not
 600 /// considered empty.
 601 static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) {
 602   const RecordType *RT = T->getAs<RecordType>();
 603   if (!RT)
 604     return false;
 605   const RecordDecl *RD = RT->getDecl();
 606   if (RD->hasFlexibleArrayMember())
 607     return false;
 608
 609   // If this is a C++ record, check the bases first.
 610   if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
 611     for (const auto &I : CXXRD->bases())
 612       if (!isEmptyRecord(Context, I.getType(), true))
 613         return false;
 614
 615   for (const auto *I : RD->fields())
 616     if (!isEmptyField(Context, I, AllowArrays))
 617       return false;
 618   return true;
 619 }
 620
 621 /// isSingleElementStruct - Determine if a structure is a "single
 622 /// element struct", i.e. it has exactly one non-empty field or
 623 /// exactly one field which is itself a single element
 624 /// struct. Structures with flexible array members are never
 625 /// considered single element structs.
 626 ///
 627 /// \return The field declaration for the single non-empty field, if
 628 /// it exists.
 629 static const Type *isSingleElementStruct(QualType T, ASTContext &Context) {
 630   const RecordType *RT = T->getAs<RecordType>();
 631   if (!RT)
 632     return nullptr;
 633
 634   const RecordDecl *RD = RT->getDecl();
 635   if (RD->hasFlexibleArrayMember())
 636     return nullptr;
 637
 638   const Type *Found = nullptr;
 639
 640   // If this is a C++ record, check the bases first.
 641   if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
 642     for (const auto &I : CXXRD->bases()) {
 643       // Ignore empty records.
 644       if (isEmptyRecord(Context, I.getType(), true))
 645         continue;
 646
 647       // If we already found an element then this isn't a single-element struct.
 648       if (Found)
 649         return nullptr;
 650
 651       // If this is non-empty and not a single element struct, the composite
 652       // cannot be a single element struct.
 653       Found = isSingleElementStruct(I.getType(), Context);
 654       if (!Found)
 655         return nullptr;
 656     }
 657   }
 658
 659   // Check for single element.
 660   for (const auto *FD : RD->fields()) {
 661     QualType FT = FD->getType();
 662
 663     // Ignore empty fields.
 664     if (isEmptyField(Context, FD, true))
 665       continue;
 666
 667     // If we already found an element then this isn't a single-element
 668     // struct.
 669     if (Found)
 670       return nullptr;
 671
 672     // Treat single element arrays as the element.
 673     while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) {
 674       if (AT->getSize().getZExtValue() != 1)
 675         break;
 676       FT = AT->getElementType();
 677     }
 678
 679     if (!isAggregateTypeForABI(FT)) {
 680       Found = FT.getTypePtr();
 681     } else {
 682       Found = isSingleElementStruct(FT, Context);
 683       if (!Found)
 684         return nullptr;
 685     }
 686   }
 687
 688   // We don't consider a struct a single-element struct if it has
 689   // padding beyond the element type.
 690   if (Found && Context.getTypeSize(Found) != Context.getTypeSize(T))
 691     return nullptr;
 692
 693   return Found;
 694 }
 695
 696 namespace {
 697 Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
 698                        const ABIArgInfo &AI) {
 699   // This default implementation defers to the llvm backend's va_arg
 700   // instruction. It can handle only passing arguments directly
 701   // (typically only handled in the backend for primitive types), or
 702   // aggregates passed indirectly by pointer (NOTE: if the "byval"
 703   // flag has ABI impact in the callee, this implementation cannot
 704   // work.)
 705
 706   // Only a few cases are covered here at the moment -- those needed
 707   // by the default abi.
 708   llvm::Value *Val;
 709
 710   if (AI.isIndirect()) {
 711     assert(!AI.getPaddingType() &&
 712            "Unexpected PaddingType seen in arginfo in generic VAArg emitter!");
 713     assert(
 714         !AI.getIndirectRealign() &&
 715         "Unexpected IndirectRealign seen in arginfo in generic VAArg emitter!");
 716
 717     auto TyInfo = CGF.getContext().getTypeInfoInChars(Ty);
 718     CharUnits TyAlignForABI = TyInfo.Align;
 719
 720     llvm::Type *ElementTy = CGF.ConvertTypeForMem(Ty);
 721     llvm::Type *BaseTy = llvm::PointerType::getUnqual(ElementTy);
 722     llvm::Value *Addr =
 723         CGF.Builder.CreateVAArg(VAListAddr.getPointer(), BaseTy);
 724     return Address(Addr, ElementTy, TyAlignForABI);
 725   } else {
 726     assert((AI.isDirect() || AI.isExtend()) &&
 727            "Unexpected ArgInfo Kind in generic VAArg emitter!");
 728
 729     assert(!AI.getInReg() &&
 730            "Unexpected InReg seen in arginfo in generic VAArg emitter!");
 731     assert(!AI.getPaddingType() &&
 732            "Unexpected PaddingType seen in arginfo in generic VAArg emitter!");
 733     assert(!AI.getDirectOffset() &&
 734            "Unexpected DirectOffset seen in arginfo in generic VAArg emitter!");
 735     assert(!AI.getCoerceToType() &&
 736            "Unexpected CoerceToType seen in arginfo in generic VAArg emitter!");
 737
 738     Address Temp = CGF.CreateMemTemp(Ty, "varet");
 739     Val = CGF.Builder.CreateVAArg(VAListAddr.getPointer(),
 740                                   CGF.ConvertTypeForMem(Ty));
 741     CGF.Builder.CreateStore(Val, Temp);
 742     return Temp;
 743   }
 744 }
 745
 746 /// DefaultABIInfo - The default implementation for ABI specific
 747 /// details. This implementation provides information which results in
 748 /// self-consistent and sensible LLVM IR generation, but does not
 749 /// conform to any particular ABI.
 750 class DefaultABIInfo : public ABIInfo {
 751 public:
 752   DefaultABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {}
 753
 754   ABIArgInfo classifyReturnType(QualType RetTy) const;
 755   ABIArgInfo classifyArgumentType(QualType RetTy) const;
 756
 757   void computeInfo(CGFunctionInfo &FI) const override {
 758     if (!getCXXABI().classifyReturnType(FI))
 759       FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
 760     for (auto &I : FI.arguments())
 761       I.info = classifyArgumentType(I.type);
 762   }
 763
 764   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
 765                     QualType Ty) const override {
 766     return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty));
 767   }
 768 };
 769
 770 class DefaultTargetCodeGenInfo : public TargetCodeGenInfo {
 771 public:
 772   DefaultTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
 773       : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}
 774 };
 775
 776 ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
 777   Ty = useFirstFieldIfTransparentUnion(Ty);
 778
 779   if (isAggregateTypeForABI(Ty)) {
 780     // Records with non-trivial destructors/copy-constructors should not be
 781     // passed by value.
 782     if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
 783       return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
 784
 785     return getNaturalAlignIndirect(Ty);
 786   }
 787
 788   // Treat an enum type as its underlying type.
 789   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
 790     Ty = EnumTy->getDecl()->getIntegerType();
 791
 792   ASTContext &Context = getContext();
 793   if (const auto *EIT = Ty->getAs<BitIntType>())
 794     if (EIT->getNumBits() >
 795         Context.getTypeSize(Context.getTargetInfo().hasInt128Type()
 796                                 ? Context.Int128Ty
 797                                 : Context.LongLongTy))
 798       return getNaturalAlignIndirect(Ty);
 799
 800   return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
 801                                             : ABIArgInfo::getDirect());
 802 }
 803
 804 ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const {
 805   if (RetTy->isVoidType())
 806     return ABIArgInfo::getIgnore();
 807
 808   if (isAggregateTypeForABI(RetTy))
 809     return getNaturalAlignIndirect(RetTy);
 810
 811   // Treat an enum type as its underlying type.
 812   if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
 813     RetTy = EnumTy->getDecl()->getIntegerType();
 814
 815   if (const auto *EIT = RetTy->getAs<BitIntType>())
 816     if (EIT->getNumBits() >
 817         getContext().getTypeSize(getContext().getTargetInfo().hasInt128Type()
 818                                      ? getContext().Int128Ty
 819                                      : getContext().LongLongTy))
 820       return getNaturalAlignIndirect(RetTy);
 821
 822   return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
 823                                                : ABIArgInfo::getDirect());
 824 }
 825
 826 //===----------------------------------------------------------------------===//
 827 // WebAssembly ABI Implementation
 828 //
 829 // This is a very simple ABI that relies a lot on DefaultABIInfo.
 830 //===----------------------------------------------------------------------===//
 831
 832 enum class WebAssemblyABIKind {
 833   MVP = 0,
 834   ExperimentalMV = 1,
 835 };
 836
 837 class WebAssemblyABIInfo final : public ABIInfo {
 838   DefaultABIInfo defaultInfo;
 839   WebAssemblyABIKind Kind;
 840
 841 public:
 842   explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT,
 843                               WebAssemblyABIKind Kind)
 844       : ABIInfo(CGT), defaultInfo(CGT), Kind(Kind) {}
 845
 846 private:
 847   ABIArgInfo classifyReturnType(QualType RetTy) const;
 848   ABIArgInfo classifyArgumentType(QualType Ty) const;
 849
 850   // DefaultABIInfo's classifyReturnType and classifyArgumentType are
 851   // non-virtual, but computeInfo and EmitVAArg are virtual, so we
 852   // overload them.
 853   void computeInfo(CGFunctionInfo &FI) const override {
 854     if (!getCXXABI().classifyReturnType(FI))
 855       FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
 856     for (auto &Arg : FI.arguments())
 857       Arg.info = classifyArgumentType(Arg.type);
 858   }
 859
 860   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
 861                     QualType Ty) const override;
 862 };
 863
 864 class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo {
 865 public:
 866   explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
 867                                         WebAssemblyABIKind K)
 868       : TargetCodeGenInfo(std::make_unique<WebAssemblyABIInfo>(CGT, K)) {
 869     SwiftInfo =
 870         std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false);
 871   }
 872
 873   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
 874                            CodeGen::CodeGenModule &CGM) const override {
 875     TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
 876     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
 877       if (const auto *Attr = FD->getAttr<WebAssemblyImportModuleAttr>()) {
 878         llvm::Function *Fn = cast<llvm::Function>(GV);
 879         llvm::AttrBuilder B(GV->getContext());
 880         B.addAttribute("wasm-import-module", Attr->getImportModule());
 881         Fn->addFnAttrs(B);
 882       }
 883       if (const auto *Attr = FD->getAttr<WebAssemblyImportNameAttr>()) {
 884         llvm::Function *Fn = cast<llvm::Function>(GV);
 885         llvm::AttrBuilder B(GV->getContext());
 886         B.addAttribute("wasm-import-name", Attr->getImportName());
 887         Fn->addFnAttrs(B);
 888       }
 889       if (const auto *Attr = FD->getAttr<WebAssemblyExportNameAttr>()) {
 890         llvm::Function *Fn = cast<llvm::Function>(GV);
 891         llvm::AttrBuilder B(GV->getContext());
 892         B.addAttribute("wasm-export-name", Attr->getExportName());
 893         Fn->addFnAttrs(B);
 894       }
 895     }
 896
 897     if (auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
 898       llvm::Function *Fn = cast<llvm::Function>(GV);
 899       if (!FD->doesThisDeclarationHaveABody() && !FD->hasPrototype())
 900         Fn->addFnAttr("no-prototype");
 901     }
 902   }
 903
 904   /// Return the WebAssembly externref reference type.
 905   virtual llvm::Type *getWasmExternrefReferenceType() const override {
 906     return llvm::Type::getWasm_ExternrefTy(getABIInfo().getVMContext());
 907   }
 908   /// Return the WebAssembly funcref reference type.
 909   virtual llvm::Type *getWasmFuncrefReferenceType() const override {
 910     return llvm::Type::getWasm_FuncrefTy(getABIInfo().getVMContext());
 911   }
 912 };
 913
 914 /// Classify argument of given type \p Ty.
 915 ABIArgInfo WebAssemblyABIInfo::classifyArgumentType(QualType Ty) const {
 916   Ty = useFirstFieldIfTransparentUnion(Ty);
 917
 918   if (isAggregateTypeForABI(Ty)) {
 919     // Records with non-trivial destructors/copy-constructors should not be
 920     // passed by value.
 921     if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
 922       return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
 923     // Ignore empty structs/unions.
 924     if (isEmptyRecord(getContext(), Ty, true))
 925       return ABIArgInfo::getIgnore();
 926     // Lower single-element structs to just pass a regular value. TODO: We
 927     // could do reasonable-size multiple-element structs too, using getExpand(),
 928     // though watch out for things like bitfields.
 929     if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
 930       return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
 931     // For the experimental multivalue ABI, fully expand all other aggregates
 932     if (Kind == WebAssemblyABIKind::ExperimentalMV) {
 933       const RecordType *RT = Ty->getAs<RecordType>();
 934       assert(RT);
 935       bool HasBitField = false;
 936       for (auto *Field : RT->getDecl()->fields()) {
 937         if (Field->isBitField()) {
 938           HasBitField = true;
 939           break;
 940         }
 941       }
 942       if (!HasBitField)
 943         return ABIArgInfo::getExpand();
 944     }
 945   }
 946
 947   // Otherwise just do the default thing.
 948   return defaultInfo.classifyArgumentType(Ty);
 949 }
 950
 951 ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const {
 952   if (isAggregateTypeForABI(RetTy)) {
 953     // Records with non-trivial destructors/copy-constructors should not be
 954     // returned by value.
 955     if (!getRecordArgABI(RetTy, getCXXABI())) {
 956       // Ignore empty structs/unions.
 957       if (isEmptyRecord(getContext(), RetTy, true))
 958         return ABIArgInfo::getIgnore();
 959       // Lower single-element structs to just return a regular value. TODO: We
 960       // could do reasonable-size multiple-element structs too, using
 961       // ABIArgInfo::getDirect().
 962       if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
 963         return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
 964       // For the experimental multivalue ABI, return all other aggregates
 965       if (Kind == WebAssemblyABIKind::ExperimentalMV)
 966         return ABIArgInfo::getDirect();
 967     }
 968   }
 969
 970   // Otherwise just do the default thing.
 971   return defaultInfo.classifyReturnType(RetTy);
 972 }
 973
 974 Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
 975                                       QualType Ty) const {
 976   bool IsIndirect = isAggregateTypeForABI(Ty) &&
 977                     !isEmptyRecord(getContext(), Ty, true) &&
 978                     !isSingleElementStruct(Ty, getContext());
 979   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
 980                           getContext().getTypeInfoInChars(Ty),
 981                           CharUnits::fromQuantity(4),
 982                           /*AllowHigherAlign=*/true);
 983 }
 984
 985 //===----------------------------------------------------------------------===//
 986 // le32/PNaCl bitcode ABI Implementation
 987 //
 988 // This is a simplified version of the x86_32 ABI.  Arguments and return values
 989 // are always passed on the stack.
 990 //===----------------------------------------------------------------------===//
 991
 992 class PNaClABIInfo : public ABIInfo {
 993  public:
 994   PNaClABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {}
 995
 996   ABIArgInfo classifyReturnType(QualType RetTy) const;
 997   ABIArgInfo classifyArgumentType(QualType RetTy) const;
 998
 999   void computeInfo(CGFunctionInfo &FI) const override;
1000   Address EmitVAArg(CodeGenFunction &CGF,
1001                     Address VAListAddr, QualType Ty) const override;
1002 };
1003
1004 class PNaClTargetCodeGenInfo : public TargetCodeGenInfo {
1005  public:
1006    PNaClTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
1007        : TargetCodeGenInfo(std::make_unique<PNaClABIInfo>(CGT)) {}
1008 };
1009
1010 void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const {
1011   if (!getCXXABI().classifyReturnType(FI))
1012     FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
1013
1014   for (auto &I : FI.arguments())
1015     I.info = classifyArgumentType(I.type);
1016 }
1017
1018 Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
1019                                 QualType Ty) const {
1020   // The PNaCL ABI is a bit odd, in that varargs don't use normal
1021   // function classification. Structs get passed directly for varargs
1022   // functions, through a rewriting transform in
1023   // pnacl-llvm/lib/Transforms/NaCl/ExpandVarArgs.cpp, which allows
1024   // this target to actually support a va_arg instructions with an
1025   // aggregate type, unlike other targets.
1026   return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());
1027 }
1028
1029 /// Classify argument of given type \p Ty.
1030 ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const {
1031   if (isAggregateTypeForABI(Ty)) {
1032     if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
1033       return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
1034     return getNaturalAlignIndirect(Ty);
1035   } else if (const EnumType *EnumTy = Ty->getAs<EnumType>()) {
1036     // Treat an enum type as its underlying type.
1037     Ty = EnumTy->getDecl()->getIntegerType();
1038   } else if (Ty->isFloatingType()) {
1039     // Floating-point types don't go inreg.
1040     return ABIArgInfo::getDirect();
1041   } else if (const auto *EIT = Ty->getAs<BitIntType>()) {
1042     // Treat bit-precise integers as integers if <= 64, otherwise pass
1043     // indirectly.
1044     if (EIT->getNumBits() > 64)
1045       return getNaturalAlignIndirect(Ty);
1046     return ABIArgInfo::getDirect();
1047   }
1048
1049   return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
1050                                             : ABIArgInfo::getDirect());
1051 }
1052
1053 ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const {
1054   if (RetTy->isVoidType())
1055     return ABIArgInfo::getIgnore();
1056
1057   // In the PNaCl ABI we always return records/structures on the stack.
1058   if (isAggregateTypeForABI(RetTy))
1059     return getNaturalAlignIndirect(RetTy);
1060
1061   // Treat bit-precise integers as integers if <= 64, otherwise pass indirectly.
1062   if (const auto *EIT = RetTy->getAs<BitIntType>()) {
1063     if (EIT->getNumBits() > 64)
1064       return getNaturalAlignIndirect(RetTy);
1065     return ABIArgInfo::getDirect();
1066   }
1067
1068   // Treat an enum type as its underlying type.
1069   if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
1070     RetTy = EnumTy->getDecl()->getIntegerType();
1071
1072   return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
1073                                                : ABIArgInfo::getDirect());
1074 }
1075
1076 /// IsX86_MMXType - Return true if this is an MMX type.
1077 bool IsX86_MMXType(llvm::Type *IRType) {
1078   // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>.
1079   return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 &&
1080     cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() &&
1081     IRType->getScalarSizeInBits() != 64;
1082 }
1083
1084 static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
1085                                           StringRef Constraint,
1086                                           llvm::Type* Ty) {
1087   bool IsMMXCons = llvm::StringSwitch<bool>(Constraint)
1088                      .Cases("y", "&y", "^Ym", true)
1089                      .Default(false);
1090   if (IsMMXCons && Ty->isVectorTy()) {
1091     if (cast<llvm::VectorType>(Ty)->getPrimitiveSizeInBits().getFixedValue() !=
1092         64) {
1093       // Invalid MMX constraint
1094       return nullptr;
1095     }
1096
1097     return llvm::Type::getX86_MMXTy(CGF.getLLVMContext());
1098   }
1099
1100   // No operation needed
1101   return Ty;
1102 }
1103
1104 /// Returns true if this type can be passed in SSE registers with the
1105 /// X86_VectorCall calling convention. Shared between x86_32 and x86_64.
1106 static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) {
1107   if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
1108     if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) {
1109       if (BT->getKind() == BuiltinType::LongDouble) {
1110         if (&Context.getTargetInfo().getLongDoubleFormat() ==
1111             &llvm::APFloat::x87DoubleExtended())
1112           return false;
1113       }
1114       return true;
1115     }
1116   } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
1117     // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX
1118     // registers specially.
1119     unsigned VecSize = Context.getTypeSize(VT);
1120     if (VecSize == 128 || VecSize == 256 || VecSize == 512)
1121       return true;
1122   }
1123   return false;
1124 }
1125
1126 /// Returns true if this aggregate is small enough to be passed in SSE registers
1127 /// in the X86_VectorCall calling convention. Shared between x86_32 and x86_64.
1128 static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) {
1129   return NumMembers <= 4;
1130 }
1131
1132 /// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86.
1133 static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) {
1134   auto AI = ABIArgInfo::getDirect(T);
1135   AI.setInReg(true);
1136   AI.setCanBeFlattened(false);
1137   return AI;
1138 }
1139
1140 //===----------------------------------------------------------------------===//
1141 // X86-32 ABI Implementation
1142 //===----------------------------------------------------------------------===//
1143
1144 /// Similar to llvm::CCState, but for Clang.
1145 struct CCState {
1146   CCState(CGFunctionInfo &FI)
1147       : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()) {}
1148
1149   llvm::SmallBitVector IsPreassigned;
1150   unsigned CC = CallingConv::CC_C;
1151   unsigned FreeRegs = 0;
1152   unsigned FreeSSERegs = 0;
1153 };
1154
1155 /// X86_32ABIInfo - The X86-32 ABI information.
1156 class X86_32ABIInfo : public ABIInfo {
1157   enum Class {
1158     Integer,
1159     Float
1160   };
1161
1162   static const unsigned MinABIStackAlignInBytes = 4;
1163
1164   bool IsDarwinVectorABI;
1165   bool IsRetSmallStructInRegABI;
1166   bool IsWin32StructABI;
1167   bool IsSoftFloatABI;
1168   bool IsMCUABI;
1169   bool IsLinuxABI;
1170   unsigned DefaultNumRegisterParameters;
1171
1172   static bool isRegisterSize(unsigned Size) {
1173     return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
1174   }
1175
1176   bool isHomogeneousAggregateBaseType(QualType Ty) const override {
1177     // FIXME: Assumes vectorcall is in use.
1178     return isX86VectorTypeForVectorCall(getContext(), Ty);
1179   }
1180
1181   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
1182                                          uint64_t NumMembers) const override {
1183     // FIXME: Assumes vectorcall is in use.
1184     return isX86VectorCallAggregateSmallEnough(NumMembers);
1185   }
1186
1187   bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const;
1188
1189   /// getIndirectResult - Give a source type \arg Ty, return a suitable result
1190   /// such that the argument will be passed in memory.
1191   ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;
1192
1193   ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;
1194
1195   /// Return the alignment to use for the given type on the stack.
1196   unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;
1197
1198   Class classify(QualType Ty) const;
1199   ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
1200   ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
1201
1202   /// Updates the number of available free registers, returns
1203   /// true if any registers were allocated.
1204   bool updateFreeRegs(QualType Ty, CCState &State) const;
1205
1206   bool shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg,
1207                                 bool &NeedsPadding) const;
1208   bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const;
1209
1210   bool canExpandIndirectArgument(QualType Ty) const;
1211
1212   /// Rewrite the function info so that all memory arguments use
1213   /// inalloca.
1214   void rewriteWithInAlloca(CGFunctionInfo &FI) const;
1215
1216   void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
1217                            CharUnits &StackOffset, ABIArgInfo &Info,
1218                            QualType Type) const;
1219   void runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const;
1220
1221 public:
1222
1223   void computeInfo(CGFunctionInfo &FI) const override;
1224   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
1225                     QualType Ty) const override;
1226
1227   X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
1228                 bool RetSmallStructInRegABI, bool Win32StructABI,
1229                 unsigned NumRegisterParameters, bool SoftFloatABI)
1230       : ABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
1231         IsRetSmallStructInRegABI(RetSmallStructInRegABI),
1232         IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
1233         IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
1234         IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() ||
1235                    CGT.getTarget().getTriple().isOSCygMing()),
1236         DefaultNumRegisterParameters(NumRegisterParameters) {}
1237 };
1238
1239 class X86_32SwiftABIInfo : public SwiftABIInfo {
1240 public:
1241   explicit X86_32SwiftABIInfo(CodeGenTypes &CGT)
1242       : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/false) {}
1243
1244   bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
1245                             bool AsReturnValue) const override {
1246     // LLVM's x86-32 lowering currently only assigns up to three
1247     // integer registers and three fp registers.  Oddly, it'll use up to
1248     // four vector registers for vectors, but those can overlap with the
1249     // scalar registers.
1250     return occupiesMoreThan(ComponentTys, /*total=*/3);
1251   }
1252 };
1253
1254 class X86_32TargetCodeGenInfo : public TargetCodeGenInfo {
1255 public:
1256   X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
1257                           bool RetSmallStructInRegABI, bool Win32StructABI,
1258                           unsigned NumRegisterParameters, bool SoftFloatABI)
1259       : TargetCodeGenInfo(std::make_unique<X86_32ABIInfo>(
1260             CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
1261             NumRegisterParameters, SoftFloatABI)) {
1262     SwiftInfo = std::make_unique<X86_32SwiftABIInfo>(CGT);
1263   }
1264
1265   static bool isStructReturnInRegABI(
1266       const llvm::Triple &Triple, const CodeGenOptions &Opts);
1267
1268   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
1269                            CodeGen::CodeGenModule &CGM) const override;
1270
1271   int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
1272     // Darwin uses different dwarf register numbers for EH.
1273     if (CGM.getTarget().getTriple().isOSDarwin()) return 5;
1274     return 4;
1275   }
1276
1277   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
1278                                llvm::Value *Address) const override;
1279
1280   llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
1281                                   StringRef Constraint,
1282                                   llvm::Type* Ty) const override {
1283     return X86AdjustInlineAsmType(CGF, Constraint, Ty);
1284   }
1285
1286   void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue,
1287                                 std::string &Constraints,
1288                                 std::vector<llvm::Type *> &ResultRegTypes,
1289                                 std::vector<llvm::Type *> &ResultTruncRegTypes,
1290                                 std::vector<LValue> &ResultRegDests,
1291                                 std::string &AsmString,
1292                                 unsigned NumOutputs) const override;
1293
1294   StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
1295     return "movl\t%ebp, %ebp"
1296            "\t\t// marker for objc_retainAutoreleaseReturnValue";
1297   }
1298 };
1299
1300 }
1301
1302 /// Rewrite input constraint references after adding some output constraints.
1303 /// In the case where there is one output and one input and we add one output,
1304 /// we need to replace all operand references greater than or equal to 1:
1305 ///     mov $0, $1
1306 ///     mov eax, $1
1307 /// The result will be:
1308 ///     mov $0, $2
1309 ///     mov eax, $2
1310 static void rewriteInputConstraintReferences(unsigned FirstIn,
1311                                              unsigned NumNewOuts,
1312                                              std::string &AsmString) {
1313   std::string Buf;
1314   llvm::raw_string_ostream OS(Buf);
1315   size_t Pos = 0;
1316   while (Pos < AsmString.size()) {
1317     size_t DollarStart = AsmString.find('$', Pos);
1318     if (DollarStart == std::string::npos)
1319       DollarStart = AsmString.size();
1320     size_t DollarEnd = AsmString.find_first_not_of('$', DollarStart);
1321     if (DollarEnd == std::string::npos)
1322       DollarEnd = AsmString.size();
1323     OS << StringRef(&AsmString[Pos], DollarEnd - Pos);
1324     Pos = DollarEnd;
1325     size_t NumDollars = DollarEnd - DollarStart;
1326     if (NumDollars % 2 != 0 && Pos < AsmString.size()) {
1327       // We have an operand reference.
1328       size_t DigitStart = Pos;
1329       if (AsmString[DigitStart] == '{') {
1330         OS << '{';
1331         ++DigitStart;
1332       }
1333       size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart);
1334       if (DigitEnd == std::string::npos)
1335         DigitEnd = AsmString.size();
1336       StringRef OperandStr(&AsmString[DigitStart], DigitEnd - DigitStart);
1337       unsigned OperandIndex;
1338       if (!OperandStr.getAsInteger(10, OperandIndex)) {
1339         if (OperandIndex >= FirstIn)
1340           OperandIndex += NumNewOuts;
1341         OS << OperandIndex;
1342       } else {
1343         OS << OperandStr;
1344       }
1345       Pos = DigitEnd;
1346     }
1347   }
1348   AsmString = std::move(OS.str());
1349 }
1350
1351 /// Add output constraints for EAX:EDX because they are return registers.
1352 void X86_32TargetCodeGenInfo::addReturnRegisterOutputs(
1353     CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints,
1354     std::vector<llvm::Type *> &ResultRegTypes,
1355     std::vector<llvm::Type *> &ResultTruncRegTypes,
1356     std::vector<LValue> &ResultRegDests, std::string &AsmString,
1357     unsigned NumOutputs) const {
1358   uint64_t RetWidth = CGF.getContext().getTypeSize(ReturnSlot.getType());
1359
1360   // Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is
1361   // larger.
1362   if (!Constraints.empty())
1363     Constraints += ',';
1364   if (RetWidth <= 32) {
1365     Constraints += "={eax}";
1366     ResultRegTypes.push_back(CGF.Int32Ty);
1367   } else {
1368     // Use the 'A' constraint for EAX:EDX.
1369     Constraints += "=A";
1370     ResultRegTypes.push_back(CGF.Int64Ty);
1371   }
1372
1373   // Truncate EAX or EAX:EDX to an integer of the appropriate size.
1374   llvm::Type *CoerceTy = llvm::IntegerType::get(CGF.getLLVMContext(), RetWidth);
1375   ResultTruncRegTypes.push_back(CoerceTy);
1376
1377   // Coerce the integer by bitcasting the return slot pointer.
1378   ReturnSlot.setAddress(
1379       CGF.Builder.CreateElementBitCast(ReturnSlot.getAddress(CGF), CoerceTy));
1380   ResultRegDests.push_back(ReturnSlot);
1381
1382   rewriteInputConstraintReferences(NumOutputs, 1, AsmString);
1383 }
1384
1385 /// shouldReturnTypeInRegister - Determine if the given type should be
1386 /// returned in a register (for the Darwin and MCU ABI).
1387 bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty,
1388                                                ASTContext &Context) const {
1389   uint64_t Size = Context.getTypeSize(Ty);
1390
1391   // For i386, type must be register sized.
1392   // For the MCU ABI, it only needs to be <= 8-byte
1393   if ((IsMCUABI && Size > 64) || (!IsMCUABI && !isRegisterSize(Size)))
1394    return false;
1395
1396   if (Ty->isVectorType()) {
1397     // 64- and 128- bit vectors inside structures are not returned in
1398     // registers.
1399     if (Size == 64 || Size == 128)
1400       return false;
1401
1402     return true;
1403   }
1404
1405   // If this is a builtin, pointer, enum, complex type, member pointer, or
1406   // member function pointer it is ok.
1407   if (Ty->getAs<BuiltinType>() || Ty->hasPointerRepresentation() ||
1408       Ty->isAnyComplexType() || Ty->isEnumeralType() ||
1409       Ty->isBlockPointerType() || Ty->isMemberPointerType())
1410     return true;
1411
1412   // Arrays are treated like records.
1413   if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty))
1414     return shouldReturnTypeInRegister(AT->getElementType(), Context);
1415
1416   // Otherwise, it must be a record type.
1417   const RecordType *RT = Ty->getAs<RecordType>();
1418   if (!RT) return false;
1419
1420   // FIXME: Traverse bases here too.
1421
1422   // Structure types are passed in register if all fields would be
1423   // passed in a register.
1424   for (const auto *FD : RT->getDecl()->fields()) {
1425     // Empty fields are ignored.
1426     if (isEmptyField(Context, FD, true))
1427       continue;
1428
1429     // Check fields recursively.
1430     if (!shouldReturnTypeInRegister(FD->getType(), Context))
1431       return false;
1432   }
1433   return true;
1434 }
1435
1436 static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
1437   // Treat complex types as the element type.
1438   if (const ComplexType *CTy = Ty->getAs<ComplexType>())
1439     Ty = CTy->getElementType();
1440
1441   // Check for a type which we know has a simple scalar argument-passing
1442   // convention without any padding.  (We're specifically looking for 32
1443   // and 64-bit integer and integer-equivalents, float, and double.)
1444   if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() &&
1445       !Ty->isEnumeralType() && !Ty->isBlockPointerType())
1446     return false;
1447
1448   uint64_t Size = Context.getTypeSize(Ty);
1449   return Size == 32 || Size == 64;
1450 }
1451
1452 static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD,
1453                           uint64_t &Size) {
1454   for (const auto *FD : RD->fields()) {
1455     // Scalar arguments on the stack get 4 byte alignment on x86. If the
1456     // argument is smaller than 32-bits, expanding the struct will create
1457     // alignment padding.
1458     if (!is32Or64BitBasicType(FD->getType(), Context))
1459       return false;
1460
1461     // FIXME: Reject bit-fields wholesale; there are two problems, we don't know
1462     // how to expand them yet, and the predicate for telling if a bitfield still
1463     // counts as "basic" is more complicated than what we were doing previously.
1464     if (FD->isBitField())
1465       return false;
1466
1467     Size += Context.getTypeSize(FD->getType());
1468   }
1469   return true;
1470 }
1471
1472 static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD,
1473                                  uint64_t &Size) {
1474   // Don't do this if there are any non-empty bases.
1475   for (const CXXBaseSpecifier &Base : RD->bases()) {
1476     if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(),
1477                               Size))
1478       return false;
1479   }
1480   if (!addFieldSizes(Context, RD, Size))
1481     return false;
1482   return true;
1483 }
1484
1485 /// Test whether an argument type which is to be passed indirectly (on the
1486 /// stack) would have the equivalent layout if it was expanded into separate
1487 /// arguments. If so, we prefer to do the latter to avoid inhibiting
1488 /// optimizations.
1489 bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
1490   // We can only expand structure types.
1491   const RecordType *RT = Ty->getAs<RecordType>();
1492   if (!RT)
1493     return false;
1494   const RecordDecl *RD = RT->getDecl();
1495   uint64_t Size = 0;
1496   if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
1497     if (!IsWin32StructABI) {
1498       // On non-Windows, we have to conservatively match our old bitcode
1499       // prototypes in order to be ABI-compatible at the bitcode level.
1500       if (!CXXRD->isCLike())
1501         return false;
1502     } else {
1503       // Don't do this for dynamic classes.
1504       if (CXXRD->isDynamicClass())
1505         return false;
1506     }
1507     if (!addBaseAndFieldSizes(getContext(), CXXRD, Size))
1508       return false;
1509   } else {
1510     if (!addFieldSizes(getContext(), RD, Size))
1511       return false;
1512   }
1513
1514   // We can do this if there was no alignment padding.
1515   return Size == getContext().getTypeSize(Ty);
1516 }
1517
1518 ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const {
1519   // If the return value is indirect, then the hidden argument is consuming one
1520   // integer register.
1521   if (State.FreeRegs) {
1522     --State.FreeRegs;
1523     if (!IsMCUABI)
1524       return getNaturalAlignIndirectInReg(RetTy);
1525   }
1526   return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
1527 }
1528
1529 ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
1530                                              CCState &State) const {
1531   if (RetTy->isVoidType())
1532     return ABIArgInfo::getIgnore();
1533
1534   const Type *Base = nullptr;
1535   uint64_t NumElts = 0;
1536   if ((State.CC == llvm::CallingConv::X86_VectorCall ||
1537        State.CC == llvm::CallingConv::X86_RegCall) &&
1538       isHomogeneousAggregate(RetTy, Base, NumElts)) {
1539     // The LLVM struct type for such an aggregate should lower properly.
1540     return ABIArgInfo::getDirect();
1541   }
1542
1543   if (const VectorType *VT = RetTy->getAs<VectorType>()) {
1544     // On Darwin, some vectors are returned in registers.
1545     if (IsDarwinVectorABI) {
1546       uint64_t Size = getContext().getTypeSize(RetTy);
1547
1548       // 128-bit vectors are a special case; they are returned in
1549       // registers and we need to make sure to pick a type the LLVM
1550       // backend will like.
1551       if (Size == 128)
1552         return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
1553             llvm::Type::getInt64Ty(getVMContext()), 2));
1554
1555       // Always return in register if it fits in a general purpose
1556       // register, or if it is 64 bits and has a single element.
1557       if ((Size == 8 || Size == 16 || Size == 32) ||
1558           (Size == 64 && VT->getNumElements() == 1))
1559         return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
1560                                                             Size));
1561
1562       return getIndirectReturnResult(RetTy, State);
1563     }
1564
1565     return ABIArgInfo::getDirect();
1566   }
1567
1568   if (isAggregateTypeForABI(RetTy)) {
1569     if (const RecordType *RT = RetTy->getAs<RecordType>()) {
1570       // Structures with flexible arrays are always indirect.
1571       if (RT->getDecl()->hasFlexibleArrayMember())
1572         return getIndirectReturnResult(RetTy, State);
1573     }
1574
1575     // If specified, structs and unions are always indirect.
1576     if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType())
1577       return getIndirectReturnResult(RetTy, State);
1578
1579     // Ignore empty structs/unions.
1580     if (isEmptyRecord(getContext(), RetTy, true))
1581       return ABIArgInfo::getIgnore();
1582
1583     // Return complex of _Float16 as <2 x half> so the backend will use xmm0.
1584     if (const ComplexType *CT = RetTy->getAs<ComplexType>()) {
1585       QualType ET = getContext().getCanonicalType(CT->getElementType());
1586       if (ET->isFloat16Type())
1587         return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
1588             llvm::Type::getHalfTy(getVMContext()), 2));
1589     }
1590
1591     // Small structures which are register sized are generally returned
1592     // in a register.
1593     if (shouldReturnTypeInRegister(RetTy, getContext())) {
1594       uint64_t Size = getContext().getTypeSize(RetTy);
1595
1596       // As a special-case, if the struct is a "single-element" struct, and
1597       // the field is of type "float" or "double", return it in a
1598       // floating-point register. (MSVC does not apply this special case.)
1599       // We apply a similar transformation for pointer types to improve the
1600       // quality of the generated IR.
1601       if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
1602         if ((!IsWin32StructABI && SeltTy->isRealFloatingType())
1603             || SeltTy->hasPointerRepresentation())
1604           return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
1605
1606       // FIXME: We should be able to narrow this integer in cases with dead
1607       // padding.
1608       return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),Size));
1609     }
1610
1611     return getIndirectReturnResult(RetTy, State);
1612   }
1613
1614   // Treat an enum type as its underlying type.
1615   if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
1616     RetTy = EnumTy->getDecl()->getIntegerType();
1617
1618   if (const auto *EIT = RetTy->getAs<BitIntType>())
1619     if (EIT->getNumBits() > 64)
1620       return getIndirectReturnResult(RetTy, State);
1621
1622   return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
1623                                                : ABIArgInfo::getDirect());
1624 }
1625
1626 static bool isSIMDVectorType(ASTContext &Context, QualType Ty) {
1627   return Ty->getAs<VectorType>() && Context.getTypeSize(Ty) == 128;
1628 }
1629
1630 static bool isRecordWithSIMDVectorType(ASTContext &Context, QualType Ty) {
1631   const RecordType *RT = Ty->getAs<RecordType>();
1632   if (!RT)
1633     return false;
1634   const RecordDecl *RD = RT->getDecl();
1635
1636   // If this is a C++ record, check the bases first.
1637   if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
1638     for (const auto &I : CXXRD->bases())
1639       if (!isRecordWithSIMDVectorType(Context, I.getType()))
1640         return false;
1641
1642   for (const auto *i : RD->fields()) {
1643     QualType FT = i->getType();
1644
1645     if (isSIMDVectorType(Context, FT))
1646       return true;
1647
1648     if (isRecordWithSIMDVectorType(Context, FT))
1649       return true;
1650   }
1651
1652   return false;
1653 }
1654
1655 unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
1656                                                  unsigned Align) const {
1657   // Otherwise, if the alignment is less than or equal to the minimum ABI
1658   // alignment, just use the default; the backend will handle this.
1659   if (Align <= MinABIStackAlignInBytes)
1660     return 0; // Use default alignment.
1661
1662   if (IsLinuxABI) {
1663     // Exclude other System V OS (e.g Darwin, PS4 and FreeBSD) since we don't
1664     // want to spend any effort dealing with the ramifications of ABI breaks.
1665     //
1666     // If the vector type is __m128/__m256/__m512, return the default alignment.
1667     if (Ty->isVectorType() && (Align == 16 || Align == 32 || Align == 64))
1668       return Align;
1669   }
1670   // On non-Darwin, the stack type alignment is always 4.
1671   if (!IsDarwinVectorABI) {
1672     // Set explicit alignment, since we may need to realign the top.
1673     return MinABIStackAlignInBytes;
1674   }
1675
1676   // Otherwise, if the type contains an SSE vector type, the alignment is 16.
1677   if (Align >= 16 && (isSIMDVectorType(getContext(), Ty) ||
1678                       isRecordWithSIMDVectorType(getContext(), Ty)))
1679     return 16;
1680
1681   return MinABIStackAlignInBytes;
1682 }
1683
1684 ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal,
1685                                             CCState &State) const {
1686   if (!ByVal) {
1687     if (State.FreeRegs) {
1688       --State.FreeRegs; // Non-byval indirects just use one pointer.
1689       if (!IsMCUABI)
1690         return getNaturalAlignIndirectInReg(Ty);
1691     }
1692     return getNaturalAlignIndirect(Ty, false);
1693   }
1694
1695   // Compute the byval alignment.
1696   unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
1697   unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign);
1698   if (StackAlign == 0)
1699     return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true);
1700
1701   // If the stack alignment is less than the type alignment, realign the
1702   // argument.
1703   bool Realign = TypeAlign > StackAlign;
1704   return ABIArgInfo::getIndirect(CharUnits::fromQuantity(StackAlign),
1705                                  /*ByVal=*/true, Realign);
1706 }
1707
1708 X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const {
1709   const Type *T = isSingleElementStruct(Ty, getContext());
1710   if (!T)
1711     T = Ty.getTypePtr();
1712
1713   if (const BuiltinType *BT = T->getAs<BuiltinType>()) {
1714     BuiltinType::Kind K = BT->getKind();
1715     if (K == BuiltinType::Float || K == BuiltinType::Double)
1716       return Float;
1717   }
1718   return Integer;
1719 }
1720
1721 bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const {
1722   if (!IsSoftFloatABI) {
1723     Class C = classify(Ty);
1724     if (C == Float)
1725       return false;
1726   }
1727
1728   unsigned Size = getContext().getTypeSize(Ty);
1729   unsigned SizeInRegs = (Size + 31) / 32;
1730
1731   if (SizeInRegs == 0)
1732     return false;
1733
1734   if (!IsMCUABI) {
1735     if (SizeInRegs > State.FreeRegs) {
1736       State.FreeRegs = 0;
1737       return false;
1738     }
1739   } else {
1740     // The MCU psABI allows passing parameters in-reg even if there are
1741     // earlier parameters that are passed on the stack. Also,
1742     // it does not allow passing >8-byte structs in-register,
1743     // even if there are 3 free registers available.
1744     if (SizeInRegs > State.FreeRegs || SizeInRegs > 2)
1745       return false;
1746   }
1747
1748   State.FreeRegs -= SizeInRegs;
1749   return true;
1750 }
1751
1752 bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State,
1753                                              bool &InReg,
1754                                              bool &NeedsPadding) const {
1755   // On Windows, aggregates other than HFAs are never passed in registers, and
1756   // they do not consume register slots. Homogenous floating-point aggregates
1757   // (HFAs) have already been dealt with at this point.
1758   if (IsWin32StructABI && isAggregateTypeForABI(Ty))
1759     return false;
1760
1761   NeedsPadding = false;
1762   InReg = !IsMCUABI;
1763
1764   if (!updateFreeRegs(Ty, State))
1765     return false;
1766
1767   if (IsMCUABI)
1768     return true;
1769
1770   if (State.CC == llvm::CallingConv::X86_FastCall ||
1771       State.CC == llvm::CallingConv::X86_VectorCall ||
1772       State.CC == llvm::CallingConv::X86_RegCall) {
1773     if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs)
1774       NeedsPadding = true;
1775
1776     return false;
1777   }
1778
1779   return true;
1780 }
1781
1782 bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
1783   bool IsPtrOrInt = (getContext().getTypeSize(Ty) <= 32) &&
1784                     (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() ||
1785                      Ty->isReferenceType());
1786
1787   if (!IsPtrOrInt && (State.CC == llvm::CallingConv::X86_FastCall ||
1788                       State.CC == llvm::CallingConv::X86_VectorCall))
1789     return false;
1790
1791   if (!updateFreeRegs(Ty, State))
1792     return false;
1793
1794   if (!IsPtrOrInt && State.CC == llvm::CallingConv::X86_RegCall)
1795     return false;
1796
1797   // Return true to apply inreg to all legal parameters except for MCU targets.
1798   return !IsMCUABI;
1799 }
1800
1801 void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const {
1802   // Vectorcall x86 works subtly different than in x64, so the format is
1803   // a bit different than the x64 version.  First, all vector types (not HVAs)
1804   // are assigned, with the first 6 ending up in the [XYZ]MM0-5 registers.
1805   // This differs from the x64 implementation, where the first 6 by INDEX get
1806   // registers.
1807   // In the second pass over the arguments, HVAs are passed in the remaining
1808   // vector registers if possible, or indirectly by address. The address will be
1809   // passed in ECX/EDX if available. Any other arguments are passed according to
1810   // the usual fastcall rules.
1811   MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
1812   for (int I = 0, E = Args.size(); I < E; ++I) {
1813     const Type *Base = nullptr;
1814     uint64_t NumElts = 0;
1815     const QualType &Ty = Args[I].type;
1816     if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
1817         isHomogeneousAggregate(Ty, Base, NumElts)) {
1818       if (State.FreeSSERegs >= NumElts) {
1819         State.FreeSSERegs -= NumElts;
1820         Args[I].info = ABIArgInfo::getDirectInReg();
1821         State.IsPreassigned.set(I);
1822       }
1823     }
1824   }
1825 }
1826
1827 ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
1828                                                CCState &State) const {
1829   // FIXME: Set alignment on indirect arguments.
1830   bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
1831   bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
1832   bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;
1833
1834   Ty = useFirstFieldIfTransparentUnion(Ty);
1835   TypeInfo TI = getContext().getTypeInfo(Ty);
1836
1837   // Check with the C++ ABI first.
1838   const RecordType *RT = Ty->getAs<RecordType>();
1839   if (RT) {
1840     CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
1841     if (RAA == CGCXXABI::RAA_Indirect) {
1842       return getIndirectResult(Ty, false, State);
1843     } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
1844       // The field index doesn't matter, we'll fix it up later.
1845       return ABIArgInfo::getInAlloca(/*FieldIndex=*/0);
1846     }
1847   }
1848
1849   // Regcall uses the concept of a homogenous vector aggregate, similar
1850   // to other targets.
1851   const Type *Base = nullptr;
1852   uint64_t NumElts = 0;
1853   if ((IsRegCall || IsVectorCall) &&
1854       isHomogeneousAggregate(Ty, Base, NumElts)) {
1855     if (State.FreeSSERegs >= NumElts) {
1856       State.FreeSSERegs -= NumElts;
1857
1858       // Vectorcall passes HVAs directly and does not flatten them, but regcall
1859       // does.
1860       if (IsVectorCall)
1861         return getDirectX86Hva();
1862
1863       if (Ty->isBuiltinType() || Ty->isVectorType())
1864         return ABIArgInfo::getDirect();
1865       return ABIArgInfo::getExpand();
1866     }
1867     return getIndirectResult(Ty, /*ByVal=*/false, State);
1868   }
1869
1870   if (isAggregateTypeForABI(Ty)) {
1871     // Structures with flexible arrays are always indirect.
1872     // FIXME: This should not be byval!
1873     if (RT && RT->getDecl()->hasFlexibleArrayMember())
1874       return getIndirectResult(Ty, true, State);
1875
1876     // Ignore empty structs/unions on non-Windows.
1877     if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true))
1878       return ABIArgInfo::getIgnore();
1879
1880     llvm::LLVMContext &LLVMContext = getVMContext();
1881     llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
1882     bool NeedsPadding = false;
1883     bool InReg;
1884     if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
1885       unsigned SizeInRegs = (TI.Width + 31) / 32;
1886       SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
1887       llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
1888       if (InReg)
1889         return ABIArgInfo::getDirectInReg(Result);
1890       else
1891         return ABIArgInfo::getDirect(Result);
1892     }
1893     llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;
1894
1895     // Pass over-aligned aggregates on Windows indirectly. This behavior was
1896     // added in MSVC 2015. Use the required alignment from the record layout,
1897     // since that may be less than the regular type alignment, and types with
1898     // required alignment of less than 4 bytes are not passed indirectly.
1899     if (IsWin32StructABI) {
1900       unsigned AlignInBits = 0;
1901       if (RT) {
1902         const ASTRecordLayout &Layout =
1903           getContext().getASTRecordLayout(RT->getDecl());
1904         AlignInBits = getContext().toBits(Layout.getRequiredAlignment());
1905       } else if (TI.isAlignRequired()) {
1906         AlignInBits = TI.Align;
1907       }
1908       if (AlignInBits > 32)
1909         return getIndirectResult(Ty, /*ByVal=*/false, State);
1910     }
1911
1912     // Expand small (<= 128-bit) record types when we know that the stack layout
1913     // of those arguments will match the struct. This is important because the
1914     // LLVM backend isn't smart enough to remove byval, which inhibits many
1915     // optimizations.
1916     // Don't do this for the MCU if there are still free integer registers
1917     // (see X86_64 ABI for full explanation).
1918     if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) &&
1919         canExpandIndirectArgument(Ty))
1920       return ABIArgInfo::getExpandWithPadding(
1921           IsFastCall || IsVectorCall || IsRegCall, PaddingType);
1922
1923     return getIndirectResult(Ty, true, State);
1924   }
1925
1926   if (const VectorType *VT = Ty->getAs<VectorType>()) {
1927     // On Windows, vectors are passed directly if registers are available, or
1928     // indirectly if not. This avoids the need to align argument memory. Pass
1929     // user-defined vector types larger than 512 bits indirectly for simplicity.
1930     if (IsWin32StructABI) {
1931       if (TI.Width <= 512 && State.FreeSSERegs > 0) {
1932         --State.FreeSSERegs;
1933         return ABIArgInfo::getDirectInReg();
1934       }
1935       return getIndirectResult(Ty, /*ByVal=*/false, State);
1936     }
1937
1938     // On Darwin, some vectors are passed in memory, we handle this by passing
1939     // it as an i8/i16/i32/i64.
1940     if (IsDarwinVectorABI) {
1941       if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) ||
1942           (TI.Width == 64 && VT->getNumElements() == 1))
1943         return ABIArgInfo::getDirect(
1944             llvm::IntegerType::get(getVMContext(), TI.Width));
1945     }
1946
1947     if (IsX86_MMXType(CGT.ConvertType(Ty)))
1948       return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64));
1949
1950     return ABIArgInfo::getDirect();
1951   }
1952
1953
1954   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
1955     Ty = EnumTy->getDecl()->getIntegerType();
1956
1957   bool InReg = shouldPrimitiveUseInReg(Ty, State);
1958
1959   if (isPromotableIntegerTypeForABI(Ty)) {
1960     if (InReg)
1961       return ABIArgInfo::getExtendInReg(Ty);
1962     return ABIArgInfo::getExtend(Ty);
1963   }
1964
1965   if (const auto *EIT = Ty->getAs<BitIntType>()) {
1966     if (EIT->getNumBits() <= 64) {
1967       if (InReg)
1968         return ABIArgInfo::getDirectInReg();
1969       return ABIArgInfo::getDirect();
1970     }
1971     return getIndirectResult(Ty, /*ByVal=*/false, State);
1972   }
1973
1974   if (InReg)
1975     return ABIArgInfo::getDirectInReg();
1976   return ABIArgInfo::getDirect();
1977 }
1978
1979 void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
1980   CCState State(FI);
1981   if (IsMCUABI)
1982     State.FreeRegs = 3;
1983   else if (State.CC == llvm::CallingConv::X86_FastCall) {
1984     State.FreeRegs = 2;
1985     State.FreeSSERegs = 3;
1986   } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
1987     State.FreeRegs = 2;
1988     State.FreeSSERegs = 6;
1989   } else if (FI.getHasRegParm())
1990     State.FreeRegs = FI.getRegParm();
1991   else if (State.CC == llvm::CallingConv::X86_RegCall) {
1992     State.FreeRegs = 5;
1993     State.FreeSSERegs = 8;
1994   } else if (IsWin32StructABI) {
1995     // Since MSVC 2015, the first three SSE vectors have been passed in
1996     // registers. The rest are passed indirectly.
1997     State.FreeRegs = DefaultNumRegisterParameters;
1998     State.FreeSSERegs = 3;
1999   } else
2000     State.FreeRegs = DefaultNumRegisterParameters;
2001
2002   if (!::classifyReturnType(getCXXABI(), FI, *this)) {
2003     FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State);
2004   } else if (FI.getReturnInfo().isIndirect()) {
2005     // The C++ ABI is not aware of register usage, so we have to check if the
2006     // return value was sret and put it in a register ourselves if appropriate.
2007     if (State.FreeRegs) {
2008       --State.FreeRegs;  // The sret parameter consumes a register.
2009       if (!IsMCUABI)
2010         FI.getReturnInfo().setInReg(true);
2011     }
2012   }
2013
2014   // The chain argument effectively gives us another free register.
2015   if (FI.isChainCall())
2016     ++State.FreeRegs;
2017
2018   // For vectorcall, do a first pass over the arguments, assigning FP and vector
2019   // arguments to XMM registers as available.
2020   if (State.CC == llvm::CallingConv::X86_VectorCall)
2021     runVectorCallFirstPass(FI, State);
2022
2023   bool UsedInAlloca = false;
2024   MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
2025   for (int I = 0, E = Args.size(); I < E; ++I) {
2026     // Skip arguments that have already been assigned.
2027     if (State.IsPreassigned.test(I))
2028       continue;
2029
2030     Args[I].info = classifyArgumentType(Args[I].type, State);
2031     UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca);
2032   }
2033
2034   // If we needed to use inalloca for any argument, do a second pass and rewrite
2035   // all the memory arguments to use inalloca.
2036   if (UsedInAlloca)
2037     rewriteWithInAlloca(FI);
2038 }
2039
2040 void
2041 X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
2042                                    CharUnits &StackOffset, ABIArgInfo &Info,
2043                                    QualType Type) const {
2044   // Arguments are always 4-byte-aligned.
2045   CharUnits WordSize = CharUnits::fromQuantity(4);
2046   assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct");
2047
2048   // sret pointers and indirect things will require an extra pointer
2049   // indirection, unless they are byval. Most things are byval, and will not
2050   // require this indirection.
2051   bool IsIndirect = false;
2052   if (Info.isIndirect() && !Info.getIndirectByVal())
2053     IsIndirect = true;
2054   Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect);
2055   llvm::Type *LLTy = CGT.ConvertTypeForMem(Type);
2056   if (IsIndirect)
2057     LLTy = LLTy->getPointerTo(0);
2058   FrameFields.push_back(LLTy);
2059   StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type);
2060
2061   // Insert padding bytes to respect alignment.
2062   CharUnits FieldEnd = StackOffset;
2063   StackOffset = FieldEnd.alignTo(WordSize);
2064   if (StackOffset != FieldEnd) {
2065     CharUnits NumBytes = StackOffset - FieldEnd;
2066     llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
2067     Ty = llvm::ArrayType::get(Ty, NumBytes.getQuantity());
2068     FrameFields.push_back(Ty);
2069   }
2070 }
2071
2072 static bool isArgInAlloca(const ABIArgInfo &Info) {
2073   // Leave ignored and inreg arguments alone.
2074   switch (Info.getKind()) {
2075   case ABIArgInfo::InAlloca:
2076     return true;
2077   case ABIArgInfo::Ignore:
2078   case ABIArgInfo::IndirectAliased:
2079     return false;
2080   case ABIArgInfo::Indirect:
2081   case ABIArgInfo::Direct:
2082   case ABIArgInfo::Extend:
2083     return !Info.getInReg();
2084   case ABIArgInfo::Expand:
2085   case ABIArgInfo::CoerceAndExpand:
2086     // These are aggregate types which are never passed in registers when
2087     // inalloca is involved.
2088     return true;
2089   }
2090   llvm_unreachable("invalid enum");
2091 }
2092
2093 void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
2094   assert(IsWin32StructABI && "inalloca only supported on win32");
2095
2096   // Build a packed struct type for all of the arguments in memory.
2097   SmallVector<llvm::Type *, 6> FrameFields;
2098
2099   // The stack alignment is always 4.
2100   CharUnits StackAlign = CharUnits::fromQuantity(4);
2101
2102   CharUnits StackOffset;
2103   CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end();
2104
2105   // Put 'this' into the struct before 'sret', if necessary.
2106   bool IsThisCall =
2107       FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall;
2108   ABIArgInfo &Ret = FI.getReturnInfo();
2109   if (Ret.isIndirect() && Ret.isSRetAfterThis() && !IsThisCall &&
2110       isArgInAlloca(I->info)) {
2111     addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
2112     ++I;
2113   }
2114
2115   // Put the sret parameter into the inalloca struct if it's in memory.
2116   if (Ret.isIndirect() && !Ret.getInReg()) {
2117     addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType());
2118     // On Windows, the hidden sret parameter is always returned in eax.
2119     Ret.setInAllocaSRet(IsWin32StructABI);
2120   }
2121
2122   // Skip the 'this' parameter in ecx.
2123   if (IsThisCall)
2124     ++I;
2125
2126   // Put arguments passed in memory into the struct.
2127   for (; I != E; ++I) {
2128     if (isArgInAlloca(I->info))
2129       addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
2130   }
2131
2132   FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields,
2133                                         /*isPacked=*/true),
2134                   StackAlign);
2135 }
2136
2137 Address X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF,
2138                                  Address VAListAddr, QualType Ty) const {
2139
2140   auto TypeInfo = getContext().getTypeInfoInChars(Ty);
2141
2142   // x86-32 changes the alignment of certain arguments on the stack.
2143   //
2144   // Just messing with TypeInfo like this works because we never pass
2145   // anything indirectly.
2146   TypeInfo.Align = CharUnits::fromQuantity(
2147                 getTypeStackAlignInBytes(Ty, TypeInfo.Align.getQuantity()));
2148
2149   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
2150                           TypeInfo, CharUnits::fromQuantity(4),
2151                           /*AllowHigherAlign*/ true);
2152 }
2153
2154 bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
2155     const llvm::Triple &Triple, const CodeGenOptions &Opts) {
2156   assert(Triple.getArch() == llvm::Triple::x86);
2157
2158   switch (Opts.getStructReturnConvention()) {
2159   case CodeGenOptions::SRCK_Default:
2160     break;
2161   case CodeGenOptions::SRCK_OnStack:  // -fpcc-struct-return
2162     return false;
2163   case CodeGenOptions::SRCK_InRegs:  // -freg-struct-return
2164     return true;
2165   }
2166
2167   if (Triple.isOSDarwin() || Triple.isOSIAMCU())
2168     return true;
2169
2170   switch (Triple.getOS()) {
2171   case llvm::Triple::DragonFly:
2172   case llvm::Triple::FreeBSD:
2173   case llvm::Triple::OpenBSD:
2174   case llvm::Triple::Win32:
2175     return true;
2176   default:
2177     return false;
2178   }
2179 }
2180
2181 static void addX86InterruptAttrs(const FunctionDecl *FD, llvm::GlobalValue *GV,
2182                                  CodeGen::CodeGenModule &CGM) {
2183   if (!FD->hasAttr<AnyX86InterruptAttr>())
2184     return;
2185
2186   llvm::Function *Fn = cast<llvm::Function>(GV);
2187   Fn->setCallingConv(llvm::CallingConv::X86_INTR);
2188   if (FD->getNumParams() == 0)
2189     return;
2190
2191   auto PtrTy = cast<PointerType>(FD->getParamDecl(0)->getType());
2192   llvm::Type *ByValTy = CGM.getTypes().ConvertType(PtrTy->getPointeeType());
2193   llvm::Attribute NewAttr = llvm::Attribute::getWithByValType(
2194     Fn->getContext(), ByValTy);
2195   Fn->addParamAttr(0, NewAttr);
2196 }
2197
2198 void X86_32TargetCodeGenInfo::setTargetAttributes(
2199     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
2200   if (GV->isDeclaration())
2201     return;
2202   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
2203     if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
2204       llvm::Function *Fn = cast<llvm::Function>(GV);
2205       Fn->addFnAttr("stackrealign");
2206     }
2207
2208     addX86InterruptAttrs(FD, GV, CGM);
2209   }
2210 }
2211
2212 bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable(
2213                                                CodeGen::CodeGenFunction &CGF,
2214                                                llvm::Value *Address) const {
2215   CodeGen::CGBuilderTy &Builder = CGF.Builder;
2216
2217   llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);
2218
2219   // 0-7 are the eight integer registers;  the order is different
2220   //   on Darwin (for EH), but the range is the same.
2221   // 8 is %eip.
2222   AssignToArrayRange(Builder, Address, Four8, 0, 8);
2223
2224   if (CGF.CGM.getTarget().getTriple().isOSDarwin()) {
2225     // 12-16 are st(0..4).  Not sure why we stop at 4.
2226     // These have size 16, which is sizeof(long double) on
2227     // platforms with 8-byte alignment for that type.
2228     llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16);
2229     AssignToArrayRange(Builder, Address, Sixteen8, 12, 16);
2230
2231   } else {
2232     // 9 is %eflags, which doesn't get a size on Darwin for some
2233     // reason.
2234     Builder.CreateAlignedStore(
2235         Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address, 9),
2236                                CharUnits::One());
2237
2238     // 11-16 are st(0..5).  Not sure why we stop at 5.
2239     // These have size 12, which is sizeof(long double) on
2240     // platforms with 4-byte alignment for that type.
2241     llvm::Value *Twelve8 = llvm::ConstantInt::get(CGF.Int8Ty, 12);
2242     AssignToArrayRange(Builder, Address, Twelve8, 11, 16);
2243   }
2244
2245   return false;
2246 }
2247
2248 //===----------------------------------------------------------------------===//
2249 // X86-64 ABI Implementation
2250 //===----------------------------------------------------------------------===//
2251
2252
2253 namespace {
2254 /// The AVX ABI level for X86 targets.
2255 enum class X86AVXABILevel {
2256   None,
2257   AVX,
2258   AVX512
2259 };
2260
2261 /// \p returns the size in bits of the largest (native) vector for \p AVXLevel.
2262 static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
2263   switch (AVXLevel) {
2264   case X86AVXABILevel::AVX512:
2265     return 512;
2266   case X86AVXABILevel::AVX:
2267     return 256;
2268   case X86AVXABILevel::None:
2269     return 128;
2270   }
2271   llvm_unreachable("Unknown AVXLevel");
2272 }
2273
2274 /// X86_64ABIInfo - The X86_64 ABI information.
2275 class X86_64ABIInfo : public ABIInfo {
2276   enum Class {
2277     Integer = 0,
2278     SSE,
2279     SSEUp,
2280     X87,
2281     X87Up,
2282     ComplexX87,
2283     NoClass,
2284     Memory
2285   };
2286
2287   /// merge - Implement the X86_64 ABI merging algorithm.
2288   ///
2289   /// Merge an accumulating classification \arg Accum with a field
2290   /// classification \arg Field.
2291   ///
2292   /// \param Accum - The accumulating classification. This should
2293   /// always be either NoClass or the result of a previous merge
2294   /// call. In addition, this should never be Memory (the caller
2295   /// should just return Memory for the aggregate).
2296   static Class merge(Class Accum, Class Field);
2297
2298   /// postMerge - Implement the X86_64 ABI post merging algorithm.
2299   ///
2300   /// Post merger cleanup, reduces a malformed Hi and Lo pair to
2301   /// final MEMORY or SSE classes when necessary.
2302   ///
2303   /// \param AggregateSize - The size of the current aggregate in
2304   /// the classification process.
2305   ///
2306   /// \param Lo - The classification for the parts of the type
2307   /// residing in the low word of the containing object.
2308   ///
2309   /// \param Hi - The classification for the parts of the type
2310   /// residing in the higher words of the containing object.
2311   ///
2312   void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;
2313
2314   /// classify - Determine the x86_64 register classes in which the
2315   /// given type T should be passed.
2316   ///
2317   /// \param Lo - The classification for the parts of the type
2318   /// residing in the low word of the containing object.
2319   ///
2320   /// \param Hi - The classification for the parts of the type
2321   /// residing in the high word of the containing object.
2322   ///
2323   /// \param OffsetBase - The bit offset of this type in the
2324   /// containing object.  Some parameters are classified different
2325   /// depending on whether they straddle an eightbyte boundary.
2326   ///
2327   /// \param isNamedArg - Whether the argument in question is a "named"
2328   /// argument, as used in AMD64-ABI 3.5.7.
2329   ///
2330   /// \param IsRegCall - Whether the calling conversion is regcall.
2331   ///
2332   /// If a word is unused its result will be NoClass; if a type should
2333   /// be passed in Memory then at least the classification of \arg Lo
2334   /// will be Memory.
2335   ///
2336   /// The \arg Lo class will be NoClass iff the argument is ignored.
2337   ///
2338   /// If the \arg Lo class is ComplexX87, then the \arg Hi class will
2339   /// also be ComplexX87.
2340   void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi,
2341                 bool isNamedArg, bool IsRegCall = false) const;
2342
2343   llvm::Type *GetByteVectorType(QualType Ty) const;
2344   llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType,
2345                                  unsigned IROffset, QualType SourceTy,
2346                                  unsigned SourceOffset) const;
2347   llvm::Type *GetINTEGERTypeAtOffset(llvm::Type *IRType,
2348                                      unsigned IROffset, QualType SourceTy,
2349                                      unsigned SourceOffset) const;
2350
2351   /// getIndirectResult - Give a source type \arg Ty, return a suitable result
2352   /// such that the argument will be returned in memory.
2353   ABIArgInfo getIndirectReturnResult(QualType Ty) const;
2354
2355   /// getIndirectResult - Give a source type \arg Ty, return a suitable result
2356   /// such that the argument will be passed in memory.
2357   ///
2358   /// \param freeIntRegs - The number of free integer registers remaining
2359   /// available.
2360   ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const;
2361
2362   ABIArgInfo classifyReturnType(QualType RetTy) const;
2363
2364   ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs,
2365                                   unsigned &neededInt, unsigned &neededSSE,
2366                                   bool isNamedArg,
2367                                   bool IsRegCall = false) const;
2368
2369   ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
2370                                        unsigned &NeededSSE,
2371                                        unsigned &MaxVectorWidth) const;
2372
2373   ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
2374                                            unsigned &NeededSSE,
2375                                            unsigned &MaxVectorWidth) const;
2376
2377   bool IsIllegalVectorType(QualType Ty) const;
2378
2379   /// The 0.98 ABI revision clarified a lot of ambiguities,
2380   /// unfortunately in ways that were not always consistent with
2381   /// certain previous compilers.  In particular, platforms which
2382   /// required strict binary compatibility with older versions of GCC
2383   /// may need to exempt themselves.
2384   bool honorsRevision0_98() const {
2385     return !getTarget().getTriple().isOSDarwin();
2386   }
2387
2388   /// GCC classifies <1 x long long> as SSE but some platform ABIs choose to
2389   /// classify it as INTEGER (for compatibility with older clang compilers).
2390   bool classifyIntegerMMXAsSSE() const {
2391     // Clang <= 3.8 did not do this.
2392     if (getContext().getLangOpts().getClangABICompat() <=
2393         LangOptions::ClangABI::Ver3_8)
2394       return false;
2395
2396     const llvm::Triple &Triple = getTarget().getTriple();
2397     if (Triple.isOSDarwin() || Triple.isPS() || Triple.isOSFreeBSD())
2398       return false;
2399     return true;
2400   }
2401
2402   // GCC classifies vectors of __int128 as memory.
2403   bool passInt128VectorsInMem() const {
2404     // Clang <= 9.0 did not do this.
2405     if (getContext().getLangOpts().getClangABICompat() <=
2406         LangOptions::ClangABI::Ver9)
2407       return false;
2408
2409     const llvm::Triple &T = getTarget().getTriple();
2410     return T.isOSLinux() || T.isOSNetBSD();
2411   }
2412
2413   X86AVXABILevel AVXLevel;
2414   // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
2415   // 64-bit hardware.
2416   bool Has64BitPointers;
2417
2418 public:
2419   X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
2420       : ABIInfo(CGT), AVXLevel(AVXLevel),
2421         Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {}
2422
2423   bool isPassedUsingAVXType(QualType type) const {
2424     unsigned neededInt, neededSSE;
2425     // The freeIntRegs argument doesn't matter here.
2426     ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE,
2427                                            /*isNamedArg*/true);
2428     if (info.isDirect()) {
2429       llvm::Type *ty = info.getCoerceToType();
2430       if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
2431         return vectorTy->getPrimitiveSizeInBits().getFixedValue() > 128;
2432     }
2433     return false;
2434   }
2435
2436   void computeInfo(CGFunctionInfo &FI) const override;
2437
2438   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
2439                     QualType Ty) const override;
2440   Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
2441                       QualType Ty) const override;
2442
2443   bool has64BitPointers() const {
2444     return Has64BitPointers;
2445   }
2446 };
2447
2448 /// WinX86_64ABIInfo - The Windows X86_64 ABI information.
2449 class WinX86_64ABIInfo : public ABIInfo {
2450 public:
2451   WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
2452       : ABIInfo(CGT), AVXLevel(AVXLevel),
2453         IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {}
2454
2455   void computeInfo(CGFunctionInfo &FI) const override;
2456
2457   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
2458                     QualType Ty) const override;
2459
2460   bool isHomogeneousAggregateBaseType(QualType Ty) const override {
2461     // FIXME: Assumes vectorcall is in use.
2462     return isX86VectorTypeForVectorCall(getContext(), Ty);
2463   }
2464
2465   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
2466                                          uint64_t NumMembers) const override {
2467     // FIXME: Assumes vectorcall is in use.
2468     return isX86VectorCallAggregateSmallEnough(NumMembers);
2469   }
2470
2471 private:
2472   ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
2473                       bool IsVectorCall, bool IsRegCall) const;
2474   ABIArgInfo reclassifyHvaArgForVectorCall(QualType Ty, unsigned &FreeSSERegs,
2475                                            const ABIArgInfo &current) const;
2476
2477   X86AVXABILevel AVXLevel;
2478
2479   bool IsMingw64;
2480 };
2481
2482 class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
2483 public:
2484   X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
2485       : TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) {
2486     SwiftInfo =
2487         std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true);
2488   }
2489
2490   /// Disable tail call on x86-64. The epilogue code before the tail jump blocks
2491   /// autoreleaseRV/retainRV and autoreleaseRV/unsafeClaimRV optimizations.
2492   bool markARCOptimizedReturnCallsAsNoTail() const override { return true; }
2493
2494   int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
2495     return 7;
2496   }
2497
2498   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
2499                                llvm::Value *Address) const override {
2500     llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
2501
2502     // 0-15 are the 16 integer registers.
2503     // 16 is %rip.
2504     AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
2505     return false;
2506   }
2507
2508   llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
2509                                   StringRef Constraint,
2510                                   llvm::Type* Ty) const override {
2511     return X86AdjustInlineAsmType(CGF, Constraint, Ty);
2512   }
2513
2514   bool isNoProtoCallVariadic(const CallArgList &args,
2515                              const FunctionNoProtoType *fnType) const override {
2516     // The default CC on x86-64 sets %al to the number of SSA
2517     // registers used, and GCC sets this when calling an unprototyped
2518     // function, so we override the default behavior.  However, don't do
2519     // that when AVX types are involved: the ABI explicitly states it is
2520     // undefined, and it doesn't work in practice because of how the ABI
2521     // defines varargs anyway.
2522     if (fnType->getCallConv() == CC_C) {
2523       bool HasAVXType = false;
2524       for (CallArgList::const_iterator
2525              it = args.begin(), ie = args.end(); it != ie; ++it) {
2526         if (getABIInfo<X86_64ABIInfo>().isPassedUsingAVXType(it->Ty)) {
2527           HasAVXType = true;
2528           break;
2529         }
2530       }
2531
2532       if (!HasAVXType)
2533         return true;
2534     }
2535
2536     return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType);
2537   }
2538
2539   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
2540                            CodeGen::CodeGenModule &CGM) const override {
2541     if (GV->isDeclaration())
2542       return;
2543     if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
2544       if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
2545         llvm::Function *Fn = cast<llvm::Function>(GV);
2546         Fn->addFnAttr("stackrealign");
2547       }
2548
2549       addX86InterruptAttrs(FD, GV, CGM);
2550     }
2551   }
2552
2553   void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
2554                             const FunctionDecl *Caller,
2555                             const FunctionDecl *Callee,
2556                             const CallArgList &Args) const override;
2557 };
2558 } // namespace
2559
2560 static void initFeatureMaps(const ASTContext &Ctx,
2561                             llvm::StringMap<bool> &CallerMap,
2562                             const FunctionDecl *Caller,
2563                             llvm::StringMap<bool> &CalleeMap,
2564                             const FunctionDecl *Callee) {
2565   if (CalleeMap.empty() && CallerMap.empty()) {
2566     // The caller is potentially nullptr in the case where the call isn't in a
2567     // function.  In this case, the getFunctionFeatureMap ensures we just get
2568     // the TU level setting (since it cannot be modified by 'target'..
2569     Ctx.getFunctionFeatureMap(CallerMap, Caller);
2570     Ctx.getFunctionFeatureMap(CalleeMap, Callee);
2571   }
2572 }
2573
2574 static bool checkAVXParamFeature(DiagnosticsEngine &Diag,
2575                                  SourceLocation CallLoc,
2576                                  const llvm::StringMap<bool> &CallerMap,
2577                                  const llvm::StringMap<bool> &CalleeMap,
2578                                  QualType Ty, StringRef Feature,
2579                                  bool IsArgument) {
2580   bool CallerHasFeat = CallerMap.lookup(Feature);
2581   bool CalleeHasFeat = CalleeMap.lookup(Feature);
2582   if (!CallerHasFeat && !CalleeHasFeat)
2583     return Diag.Report(CallLoc, diag::warn_avx_calling_convention)
2584            << IsArgument << Ty << Feature;
2585
2586   // Mixing calling conventions here is very clearly an error.
2587   if (!CallerHasFeat || !CalleeHasFeat)
2588     return Diag.Report(CallLoc, diag::err_avx_calling_convention)
2589            << IsArgument << Ty << Feature;
2590
2591   // Else, both caller and callee have the required feature, so there is no need
2592   // to diagnose.
2593   return false;
2594 }
2595
2596 static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx,
2597                           SourceLocation CallLoc,
2598                           const llvm::StringMap<bool> &CallerMap,
2599                           const llvm::StringMap<bool> &CalleeMap, QualType Ty,
2600                           bool IsArgument) {
2601   uint64_t Size = Ctx.getTypeSize(Ty);
2602   if (Size > 256)
2603     return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
2604                                 "avx512f", IsArgument);
2605
2606   if (Size > 128)
2607     return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx",
2608                                 IsArgument);
2609
2610   return false;
2611 }
2612
2613 void X86_64TargetCodeGenInfo::checkFunctionCallABI(
2614     CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
2615     const FunctionDecl *Callee, const CallArgList &Args) const {
2616   llvm::StringMap<bool> CallerMap;
2617   llvm::StringMap<bool> CalleeMap;
2618   unsigned ArgIndex = 0;
2619
2620   // We need to loop through the actual call arguments rather than the
2621   // function's parameters, in case this variadic.
2622   for (const CallArg &Arg : Args) {
2623     // The "avx" feature changes how vectors >128 in size are passed. "avx512f"
2624     // additionally changes how vectors >256 in size are passed. Like GCC, we
2625     // warn when a function is called with an argument where this will change.
2626     // Unlike GCC, we also error when it is an obvious ABI mismatch, that is,
2627     // the caller and callee features are mismatched.
2628     // Unfortunately, we cannot do this diagnostic in SEMA, since the callee can
2629     // change its ABI with attribute-target after this call.
2630     if (Arg.getType()->isVectorType() &&
2631         CGM.getContext().getTypeSize(Arg.getType()) > 128) {
2632       initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
2633       QualType Ty = Arg.getType();
2634       // The CallArg seems to have desugared the type already, so for clearer
2635       // diagnostics, replace it with the type in the FunctionDecl if possible.
2636       if (ArgIndex < Callee->getNumParams())
2637         Ty = Callee->getParamDecl(ArgIndex)->getType();
2638
2639       if (checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
2640                         CalleeMap, Ty, /*IsArgument*/ true))
2641         return;
2642     }
2643     ++ArgIndex;
2644   }
2645
2646   // Check return always, as we don't have a good way of knowing in codegen
2647   // whether this value is used, tail-called, etc.
2648   if (Callee->getReturnType()->isVectorType() &&
2649       CGM.getContext().getTypeSize(Callee->getReturnType()) > 128) {
2650     initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
2651     checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
2652                   CalleeMap, Callee->getReturnType(),
2653                   /*IsArgument*/ false);
2654   }
2655 }
2656
2657 std::string TargetCodeGenInfo::qualifyWindowsLibrary(StringRef Lib) {
2658   // If the argument does not end in .lib, automatically add the suffix.
2659   // If the argument contains a space, enclose it in quotes.
2660   // This matches the behavior of MSVC.
2661   bool Quote = Lib.contains(' ');
2662   std::string ArgStr = Quote ? "\"" : "";
2663   ArgStr += Lib;
2664   if (!Lib.ends_with_insensitive(".lib") && !Lib.ends_with_insensitive(".a"))
2665     ArgStr += ".lib";
2666   ArgStr += Quote ? "\"" : "";
2667   return ArgStr;
2668 }
2669
2670 namespace {
2671 class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo {
2672 public:
2673   WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
2674         bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI,
2675         unsigned NumRegisterParameters)
2676     : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI,
2677         Win32StructABI, NumRegisterParameters, false) {}
2678
2679   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
2680                            CodeGen::CodeGenModule &CGM) const override;
2681
2682   void getDependentLibraryOption(llvm::StringRef Lib,
2683                                  llvm::SmallString<24> &Opt) const override {
2684     Opt = "/DEFAULTLIB:";
2685     Opt += qualifyWindowsLibrary(Lib);
2686   }
2687
2688   void getDetectMismatchOption(llvm::StringRef Name,
2689                                llvm::StringRef Value,
2690                                llvm::SmallString<32> &Opt) const override {
2691     Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
2692   }
2693 };
2694 } // namespace
2695
2696 void TargetCodeGenInfo::addStackProbeTargetAttributes(
2697     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
2698   if (llvm::Function *Fn = dyn_cast_or_null<llvm::Function>(GV)) {
2699     if (CGM.getCodeGenOpts().StackProbeSize != 4096)
2700       Fn->addFnAttr("stack-probe-size",
2701                     llvm::utostr(CGM.getCodeGenOpts().StackProbeSize));
2702     if (CGM.getCodeGenOpts().NoStackArgProbe)
2703       Fn->addFnAttr("no-stack-arg-probe");
2704   }
2705 }
2706
2707 void WinX86_32TargetCodeGenInfo::setTargetAttributes(
2708     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
2709   X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
2710   if (GV->isDeclaration())
2711     return;
2712   addStackProbeTargetAttributes(D, GV, CGM);
2713 }
2714
2715 namespace {
2716 class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo {
2717 public:
2718   WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
2719                              X86AVXABILevel AVXLevel)
2720       : TargetCodeGenInfo(std::make_unique<WinX86_64ABIInfo>(CGT, AVXLevel)) {
2721     SwiftInfo =
2722         std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true);
2723   }
2724
2725   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
2726                            CodeGen::CodeGenModule &CGM) const override;
2727
2728   int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
2729     return 7;
2730   }
2731
2732   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
2733                                llvm::Value *Address) const override {
2734     llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
2735
2736     // 0-15 are the 16 integer registers.
2737     // 16 is %rip.
2738     AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
2739     return false;
2740   }
2741
2742   void getDependentLibraryOption(llvm::StringRef Lib,
2743                                  llvm::SmallString<24> &Opt) const override {
2744     Opt = "/DEFAULTLIB:";
2745     Opt += qualifyWindowsLibrary(Lib);
2746   }
2747
2748   void getDetectMismatchOption(llvm::StringRef Name,
2749                                llvm::StringRef Value,
2750                                llvm::SmallString<32> &Opt) const override {
2751     Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
2752   }
2753 };
2754 } // namespace
2755
2756 void WinX86_64TargetCodeGenInfo::setTargetAttributes(
2757     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
2758   TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
2759   if (GV->isDeclaration())
2760     return;
2761   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
2762     if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
2763       llvm::Function *Fn = cast<llvm::Function>(GV);
2764       Fn->addFnAttr("stackrealign");
2765     }
2766
2767     addX86InterruptAttrs(FD, GV, CGM);
2768   }
2769
2770   addStackProbeTargetAttributes(D, GV, CGM);
2771 }
2772
2773 void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo,
2774                               Class &Hi) const {
2775   // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done:
2776   //
2777   // (a) If one of the classes is Memory, the whole argument is passed in
2778   //     memory.
2779   //
2780   // (b) If X87UP is not preceded by X87, the whole argument is passed in
2781   //     memory.
2782   //
2783   // (c) If the size of the aggregate exceeds two eightbytes and the first
2784   //     eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole
2785   //     argument is passed in memory. NOTE: This is necessary to keep the
2786   //     ABI working for processors that don't support the __m256 type.
2787   //
2788   // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE.
2789   //
2790   // Some of these are enforced by the merging logic.  Others can arise
2791   // only with unions; for example:
2792   //   union { _Complex double; unsigned; }
2793   //
2794   // Note that clauses (b) and (c) were added in 0.98.
2795   //
2796   if (Hi == Memory)
2797     Lo = Memory;
2798   if (Hi == X87Up && Lo != X87 && honorsRevision0_98())
2799     Lo = Memory;
2800   if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp))
2801     Lo = Memory;
2802   if (Hi == SSEUp && Lo != SSE)
2803     Hi = SSE;
2804 }
2805
2806 X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) {
2807   // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is
2808   // classified recursively so that always two fields are
2809   // considered. The resulting class is calculated according to
2810   // the classes of the fields in the eightbyte:
2811   //
2812   // (a) If both classes are equal, this is the resulting class.
2813   //
2814   // (b) If one of the classes is NO_CLASS, the resulting class is
2815   // the other class.
2816   //
2817   // (c) If one of the classes is MEMORY, the result is the MEMORY
2818   // class.
2819   //
2820   // (d) If one of the classes is INTEGER, the result is the
2821   // INTEGER.
2822   //
2823   // (e) If one of the classes is X87, X87UP, COMPLEX_X87 class,
2824   // MEMORY is used as class.
2825   //
2826   // (f) Otherwise class SSE is used.
2827
2828   // Accum should never be memory (we should have returned) or
2829   // ComplexX87 (because this cannot be passed in a structure).
2830   assert((Accum != Memory && Accum != ComplexX87) &&
2831          "Invalid accumulated classification during merge.");
2832   if (Accum == Field || Field == NoClass)
2833     return Accum;
2834   if (Field == Memory)
2835     return Memory;
2836   if (Accum == NoClass)
2837     return Field;
2838   if (Accum == Integer || Field == Integer)
2839     return Integer;
2840   if (Field == X87 || Field == X87Up || Field == ComplexX87 ||
2841       Accum == X87 || Accum == X87Up)
2842     return Memory;
2843   return SSE;
2844 }
2845
2846 void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo,
2847                              Class &Hi, bool isNamedArg, bool IsRegCall) const {
2848   // FIXME: This code can be simplified by introducing a simple value class for
2849   // Class pairs with appropriate constructor methods for the various
2850   // situations.
2851
2852   // FIXME: Some of the split computations are wrong; unaligned vectors
2853   // shouldn't be passed in registers for example, so there is no chance they
2854   // can straddle an eightbyte. Verify & simplify.
2855
2856   Lo = Hi = NoClass;
2857
2858   Class &Current = OffsetBase < 64 ? Lo : Hi;
2859   Current = Memory;
2860
2861   if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
2862     BuiltinType::Kind k = BT->getKind();
2863
2864     if (k == BuiltinType::Void) {
2865       Current = NoClass;
2866     } else if (k == BuiltinType::Int128 || k == BuiltinType::UInt128) {
2867       Lo = Integer;
2868       Hi = Integer;
2869     } else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) {
2870       Current = Integer;
2871     } else if (k == BuiltinType::Float || k == BuiltinType::Double ||
2872                k == BuiltinType::Float16 || k == BuiltinType::BFloat16) {
2873       Current = SSE;
2874     } else if (k == BuiltinType::LongDouble) {
2875       const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
2876       if (LDF == &llvm::APFloat::IEEEquad()) {
2877         Lo = SSE;
2878         Hi = SSEUp;
2879       } else if (LDF == &llvm::APFloat::x87DoubleExtended()) {
2880         Lo = X87;
2881         Hi = X87Up;
2882       } else if (LDF == &llvm::APFloat::IEEEdouble()) {
2883         Current = SSE;
2884       } else
2885         llvm_unreachable("unexpected long double representation!");
2886     }
2887     // FIXME: _Decimal32 and _Decimal64 are SSE.
2888     // FIXME: _float128 and _Decimal128 are (SSE, SSEUp).
2889     return;
2890   }
2891
2892   if (const EnumType *ET = Ty->getAs<EnumType>()) {
2893     // Classify the underlying integer type.
2894     classify(ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi, isNamedArg);
2895     return;
2896   }
2897
2898   if (Ty->hasPointerRepresentation()) {
2899     Current = Integer;
2900     return;
2901   }
2902
2903   if (Ty->isMemberPointerType()) {
2904     if (Ty->isMemberFunctionPointerType()) {
2905       if (Has64BitPointers) {
2906         // If Has64BitPointers, this is an {i64, i64}, so classify both
2907         // Lo and Hi now.
2908         Lo = Hi = Integer;
2909       } else {
2910         // Otherwise, with 32-bit pointers, this is an {i32, i32}. If that
2911         // straddles an eightbyte boundary, Hi should be classified as well.
2912         uint64_t EB_FuncPtr = (OffsetBase) / 64;
2913         uint64_t EB_ThisAdj = (OffsetBase + 64 - 1) / 64;
2914         if (EB_FuncPtr != EB_ThisAdj) {
2915           Lo = Hi = Integer;
2916         } else {
2917           Current = Integer;
2918         }
2919       }
2920     } else {
2921       Current = Integer;
2922     }
2923     return;
2924   }
2925
2926   if (const VectorType *VT = Ty->getAs<VectorType>()) {
2927     uint64_t Size = getContext().getTypeSize(VT);
2928     if (Size == 1 || Size == 8 || Size == 16 || Size == 32) {
2929       // gcc passes the following as integer:
2930       // 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float>
2931       // 2 bytes - <2 x char>, <1 x short>
2932       // 1 byte  - <1 x char>
2933       Current = Integer;
2934
2935       // If this type crosses an eightbyte boundary, it should be
2936       // split.
2937       uint64_t EB_Lo = (OffsetBase) / 64;
2938       uint64_t EB_Hi = (OffsetBase + Size - 1) / 64;
2939       if (EB_Lo != EB_Hi)
2940         Hi = Lo;
2941     } else if (Size == 64) {
2942       QualType ElementType = VT->getElementType();
2943
2944       // gcc passes <1 x double> in memory. :(
2945       if (ElementType->isSpecificBuiltinType(BuiltinType::Double))
2946         return;
2947
2948       // gcc passes <1 x long long> as SSE but clang used to unconditionally
2949       // pass them as integer.  For platforms where clang is the de facto
2950       // platform compiler, we must continue to use integer.
2951       if (!classifyIntegerMMXAsSSE() &&
2952           (ElementType->isSpecificBuiltinType(BuiltinType::LongLong) ||
2953            ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) ||
2954            ElementType->isSpecificBuiltinType(BuiltinType::Long) ||
2955            ElementType->isSpecificBuiltinType(BuiltinType::ULong)))
2956         Current = Integer;
2957       else
2958         Current = SSE;
2959
2960       // If this type crosses an eightbyte boundary, it should be
2961       // split.
2962       if (OffsetBase && OffsetBase != 64)
2963         Hi = Lo;
2964     } else if (Size == 128 ||
2965                (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) {
2966       QualType ElementType = VT->getElementType();
2967
2968       // gcc passes 256 and 512 bit <X x __int128> vectors in memory. :(
2969       if (passInt128VectorsInMem() && Size != 128 &&
2970           (ElementType->isSpecificBuiltinType(BuiltinType::Int128) ||
2971            ElementType->isSpecificBuiltinType(BuiltinType::UInt128)))
2972         return;
2973
2974       // Arguments of 256-bits are split into four eightbyte chunks. The
2975       // least significant one belongs to class SSE and all the others to class
2976       // SSEUP. The original Lo and Hi design considers that types can't be
2977       // greater than 128-bits, so a 64-bit split in Hi and Lo makes sense.
2978       // This design isn't correct for 256-bits, but since there're no cases
2979       // where the upper parts would need to be inspected, avoid adding
2980       // complexity and just consider Hi to match the 64-256 part.
2981       //
2982       // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in
2983       // registers if they are "named", i.e. not part of the "..." of a
2984       // variadic function.
2985       //
2986       // Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are
2987       // split into eight eightbyte chunks, one SSE and seven SSEUP.
2988       Lo = SSE;
2989       Hi = SSEUp;
2990     }
2991     return;
2992   }
2993
2994   if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
2995     QualType ET = getContext().getCanonicalType(CT->getElementType());
2996
2997     uint64_t Size = getContext().getTypeSize(Ty);
2998     if (ET->isIntegralOrEnumerationType()) {
2999       if (Size <= 64)
3000         Current = Integer;
3001       else if (Size <= 128)
3002         Lo = Hi = Integer;
3003     } else if (ET->isFloat16Type() || ET == getContext().FloatTy ||
3004                ET->isBFloat16Type()) {
3005       Current = SSE;
3006     } else if (ET == getContext().DoubleTy) {
3007       Lo = Hi = SSE;
3008     } else if (ET == getContext().LongDoubleTy) {
3009       const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
3010       if (LDF == &llvm::APFloat::IEEEquad())
3011         Current = Memory;
3012       else if (LDF == &llvm::APFloat::x87DoubleExtended())
3013         Current = ComplexX87;
3014       else if (LDF == &llvm::APFloat::IEEEdouble())
3015         Lo = Hi = SSE;
3016       else
3017         llvm_unreachable("unexpected long double representation!");
3018     }
3019
3020     // If this complex type crosses an eightbyte boundary then it
3021     // should be split.
3022     uint64_t EB_Real = (OffsetBase) / 64;
3023     uint64_t EB_Imag = (OffsetBase + getContext().getTypeSize(ET)) / 64;
3024     if (Hi == NoClass && EB_Real != EB_Imag)
3025       Hi = Lo;
3026
3027     return;
3028   }
3029
3030   if (const auto *EITy = Ty->getAs<BitIntType>()) {
3031     if (EITy->getNumBits() <= 64)
3032       Current = Integer;
3033     else if (EITy->getNumBits() <= 128)
3034       Lo = Hi = Integer;
3035     // Larger values need to get passed in memory.
3036     return;
3037   }
3038
3039   if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
3040     // Arrays are treated like structures.
3041
3042     uint64_t Size = getContext().getTypeSize(Ty);
3043
3044     // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
3045     // than eight eightbytes, ..., it has class MEMORY.
3046     // regcall ABI doesn't have limitation to an object. The only limitation
3047     // is the free registers, which will be checked in computeInfo.
3048     if (!IsRegCall && Size > 512)
3049       return;
3050
3051     // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned
3052     // fields, it has class MEMORY.
3053     //
3054     // Only need to check alignment of array base.
3055     if (OffsetBase % getContext().getTypeAlign(AT->getElementType()))
3056       return;
3057
3058     // Otherwise implement simplified merge. We could be smarter about
3059     // this, but it isn't worth it and would be harder to verify.
3060     Current = NoClass;
3061     uint64_t EltSize = getContext().getTypeSize(AT->getElementType());
3062     uint64_t ArraySize = AT->getSize().getZExtValue();
3063
3064     // The only case a 256-bit wide vector could be used is when the array
3065     // contains a single 256-bit element. Since Lo and Hi logic isn't extended
3066     // to work for sizes wider than 128, early check and fallback to memory.
3067     //
3068     if (Size > 128 &&
3069         (Size != EltSize || Size > getNativeVectorSizeForAVXABI(AVXLevel)))
3070       return;
3071
3072     for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) {
3073       Class FieldLo, FieldHi;
3074       classify(AT->getElementType(), Offset, FieldLo, FieldHi, isNamedArg);
3075       Lo = merge(Lo, FieldLo);
3076       Hi = merge(Hi, FieldHi);
3077       if (Lo == Memory || Hi == Memory)
3078         break;
3079     }
3080
3081     postMerge(Size, Lo, Hi);
3082     assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification.");
3083     return;
3084   }
3085
3086   if (const RecordType *RT = Ty->getAs<RecordType>()) {
3087     uint64_t Size = getContext().getTypeSize(Ty);
3088
3089     // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
3090     // than eight eightbytes, ..., it has class MEMORY.
3091     if (Size > 512)
3092       return;
3093
3094     // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial
3095     // copy constructor or a non-trivial destructor, it is passed by invisible
3096     // reference.
3097     if (getRecordArgABI(RT, getCXXABI()))
3098       return;
3099
3100     const RecordDecl *RD = RT->getDecl();
3101
3102     // Assume variable sized types are passed in memory.
3103     if (RD->hasFlexibleArrayMember())
3104       return;
3105
3106     const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
3107
3108     // Reset Lo class, this will be recomputed.
3109     Current = NoClass;
3110
3111     // If this is a C++ record, classify the bases first.
3112     if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
3113       for (const auto &I : CXXRD->bases()) {
3114         assert(!I.isVirtual() && !I.getType()->isDependentType() &&
3115                "Unexpected base class!");
3116         const auto *Base =
3117             cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
3118
3119         // Classify this field.
3120         //
3121         // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a
3122         // single eightbyte, each is classified separately. Each eightbyte gets
3123         // initialized to class NO_CLASS.
3124         Class FieldLo, FieldHi;
3125         uint64_t Offset =
3126           OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base));
3127         classify(I.getType(), Offset, FieldLo, FieldHi, isNamedArg);
3128         Lo = merge(Lo, FieldLo);
3129         Hi = merge(Hi, FieldHi);
3130         if (Lo == Memory || Hi == Memory) {
3131           postMerge(Size, Lo, Hi);
3132           return;
3133         }
3134       }
3135     }
3136
3137     // Classify the fields one at a time, merging the results.
3138     unsigned idx = 0;
3139     bool UseClang11Compat = getContext().getLangOpts().getClangABICompat() <=
3140                                 LangOptions::ClangABI::Ver11 ||
3141                             getContext().getTargetInfo().getTriple().isPS();
3142     bool IsUnion = RT->isUnionType() && !UseClang11Compat;
3143
3144     for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
3145            i != e; ++i, ++idx) {
3146       uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx);
3147       bool BitField = i->isBitField();
3148
3149       // Ignore padding bit-fields.
3150       if (BitField && i->isUnnamedBitfield())
3151         continue;
3152
3153       // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than
3154       // eight eightbytes, or it contains unaligned fields, it has class MEMORY.
3155       //
3156       // The only case a 256-bit or a 512-bit wide vector could be used is when
3157       // the struct contains a single 256-bit or 512-bit element. Early check
3158       // and fallback to memory.
3159       //
3160       // FIXME: Extended the Lo and Hi logic properly to work for size wider
3161       // than 128.
3162       if (Size > 128 &&
3163           ((!IsUnion && Size != getContext().getTypeSize(i->getType())) ||
3164            Size > getNativeVectorSizeForAVXABI(AVXLevel))) {
3165         Lo = Memory;
3166         postMerge(Size, Lo, Hi);
3167         return;
3168       }
3169       // Note, skip this test for bit-fields, see below.
3170       if (!BitField && Offset % getContext().getTypeAlign(i->getType())) {
3171         Lo = Memory;
3172         postMerge(Size, Lo, Hi);
3173         return;
3174       }
3175
3176       // Classify this field.
3177       //
3178       // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate
3179       // exceeds a single eightbyte, each is classified
3180       // separately. Each eightbyte gets initialized to class
3181       // NO_CLASS.
3182       Class FieldLo, FieldHi;
3183
3184       // Bit-fields require special handling, they do not force the
3185       // structure to be passed in memory even if unaligned, and
3186       // therefore they can straddle an eightbyte.
3187       if (BitField) {
3188         assert(!i->isUnnamedBitfield());
3189         uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx);
3190         uint64_t Size = i->getBitWidthValue(getContext());
3191
3192         uint64_t EB_Lo = Offset / 64;
3193         uint64_t EB_Hi = (Offset + Size - 1) / 64;
3194
3195         if (EB_Lo) {
3196           assert(EB_Hi == EB_Lo && "Invalid classification, type > 16 bytes.");
3197           FieldLo = NoClass;
3198           FieldHi = Integer;
3199         } else {
3200           FieldLo = Integer;
3201           FieldHi = EB_Hi ? Integer : NoClass;
3202         }
3203       } else
3204         classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg);
3205       Lo = merge(Lo, FieldLo);
3206       Hi = merge(Hi, FieldHi);
3207       if (Lo == Memory || Hi == Memory)
3208         break;
3209     }
3210
3211     postMerge(Size, Lo, Hi);
3212   }
3213 }
3214
3215 ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const {
3216   // If this is a scalar LLVM value then assume LLVM will pass it in the right
3217   // place naturally.
3218   if (!isAggregateTypeForABI(Ty)) {
3219     // Treat an enum type as its underlying type.
3220     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
3221       Ty = EnumTy->getDecl()->getIntegerType();
3222
3223     if (Ty->isBitIntType())
3224       return getNaturalAlignIndirect(Ty);
3225
3226     return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
3227                                               : ABIArgInfo::getDirect());
3228   }
3229
3230   return getNaturalAlignIndirect(Ty);
3231 }
3232
3233 bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
3234   if (const VectorType *VecTy = Ty->getAs<VectorType>()) {
3235     uint64_t Size = getContext().getTypeSize(VecTy);
3236     unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel);
3237     if (Size <= 64 || Size > LargestVector)
3238       return true;
3239     QualType EltTy = VecTy->getElementType();
3240     if (passInt128VectorsInMem() &&
3241         (EltTy->isSpecificBuiltinType(BuiltinType::Int128) ||
3242          EltTy->isSpecificBuiltinType(BuiltinType::UInt128)))
3243       return true;
3244   }
3245
3246   return false;
3247 }
3248
3249 ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty,
3250                                             unsigned freeIntRegs) const {
3251   // If this is a scalar LLVM value then assume LLVM will pass it in the right
3252   // place naturally.
3253   //
3254   // This assumption is optimistic, as there could be free registers available
3255   // when we need to pass this argument in memory, and LLVM could try to pass
3256   // the argument in the free register. This does not seem to happen currently,
3257   // but this code would be much safer if we could mark the argument with
3258   // 'onstack'. See PR12193.
3259   if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty) &&
3260       !Ty->isBitIntType()) {
3261     // Treat an enum type as its underlying type.
3262     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
3263       Ty = EnumTy->getDecl()->getIntegerType();
3264
3265     return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
3266                                               : ABIArgInfo::getDirect());
3267   }
3268
3269   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
3270     return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
3271
3272   // Compute the byval alignment. We specify the alignment of the byval in all
3273   // cases so that the mid-level optimizer knows the alignment of the byval.
3274   unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U);
3275
3276   // Attempt to avoid passing indirect results using byval when possible. This
3277   // is important for good codegen.
3278   //
3279   // We do this by coercing the value into a scalar type which the backend can
3280   // handle naturally (i.e., without using byval).
3281   //
3282   // For simplicity, we currently only do this when we have exhausted all of the
3283   // free integer registers. Doing this when there are free integer registers
3284   // would require more care, as we would have to ensure that the coerced value
3285   // did not claim the unused register. That would require either reording the
3286   // arguments to the function (so that any subsequent inreg values came first),
3287   // or only doing this optimization when there were no following arguments that
3288   // might be inreg.
3289   //
3290   // We currently expect it to be rare (particularly in well written code) for
3291   // arguments to be passed on the stack when there are still free integer
3292   // registers available (this would typically imply large structs being passed
3293   // by value), so this seems like a fair tradeoff for now.
3294   //
3295   // We can revisit this if the backend grows support for 'onstack' parameter
3296   // attributes. See PR12193.
3297   if (freeIntRegs == 0) {
3298     uint64_t Size = getContext().getTypeSize(Ty);
3299
3300     // If this type fits in an eightbyte, coerce it into the matching integral
3301     // type, which will end up on the stack (with alignment 8).
3302     if (Align == 8 && Size <= 64)
3303       return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
3304                                                           Size));
3305   }
3306
3307   return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align));
3308 }
3309
3310 /// The ABI specifies that a value should be passed in a full vector XMM/YMM
3311 /// register. Pick an LLVM IR type that will be passed as a vector register.
3312 llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const {
3313   // Wrapper structs/arrays that only contain vectors are passed just like
3314   // vectors; strip them off if present.
3315   if (const Type *InnerTy = isSingleElementStruct(Ty, getContext()))
3316     Ty = QualType(InnerTy, 0);
3317
3318   llvm::Type *IRType = CGT.ConvertType(Ty);
3319   if (isa<llvm::VectorType>(IRType)) {
3320     // Don't pass vXi128 vectors in their native type, the backend can't
3321     // legalize them.
3322     if (passInt128VectorsInMem() &&
3323         cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy(128)) {
3324       // Use a vXi64 vector.
3325       uint64_t Size = getContext().getTypeSize(Ty);
3326       return llvm::FixedVectorType::get(llvm::Type::getInt64Ty(getVMContext()),
3327                                         Size / 64);
3328     }
3329
3330     return IRType;
3331   }
3332
3333   if (IRType->getTypeID() == llvm::Type::FP128TyID)
3334     return IRType;
3335
3336   // We couldn't find the preferred IR vector type for 'Ty'.
3337   uint64_t Size = getContext().getTypeSize(Ty);
3338   assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!");
3339
3340
3341   // Return a LLVM IR vector type based on the size of 'Ty'.
3342   return llvm::FixedVectorType::get(llvm::Type::getDoubleTy(getVMContext()),
3343                                     Size / 64);
3344 }
3345
3346 /// BitsContainNoUserData - Return true if the specified [start,end) bit range
3347 /// is known to either be off the end of the specified type or being in
3348 /// alignment padding.  The user type specified is known to be at most 128 bits
3349 /// in size, and have passed through X86_64ABIInfo::classify with a successful
3350 /// classification that put one of the two halves in the INTEGER class.
3351 ///
3352 /// It is conservatively correct to return false.
3353 static bool BitsContainNoUserData(QualType Ty, unsigned StartBit,
3354                                   unsigned EndBit, ASTContext &Context) {
3355   // If the bytes being queried are off the end of the type, there is no user
3356   // data hiding here.  This handles analysis of builtins, vectors and other
3357   // types that don't contain interesting padding.
3358   unsigned TySize = (unsigned)Context.getTypeSize(Ty);
3359   if (TySize <= StartBit)
3360     return true;
3361
3362   if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
3363     unsigned EltSize = (unsigned)Context.getTypeSize(AT->getElementType());
3364     unsigned NumElts = (unsigned)AT->getSize().getZExtValue();
3365
3366     // Check each element to see if the element overlaps with the queried range.
3367     for (unsigned i = 0; i != NumElts; ++i) {
3368       // If the element is after the span we care about, then we're done..
3369       unsigned EltOffset = i*EltSize;
3370       if (EltOffset >= EndBit) break;
3371
3372       unsigned EltStart = EltOffset < StartBit ? StartBit-EltOffset :0;
3373       if (!BitsContainNoUserData(AT->getElementType(), EltStart,
3374                                  EndBit-EltOffset, Context))
3375         return false;
3376     }
3377     // If it overlaps no elements, then it is safe to process as padding.
3378     return true;
3379   }
3380
3381   if (const RecordType *RT = Ty->getAs<RecordType>()) {
3382     const RecordDecl *RD = RT->getDecl();
3383     const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
3384
3385     // If this is a C++ record, check the bases first.
3386     if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
3387       for (const auto &I : CXXRD->bases()) {
3388         assert(!I.isVirtual() && !I.getType()->isDependentType() &&
3389                "Unexpected base class!");
3390         const auto *Base =
3391             cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
3392
3393         // If the base is after the span we care about, ignore it.
3394         unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base));
3395         if (BaseOffset >= EndBit) continue;
3396
3397         unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :0;
3398         if (!BitsContainNoUserData(I.getType(), BaseStart,
3399                                    EndBit-BaseOffset, Context))
3400           return false;
3401       }
3402     }
3403
3404     // Verify that no field has data that overlaps the region of interest.  Yes
3405     // this could be sped up a lot by being smarter about queried fields,
3406     // however we're only looking at structs up to 16 bytes, so we don't care
3407     // much.
3408     unsigned idx = 0;
3409     for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
3410          i != e; ++i, ++idx) {
3411       unsigned FieldOffset = (unsigned)Layout.getFieldOffset(idx);
3412
3413       // If we found a field after the region we care about, then we're done.
3414       if (FieldOffset >= EndBit) break;
3415
3416       unsigned FieldStart = FieldOffset < StartBit ? StartBit-FieldOffset :0;
3417       if (!BitsContainNoUserData(i->getType(), FieldStart, EndBit-FieldOffset,
3418                                  Context))
3419         return false;
3420     }
3421
3422     // If nothing in this record overlapped the area of interest, then we're
3423     // clean.
3424     return true;
3425   }
3426
3427   return false;
3428 }
3429
3430 /// getFPTypeAtOffset - Return a floating point type at the specified offset.
3431 static llvm::Type *getFPTypeAtOffset(llvm::Type *IRType, unsigned IROffset,
3432                                      const llvm::DataLayout &TD) {
3433   if (IROffset == 0 && IRType->isFloatingPointTy())
3434     return IRType;
3435
3436   // If this is a struct, recurse into the field at the specified offset.
3437   if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) {
3438     if (!STy->getNumContainedTypes())
3439       return nullptr;
3440
3441     const llvm::StructLayout *SL = TD.getStructLayout(STy);
3442     unsigned Elt = SL->getElementContainingOffset(IROffset);
3443     IROffset -= SL->getElementOffset(Elt);
3444     return getFPTypeAtOffset(STy->getElementType(Elt), IROffset, TD);
3445   }
3446
3447   // If this is an array, recurse into the field at the specified offset.
3448   if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) {
3449     llvm::Type *EltTy = ATy->getElementType();
3450     unsigned EltSize = TD.getTypeAllocSize(EltTy);
3451     IROffset -= IROffset / EltSize * EltSize;
3452     return getFPTypeAtOffset(EltTy, IROffset, TD);
3453   }
3454
3455   return nullptr;
3456 }
3457
3458 /// GetSSETypeAtOffset - Return a type that will be passed by the backend in the
3459 /// low 8 bytes of an XMM register, corresponding to the SSE class.
3460 llvm::Type *X86_64ABIInfo::
3461 GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
3462                    QualType SourceTy, unsigned SourceOffset) const {
3463   const llvm::DataLayout &TD = getDataLayout();
3464   unsigned SourceSize =
3465       (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
3466   llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD);
3467   if (!T0 || T0->isDoubleTy())
3468     return llvm::Type::getDoubleTy(getVMContext());
3469
3470   // Get the adjacent FP type.
3471   llvm::Type *T1 = nullptr;
3472   unsigned T0Size = TD.getTypeAllocSize(T0);
3473   if (SourceSize > T0Size)
3474       T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD);
3475   if (T1 == nullptr) {
3476     // Check if IRType is a half/bfloat + float. float type will be in IROffset+4 due
3477     // to its alignment.
3478     if (T0->is16bitFPTy() && SourceSize > 4)
3479       T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
3480     // If we can't get a second FP type, return a simple half or float.
3481     // avx512fp16-abi.c:pr51813_2 shows it works to return float for
3482     // {float, i8} too.
3483     if (T1 == nullptr)
3484       return T0;
3485   }
3486
3487   if (T0->isFloatTy() && T1->isFloatTy())
3488     return llvm::FixedVectorType::get(T0, 2);
3489
3490   if (T0->is16bitFPTy() && T1->is16bitFPTy()) {
3491     llvm::Type *T2 = nullptr;
3492     if (SourceSize > 4)
3493       T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
3494     if (T2 == nullptr)
3495       return llvm::FixedVectorType::get(T0, 2);
3496     return llvm::FixedVectorType::get(T0, 4);
3497   }
3498
3499   if (T0->is16bitFPTy() || T1->is16bitFPTy())
3500     return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4);
3501
3502   return llvm::Type::getDoubleTy(getVMContext());
3503 }
3504
3505
3506 /// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in
3507 /// an 8-byte GPR.  This means that we either have a scalar or we are talking
3508 /// about the high or low part of an up-to-16-byte struct.  This routine picks
3509 /// the best LLVM IR type to represent this, which may be i64 or may be anything
3510 /// else that the backend will pass in a GPR that works better (e.g. i8, %foo*,
3511 /// etc).
3512 ///
3513 /// PrefType is an LLVM IR type that corresponds to (part of) the IR type for
3514 /// the source type.  IROffset is an offset in bytes into the LLVM IR type that
3515 /// the 8-byte value references.  PrefType may be null.
3516 ///
3517 /// SourceTy is the source-level type for the entire argument.  SourceOffset is
3518 /// an offset into this that we're processing (which is always either 0 or 8).
3519 ///
3520 llvm::Type *X86_64ABIInfo::
3521 GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset,
3522                        QualType SourceTy, unsigned SourceOffset) const {
3523   // If we're dealing with an un-offset LLVM IR type, then it means that we're
3524   // returning an 8-byte unit starting with it.  See if we can safely use it.
3525   if (IROffset == 0) {
3526     // Pointers and int64's always fill the 8-byte unit.
3527     if ((isa<llvm::PointerType>(IRType) && Has64BitPointers) ||
3528         IRType->isIntegerTy(64))
3529       return IRType;
3530
3531     // If we have a 1/2/4-byte integer, we can use it only if the rest of the
3532     // goodness in the source type is just tail padding.  This is allowed to
3533     // kick in for struct {double,int} on the int, but not on
3534     // struct{double,int,int} because we wouldn't return the second int.  We
3535     // have to do this analysis on the source type because we can't depend on
3536     // unions being lowered a specific way etc.
3537     if (IRType->isIntegerTy(8) || IRType->isIntegerTy(16) ||
3538         IRType->isIntegerTy(32) ||
3539         (isa<llvm::PointerType>(IRType) && !Has64BitPointers)) {
3540       unsigned BitWidth = isa<llvm::PointerType>(IRType) ? 32 :
3541           cast<llvm::IntegerType>(IRType)->getBitWidth();
3542
3543       if (BitsContainNoUserData(SourceTy, SourceOffset*8+BitWidth,
3544                                 SourceOffset*8+64, getContext()))
3545         return IRType;
3546     }
3547   }
3548
3549   if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) {
3550     // If this is a struct, recurse into the field at the specified offset.
3551     const llvm::StructLayout *SL = getDataLayout().getStructLayout(STy);
3552     if (IROffset < SL->getSizeInBytes()) {
3553       unsigned FieldIdx = SL->getElementContainingOffset(IROffset);
3554       IROffset -= SL->getElementOffset(FieldIdx);
3555
3556       return GetINTEGERTypeAtOffset(STy->getElementType(FieldIdx), IROffset,
3557                                     SourceTy, SourceOffset);
3558     }
3559   }
3560
3561   if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) {
3562     llvm::Type *EltTy = ATy->getElementType();
3563     unsigned EltSize = getDataLayout().getTypeAllocSize(EltTy);
3564     unsigned EltOffset = IROffset/EltSize*EltSize;
3565     return GetINTEGERTypeAtOffset(EltTy, IROffset-EltOffset, SourceTy,
3566                                   SourceOffset);
3567   }
3568
3569   // Okay, we don't have any better idea of what to pass, so we pass this in an
3570   // integer register that isn't too big to fit the rest of the struct.
3571   unsigned TySizeInBytes =
3572     (unsigned)getContext().getTypeSizeInChars(SourceTy).getQuantity();
3573
3574   assert(TySizeInBytes != SourceOffset && "Empty field?");
3575
3576   // It is always safe to classify this as an integer type up to i64 that
3577   // isn't larger than the structure.
3578   return llvm::IntegerType::get(getVMContext(),
3579                                 std::min(TySizeInBytes-SourceOffset, 8U)*8);
3580 }
3581
3582
3583 /// GetX86_64ByValArgumentPair - Given a high and low type that can ideally
3584 /// be used as elements of a two register pair to pass or return, return a
3585 /// first class aggregate to represent them.  For example, if the low part of
3586 /// a by-value argument should be passed as i32* and the high part as float,
3587 /// return {i32*, float}.
3588 static llvm::Type *
3589 GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi,
3590                            const llvm::DataLayout &TD) {
3591   // In order to correctly satisfy the ABI, we need to the high part to start
3592   // at offset 8.  If the high and low parts we inferred are both 4-byte types
3593   // (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have
3594   // the second element at offset 8.  Check for this:
3595   unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo);
3596   llvm::Align HiAlign = TD.getABITypeAlign(Hi);
3597   unsigned HiStart = llvm::alignTo(LoSize, HiAlign);
3598   assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!");
3599
3600   // To handle this, we have to increase the size of the low part so that the
3601   // second element will start at an 8 byte offset.  We can't increase the size
3602   // of the second element because it might make us access off the end of the
3603   // struct.
3604   if (HiStart != 8) {
3605     // There are usually two sorts of types the ABI generation code can produce
3606     // for the low part of a pair that aren't 8 bytes in size: half, float or
3607     // i8/i16/i32.  This can also include pointers when they are 32-bit (X32 and
3608     // NaCl).
3609     // Promote these to a larger type.
3610     if (Lo->isHalfTy() || Lo->isFloatTy())
3611       Lo = llvm::Type::getDoubleTy(Lo->getContext());
3612     else {
3613       assert((Lo->isIntegerTy() || Lo->isPointerTy())
3614              && "Invalid/unknown lo type");
3615       Lo = llvm::Type::getInt64Ty(Lo->getContext());
3616     }
3617   }
3618
3619   llvm::StructType *Result = llvm::StructType::get(Lo, Hi);
3620
3621   // Verify that the second element is at an 8-byte offset.
3622   assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 &&
3623          "Invalid x86-64 argument pair!");
3624   return Result;
3625 }
3626
3627 ABIArgInfo X86_64ABIInfo::
3628 classifyReturnType(QualType RetTy) const {
3629   // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the
3630   // classification algorithm.
3631   X86_64ABIInfo::Class Lo, Hi;
3632   classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true);
3633
3634   // Check some invariants.
3635   assert((Hi != Memory || Lo == Memory) && "Invalid memory classification.");
3636   assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification.");
3637
3638   llvm::Type *ResType = nullptr;
3639   switch (Lo) {
3640   case NoClass:
3641     if (Hi == NoClass)
3642       return ABIArgInfo::getIgnore();
3643     // If the low part is just padding, it takes no register, leave ResType
3644     // null.
3645     assert((Hi == SSE || Hi == Integer || Hi == X87Up) &&
3646            "Unknown missing lo part");
3647     break;
3648
3649   case SSEUp:
3650   case X87Up:
3651     llvm_unreachable("Invalid classification for lo word.");
3652
3653     // AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via
3654     // hidden argument.
3655   case Memory:
3656     return getIndirectReturnResult(RetTy);
3657
3658     // AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next
3659     // available register of the sequence %rax, %rdx is used.
3660   case Integer:
3661     ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0);
3662
3663     // If we have a sign or zero extended integer, make sure to return Extend
3664     // so that the parameter gets the right LLVM IR attributes.
3665     if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) {
3666       // Treat an enum type as its underlying type.
3667       if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
3668         RetTy = EnumTy->getDecl()->getIntegerType();
3669
3670       if (RetTy->isIntegralOrEnumerationType() &&
3671           isPromotableIntegerTypeForABI(RetTy))
3672         return ABIArgInfo::getExtend(RetTy);
3673     }
3674     break;
3675
3676     // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next
3677     // available SSE register of the sequence %xmm0, %xmm1 is used.
3678   case SSE:
3679     ResType = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0);
3680     break;
3681
3682     // AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is
3683     // returned on the X87 stack in %st0 as 80-bit x87 number.
3684   case X87:
3685     ResType = llvm::Type::getX86_FP80Ty(getVMContext());
3686     break;
3687
3688     // AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real
3689     // part of the value is returned in %st0 and the imaginary part in
3690     // %st1.
3691   case ComplexX87:
3692     assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification.");
3693     ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()),
3694                                     llvm::Type::getX86_FP80Ty(getVMContext()));
3695     break;
3696   }
3697
3698   llvm::Type *HighPart = nullptr;
3699   switch (Hi) {
3700     // Memory was handled previously and X87 should
3701     // never occur as a hi class.
3702   case Memory:
3703   case X87:
3704     llvm_unreachable("Invalid classification for hi word.");
3705
3706   case ComplexX87: // Previously handled.
3707   case NoClass:
3708     break;
3709
3710   case Integer:
3711     HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
3712     if (Lo == NoClass)  // Return HighPart at offset 8 in memory.
3713       return ABIArgInfo::getDirect(HighPart, 8);
3714     break;
3715   case SSE:
3716     HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
3717     if (Lo == NoClass)  // Return HighPart at offset 8 in memory.
3718       return ABIArgInfo::getDirect(HighPart, 8);
3719     break;
3720
3721     // AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte
3722     // is passed in the next available eightbyte chunk if the last used
3723     // vector register.
3724     //
3725     // SSEUP should always be preceded by SSE, just widen.
3726   case SSEUp:
3727     assert(Lo == SSE && "Unexpected SSEUp classification.");
3728     ResType = GetByteVectorType(RetTy);
3729     break;
3730
3731     // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is
3732     // returned together with the previous X87 value in %st0.
3733   case X87Up:
3734     // If X87Up is preceded by X87, we don't need to do
3735     // anything. However, in some cases with unions it may not be
3736     // preceded by X87. In such situations we follow gcc and pass the
3737     // extra bits in an SSE reg.
3738     if (Lo != X87) {
3739       HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
3740       if (Lo == NoClass)  // Return HighPart at offset 8 in memory.
3741         return ABIArgInfo::getDirect(HighPart, 8);
3742     }
3743     break;
3744   }
3745
3746   // If a high part was specified, merge it together with the low part.  It is
3747   // known to pass in the high eightbyte of the result.  We do this by forming a
3748   // first class struct aggregate with the high and low part: {low, high}
3749   if (HighPart)
3750     ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout());
3751
3752   return ABIArgInfo::getDirect(ResType);
3753 }
3754
3755 ABIArgInfo
3756 X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs,
3757                                     unsigned &neededInt, unsigned &neededSSE,
3758                                     bool isNamedArg, bool IsRegCall) const {
3759   Ty = useFirstFieldIfTransparentUnion(Ty);
3760
3761   X86_64ABIInfo::Class Lo, Hi;
3762   classify(Ty, 0, Lo, Hi, isNamedArg, IsRegCall);
3763
3764   // Check some invariants.
3765   // FIXME: Enforce these by construction.
3766   assert((Hi != Memory || Lo == Memory) && "Invalid memory classification.");
3767   assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification.");
3768
3769   neededInt = 0;
3770   neededSSE = 0;
3771   llvm::Type *ResType = nullptr;
3772   switch (Lo) {
3773   case NoClass:
3774     if (Hi == NoClass)
3775       return ABIArgInfo::getIgnore();
3776     // If the low part is just padding, it takes no register, leave ResType
3777     // null.
3778     assert((Hi == SSE || Hi == Integer || Hi == X87Up) &&
3779            "Unknown missing lo part");
3780     break;
3781
3782     // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument
3783     // on the stack.
3784   case Memory:
3785
3786     // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or
3787     // COMPLEX_X87, it is passed in memory.
3788   case X87:
3789   case ComplexX87:
3790     if (getRecordArgABI(Ty, getCXXABI()) == CGCXXABI::RAA_Indirect)
3791       ++neededInt;
3792     return getIndirectResult(Ty, freeIntRegs);
3793
3794   case SSEUp:
3795   case X87Up:
3796     llvm_unreachable("Invalid classification for lo word.");
3797
3798     // AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next
3799     // available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8
3800     // and %r9 is used.
3801   case Integer:
3802     ++neededInt;
3803
3804     // Pick an 8-byte type based on the preferred type.
3805     ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 0, Ty, 0);
3806
3807     // If we have a sign or zero extended integer, make sure to return Extend
3808     // so that the parameter gets the right LLVM IR attributes.
3809     if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) {
3810       // Treat an enum type as its underlying type.
3811       if (const EnumType *EnumTy = Ty->getAs<EnumType>())
3812         Ty = EnumTy->getDecl()->getIntegerType();
3813
3814       if (Ty->isIntegralOrEnumerationType() &&
3815           isPromotableIntegerTypeForABI(Ty))
3816         return ABIArgInfo::getExtend(Ty);
3817     }
3818
3819     break;
3820
3821     // AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next
3822     // available SSE register is used, the registers are taken in the
3823     // order from %xmm0 to %xmm7.
3824   case SSE: {
3825     llvm::Type *IRType = CGT.ConvertType(Ty);
3826     ResType = GetSSETypeAtOffset(IRType, 0, Ty, 0);
3827     ++neededSSE;
3828     break;
3829   }
3830   }
3831
3832   llvm::Type *HighPart = nullptr;
3833   switch (Hi) {
3834     // Memory was handled previously, ComplexX87 and X87 should
3835     // never occur as hi classes, and X87Up must be preceded by X87,
3836     // which is passed in memory.
3837   case Memory:
3838   case X87:
3839   case ComplexX87:
3840     llvm_unreachable("Invalid classification for hi word.");
3841
3842   case NoClass: break;
3843
3844   case Integer:
3845     ++neededInt;
3846     // Pick an 8-byte type based on the preferred type.
3847     HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8);
3848
3849     if (Lo == NoClass)  // Pass HighPart at offset 8 in memory.
3850       return ABIArgInfo::getDirect(HighPart, 8);
3851     break;
3852
3853     // X87Up generally doesn't occur here (long double is passed in
3854     // memory), except in situations involving unions.
3855   case X87Up:
3856   case SSE:
3857     HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8);
3858
3859     if (Lo == NoClass)  // Pass HighPart at offset 8 in memory.
3860       return ABIArgInfo::getDirect(HighPart, 8);
3861
3862     ++neededSSE;
3863     break;
3864
3865     // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
3866     // eightbyte is passed in the upper half of the last used SSE
3867     // register.  This only happens when 128-bit vectors are passed.
3868   case SSEUp:
3869     assert(Lo == SSE && "Unexpected SSEUp classification");
3870     ResType = GetByteVectorType(Ty);
3871     break;
3872   }
3873
3874   // If a high part was specified, merge it together with the low part.  It is
3875   // known to pass in the high eightbyte of the result.  We do this by forming a
3876   // first class struct aggregate with the high and low part: {low, high}
3877   if (HighPart)
3878     ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout());
3879
3880   return ABIArgInfo::getDirect(ResType);
3881 }
3882
3883 ABIArgInfo
3884 X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
3885                                              unsigned &NeededSSE,
3886                                              unsigned &MaxVectorWidth) const {
3887   auto RT = Ty->getAs<RecordType>();
3888   assert(RT && "classifyRegCallStructType only valid with struct types");
3889
3890   if (RT->getDecl()->hasFlexibleArrayMember())
3891     return getIndirectReturnResult(Ty);
3892
3893   // Sum up bases
3894   if (auto CXXRD = dyn_cast<CXXRecordDecl>(RT->getDecl())) {
3895     if (CXXRD->isDynamicClass()) {
3896       NeededInt = NeededSSE = 0;
3897       return getIndirectReturnResult(Ty);
3898     }
3899
3900     for (const auto &I : CXXRD->bases())
3901       if (classifyRegCallStructTypeImpl(I.getType(), NeededInt, NeededSSE,
3902                                         MaxVectorWidth)
3903               .isIndirect()) {
3904         NeededInt = NeededSSE = 0;
3905         return getIndirectReturnResult(Ty);
3906       }
3907   }
3908
3909   // Sum up members
3910   for (const auto *FD : RT->getDecl()->fields()) {
3911     QualType MTy = FD->getType();
3912     if (MTy->isRecordType() && !MTy->isUnionType()) {
3913       if (classifyRegCallStructTypeImpl(MTy, NeededInt, NeededSSE,
3914                                         MaxVectorWidth)
3915               .isIndirect()) {
3916         NeededInt = NeededSSE = 0;
3917         return getIndirectReturnResult(Ty);
3918       }
3919     } else {
3920       unsigned LocalNeededInt, LocalNeededSSE;
3921       if (classifyArgumentType(MTy, UINT_MAX, LocalNeededInt, LocalNeededSSE,
3922                                true, true)
3923               .isIndirect()) {
3924         NeededInt = NeededSSE = 0;
3925         return getIndirectReturnResult(Ty);
3926       }
3927       if (const auto *AT = getContext().getAsConstantArrayType(MTy))
3928         MTy = AT->getElementType();
3929       if (const auto *VT = MTy->getAs<VectorType>())
3930         if (getContext().getTypeSize(VT) > MaxVectorWidth)
3931           MaxVectorWidth = getContext().getTypeSize(VT);
3932       NeededInt += LocalNeededInt;
3933       NeededSSE += LocalNeededSSE;
3934     }
3935   }
3936
3937   return ABIArgInfo::getDirect();
3938 }
3939
3940 ABIArgInfo
3941 X86_64ABIInfo::classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
3942                                          unsigned &NeededSSE,
3943                                          unsigned &MaxVectorWidth) const {
3944
3945   NeededInt = 0;
3946   NeededSSE = 0;
3947   MaxVectorWidth = 0;
3948
3949   return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE,
3950                                        MaxVectorWidth);
3951 }
3952
3953 void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
3954
3955   const unsigned CallingConv = FI.getCallingConvention();
3956   // It is possible to force Win64 calling convention on any x86_64 target by
3957   // using __attribute__((ms_abi)). In such case to correctly emit Win64
3958   // compatible code delegate this call to WinX86_64ABIInfo::computeInfo.
3959   if (CallingConv == llvm::CallingConv::Win64) {
3960     WinX86_64ABIInfo Win64ABIInfo(CGT, AVXLevel);
3961     Win64ABIInfo.computeInfo(FI);
3962     return;
3963   }
3964
3965   bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall;
3966
3967   // Keep track of the number of assigned registers.
3968   unsigned FreeIntRegs = IsRegCall ? 11 : 6;
3969   unsigned FreeSSERegs = IsRegCall ? 16 : 8;
3970   unsigned NeededInt = 0, NeededSSE = 0, MaxVectorWidth = 0;
3971
3972   if (!::classifyReturnType(getCXXABI(), FI, *this)) {
3973     if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() &&
3974         !FI.getReturnType()->getTypePtr()->isUnionType()) {
3975       FI.getReturnInfo() = classifyRegCallStructType(
3976           FI.getReturnType(), NeededInt, NeededSSE, MaxVectorWidth);
3977       if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
3978         FreeIntRegs -= NeededInt;
3979         FreeSSERegs -= NeededSSE;
3980       } else {
3981         FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
3982       }
3983     } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>() &&
3984                getContext().getCanonicalType(FI.getReturnType()
3985                                                  ->getAs<ComplexType>()
3986                                                  ->getElementType()) ==
3987                    getContext().LongDoubleTy)
3988       // Complex Long Double Type is passed in Memory when Regcall
3989       // calling convention is used.
3990       FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
3991     else
3992       FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
3993   }
3994
3995   // If the return value is indirect, then the hidden argument is consuming one
3996   // integer register.
3997   if (FI.getReturnInfo().isIndirect())
3998     --FreeIntRegs;
3999   else if (NeededSSE && MaxVectorWidth > 0)
4000     FI.setMaxVectorWidth(MaxVectorWidth);
4001
4002   // The chain argument effectively gives us another free register.
4003   if (FI.isChainCall())
4004     ++FreeIntRegs;
4005
4006   unsigned NumRequiredArgs = FI.getNumRequiredArgs();
4007   // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers
4008   // get assigned (in left-to-right order) for passing as follows...
4009   unsigned ArgNo = 0;
4010   for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
4011        it != ie; ++it, ++ArgNo) {
4012     bool IsNamedArg = ArgNo < NumRequiredArgs;
4013
4014     if (IsRegCall && it->type->isStructureOrClassType())
4015       it->info = classifyRegCallStructType(it->type, NeededInt, NeededSSE,
4016                                            MaxVectorWidth);
4017     else
4018       it->info = classifyArgumentType(it->type, FreeIntRegs, NeededInt,
4019                                       NeededSSE, IsNamedArg);
4020
4021     // AMD64-ABI 3.2.3p3: If there are no registers available for any
4022     // eightbyte of an argument, the whole argument is passed on the
4023     // stack. If registers have already been assigned for some
4024     // eightbytes of such an argument, the assignments get reverted.
4025     if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
4026       FreeIntRegs -= NeededInt;
4027       FreeSSERegs -= NeededSSE;
4028       if (MaxVectorWidth > FI.getMaxVectorWidth())
4029         FI.setMaxVectorWidth(MaxVectorWidth);
4030     } else {
4031       it->info = getIndirectResult(it->type, FreeIntRegs);
4032     }
4033   }
4034 }
4035
4036 static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF,
4037                                          Address VAListAddr, QualType Ty) {
4038   Address overflow_arg_area_p =
4039       CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_p");
4040   llvm::Value *overflow_arg_area =
4041     CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area");
4042
4043   // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
4044   // byte boundary if alignment needed by type exceeds 8 byte boundary.
4045   // It isn't stated explicitly in the standard, but in practice we use
4046   // alignment greater than 16 where necessary.
4047   CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty);
4048   if (Align > CharUnits::fromQuantity(8)) {
4049     overflow_arg_area = emitRoundPointerUpToAlignment(CGF, overflow_arg_area,
4050                                                       Align);
4051   }
4052
4053   // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
4054   llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
4055   llvm::Value *Res =
4056     CGF.Builder.CreateBitCast(overflow_arg_area,
4057                               llvm::PointerType::getUnqual(LTy));
4058
4059   // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
4060   // l->overflow_arg_area + sizeof(type).
4061   // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
4062   // an 8 byte boundary.
4063
4064   uint64_t SizeInBytes = (CGF.getContext().getTypeSize(Ty) + 7) / 8;
4065   llvm::Value *Offset =
4066       llvm::ConstantInt::get(CGF.Int32Ty, (SizeInBytes + 7)  & ~7);
4067   overflow_arg_area = CGF.Builder.CreateGEP(CGF.Int8Ty, overflow_arg_area,
4068                                             Offset, "overflow_arg_area.next");
4069   CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p);
4070
4071   // AMD64-ABI 3.5.7p5: Step 11. Return the fetched type.
4072   return Address(Res, LTy, Align);
4073 }
4074
4075 Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
4076                                  QualType Ty) const {
4077   // Assume that va_list type is correct; should be pointer to LLVM type:
4078   // struct {
4079   //   i32 gp_offset;
4080   //   i32 fp_offset;
4081   //   i8* overflow_arg_area;
4082   //   i8* reg_save_area;
4083   // };
4084   unsigned neededInt, neededSSE;
4085
4086   Ty = getContext().getCanonicalType(Ty);
4087   ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE,
4088                                        /*isNamedArg*/false);
4089
4090   // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
4091   // in the registers. If not go to step 7.
4092   if (!neededInt && !neededSSE)
4093     return EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);
4094
4095   // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
4096   // general purpose registers needed to pass type and num_fp to hold
4097   // the number of floating point registers needed.
4098
4099   // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
4100   // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
4101   // l->fp_offset > 304 - num_fp * 16 go to step 7.
4102   //
4103   // NOTE: 304 is a typo, there are (6 * 8 + 8 * 16) = 176 bytes of
4104   // register save space).
4105
4106   llvm::Value *InRegs = nullptr;
4107   Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid();
4108   llvm::Value *gp_offset = nullptr, *fp_offset = nullptr;
4109   if (neededInt) {
4110     gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p");
4111     gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset");
4112     InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8);
4113     InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp");
4114   }
4115
4116   if (neededSSE) {
4117     fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, "fp_offset_p");
4118     fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset");
4119     llvm::Value *FitsInFP =
4120       llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16);
4121     FitsInFP = CGF.Builder.CreateICmpULE(fp_offset, FitsInFP, "fits_in_fp");
4122     InRegs = InRegs ? CGF.Builder.CreateAnd(InRegs, FitsInFP) : FitsInFP;
4123   }
4124
4125   llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
4126   llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem");
4127   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
4128   CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock);
4129
4130   // Emit code to load the value if it was passed in registers.
4131
4132   CGF.EmitBlock(InRegBlock);
4133
4134   // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
4135   // an offset of l->gp_offset and/or l->fp_offset. This may require
4136   // copying to a temporary location in case the parameter is passed
4137   // in different register classes or requires an alignment greater
4138   // than 8 for general purpose registers and 16 for XMM registers.
4139   //
4140   // FIXME: This really results in shameful code when we end up needing to
4141   // collect arguments from different places; often what should result in a
4142   // simple assembling of a structure from scattered addresses has many more
4143   // loads than necessary. Can we clean this up?
4144   llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
4145   llvm::Value *RegSaveArea = CGF.Builder.CreateLoad(
4146       CGF.Builder.CreateStructGEP(VAListAddr, 3), "reg_save_area");
4147
4148   Address RegAddr = Address::invalid();
4149   if (neededInt && neededSSE) {
4150     // FIXME: Cleanup.
4151     assert(AI.isDirect() && "Unexpected ABI info for mixed regs");
4152     llvm::StructType *ST = cast<llvm::StructType>(AI.getCoerceToType());
4153     Address Tmp = CGF.CreateMemTemp(Ty);
4154     Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
4155     assert(ST->getNumElements() == 2 && "Unexpected ABI info for mixed regs");
4156     llvm::Type *TyLo = ST->getElementType(0);
4157     llvm::Type *TyHi = ST->getElementType(1);
4158     assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) &&
4159            "Unexpected ABI info for mixed regs");
4160     llvm::Type *PTyLo = llvm::PointerType::getUnqual(TyLo);
4161     llvm::Type *PTyHi = llvm::PointerType::getUnqual(TyHi);
4162     llvm::Value *GPAddr =
4163         CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset);
4164     llvm::Value *FPAddr =
4165         CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset);
4166     llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr;
4167     llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr;
4168
4169     // Copy the first element.
4170     // FIXME: Our choice of alignment here and below is probably pessimistic.
4171     llvm::Value *V = CGF.Builder.CreateAlignedLoad(
4172         TyLo, CGF.Builder.CreateBitCast(RegLoAddr, PTyLo),
4173         CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyLo)));
4174     CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
4175
4176     // Copy the second element.
4177     V = CGF.Builder.CreateAlignedLoad(
4178         TyHi, CGF.Builder.CreateBitCast(RegHiAddr, PTyHi),
4179         CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyHi)));
4180     CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));
4181
4182     RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy);
4183   } else if (neededInt) {
4184     RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset),
4185                       CGF.Int8Ty, CharUnits::fromQuantity(8));
4186     RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy);
4187
4188     // Copy to a temporary if necessary to ensure the appropriate alignment.
4189     auto TInfo = getContext().getTypeInfoInChars(Ty);
4190     uint64_t TySize = TInfo.Width.getQuantity();
4191     CharUnits TyAlign = TInfo.Align;
4192
4193     // Copy into a temporary if the type is more aligned than the
4194     // register save area.
4195     if (TyAlign.getQuantity() > 8) {
4196       Address Tmp = CGF.CreateMemTemp(Ty);
4197       CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false);
4198       RegAddr = Tmp;
4199     }
4200
4201   } else if (neededSSE == 1) {
4202     RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset),
4203                       CGF.Int8Ty, CharUnits::fromQuantity(16));
4204     RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy);
4205   } else {
4206     assert(neededSSE == 2 && "Invalid number of needed registers!");
4207     // SSE registers are spaced 16 bytes apart in the register save
4208     // area, we need to collect the two eightbytes together.
4209     // The ABI isn't explicit about this, but it seems reasonable
4210     // to assume that the slots are 16-byte aligned, since the stack is
4211     // naturally 16-byte aligned and the prologue is expected to store
4212     // all the SSE registers to the RSA.
4213     Address RegAddrLo = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea,
4214                                                       fp_offset),
4215                                 CGF.Int8Ty, CharUnits::fromQuantity(16));
4216     Address RegAddrHi =
4217       CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo,
4218                                              CharUnits::fromQuantity(16));
4219     llvm::Type *ST = AI.canHaveCoerceToType()
4220                          ? AI.getCoerceToType()
4221                          : llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy);
4222     llvm::Value *V;
4223     Address Tmp = CGF.CreateMemTemp(Ty);
4224     Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
4225     V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast(
4226         RegAddrLo, ST->getStructElementType(0)));
4227     CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
4228     V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast(
4229         RegAddrHi, ST->getStructElementType(1)));
4230     CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));
4231
4232     RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy);
4233   }
4234
4235   // AMD64-ABI 3.5.7p5: Step 5. Set:
4236   // l->gp_offset = l->gp_offset + num_gp * 8
4237   // l->fp_offset = l->fp_offset + num_fp * 16.
4238   if (neededInt) {
4239     llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededInt * 8);
4240     CGF.Builder.CreateStore(CGF.Builder.CreateAdd(gp_offset, Offset),
4241                             gp_offset_p);
4242   }
4243   if (neededSSE) {
4244     llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededSSE * 16);
4245     CGF.Builder.CreateStore(CGF.Builder.CreateAdd(fp_offset, Offset),
4246                             fp_offset_p);
4247   }
4248   CGF.EmitBranch(ContBlock);
4249
4250   // Emit code to load the value if it was passed in memory.
4251
4252   CGF.EmitBlock(InMemBlock);
4253   Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);
4254
4255   // Return the appropriate result.
4256
4257   CGF.EmitBlock(ContBlock);
4258   Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock,
4259                                  "vaarg.addr");
4260   return ResAddr;
4261 }
4262
4263 Address X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
4264                                    QualType Ty) const {
4265   // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
4266   // not 1, 2, 4, or 8 bytes, must be passed by reference."
4267   uint64_t Width = getContext().getTypeSize(Ty);
4268   bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);
4269
4270   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
4271                           CGF.getContext().getTypeInfoInChars(Ty),
4272                           CharUnits::fromQuantity(8),
4273                           /*allowHigherAlign*/ false);
4274 }
4275
4276 ABIArgInfo WinX86_64ABIInfo::reclassifyHvaArgForVectorCall(
4277     QualType Ty, unsigned &FreeSSERegs, const ABIArgInfo &current) const {
4278   const Type *Base = nullptr;
4279   uint64_t NumElts = 0;
4280
4281   if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
4282       isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) {
4283     FreeSSERegs -= NumElts;
4284     return getDirectX86Hva();
4285   }
4286   return current;
4287 }
4288
4289 ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
4290                                       bool IsReturnType, bool IsVectorCall,
4291                                       bool IsRegCall) const {
4292
4293   if (Ty->isVoidType())
4294     return ABIArgInfo::getIgnore();
4295
4296   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
4297     Ty = EnumTy->getDecl()->getIntegerType();
4298
4299   TypeInfo Info = getContext().getTypeInfo(Ty);
4300   uint64_t Width = Info.Width;
4301   CharUnits Align = getContext().toCharUnitsFromBits(Info.Align);
4302
4303   const RecordType *RT = Ty->getAs<RecordType>();
4304   if (RT) {
4305     if (!IsReturnType) {
4306       if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()))
4307         return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
4308     }
4309
4310     if (RT->getDecl()->hasFlexibleArrayMember())
4311       return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
4312
4313   }
4314
4315   const Type *Base = nullptr;
4316   uint64_t NumElts = 0;
4317   // vectorcall adds the concept of a homogenous vector aggregate, similar to
4318   // other targets.
4319   if ((IsVectorCall || IsRegCall) &&
4320       isHomogeneousAggregate(Ty, Base, NumElts)) {
4321     if (IsRegCall) {
4322       if (FreeSSERegs >= NumElts) {
4323         FreeSSERegs -= NumElts;
4324         if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
4325           return ABIArgInfo::getDirect();
4326         return ABIArgInfo::getExpand();
4327       }
4328       return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
4329     } else if (IsVectorCall) {
4330       if (FreeSSERegs >= NumElts &&
4331           (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) {
4332         FreeSSERegs -= NumElts;
4333         return ABIArgInfo::getDirect();
4334       } else if (IsReturnType) {
4335         return ABIArgInfo::getExpand();
4336       } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
4337         // HVAs are delayed and reclassified in the 2nd step.
4338         return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
4339       }
4340     }
4341   }
4342
4343   if (Ty->isMemberPointerType()) {
4344     // If the member pointer is represented by an LLVM int or ptr, pass it
4345     // directly.
4346     llvm::Type *LLTy = CGT.ConvertType(Ty);
4347     if (LLTy->isPointerTy() || LLTy->isIntegerTy())
4348       return ABIArgInfo::getDirect();
4349   }
4350
4351   if (RT || Ty->isAnyComplexType() || Ty->isMemberPointerType()) {
4352     // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
4353     // not 1, 2, 4, or 8 bytes, must be passed by reference."
4354     if (Width > 64 || !llvm::isPowerOf2_64(Width))
4355       return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
4356
4357     // Otherwise, coerce it to a small integer.
4358     return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width));
4359   }
4360
4361   if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
4362     switch (BT->getKind()) {
4363     case BuiltinType::Bool:
4364       // Bool type is always extended to the ABI, other builtin types are not
4365       // extended.
4366       return ABIArgInfo::getExtend(Ty);
4367
4368     case BuiltinType::LongDouble:
4369       // Mingw64 GCC uses the old 80 bit extended precision floating point
4370       // unit. It passes them indirectly through memory.
4371       if (IsMingw64) {
4372         const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
4373         if (LDF == &llvm::APFloat::x87DoubleExtended())
4374           return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
4375       }
4376       break;
4377
4378     case BuiltinType::Int128:
4379     case BuiltinType::UInt128:
4380       // If it's a parameter type, the normal ABI rule is that arguments larger
4381       // than 8 bytes are passed indirectly. GCC follows it. We follow it too,
4382       // even though it isn't particularly efficient.
4383       if (!IsReturnType)
4384         return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
4385
4386       // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that.
4387       // Clang matches them for compatibility.
4388       return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
4389           llvm::Type::getInt64Ty(getVMContext()), 2));
4390
4391     default:
4392       break;
4393     }
4394   }
4395
4396   if (Ty->isBitIntType()) {
4397     // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
4398     // not 1, 2, 4, or 8 bytes, must be passed by reference."
4399     // However, non-power-of-two bit-precise integers will be passed as 1, 2, 4,
4400     // or 8 bytes anyway as long is it fits in them, so we don't have to check
4401     // the power of 2.
4402     if (Width <= 64)
4403       return ABIArgInfo::getDirect();
4404     return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
4405   }
4406
4407   return ABIArgInfo::getDirect();
4408 }
4409
4410 void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
4411   const unsigned CC = FI.getCallingConvention();
4412   bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall;
4413   bool IsRegCall = CC == llvm::CallingConv::X86_RegCall;
4414
4415   // If __attribute__((sysv_abi)) is in use, use the SysV argument
4416   // classification rules.
4417   if (CC == llvm::CallingConv::X86_64_SysV) {
4418     X86_64ABIInfo SysVABIInfo(CGT, AVXLevel);
4419     SysVABIInfo.computeInfo(FI);
4420     return;
4421   }
4422
4423   unsigned FreeSSERegs = 0;
4424   if (IsVectorCall) {
4425     // We can use up to 4 SSE return registers with vectorcall.
4426     FreeSSERegs = 4;
4427   } else if (IsRegCall) {
4428     // RegCall gives us 16 SSE registers.
4429     FreeSSERegs = 16;
4430   }
4431
4432   if (!getCXXABI().classifyReturnType(FI))
4433     FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true,
4434                                   IsVectorCall, IsRegCall);
4435
4436   if (IsVectorCall) {
4437     // We can use up to 6 SSE register parameters with vectorcall.
4438     FreeSSERegs = 6;
4439   } else if (IsRegCall) {
4440     // RegCall gives us 16 SSE registers, we can reuse the return registers.
4441     FreeSSERegs = 16;
4442   }
4443
4444   unsigned ArgNum = 0;
4445   unsigned ZeroSSERegs = 0;
4446   for (auto &I : FI.arguments()) {
4447     // Vectorcall in x64 only permits the first 6 arguments to be passed as
4448     // XMM/YMM registers. After the sixth argument, pretend no vector
4449     // registers are left.
4450     unsigned *MaybeFreeSSERegs =
4451         (IsVectorCall && ArgNum >= 6) ? &ZeroSSERegs : &FreeSSERegs;
4452     I.info =
4453         classify(I.type, *MaybeFreeSSERegs, false, IsVectorCall, IsRegCall);
4454     ++ArgNum;
4455   }
4456
4457   if (IsVectorCall) {
4458     // For vectorcall, assign aggregate HVAs to any free vector registers in a
4459     // second pass.
4460     for (auto &I : FI.arguments())
4461       I.info = reclassifyHvaArgForVectorCall(I.type, FreeSSERegs, I.info);
4462   }
4463 }
4464
4465 Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
4466                                     QualType Ty) const {
4467   // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
4468   // not 1, 2, 4, or 8 bytes, must be passed by reference."
4469   uint64_t Width = getContext().getTypeSize(Ty);
4470   bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);
4471
4472   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
4473                           CGF.getContext().getTypeInfoInChars(Ty),
4474                           CharUnits::fromQuantity(8),
4475                           /*allowHigherAlign*/ false);
4476 }
4477
4478 static bool PPC_initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
4479                                         llvm::Value *Address, bool Is64Bit,
4480                                         bool IsAIX) {
4481   // This is calculated from the LLVM and GCC tables and verified
4482   // against gcc output.  AFAIK all PPC ABIs use the same encoding.
4483
4484   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4485
4486   llvm::IntegerType *i8 = CGF.Int8Ty;
4487   llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4);
4488   llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8);
4489   llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16);
4490
4491   // 0-31: r0-31, the 4-byte or 8-byte general-purpose registers
4492   AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 0, 31);
4493
4494   // 32-63: fp0-31, the 8-byte floating-point registers
4495   AssignToArrayRange(Builder, Address, Eight8, 32, 63);
4496
4497   // 64-67 are various 4-byte or 8-byte special-purpose registers:
4498   // 64: mq
4499   // 65: lr
4500   // 66: ctr
4501   // 67: ap
4502   AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 64, 67);
4503
4504   // 68-76 are various 4-byte special-purpose registers:
4505   // 68-75 cr0-7
4506   // 76: xer
4507   AssignToArrayRange(Builder, Address, Four8, 68, 76);
4508
4509   // 77-108: v0-31, the 16-byte vector registers
4510   AssignToArrayRange(Builder, Address, Sixteen8, 77, 108);
4511
4512   // 109: vrsave
4513   // 110: vscr
4514   AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 109, 110);
4515
4516   // AIX does not utilize the rest of the registers.
4517   if (IsAIX)
4518     return false;
4519
4520   // 111: spe_acc
4521   // 112: spefscr
4522   // 113: sfp
4523   AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 111, 113);
4524
4525   if (!Is64Bit)
4526     return false;
4527
4528   // TODO: Need to verify if these registers are used on 64 bit AIX with Power8
4529   // or above CPU.
4530   // 64-bit only registers:
4531   // 114: tfhar
4532   // 115: tfiar
4533   // 116: texasr
4534   AssignToArrayRange(Builder, Address, Eight8, 114, 116);
4535
4536   return false;
4537 }
4538
4539 // AIX
4540 namespace {
4541 /// AIXABIInfo - The AIX XCOFF ABI information.
4542 class AIXABIInfo : public ABIInfo {
4543   const bool Is64Bit;
4544   const unsigned PtrByteSize;
4545   CharUnits getParamTypeAlignment(QualType Ty) const;
4546
4547 public:
4548   AIXABIInfo(CodeGen::CodeGenTypes &CGT, bool Is64Bit)
4549       : ABIInfo(CGT), Is64Bit(Is64Bit), PtrByteSize(Is64Bit ? 8 : 4) {}
4550
4551   bool isPromotableTypeForABI(QualType Ty) const;
4552
4553   ABIArgInfo classifyReturnType(QualType RetTy) const;
4554   ABIArgInfo classifyArgumentType(QualType Ty) const;
4555
4556   void computeInfo(CGFunctionInfo &FI) const override {
4557     if (!getCXXABI().classifyReturnType(FI))
4558       FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
4559
4560     for (auto &I : FI.arguments())
4561       I.info = classifyArgumentType(I.type);
4562   }
4563
4564   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
4565                     QualType Ty) const override;
4566 };
4567
4568 class AIXTargetCodeGenInfo : public TargetCodeGenInfo {
4569   const bool Is64Bit;
4570
4571 public:
4572   AIXTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool Is64Bit)
4573       : TargetCodeGenInfo(std::make_unique<AIXABIInfo>(CGT, Is64Bit)),
4574         Is64Bit(Is64Bit) {}
4575   int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
4576     return 1; // r1 is the dedicated stack pointer
4577   }
4578
4579   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
4580                                llvm::Value *Address) const override;
4581 };
4582 } // namespace
4583
4584 // Return true if the ABI requires Ty to be passed sign- or zero-
4585 // extended to 32/64 bits.
4586 bool AIXABIInfo::isPromotableTypeForABI(QualType Ty) const {
4587   // Treat an enum type as its underlying type.
4588   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
4589     Ty = EnumTy->getDecl()->getIntegerType();
4590
4591   // Promotable integer types are required to be promoted by the ABI.
4592   if (getContext().isPromotableIntegerType(Ty))
4593     return true;
4594
4595   if (!Is64Bit)
4596     return false;
4597
4598   // For 64 bit mode, in addition to the usual promotable integer types, we also
4599   // need to extend all 32-bit types, since the ABI requires promotion to 64
4600   // bits.
4601   if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
4602     switch (BT->getKind()) {
4603     case BuiltinType::Int:
4604     case BuiltinType::UInt:
4605       return true;
4606     default:
4607       break;
4608     }
4609
4610   return false;
4611 }
4612
4613 ABIArgInfo AIXABIInfo::classifyReturnType(QualType RetTy) const {
4614   if (RetTy->isAnyComplexType())
4615     return ABIArgInfo::getDirect();
4616
4617   if (RetTy->isVectorType())
4618     return ABIArgInfo::getDirect();
4619
4620   if (RetTy->isVoidType())
4621     return ABIArgInfo::getIgnore();
4622
4623   if (isAggregateTypeForABI(RetTy))
4624     return getNaturalAlignIndirect(RetTy);
4625
4626   return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
4627                                         : ABIArgInfo::getDirect());
4628 }
4629
4630 ABIArgInfo AIXABIInfo::classifyArgumentType(QualType Ty) const {
4631   Ty = useFirstFieldIfTransparentUnion(Ty);
4632
4633   if (Ty->isAnyComplexType())
4634     return ABIArgInfo::getDirect();
4635
4636   if (Ty->isVectorType())
4637     return ABIArgInfo::getDirect();
4638
4639   if (isAggregateTypeForABI(Ty)) {
4640     // Records with non-trivial destructors/copy-constructors should not be
4641     // passed by value.
4642     if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
4643       return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
4644
4645     CharUnits CCAlign = getParamTypeAlignment(Ty);
4646     CharUnits TyAlign = getContext().getTypeAlignInChars(Ty);
4647
4648     return ABIArgInfo::getIndirect(CCAlign, /*ByVal*/ true,
4649                                    /*Realign*/ TyAlign > CCAlign);
4650   }
4651
4652   return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
4653                                      : ABIArgInfo::getDirect());
4654 }
4655
4656 CharUnits AIXABIInfo::getParamTypeAlignment(QualType Ty) const {
4657   // Complex types are passed just like their elements.
4658   if (const ComplexType *CTy = Ty->getAs<ComplexType>())
4659     Ty = CTy->getElementType();
4660
4661   if (Ty->isVectorType())
4662     return CharUnits::fromQuantity(16);
4663
4664   // If the structure contains a vector type, the alignment is 16.
4665   if (isRecordWithSIMDVectorType(getContext(), Ty))
4666     return CharUnits::fromQuantity(16);
4667
4668   return CharUnits::fromQuantity(PtrByteSize);
4669 }
4670
4671 Address AIXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
4672                               QualType Ty) const {
4673
4674   auto TypeInfo = getContext().getTypeInfoInChars(Ty);
4675   TypeInfo.Align = getParamTypeAlignment(Ty);
4676
4677   CharUnits SlotSize = CharUnits::fromQuantity(PtrByteSize);
4678
4679   // If we have a complex type and the base type is smaller than the register
4680   // size, the ABI calls for the real and imaginary parts to be right-adjusted
4681   // in separate words in 32bit mode or doublewords in 64bit mode. However,
4682   // Clang expects us to produce a pointer to a structure with the two parts
4683   // packed tightly. So generate loads of the real and imaginary parts relative
4684   // to the va_list pointer, and store them to a temporary structure. We do the
4685   // same as the PPC64ABI here.
4686   if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
4687     CharUnits EltSize = TypeInfo.Width / 2;
4688     if (EltSize < SlotSize)
4689       return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy);
4690   }
4691
4692   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo,
4693                           SlotSize, /*AllowHigher*/ true);
4694 }
4695
4696 bool AIXTargetCodeGenInfo::initDwarfEHRegSizeTable(
4697     CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const {
4698   return PPC_initDwarfEHRegSizeTable(CGF, Address, Is64Bit, /*IsAIX*/ true);
4699 }
4700
4701 // PowerPC-32
4702 namespace {
4703 /// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information.
4704 class PPC32_SVR4_ABIInfo : public DefaultABIInfo {
4705   bool IsSoftFloatABI;
4706   bool IsRetSmallStructInRegABI;
4707
4708   CharUnits getParamTypeAlignment(QualType Ty) const;
4709
4710 public:
4711   PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI,
4712                      bool RetSmallStructInRegABI)
4713       : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI),
4714         IsRetSmallStructInRegABI(RetSmallStructInRegABI) {}
4715
4716   ABIArgInfo classifyReturnType(QualType RetTy) const;
4717
4718   void computeInfo(CGFunctionInfo &FI) const override {
4719     if (!getCXXABI().classifyReturnType(FI))
4720       FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
4721     for (auto &I : FI.arguments())
4722       I.info = classifyArgumentType(I.type);
4723   }
4724
4725   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
4726                     QualType Ty) const override;
4727 };
4728
4729 class PPC32TargetCodeGenInfo : public TargetCodeGenInfo {
4730 public:
4731   PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI,
4732                          bool RetSmallStructInRegABI)
4733       : TargetCodeGenInfo(std::make_unique<PPC32_SVR4_ABIInfo>(
4734             CGT, SoftFloatABI, RetSmallStructInRegABI)) {}
4735
4736   static bool isStructReturnInRegABI(const llvm::Triple &Triple,
4737                                      const CodeGenOptions &Opts);
4738
4739   int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
4740     // This is recovered from gcc output.
4741     return 1; // r1 is the dedicated stack pointer
4742   }
4743
4744   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
4745                                llvm::Value *Address) const override;
4746 };
4747 }
4748
4749 CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const {
4750   // Complex types are passed just like their elements.
4751   if (const ComplexType *CTy = Ty->getAs<ComplexType>())
4752     Ty = CTy->getElementType();
4753
4754   if (Ty->isVectorType())
4755     return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16
4756                                                                        : 4);
4757
4758   // For single-element float/vector structs, we consider the whole type
4759   // to have the same alignment requirements as its single element.
4760   const Type *AlignTy = nullptr;
4761   if (const Type *EltType = isSingleElementStruct(Ty, getContext())) {
4762     const BuiltinType *BT = EltType->getAs<BuiltinType>();
4763     if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) ||
4764         (BT && BT->isFloatingPoint()))
4765       AlignTy = EltType;
4766   }
4767
4768   if (AlignTy)
4769     return CharUnits::fromQuantity(AlignTy->isVectorType() ? 16 : 4);
4770   return CharUnits::fromQuantity(4);
4771 }
4772
4773 ABIArgInfo PPC32_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const {
4774   uint64_t Size;
4775
4776   // -msvr4-struct-return puts small aggregates in GPR3 and GPR4.
4777   if (isAggregateTypeForABI(RetTy) && IsRetSmallStructInRegABI &&
4778       (Size = getContext().getTypeSize(RetTy)) <= 64) {
4779     // System V ABI (1995), page 3-22, specified:
4780     // > A structure or union whose size is less than or equal to 8 bytes
4781     // > shall be returned in r3 and r4, as if it were first stored in the
4782     // > 8-byte aligned memory area and then the low addressed word were
4783     // > loaded into r3 and the high-addressed word into r4.  Bits beyond
4784     // > the last member of the structure or union are not defined.
4785     //
4786     // GCC for big-endian PPC32 inserts the pad before the first member,
4787     // not "beyond the last member" of the struct.  To stay compatible
4788     // with GCC, we coerce the struct to an integer of the same size.
4789     // LLVM will extend it and return i32 in r3, or i64 in r3:r4.
4790     if (Size == 0)
4791       return ABIArgInfo::getIgnore();
4792     else {
4793       llvm::Type *CoerceTy = llvm::Type::getIntNTy(getVMContext(), Size);
4794       return ABIArgInfo::getDirect(CoerceTy);
4795     }
4796   }
4797
4798   return DefaultABIInfo::classifyReturnType(RetTy);
4799 }
4800
4801 // TODO: this implementation is now likely redundant with
4802 // DefaultABIInfo::EmitVAArg.
4803 Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList,
4804                                       QualType Ty) const {
4805   if (getTarget().getTriple().isOSDarwin()) {
4806     auto TI = getContext().getTypeInfoInChars(Ty);
4807     TI.Align = getParamTypeAlignment(Ty);
4808
4809     CharUnits SlotSize = CharUnits::fromQuantity(4);
4810     return emitVoidPtrVAArg(CGF, VAList, Ty,
4811                             classifyArgumentType(Ty).isIndirect(), TI, SlotSize,
4812                             /*AllowHigherAlign=*/true);
4813   }
4814
4815   const unsigned OverflowLimit = 8;
4816   if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
4817     // TODO: Implement this. For now ignore.
4818     (void)CTy;
4819     return Address::invalid(); // FIXME?
4820   }
4821
4822   // struct __va_list_tag {
4823   //   unsigned char gpr;
4824   //   unsigned char fpr;
4825   //   unsigned short reserved;
4826   //   void *overflow_arg_area;
4827   //   void *reg_save_area;
4828   // };
4829
4830   bool isI64 = Ty->isIntegerType() && getContext().getTypeSize(Ty) == 64;
4831   bool isInt = !Ty->isFloatingType();
4832   bool isF64 = Ty->isFloatingType() && getContext().getTypeSize(Ty) == 64;
4833
4834   // All aggregates are passed indirectly?  That doesn't seem consistent
4835   // with the argument-lowering code.
4836   bool isIndirect = isAggregateTypeForABI(Ty);
4837
4838   CGBuilderTy &Builder = CGF.Builder;
4839
4840   // The calling convention either uses 1-2 GPRs or 1 FPR.
4841   Address NumRegsAddr = Address::invalid();
4842   if (isInt || IsSoftFloatABI) {
4843     NumRegsAddr = Builder.CreateStructGEP(VAList, 0, "gpr");
4844   } else {
4845     NumRegsAddr = Builder.CreateStructGEP(VAList, 1, "fpr");
4846   }
4847
4848   llvm::Value *NumRegs = Builder.CreateLoad(NumRegsAddr, "numUsedRegs");
4849
4850   // "Align" the register count when TY is i64.
4851   if (isI64 || (isF64 && IsSoftFloatABI)) {
4852     NumRegs = Builder.CreateAdd(NumRegs, Builder.getInt8(1));
4853     NumRegs = Builder.CreateAnd(NumRegs, Builder.getInt8((uint8_t) ~1U));
4854   }
4855
4856   llvm::Value *CC =
4857       Builder.CreateICmpULT(NumRegs, Builder.getInt8(OverflowLimit), "cond");
4858
4859   llvm::BasicBlock *UsingRegs = CGF.createBasicBlock("using_regs");
4860   llvm::BasicBlock *UsingOverflow = CGF.createBasicBlock("using_overflow");
4861   llvm::BasicBlock *Cont = CGF.createBasicBlock("cont");
4862
4863   Builder.CreateCondBr(CC, UsingRegs, UsingOverflow);
4864
4865   llvm::Type *DirectTy = CGF.ConvertType(Ty), *ElementTy = DirectTy;
4866   if (isIndirect) DirectTy = DirectTy->getPointerTo(0);
4867
4868   // Case 1: consume registers.
4869   Address RegAddr = Address::invalid();
4870   {
4871     CGF.EmitBlock(UsingRegs);
4872
4873     Address RegSaveAreaPtr = Builder.CreateStructGEP(VAList, 4);
4874     RegAddr = Address(Builder.CreateLoad(RegSaveAreaPtr), CGF.Int8Ty,
4875                       CharUnits::fromQuantity(8));
4876     assert(RegAddr.getElementType() == CGF.Int8Ty);
4877
4878     // Floating-point registers start after the general-purpose registers.
4879     if (!(isInt || IsSoftFloatABI)) {
4880       RegAddr = Builder.CreateConstInBoundsByteGEP(RegAddr,
4881                                                    CharUnits::fromQuantity(32));
4882     }
4883
4884     // Get the address of the saved value by scaling the number of
4885     // registers we've used by the number of
4886     CharUnits RegSize = CharUnits::fromQuantity((isInt || IsSoftFloatABI) ? 4 : 8);
4887     llvm::Value *RegOffset =
4888         Builder.CreateMul(NumRegs, Builder.getInt8(RegSize.getQuantity()));
4889     RegAddr = Address(
4890         Builder.CreateInBoundsGEP(CGF.Int8Ty, RegAddr.getPointer(), RegOffset),
4891         CGF.Int8Ty, RegAddr.getAlignment().alignmentOfArrayElement(RegSize));
4892     RegAddr = Builder.CreateElementBitCast(RegAddr, DirectTy);
4893
4894     // Increase the used-register count.
4895     NumRegs =
4896       Builder.CreateAdd(NumRegs,
4897                         Builder.getInt8((isI64 || (isF64 && IsSoftFloatABI)) ? 2 : 1));
4898     Builder.CreateStore(NumRegs, NumRegsAddr);
4899
4900     CGF.EmitBranch(Cont);
4901   }
4902
4903   // Case 2: consume space in the overflow area.
4904   Address MemAddr = Address::invalid();
4905   {
4906     CGF.EmitBlock(UsingOverflow);
4907
4908     Builder.CreateStore(Builder.getInt8(OverflowLimit), NumRegsAddr);
4909
4910     // Everything in the overflow area is rounded up to a size of at least 4.
4911     CharUnits OverflowAreaAlign = CharUnits::fromQuantity(4);
4912
4913     CharUnits Size;
4914     if (!isIndirect) {
4915       auto TypeInfo = CGF.getContext().getTypeInfoInChars(Ty);
4916       Size = TypeInfo.Width.alignTo(OverflowAreaAlign);
4917     } else {
4918       Size = CGF.getPointerSize();
4919     }
4920
4921     Address OverflowAreaAddr = Builder.CreateStructGEP(VAList, 3);
4922     Address OverflowArea =
4923         Address(Builder.CreateLoad(OverflowAreaAddr, "argp.cur"), CGF.Int8Ty,
4924                 OverflowAreaAlign);
4925     // Round up address of argument to alignment
4926     CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty);
4927     if (Align > OverflowAreaAlign) {
4928       llvm::Value *Ptr = OverflowArea.getPointer();
4929       OverflowArea = Address(emitRoundPointerUpToAlignment(CGF, Ptr, Align),
4930                              OverflowArea.getElementType(), Align);
4931     }
4932
4933     MemAddr = Builder.CreateElementBitCast(OverflowArea, DirectTy);
4934
4935     // Increase the overflow area.
4936     OverflowArea = Builder.CreateConstInBoundsByteGEP(OverflowArea, Size);
4937     Builder.CreateStore(OverflowArea.getPointer(), OverflowAreaAddr);
4938     CGF.EmitBranch(Cont);
4939   }
4940
4941   CGF.EmitBlock(Cont);
4942
4943   // Merge the cases with a phi.
4944   Address Result = emitMergePHI(CGF, RegAddr, UsingRegs, MemAddr, UsingOverflow,
4945                                 "vaarg.addr");
4946
4947   // Load the pointer if the argument was passed indirectly.
4948   if (isIndirect) {
4949     Result = Address(Builder.CreateLoad(Result, "aggr"), ElementTy,
4950                      getContext().getTypeAlignInChars(Ty));
4951   }
4952
4953   return Result;
4954 }
4955
4956 bool PPC32TargetCodeGenInfo::isStructReturnInRegABI(
4957     const llvm::Triple &Triple, const CodeGenOptions &Opts) {
4958   assert(Triple.isPPC32());
4959
4960   switch (Opts.getStructReturnConvention()) {
4961   case CodeGenOptions::SRCK_Default:
4962     break;
4963   case CodeGenOptions::SRCK_OnStack: // -maix-struct-return
4964     return false;
4965   case CodeGenOptions::SRCK_InRegs: // -msvr4-struct-return
4966     return true;
4967   }
4968
4969   if (Triple.isOSBinFormatELF() && !Triple.isOSLinux())
4970     return true;
4971
4972   return false;
4973 }
4974
4975 bool
4976 PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
4977                                                 llvm::Value *Address) const {
4978   return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ false,
4979                                      /*IsAIX*/ false);
4980 }
4981
4982 // PowerPC-64
4983
4984 namespace {
4985 enum class PPC64_SVR4_ABIKind {
4986   ELFv1 = 0,
4987   ELFv2,
4988 };
4989
4990 /// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information.
4991 class PPC64_SVR4_ABIInfo : public ABIInfo {
4992   static const unsigned GPRBits = 64;
4993   PPC64_SVR4_ABIKind Kind;
4994   bool IsSoftFloatABI;
4995
4996 public:
4997   PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, PPC64_SVR4_ABIKind Kind,
4998                      bool SoftFloatABI)
4999       : ABIInfo(CGT), Kind(Kind), IsSoftFloatABI(SoftFloatABI) {}
5000
5001   bool isPromotableTypeForABI(QualType Ty) const;
5002   CharUnits getParamTypeAlignment(QualType Ty) const;
5003
5004   ABIArgInfo classifyReturnType(QualType RetTy) const;
5005   ABIArgInfo classifyArgumentType(QualType Ty) const;
5006
5007   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
5008   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
5009                                          uint64_t Members) const override;
5010
5011   // TODO: We can add more logic to computeInfo to improve performance.
5012   // Example: For aggregate arguments that fit in a register, we could
5013   // use getDirectInReg (as is done below for structs containing a single
5014   // floating-point value) to avoid pushing them to memory on function
5015   // entry.  This would require changing the logic in PPCISelLowering
5016   // when lowering the parameters in the caller and args in the callee.
5017   void computeInfo(CGFunctionInfo &FI) const override {
5018     if (!getCXXABI().classifyReturnType(FI))
5019       FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
5020     for (auto &I : FI.arguments()) {
5021       // We rely on the default argument classification for the most part.
5022       // One exception:  An aggregate containing a single floating-point
5023       // or vector item must be passed in a register if one is available.
5024       const Type *T = isSingleElementStruct(I.type, getContext());
5025       if (T) {
5026         const BuiltinType *BT = T->getAs<BuiltinType>();
5027         if ((T->isVectorType() && getContext().getTypeSize(T) == 128) ||
5028             (BT && BT->isFloatingPoint())) {
5029           QualType QT(T, 0);
5030           I.info = ABIArgInfo::getDirectInReg(CGT.ConvertType(QT));
5031           continue;
5032         }
5033       }
5034       I.info = classifyArgumentType(I.type);
5035     }
5036   }
5037
5038   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
5039                     QualType Ty) const override;
5040 };
5041
5042 class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo {
5043
5044 public:
5045   PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT, PPC64_SVR4_ABIKind Kind,
5046                                bool SoftFloatABI)
5047       : TargetCodeGenInfo(
5048             std::make_unique<PPC64_SVR4_ABIInfo>(CGT, Kind, SoftFloatABI)) {
5049     SwiftInfo =
5050         std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false);
5051   }
5052
5053   int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
5054     // This is recovered from gcc output.
5055     return 1; // r1 is the dedicated stack pointer
5056   }
5057
5058   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
5059                                llvm::Value *Address) const override;
5060 };
5061
5062 class PPC64TargetCodeGenInfo : public TargetCodeGenInfo {
5063 public:
5064   PPC64TargetCodeGenInfo(CodeGenTypes &CGT)
5065       : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}
5066
5067   int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
5068     // This is recovered from gcc output.
5069     return 1; // r1 is the dedicated stack pointer
5070   }
5071
5072   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
5073                                llvm::Value *Address) const override;
5074 };
5075 }
5076
5077 // Return true if the ABI requires Ty to be passed sign- or zero-
5078 // extended to 64 bits.
5079 bool
5080 PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const {
5081   // Treat an enum type as its underlying type.
5082   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
5083     Ty = EnumTy->getDecl()->getIntegerType();
5084
5085   // Promotable integer types are required to be promoted by the ABI.
5086   if (isPromotableIntegerTypeForABI(Ty))
5087     return true;
5088
5089   // In addition to the usual promotable integer types, we also need to
5090   // extend all 32-bit types, since the ABI requires promotion to 64 bits.
5091   if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
5092     switch (BT->getKind()) {
5093     case BuiltinType::Int:
5094     case BuiltinType::UInt:
5095       return true;
5096     default:
5097       break;
5098     }
5099
5100   if (const auto *EIT = Ty->getAs<BitIntType>())
5101     if (EIT->getNumBits() < 64)
5102       return true;
5103
5104   return false;
5105 }
5106
5107 /// isAlignedParamType - Determine whether a type requires 16-byte or
5108 /// higher alignment in the parameter area.  Always returns at least 8.
5109 CharUnits PPC64_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const {
5110   // Complex types are passed just like their elements.
5111   if (const ComplexType *CTy = Ty->getAs<ComplexType>())
5112     Ty = CTy->getElementType();
5113
5114   auto FloatUsesVector = [this](QualType Ty){
5115     return Ty->isRealFloatingType() && &getContext().getFloatTypeSemantics(
5116                                            Ty) == &llvm::APFloat::IEEEquad();
5117   };
5118
5119   // Only vector types of size 16 bytes need alignment (larger types are
5120   // passed via reference, smaller types are not aligned).
5121   if (Ty->isVectorType()) {
5122     return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 : 8);
5123   } else if (FloatUsesVector(Ty)) {
5124     // According to ABI document section 'Optional Save Areas': If extended
5125     // precision floating-point values in IEEE BINARY 128 QUADRUPLE PRECISION
5126     // format are supported, map them to a single quadword, quadword aligned.
5127     return CharUnits::fromQuantity(16);
5128   }
5129
5130   // For single-element float/vector structs, we consider the whole type
5131   // to have the same alignment requirements as its single element.
5132   const Type *AlignAsType = nullptr;
5133   const Type *EltType = isSingleElementStruct(Ty, getContext());
5134   if (EltType) {
5135     const BuiltinType *BT = EltType->getAs<BuiltinType>();
5136     if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) ||
5137         (BT && BT->isFloatingPoint()))
5138       AlignAsType = EltType;
5139   }
5140
5141   // Likewise for ELFv2 homogeneous aggregates.
5142   const Type *Base = nullptr;
5143   uint64_t Members = 0;
5144   if (!AlignAsType && Kind == PPC64_SVR4_ABIKind::ELFv2 &&
5145       isAggregateTypeForABI(Ty) && isHomogeneousAggregate(Ty, Base, Members))
5146     AlignAsType = Base;
5147
5148   // With special case aggregates, only vector base types need alignment.
5149   if (AlignAsType) {
5150     bool UsesVector = AlignAsType->isVectorType() ||
5151                       FloatUsesVector(QualType(AlignAsType, 0));
5152     return CharUnits::fromQuantity(UsesVector ? 16 : 8);
5153   }
5154
5155   // Otherwise, we only need alignment for any aggregate type that
5156   // has an alignment requirement of >= 16 bytes.
5157   if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >= 128) {
5158     return CharUnits::fromQuantity(16);
5159   }
5160
5161   return CharUnits::fromQuantity(8);
5162 }
5163
5164 /// isHomogeneousAggregate - Return true if a type is an ELFv2 homogeneous
5165 /// aggregate.  Base is set to the base element type, and Members is set
5166 /// to the number of base elements.
5167 bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
5168                                      uint64_t &Members) const {
5169   if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
5170     uint64_t NElements = AT->getSize().getZExtValue();
5171     if (NElements == 0)
5172       return false;
5173     if (!isHomogeneousAggregate(AT->getElementType(), Base, Members))
5174       return false;
5175     Members *= NElements;
5176   } else if (const RecordType *RT = Ty->getAs<RecordType>()) {
5177     const RecordDecl *RD = RT->getDecl();
5178     if (RD->hasFlexibleArrayMember())
5179       return false;
5180
5181     Members = 0;
5182
5183     // If this is a C++ record, check the properties of the record such as
5184     // bases and ABI specific restrictions
5185     if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
5186       if (!getCXXABI().isPermittedToBeHomogeneousAggregate(CXXRD))
5187         return false;
5188
5189       for (const auto &I : CXXRD->bases()) {
5190         // Ignore empty records.
5191         if (isEmptyRecord(getContext(), I.getType(), true))
5192           continue;
5193
5194         uint64_t FldMembers;
5195         if (!isHomogeneousAggregate(I.getType(), Base, FldMembers))
5196           return false;
5197
5198         Members += FldMembers;
5199       }
5200     }
5201
5202     for (const auto *FD : RD->fields()) {
5203       // Ignore (non-zero arrays of) empty records.
5204       QualType FT = FD->getType();
5205       while (const ConstantArrayType *AT =
5206              getContext().getAsConstantArrayType(FT)) {
5207         if (AT->getSize().getZExtValue() == 0)
5208           return false;
5209         FT = AT->getElementType();
5210       }
5211       if (isEmptyRecord(getContext(), FT, true))
5212         continue;
5213
5214       if (isZeroLengthBitfieldPermittedInHomogeneousAggregate() &&
5215           FD->isZeroLengthBitField(getContext()))
5216         continue;
5217
5218       uint64_t FldMembers;
5219       if (!isHomogeneousAggregate(FD->getType(), Base, FldMembers))
5220         return false;
5221
5222       Members = (RD->isUnion() ?
5223                  std::max(Members, FldMembers) : Members + FldMembers);
5224     }
5225
5226     if (!Base)
5227       return false;
5228
5229     // Ensure there is no padding.
5230     if (getContext().getTypeSize(Base) * Members !=
5231         getContext().getTypeSize(Ty))
5232       return false;
5233   } else {
5234     Members = 1;
5235     if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
5236       Members = 2;
5237       Ty = CT->getElementType();
5238     }
5239
5240     // Most ABIs only support float, double, and some vector type widths.
5241     if (!isHomogeneousAggregateBaseType(Ty))
5242       return false;
5243
5244     // The base type must be the same for all members.  Types that
5245     // agree in both total size and mode (float vs. vector) are
5246     // treated as being equivalent here.
5247     const Type *TyPtr = Ty.getTypePtr();
5248     if (!Base) {
5249       Base = TyPtr;
5250       // If it's a non-power-of-2 vector, its size is already a power-of-2,
5251       // so make sure to widen it explicitly.
5252       if (const VectorType *VT = Base->getAs<VectorType>()) {
5253         QualType EltTy = VT->getElementType();
5254         unsigned NumElements =
5255             getContext().getTypeSize(VT) / getContext().getTypeSize(EltTy);
5256         Base = getContext()
5257                    .getVectorType(EltTy, NumElements, VT->getVectorKind())
5258                    .getTypePtr();
5259       }
5260     }
5261
5262     if (Base->isVectorType() != TyPtr->isVectorType() ||
5263         getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr))
5264       return false;
5265   }
5266   return Members > 0 && isHomogeneousAggregateSmallEnough(Base, Members);
5267 }
5268
5269 bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
5270   // Homogeneous aggregates for ELFv2 must have base types of float,
5271   // double, long double, or 128-bit vectors.
5272   if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
5273     if (BT->getKind() == BuiltinType::Float ||
5274         BT->getKind() == BuiltinType::Double ||
5275         BT->getKind() == BuiltinType::LongDouble ||
5276         BT->getKind() == BuiltinType::Ibm128 ||
5277         (getContext().getTargetInfo().hasFloat128Type() &&
5278          (BT->getKind() == BuiltinType::Float128))) {
5279       if (IsSoftFloatABI)
5280         return false;
5281       return true;
5282     }
5283   }
5284   if (const VectorType *VT = Ty->getAs<VectorType>()) {
5285     if (getContext().getTypeSize(VT) == 128)
5286       return true;
5287   }
5288   return false;
5289 }
5290
5291 bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateSmallEnough(
5292     const Type *Base, uint64_t Members) const {
5293   // Vector and fp128 types require one register, other floating point types
5294   // require one or two registers depending on their size.
5295   uint32_t NumRegs =
5296       ((getContext().getTargetInfo().hasFloat128Type() &&
5297           Base->isFloat128Type()) ||
5298         Base->isVectorType()) ? 1
5299                               : (getContext().getTypeSize(Base) + 63) / 64;
5300
5301   // Homogeneous Aggregates may occupy at most 8 registers.
5302   return Members * NumRegs <= 8;
5303 }
5304
5305 ABIArgInfo
5306 PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
5307   Ty = useFirstFieldIfTransparentUnion(Ty);
5308
5309   if (Ty->isAnyComplexType())
5310     return ABIArgInfo::getDirect();
5311
5312   // Non-Altivec vector types are passed in GPRs (smaller than 16 bytes)
5313   // or via reference (larger than 16 bytes).
5314   if (Ty->isVectorType()) {
5315     uint64_t Size = getContext().getTypeSize(Ty);
5316     if (Size > 128)
5317       return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
5318     else if (Size < 128) {
5319       llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size);
5320       return ABIArgInfo::getDirect(CoerceTy);
5321     }
5322   }
5323
5324   if (const auto *EIT = Ty->getAs<BitIntType>())
5325     if (EIT->getNumBits() > 128)
5326       return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
5327
5328   if (isAggregateTypeForABI(Ty)) {
5329     if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
5330       return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
5331
5332     uint64_t ABIAlign = getParamTypeAlignment(Ty).getQuantity();
5333     uint64_t TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity();
5334
5335     // ELFv2 homogeneous aggregates are passed as array types.
5336     const Type *Base = nullptr;
5337     uint64_t Members = 0;
5338     if (Kind == PPC64_SVR4_ABIKind::ELFv2 &&
5339         isHomogeneousAggregate(Ty, Base, Members)) {
5340       llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0));
5341       llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members);
5342       return ABIArgInfo::getDirect(CoerceTy);
5343     }
5344
5345     // If an aggregate may end up fully in registers, we do not
5346     // use the ByVal method, but pass the aggregate as array.
5347     // This is usually beneficial since we avoid forcing the
5348     // back-end to store the argument to memory.
5349     uint64_t Bits = getContext().getTypeSize(Ty);
5350     if (Bits > 0 && Bits <= 8 * GPRBits) {
5351       llvm::Type *CoerceTy;
5352
5353       // Types up to 8 bytes are passed as integer type (which will be
5354       // properly aligned in the argument save area doubleword).
5355       if (Bits <= GPRBits)
5356         CoerceTy =
5357             llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
5358       // Larger types are passed as arrays, with the base type selected
5359       // according to the required alignment in the save area.
5360       else {
5361         uint64_t RegBits = ABIAlign * 8;
5362         uint64_t NumRegs = llvm::alignTo(Bits, RegBits) / RegBits;
5363         llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), RegBits);
5364         CoerceTy = llvm::ArrayType::get(RegTy, NumRegs);
5365       }
5366
5367       return ABIArgInfo::getDirect(CoerceTy);
5368     }
5369
5370     // All other aggregates are passed ByVal.
5371     return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign),
5372                                    /*ByVal=*/true,
5373                                    /*Realign=*/TyAlign > ABIAlign);
5374   }
5375
5376   return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
5377                                      : ABIArgInfo::getDirect());
5378 }
5379
5380 ABIArgInfo
5381 PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const {
5382   if (RetTy->isVoidType())
5383     return ABIArgInfo::getIgnore();
5384
5385   if (RetTy->isAnyComplexType())
5386     return ABIArgInfo::getDirect();
5387
5388   // Non-Altivec vector types are returned in GPRs (smaller than 16 bytes)
5389   // or via reference (larger than 16 bytes).
5390   if (RetTy->isVectorType()) {
5391     uint64_t Size = getContext().getTypeSize(RetTy);
5392     if (Size > 128)
5393       return getNaturalAlignIndirect(RetTy);
5394     else if (Size < 128) {
5395       llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size);
5396       return ABIArgInfo::getDirect(CoerceTy);
5397     }
5398   }
5399
5400   if (const auto *EIT = RetTy->getAs<BitIntType>())
5401     if (EIT->getNumBits() > 128)
5402       return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
5403
5404   if (isAggregateTypeForABI(RetTy)) {
5405     // ELFv2 homogeneous aggregates are returned as array types.
5406     const Type *Base = nullptr;
5407     uint64_t Members = 0;
5408     if (Kind == PPC64_SVR4_ABIKind::ELFv2 &&
5409         isHomogeneousAggregate(RetTy, Base, Members)) {
5410       llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0));
5411       llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members);
5412       return ABIArgInfo::getDirect(CoerceTy);
5413     }
5414
5415     // ELFv2 small aggregates are returned in up to two registers.
5416     uint64_t Bits = getContext().getTypeSize(RetTy);
5417     if (Kind == PPC64_SVR4_ABIKind::ELFv2 && Bits <= 2 * GPRBits) {
5418       if (Bits == 0)
5419         return ABIArgInfo::getIgnore();
5420
5421       llvm::Type *CoerceTy;
5422       if (Bits > GPRBits) {
5423         CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits);
5424         CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy);
5425       } else
5426         CoerceTy =
5427             llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
5428       return ABIArgInfo::getDirect(CoerceTy);
5429     }
5430
5431     // All other aggregates are returned indirectly.
5432     return getNaturalAlignIndirect(RetTy);
5433   }
5434
5435   return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
5436                                         : ABIArgInfo::getDirect());
5437 }
5438
5439 // Based on ARMABIInfo::EmitVAArg, adjusted for 64-bit machine.
5440 Address PPC64_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
5441                                       QualType Ty) const {
5442   auto TypeInfo = getContext().getTypeInfoInChars(Ty);
5443   TypeInfo.Align = getParamTypeAlignment(Ty);
5444
5445   CharUnits SlotSize = CharUnits::fromQuantity(8);
5446
5447   // If we have a complex type and the base type is smaller than 8 bytes,
5448   // the ABI calls for the real and imaginary parts to be right-adjusted
5449   // in separate doublewords.  However, Clang expects us to produce a
5450   // pointer to a structure with the two parts packed tightly.  So generate
5451   // loads of the real and imaginary parts relative to the va_list pointer,
5452   // and store them to a temporary structure.
5453   if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
5454     CharUnits EltSize = TypeInfo.Width / 2;
5455     if (EltSize < SlotSize)
5456       return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy);
5457   }
5458
5459   // Otherwise, just use the general rule.
5460   //
5461   // The PPC64 ABI passes some arguments in integer registers, even to variadic
5462   // functions. To allow va_list to use the simple "void*" representation,
5463   // variadic calls allocate space in the argument area for the integer argument
5464   // registers, and variadic functions spill their integer argument registers to
5465   // this area in their prologues. When aggregates smaller than a register are
5466   // passed this way, they are passed in the least significant bits of the
5467   // register, which means that after spilling on big-endian targets they will
5468   // be right-aligned in their argument slot. This is uncommon; for a variety of
5469   // reasons, other big-endian targets don't end up right-aligning aggregate
5470   // types this way, and so right-alignment only applies to fundamental types.
5471   // So on PPC64, we must force the use of right-alignment even for aggregates.
5472   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo,
5473                           SlotSize, /*AllowHigher*/ true,
5474                           /*ForceRightAdjust*/ true);
5475 }
5476
5477 bool
5478 PPC64_SVR4_TargetCodeGenInfo::initDwarfEHRegSizeTable(
5479   CodeGen::CodeGenFunction &CGF,
5480   llvm::Value *Address) const {
5481   return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ true,
5482                                      /*IsAIX*/ false);
5483 }
5484
5485 bool
5486 PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
5487                                                 llvm::Value *Address) const {
5488   return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ true,
5489                                      /*IsAIX*/ false);
5490 }
5491
5492 //===----------------------------------------------------------------------===//
5493 // AArch64 ABI Implementation
5494 //===----------------------------------------------------------------------===//
5495
5496 namespace {
5497
5498 enum class AArch64ABIKind {
5499   AAPCS = 0,
5500   DarwinPCS,
5501   Win64,
5502 };
5503
5504 class AArch64ABIInfo : public ABIInfo {
5505   AArch64ABIKind Kind;
5506
5507 public:
5508   AArch64ABIInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
5509       : ABIInfo(CGT), Kind(Kind) {}
5510
5511 private:
5512   AArch64ABIKind getABIKind() const { return Kind; }
5513   bool isDarwinPCS() const { return Kind == AArch64ABIKind::DarwinPCS; }
5514
5515   ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadic) const;
5516   ABIArgInfo classifyArgumentType(QualType RetTy, bool IsVariadic,
5517                                   unsigned CallingConvention) const;
5518   ABIArgInfo coerceIllegalVector(QualType Ty) const;
5519   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
5520   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
5521                                          uint64_t Members) const override;
5522   bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override;
5523
5524   bool isIllegalVectorType(QualType Ty) const;
5525
5526   void computeInfo(CGFunctionInfo &FI) const override {
5527     if (!::classifyReturnType(getCXXABI(), FI, *this))
5528       FI.getReturnInfo() =
5529           classifyReturnType(FI.getReturnType(), FI.isVariadic());
5530
5531     for (auto &it : FI.arguments())
5532       it.info = classifyArgumentType(it.type, FI.isVariadic(),
5533                                      FI.getCallingConvention());
5534   }
5535
5536   Address EmitDarwinVAArg(Address VAListAddr, QualType Ty,
5537                           CodeGenFunction &CGF) const;
5538
5539   Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
5540                          CodeGenFunction &CGF) const;
5541
5542   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
5543                     QualType Ty) const override {
5544     llvm::Type *BaseTy = CGF.ConvertType(Ty);
5545     if (isa<llvm::ScalableVectorType>(BaseTy))
5546       llvm::report_fatal_error("Passing SVE types to variadic functions is "
5547                                "currently not supported");
5548
5549     return Kind == AArch64ABIKind::Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty)
5550            : isDarwinPCS()               ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
5551                                          : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
5552   }
5553
5554   Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
5555                       QualType Ty) const override;
5556
5557   bool allowBFloatArgsAndRet() const override {
5558     return getTarget().hasBFloat16Type();
5559   }
5560 };
5561
5562 class AArch64SwiftABIInfo : public SwiftABIInfo {
5563 public:
5564   explicit AArch64SwiftABIInfo(CodeGenTypes &CGT)
5565       : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {}
5566
5567   bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
5568                          unsigned NumElts) const override;
5569 };
5570
5571 class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
5572 public:
5573   AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
5574       : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {
5575     SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGT);
5576   }
5577
5578   StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
5579     return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue";
5580   }
5581
5582   int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
5583     return 31;
5584   }
5585
5586   bool doesReturnSlotInterfereWithArgs() const override { return false; }
5587
5588   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
5589                            CodeGen::CodeGenModule &CGM) const override {
5590     const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
5591     if (!FD)
5592       return;
5593
5594     const auto *TA = FD->getAttr<TargetAttr>();
5595     if (TA == nullptr)
5596       return;
5597
5598     ParsedTargetAttr Attr =
5599         CGM.getTarget().parseTargetAttr(TA->getFeaturesStr());
5600     if (Attr.BranchProtection.empty())
5601       return;
5602
5603     TargetInfo::BranchProtectionInfo BPI;
5604     StringRef Error;
5605     (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection,
5606                                                    Attr.CPU, BPI, Error);
5607     assert(Error.empty());
5608
5609     auto *Fn = cast<llvm::Function>(GV);
5610     static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"};
5611     Fn->addFnAttr("sign-return-address", SignReturnAddrStr[static_cast<int>(BPI.SignReturnAddr)]);
5612
5613     if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) {
5614       Fn->addFnAttr("sign-return-address-key",
5615                     BPI.SignKey == LangOptions::SignReturnAddressKeyKind::AKey
5616                         ? "a_key"
5617                         : "b_key");
5618     }
5619
5620     Fn->addFnAttr("branch-target-enforcement",
5621                   BPI.BranchTargetEnforcement ? "true" : "false");
5622   }
5623
5624   bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF,
5625                                 llvm::Type *Ty) const override {
5626     if (CGF.getTarget().hasFeature("ls64")) {
5627       auto *ST = dyn_cast<llvm::StructType>(Ty);
5628       if (ST && ST->getNumElements() == 1) {
5629         auto *AT = dyn_cast<llvm::ArrayType>(ST->getElementType(0));
5630         if (AT && AT->getNumElements() == 8 &&
5631             AT->getElementType()->isIntegerTy(64))
5632           return true;
5633       }
5634     }
5635     return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty);
5636   }
5637 };
5638
5639 class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
5640 public:
5641   WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind K)
5642       : AArch64TargetCodeGenInfo(CGT, K) {}
5643
5644   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
5645                            CodeGen::CodeGenModule &CGM) const override;
5646
5647   void getDependentLibraryOption(llvm::StringRef Lib,
5648                                  llvm::SmallString<24> &Opt) const override {
5649     Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib);
5650   }
5651
5652   void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value,
5653                                llvm::SmallString<32> &Opt) const override {
5654     Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
5655   }
5656 };
5657
5658 void WindowsAArch64TargetCodeGenInfo::setTargetAttributes(
5659     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
5660   AArch64TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
5661   if (GV->isDeclaration())
5662     return;
5663   addStackProbeTargetAttributes(D, GV, CGM);
5664 }
5665 }
5666
5667 ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const {
5668   assert(Ty->isVectorType() && "expected vector type!");
5669
5670   const auto *VT = Ty->castAs<VectorType>();
5671   if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) {
5672     assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
5673     assert(VT->getElementType()->castAs<BuiltinType>()->getKind() ==
5674                BuiltinType::UChar &&
5675            "unexpected builtin type for SVE predicate!");
5676     return ABIArgInfo::getDirect(llvm::ScalableVectorType::get(
5677         llvm::Type::getInt1Ty(getVMContext()), 16));
5678   }
5679
5680   if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector) {
5681     assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
5682
5683     const auto *BT = VT->getElementType()->castAs<BuiltinType>();
5684     llvm::ScalableVectorType *ResType = nullptr;
5685     switch (BT->getKind()) {
5686     default:
5687       llvm_unreachable("unexpected builtin type for SVE vector!");
5688     case BuiltinType::SChar:
5689     case BuiltinType::UChar:
5690       ResType = llvm::ScalableVectorType::get(
5691           llvm::Type::getInt8Ty(getVMContext()), 16);
5692       break;
5693     case BuiltinType::Short:
5694     case BuiltinType::UShort:
5695       ResType = llvm::ScalableVectorType::get(
5696           llvm::Type::getInt16Ty(getVMContext()), 8);
5697       break;
5698     case BuiltinType::Int:
5699     case BuiltinType::UInt:
5700       ResType = llvm::ScalableVectorType::get(
5701           llvm::Type::getInt32Ty(getVMContext()), 4);
5702       break;
5703     case BuiltinType::Long:
5704     case BuiltinType::ULong:
5705       ResType = llvm::ScalableVectorType::get(
5706           llvm::Type::getInt64Ty(getVMContext()), 2);
5707       break;
5708     case BuiltinType::Half:
5709       ResType = llvm::ScalableVectorType::get(
5710           llvm::Type::getHalfTy(getVMContext()), 8);
5711       break;
5712     case BuiltinType::Float:
5713       ResType = llvm::ScalableVectorType::get(
5714           llvm::Type::getFloatTy(getVMContext()), 4);
5715       break;
5716     case BuiltinType::Double:
5717       ResType = llvm::ScalableVectorType::get(
5718           llvm::Type::getDoubleTy(getVMContext()), 2);
5719       break;
5720     case BuiltinType::BFloat16:
5721       ResType = llvm::ScalableVectorType::get(
5722           llvm::Type::getBFloatTy(getVMContext()), 8);
5723       break;
5724     }
5725     return ABIArgInfo::getDirect(ResType);
5726   }
5727
5728   uint64_t Size = getContext().getTypeSize(Ty);
5729   // Android promotes <2 x i8> to i16, not i32
5730   if ((isAndroid() || isOHOSFamily()) && (Size <= 16)) {
5731     llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext());
5732     return ABIArgInfo::getDirect(ResType);
5733   }
5734   if (Size <= 32) {
5735     llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext());
5736     return ABIArgInfo::getDirect(ResType);
5737   }
5738   if (Size == 64) {
5739     auto *ResType =
5740         llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
5741     return ABIArgInfo::getDirect(ResType);
5742   }
5743   if (Size == 128) {
5744     auto *ResType =
5745         llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
5746     return ABIArgInfo::getDirect(ResType);
5747   }
5748   return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
5749 }
5750
5751 ABIArgInfo
5752 AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
5753                                      unsigned CallingConvention) const {
5754   Ty = useFirstFieldIfTransparentUnion(Ty);
5755
5756   // Handle illegal vector types here.
5757   if (isIllegalVectorType(Ty))
5758     return coerceIllegalVector(Ty);
5759
5760   if (!isAggregateTypeForABI(Ty)) {
5761     // Treat an enum type as its underlying type.
5762     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
5763       Ty = EnumTy->getDecl()->getIntegerType();
5764
5765     if (const auto *EIT = Ty->getAs<BitIntType>())
5766       if (EIT->getNumBits() > 128)
5767         return getNaturalAlignIndirect(Ty);
5768
5769     return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS()
5770                 ? ABIArgInfo::getExtend(Ty)
5771                 : ABIArgInfo::getDirect());
5772   }
5773
5774   // Structures with either a non-trivial destructor or a non-trivial
5775   // copy constructor are always indirect.
5776   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
5777     return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
5778                                      CGCXXABI::RAA_DirectInMemory);
5779   }
5780
5781   // Empty records are always ignored on Darwin, but actually passed in C++ mode
5782   // elsewhere for GNU compatibility.
5783   uint64_t Size = getContext().getTypeSize(Ty);
5784   bool IsEmpty = isEmptyRecord(getContext(), Ty, true);
5785   if (IsEmpty || Size == 0) {
5786     if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
5787       return ABIArgInfo::getIgnore();
5788
5789     // GNU C mode. The only argument that gets ignored is an empty one with size
5790     // 0.
5791     if (IsEmpty && Size == 0)
5792       return ABIArgInfo::getIgnore();
5793     return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
5794   }
5795
5796   // Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
5797   const Type *Base = nullptr;
5798   uint64_t Members = 0;
5799   bool IsWin64 = Kind == AArch64ABIKind::Win64 ||
5800                  CallingConvention == llvm::CallingConv::Win64;
5801   bool IsWinVariadic = IsWin64 && IsVariadic;
5802   // In variadic functions on Windows, all composite types are treated alike,
5803   // no special handling of HFAs/HVAs.
5804   if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) {
5805     if (Kind != AArch64ABIKind::AAPCS)
5806       return ABIArgInfo::getDirect(
5807           llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
5808
5809     // For alignment adjusted HFAs, cap the argument alignment to 16, leave it
5810     // default otherwise.
5811     unsigned Align =
5812         getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
5813     unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity();
5814     Align = (Align > BaseAlign && Align >= 16) ? 16 : 0;
5815     return ABIArgInfo::getDirect(
5816         llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), 0,
5817         nullptr, true, Align);
5818   }
5819
5820   // Aggregates <= 16 bytes are passed directly in registers or on the stack.
5821   if (Size <= 128) {
5822     // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of
5823     // same size and alignment.
5824     if (getTarget().isRenderScriptTarget()) {
5825       return coerceToIntArray(Ty, getContext(), getVMContext());
5826     }
5827     unsigned Alignment;
5828     if (Kind == AArch64ABIKind::AAPCS) {
5829       Alignment = getContext().getTypeUnadjustedAlign(Ty);
5830       Alignment = Alignment < 128 ? 64 : 128;
5831     } else {
5832       Alignment =
5833           std::max(getContext().getTypeAlign(Ty),
5834                    (unsigned)getTarget().getPointerWidth(LangAS::Default));
5835     }
5836     Size = llvm::alignTo(Size, Alignment);
5837
5838     // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
5839     // For aggregates with 16-byte alignment, we use i128.
5840     llvm::Type *BaseTy = llvm::Type::getIntNTy(getVMContext(), Alignment);
5841     return ABIArgInfo::getDirect(
5842         Size == Alignment ? BaseTy
5843                           : llvm::ArrayType::get(BaseTy, Size / Alignment));
5844   }
5845
5846   return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
5847 }
5848
5849 ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
5850                                               bool IsVariadic) const {
5851   if (RetTy->isVoidType())
5852     return ABIArgInfo::getIgnore();
5853
5854   if (const auto *VT = RetTy->getAs<VectorType>()) {
5855     if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector ||
5856         VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector)
5857       return coerceIllegalVector(RetTy);
5858   }
5859
5860   // Large vector types should be returned via memory.
5861   if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128)
5862     return getNaturalAlignIndirect(RetTy);
5863
5864   if (!isAggregateTypeForABI(RetTy)) {
5865     // Treat an enum type as its underlying type.
5866     if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
5867       RetTy = EnumTy->getDecl()->getIntegerType();
5868
5869     if (const auto *EIT = RetTy->getAs<BitIntType>())
5870       if (EIT->getNumBits() > 128)
5871         return getNaturalAlignIndirect(RetTy);
5872
5873     return (isPromotableIntegerTypeForABI(RetTy) && isDarwinPCS()
5874                 ? ABIArgInfo::getExtend(RetTy)
5875                 : ABIArgInfo::getDirect());
5876   }
5877
5878   uint64_t Size = getContext().getTypeSize(RetTy);
5879   if (isEmptyRecord(getContext(), RetTy, true) || Size == 0)
5880     return ABIArgInfo::getIgnore();
5881
5882   const Type *Base = nullptr;
5883   uint64_t Members = 0;
5884   if (isHomogeneousAggregate(RetTy, Base, Members) &&
5885       !(getTarget().getTriple().getArch() == llvm::Triple::aarch64_32 &&
5886         IsVariadic))
5887     // Homogeneous Floating-point Aggregates (HFAs) are returned directly.
5888     return ABIArgInfo::getDirect();
5889
5890   // Aggregates <= 16 bytes are returned directly in registers or on the stack.
5891   if (Size <= 128) {
5892     // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of
5893     // same size and alignment.
5894     if (getTarget().isRenderScriptTarget()) {
5895       return coerceToIntArray(RetTy, getContext(), getVMContext());
5896     }
5897
5898     if (Size <= 64 && getDataLayout().isLittleEndian()) {
5899       // Composite types are returned in lower bits of a 64-bit register for LE,
5900       // and in higher bits for BE. However, integer types are always returned
5901       // in lower bits for both LE and BE, and they are not rounded up to
5902       // 64-bits. We can skip rounding up of composite types for LE, but not for
5903       // BE, otherwise composite types will be indistinguishable from integer
5904       // types.
5905       return ABIArgInfo::getDirect(
5906           llvm::IntegerType::get(getVMContext(), Size));
5907     }
5908
5909     unsigned Alignment = getContext().getTypeAlign(RetTy);
5910     Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
5911
5912     // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
5913     // For aggregates with 16-byte alignment, we use i128.
5914     if (Alignment < 128 && Size == 128) {
5915       llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext());
5916       return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
5917     }
5918     return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
5919   }
5920
5921   return getNaturalAlignIndirect(RetTy);
5922 }
5923
5924 /// isIllegalVectorType - check whether the vector type is legal for AArch64.
5925 bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
5926   if (const VectorType *VT = Ty->getAs<VectorType>()) {
5927     // Check whether VT is a fixed-length SVE vector. These types are
5928     // represented as scalable vectors in function args/return and must be
5929     // coerced from fixed vectors.
5930     if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector ||
5931         VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector)
5932       return true;
5933
5934     // Check whether VT is legal.
5935     unsigned NumElements = VT->getNumElements();
5936     uint64_t Size = getContext().getTypeSize(VT);
5937     // NumElements should be power of 2.
5938     if (!llvm::isPowerOf2_32(NumElements))
5939       return true;
5940
5941     // arm64_32 has to be compatible with the ARM logic here, which allows huge
5942     // vectors for some reason.
5943     llvm::Triple Triple = getTarget().getTriple();
5944     if (Triple.getArch() == llvm::Triple::aarch64_32 &&
5945         Triple.isOSBinFormatMachO())
5946       return Size <= 32;
5947
5948     return Size != 64 && (Size != 128 || NumElements == 1);
5949   }
5950   return false;
5951 }
5952
5953 bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize,
5954                                             llvm::Type *EltTy,
5955                                             unsigned NumElts) const {
5956   if (!llvm::isPowerOf2_32(NumElts))
5957     return false;
5958   if (VectorSize.getQuantity() != 8 &&
5959       (VectorSize.getQuantity() != 16 || NumElts == 1))
5960     return false;
5961   return true;
5962 }
5963
5964 bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
5965   // Homogeneous aggregates for AAPCS64 must have base types of a floating
5966   // point type or a short-vector type. This is the same as the 32-bit ABI,
5967   // but with the difference that any floating-point type is allowed,
5968   // including __fp16.
5969   if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
5970     if (BT->isFloatingPoint())
5971       return true;
5972   } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
5973     unsigned VecSize = getContext().getTypeSize(VT);
5974     if (VecSize == 64 || VecSize == 128)
5975       return true;
5976   }
5977   return false;
5978 }
5979
5980 bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
5981                                                        uint64_t Members) const {
5982   return Members <= 4;
5983 }
5984
5985 bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
5986     const {
5987   // AAPCS64 says that the rule for whether something is a homogeneous
5988   // aggregate is applied to the output of the data layout decision. So
5989   // anything that doesn't affect the data layout also does not affect
5990   // homogeneity. In particular, zero-length bitfields don't stop a struct
5991   // being homogeneous.
5992   return true;
5993 }
5994
5995 Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
5996                                        CodeGenFunction &CGF) const {
5997   ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true,
5998                                        CGF.CurFnInfo->getCallingConvention());
5999   // Empty records are ignored for parameter passing purposes.
6000   if (AI.isIgnore()) {
6001     uint64_t PointerSize = getTarget().getPointerWidth(LangAS::Default) / 8;
6002     CharUnits SlotSize = CharUnits::fromQuantity(PointerSize);
6003     VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy);
6004     auto *Load = CGF.Builder.CreateLoad(VAListAddr);
6005     Address Addr = Address(Load, CGF.Int8Ty, SlotSize);
6006     return CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
6007   }
6008
6009   bool IsIndirect = AI.isIndirect();
6010
6011   llvm::Type *BaseTy = CGF.ConvertType(Ty);
6012   if (IsIndirect)
6013     BaseTy = llvm::PointerType::getUnqual(BaseTy);
6014   else if (AI.getCoerceToType())
6015     BaseTy = AI.getCoerceToType();
6016
6017   unsigned NumRegs = 1;
6018   if (llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(BaseTy)) {
6019     BaseTy = ArrTy->getElementType();
6020     NumRegs = ArrTy->getNumElements();
6021   }
6022   bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy();
6023
6024   // The AArch64 va_list type and handling is specified in the Procedure Call
6025   // Standard, section B.4:
6026   //
6027   // struct {
6028   //   void *__stack;
6029   //   void *__gr_top;
6030   //   void *__vr_top;
6031   //   int __gr_offs;
6032   //   int __vr_offs;
6033   // };
6034
6035   llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
6036   llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
6037   llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
6038   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
6039
6040   CharUnits TySize = getContext().getTypeSizeInChars(Ty);
6041   CharUnits TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty);
6042
6043   Address reg_offs_p = Address::invalid();
6044   llvm::Value *reg_offs = nullptr;
6045   int reg_top_index;
6046   int RegSize = IsIndirect ? 8 : TySize.getQuantity();
6047   if (!IsFPR) {
6048     // 3 is the field number of __gr_offs
6049     reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
6050     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
6051     reg_top_index = 1; // field number for __gr_top
6052     RegSize = llvm::alignTo(RegSize, 8);
6053   } else {
6054     // 4 is the field number of __vr_offs.
6055     reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
6056     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
6057     reg_top_index = 2; // field number for __vr_top
6058     RegSize = 16 * NumRegs;
6059   }
6060
6061   //=======================================
6062   // Find out where argument was passed
6063   //=======================================
6064
6065   // If reg_offs >= 0 we're already using the stack for this type of
6066   // argument. We don't want to keep updating reg_offs (in case it overflows,
6067   // though anyone passing 2GB of arguments, each at most 16 bytes, deserves
6068   // whatever they get).
6069   llvm::Value *UsingStack = nullptr;
6070   UsingStack = CGF.Builder.CreateICmpSGE(
6071       reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, 0));
6072
6073   CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock);
6074
6075   // Otherwise, at least some kind of argument could go in these registers, the
6076   // question is whether this particular type is too big.
6077   CGF.EmitBlock(MaybeRegBlock);
6078
6079   // Integer arguments may need to correct register alignment (for example a
6080   // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we
6081   // align __gr_offs to calculate the potential address.
6082   if (!IsFPR && !IsIndirect && TyAlign.getQuantity() > 8) {
6083     int Align = TyAlign.getQuantity();
6084
6085     reg_offs = CGF.Builder.CreateAdd(
6086         reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1),
6087         "align_regoffs");
6088     reg_offs = CGF.Builder.CreateAnd(
6089         reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, -Align),
6090         "aligned_regoffs");
6091   }
6092
6093   // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list.
6094   // The fact that this is done unconditionally reflects the fact that
6095   // allocating an argument to the stack also uses up all the remaining
6096   // registers of the appropriate kind.
6097   llvm::Value *NewOffset = nullptr;
6098   NewOffset = CGF.Builder.CreateAdd(
6099       reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs");
6100   CGF.Builder.CreateStore(NewOffset, reg_offs_p);
6101
6102   // Now we're in a position to decide whether this argument really was in
6103   // registers or not.
6104   llvm::Value *InRegs = nullptr;
6105   InRegs = CGF.Builder.CreateICmpSLE(
6106       NewOffset, llvm::ConstantInt::get(CGF.Int32Ty, 0), "inreg");
6107
6108   CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock);
6109
6110   //=======================================
6111   // Argument was in registers
6112   //=======================================
6113
6114   // Now we emit the code for if the argument was originally passed in
6115   // registers. First start the appropriate block:
6116   CGF.EmitBlock(InRegBlock);
6117
6118   llvm::Value *reg_top = nullptr;
6119   Address reg_top_p =
6120       CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p");
6121   reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top");
6122   Address BaseAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, reg_top, reg_offs),
6123                    CGF.Int8Ty, CharUnits::fromQuantity(IsFPR ? 16 : 8));
6124   Address RegAddr = Address::invalid();
6125   llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty), *ElementTy = MemTy;
6126
6127   if (IsIndirect) {
6128     // If it's been passed indirectly (actually a struct), whatever we find from
6129     // stored registers or on the stack will actually be a struct **.
6130     MemTy = llvm::PointerType::getUnqual(MemTy);
6131   }
6132
6133   const Type *Base = nullptr;
6134   uint64_t NumMembers = 0;
6135   bool IsHFA = isHomogeneousAggregate(Ty, Base, NumMembers);
6136   if (IsHFA && NumMembers > 1) {
6137     // Homogeneous aggregates passed in registers will have their elements split
6138     // and stored 16-bytes apart regardless of size (they're notionally in qN,
6139     // qN+1, ...). We reload and store into a temporary local variable
6140     // contiguously.
6141     assert(!IsIndirect && "Homogeneous aggregates should be passed directly");
6142     auto BaseTyInfo = getContext().getTypeInfoInChars(QualType(Base, 0));
6143     llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0));
6144     llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers);
6145     Address Tmp = CGF.CreateTempAlloca(HFATy,
6146                                        std::max(TyAlign, BaseTyInfo.Align));
6147
6148     // On big-endian platforms, the value will be right-aligned in its slot.
6149     int Offset = 0;
6150     if (CGF.CGM.getDataLayout().isBigEndian() &&
6151         BaseTyInfo.Width.getQuantity() < 16)
6152       Offset = 16 - BaseTyInfo.Width.getQuantity();
6153
6154     for (unsigned i = 0; i < NumMembers; ++i) {
6155       CharUnits BaseOffset = CharUnits::fromQuantity(16 * i + Offset);
6156       Address LoadAddr =
6157         CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, BaseOffset);
6158       LoadAddr = CGF.Builder.CreateElementBitCast(LoadAddr, BaseTy);
6159
6160       Address StoreAddr = CGF.Builder.CreateConstArrayGEP(Tmp, i);
6161
6162       llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr);
6163       CGF.Builder.CreateStore(Elem, StoreAddr);
6164     }
6165
6166     RegAddr = CGF.Builder.CreateElementBitCast(Tmp, MemTy);
6167   } else {
6168     // Otherwise the object is contiguous in memory.
6169
6170     // It might be right-aligned in its slot.
6171     CharUnits SlotSize = BaseAddr.getAlignment();
6172     if (CGF.CGM.getDataLayout().isBigEndian() && !IsIndirect &&
6173         (IsHFA || !isAggregateTypeForABI(Ty)) &&
6174         TySize < SlotSize) {
6175       CharUnits Offset = SlotSize - TySize;
6176       BaseAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, Offset);
6177     }
6178
6179     RegAddr = CGF.Builder.CreateElementBitCast(BaseAddr, MemTy);
6180   }
6181
6182   CGF.EmitBranch(ContBlock);
6183
6184   //=======================================
6185   // Argument was on the stack
6186   //=======================================
6187   CGF.EmitBlock(OnStackBlock);
6188
6189   Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p");
6190   llvm::Value *OnStackPtr = CGF.Builder.CreateLoad(stack_p, "stack");
6191
6192   // Again, stack arguments may need realignment. In this case both integer and
6193   // floating-point ones might be affected.
6194   if (!IsIndirect && TyAlign.getQuantity() > 8) {
6195     int Align = TyAlign.getQuantity();
6196
6197     OnStackPtr = CGF.Builder.CreatePtrToInt(OnStackPtr, CGF.Int64Ty);
6198
6199     OnStackPtr = CGF.Builder.CreateAdd(
6200         OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, Align - 1),
6201         "align_stack");
6202     OnStackPtr = CGF.Builder.CreateAnd(
6203         OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, -Align),
6204         "align_stack");
6205
6206     OnStackPtr = CGF.Builder.CreateIntToPtr(OnStackPtr, CGF.Int8PtrTy);
6207   }
6208   Address OnStackAddr = Address(OnStackPtr, CGF.Int8Ty,
6209                                 std::max(CharUnits::fromQuantity(8), TyAlign));
6210
6211   // All stack slots are multiples of 8 bytes.
6212   CharUnits StackSlotSize = CharUnits::fromQuantity(8);
6213   CharUnits StackSize;
6214   if (IsIndirect)
6215     StackSize = StackSlotSize;
6216   else
6217     StackSize = TySize.alignTo(StackSlotSize);
6218
6219   llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize);
6220   llvm::Value *NewStack = CGF.Builder.CreateInBoundsGEP(
6221       CGF.Int8Ty, OnStackPtr, StackSizeC, "new_stack");
6222
6223   // Write the new value of __stack for the next call to va_arg
6224   CGF.Builder.CreateStore(NewStack, stack_p);
6225
6226   if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) &&
6227       TySize < StackSlotSize) {
6228     CharUnits Offset = StackSlotSize - TySize;
6229     OnStackAddr = CGF.Builder.CreateConstInBoundsByteGEP(OnStackAddr, Offset);
6230   }
6231
6232   OnStackAddr = CGF.Builder.CreateElementBitCast(OnStackAddr, MemTy);
6233
6234   CGF.EmitBranch(ContBlock);
6235
6236   //=======================================
6237   // Tidy up
6238   //=======================================
6239   CGF.EmitBlock(ContBlock);
6240
6241   Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, OnStackAddr,
6242                                  OnStackBlock, "vaargs.addr");
6243
6244   if (IsIndirect)
6245     return Address(CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"), ElementTy,
6246                    TyAlign);
6247
6248   return ResAddr;
6249 }
6250
6251 Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
6252                                         CodeGenFunction &CGF) const {
6253   // The backend's lowering doesn't support va_arg for aggregates or
6254   // illegal vector types.  Lower VAArg here for these cases and use
6255   // the LLVM va_arg instruction for everything else.
6256   if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty))
6257     return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());
6258
6259   uint64_t PointerSize = getTarget().getPointerWidth(LangAS::Default) / 8;
6260   CharUnits SlotSize = CharUnits::fromQuantity(PointerSize);
6261
6262   // Empty records are ignored for parameter passing purposes.
6263   if (isEmptyRecord(getContext(), Ty, true)) {
6264     Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr, "ap.cur"),
6265                            getVAListElementType(CGF), SlotSize);
6266     Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
6267     return Addr;
6268   }
6269
6270   // The size of the actual thing passed, which might end up just
6271   // being a pointer for indirect types.
6272   auto TyInfo = getContext().getTypeInfoInChars(Ty);
6273
6274   // Arguments bigger than 16 bytes which aren't homogeneous
6275   // aggregates should be passed indirectly.
6276   bool IsIndirect = false;
6277   if (TyInfo.Width.getQuantity() > 16) {
6278     const Type *Base = nullptr;
6279     uint64_t Members = 0;
6280     IsIndirect = !isHomogeneousAggregate(Ty, Base, Members);
6281   }
6282
6283   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
6284                           TyInfo, SlotSize, /*AllowHigherAlign*/ true);
6285 }
6286
6287 Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
6288                                     QualType Ty) const {
6289   bool IsIndirect = false;
6290
6291   // Composites larger than 16 bytes are passed by reference.
6292   if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
6293     IsIndirect = true;
6294
6295   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
6296                           CGF.getContext().getTypeInfoInChars(Ty),
6297                           CharUnits::fromQuantity(8),
6298                           /*allowHigherAlign*/ false);
6299 }
6300
6301 //===----------------------------------------------------------------------===//
6302 // ARM ABI Implementation
6303 //===----------------------------------------------------------------------===//
6304
6305 namespace {
6306
6307 enum class ARMABIKind {
6308   APCS = 0,
6309   AAPCS = 1,
6310   AAPCS_VFP = 2,
6311   AAPCS16_VFP = 3,
6312 };
6313
6314 class ARMABIInfo : public ABIInfo {
6315   ARMABIKind Kind;
6316   bool IsFloatABISoftFP;
6317
6318 public:
6319   ARMABIInfo(CodeGenTypes &CGT, ARMABIKind Kind) : ABIInfo(CGT), Kind(Kind) {
6320     setCCs();
6321     IsFloatABISoftFP = CGT.getCodeGenOpts().FloatABI == "softfp" ||
6322         CGT.getCodeGenOpts().FloatABI == ""; // default
6323   }
6324
6325   bool isEABI() const {
6326     switch (getTarget().getTriple().getEnvironment()) {
6327     case llvm::Triple::Android:
6328     case llvm::Triple::EABI:
6329     case llvm::Triple::EABIHF:
6330     case llvm::Triple::GNUEABI:
6331     case llvm::Triple::GNUEABIHF:
6332     case llvm::Triple::MuslEABI:
6333     case llvm::Triple::MuslEABIHF:
6334       return true;
6335     default:
6336       return getTarget().getTriple().isOHOSFamily();
6337     }
6338   }
6339
6340   bool isEABIHF() const {
6341     switch (getTarget().getTriple().getEnvironment()) {
6342     case llvm::Triple::EABIHF:
6343     case llvm::Triple::GNUEABIHF:
6344     case llvm::Triple::MuslEABIHF:
6345       return true;
6346     default:
6347       return false;
6348     }
6349   }
6350
6351   ARMABIKind getABIKind() const { return Kind; }
6352
6353   bool allowBFloatArgsAndRet() const override {
6354     return !IsFloatABISoftFP && getTarget().hasBFloat16Type();
6355   }
6356
6357 private:
6358   ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic,
6359                                 unsigned functionCallConv) const;
6360   ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic,
6361                                   unsigned functionCallConv) const;
6362   ABIArgInfo classifyHomogeneousAggregate(QualType Ty, const Type *Base,
6363                                           uint64_t Members) const;
6364   ABIArgInfo coerceIllegalVector(QualType Ty) const;
6365   bool isIllegalVectorType(QualType Ty) const;
6366   bool containsAnyFP16Vectors(QualType Ty) const;
6367
6368   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
6369   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
6370                                          uint64_t Members) const override;
6371   bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override;
6372
6373   bool isEffectivelyAAPCS_VFP(unsigned callConvention, bool acceptHalf) const;
6374
6375   void computeInfo(CGFunctionInfo &FI) const override;
6376
6377   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
6378                     QualType Ty) const override;
6379
6380   llvm::CallingConv::ID getLLVMDefaultCC() const;
6381   llvm::CallingConv::ID getABIDefaultCC() const;
6382   void setCCs();
6383 };
6384
6385 class ARMSwiftABIInfo : public SwiftABIInfo {
6386 public:
6387   explicit ARMSwiftABIInfo(CodeGenTypes &CGT)
6388       : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {}
6389
6390   bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
6391                          unsigned NumElts) const override;
6392 };
6393
6394 class ARMTargetCodeGenInfo : public TargetCodeGenInfo {
6395 public:
6396   ARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIKind K)
6397       : TargetCodeGenInfo(std::make_unique<ARMABIInfo>(CGT, K)) {
6398     SwiftInfo = std::make_unique<ARMSwiftABIInfo>(CGT);
6399   }
6400
6401   int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
6402     return 13;
6403   }
6404
6405   StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
6406     return "mov\tr7, r7\t\t// marker for objc_retainAutoreleaseReturnValue";
6407   }
6408
6409   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
6410                                llvm::Value *Address) const override {
6411     llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);
6412
6413     // 0-15 are the 16 integer registers.
6414     AssignToArrayRange(CGF.Builder, Address, Four8, 0, 15);
6415     return false;
6416   }
6417
6418   unsigned getSizeOfUnwindException() const override {
6419     if (getABIInfo<ARMABIInfo>().isEABI())
6420       return 88;
6421     return TargetCodeGenInfo::getSizeOfUnwindException();
6422   }
6423
6424   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
6425                            CodeGen::CodeGenModule &CGM) const override {
6426     if (GV->isDeclaration())
6427       return;
6428     const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
6429     if (!FD)
6430       return;
6431     auto *Fn = cast<llvm::Function>(GV);
6432
6433     if (const auto *TA = FD->getAttr<TargetAttr>()) {
6434       ParsedTargetAttr Attr =
6435           CGM.getTarget().parseTargetAttr(TA->getFeaturesStr());
6436       if (!Attr.BranchProtection.empty()) {
6437         TargetInfo::BranchProtectionInfo BPI;
6438         StringRef DiagMsg;
6439         StringRef Arch =
6440             Attr.CPU.empty() ? CGM.getTarget().getTargetOpts().CPU : Attr.CPU;
6441         if (!CGM.getTarget().validateBranchProtection(Attr.BranchProtection,
6442                                                       Arch, BPI, DiagMsg)) {
6443           CGM.getDiags().Report(
6444               D->getLocation(),
6445               diag::warn_target_unsupported_branch_protection_attribute)
6446               << Arch;
6447         } else {
6448           static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"};
6449           assert(static_cast<unsigned>(BPI.SignReturnAddr) <= 2 &&
6450                  "Unexpected SignReturnAddressScopeKind");
6451           Fn->addFnAttr(
6452               "sign-return-address",
6453               SignReturnAddrStr[static_cast<int>(BPI.SignReturnAddr)]);
6454
6455           Fn->addFnAttr("branch-target-enforcement",
6456                         BPI.BranchTargetEnforcement ? "true" : "false");
6457         }
6458       } else if (CGM.getLangOpts().BranchTargetEnforcement ||
6459                  CGM.getLangOpts().hasSignReturnAddress()) {
6460         // If the Branch Protection attribute is missing, validate the target
6461         // Architecture attribute against Branch Protection command line
6462         // settings.
6463         if (!CGM.getTarget().isBranchProtectionSupportedArch(Attr.CPU))
6464           CGM.getDiags().Report(
6465               D->getLocation(),
6466               diag::warn_target_unsupported_branch_protection_attribute)
6467               << Attr.CPU;
6468       }
6469     }
6470
6471     const ARMInterruptAttr *Attr = FD->getAttr<ARMInterruptAttr>();
6472     if (!Attr)
6473       return;
6474
6475     const char *Kind;
6476     switch (Attr->getInterrupt()) {
6477     case ARMInterruptAttr::Generic: Kind = ""; break;
6478     case ARMInterruptAttr::IRQ:     Kind = "IRQ"; break;
6479     case ARMInterruptAttr::FIQ:     Kind = "FIQ"; break;
6480     case ARMInterruptAttr::SWI:     Kind = "SWI"; break;
6481     case ARMInterruptAttr::ABORT:   Kind = "ABORT"; break;
6482     case ARMInterruptAttr::UNDEF:   Kind = "UNDEF"; break;
6483     }
6484
6485     Fn->addFnAttr("interrupt", Kind);
6486
6487     ARMABIKind ABI = getABIInfo<ARMABIInfo>().getABIKind();
6488     if (ABI == ARMABIKind::APCS)
6489       return;
6490
6491     // AAPCS guarantees that sp will be 8-byte aligned on any public interface,
6492     // however this is not necessarily true on taking any interrupt. Instruct
6493     // the backend to perform a realignment as part of the function prologue.
6494     llvm::AttrBuilder B(Fn->getContext());
6495     B.addStackAlignmentAttr(8);
6496     Fn->addFnAttrs(B);
6497   }
6498 };
6499
6500 class WindowsARMTargetCodeGenInfo : public ARMTargetCodeGenInfo {
6501 public:
6502   WindowsARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIKind K)
6503       : ARMTargetCodeGenInfo(CGT, K) {}
6504
6505   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
6506                            CodeGen::CodeGenModule &CGM) const override;
6507
6508   void getDependentLibraryOption(llvm::StringRef Lib,
6509                                  llvm::SmallString<24> &Opt) const override {
6510     Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib);
6511   }
6512
6513   void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value,
6514                                llvm::SmallString<32> &Opt) const override {
6515     Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
6516   }
6517 };
6518
6519 void WindowsARMTargetCodeGenInfo::setTargetAttributes(
6520     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
6521   ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
6522   if (GV->isDeclaration())
6523     return;
6524   addStackProbeTargetAttributes(D, GV, CGM);
6525 }
6526 }
6527
6528 void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
6529   if (!::classifyReturnType(getCXXABI(), FI, *this))
6530     FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic(),
6531                                             FI.getCallingConvention());
6532
6533   for (auto &I : FI.arguments())
6534     I.info = classifyArgumentType(I.type, FI.isVariadic(),
6535                                   FI.getCallingConvention());
6536
6537
6538   // Always honor user-specified calling convention.
6539   if (FI.getCallingConvention() != llvm::CallingConv::C)
6540     return;
6541
6542   llvm::CallingConv::ID cc = getRuntimeCC();
6543   if (cc != llvm::CallingConv::C)
6544     FI.setEffectiveCallingConvention(cc);
6545 }
6546
6547 /// Return the default calling convention that LLVM will use.
6548 llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const {
6549   // The default calling convention that LLVM will infer.
6550   if (isEABIHF() || getTarget().getTriple().isWatchABI())
6551     return llvm::CallingConv::ARM_AAPCS_VFP;
6552   else if (isEABI())
6553     return llvm::CallingConv::ARM_AAPCS;
6554   else
6555     return llvm::CallingConv::ARM_APCS;
6556 }
6557
6558 /// Return the calling convention that our ABI would like us to use
6559 /// as the C calling convention.
6560 llvm::CallingConv::ID ARMABIInfo::getABIDefaultCC() const {
6561   switch (getABIKind()) {
6562   case ARMABIKind::APCS:
6563     return llvm::CallingConv::ARM_APCS;
6564   case ARMABIKind::AAPCS:
6565     return llvm::CallingConv::ARM_AAPCS;
6566   case ARMABIKind::AAPCS_VFP:
6567     return llvm::CallingConv::ARM_AAPCS_VFP;
6568   case ARMABIKind::AAPCS16_VFP:
6569     return llvm::CallingConv::ARM_AAPCS_VFP;
6570   }
6571   llvm_unreachable("bad ABI kind");
6572 }
6573
6574 void ARMABIInfo::setCCs() {
6575   assert(getRuntimeCC() == llvm::CallingConv::C);
6576
6577   // Don't muddy up the IR with a ton of explicit annotations if
6578   // they'd just match what LLVM will infer from the triple.
6579   llvm::CallingConv::ID abiCC = getABIDefaultCC();
6580   if (abiCC != getLLVMDefaultCC())
6581     RuntimeCC = abiCC;
6582 }
6583
6584 ABIArgInfo ARMABIInfo::coerceIllegalVector(QualType Ty) const {
6585   uint64_t Size = getContext().getTypeSize(Ty);
6586   if (Size <= 32) {
6587     llvm::Type *ResType =
6588         llvm::Type::getInt32Ty(getVMContext());
6589     return ABIArgInfo::getDirect(ResType);
6590   }
6591   if (Size == 64 || Size == 128) {
6592     auto *ResType = llvm::FixedVectorType::get(
6593         llvm::Type::getInt32Ty(getVMContext()), Size / 32);
6594     return ABIArgInfo::getDirect(ResType);
6595   }
6596   return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
6597 }
6598
6599 ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty,
6600                                                     const Type *Base,
6601                                                     uint64_t Members) const {
6602   assert(Base && "Base class should be set for homogeneous aggregate");
6603   // Base can be a floating-point or a vector.
6604   if (const VectorType *VT = Base->getAs<VectorType>()) {
6605     // FP16 vectors should be converted to integer vectors
6606     if (!getTarget().hasLegalHalfType() && containsAnyFP16Vectors(Ty)) {
6607       uint64_t Size = getContext().getTypeSize(VT);
6608       auto *NewVecTy = llvm::FixedVectorType::get(
6609           llvm::Type::getInt32Ty(getVMContext()), Size / 32);
6610       llvm::Type *Ty = llvm::ArrayType::get(NewVecTy, Members);
6611       return ABIArgInfo::getDirect(Ty, 0, nullptr, false);
6612     }
6613   }
6614   unsigned Align = 0;
6615   if (getABIKind() == ARMABIKind::AAPCS ||
6616       getABIKind() == ARMABIKind::AAPCS_VFP) {
6617     // For alignment adjusted HFAs, cap the argument alignment to 8, leave it
6618     // default otherwise.
6619     Align = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
6620     unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity();
6621     Align = (Align > BaseAlign && Align >= 8) ? 8 : 0;
6622   }
6623   return ABIArgInfo::getDirect(nullptr, 0, nullptr, false, Align);
6624 }
6625
6626 ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
6627                                             unsigned functionCallConv) const {
6628   // 6.1.2.1 The following argument types are VFP CPRCs:
6629   //   A single-precision floating-point type (including promoted
6630   //   half-precision types); A double-precision floating-point type;
6631   //   A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate
6632   //   with a Base Type of a single- or double-precision floating-point type,
6633   //   64-bit containerized vectors or 128-bit containerized vectors with one
6634   //   to four Elements.
6635   // Variadic functions should always marshal to the base standard.
6636   bool IsAAPCS_VFP =
6637       !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ false);
6638
6639   Ty = useFirstFieldIfTransparentUnion(Ty);
6640
6641   // Handle illegal vector types here.
6642   if (isIllegalVectorType(Ty))
6643     return coerceIllegalVector(Ty);
6644
6645   if (!isAggregateTypeForABI(Ty)) {
6646     // Treat an enum type as its underlying type.
6647     if (const EnumType *EnumTy = Ty->getAs<EnumType>()) {
6648       Ty = EnumTy->getDecl()->getIntegerType();
6649     }
6650
6651     if (const auto *EIT = Ty->getAs<BitIntType>())
6652       if (EIT->getNumBits() > 64)
6653         return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
6654
6655     return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
6656                                               : ABIArgInfo::getDirect());
6657   }
6658
6659   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
6660     return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
6661   }
6662
6663   // Ignore empty records.
6664   if (isEmptyRecord(getContext(), Ty, true))
6665     return ABIArgInfo::getIgnore();
6666
6667   if (IsAAPCS_VFP) {
6668     // Homogeneous Aggregates need to be expanded when we can fit the aggregate
6669     // into VFP registers.
6670     const Type *Base = nullptr;
6671     uint64_t Members = 0;
6672     if (isHomogeneousAggregate(Ty, Base, Members))
6673       return classifyHomogeneousAggregate(Ty, Base, Members);
6674   } else if (getABIKind() == ARMABIKind::AAPCS16_VFP) {
6675     // WatchOS does have homogeneous aggregates. Note that we intentionally use
6676     // this convention even for a variadic function: the backend will use GPRs
6677     // if needed.
6678     const Type *Base = nullptr;
6679     uint64_t Members = 0;
6680     if (isHomogeneousAggregate(Ty, Base, Members)) {
6681       assert(Base && Members <= 4 && "unexpected homogeneous aggregate");
6682       llvm::Type *Ty =
6683         llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members);
6684       return ABIArgInfo::getDirect(Ty, 0, nullptr, false);
6685     }
6686   }
6687
6688   if (getABIKind() == ARMABIKind::AAPCS16_VFP &&
6689       getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(16)) {
6690     // WatchOS is adopting the 64-bit AAPCS rule on composite types: if they're
6691     // bigger than 128-bits, they get placed in space allocated by the caller,
6692     // and a pointer is passed.
6693     return ABIArgInfo::getIndirect(
6694         CharUnits::fromQuantity(getContext().getTypeAlign(Ty) / 8), false);
6695   }
6696
6697   // Support byval for ARM.
6698   // The ABI alignment for APCS is 4-byte and for AAPCS at least 4-byte and at
6699   // most 8-byte. We realign the indirect argument if type alignment is bigger
6700   // than ABI alignment.
6701   uint64_t ABIAlign = 4;
6702   uint64_t TyAlign;
6703   if (getABIKind() == ARMABIKind::AAPCS_VFP ||
6704       getABIKind() == ARMABIKind::AAPCS) {
6705     TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
6706     ABIAlign = std::clamp(TyAlign, (uint64_t)4, (uint64_t)8);
6707   } else {
6708     TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity();
6709   }
6710   if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) {
6711     assert(getABIKind() != ARMABIKind::AAPCS16_VFP && "unexpected byval");
6712     return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign),
6713                                    /*ByVal=*/true,
6714                                    /*Realign=*/TyAlign > ABIAlign);
6715   }
6716
6717   // On RenderScript, coerce Aggregates <= 64 bytes to an integer array of
6718   // same size and alignment.
6719   if (getTarget().isRenderScriptTarget()) {
6720     return coerceToIntArray(Ty, getContext(), getVMContext());
6721   }
6722
6723   // Otherwise, pass by coercing to a structure of the appropriate size.
6724   llvm::Type* ElemTy;
6725   unsigned SizeRegs;
6726   // FIXME: Try to match the types of the arguments more accurately where
6727   // we can.
6728   if (TyAlign <= 4) {
6729     ElemTy = llvm::Type::getInt32Ty(getVMContext());
6730     SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32;
6731   } else {
6732     ElemTy = llvm::Type::getInt64Ty(getVMContext());
6733     SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64;
6734   }
6735
6736   return ABIArgInfo::getDirect(llvm::ArrayType::get(ElemTy, SizeRegs));
6737 }
6738
6739 static bool isIntegerLikeType(QualType Ty, ASTContext &Context,
6740                               llvm::LLVMContext &VMContext) {
6741   // APCS, C Language Calling Conventions, Non-Simple Return Values: A structure
6742   // is called integer-like if its size is less than or equal to one word, and
6743   // the offset of each of its addressable sub-fields is zero.
6744
6745   uint64_t Size = Context.getTypeSize(Ty);
6746
6747   // Check that the type fits in a word.
6748   if (Size > 32)
6749     return false;
6750
6751   // FIXME: Handle vector types!
6752   if (Ty->isVectorType())
6753     return false;
6754
6755   // Float types are never treated as "integer like".
6756   if (Ty->isRealFloatingType())
6757     return false;
6758
6759   // If this is a builtin or pointer type then it is ok.
6760   if (Ty->getAs<BuiltinType>() || Ty->isPointerType())
6761     return true;
6762
6763   // Small complex integer types are "integer like".
6764   if (const ComplexType *CT = Ty->getAs<ComplexType>())
6765     return isIntegerLikeType(CT->getElementType(), Context, VMContext);
6766
6767   // Single element and zero sized arrays should be allowed, by the definition
6768   // above, but they are not.
6769
6770   // Otherwise, it must be a record type.
6771   const RecordType *RT = Ty->getAs<RecordType>();
6772   if (!RT) return false;
6773
6774   // Ignore records with flexible arrays.
6775   const RecordDecl *RD = RT->getDecl();
6776   if (RD->hasFlexibleArrayMember())
6777     return false;
6778
6779   // Check that all sub-fields are at offset 0, and are themselves "integer
6780   // like".
6781   const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
6782
6783   bool HadField = false;
6784   unsigned idx = 0;
6785   for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
6786        i != e; ++i, ++idx) {
6787     const FieldDecl *FD = *i;
6788
6789     // Bit-fields are not addressable, we only need to verify they are "integer
6790     // like". We still have to disallow a subsequent non-bitfield, for example:
6791     //   struct { int : 0; int x }
6792     // is non-integer like according to gcc.
6793     if (FD->isBitField()) {
6794       if (!RD->isUnion())
6795         HadField = true;
6796
6797       if (!isIntegerLikeType(FD->getType(), Context, VMContext))
6798         return false;
6799
6800       continue;
6801     }
6802
6803     // Check if this field is at offset 0.
6804     if (Layout.getFieldOffset(idx) != 0)
6805       return false;
6806
6807     if (!isIntegerLikeType(FD->getType(), Context, VMContext))
6808       return false;
6809
6810     // Only allow at most one field in a structure. This doesn't match the
6811     // wording above, but follows gcc in situations with a field following an
6812     // empty structure.
6813     if (!RD->isUnion()) {
6814       if (HadField)
6815         return false;
6816
6817       HadField = true;
6818     }
6819   }
6820
6821   return true;
6822 }
6823
6824 ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic,
6825                                           unsigned functionCallConv) const {
6826
6827   // Variadic functions should always marshal to the base standard.
6828   bool IsAAPCS_VFP =
6829       !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ true);
6830
6831   if (RetTy->isVoidType())
6832     return ABIArgInfo::getIgnore();
6833
6834   if (const VectorType *VT = RetTy->getAs<VectorType>()) {
6835     // Large vector types should be returned via memory.
6836     if (getContext().getTypeSize(RetTy) > 128)
6837       return getNaturalAlignIndirect(RetTy);
6838     // TODO: FP16/BF16 vectors should be converted to integer vectors
6839     // This check is similar  to isIllegalVectorType - refactor?
6840     if ((!getTarget().hasLegalHalfType() &&
6841         (VT->getElementType()->isFloat16Type() ||
6842          VT->getElementType()->isHalfType())) ||
6843         (IsFloatABISoftFP &&
6844          VT->getElementType()->isBFloat16Type()))
6845       return coerceIllegalVector(RetTy);
6846   }
6847
6848   if (!isAggregateTypeForABI(RetTy)) {
6849     // Treat an enum type as its underlying type.
6850     if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
6851       RetTy = EnumTy->getDecl()->getIntegerType();
6852
6853     if (const auto *EIT = RetTy->getAs<BitIntType>())
6854       if (EIT->getNumBits() > 64)
6855         return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
6856
6857     return isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
6858                                                 : ABIArgInfo::getDirect();
6859   }
6860
6861   // Are we following APCS?
6862   if (getABIKind() == ARMABIKind::APCS) {
6863     if (isEmptyRecord(getContext(), RetTy, false))
6864       return ABIArgInfo::getIgnore();
6865
6866     // Complex types are all returned as packed integers.
6867     //
6868     // FIXME: Consider using 2 x vector types if the back end handles them
6869     // correctly.
6870     if (RetTy->isAnyComplexType())
6871       return ABIArgInfo::getDirect(llvm::IntegerType::get(
6872           getVMContext(), getContext().getTypeSize(RetTy)));
6873
6874     // Integer like structures are returned in r0.
6875     if (isIntegerLikeType(RetTy, getContext(), getVMContext())) {
6876       // Return in the smallest viable integer type.
6877       uint64_t Size = getContext().getTypeSize(RetTy);
6878       if (Size <= 8)
6879         return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
6880       if (Size <= 16)
6881         return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
6882       return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
6883     }
6884
6885     // Otherwise return in memory.
6886     return getNaturalAlignIndirect(RetTy);
6887   }
6888
6889   // Otherwise this is an AAPCS variant.
6890
6891   if (isEmptyRecord(getContext(), RetTy, true))
6892     return ABIArgInfo::getIgnore();
6893
6894   // Check for homogeneous aggregates with AAPCS-VFP.
6895   if (IsAAPCS_VFP) {
6896     const Type *Base = nullptr;
6897     uint64_t Members = 0;
6898     if (isHomogeneousAggregate(RetTy, Base, Members))
6899       return classifyHomogeneousAggregate(RetTy, Base, Members);
6900   }
6901
6902   // Aggregates <= 4 bytes are returned in r0; other aggregates
6903   // are returned indirectly.
6904   uint64_t Size = getContext().getTypeSize(RetTy);
6905   if (Size <= 32) {
6906     // On RenderScript, coerce Aggregates <= 4 bytes to an integer array of
6907     // same size and alignment.
6908     if (getTarget().isRenderScriptTarget()) {
6909       return coerceToIntArray(RetTy, getContext(), getVMContext());
6910     }
6911     if (getDataLayout().isBigEndian())
6912       // Return in 32 bit integer integer type (as if loaded by LDR, AAPCS 5.4)
6913       return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
6914
6915     // Return in the smallest viable integer type.
6916     if (Size <= 8)
6917       return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
6918     if (Size <= 16)
6919       return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
6920     return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
6921   } else if (Size <= 128 && getABIKind() == ARMABIKind::AAPCS16_VFP) {
6922     llvm::Type *Int32Ty = llvm::Type::getInt32Ty(getVMContext());
6923     llvm::Type *CoerceTy =
6924         llvm::ArrayType::get(Int32Ty, llvm::alignTo(Size, 32) / 32);
6925     return ABIArgInfo::getDirect(CoerceTy);
6926   }
6927
6928   return getNaturalAlignIndirect(RetTy);
6929 }
6930
6931 /// isIllegalVector - check whether Ty is an illegal vector type.
6932 bool ARMABIInfo::isIllegalVectorType(QualType Ty) const {
6933   if (const VectorType *VT = Ty->getAs<VectorType> ()) {
6934     // On targets that don't support half, fp16 or bfloat, they are expanded
6935     // into float, and we don't want the ABI to depend on whether or not they
6936     // are supported in hardware. Thus return false to coerce vectors of these
6937     // types into integer vectors.
6938     // We do not depend on hasLegalHalfType for bfloat as it is a
6939     // separate IR type.
6940     if ((!getTarget().hasLegalHalfType() &&
6941         (VT->getElementType()->isFloat16Type() ||
6942          VT->getElementType()->isHalfType())) ||
6943         (IsFloatABISoftFP &&
6944          VT->getElementType()->isBFloat16Type()))
6945       return true;
6946     if (isAndroid()) {
6947       // Android shipped using Clang 3.1, which supported a slightly different
6948       // vector ABI. The primary differences were that 3-element vector types
6949       // were legal, and so were sub 32-bit vectors (i.e. <2 x i8>). This path
6950       // accepts that legacy behavior for Android only.
6951       // Check whether VT is legal.
6952       unsigned NumElements = VT->getNumElements();
6953       // NumElements should be power of 2 or equal to 3.
6954       if (!llvm::isPowerOf2_32(NumElements) && NumElements != 3)
6955         return true;
6956     } else {
6957       // Check whether VT is legal.
6958       unsigned NumElements = VT->getNumElements();
6959       uint64_t Size = getContext().getTypeSize(VT);
6960       // NumElements should be power of 2.
6961       if (!llvm::isPowerOf2_32(NumElements))
6962         return true;
6963       // Size should be greater than 32 bits.
6964       return Size <= 32;
6965     }
6966   }
6967   return false;
6968 }
6969
6970 /// Return true if a type contains any 16-bit floating point vectors
6971 bool ARMABIInfo::containsAnyFP16Vectors(QualType Ty) const {
6972   if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
6973     uint64_t NElements = AT->getSize().getZExtValue();
6974     if (NElements == 0)
6975       return false;
6976     return containsAnyFP16Vectors(AT->getElementType());
6977   } else if (const RecordType *RT = Ty->getAs<RecordType>()) {
6978     const RecordDecl *RD = RT->getDecl();
6979
6980     // If this is a C++ record, check the bases first.
6981     if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
6982       if (llvm::any_of(CXXRD->bases(), [this](const CXXBaseSpecifier &B) {
6983             return containsAnyFP16Vectors(B.getType());
6984           }))
6985         return true;
6986
6987     if (llvm::any_of(RD->fields(), [this](FieldDecl *FD) {
6988           return FD && containsAnyFP16Vectors(FD->getType());
6989         }))
6990       return true;
6991
6992     return false;
6993   } else {
6994     if (const VectorType *VT = Ty->getAs<VectorType>())
6995       return (VT->getElementType()->isFloat16Type() ||
6996               VT->getElementType()->isBFloat16Type() ||
6997               VT->getElementType()->isHalfType());
6998     return false;
6999   }
7000 }
7001
7002 bool ARMSwiftABIInfo::isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
7003                                         unsigned NumElts) const {
7004   if (!llvm::isPowerOf2_32(NumElts))
7005     return false;
7006   unsigned size = CGT.getDataLayout().getTypeStoreSizeInBits(EltTy);
7007   if (size > 64)
7008     return false;
7009   if (VectorSize.getQuantity() != 8 &&
7010       (VectorSize.getQuantity() != 16 || NumElts == 1))
7011     return false;
7012   return true;
7013 }
7014
7015 bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
7016   // Homogeneous aggregates for AAPCS-VFP must have base types of float,
7017   // double, or 64-bit or 128-bit vectors.
7018   if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
7019     if (BT->getKind() == BuiltinType::Float ||
7020         BT->getKind() == BuiltinType::Double ||
7021         BT->getKind() == BuiltinType::LongDouble)
7022       return true;
7023   } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
7024     unsigned VecSize = getContext().getTypeSize(VT);
7025     if (VecSize == 64 || VecSize == 128)
7026       return true;
7027   }
7028   return false;
7029 }
7030
7031 bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
7032                                                    uint64_t Members) const {
7033   return Members <= 4;
7034 }
7035
7036 bool ARMABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() const {
7037   // AAPCS32 says that the rule for whether something is a homogeneous
7038   // aggregate is applied to the output of the data layout decision. So
7039   // anything that doesn't affect the data layout also does not affect
7040   // homogeneity. In particular, zero-length bitfields don't stop a struct
7041   // being homogeneous.
7042   return true;
7043 }
7044
7045 bool ARMABIInfo::isEffectivelyAAPCS_VFP(unsigned callConvention,
7046                                         bool acceptHalf) const {
7047   // Give precedence to user-specified calling conventions.
7048   if (callConvention != llvm::CallingConv::C)
7049     return (callConvention == llvm::CallingConv::ARM_AAPCS_VFP);
7050   else
7051     return (getABIKind() == ARMABIKind::AAPCS_VFP) ||
7052            (acceptHalf && (getABIKind() == ARMABIKind::AAPCS16_VFP));
7053 }
7054
7055 Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
7056                               QualType Ty) const {
7057   CharUnits SlotSize = CharUnits::fromQuantity(4);
7058
7059   // Empty records are ignored for parameter passing purposes.
7060   if (isEmptyRecord(getContext(), Ty, true)) {
7061     VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy);
7062     auto *Load = CGF.Builder.CreateLoad(VAListAddr);
7063     Address Addr = Address(Load, CGF.Int8Ty, SlotSize);
7064     return CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
7065   }
7066
7067   CharUnits TySize = getContext().getTypeSizeInChars(Ty);
7068   CharUnits TyAlignForABI = getContext().getTypeUnadjustedAlignInChars(Ty);
7069
7070   // Use indirect if size of the illegal vector is bigger than 16 bytes.
7071   bool IsIndirect = false;
7072   const Type *Base = nullptr;
7073   uint64_t Members = 0;
7074   if (TySize > CharUnits::fromQuantity(16) && isIllegalVectorType(Ty)) {
7075     IsIndirect = true;
7076
7077   // ARMv7k passes structs bigger than 16 bytes indirectly, in space
7078   // allocated by the caller.
7079   } else if (TySize > CharUnits::fromQuantity(16) &&
7080              getABIKind() == ARMABIKind::AAPCS16_VFP &&
7081              !isHomogeneousAggregate(Ty, Base, Members)) {
7082     IsIndirect = true;
7083
7084   // Otherwise, bound the type's ABI alignment.
7085   // The ABI alignment for 64-bit or 128-bit vectors is 8 for AAPCS and 4 for
7086   // APCS. For AAPCS, the ABI alignment is at least 4-byte and at most 8-byte.
7087   // Our callers should be prepared to handle an under-aligned address.
7088   } else if (getABIKind() == ARMABIKind::AAPCS_VFP ||
7089              getABIKind() == ARMABIKind::AAPCS) {
7090     TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4));
7091     TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(8));
7092   } else if (getABIKind() == ARMABIKind::AAPCS16_VFP) {
7093     // ARMv7k allows type alignment up to 16 bytes.
7094     TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4));
7095     TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(16));
7096   } else {
7097     TyAlignForABI = CharUnits::fromQuantity(4);
7098   }
7099
7100   TypeInfoChars TyInfo(TySize, TyAlignForABI, AlignRequirementKind::None);
7101   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo,
7102                           SlotSize, /*AllowHigherAlign*/ true);
7103 }
7104
7105 //===----------------------------------------------------------------------===//
7106 // NVPTX ABI Implementation
7107 //===----------------------------------------------------------------------===//
7108
7109 namespace {
7110
7111 class NVPTXTargetCodeGenInfo;
7112
7113 class NVPTXABIInfo : public ABIInfo {
7114   NVPTXTargetCodeGenInfo &CGInfo;
7115
7116 public:
7117   NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
7118       : ABIInfo(CGT), CGInfo(Info) {}
7119
7120   ABIArgInfo classifyReturnType(QualType RetTy) const;
7121   ABIArgInfo classifyArgumentType(QualType Ty) const;
7122
7123   void computeInfo(CGFunctionInfo &FI) const override;
7124   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
7125                     QualType Ty) const override;
7126   bool isUnsupportedType(QualType T) const;
7127   ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
7128 };
7129
7130 class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
7131 public:
7132   NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
7133       : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}
7134
7135   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
7136                            CodeGen::CodeGenModule &M) const override;
7137   bool shouldEmitStaticExternCAliases() const override;
7138
7139   llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
7140     // On the device side, surface reference is represented as an object handle
7141     // in 64-bit integer.
7142     return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
7143   }
7144
7145   llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
7146     // On the device side, texture reference is represented as an object handle
7147     // in 64-bit integer.
7148     return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
7149   }
7150
7151   bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
7152                                               LValue Src) const override {
7153     emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
7154     return true;
7155   }
7156
7157   bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
7158                                               LValue Src) const override {
7159     emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
7160     return true;
7161   }
7162
7163 private:
7164   // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
7165   // resulting MDNode to the nvvm.annotations MDNode.
7166   static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
7167                               int Operand);
7168
7169   static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
7170                                            LValue Src) {
7171     llvm::Value *Handle = nullptr;
7172     llvm::Constant *C =
7173         llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).getPointer());
7174     // Lookup `addrspacecast` through the constant pointer if any.
7175     if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
7176       C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
7177     if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
7178       // Load the handle from the specific global variable using
7179       // `nvvm.texsurf.handle.internal` intrinsic.
7180       Handle = CGF.EmitRuntimeCall(
7181           CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
7182                                {GV->getType()}),
7183           {GV}, "texsurf_handle");
7184     } else
7185       Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
7186     CGF.EmitStoreOfScalar(Handle, Dst);
7187   }
7188 };
7189
7190 /// Checks if the type is unsupported directly by the current target.
7191 bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
7192   ASTContext &Context = getContext();
7193   if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
7194     return true;
7195   if (!Context.getTargetInfo().hasFloat128Type() &&
7196       (T->isFloat128Type() ||
7197        (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
7198     return true;
7199   if (const auto *EIT = T->getAs<BitIntType>())
7200     return EIT->getNumBits() >
7201            (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
7202   if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
7203       Context.getTypeSize(T) > 64U)
7204     return true;
7205   if (const auto *AT = T->getAsArrayTypeUnsafe())
7206     return isUnsupportedType(AT->getElementType());
7207   const auto *RT = T->getAs<RecordType>();
7208   if (!RT)
7209     return false;
7210   const RecordDecl *RD = RT->getDecl();
7211
7212   // If this is a C++ record, check the bases first.
7213   if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
7214     for (const CXXBaseSpecifier &I : CXXRD->bases())
7215       if (isUnsupportedType(I.getType()))
7216         return true;
7217
7218   for (const FieldDecl *I : RD->fields())
7219     if (isUnsupportedType(I->getType()))
7220       return true;
7221   return false;
7222 }
7223
7224 /// Coerce the given type into an array with maximum allowed size of elements.
7225 ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
7226                                                    unsigned MaxSize) const {
7227   // Alignment and Size are measured in bits.
7228   const uint64_t Size = getContext().getTypeSize(Ty);
7229   const uint64_t Alignment = getContext().getTypeAlign(Ty);
7230   const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
7231   llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
7232   const uint64_t NumElements = (Size + Div - 1) / Div;
7233   return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
7234 }
7235
7236 ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
7237   if (RetTy->isVoidType())
7238     return ABIArgInfo::getIgnore();
7239
7240   if (getContext().getLangOpts().OpenMP &&
7241       getContext().getLangOpts().OpenMPIsDevice && isUnsupportedType(RetTy))
7242     return coerceToIntArrayWithLimit(RetTy, 64);
7243
7244   // note: this is different from default ABI
7245   if (!RetTy->isScalarType())
7246     return ABIArgInfo::getDirect();
7247
7248   // Treat an enum type as its underlying type.
7249   if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
7250     RetTy = EnumTy->getDecl()->getIntegerType();
7251
7252   return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
7253                                                : ABIArgInfo::getDirect());
7254 }
7255
7256 ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
7257   // Treat an enum type as its underlying type.
7258   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
7259     Ty = EnumTy->getDecl()->getIntegerType();
7260
7261   // Return aggregates type as indirect by value
7262   if (isAggregateTypeForABI(Ty)) {
7263     // Under CUDA device compilation, tex/surf builtin types are replaced with
7264     // object types and passed directly.
7265     if (getContext().getLangOpts().CUDAIsDevice) {
7266       if (Ty->isCUDADeviceBuiltinSurfaceType())
7267         return ABIArgInfo::getDirect(
7268             CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
7269       if (Ty->isCUDADeviceBuiltinTextureType())
7270         return ABIArgInfo::getDirect(
7271             CGInfo.getCUDADeviceBuiltinTextureDeviceType());
7272     }
7273     return getNaturalAlignIndirect(Ty, /* byval */ true);
7274   }
7275
7276   if (const auto *EIT = Ty->getAs<BitIntType>()) {
7277     if ((EIT->getNumBits() > 128) ||
7278         (!getContext().getTargetInfo().hasInt128Type() &&
7279          EIT->getNumBits() > 64))
7280       return getNaturalAlignIndirect(Ty, /* byval */ true);
7281   }
7282
7283   return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
7284                                             : ABIArgInfo::getDirect());
7285 }
7286
7287 void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
7288   if (!getCXXABI().classifyReturnType(FI))
7289     FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
7290   for (auto &I : FI.arguments())
7291     I.info = classifyArgumentType(I.type);
7292
7293   // Always honor user-specified calling convention.
7294   if (FI.getCallingConvention() != llvm::CallingConv::C)
7295     return;
7296
7297   FI.setEffectiveCallingConvention(getRuntimeCC());
7298 }
7299
7300 Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
7301                                 QualType Ty) const {
7302   llvm_unreachable("NVPTX does not support varargs");
7303 }
7304
7305 void NVPTXTargetCodeGenInfo::setTargetAttributes(
7306     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
7307   if (GV->isDeclaration())
7308     return;
7309   const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
7310   if (VD) {
7311     if (M.getLangOpts().CUDA) {
7312       if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
7313         addNVVMMetadata(GV, "surface", 1);
7314       else if (VD->getType()->isCUDADeviceBuiltinTextureType())
7315         addNVVMMetadata(GV, "texture", 1);
7316       return;
7317     }
7318   }
7319
7320   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
7321   if (!FD) return;
7322
7323   llvm::Function *F = cast<llvm::Function>(GV);
7324
7325   // Perform special handling in OpenCL mode
7326   if (M.getLangOpts().OpenCL) {
7327     // Use OpenCL function attributes to check for kernel functions
7328     // By default, all functions are device functions
7329     if (FD->hasAttr<OpenCLKernelAttr>()) {
7330       // OpenCL __kernel functions get kernel metadata
7331       // Create !{<func-ref>, metadata !"kernel", i32 1} node
7332       addNVVMMetadata(F, "kernel", 1);
7333       // And kernel functions are not subject to inlining
7334       F->addFnAttr(llvm::Attribute::NoInline);
7335     }
7336   }
7337
7338   // Perform special handling in CUDA mode.
7339   if (M.getLangOpts().CUDA) {
7340     // CUDA __global__ functions get a kernel metadata entry.  Since
7341     // __global__ functions cannot be called from the device, we do not
7342     // need to set the noinline attribute.
7343     if (FD->hasAttr<CUDAGlobalAttr>()) {
7344       // Create !{<func-ref>, metadata !"kernel", i32 1} node
7345       addNVVMMetadata(F, "kernel", 1);
7346     }
7347     if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) {
7348       // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
7349       llvm::APSInt MaxThreads(32);
7350       MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext());
7351       if (MaxThreads > 0)
7352         addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue());
7353
7354       // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
7355       // not specified in __launch_bounds__ or if the user specified a 0 value,
7356       // we don't have to add a PTX directive.
7357       if (Attr->getMinBlocks()) {
7358         llvm::APSInt MinBlocks(32);
7359         MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext());
7360         if (MinBlocks > 0)
7361           // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
7362           addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
7363       }
7364     }
7365   }
7366
7367   // Attach kernel metadata directly if compiling for NVPTX.
7368   if (FD->hasAttr<NVPTXKernelAttr>()) {
7369     addNVVMMetadata(F, "kernel", 1);
7370   }
7371 }
7372
7373 void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
7374                                              StringRef Name, int Operand) {
7375   llvm::Module *M = GV->getParent();
7376   llvm::LLVMContext &Ctx = M->getContext();
7377
7378   // Get "nvvm.annotations" metadata node
7379   llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
7380
7381   llvm::Metadata *MDVals[] = {
7382       llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
7383       llvm::ConstantAsMetadata::get(
7384           llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
7385   // Append metadata to nvvm.annotations
7386   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
7387 }
7388
7389 bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
7390   return false;
7391 }
7392 }
7393
7394 //===----------------------------------------------------------------------===//
7395 // SystemZ ABI Implementation
7396 //===----------------------------------------------------------------------===//
7397
7398 namespace {
7399
7400 class SystemZABIInfo : public ABIInfo {
7401   bool HasVector;
7402   bool IsSoftFloatABI;
7403
7404 public:
7405   SystemZABIInfo(CodeGenTypes &CGT, bool HV, bool SF)
7406       : ABIInfo(CGT), HasVector(HV), IsSoftFloatABI(SF) {}
7407
7408   bool isPromotableIntegerTypeForABI(QualType Ty) const;
7409   bool isCompoundType(QualType Ty) const;
7410   bool isVectorArgumentType(QualType Ty) const;
7411   bool isFPArgumentType(QualType Ty) const;
7412   QualType GetSingleElementType(QualType Ty) const;
7413
7414   ABIArgInfo classifyReturnType(QualType RetTy) const;
7415   ABIArgInfo classifyArgumentType(QualType ArgTy) const;
7416
7417   void computeInfo(CGFunctionInfo &FI) const override;
7418   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
7419                     QualType Ty) const override;
7420 };
7421
7422 class SystemZTargetCodeGenInfo : public TargetCodeGenInfo {
7423   ASTContext &Ctx;
7424
7425   // These are used for speeding up the search for a visible vector ABI.
7426   mutable bool HasVisibleVecABIFlag = false;
7427   mutable std::set<const Type *> SeenTypes;
7428
7429   // Returns true (the first time) if Ty is, or is found to include, a vector
7430   // type that exposes the vector ABI. This is any vector >=16 bytes which
7431   // with vector support are aligned to only 8 bytes. When IsParam is true,
7432   // the type belongs to a value as passed between functions. If it is a
7433   // vector <=16 bytes it will be passed in a vector register (if supported).
7434   bool isVectorTypeBased(const Type *Ty, bool IsParam) const;
7435
7436 public:
7437   SystemZTargetCodeGenInfo(CodeGenTypes &CGT, bool HasVector, bool SoftFloatABI)
7438       : TargetCodeGenInfo(
7439             std::make_unique<SystemZABIInfo>(CGT, HasVector, SoftFloatABI)),
7440             Ctx(CGT.getContext()) {
7441     SwiftInfo =
7442         std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false);
7443   }
7444
7445   // The vector ABI is different when the vector facility is present and when
7446   // a module e.g. defines an externally visible vector variable, a flag
7447   // indicating a visible vector ABI is added. Eventually this will result in
7448   // a GNU attribute indicating the vector ABI of the module.  Ty is the type
7449   // of a variable or function parameter that is globally visible.
7450   void handleExternallyVisibleObjABI(const Type *Ty, CodeGen::CodeGenModule &M,
7451                                      bool IsParam) const {
7452     if (!HasVisibleVecABIFlag && isVectorTypeBased(Ty, IsParam)) {
7453       M.getModule().addModuleFlag(llvm::Module::Warning,
7454                                   "s390x-visible-vector-ABI", 1);
7455       HasVisibleVecABIFlag = true;
7456     }
7457   }
7458
7459   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
7460                            CodeGen::CodeGenModule &M) const override {
7461     if (!D)
7462       return;
7463
7464     // Check if the vector ABI becomes visible by an externally visible
7465     // variable or function.
7466     if (const auto *VD = dyn_cast<VarDecl>(D)) {
7467       if (VD->isExternallyVisible())
7468         handleExternallyVisibleObjABI(VD->getType().getTypePtr(), M,
7469                                       /*IsParam*/false);
7470     }
7471     else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
7472       if (FD->isExternallyVisible())
7473         handleExternallyVisibleObjABI(FD->getType().getTypePtr(), M,
7474                                       /*IsParam*/false);
7475     }
7476   }
7477
7478   llvm::Value *testFPKind(llvm::Value *V, unsigned BuiltinID,
7479                           CGBuilderTy &Builder,
7480                           CodeGenModule &CGM) const override {
7481     assert(V->getType()->isFloatingPointTy() && "V should have an FP type.");
7482     // Only use TDC in constrained FP mode.
7483     if (!Builder.getIsFPConstrained())
7484       return nullptr;
7485
7486     llvm::Type *Ty = V->getType();
7487     if (Ty->isFloatTy() || Ty->isDoubleTy() || Ty->isFP128Ty()) {
7488       llvm::Module &M = CGM.getModule();
7489       auto &Ctx = M.getContext();
7490       llvm::Function *TDCFunc =
7491           llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::s390_tdc, Ty);
7492       unsigned TDCBits = 0;
7493       switch (BuiltinID) {
7494       case Builtin::BI__builtin_isnan:
7495         TDCBits = 0xf;
7496         break;
7497       case Builtin::BIfinite:
7498       case Builtin::BI__finite:
7499       case Builtin::BIfinitef:
7500       case Builtin::BI__finitef:
7501       case Builtin::BIfinitel:
7502       case Builtin::BI__finitel:
7503       case Builtin::BI__builtin_isfinite:
7504         TDCBits = 0xfc0;
7505         break;
7506       case Builtin::BI__builtin_isinf:
7507         TDCBits = 0x30;
7508         break;
7509       default:
7510         break;
7511       }
7512       if (TDCBits)
7513         return Builder.CreateCall(
7514             TDCFunc,
7515             {V, llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), TDCBits)});
7516     }
7517     return nullptr;
7518   }
7519 };
7520 }
7521
7522 bool SystemZABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const {
7523   // Treat an enum type as its underlying type.
7524   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
7525     Ty = EnumTy->getDecl()->getIntegerType();
7526
7527   // Promotable integer types are required to be promoted by the ABI.
7528   if (ABIInfo::isPromotableIntegerTypeForABI(Ty))
7529     return true;
7530
7531   if (const auto *EIT = Ty->getAs<BitIntType>())
7532     if (EIT->getNumBits() < 64)
7533       return true;
7534
7535   // 32-bit values must also be promoted.
7536   if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
7537     switch (BT->getKind()) {
7538     case BuiltinType::Int:
7539     case BuiltinType::UInt:
7540       return true;
7541     default:
7542       return false;
7543     }
7544   return false;
7545 }
7546
7547 bool SystemZABIInfo::isCompoundType(QualType Ty) const {
7548   return (Ty->isAnyComplexType() ||
7549           Ty->isVectorType() ||
7550           isAggregateTypeForABI(Ty));
7551 }
7552
7553 bool SystemZABIInfo::isVectorArgumentType(QualType Ty) const {
7554   return (HasVector &&
7555           Ty->isVectorType() &&
7556           getContext().getTypeSize(Ty) <= 128);
7557 }
7558
7559 bool SystemZABIInfo::isFPArgumentType(QualType Ty) const {
7560   if (IsSoftFloatABI)
7561     return false;
7562
7563   if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
7564     switch (BT->getKind()) {
7565     case BuiltinType::Float:
7566     case BuiltinType::Double:
7567       return true;
7568     default:
7569       return false;
7570     }
7571
7572   return false;
7573 }
7574
7575 QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const {
7576   const RecordType *RT = Ty->getAs<RecordType>();
7577
7578   if (RT && RT->isStructureOrClassType()) {
7579     const RecordDecl *RD = RT->getDecl();
7580     QualType Found;
7581
7582     // If this is a C++ record, check the bases first.
7583     if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
7584       if (CXXRD->hasDefinition())
7585         for (const auto &I : CXXRD->bases()) {
7586           QualType Base = I.getType();
7587
7588           // Empty bases don't affect things either way.
7589           if (isEmptyRecord(getContext(), Base, true))
7590             continue;
7591
7592           if (!Found.isNull())
7593             return Ty;
7594           Found = GetSingleElementType(Base);
7595         }
7596
7597     // Check the fields.
7598     for (const auto *FD : RD->fields()) {
7599       // Unlike isSingleElementStruct(), empty structure and array fields
7600       // do count.  So do anonymous bitfields that aren't zero-sized.
7601
7602       // Like isSingleElementStruct(), ignore C++20 empty data members.
7603       if (FD->hasAttr<NoUniqueAddressAttr>() &&
7604           isEmptyRecord(getContext(), FD->getType(), true))
7605         continue;
7606
7607       // Unlike isSingleElementStruct(), arrays do not count.
7608       // Nested structures still do though.
7609       if (!Found.isNull())
7610         return Ty;
7611       Found = GetSingleElementType(FD->getType());
7612     }
7613
7614     // Unlike isSingleElementStruct(), trailing padding is allowed.
7615     // An 8-byte aligned struct s { float f; } is passed as a double.
7616     if (!Found.isNull())
7617       return Found;
7618   }
7619
7620   return Ty;
7621 }
7622
7623 Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
7624                                   QualType Ty) const {
7625   // Assume that va_list type is correct; should be pointer to LLVM type:
7626   // struct {
7627   //   i64 __gpr;
7628   //   i64 __fpr;
7629   //   i8 *__overflow_arg_area;
7630   //   i8 *__reg_save_area;
7631   // };
7632
7633   // Every non-vector argument occupies 8 bytes and is passed by preference
7634   // in either GPRs or FPRs.  Vector arguments occupy 8 or 16 bytes and are
7635   // always passed on the stack.
7636   const SystemZTargetCodeGenInfo &SZCGI =
7637       static_cast<const SystemZTargetCodeGenInfo &>(
7638           CGT.getCGM().getTargetCodeGenInfo());
7639   Ty = getContext().getCanonicalType(Ty);
7640   auto TyInfo = getContext().getTypeInfoInChars(Ty);
7641   llvm::Type *ArgTy = CGF.ConvertTypeForMem(Ty);
7642   llvm::Type *DirectTy = ArgTy;
7643   ABIArgInfo AI = classifyArgumentType(Ty);
7644   bool IsIndirect = AI.isIndirect();
7645   bool InFPRs = false;
7646   bool IsVector = false;
7647   CharUnits UnpaddedSize;
7648   CharUnits DirectAlign;
7649   SZCGI.handleExternallyVisibleObjABI(Ty.getTypePtr(), CGT.getCGM(),
7650                                       /*IsParam*/true);
7651   if (IsIndirect) {
7652     DirectTy = llvm::PointerType::getUnqual(DirectTy);
7653     UnpaddedSize = DirectAlign = CharUnits::fromQuantity(8);
7654   } else {
7655     if (AI.getCoerceToType())
7656       ArgTy = AI.getCoerceToType();
7657     InFPRs = (!IsSoftFloatABI && (ArgTy->isFloatTy() || ArgTy->isDoubleTy()));
7658     IsVector = ArgTy->isVectorTy();
7659     UnpaddedSize = TyInfo.Width;
7660     DirectAlign = TyInfo.Align;
7661   }
7662   CharUnits PaddedSize = CharUnits::fromQuantity(8);
7663   if (IsVector && UnpaddedSize > PaddedSize)
7664     PaddedSize = CharUnits::fromQuantity(16);
7665   assert((UnpaddedSize <= PaddedSize) && "Invalid argument size.");
7666
7667   CharUnits Padding = (PaddedSize - UnpaddedSize);
7668
7669   llvm::Type *IndexTy = CGF.Int64Ty;
7670   llvm::Value *PaddedSizeV =
7671     llvm::ConstantInt::get(IndexTy, PaddedSize.getQuantity());
7672
7673   if (IsVector) {
7674     // Work out the address of a vector argument on the stack.
7675     // Vector arguments are always passed in the high bits of a
7676     // single (8 byte) or double (16 byte) stack slot.
7677     Address OverflowArgAreaPtr =
7678         CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_ptr");
7679     Address OverflowArgArea =
7680         Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"),
7681                 CGF.Int8Ty, TyInfo.Align);
7682     Address MemAddr =
7683         CGF.Builder.CreateElementBitCast(OverflowArgArea, DirectTy, "mem_addr");
7684
7685     // Update overflow_arg_area_ptr pointer
7686     llvm::Value *NewOverflowArgArea = CGF.Builder.CreateGEP(
7687         OverflowArgArea.getElementType(), OverflowArgArea.getPointer(),
7688         PaddedSizeV, "overflow_arg_area");
7689     CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr);
7690
7691     return MemAddr;
7692   }
7693
7694   assert(PaddedSize.getQuantity() == 8);
7695
7696   unsigned MaxRegs, RegCountField, RegSaveIndex;
7697   CharUnits RegPadding;
7698   if (InFPRs) {
7699     MaxRegs = 4; // Maximum of 4 FPR arguments
7700     RegCountField = 1; // __fpr
7701     RegSaveIndex = 16; // save offset for f0
7702     RegPadding = CharUnits(); // floats are passed in the high bits of an FPR
7703   } else {
7704     MaxRegs = 5; // Maximum of 5 GPR arguments
7705     RegCountField = 0; // __gpr
7706     RegSaveIndex = 2; // save offset for r2
7707     RegPadding = Padding; // values are passed in the low bits of a GPR
7708   }
7709
7710   Address RegCountPtr =
7711       CGF.Builder.CreateStructGEP(VAListAddr, RegCountField, "reg_count_ptr");
7712   llvm::Value *RegCount = CGF.Builder.CreateLoad(RegCountPtr, "reg_count");
7713   llvm::Value *MaxRegsV = llvm::ConstantInt::get(IndexTy, MaxRegs);
7714   llvm::Value *InRegs = CGF.Builder.CreateICmpULT(RegCount, MaxRegsV,
7715                                                  "fits_in_regs");
7716
7717   llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
7718   llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem");
7719   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
7720   CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock);
7721
7722   // Emit code to load the value if it was passed in registers.
7723   CGF.EmitBlock(InRegBlock);
7724
7725   // Work out the address of an argument register.
7726   llvm::Value *ScaledRegCount =
7727     CGF.Builder.CreateMul(RegCount, PaddedSizeV, "scaled_reg_count");
7728   llvm::Value *RegBase =
7729     llvm::ConstantInt::get(IndexTy, RegSaveIndex * PaddedSize.getQuantity()
7730                                       + RegPadding.getQuantity());
7731   llvm::Value *RegOffset =
7732     CGF.Builder.CreateAdd(ScaledRegCount, RegBase, "reg_offset");
7733   Address RegSaveAreaPtr =
7734       CGF.Builder.CreateStructGEP(VAListAddr, 3, "reg_save_area_ptr");
7735   llvm::Value *RegSaveArea =
7736       CGF.Builder.CreateLoad(RegSaveAreaPtr, "reg_save_area");
7737   Address RawRegAddr(
7738       CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, RegOffset, "raw_reg_addr"),
7739       CGF.Int8Ty, PaddedSize);
7740   Address RegAddr =
7741       CGF.Builder.CreateElementBitCast(RawRegAddr, DirectTy, "reg_addr");
7742
7743   // Update the register count
7744   llvm::Value *One = llvm::ConstantInt::get(IndexTy, 1);
7745   llvm::Value *NewRegCount =
7746     CGF.Builder.CreateAdd(RegCount, One, "reg_count");
7747   CGF.Builder.CreateStore(NewRegCount, RegCountPtr);
7748   CGF.EmitBranch(ContBlock);
7749
7750   // Emit code to load the value if it was passed in memory.
7751   CGF.EmitBlock(InMemBlock);
7752
7753   // Work out the address of a stack argument.
7754   Address OverflowArgAreaPtr =
7755       CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_ptr");
7756   Address OverflowArgArea =
7757       Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"),
7758               CGF.Int8Ty, PaddedSize);
7759   Address RawMemAddr =
7760       CGF.Builder.CreateConstByteGEP(OverflowArgArea, Padding, "raw_mem_addr");
7761   Address MemAddr =
7762     CGF.Builder.CreateElementBitCast(RawMemAddr, DirectTy, "mem_addr");
7763
7764   // Update overflow_arg_area_ptr pointer
7765   llvm::Value *NewOverflowArgArea =
7766     CGF.Builder.CreateGEP(OverflowArgArea.getElementType(),
7767                           OverflowArgArea.getPointer(), PaddedSizeV,
7768                           "overflow_arg_area");
7769   CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr);
7770   CGF.EmitBranch(ContBlock);
7771
7772   // Return the appropriate result.
7773   CGF.EmitBlock(ContBlock);
7774   Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock,
7775                                  "va_arg.addr");
7776
7777   if (IsIndirect)
7778     ResAddr = Address(CGF.Builder.CreateLoad(ResAddr, "indirect_arg"), ArgTy,
7779                       TyInfo.Align);
7780
7781   return ResAddr;
7782 }
7783
7784 ABIArgInfo SystemZABIInfo::classifyReturnType(QualType RetTy) const {
7785   if (RetTy->isVoidType())
7786     return ABIArgInfo::getIgnore();
7787   if (isVectorArgumentType(RetTy))
7788     return ABIArgInfo::getDirect();
7789   if (isCompoundType(RetTy) || getContext().getTypeSize(RetTy) > 64)
7790     return getNaturalAlignIndirect(RetTy);
7791   return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
7792                                                : ABIArgInfo::getDirect());
7793 }
7794
7795 ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const {
7796   // Handle the generic C++ ABI.
7797   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
7798     return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
7799
7800   // Integers and enums are extended to full register width.
7801   if (isPromotableIntegerTypeForABI(Ty))
7802     return ABIArgInfo::getExtend(Ty);
7803
7804   // Handle vector types and vector-like structure types.  Note that
7805   // as opposed to float-like structure types, we do not allow any
7806   // padding for vector-like structures, so verify the sizes match.
7807   uint64_t Size = getContext().getTypeSize(Ty);
7808   QualType SingleElementTy = GetSingleElementType(Ty);
7809   if (isVectorArgumentType(SingleElementTy) &&
7810       getContext().getTypeSize(SingleElementTy) == Size)
7811     return ABIArgInfo::getDirect(CGT.ConvertType(SingleElementTy));
7812
7813   // Values that are not 1, 2, 4 or 8 bytes in size are passed indirectly.
7814   if (Size != 8 && Size != 16 && Size != 32 && Size != 64)
7815     return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
7816
7817   // Handle small structures.
7818   if (const RecordType *RT = Ty->getAs<RecordType>()) {
7819     // Structures with flexible arrays have variable length, so really
7820     // fail the size test above.
7821     const RecordDecl *RD = RT->getDecl();
7822     if (RD->hasFlexibleArrayMember())
7823       return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
7824
7825     // The structure is passed as an unextended integer, a float, or a double.
7826     llvm::Type *PassTy;
7827     if (isFPArgumentType(SingleElementTy)) {
7828       assert(Size == 32 || Size == 64);
7829       if (Size == 32)
7830         PassTy = llvm::Type::getFloatTy(getVMContext());
7831       else
7832         PassTy = llvm::Type::getDoubleTy(getVMContext());
7833     } else
7834       PassTy = llvm::IntegerType::get(getVMContext(), Size);
7835     return ABIArgInfo::getDirect(PassTy);
7836   }
7837
7838   // Non-structure compounds are passed indirectly.
7839   if (isCompoundType(Ty))
7840     return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
7841
7842   return ABIArgInfo::getDirect(nullptr);
7843 }
7844
7845 void SystemZABIInfo::computeInfo(CGFunctionInfo &FI) const {
7846   const SystemZTargetCodeGenInfo &SZCGI =
7847       static_cast<const SystemZTargetCodeGenInfo &>(
7848           CGT.getCGM().getTargetCodeGenInfo());
7849   if (!getCXXABI().classifyReturnType(FI))
7850     FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
7851   unsigned Idx = 0;
7852   for (auto &I : FI.arguments()) {
7853     I.info = classifyArgumentType(I.type);
7854     if (FI.isVariadic() && Idx++ >= FI.getNumRequiredArgs())
7855       // Check if a vararg vector argument is passed, in which case the
7856       // vector ABI becomes visible as the va_list could be passed on to
7857       // other functions.
7858       SZCGI.handleExternallyVisibleObjABI(I.type.getTypePtr(), CGT.getCGM(),
7859                                           /*IsParam*/true);
7860   }
7861 }
7862
7863 bool SystemZTargetCodeGenInfo::isVectorTypeBased(const Type *Ty,
7864                                                  bool IsParam) const {
7865   if (!SeenTypes.insert(Ty).second)
7866     return false;
7867
7868   if (IsParam) {
7869     // A narrow (<16 bytes) vector will as a parameter also expose the ABI as
7870     // it will be passed in a vector register. A wide (>16 bytes) vector will
7871     // be passed via "hidden" pointer where any extra alignment is not
7872     // required (per GCC).
7873     const Type *SingleEltTy = getABIInfo<SystemZABIInfo>()
7874                                   .GetSingleElementType(QualType(Ty, 0))
7875                                   .getTypePtr();
7876     bool SingleVecEltStruct = SingleEltTy != Ty && SingleEltTy->isVectorType() &&
7877       Ctx.getTypeSize(SingleEltTy) == Ctx.getTypeSize(Ty);
7878     if (Ty->isVectorType() || SingleVecEltStruct)
7879       return Ctx.getTypeSize(Ty) / 8 <= 16;
7880   }
7881
7882   // Assume pointers are dereferenced.
7883   while (Ty->isPointerType() || Ty->isArrayType())
7884     Ty = Ty->getPointeeOrArrayElementType();
7885
7886   // Vectors >= 16 bytes expose the ABI through alignment requirements.
7887   if (Ty->isVectorType() && Ctx.getTypeSize(Ty) / 8 >= 16)
7888       return true;
7889
7890   if (const auto *RecordTy = Ty->getAs<RecordType>()) {
7891     const RecordDecl *RD = RecordTy->getDecl();
7892     if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
7893       if (CXXRD->hasDefinition())
7894         for (const auto &I : CXXRD->bases())
7895           if (isVectorTypeBased(I.getType().getTypePtr(), /*IsParam*/false))
7896             return true;
7897     for (const auto *FD : RD->fields())
7898       if (isVectorTypeBased(FD->getType().getTypePtr(), /*IsParam*/false))
7899         return true;
7900   }
7901
7902   if (const auto *FT = Ty->getAs<FunctionType>())
7903     if (isVectorTypeBased(FT->getReturnType().getTypePtr(), /*IsParam*/true))
7904       return true;
7905   if (const FunctionProtoType *Proto = Ty->getAs<FunctionProtoType>())
7906     for (const auto &ParamType : Proto->getParamTypes())
7907       if (isVectorTypeBased(ParamType.getTypePtr(), /*IsParam*/true))
7908         return true;
7909
7910   return false;
7911 }
7912
7913 //===----------------------------------------------------------------------===//
7914 // MSP430 ABI Implementation
7915 //===----------------------------------------------------------------------===//
7916
7917 namespace {
7918
7919 class MSP430ABIInfo : public DefaultABIInfo {
7920   static ABIArgInfo complexArgInfo() {
7921     ABIArgInfo Info = ABIArgInfo::getDirect();
7922     Info.setCanBeFlattened(false);
7923     return Info;
7924   }
7925
7926 public:
7927   MSP430ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
7928
7929   ABIArgInfo classifyReturnType(QualType RetTy) const {
7930     if (RetTy->isAnyComplexType())
7931       return complexArgInfo();
7932
7933     return DefaultABIInfo::classifyReturnType(RetTy);
7934   }
7935
7936   ABIArgInfo classifyArgumentType(QualType RetTy) const {
7937     if (RetTy->isAnyComplexType())
7938       return complexArgInfo();
7939
7940     return DefaultABIInfo::classifyArgumentType(RetTy);
7941   }
7942
7943   // Just copy the original implementations because
7944   // DefaultABIInfo::classify{Return,Argument}Type() are not virtual
7945   void computeInfo(CGFunctionInfo &FI) const override {
7946     if (!getCXXABI().classifyReturnType(FI))
7947       FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
7948     for (auto &I : FI.arguments())
7949       I.info = classifyArgumentType(I.type);
7950   }
7951
7952   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
7953                     QualType Ty) const override {
7954     return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty));
7955   }
7956 };
7957
7958 class MSP430TargetCodeGenInfo : public TargetCodeGenInfo {
7959 public:
7960   MSP430TargetCodeGenInfo(CodeGenTypes &CGT)
7961       : TargetCodeGenInfo(std::make_unique<MSP430ABIInfo>(CGT)) {}
7962   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
7963                            CodeGen::CodeGenModule &M) const override;
7964 };
7965
7966 }
7967
7968 void MSP430TargetCodeGenInfo::setTargetAttributes(
7969     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
7970   if (GV->isDeclaration())
7971     return;
7972   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
7973     const auto *InterruptAttr = FD->getAttr<MSP430InterruptAttr>();
7974     if (!InterruptAttr)
7975       return;
7976
7977     // Handle 'interrupt' attribute:
7978     llvm::Function *F = cast<llvm::Function>(GV);
7979
7980     // Step 1: Set ISR calling convention.
7981     F->setCallingConv(llvm::CallingConv::MSP430_INTR);
7982
7983     // Step 2: Add attributes goodness.
7984     F->addFnAttr(llvm::Attribute::NoInline);
7985     F->addFnAttr("interrupt", llvm::utostr(InterruptAttr->getNumber()));
7986   }
7987 }
7988
7989 //===----------------------------------------------------------------------===//
7990 // MIPS ABI Implementation.  This works for both little-endian and
7991 // big-endian variants.
7992 //===----------------------------------------------------------------------===//
7993
7994 namespace {
7995 class MipsABIInfo : public ABIInfo {
7996   bool IsO32;
7997   const unsigned MinABIStackAlignInBytes, StackAlignInBytes;
7998   void CoerceToIntArgs(uint64_t TySize,
7999                        SmallVectorImpl<llvm::Type *> &ArgList) const;
8000   llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const;
8001   llvm::Type* returnAggregateInRegs(QualType RetTy, uint64_t Size) const;
8002   llvm::Type* getPaddingType(uint64_t Align, uint64_t Offset) const;
8003 public:
8004   MipsABIInfo(CodeGenTypes &CGT, bool _IsO32) :
8005     ABIInfo(CGT), IsO32(_IsO32), MinABIStackAlignInBytes(IsO32 ? 4 : 8),
8006     StackAlignInBytes(IsO32 ? 8 : 16) {}
8007
8008   ABIArgInfo classifyReturnType(QualType RetTy) const;
8009   ABIArgInfo classifyArgumentType(QualType RetTy, uint64_t &Offset) const;
8010   void computeInfo(CGFunctionInfo &FI) const override;
8011   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
8012                     QualType Ty) const override;
8013   ABIArgInfo extendType(QualType Ty) const;
8014 };
8015
8016 class MIPSTargetCodeGenInfo : public TargetCodeGenInfo {
8017   unsigned SizeOfUnwindException;
8018 public:
8019   MIPSTargetCodeGenInfo(CodeGenTypes &CGT, bool IsO32)
8020       : TargetCodeGenInfo(std::make_unique<MipsABIInfo>(CGT, IsO32)),
8021         SizeOfUnwindException(IsO32 ? 24 : 32) {}
8022
8023   int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
8024     return 29;
8025   }
8026
8027   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
8028                            CodeGen::CodeGenModule &CGM) const override {
8029     const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
8030     if (!FD) return;
8031     llvm::Function *Fn = cast<llvm::Function>(GV);
8032
8033     if (FD->hasAttr<MipsLongCallAttr>())
8034       Fn->addFnAttr("long-call");
8035     else if (FD->hasAttr<MipsShortCallAttr>())
8036       Fn->addFnAttr("short-call");
8037
8038     // Other attributes do not have a meaning for declarations.
8039     if (GV->isDeclaration())
8040       return;
8041
8042     if (FD->hasAttr<Mips16Attr>()) {
8043       Fn->addFnAttr("mips16");
8044     }
8045     else if (FD->hasAttr<NoMips16Attr>()) {
8046       Fn->addFnAttr("nomips16");
8047     }
8048
8049     if (FD->hasAttr<MicroMipsAttr>())
8050       Fn->addFnAttr("micromips");
8051     else if (FD->hasAttr<NoMicroMipsAttr>())
8052       Fn->addFnAttr("nomicromips");
8053
8054     const MipsInterruptAttr *Attr = FD->getAttr<MipsInterruptAttr>();
8055     if (!Attr)
8056       return;
8057
8058     const char *Kind;
8059     switch (Attr->getInterrupt()) {
8060     case MipsInterruptAttr::eic:     Kind = "eic"; break;
8061     case MipsInterruptAttr::sw0:     Kind = "sw0"; break;
8062     case MipsInterruptAttr::sw1:     Kind = "sw1"; break;
8063     case MipsInterruptAttr::hw0:     Kind = "hw0"; break;
8064     case MipsInterruptAttr::hw1:     Kind = "hw1"; break;
8065     case MipsInterruptAttr::hw2:     Kind = "hw2"; break;
8066     case MipsInterruptAttr::hw3:     Kind = "hw3"; break;
8067     case MipsInterruptAttr::hw4:     Kind = "hw4"; break;
8068     case MipsInterruptAttr::hw5:     Kind = "hw5"; break;
8069     }
8070
8071     Fn->addFnAttr("interrupt", Kind);
8072
8073   }
8074
8075   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
8076                                llvm::Value *Address) const override;
8077
8078   unsigned getSizeOfUnwindException() const override {
8079     return SizeOfUnwindException;
8080   }
8081 };
8082 }
8083
8084 void MipsABIInfo::CoerceToIntArgs(
8085     uint64_t TySize, SmallVectorImpl<llvm::Type *> &ArgList) const {
8086   llvm::IntegerType *IntTy =
8087     llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8);
8088
8089   // Add (TySize / MinABIStackAlignInBytes) args of IntTy.
8090   for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N)
8091     ArgList.push_back(IntTy);
8092
8093   // If necessary, add one more integer type to ArgList.
8094   unsigned R = TySize % (MinABIStackAlignInBytes * 8);
8095
8096   if (R)
8097     ArgList.push_back(llvm::IntegerType::get(getVMContext(), R));
8098 }
8099
8100 // In N32/64, an aligned double precision floating point field is passed in
8101 // a register.
8102 llvm::Type* MipsABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const {
8103   SmallVector<llvm::Type*, 8> ArgList, IntArgList;
8104
8105   if (IsO32) {
8106     CoerceToIntArgs(TySize, ArgList);
8107     return llvm::StructType::get(getVMContext(), ArgList);
8108   }
8109
8110   if (Ty->isComplexType())
8111     return CGT.ConvertType(Ty);
8112
8113   const RecordType *RT = Ty->getAs<RecordType>();
8114
8115   // Unions/vectors are passed in integer registers.
8116   if (!RT || !RT->isStructureOrClassType()) {
8117     CoerceToIntArgs(TySize, ArgList);
8118     return llvm::StructType::get(getVMContext(), ArgList);
8119   }
8120
8121   const RecordDecl *RD = RT->getDecl();
8122   const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
8123   assert(!(TySize % 8) && "Size of structure must be multiple of 8.");
8124
8125   uint64_t LastOffset = 0;
8126   unsigned idx = 0;
8127   llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64);
8128
8129   // Iterate over fields in the struct/class and check if there are any aligned
8130   // double fields.
8131   for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
8132        i != e; ++i, ++idx) {
8133     const QualType Ty = i->getType();
8134     const BuiltinType *BT = Ty->getAs<BuiltinType>();
8135
8136     if (!BT || BT->getKind() != BuiltinType::Double)
8137       continue;
8138
8139     uint64_t Offset = Layout.getFieldOffset(idx);
8140     if (Offset % 64) // Ignore doubles that are not aligned.
8141       continue;
8142
8143     // Add ((Offset - LastOffset) / 64) args of type i64.
8144     for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j)
8145       ArgList.push_back(I64);
8146
8147     // Add double type.
8148     ArgList.push_back(llvm::Type::getDoubleTy(getVMContext()));
8149     LastOffset = Offset + 64;
8150   }
8151
8152   CoerceToIntArgs(TySize - LastOffset, IntArgList);
8153   ArgList.append(IntArgList.begin(), IntArgList.end());
8154
8155   return llvm::StructType::get(getVMContext(), ArgList);
8156 }
8157
8158 llvm::Type *MipsABIInfo::getPaddingType(uint64_t OrigOffset,
8159                                         uint64_t Offset) const {
8160   if (OrigOffset + MinABIStackAlignInBytes > Offset)
8161     return nullptr;
8162
8163   return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8);
8164 }
8165
8166 ABIArgInfo
8167 MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const {
8168   Ty = useFirstFieldIfTransparentUnion(Ty);
8169
8170   uint64_t OrigOffset = Offset;
8171   uint64_t TySize = getContext().getTypeSize(Ty);
8172   uint64_t Align = getContext().getTypeAlign(Ty) / 8;
8173
8174   Align = std::clamp(Align, (uint64_t)MinABIStackAlignInBytes,
8175                      (uint64_t)StackAlignInBytes);
8176   unsigned CurrOffset = llvm::alignTo(Offset, Align);
8177   Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8;
8178
8179   if (isAggregateTypeForABI(Ty) || Ty->isVectorType()) {
8180     // Ignore empty aggregates.
8181     if (TySize == 0)
8182       return ABIArgInfo::getIgnore();
8183
8184     if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
8185       Offset = OrigOffset + MinABIStackAlignInBytes;
8186       return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
8187     }
8188
8189     // If we have reached here, aggregates are passed directly by coercing to
8190     // another structure type. Padding is inserted if the offset of the
8191     // aggregate is unaligned.
8192     ABIArgInfo ArgInfo =
8193         ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0,
8194                               getPaddingType(OrigOffset, CurrOffset));
8195     ArgInfo.setInReg(true);
8196     return ArgInfo;
8197   }
8198
8199   // Treat an enum type as its underlying type.
8200   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
8201     Ty = EnumTy->getDecl()->getIntegerType();
8202
8203   // Make sure we pass indirectly things that are too large.
8204   if (const auto *EIT = Ty->getAs<BitIntType>())
8205     if (EIT->getNumBits() > 128 ||
8206         (EIT->getNumBits() > 64 &&
8207          !getContext().getTargetInfo().hasInt128Type()))
8208       return getNaturalAlignIndirect(Ty);
8209
8210   // All integral types are promoted to the GPR width.
8211   if (Ty->isIntegralOrEnumerationType())
8212     return extendType(Ty);
8213
8214   return ABIArgInfo::getDirect(
8215       nullptr, 0, IsO32 ? nullptr : getPaddingType(OrigOffset, CurrOffset));
8216 }
8217
8218 llvm::Type*
8219 MipsABIInfo::returnAggregateInRegs(QualType RetTy, uint64_t Size) const {
8220   const RecordType *RT = RetTy->getAs<RecordType>();
8221   SmallVector<llvm::Type*, 8> RTList;
8222
8223   if (RT && RT->isStructureOrClassType()) {
8224     const RecordDecl *RD = RT->getDecl();
8225     const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
8226     unsigned FieldCnt = Layout.getFieldCount();
8227
8228     // N32/64 returns struct/classes in floating point registers if the
8229     // following conditions are met:
8230     // 1. The size of the struct/class is no larger than 128-bit.
8231     // 2. The struct/class has one or two fields all of which are floating
8232     //    point types.
8233     // 3. The offset of the first field is zero (this follows what gcc does).
8234     //
8235     // Any other composite results are returned in integer registers.
8236     //
8237     if (FieldCnt && (FieldCnt <= 2) && !Layout.getFieldOffset(0)) {
8238       RecordDecl::field_iterator b = RD->field_begin(), e = RD->field_end();
8239       for (; b != e; ++b) {
8240         const BuiltinType *BT = b->getType()->getAs<BuiltinType>();
8241
8242         if (!BT || !BT->isFloatingPoint())
8243           break;
8244
8245         RTList.push_back(CGT.ConvertType(b->getType()));
8246       }
8247
8248       if (b == e)
8249         return llvm::StructType::get(getVMContext(), RTList,
8250                                      RD->hasAttr<PackedAttr>());
8251
8252       RTList.clear();
8253     }
8254   }
8255
8256   CoerceToIntArgs(Size, RTList);
8257   return llvm::StructType::get(getVMContext(), RTList);
8258 }
8259
8260 ABIArgInfo MipsABIInfo::classifyReturnType(QualType RetTy) const {
8261   uint64_t Size = getContext().getTypeSize(RetTy);
8262
8263   if (RetTy->isVoidType())
8264     return ABIArgInfo::getIgnore();
8265
8266   // O32 doesn't treat zero-sized structs differently from other structs.
8267   // However, N32/N64 ignores zero sized return values.
8268   if (!IsO32 && Size == 0)
8269     return ABIArgInfo::getIgnore();
8270
8271   if (isAggregateTypeForABI(RetTy) || RetTy->isVectorType()) {
8272     if (Size <= 128) {
8273       if (RetTy->isAnyComplexType())
8274         return ABIArgInfo::getDirect();
8275
8276       // O32 returns integer vectors in registers and N32/N64 returns all small
8277       // aggregates in registers.
8278       if (!IsO32 ||
8279           (RetTy->isVectorType() && !RetTy->hasFloatingRepresentation())) {
8280         ABIArgInfo ArgInfo =
8281             ABIArgInfo::getDirect(returnAggregateInRegs(RetTy, Size));
8282         ArgInfo.setInReg(true);
8283         return ArgInfo;
8284       }
8285     }
8286
8287     return getNaturalAlignIndirect(RetTy);
8288   }
8289
8290   // Treat an enum type as its underlying type.
8291   if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
8292     RetTy = EnumTy->getDecl()->getIntegerType();
8293
8294   // Make sure we pass indirectly things that are too large.
8295   if (const auto *EIT = RetTy->getAs<BitIntType>())
8296     if (EIT->getNumBits() > 128 ||
8297         (EIT->getNumBits() > 64 &&
8298          !getContext().getTargetInfo().hasInt128Type()))
8299       return getNaturalAlignIndirect(RetTy);
8300
8301   if (isPromotableIntegerTypeForABI(RetTy))
8302     return ABIArgInfo::getExtend(RetTy);
8303
8304   if ((RetTy->isUnsignedIntegerOrEnumerationType() ||
8305       RetTy->isSignedIntegerOrEnumerationType()) && Size == 32 && !IsO32)
8306     return ABIArgInfo::getSignExtend(RetTy);
8307
8308   return ABIArgInfo::getDirect();
8309 }
8310
8311 void MipsABIInfo::computeInfo(CGFunctionInfo &FI) const {
8312   ABIArgInfo &RetInfo = FI.getReturnInfo();
8313   if (!getCXXABI().classifyReturnType(FI))
8314     RetInfo = classifyReturnType(FI.getReturnType());
8315
8316   // Check if a pointer to an aggregate is passed as a hidden argument.
8317   uint64_t Offset = RetInfo.isIndirect() ? MinABIStackAlignInBytes : 0;
8318
8319   for (auto &I : FI.arguments())
8320     I.info = classifyArgumentType(I.type, Offset);
8321 }
8322
8323 Address MipsABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
8324                                QualType OrigTy) const {
8325   QualType Ty = OrigTy;
8326
8327   // Integer arguments are promoted to 32-bit on O32 and 64-bit on N32/N64.
8328   // Pointers are also promoted in the same way but this only matters for N32.
8329   unsigned SlotSizeInBits = IsO32 ? 32 : 64;
8330   unsigned PtrWidth = getTarget().getPointerWidth(LangAS::Default);
8331   bool DidPromote = false;
8332   if ((Ty->isIntegerType() &&
8333           getContext().getIntWidth(Ty) < SlotSizeInBits) ||
8334       (Ty->isPointerType() && PtrWidth < SlotSizeInBits)) {
8335     DidPromote = true;
8336     Ty = getContext().getIntTypeForBitwidth(SlotSizeInBits,
8337                                             Ty->isSignedIntegerType());
8338   }
8339
8340   auto TyInfo = getContext().getTypeInfoInChars(Ty);
8341
8342   // The alignment of things in the argument area is never larger than
8343   // StackAlignInBytes.
8344   TyInfo.Align =
8345     std::min(TyInfo.Align, CharUnits::fromQuantity(StackAlignInBytes));
8346
8347   // MinABIStackAlignInBytes is the size of argument slots on the stack.
8348   CharUnits ArgSlotSize = CharUnits::fromQuantity(MinABIStackAlignInBytes);
8349
8350   Address Addr = emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
8351                           TyInfo, ArgSlotSize, /*AllowHigherAlign*/ true);
8352
8353
8354   // If there was a promotion, "unpromote" into a temporary.
8355   // TODO: can we just use a pointer into a subset of the original slot?
8356   if (DidPromote) {
8357     Address Temp = CGF.CreateMemTemp(OrigTy, "vaarg.promotion-temp");
8358     llvm::Value *Promoted = CGF.Builder.CreateLoad(Addr);
8359
8360     // Truncate down to the right width.
8361     llvm::Type *IntTy = (OrigTy->isIntegerType() ? Temp.getElementType()
8362                                                  : CGF.IntPtrTy);
8363     llvm::Value *V = CGF.Builder.CreateTrunc(Promoted, IntTy);
8364     if (OrigTy->isPointerType())
8365       V = CGF.Builder.CreateIntToPtr(V, Temp.getElementType());
8366
8367     CGF.Builder.CreateStore(V, Temp);
8368     Addr = Temp;
8369   }
8370
8371   return Addr;
8372 }
8373
8374 ABIArgInfo MipsABIInfo::extendType(QualType Ty) const {
8375   int TySize = getContext().getTypeSize(Ty);
8376
8377   // MIPS64 ABI requires unsigned 32 bit integers to be sign extended.
8378   if (Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
8379     return ABIArgInfo::getSignExtend(Ty);
8380
8381   return ABIArgInfo::getExtend(Ty);
8382 }
8383
8384 bool
8385 MIPSTargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
8386                                                llvm::Value *Address) const {
8387   // This information comes from gcc's implementation, which seems to
8388   // as canonical as it gets.
8389
8390   // Everything on MIPS is 4 bytes.  Double-precision FP registers
8391   // are aliased to pairs of single-precision FP registers.
8392   llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);
8393
8394   // 0-31 are the general purpose registers, $0 - $31.
8395   // 32-63 are the floating-point registers, $f0 - $f31.
8396   // 64 and 65 are the multiply/divide registers, $hi and $lo.
8397   // 66 is the (notional, I think) register for signal-handler return.
8398   AssignToArrayRange(CGF.Builder, Address, Four8, 0, 65);
8399
8400   // 67-74 are the floating-point status registers, $fcc0 - $fcc7.
8401   // They are one bit wide and ignored here.
8402
8403   // 80-111 are the coprocessor 0 registers, $c0r0 - $c0r31.
8404   // (coprocessor 1 is the FP unit)
8405   // 112-143 are the coprocessor 2 registers, $c2r0 - $c2r31.
8406   // 144-175 are the coprocessor 3 registers, $c3r0 - $c3r31.
8407   // 176-181 are the DSP accumulator registers.
8408   AssignToArrayRange(CGF.Builder, Address, Four8, 80, 181);
8409   return false;
8410 }
8411
8412 //===----------------------------------------------------------------------===//
8413 // M68k ABI Implementation
8414 //===----------------------------------------------------------------------===//
8415
8416 namespace {
8417
8418 class M68kTargetCodeGenInfo : public TargetCodeGenInfo {
8419 public:
8420   M68kTargetCodeGenInfo(CodeGenTypes &CGT)
8421       : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}
8422   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
8423                            CodeGen::CodeGenModule &M) const override;
8424 };
8425
8426 } // namespace
8427
8428 void M68kTargetCodeGenInfo::setTargetAttributes(
8429     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
8430   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
8431     if (const auto *attr = FD->getAttr<M68kInterruptAttr>()) {
8432       // Handle 'interrupt' attribute:
8433       llvm::Function *F = cast<llvm::Function>(GV);
8434
8435       // Step 1: Set ISR calling convention.
8436       F->setCallingConv(llvm::CallingConv::M68k_INTR);
8437
8438       // Step 2: Add attributes goodness.
8439       F->addFnAttr(llvm::Attribute::NoInline);
8440
8441       // Step 3: Emit ISR vector alias.
8442       unsigned Num = attr->getNumber() / 2;
8443       llvm::GlobalAlias::create(llvm::Function::ExternalLinkage,
8444                                 "__isr_" + Twine(Num), F);
8445     }
8446   }
8447 }
8448
8449 //===----------------------------------------------------------------------===//
8450 // AVR ABI Implementation. Documented at
8451 // https://gcc.gnu.org/wiki/avr-gcc#Calling_Convention
8452 // https://gcc.gnu.org/wiki/avr-gcc#Reduced_Tiny
8453 //===----------------------------------------------------------------------===//
8454
8455 namespace {
8456 class AVRABIInfo : public DefaultABIInfo {
8457 private:
8458   // The total amount of registers can be used to pass parameters. It is 18 on
8459   // AVR, or 6 on AVRTiny.
8460   const unsigned ParamRegs;
8461   // The total amount of registers can be used to pass return value. It is 8 on
8462   // AVR, or 4 on AVRTiny.
8463   const unsigned RetRegs;
8464
8465 public:
8466   AVRABIInfo(CodeGenTypes &CGT, unsigned NPR, unsigned NRR)
8467       : DefaultABIInfo(CGT), ParamRegs(NPR), RetRegs(NRR) {}
8468
8469   ABIArgInfo classifyReturnType(QualType Ty, bool &LargeRet) const {
8470     // On AVR, a return struct with size less than or equals to 8 bytes is
8471     // returned directly via registers R18-R25. On AVRTiny, a return struct
8472     // with size less than or equals to 4 bytes is returned directly via
8473     // registers R22-R25.
8474     if (isAggregateTypeForABI(Ty) &&
8475         getContext().getTypeSize(Ty) <= RetRegs * 8)
8476       return ABIArgInfo::getDirect();
8477     // A return value (struct or scalar) with larger size is returned via a
8478     // stack slot, along with a pointer as the function's implicit argument.
8479     if (getContext().getTypeSize(Ty) > RetRegs * 8) {
8480       LargeRet = true;
8481       return getNaturalAlignIndirect(Ty);
8482     }
8483     // An i8 return value should not be extended to i16, since AVR has 8-bit
8484     // registers.
8485     if (Ty->isIntegralOrEnumerationType() && getContext().getTypeSize(Ty) <= 8)
8486       return ABIArgInfo::getDirect();
8487     // Otherwise we follow the default way which is compatible.
8488     return DefaultABIInfo::classifyReturnType(Ty);
8489   }
8490
8491   ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegs) const {
8492     unsigned TySize = getContext().getTypeSize(Ty);
8493
8494     // An int8 type argument always costs two registers like an int16.
8495     if (TySize == 8 && NumRegs >= 2) {
8496       NumRegs -= 2;
8497       return ABIArgInfo::getExtend(Ty);
8498     }
8499
8500     // If the argument size is an odd number of bytes, round up the size
8501     // to the next even number.
8502     TySize = llvm::alignTo(TySize, 16);
8503
8504     // Any type including an array/struct type can be passed in rgisters,
8505     // if there are enough registers left.
8506     if (TySize <= NumRegs * 8) {
8507       NumRegs -= TySize / 8;
8508       return ABIArgInfo::getDirect();
8509     }
8510
8511     // An argument is passed either completely in registers or completely in
8512     // memory. Since there are not enough registers left, current argument
8513     // and all other unprocessed arguments should be passed in memory.
8514     // However we still need to return `ABIArgInfo::getDirect()` other than
8515     // `ABIInfo::getNaturalAlignIndirect(Ty)`, otherwise an extra stack slot
8516     // will be allocated, so the stack frame layout will be incompatible with
8517     // avr-gcc.
8518     NumRegs = 0;
8519     return ABIArgInfo::getDirect();
8520   }
8521
8522   void computeInfo(CGFunctionInfo &FI) const override {
8523     // Decide the return type.
8524     bool LargeRet = false;
8525     if (!getCXXABI().classifyReturnType(FI))
8526       FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), LargeRet);
8527
8528     // Decide each argument type. The total number of registers can be used for
8529     // arguments depends on several factors:
8530     // 1. Arguments of varargs functions are passed on the stack. This applies
8531     //    even to the named arguments. So no register can be used.
8532     // 2. Total 18 registers can be used on avr and 6 ones on avrtiny.
8533     // 3. If the return type is a struct with too large size, two registers
8534     //    (out of 18/6) will be cost as an implicit pointer argument.
8535     unsigned NumRegs = ParamRegs;
8536     if (FI.isVariadic())
8537       NumRegs = 0;
8538     else if (LargeRet)
8539       NumRegs -= 2;
8540     for (auto &I : FI.arguments())
8541       I.info = classifyArgumentType(I.type, NumRegs);
8542   }
8543 };
8544
8545 class AVRTargetCodeGenInfo : public TargetCodeGenInfo {
8546 public:
8547   AVRTargetCodeGenInfo(CodeGenTypes &CGT, unsigned NPR, unsigned NRR)
8548       : TargetCodeGenInfo(std::make_unique<AVRABIInfo>(CGT, NPR, NRR)) {}
8549
8550   LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
8551                                   const VarDecl *D) const override {
8552     // Check if global/static variable is defined in address space
8553     // 1~6 (__flash, __flash1, __flash2, __flash3, __flash4, __flash5)
8554     // but not constant.
8555     if (D) {
8556       LangAS AS = D->getType().getAddressSpace();
8557       if (isTargetAddressSpace(AS) && 1 <= toTargetAddressSpace(AS) &&
8558           toTargetAddressSpace(AS) <= 6 && !D->getType().isConstQualified())
8559         CGM.getDiags().Report(D->getLocation(),
8560                               diag::err_verify_nonconst_addrspace)
8561             << "__flash*";
8562     }
8563     return TargetCodeGenInfo::getGlobalVarAddressSpace(CGM, D);
8564   }
8565
8566   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
8567                            CodeGen::CodeGenModule &CGM) const override {
8568     if (GV->isDeclaration())
8569       return;
8570     const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
8571     if (!FD) return;
8572     auto *Fn = cast<llvm::Function>(GV);
8573
8574     if (FD->getAttr<AVRInterruptAttr>())
8575       Fn->addFnAttr("interrupt");
8576
8577     if (FD->getAttr<AVRSignalAttr>())
8578       Fn->addFnAttr("signal");
8579   }
8580 };
8581 }
8582
8583 //===----------------------------------------------------------------------===//
8584 // TCE ABI Implementation (see http://tce.cs.tut.fi). Uses mostly the defaults.
8585 // Currently subclassed only to implement custom OpenCL C function attribute
8586 // handling.
8587 //===----------------------------------------------------------------------===//
8588
8589 namespace {
8590
8591 class TCETargetCodeGenInfo : public TargetCodeGenInfo {
8592 public:
8593   TCETargetCodeGenInfo(CodeGenTypes &CGT)
8594       : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}
8595
8596   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
8597                            CodeGen::CodeGenModule &M) const override;
8598 };
8599
8600 void TCETargetCodeGenInfo::setTargetAttributes(
8601     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
8602   if (GV->isDeclaration())
8603     return;
8604   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
8605   if (!FD) return;
8606
8607   llvm::Function *F = cast<llvm::Function>(GV);
8608
8609   if (M.getLangOpts().OpenCL) {
8610     if (FD->hasAttr<OpenCLKernelAttr>()) {
8611       // OpenCL C Kernel functions are not subject to inlining
8612       F->addFnAttr(llvm::Attribute::NoInline);
8613       const ReqdWorkGroupSizeAttr *Attr = FD->getAttr<ReqdWorkGroupSizeAttr>();
8614       if (Attr) {
8615         // Convert the reqd_work_group_size() attributes to metadata.
8616         llvm::LLVMContext &Context = F->getContext();
8617         llvm::NamedMDNode *OpenCLMetadata =
8618             M.getModule().getOrInsertNamedMetadata(
8619                 "opencl.kernel_wg_size_info");
8620
8621         SmallVector<llvm::Metadata *, 5> Operands;
8622         Operands.push_back(llvm::ConstantAsMetadata::get(F));
8623
8624         Operands.push_back(
8625             llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue(
8626                 M.Int32Ty, llvm::APInt(32, Attr->getXDim()))));
8627         Operands.push_back(
8628             llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue(
8629                 M.Int32Ty, llvm::APInt(32, Attr->getYDim()))));
8630         Operands.push_back(
8631             llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue(
8632                 M.Int32Ty, llvm::APInt(32, Attr->getZDim()))));
8633
8634         // Add a boolean constant operand for "required" (true) or "hint"
8635         // (false) for implementing the work_group_size_hint attr later.
8636         // Currently always true as the hint is not yet implemented.
8637         Operands.push_back(
8638             llvm::ConstantAsMetadata::get(llvm::ConstantInt::getTrue(Context)));
8639         OpenCLMetadata->addOperand(llvm::MDNode::get(Context, Operands));
8640       }
8641     }
8642   }
8643 }
8644
8645 }
8646
8647 //===----------------------------------------------------------------------===//
8648 // Hexagon ABI Implementation
8649 //===----------------------------------------------------------------------===//
8650
8651 namespace {
8652
8653 class HexagonABIInfo : public DefaultABIInfo {
8654 public:
8655   HexagonABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
8656
8657 private:
8658   ABIArgInfo classifyReturnType(QualType RetTy) const;
8659   ABIArgInfo classifyArgumentType(QualType RetTy) const;
8660   ABIArgInfo classifyArgumentType(QualType RetTy, unsigned *RegsLeft) const;
8661
8662   void computeInfo(CGFunctionInfo &FI) const override;
8663
8664   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
8665                     QualType Ty) const override;
8666   Address EmitVAArgFromMemory(CodeGenFunction &CFG, Address VAListAddr,
8667                               QualType Ty) const;
8668   Address EmitVAArgForHexagon(CodeGenFunction &CFG, Address VAListAddr,
8669                               QualType Ty) const;
8670   Address EmitVAArgForHexagonLinux(CodeGenFunction &CFG, Address VAListAddr,
8671                                    QualType Ty) const;
8672 };
8673
8674 class HexagonTargetCodeGenInfo : public TargetCodeGenInfo {
8675 public:
8676   HexagonTargetCodeGenInfo(CodeGenTypes &CGT)
8677       : TargetCodeGenInfo(std::make_unique<HexagonABIInfo>(CGT)) {}
8678
8679   int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
8680     return 29;
8681   }
8682
8683   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
8684                            CodeGen::CodeGenModule &GCM) const override {
8685     if (GV->isDeclaration())
8686       return;
8687     const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
8688     if (!FD)
8689       return;
8690   }
8691 };
8692
8693 } // namespace
8694
8695 void HexagonABIInfo::computeInfo(CGFunctionInfo &FI) const {
8696   unsigned RegsLeft = 6;
8697   if (!getCXXABI().classifyReturnType(FI))
8698     FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
8699   for (auto &I : FI.arguments())
8700     I.info = classifyArgumentType(I.type, &RegsLeft);
8701 }
8702
8703 static bool HexagonAdjustRegsLeft(uint64_t Size, unsigned *RegsLeft) {
8704   assert(Size <= 64 && "Not expecting to pass arguments larger than 64 bits"
8705                        " through registers");
8706
8707   if (*RegsLeft == 0)
8708     return false;
8709
8710   if (Size <= 32) {
8711     (*RegsLeft)--;
8712     return true;
8713   }
8714
8715   if (2 <= (*RegsLeft & (~1U))) {
8716     *RegsLeft = (*RegsLeft & (~1U)) - 2;
8717     return true;
8718   }
8719
8720   // Next available register was r5 but candidate was greater than 32-bits so it
8721   // has to go on the stack. However we still consume r5
8722   if (*RegsLeft == 1)
8723     *RegsLeft = 0;
8724
8725   return false;
8726 }
8727
8728 ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty,
8729                                                 unsigned *RegsLeft) const {
8730   if (!isAggregateTypeForABI(Ty)) {
8731     // Treat an enum type as its underlying type.
8732     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
8733       Ty = EnumTy->getDecl()->getIntegerType();
8734
8735     uint64_t Size = getContext().getTypeSize(Ty);
8736     if (Size <= 64)
8737       HexagonAdjustRegsLeft(Size, RegsLeft);
8738
8739     if (Size > 64 && Ty->isBitIntType())
8740       return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
8741
8742     return isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
8743                                              : ABIArgInfo::getDirect();
8744   }
8745
8746   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
8747     return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
8748
8749   // Ignore empty records.
8750   if (isEmptyRecord(getContext(), Ty, true))
8751     return ABIArgInfo::getIgnore();
8752
8753   uint64_t Size = getContext().getTypeSize(Ty);
8754   unsigned Align = getContext().getTypeAlign(Ty);
8755
8756   if (Size > 64)
8757     return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
8758
8759   if (HexagonAdjustRegsLeft(Size, RegsLeft))
8760     Align = Size <= 32 ? 32 : 64;
8761   if (Size <= Align) {
8762     // Pass in the smallest viable integer type.
8763     Size = llvm::bit_ceil(Size);
8764     return ABIArgInfo::getDirect(llvm::Type::getIntNTy(getVMContext(), Size));
8765   }
8766   return DefaultABIInfo::classifyArgumentType(Ty);
8767 }
8768
8769 ABIArgInfo HexagonABIInfo::classifyReturnType(QualType RetTy) const {
8770   if (RetTy->isVoidType())
8771     return ABIArgInfo::getIgnore();
8772
8773   const TargetInfo &T = CGT.getTarget();
8774   uint64_t Size = getContext().getTypeSize(RetTy);
8775
8776   if (RetTy->getAs<VectorType>()) {
8777     // HVX vectors are returned in vector registers or register pairs.
8778     if (T.hasFeature("hvx")) {
8779       assert(T.hasFeature("hvx-length64b") || T.hasFeature("hvx-length128b"));
8780       uint64_t VecSize = T.hasFeature("hvx-length64b") ? 64*8 : 128*8;
8781       if (Size == VecSize || Size == 2*VecSize)
8782         return ABIArgInfo::getDirectInReg();
8783     }
8784     // Large vector types should be returned via memory.
8785     if (Size > 64)
8786       return getNaturalAlignIndirect(RetTy);
8787   }
8788
8789   if (!isAggregateTypeForABI(RetTy)) {
8790     // Treat an enum type as its underlying type.
8791     if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
8792       RetTy = EnumTy->getDecl()->getIntegerType();
8793
8794     if (Size > 64 && RetTy->isBitIntType())
8795       return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
8796
8797     return isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
8798                                                 : ABIArgInfo::getDirect();
8799   }
8800
8801   if (isEmptyRecord(getContext(), RetTy, true))
8802     return ABIArgInfo::getIgnore();
8803
8804   // Aggregates <= 8 bytes are returned in registers, other aggregates
8805   // are returned indirectly.
8806   if (Size <= 64) {
8807     // Return in the smallest viable integer type.
8808     Size = llvm::bit_ceil(Size);
8809     return ABIArgInfo::getDirect(llvm::Type::getIntNTy(getVMContext(), Size));
8810   }
8811   return getNaturalAlignIndirect(RetTy, /*ByVal=*/true);
8812 }
8813
8814 Address HexagonABIInfo::EmitVAArgFromMemory(CodeGenFunction &CGF,
8815                                             Address VAListAddr,
8816                                             QualType Ty) const {
8817   // Load the overflow area pointer.
8818   Address __overflow_area_pointer_p =
8819       CGF.Builder.CreateStructGEP(VAListAddr, 2, "__overflow_area_pointer_p");
8820   llvm::Value *__overflow_area_pointer = CGF.Builder.CreateLoad(
8821       __overflow_area_pointer_p, "__overflow_area_pointer");
8822
8823   uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8;
8824   if (Align > 4) {
8825     // Alignment should be a power of 2.
8826     assert((Align & (Align - 1)) == 0 && "Alignment is not power of 2!");
8827
8828     // overflow_arg_area = (overflow_arg_area + align - 1) & -align;
8829     llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int64Ty, Align - 1);
8830
8831     // Add offset to the current pointer to access the argument.
8832     __overflow_area_pointer =
8833         CGF.Builder.CreateGEP(CGF.Int8Ty, __overflow_area_pointer, Offset);
8834     llvm::Value *AsInt =
8835         CGF.Builder.CreatePtrToInt(__overflow_area_pointer, CGF.Int32Ty);
8836
8837     // Create a mask which should be "AND"ed
8838     // with (overflow_arg_area + align - 1)
8839     llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int32Ty, -(int)Align);
8840     __overflow_area_pointer = CGF.Builder.CreateIntToPtr(
8841         CGF.Builder.CreateAnd(AsInt, Mask), __overflow_area_pointer->getType(),
8842         "__overflow_area_pointer.align");
8843   }
8844
8845   // Get the type of the argument from memory and bitcast
8846   // overflow area pointer to the argument type.
8847   llvm::Type *PTy = CGF.ConvertTypeForMem(Ty);
8848   Address AddrTyped = CGF.Builder.CreateElementBitCast(
8849       Address(__overflow_area_pointer, CGF.Int8Ty,
8850               CharUnits::fromQuantity(Align)),
8851       PTy);
8852
8853   // Round up to the minimum stack alignment for varargs which is 4 bytes.
8854   uint64_t Offset = llvm::alignTo(CGF.getContext().getTypeSize(Ty) / 8, 4);
8855
8856   __overflow_area_pointer = CGF.Builder.CreateGEP(
8857       CGF.Int8Ty, __overflow_area_pointer,
8858       llvm::ConstantInt::get(CGF.Int32Ty, Offset),
8859       "__overflow_area_pointer.next");
8860   CGF.Builder.CreateStore(__overflow_area_pointer, __overflow_area_pointer_p);
8861
8862   return AddrTyped;
8863 }
8864
8865 Address HexagonABIInfo::EmitVAArgForHexagon(CodeGenFunction &CGF,
8866                                             Address VAListAddr,
8867                                             QualType Ty) const {
8868   // FIXME: Need to handle alignment
8869   llvm::Type *BP = CGF.Int8PtrTy;
8870   CGBuilderTy &Builder = CGF.Builder;
8871   Address VAListAddrAsBPP = Builder.CreateElementBitCast(VAListAddr, BP, "ap");
8872   llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
8873   // Handle address alignment for type alignment > 32 bits
8874   uint64_t TyAlign = CGF.getContext().getTypeAlign(Ty) / 8;
8875   if (TyAlign > 4) {
8876     assert((TyAlign & (TyAlign - 1)) == 0 && "Alignment is not power of 2!");
8877     llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int32Ty);
8878     AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt32(TyAlign - 1));
8879     AddrAsInt = Builder.CreateAnd(AddrAsInt, Builder.getInt32(~(TyAlign - 1)));
8880     Addr = Builder.CreateIntToPtr(AddrAsInt, BP);
8881   }
8882   Address AddrTyped = Builder.CreateElementBitCast(
8883       Address(Addr, CGF.Int8Ty, CharUnits::fromQuantity(TyAlign)),
8884       CGF.ConvertType(Ty));
8885
8886   uint64_t Offset = llvm::alignTo(CGF.getContext().getTypeSize(Ty) / 8, 4);
8887   llvm::Value *NextAddr = Builder.CreateGEP(
8888       CGF.Int8Ty, Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset), "ap.next");
8889   Builder.CreateStore(NextAddr, VAListAddrAsBPP);
8890
8891   return AddrTyped;
8892 }
8893
8894 Address HexagonABIInfo::EmitVAArgForHexagonLinux(CodeGenFunction &CGF,
8895                                                  Address VAListAddr,
8896                                                  QualType Ty) const {
8897   int ArgSize = CGF.getContext().getTypeSize(Ty) / 8;
8898
8899   if (ArgSize > 8)
8900     return EmitVAArgFromMemory(CGF, VAListAddr, Ty);
8901
8902   // Here we have check if the argument is in register area or
8903   // in overflow area.
8904   // If the saved register area pointer + argsize rounded up to alignment >
8905   // saved register area end pointer, argument is in overflow area.
8906   unsigned RegsLeft = 6;
8907   Ty = CGF.getContext().getCanonicalType(Ty);
8908   (void)classifyArgumentType(Ty, &RegsLeft);
8909
8910   llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
8911   llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
8912   llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
8913   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
8914
8915   // Get rounded size of the argument.GCC does not allow vararg of
8916   // size < 4 bytes. We follow the same logic here.
8917   ArgSize = (CGF.getContext().getTypeSize(Ty) <= 32) ? 4 : 8;
8918   int ArgAlign = (CGF.getContext().getTypeSize(Ty) <= 32) ? 4 : 8;
8919
8920   // Argument may be in saved register area
8921   CGF.EmitBlock(MaybeRegBlock);
8922
8923   // Load the current saved register area pointer.
8924   Address __current_saved_reg_area_pointer_p = CGF.Builder.CreateStructGEP(
8925       VAListAddr, 0, "__current_saved_reg_area_pointer_p");
8926   llvm::Value *__current_saved_reg_area_pointer = CGF.Builder.CreateLoad(
8927       __current_saved_reg_area_pointer_p, "__current_saved_reg_area_pointer");
8928
8929   // Load the saved register area end pointer.
8930   Address __saved_reg_area_end_pointer_p = CGF.Builder.CreateStructGEP(
8931       VAListAddr, 1, "__saved_reg_area_end_pointer_p");
8932   llvm::Value *__saved_reg_area_end_pointer = CGF.Builder.CreateLoad(
8933       __saved_reg_area_end_pointer_p, "__saved_reg_area_end_pointer");
8934
8935   // If the size of argument is > 4 bytes, check if the stack
8936   // location is aligned to 8 bytes
8937   if (ArgAlign > 4) {
8938
8939     llvm::Value *__current_saved_reg_area_pointer_int =
8940         CGF.Builder.CreatePtrToInt(__current_saved_reg_area_pointer,
8941                                    CGF.Int32Ty);
8942
8943     __current_saved_reg_area_pointer_int = CGF.Builder.CreateAdd(
8944         __current_saved_reg_area_pointer_int,
8945         llvm::ConstantInt::get(CGF.Int32Ty, (ArgAlign - 1)),
8946         "align_current_saved_reg_area_pointer");
8947
8948     __current_saved_reg_area_pointer_int =
8949         CGF.Builder.CreateAnd(__current_saved_reg_area_pointer_int,
8950                               llvm::ConstantInt::get(CGF.Int32Ty, -ArgAlign),
8951                               "align_current_saved_reg_area_pointer");
8952
8953     __current_saved_reg_area_pointer =
8954         CGF.Builder.CreateIntToPtr(__current_saved_reg_area_pointer_int,
8955                                    __current_saved_reg_area_pointer->getType(),
8956                                    "align_current_saved_reg_area_pointer");
8957   }
8958
8959   llvm::Value *__new_saved_reg_area_pointer =
8960       CGF.Builder.CreateGEP(CGF.Int8Ty, __current_saved_reg_area_pointer,
8961                             llvm::ConstantInt::get(CGF.Int32Ty, ArgSize),
8962                             "__new_saved_reg_area_pointer");
8963
8964   llvm::Value *UsingStack = nullptr;
8965   UsingStack = CGF.Builder.CreateICmpSGT(__new_saved_reg_area_pointer,
8966                                          __saved_reg_area_end_pointer);
8967
8968   CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, InRegBlock);
8969
8970   // Argument in saved register area
8971   // Implement the block where argument is in register saved area
8972   CGF.EmitBlock(InRegBlock);
8973
8974   llvm::Type *PTy = CGF.ConvertType(Ty);
8975   llvm::Value *__saved_reg_area_p = CGF.Builder.CreateBitCast(
8976       __current_saved_reg_area_pointer, llvm::PointerType::getUnqual(PTy));
8977
8978   CGF.Builder.CreateStore(__new_saved_reg_area_pointer,
8979                           __current_saved_reg_area_pointer_p);
8980
8981   CGF.EmitBranch(ContBlock);
8982
8983   // Argument in overflow area
8984   // Implement the block where the argument is in overflow area.
8985   CGF.EmitBlock(OnStackBlock);
8986
8987   // Load the overflow area pointer
8988   Address __overflow_area_pointer_p =
8989       CGF.Builder.CreateStructGEP(VAListAddr, 2, "__overflow_area_pointer_p");
8990   llvm::Value *__overflow_area_pointer = CGF.Builder.CreateLoad(
8991       __overflow_area_pointer_p, "__overflow_area_pointer");
8992
8993   // Align the overflow area pointer according to the alignment of the argument
8994   if (ArgAlign > 4) {
8995     llvm::Value *__overflow_area_pointer_int =
8996         CGF.Builder.CreatePtrToInt(__overflow_area_pointer, CGF.Int32Ty);
8997
8998     __overflow_area_pointer_int =
8999         CGF.Builder.CreateAdd(__overflow_area_pointer_int,
9000                               llvm::ConstantInt::get(CGF.Int32Ty, ArgAlign - 1),
9001                               "align_overflow_area_pointer");
9002
9003     __overflow_area_pointer_int =
9004         CGF.Builder.CreateAnd(__overflow_area_pointer_int,
9005                               llvm::ConstantInt::get(CGF.Int32Ty, -ArgAlign),
9006                               "align_overflow_area_pointer");
9007
9008     __overflow_area_pointer = CGF.Builder.CreateIntToPtr(
9009         __overflow_area_pointer_int, __overflow_area_pointer->getType(),
9010         "align_overflow_area_pointer");
9011   }
9012
9013   // Get the pointer for next argument in overflow area and store it
9014   // to overflow area pointer.
9015   llvm::Value *__new_overflow_area_pointer = CGF.Builder.CreateGEP(
9016       CGF.Int8Ty, __overflow_area_pointer,
9017       llvm::ConstantInt::get(CGF.Int32Ty, ArgSize),
9018       "__overflow_area_pointer.next");
9019
9020   CGF.Builder.CreateStore(__new_overflow_area_pointer,
9021                           __overflow_area_pointer_p);
9022
9023   CGF.Builder.CreateStore(__new_overflow_area_pointer,
9024                           __current_saved_reg_area_pointer_p);
9025
9026   // Bitcast the overflow area pointer to the type of argument.
9027   llvm::Type *OverflowPTy = CGF.ConvertTypeForMem(Ty);
9028   llvm::Value *__overflow_area_p = CGF.Builder.CreateBitCast(
9029       __overflow_area_pointer, llvm::PointerType::getUnqual(OverflowPTy));
9030
9031   CGF.EmitBranch(ContBlock);
9032
9033   // Get the correct pointer to load the variable argument
9034   // Implement the ContBlock
9035   CGF.EmitBlock(ContBlock);
9036
9037   llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty);
9038   llvm::Type *MemPTy = llvm::PointerType::getUnqual(MemTy);
9039   llvm::PHINode *ArgAddr = CGF.Builder.CreatePHI(MemPTy, 2, "vaarg.addr");
9040   ArgAddr->addIncoming(__saved_reg_area_p, InRegBlock);
9041   ArgAddr->addIncoming(__overflow_area_p, OnStackBlock);
9042
9043   return Address(ArgAddr, MemTy, CharUnits::fromQuantity(ArgAlign));
9044 }
9045
9046 Address HexagonABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
9047                                   QualType Ty) const {
9048
9049   if (getTarget().getTriple().isMusl())
9050     return EmitVAArgForHexagonLinux(CGF, VAListAddr, Ty);
9051
9052   return EmitVAArgForHexagon(CGF, VAListAddr, Ty);
9053 }
9054
9055 //===----------------------------------------------------------------------===//
9056 // Lanai ABI Implementation
9057 //===----------------------------------------------------------------------===//
9058
9059 namespace {
9060 class LanaiABIInfo : public DefaultABIInfo {
9061   struct CCState {
9062     unsigned FreeRegs;
9063   };
9064
9065 public:
9066   LanaiABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
9067
9068   bool shouldUseInReg(QualType Ty, CCState &State) const;
9069
9070   void computeInfo(CGFunctionInfo &FI) const override {
9071     CCState State;
9072     // Lanai uses 4 registers to pass arguments unless the function has the
9073     // regparm attribute set.
9074     if (FI.getHasRegParm()) {
9075       State.FreeRegs = FI.getRegParm();
9076     } else {
9077       State.FreeRegs = 4;
9078     }
9079
9080     if (!getCXXABI().classifyReturnType(FI))
9081       FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
9082     for (auto &I : FI.arguments())
9083       I.info = classifyArgumentType(I.type, State);
9084   }
9085
9086   ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;
9087   ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
9088 };
9089 } // end anonymous namespace
9090
9091 bool LanaiABIInfo::shouldUseInReg(QualType Ty, CCState &State) const {
9092   unsigned Size = getContext().getTypeSize(Ty);
9093   unsigned SizeInRegs = llvm::alignTo(Size, 32U) / 32U;
9094
9095   if (SizeInRegs == 0)
9096     return false;
9097
9098   if (SizeInRegs > State.FreeRegs) {
9099     State.FreeRegs = 0;
9100     return false;
9101   }
9102
9103   State.FreeRegs -= SizeInRegs;
9104
9105   return true;
9106 }
9107
9108 ABIArgInfo LanaiABIInfo::getIndirectResult(QualType Ty, bool ByVal,
9109                                            CCState &State) const {
9110   if (!ByVal) {
9111     if (State.FreeRegs) {
9112       --State.FreeRegs; // Non-byval indirects just use one pointer.
9113       return getNaturalAlignIndirectInReg(Ty);
9114     }
9115     return getNaturalAlignIndirect(Ty, false);
9116   }
9117
9118   // Compute the byval alignment.
9119   const unsigned MinABIStackAlignInBytes = 4;
9120   unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
9121   return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true,
9122                                  /*Realign=*/TypeAlign >
9123                                      MinABIStackAlignInBytes);
9124 }
9125
9126 ABIArgInfo LanaiABIInfo::classifyArgumentType(QualType Ty,
9127                                               CCState &State) const {
9128   // Check with the C++ ABI first.
9129   const RecordType *RT = Ty->getAs<RecordType>();
9130   if (RT) {
9131     CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
9132     if (RAA == CGCXXABI::RAA_Indirect) {
9133       return getIndirectResult(Ty, /*ByVal=*/false, State);
9134     } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
9135       return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
9136     }
9137   }
9138
9139   if (isAggregateTypeForABI(Ty)) {
9140     // Structures with flexible arrays are always indirect.
9141     if (RT && RT->getDecl()->hasFlexibleArrayMember())
9142       return getIndirectResult(Ty, /*ByVal=*/true, State);
9143
9144     // Ignore empty structs/unions.
9145     if (isEmptyRecord(getContext(), Ty, true))
9146       return ABIArgInfo::getIgnore();
9147
9148     llvm::LLVMContext &LLVMContext = getVMContext();
9149     unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32;
9150     if (SizeInRegs <= State.FreeRegs) {
9151       llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
9152       SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32);
9153       llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
9154       State.FreeRegs -= SizeInRegs;
9155       return ABIArgInfo::getDirectInReg(Result);
9156     } else {
9157       State.FreeRegs = 0;
9158     }
9159     return getIndirectResult(Ty, true, State);
9160   }
9161
9162   // Treat an enum type as its underlying type.
9163   if (const auto *EnumTy = Ty->getAs<EnumType>())
9164     Ty = EnumTy->getDecl()->getIntegerType();
9165
9166   bool InReg = shouldUseInReg(Ty, State);
9167
9168   // Don't pass >64 bit integers in registers.
9169   if (const auto *EIT = Ty->getAs<BitIntType>())
9170     if (EIT->getNumBits() > 64)
9171       return getIndirectResult(Ty, /*ByVal=*/true, State);
9172
9173   if (isPromotableIntegerTypeForABI(Ty)) {
9174     if (InReg)
9175       return ABIArgInfo::getDirectInReg();
9176     return ABIArgInfo::getExtend(Ty);
9177   }
9178   if (InReg)
9179     return ABIArgInfo::getDirectInReg();
9180   return ABIArgInfo::getDirect();
9181 }
9182
9183 namespace {
9184 class LanaiTargetCodeGenInfo : public TargetCodeGenInfo {
9185 public:
9186   LanaiTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
9187       : TargetCodeGenInfo(std::make_unique<LanaiABIInfo>(CGT)) {}
9188 };
9189 }
9190
9191 //===----------------------------------------------------------------------===//
9192 // AMDGPU ABI Implementation
9193 //===----------------------------------------------------------------------===//
9194
9195 namespace {
9196
9197 class AMDGPUABIInfo final : public DefaultABIInfo {
9198 private:
9199   static const unsigned MaxNumRegsForArgsRet = 16;
9200
9201   unsigned numRegsForType(QualType Ty) const;
9202
9203   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
9204   bool isHomogeneousAggregateSmallEnough(const Type *Base,
9205                                          uint64_t Members) const override;
9206
9207   // Coerce HIP scalar pointer arguments from generic pointers to global ones.
9208   llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
9209                                        unsigned ToAS) const {
9210     // Single value types.
9211     auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
9212     if (PtrTy && PtrTy->getAddressSpace() == FromAS)
9213       return llvm::PointerType::get(Ty->getContext(), ToAS);
9214     return Ty;
9215   }
9216
9217 public:
9218   explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) :
9219     DefaultABIInfo(CGT) {}
9220
9221   ABIArgInfo classifyReturnType(QualType RetTy) const;
9222   ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
9223   ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const;
9224
9225   void computeInfo(CGFunctionInfo &FI) const override;
9226   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
9227                     QualType Ty) const override;
9228 };
9229
9230 bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
9231   return true;
9232 }
9233
9234 bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
9235   const Type *Base, uint64_t Members) const {
9236   uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;
9237
9238   // Homogeneous Aggregates may occupy at most 16 registers.
9239   return Members * NumRegs <= MaxNumRegsForArgsRet;
9240 }
9241
9242 /// Estimate number of registers the type will use when passed in registers.
9243 unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const {
9244   unsigned NumRegs = 0;
9245
9246   if (const VectorType *VT = Ty->getAs<VectorType>()) {
9247     // Compute from the number of elements. The reported size is based on the
9248     // in-memory size, which includes the padding 4th element for 3-vectors.
9249     QualType EltTy = VT->getElementType();
9250     unsigned EltSize = getContext().getTypeSize(EltTy);
9251
9252     // 16-bit element vectors should be passed as packed.
9253     if (EltSize == 16)
9254       return (VT->getNumElements() + 1) / 2;
9255
9256     unsigned EltNumRegs = (EltSize + 31) / 32;
9257     return EltNumRegs * VT->getNumElements();
9258   }
9259
9260   if (const RecordType *RT = Ty->getAs<RecordType>()) {
9261     const RecordDecl *RD = RT->getDecl();
9262     assert(!RD->hasFlexibleArrayMember());
9263
9264     for (const FieldDecl *Field : RD->fields()) {
9265       QualType FieldTy = Field->getType();
9266       NumRegs += numRegsForType(FieldTy);
9267     }
9268
9269     return NumRegs;
9270   }
9271
9272   return (getContext().getTypeSize(Ty) + 31) / 32;
9273 }
9274
9275 void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
9276   llvm::CallingConv::ID CC = FI.getCallingConvention();
9277
9278   if (!getCXXABI().classifyReturnType(FI))
9279     FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
9280
9281   unsigned NumRegsLeft = MaxNumRegsForArgsRet;
9282   for (auto &Arg : FI.arguments()) {
9283     if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
9284       Arg.info = classifyKernelArgumentType(Arg.type);
9285     } else {
9286       Arg.info = classifyArgumentType(Arg.type, NumRegsLeft);
9287     }
9288   }
9289 }
9290
9291 Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
9292                                  QualType Ty) const {
9293   llvm_unreachable("AMDGPU does not support varargs");
9294 }
9295
9296 ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
9297   if (isAggregateTypeForABI(RetTy)) {
9298     // Records with non-trivial destructors/copy-constructors should not be
9299     // returned by value.
9300     if (!getRecordArgABI(RetTy, getCXXABI())) {
9301       // Ignore empty structs/unions.
9302       if (isEmptyRecord(getContext(), RetTy, true))
9303         return ABIArgInfo::getIgnore();
9304
9305       // Lower single-element structs to just return a regular value.
9306       if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
9307         return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
9308
9309       if (const RecordType *RT = RetTy->getAs<RecordType>()) {
9310         const RecordDecl *RD = RT->getDecl();
9311         if (RD->hasFlexibleArrayMember())
9312           return DefaultABIInfo::classifyReturnType(RetTy);
9313       }
9314
9315       // Pack aggregates <= 4 bytes into single VGPR or pair.
9316       uint64_t Size = getContext().getTypeSize(RetTy);
9317       if (Size <= 16)
9318         return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
9319
9320       if (Size <= 32)
9321         return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
9322
9323       if (Size <= 64) {
9324         llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
9325         return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
9326       }
9327
9328       if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
9329         return ABIArgInfo::getDirect();
9330     }
9331   }
9332
9333   // Otherwise just do the default thing.
9334   return DefaultABIInfo::classifyReturnType(RetTy);
9335 }
9336
9337 /// For kernels all parameters are really passed in a special buffer. It doesn't
9338 /// make sense to pass anything byval, so everything must be direct.
9339 ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
9340   Ty = useFirstFieldIfTransparentUnion(Ty);
9341
9342   // TODO: Can we omit empty structs?
9343
9344   if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
9345     Ty = QualType(SeltTy, 0);
9346
9347   llvm::Type *OrigLTy = CGT.ConvertType(Ty);
9348   llvm::Type *LTy = OrigLTy;
9349   if (getContext().getLangOpts().HIP) {
9350     LTy = coerceKernelArgumentType(
9351         OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
9352         /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
9353   }
9354
9355   // FIXME: Should also use this for OpenCL, but it requires addressing the
9356   // problem of kernels being called.
9357   //
9358   // FIXME: This doesn't apply the optimization of coercing pointers in structs
9359   // to global address space when using byref. This would require implementing a
9360   // new kind of coercion of the in-memory type when for indirect arguments.
9361   if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
9362       isAggregateTypeForABI(Ty)) {
9363     return ABIArgInfo::getIndirectAliased(
9364         getContext().getTypeAlignInChars(Ty),
9365         getContext().getTargetAddressSpace(LangAS::opencl_constant),
9366         false /*Realign*/, nullptr /*Padding*/);
9367   }
9368
9369   // If we set CanBeFlattened to true, CodeGen will expand the struct to its
9370   // individual elements, which confuses the Clover OpenCL backend; therefore we
9371   // have to set it to false here. Other args of getDirect() are just defaults.
9372   return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
9373 }
9374
9375 ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
9376                                                unsigned &NumRegsLeft) const {
9377   assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");
9378
9379   Ty = useFirstFieldIfTransparentUnion(Ty);
9380
9381   if (isAggregateTypeForABI(Ty)) {
9382     // Records with non-trivial destructors/copy-constructors should not be
9383     // passed by value.
9384     if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
9385       return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
9386
9387     // Ignore empty structs/unions.
9388     if (isEmptyRecord(getContext(), Ty, true))
9389       return ABIArgInfo::getIgnore();
9390
9391     // Lower single-element structs to just pass a regular value. TODO: We
9392     // could do reasonable-size multiple-element structs too, using getExpand(),
9393     // though watch out for things like bitfields.
9394     if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
9395       return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
9396
9397     if (const RecordType *RT = Ty->getAs<RecordType>()) {
9398       const RecordDecl *RD = RT->getDecl();
9399       if (RD->hasFlexibleArrayMember())
9400         return DefaultABIInfo::classifyArgumentType(Ty);
9401     }
9402
9403     // Pack aggregates <= 8 bytes into single VGPR or pair.
9404     uint64_t Size = getContext().getTypeSize(Ty);
9405     if (Size <= 64) {
9406       unsigned NumRegs = (Size + 31) / 32;
9407       NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
9408
9409       if (Size <= 16)
9410         return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
9411
9412       if (Size <= 32)
9413         return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
9414
9415       // XXX: Should this be i64 instead, and should the limit increase?
9416       llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
9417       return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
9418     }
9419
9420     if (NumRegsLeft > 0) {
9421       unsigned NumRegs = numRegsForType(Ty);
9422       if (NumRegsLeft >= NumRegs) {
9423         NumRegsLeft -= NumRegs;
9424         return ABIArgInfo::getDirect();
9425       }
9426     }
9427   }
9428
9429   // Otherwise just do the default thing.
9430   ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty);
9431   if (!ArgInfo.isIndirect()) {
9432     unsigned NumRegs = numRegsForType(Ty);
9433     NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
9434   }
9435
9436   return ArgInfo;
9437 }
9438
9439 class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
9440 public:
9441   AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
9442       : TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {}
9443
9444   void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
9445                                  CodeGenModule &CGM) const;
9446
9447   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
9448                            CodeGen::CodeGenModule &M) const override;
9449   unsigned getOpenCLKernelCallingConv() const override;
9450
9451   llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
9452       llvm::PointerType *T, QualType QT) const override;
9453
9454   LangAS getASTAllocaAddressSpace() const override {
9455     return getLangASFromTargetAS(
9456         getABIInfo().getDataLayout().getAllocaAddrSpace());
9457   }
9458   LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
9459                                   const VarDecl *D) const override;
9460   llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
9461                                          SyncScope Scope,
9462                                          llvm::AtomicOrdering Ordering,
9463                                          llvm::LLVMContext &Ctx) const override;
9464   llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF,
9465                                          llvm::Function *BlockInvokeFunc,
9466                                          llvm::Type *BlockTy) const override;
9467   bool shouldEmitStaticExternCAliases() const override;
9468   bool shouldEmitDWARFBitFieldSeparators() const override;
9469   void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
9470 };
9471 }
9472
9473 static bool requiresAMDGPUProtectedVisibility(const Decl *D,
9474                                               llvm::GlobalValue *GV) {
9475   if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
9476     return false;
9477
9478   return D->hasAttr<OpenCLKernelAttr>() ||
9479          (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
9480          (isa<VarDecl>(D) &&
9481           (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
9482            cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||
9483            cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType()));
9484 }
9485
9486 void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
9487     const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const {
9488   const auto *ReqdWGS =
9489       M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
9490   const bool IsOpenCLKernel =
9491       M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
9492   const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();
9493
9494   const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
9495   if (ReqdWGS || FlatWGS) {
9496     unsigned Min = 0;
9497     unsigned Max = 0;
9498     if (FlatWGS) {
9499       Min = FlatWGS->getMin()
9500                 ->EvaluateKnownConstInt(M.getContext())
9501                 .getExtValue();
9502       Max = FlatWGS->getMax()
9503                 ->EvaluateKnownConstInt(M.getContext())
9504                 .getExtValue();
9505     }
9506     if (ReqdWGS && Min == 0 && Max == 0)
9507       Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
9508
9509     if (Min != 0) {
9510       assert(Min <= Max && "Min must be less than or equal Max");
9511
9512       std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
9513       F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
9514     } else
9515       assert(Max == 0 && "Max must be zero");
9516   } else if (IsOpenCLKernel || IsHIPKernel) {
9517     // By default, restrict the maximum size to a value specified by
9518     // --gpu-max-threads-per-block=n or its default value for HIP.
9519     const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
9520     const unsigned DefaultMaxWorkGroupSize =
9521         IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
9522                        : M.getLangOpts().GPUMaxThreadsPerBlock;
9523     std::string AttrVal =
9524         std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
9525     F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
9526   }
9527
9528   if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {
9529     unsigned Min =
9530         Attr->getMin()->EvaluateKnownConstInt(M.getContext()).getExtValue();
9531     unsigned Max = Attr->getMax() ? Attr->getMax()
9532                                         ->EvaluateKnownConstInt(M.getContext())
9533                                         .getExtValue()
9534                                   : 0;
9535
9536     if (Min != 0) {
9537       assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max");
9538
9539       std::string AttrVal = llvm::utostr(Min);
9540       if (Max != 0)
9541         AttrVal = AttrVal + "," + llvm::utostr(Max);
9542       F->addFnAttr("amdgpu-waves-per-eu", AttrVal);
9543     } else
9544       assert(Max == 0 && "Max must be zero");
9545   }
9546
9547   if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
9548     unsigned NumSGPR = Attr->getNumSGPR();
9549
9550     if (NumSGPR != 0)
9551       F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR));
9552   }
9553
9554   if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
9555     uint32_t NumVGPR = Attr->getNumVGPR();
9556
9557     if (NumVGPR != 0)
9558       F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
9559   }
9560 }
9561
9562 void AMDGPUTargetCodeGenInfo::setTargetAttributes(
9563     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
9564   if (requiresAMDGPUProtectedVisibility(D, GV)) {
9565     GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
9566     GV->setDSOLocal(true);
9567   }
9568
9569   if (GV->isDeclaration())
9570     return;
9571
9572   llvm::Function *F = dyn_cast<llvm::Function>(GV);
9573   if (!F)
9574     return;
9575
9576   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
9577   if (FD)
9578     setFunctionDeclAttributes(FD, F, M);
9579
9580   const bool IsHIPKernel =
9581       M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();
9582
9583   // TODO: This should be moved to language specific attributes instead.
9584   if (IsHIPKernel)
9585     F->addFnAttr("uniform-work-group-size", "true");
9586
9587   if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
9588     F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");
9589
9590   if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
9591     F->addFnAttr("amdgpu-ieee", "false");
9592 }
9593
9594 unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
9595   return llvm::CallingConv::AMDGPU_KERNEL;
9596 }
9597
9598 // Currently LLVM assumes null pointers always have value 0,
9599 // which results in incorrectly transformed IR. Therefore, instead of
9600 // emitting null pointers in private and local address spaces, a null
9601 // pointer in generic address space is emitted which is casted to a
9602 // pointer in local or private address space.
9603 llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
9604     const CodeGen::CodeGenModule &CGM, llvm::PointerType *PT,
9605     QualType QT) const {
9606   if (CGM.getContext().getTargetNullPointerValue(QT) == 0)
9607     return llvm::ConstantPointerNull::get(PT);
9608
9609   auto &Ctx = CGM.getContext();
9610   auto NPT = llvm::PointerType::get(
9611       PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
9612   return llvm::ConstantExpr::getAddrSpaceCast(
9613       llvm::ConstantPointerNull::get(NPT), PT);
9614 }
9615
9616 LangAS
9617 AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
9618                                                   const VarDecl *D) const {
9619   assert(!CGM.getLangOpts().OpenCL &&
9620          !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
9621          "Address space agnostic languages only");
9622   LangAS DefaultGlobalAS = getLangASFromTargetAS(
9623       CGM.getContext().getTargetAddressSpace(LangAS::opencl_global));
9624   if (!D)
9625     return DefaultGlobalAS;
9626
9627   LangAS AddrSpace = D->getType().getAddressSpace();
9628   assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace));
9629   if (AddrSpace != LangAS::Default)
9630     return AddrSpace;
9631
9632   // Only promote to address space 4 if VarDecl has constant initialization.
9633   if (CGM.isTypeConstant(D->getType(), false, false) &&
9634       D->hasConstantInitialization()) {
9635     if (auto ConstAS = CGM.getTarget().getConstantAddressSpace())
9636       return *ConstAS;
9637   }
9638   return DefaultGlobalAS;
9639 }
9640
9641 llvm::SyncScope::ID
9642 AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
9643                                             SyncScope Scope,
9644                                             llvm::AtomicOrdering Ordering,
9645                                             llvm::LLVMContext &Ctx) const {
9646   std::string Name;
9647   switch (Scope) {
9648   case SyncScope::HIPSingleThread:
9649     Name = "singlethread";
9650     break;
9651   case SyncScope::HIPWavefront:
9652   case SyncScope::OpenCLSubGroup:
9653     Name = "wavefront";
9654     break;
9655   case SyncScope::HIPWorkgroup:
9656   case SyncScope::OpenCLWorkGroup:
9657     Name = "workgroup";
9658     break;
9659   case SyncScope::HIPAgent:
9660   case SyncScope::OpenCLDevice:
9661     Name = "agent";
9662     break;
9663   case SyncScope::HIPSystem:
9664   case SyncScope::OpenCLAllSVMDevices:
9665     Name = "";
9666     break;
9667   }
9668
9669   if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
9670     if (!Name.empty())
9671       Name = Twine(Twine(Name) + Twine("-")).str();
9672
9673     Name = Twine(Twine(Name) + Twine("one-as")).str();
9674   }
9675
9676   return Ctx.getOrInsertSyncScopeID(Name);
9677 }
9678
9679 bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
9680   return false;
9681 }
9682
9683 bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators() const {
9684   return true;
9685 }
9686
9687 void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
9688     const FunctionType *&FT) const {
9689   FT = getABIInfo().getContext().adjustFunctionType(
9690       FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
9691 }
9692
9693 //===----------------------------------------------------------------------===//
9694 // SPARC v8 ABI Implementation.
9695 // Based on the SPARC Compliance Definition version 2.4.1.
9696 //
9697 // Ensures that complex values are passed in registers.
9698 //
9699 namespace {
9700 class SparcV8ABIInfo : public DefaultABIInfo {
9701 public:
9702   SparcV8ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
9703
9704 private:
9705   ABIArgInfo classifyReturnType(QualType RetTy) const;
9706   void computeInfo(CGFunctionInfo &FI) const override;
9707 };
9708 } // end anonymous namespace
9709
9710
9711 ABIArgInfo
9712 SparcV8ABIInfo::classifyReturnType(QualType Ty) const {
9713   if (Ty->isAnyComplexType()) {
9714     return ABIArgInfo::getDirect();
9715   }
9716   else {
9717     return DefaultABIInfo::classifyReturnType(Ty);
9718   }
9719 }
9720
9721 void SparcV8ABIInfo::computeInfo(CGFunctionInfo &FI) const {
9722
9723   FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
9724   for (auto &Arg : FI.arguments())
9725     Arg.info = classifyArgumentType(Arg.type);
9726 }
9727
9728 namespace {
9729 class SparcV8TargetCodeGenInfo : public TargetCodeGenInfo {
9730 public:
9731   SparcV8TargetCodeGenInfo(CodeGenTypes &CGT)
9732       : TargetCodeGenInfo(std::make_unique<SparcV8ABIInfo>(CGT)) {}
9733
9734   llvm::Value *decodeReturnAddress(CodeGen::CodeGenFunction &CGF,
9735                                    llvm::Value *Address) const override {
9736     int Offset;
9737     if (isAggregateTypeForABI(CGF.CurFnInfo->getReturnType()))
9738       Offset = 12;
9739     else
9740       Offset = 8;
9741     return CGF.Builder.CreateGEP(CGF.Int8Ty, Address,
9742                                  llvm::ConstantInt::get(CGF.Int32Ty, Offset));
9743   }
9744
9745   llvm::Value *encodeReturnAddress(CodeGen::CodeGenFunction &CGF,
9746                                    llvm::Value *Address) const override {
9747     int Offset;
9748     if (isAggregateTypeForABI(CGF.CurFnInfo->getReturnType()))
9749       Offset = -12;
9750     else
9751       Offset = -8;
9752     return CGF.Builder.CreateGEP(CGF.Int8Ty, Address,
9753                                  llvm::ConstantInt::get(CGF.Int32Ty, Offset));
9754   }
9755 };
9756 } // end anonymous namespace
9757
9758 //===----------------------------------------------------------------------===//
9759 // SPARC v9 ABI Implementation.
9760 // Based on the SPARC Compliance Definition version 2.4.1.
9761 //
9762 // Function arguments a mapped to a nominal "parameter array" and promoted to
9763 // registers depending on their type. Each argument occupies 8 or 16 bytes in
9764 // the array, structs larger than 16 bytes are passed indirectly.
9765 //
9766 // One case requires special care:
9767 //
9768 //   struct mixed {
9769 //     int i;
9770 //     float f;
9771 //   };
9772 //
9773 // When a struct mixed is passed by value, it only occupies 8 bytes in the
9774 // parameter array, but the int is passed in an integer register, and the float
9775 // is passed in a floating point register. This is represented as two arguments
9776 // with the LLVM IR inreg attribute:
9777 //
9778 //   declare void f(i32 inreg %i, float inreg %f)
9779 //
9780 // The code generator will only allocate 4 bytes from the parameter array for
9781 // the inreg arguments. All other arguments are allocated a multiple of 8
9782 // bytes.
9783 //
9784 namespace {
9785 class SparcV9ABIInfo : public ABIInfo {
9786 public:
9787   SparcV9ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {}
9788
9789 private:
9790   ABIArgInfo classifyType(QualType RetTy, unsigned SizeLimit) const;
9791   void computeInfo(CGFunctionInfo &FI) const override;
9792   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
9793                     QualType Ty) const override;
9794
9795   // Coercion type builder for structs passed in registers. The coercion type
9796   // serves two purposes:
9797   //
9798   // 1. Pad structs to a multiple of 64 bits, so they are passed 'left-aligned'
9799   //    in registers.
9800   // 2. Expose aligned floating point elements as first-level elements, so the
9801   //    code generator knows to pass them in floating point registers.
9802   //
9803   // We also compute the InReg flag which indicates that the struct contains
9804   // aligned 32-bit floats.
9805   //
9806   struct CoerceBuilder {
9807     llvm::LLVMContext &Context;
9808     const llvm::DataLayout &DL;
9809     SmallVector<llvm::Type*, 8> Elems;
9810     uint64_t Size;
9811     bool InReg;
9812
9813     CoerceBuilder(llvm::LLVMContext &c, const llvm::DataLayout &dl)
9814       : Context(c), DL(dl), Size(0), InReg(false) {}
9815
9816     // Pad Elems with integers until Size is ToSize.
9817     void pad(uint64_t ToSize) {
9818       assert(ToSize >= Size && "Cannot remove elements");
9819       if (ToSize == Size)
9820         return;
9821
9822       // Finish the current 64-bit word.
9823       uint64_t Aligned = llvm::alignTo(Size, 64);
9824       if (Aligned > Size && Aligned <= ToSize) {
9825         Elems.push_back(llvm::IntegerType::get(Context, Aligned - Size));
9826         Size = Aligned;
9827       }
9828
9829       // Add whole 64-bit words.
9830       while (Size + 64 <= ToSize) {
9831         Elems.push_back(llvm::Type::getInt64Ty(Context));
9832         Size += 64;
9833       }
9834
9835       // Final in-word padding.
9836       if (Size < ToSize) {
9837         Elems.push_back(llvm::IntegerType::get(Context, ToSize - Size));
9838         Size = ToSize;
9839       }
9840     }
9841
9842     // Add a floating point element at Offset.
9843     void addFloat(uint64_t Offset, llvm::Type *Ty, unsigned Bits) {
9844       // Unaligned floats are treated as integers.
9845       if (Offset % Bits)
9846         return;
9847       // The InReg flag is only required if there are any floats < 64 bits.
9848       if (Bits < 64)
9849         InReg = true;
9850       pad(Offset);
9851       Elems.push_back(Ty);
9852       Size = Offset + Bits;
9853     }
9854
9855     // Add a struct type to the coercion type, starting at Offset (in bits).
9856     void addStruct(uint64_t Offset, llvm::StructType *StrTy) {
9857       const llvm::StructLayout *Layout = DL.getStructLayout(StrTy);
9858       for (unsigned i = 0, e = StrTy->getNumElements(); i != e; ++i) {
9859         llvm::Type *ElemTy = StrTy->getElementType(i);
9860         uint64_t ElemOffset = Offset + Layout->getElementOffsetInBits(i);
9861         switch (ElemTy->getTypeID()) {
9862         case llvm::Type::StructTyID:
9863           addStruct(ElemOffset, cast<llvm::StructType>(ElemTy));
9864           break;
9865         case llvm::Type::FloatTyID:
9866           addFloat(ElemOffset, ElemTy, 32);
9867           break;
9868         case llvm::Type::DoubleTyID:
9869           addFloat(ElemOffset, ElemTy, 64);
9870           break;
9871         case llvm::Type::FP128TyID:
9872           addFloat(ElemOffset, ElemTy, 128);
9873           break;
9874         case llvm::Type::PointerTyID:
9875           if (ElemOffset % 64 == 0) {
9876             pad(ElemOffset);
9877             Elems.push_back(ElemTy);
9878             Size += 64;
9879           }
9880           break;
9881         default:
9882           break;
9883         }
9884       }
9885     }
9886
9887     // Check if Ty is a usable substitute for the coercion type.
9888     bool isUsableType(llvm::StructType *Ty) const {
9889       return llvm::ArrayRef(Elems) == Ty->elements();
9890     }
9891
9892     // Get the coercion type as a literal struct type.
9893     llvm::Type *getType() const {
9894       if (Elems.size() == 1)
9895         return Elems.front();
9896       else
9897         return llvm::StructType::get(Context, Elems);
9898     }
9899   };
9900 };
9901 } // end anonymous namespace
9902
9903 ABIArgInfo
9904 SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const {
9905   if (Ty->isVoidType())
9906     return ABIArgInfo::getIgnore();
9907
9908   uint64_t Size = getContext().getTypeSize(Ty);
9909
9910   // Anything too big to fit in registers is passed with an explicit indirect
9911   // pointer / sret pointer.
9912   if (Size > SizeLimit)
9913     return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
9914
9915   // Treat an enum type as its underlying type.
9916   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
9917     Ty = EnumTy->getDecl()->getIntegerType();
9918
9919   // Integer types smaller than a register are extended.
9920   if (Size < 64 && Ty->isIntegerType())
9921     return ABIArgInfo::getExtend(Ty);
9922
9923   if (const auto *EIT = Ty->getAs<BitIntType>())
9924     if (EIT->getNumBits() < 64)
9925       return ABIArgInfo::getExtend(Ty);
9926
9927   // Other non-aggregates go in registers.
9928   if (!isAggregateTypeForABI(Ty))
9929     return ABIArgInfo::getDirect();
9930
9931   // If a C++ object has either a non-trivial copy constructor or a non-trivial
9932   // destructor, it is passed with an explicit indirect pointer / sret pointer.
9933   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
9934     return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
9935
9936   // This is a small aggregate type that should be passed in registers.
9937   // Build a coercion type from the LLVM struct type.
9938   llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty));
9939   if (!StrTy)
9940     return ABIArgInfo::getDirect();
9941
9942   CoerceBuilder CB(getVMContext(), getDataLayout());
9943   CB.addStruct(0, StrTy);
9944   CB.pad(llvm::alignTo(CB.DL.getTypeSizeInBits(StrTy), 64));
9945
9946   // Try to use the original type for coercion.
9947   llvm::Type *CoerceTy = CB.isUsableType(StrTy) ? StrTy : CB.getType();
9948
9949   if (CB.InReg)
9950     return ABIArgInfo::getDirectInReg(CoerceTy);
9951   else
9952     return ABIArgInfo::getDirect(CoerceTy);
9953 }
9954
9955 Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
9956                                   QualType Ty) const {
9957   ABIArgInfo AI = classifyType(Ty, 16 * 8);
9958   llvm::Type *ArgTy = CGT.ConvertType(Ty);
9959   if (AI.canHaveCoerceToType() && !AI.getCoerceToType())
9960     AI.setCoerceToType(ArgTy);
9961
9962   CharUnits SlotSize = CharUnits::fromQuantity(8);
9963
9964   CGBuilderTy &Builder = CGF.Builder;
9965   Address Addr = Address(Builder.CreateLoad(VAListAddr, "ap.cur"),
9966                          getVAListElementType(CGF), SlotSize);
9967   llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy);
9968
9969   auto TypeInfo = getContext().getTypeInfoInChars(Ty);
9970
9971   Address ArgAddr = Address::invalid();
9972   CharUnits Stride;
9973   switch (AI.getKind()) {
9974   case ABIArgInfo::Expand:
9975   case ABIArgInfo::CoerceAndExpand:
9976   case ABIArgInfo::InAlloca:
9977     llvm_unreachable("Unsupported ABI kind for va_arg");
9978
9979   case ABIArgInfo::Extend: {
9980     Stride = SlotSize;
9981     CharUnits Offset = SlotSize - TypeInfo.Width;
9982     ArgAddr = Builder.CreateConstInBoundsByteGEP(Addr, Offset, "extend");
9983     break;
9984   }
9985
9986   case ABIArgInfo::Direct: {
9987     auto AllocSize = getDataLayout().getTypeAllocSize(AI.getCoerceToType());
9988     Stride = CharUnits::fromQuantity(AllocSize).alignTo(SlotSize);
9989     ArgAddr = Addr;
9990     break;
9991   }
9992
9993   case ABIArgInfo::Indirect:
9994   case ABIArgInfo::IndirectAliased:
9995     Stride = SlotSize;
9996     ArgAddr = Builder.CreateElementBitCast(Addr, ArgPtrTy, "indirect");
9997     ArgAddr = Address(Builder.CreateLoad(ArgAddr, "indirect.arg"), ArgTy,
9998                       TypeInfo.Align);
9999     break;
10000
10001   case ABIArgInfo::Ignore:
10002     return Address(llvm::UndefValue::get(ArgPtrTy), ArgTy, TypeInfo.Align);
10003   }
10004
10005   // Update VAList.
10006   Address NextPtr = Builder.CreateConstInBoundsByteGEP(Addr, Stride, "ap.next");
10007   Builder.CreateStore(NextPtr.getPointer(), VAListAddr);
10008
10009   return Builder.CreateElementBitCast(ArgAddr, ArgTy, "arg.addr");
10010 }
10011
10012 void SparcV9ABIInfo::computeInfo(CGFunctionInfo &FI) const {
10013   FI.getReturnInfo() = classifyType(FI.getReturnType(), 32 * 8);
10014   for (auto &I : FI.arguments())
10015     I.info = classifyType(I.type, 16 * 8);
10016 }
10017
10018 namespace {
10019 class SparcV9TargetCodeGenInfo : public TargetCodeGenInfo {
10020 public:
10021   SparcV9TargetCodeGenInfo(CodeGenTypes &CGT)
10022       : TargetCodeGenInfo(std::make_unique<SparcV9ABIInfo>(CGT)) {}
10023
10024   int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
10025     return 14;
10026   }
10027
10028   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
10029                                llvm::Value *Address) const override;
10030
10031   llvm::Value *decodeReturnAddress(CodeGen::CodeGenFunction &CGF,
10032                                    llvm::Value *Address) const override {
10033     return CGF.Builder.CreateGEP(CGF.Int8Ty, Address,
10034                                  llvm::ConstantInt::get(CGF.Int32Ty, 8));
10035   }
10036
10037   llvm::Value *encodeReturnAddress(CodeGen::CodeGenFunction &CGF,
10038                                    llvm::Value *Address) const override {
10039     return CGF.Builder.CreateGEP(CGF.Int8Ty, Address,
10040                                  llvm::ConstantInt::get(CGF.Int32Ty, -8));
10041   }
10042 };
10043 } // end anonymous namespace
10044
10045 bool
10046 SparcV9TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
10047                                                 llvm::Value *Address) const {
10048   // This is calculated from the LLVM and GCC tables and verified
10049   // against gcc output.  AFAIK all ABIs use the same encoding.
10050
10051   CodeGen::CGBuilderTy &Builder = CGF.Builder;
10052
10053   llvm::IntegerType *i8 = CGF.Int8Ty;
10054   llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4);
10055   llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8);
10056
10057   // 0-31: the 8-byte general-purpose registers
10058   AssignToArrayRange(Builder, Address, Eight8, 0, 31);
10059
10060   // 32-63: f0-31, the 4-byte floating-point registers
10061   AssignToArrayRange(Builder, Address, Four8, 32, 63);
10062
10063   //   Y   = 64
10064   //   PSR = 65
10065   //   WIM = 66
10066   //   TBR = 67
10067   //   PC  = 68
10068   //   NPC = 69
10069   //   FSR = 70
10070   //   CSR = 71
10071   AssignToArrayRange(Builder, Address, Eight8, 64, 71);
10072
10073   // 72-87: d0-15, the 8-byte floating-point registers
10074   AssignToArrayRange(Builder, Address, Eight8, 72, 87);
10075
10076   return false;
10077 }
10078
10079 // ARC ABI implementation.
10080 namespace {
10081
10082 class ARCABIInfo : public DefaultABIInfo {
10083   struct CCState {
10084     unsigned FreeRegs;
10085   };
10086
10087 public:
10088   using DefaultABIInfo::DefaultABIInfo;
10089
10090 private:
10091   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
10092                     QualType Ty) const override;
10093
10094   void updateState(const ABIArgInfo &Info, QualType Ty, CCState &State) const {
10095     if (!State.FreeRegs)
10096       return;
10097     if (Info.isIndirect() && Info.getInReg())
10098       State.FreeRegs--;
10099     else if (Info.isDirect() && Info.getInReg()) {
10100       unsigned sz = (getContext().getTypeSize(Ty) + 31) / 32;
10101       if (sz < State.FreeRegs)
10102         State.FreeRegs -= sz;
10103       else
10104         State.FreeRegs = 0;
10105     }
10106   }
10107
10108   void computeInfo(CGFunctionInfo &FI) const override {
10109     CCState State;
10110     // ARC uses 8 registers to pass arguments.
10111     State.FreeRegs = 8;
10112
10113     if (!getCXXABI().classifyReturnType(FI))
10114       FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
10115     updateState(FI.getReturnInfo(), FI.getReturnType(), State);
10116     for (auto &I : FI.arguments()) {
10117       I.info = classifyArgumentType(I.type, State.FreeRegs);
10118       updateState(I.info, I.type, State);
10119     }
10120   }
10121
10122   ABIArgInfo getIndirectByRef(QualType Ty, bool HasFreeRegs) const;
10123   ABIArgInfo getIndirectByValue(QualType Ty) const;
10124   ABIArgInfo classifyArgumentType(QualType Ty, uint8_t FreeRegs) const;
10125   ABIArgInfo classifyReturnType(QualType RetTy) const;
10126 };
10127
10128 class ARCTargetCodeGenInfo : public TargetCodeGenInfo {
10129 public:
10130   ARCTargetCodeGenInfo(CodeGenTypes &CGT)
10131       : TargetCodeGenInfo(std::make_unique<ARCABIInfo>(CGT)) {}
10132 };
10133
10134
10135 ABIArgInfo ARCABIInfo::getIndirectByRef(QualType Ty, bool HasFreeRegs) const {
10136   return HasFreeRegs ? getNaturalAlignIndirectInReg(Ty) :
10137                        getNaturalAlignIndirect(Ty, false);
10138 }
10139
10140 ABIArgInfo ARCABIInfo::getIndirectByValue(QualType Ty) const {
10141   // Compute the byval alignment.
10142   const unsigned MinABIStackAlignInBytes = 4;
10143   unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
10144   return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true,
10145                                  TypeAlign > MinABIStackAlignInBytes);
10146 }
10147
10148 Address ARCABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
10149                               QualType Ty) const {
10150   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
10151                           getContext().getTypeInfoInChars(Ty),
10152                           CharUnits::fromQuantity(4), true);
10153 }
10154
10155 ABIArgInfo ARCABIInfo::classifyArgumentType(QualType Ty,
10156                                             uint8_t FreeRegs) const {
10157   // Handle the generic C++ ABI.
10158   const RecordType *RT = Ty->getAs<RecordType>();
10159   if (RT) {
10160     CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
10161     if (RAA == CGCXXABI::RAA_Indirect)
10162       return getIndirectByRef(Ty, FreeRegs > 0);
10163
10164     if (RAA == CGCXXABI::RAA_DirectInMemory)
10165       return getIndirectByValue(Ty);
10166   }
10167
10168   // Treat an enum type as its underlying type.
10169   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
10170     Ty = EnumTy->getDecl()->getIntegerType();
10171
10172   auto SizeInRegs = llvm::alignTo(getContext().getTypeSize(Ty), 32) / 32;
10173
10174   if (isAggregateTypeForABI(Ty)) {
10175     // Structures with flexible arrays are always indirect.
10176     if (RT && RT->getDecl()->hasFlexibleArrayMember())
10177       return getIndirectByValue(Ty);
10178
10179     // Ignore empty structs/unions.
10180     if (isEmptyRecord(getContext(), Ty, true))
10181       return ABIArgInfo::getIgnore();
10182
10183     llvm::LLVMContext &LLVMContext = getVMContext();
10184
10185     llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
10186     SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32);
10187     llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
10188
10189     return FreeRegs >= SizeInRegs ?
10190         ABIArgInfo::getDirectInReg(Result) :
10191         ABIArgInfo::getDirect(Result, 0, nullptr, false);
10192   }
10193
10194   if (const auto *EIT = Ty->getAs<BitIntType>())
10195     if (EIT->getNumBits() > 64)
10196       return getIndirectByValue(Ty);
10197
10198   return isPromotableIntegerTypeForABI(Ty)
10199              ? (FreeRegs >= SizeInRegs ? ABIArgInfo::getExtendInReg(Ty)
10200                                        : ABIArgInfo::getExtend(Ty))
10201              : (FreeRegs >= SizeInRegs ? ABIArgInfo::getDirectInReg()
10202                                        : ABIArgInfo::getDirect());
10203 }
10204
10205 ABIArgInfo ARCABIInfo::classifyReturnType(QualType RetTy) const {
10206   if (RetTy->isAnyComplexType())
10207     return ABIArgInfo::getDirectInReg();
10208
10209   // Arguments of size > 4 registers are indirect.
10210   auto RetSize = llvm::alignTo(getContext().getTypeSize(RetTy), 32) / 32;
10211   if (RetSize > 4)
10212     return getIndirectByRef(RetTy, /*HasFreeRegs*/ true);
10213
10214   return DefaultABIInfo::classifyReturnType(RetTy);
10215 }
10216
10217 } // End anonymous namespace.
10218
10219 //===----------------------------------------------------------------------===//
10220 // XCore ABI Implementation
10221 //===----------------------------------------------------------------------===//
10222
10223 namespace {
10224
10225 /// A SmallStringEnc instance is used to build up the TypeString by passing
10226 /// it by reference between functions that append to it.
10227 typedef llvm::SmallString<128> SmallStringEnc;
10228
10229 /// TypeStringCache caches the meta encodings of Types.
10230 ///
10231 /// The reason for caching TypeStrings is two fold:
10232 ///   1. To cache a type's encoding for later uses;
10233 ///   2. As a means to break recursive member type inclusion.
10234 ///
10235 /// A cache Entry can have a Status of:
10236 ///   NonRecursive:   The type encoding is not recursive;
10237 ///   Recursive:      The type encoding is recursive;
10238 ///   Incomplete:     An incomplete TypeString;
10239 ///   IncompleteUsed: An incomplete TypeString that has been used in a
10240 ///                   Recursive type encoding.
10241 ///
10242 /// A NonRecursive entry will have all of its sub-members expanded as fully
10243 /// as possible. Whilst it may contain types which are recursive, the type
10244 /// itself is not recursive and thus its encoding may be safely used whenever
10245 /// the type is encountered.
10246 ///
10247 /// A Recursive entry will have all of its sub-members expanded as fully as
10248 /// possible. The type itself is recursive and it may contain other types which
10249 /// are recursive. The Recursive encoding must not be used during the expansion
10250 /// of a recursive type's recursive branch. For simplicity the code uses
10251 /// IncompleteCount to reject all usage of Recursive encodings for member types.
10252 ///
10253 /// An Incomplete entry is always a RecordType and only encodes its
10254 /// identifier e.g. "s(S){}". Incomplete 'StubEnc' entries are ephemeral and
10255 /// are placed into the cache during type expansion as a means to identify and
10256 /// handle recursive inclusion of types as sub-members. If there is recursion
10257 /// the entry becomes IncompleteUsed.
10258 ///
10259 /// During the expansion of a RecordType's members:
10260 ///
10261 ///   If the cache contains a NonRecursive encoding for the member type, the
10262 ///   cached encoding is used;
10263 ///
10264 ///   If the cache contains a Recursive encoding for the member type, the
10265 ///   cached encoding is 'Swapped' out, as it may be incorrect, and...
10266 ///
10267 ///   If the member is a RecordType, an Incomplete encoding is placed into the
10268 ///   cache to break potential recursive inclusion of itself as a sub-member;
10269 ///
10270 ///   Once a member RecordType has been expanded, its temporary incomplete
10271 ///   entry is removed from the cache. If a Recursive encoding was swapped out
10272 ///   it is swapped back in;
10273 ///
10274 ///   If an incomplete entry is used to expand a sub-member, the incomplete
10275 ///   entry is marked as IncompleteUsed. The cache keeps count of how many
10276 ///   IncompleteUsed entries it currently contains in IncompleteUsedCount;
10277 ///
10278 ///   If a member's encoding is found to be a NonRecursive or Recursive viz:
10279 ///   IncompleteUsedCount==0, the member's encoding is added to the cache.
10280 ///   Else the member is part of a recursive type and thus the recursion has
10281 ///   been exited too soon for the encoding to be correct for the member.
10282 ///
10283 class TypeStringCache {
10284   enum Status {NonRecursive, Recursive, Incomplete, IncompleteUsed};
10285   struct Entry {
10286     std::string Str;     // The encoded TypeString for the type.
10287     enum Status State;   // Information about the encoding in 'Str'.
10288     std::string Swapped; // A temporary place holder for a Recursive encoding
10289                          // during the expansion of RecordType's members.
10290   };
10291   std::map<const IdentifierInfo *, struct Entry> Map;
10292   unsigned IncompleteCount;     // Number of Incomplete entries in the Map.
10293   unsigned IncompleteUsedCount; // Number of IncompleteUsed entries in the Map.
10294 public:
10295   TypeStringCache() : IncompleteCount(0), IncompleteUsedCount(0) {}
10296   void addIncomplete(const IdentifierInfo *ID, std::string StubEnc);
10297   bool removeIncomplete(const IdentifierInfo *ID);
10298   void addIfComplete(const IdentifierInfo *ID, StringRef Str,
10299                      bool IsRecursive);
10300   StringRef lookupStr(const IdentifierInfo *ID);
10301 };
10302
10303 /// TypeString encodings for enum & union fields must be order.
10304 /// FieldEncoding is a helper for this ordering process.
10305 class FieldEncoding {
10306   bool HasName;
10307   std::string Enc;
10308 public:
10309   FieldEncoding(bool b, SmallStringEnc &e) : HasName(b), Enc(e.c_str()) {}
10310   StringRef str() { return Enc; }
10311   bool operator<(const FieldEncoding &rhs) const {
10312     if (HasName != rhs.HasName) return HasName;
10313     return Enc < rhs.Enc;
10314   }
10315 };
10316
10317 class XCoreABIInfo : public DefaultABIInfo {
10318 public:
10319   XCoreABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
10320   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
10321                     QualType Ty) const override;
10322 };
10323
10324 class XCoreTargetCodeGenInfo : public TargetCodeGenInfo {
10325   mutable TypeStringCache TSC;
10326   void emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
10327                     const CodeGen::CodeGenModule &M) const;
10328
10329 public:
10330   XCoreTargetCodeGenInfo(CodeGenTypes &CGT)
10331       : TargetCodeGenInfo(std::make_unique<XCoreABIInfo>(CGT)) {}
10332   void emitTargetMetadata(CodeGen::CodeGenModule &CGM,
10333                           const llvm::MapVector<GlobalDecl, StringRef>
10334                               &MangledDeclNames) const override;
10335 };
10336
10337 } // End anonymous namespace.
10338
10339 // TODO: this implementation is likely now redundant with the default
10340 // EmitVAArg.
10341 Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
10342                                 QualType Ty) const {
10343   CGBuilderTy &Builder = CGF.Builder;
10344
10345   // Get the VAList.
10346   CharUnits SlotSize = CharUnits::fromQuantity(4);
10347   Address AP = Address(Builder.CreateLoad(VAListAddr),
10348                        getVAListElementType(CGF), SlotSize);
10349
10350   // Handle the argument.
10351   ABIArgInfo AI = classifyArgumentType(Ty);
10352   CharUnits TypeAlign = getContext().getTypeAlignInChars(Ty);
10353   llvm::Type *ArgTy = CGT.ConvertType(Ty);
10354   if (AI.canHaveCoerceToType() && !AI.getCoerceToType())
10355     AI.setCoerceToType(ArgTy);
10356   llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy);
10357
10358   Address Val = Address::invalid();
10359   CharUnits ArgSize = CharUnits::Zero();
10360   switch (AI.getKind()) {
10361   case ABIArgInfo::Expand:
10362   case ABIArgInfo::CoerceAndExpand:
10363   case ABIArgInfo::InAlloca:
10364     llvm_unreachable("Unsupported ABI kind for va_arg");
10365   case ABIArgInfo::Ignore:
10366     Val = Address(llvm::UndefValue::get(ArgPtrTy), ArgTy, TypeAlign);
10367     ArgSize = CharUnits::Zero();
10368     break;
10369   case ABIArgInfo::Extend:
10370   case ABIArgInfo::Direct:
10371     Val = Builder.CreateElementBitCast(AP, ArgTy);
10372     ArgSize = CharUnits::fromQuantity(
10373         getDataLayout().getTypeAllocSize(AI.getCoerceToType()));
10374     ArgSize = ArgSize.alignTo(SlotSize);
10375     break;
10376   case ABIArgInfo::Indirect:
10377   case ABIArgInfo::IndirectAliased:
10378     Val = Builder.CreateElementBitCast(AP, ArgPtrTy);
10379     Val = Address(Builder.CreateLoad(Val), ArgTy, TypeAlign);
10380     ArgSize = SlotSize;
10381     break;
10382   }
10383
10384   // Increment the VAList.
10385   if (!ArgSize.isZero()) {
10386     Address APN = Builder.CreateConstInBoundsByteGEP(AP, ArgSize);
10387     Builder.CreateStore(APN.getPointer(), VAListAddr);
10388   }
10389
10390   return Val;
10391 }
10392
10393 /// During the expansion of a RecordType, an incomplete TypeString is placed
10394 /// into the cache as a means to identify and break recursion.
10395 /// If there is a Recursive encoding in the cache, it is swapped out and will
10396 /// be reinserted by removeIncomplete().
10397 /// All other types of encoding should have been used rather than arriving here.
10398 void TypeStringCache::addIncomplete(const IdentifierInfo *ID,
10399                                     std::string StubEnc) {
10400   if (!ID)
10401     return;
10402   Entry &E = Map[ID];
10403   assert( (E.Str.empty() || E.State == Recursive) &&
10404          "Incorrectly use of addIncomplete");
10405   assert(!StubEnc.empty() && "Passing an empty string to addIncomplete()");
10406   E.Swapped.swap(E.Str); // swap out the Recursive
10407   E.Str.swap(StubEnc);
10408   E.State = Incomplete;
10409   ++IncompleteCount;
10410 }
10411
10412 /// Once the RecordType has been expanded, the temporary incomplete TypeString
10413 /// must be removed from the cache.
10414 /// If a Recursive was swapped out by addIncomplete(), it will be replaced.
10415 /// Returns true if the RecordType was defined recursively.
10416 bool TypeStringCache::removeIncomplete(const IdentifierInfo *ID) {
10417   if (!ID)
10418     return false;
10419   auto I = Map.find(ID);
10420   assert(I != Map.end() && "Entry not present");
10421   Entry &E = I->second;
10422   assert( (E.State == Incomplete ||
10423            E.State == IncompleteUsed) &&
10424          "Entry must be an incomplete type");
10425   bool IsRecursive = false;
10426   if (E.State == IncompleteUsed) {
10427     // We made use of our Incomplete encoding, thus we are recursive.
10428     IsRecursive = true;
10429     --IncompleteUsedCount;
10430   }
10431   if (E.Swapped.empty())
10432     Map.erase(I);
10433   else {
10434     // Swap the Recursive back.
10435     E.Swapped.swap(E.Str);
10436     E.Swapped.clear();
10437     E.State = Recursive;
10438   }
10439   --IncompleteCount;
10440   return IsRecursive;
10441 }
10442
10443 /// Add the encoded TypeString to the cache only if it is NonRecursive or
10444 /// Recursive (viz: all sub-members were expanded as fully as possible).
10445 void TypeStringCache::addIfComplete(const IdentifierInfo *ID, StringRef Str,
10446                                     bool IsRecursive) {
10447   if (!ID || IncompleteUsedCount)
10448     return; // No key or it is an incomplete sub-type so don't add.
10449   Entry &E = Map[ID];
10450   if (IsRecursive && !E.Str.empty()) {
10451     assert(E.State==Recursive && E.Str.size() == Str.size() &&
10452            "This is not the same Recursive entry");
10453     // The parent container was not recursive after all, so we could have used
10454     // this Recursive sub-member entry after all, but we assumed the worse when
10455     // we started viz: IncompleteCount!=0.
10456     return;
10457   }
10458   assert(E.Str.empty() && "Entry already present");
10459   E.Str = Str.str();
10460   E.State = IsRecursive? Recursive : NonRecursive;
10461 }
10462
10463 /// Return a cached TypeString encoding for the ID. If there isn't one, or we
10464 /// are recursively expanding a type (IncompleteCount != 0) and the cached
10465 /// encoding is Recursive, return an empty StringRef.
10466 StringRef TypeStringCache::lookupStr(const IdentifierInfo *ID) {
10467   if (!ID)
10468     return StringRef();   // We have no key.
10469   auto I = Map.find(ID);
10470   if (I == Map.end())
10471     return StringRef();   // We have no encoding.
10472   Entry &E = I->second;
10473   if (E.State == Recursive && IncompleteCount)
10474     return StringRef();   // We don't use Recursive encodings for member types.
10475
10476   if (E.State == Incomplete) {
10477     // The incomplete type is being used to break out of recursion.
10478     E.State = IncompleteUsed;
10479     ++IncompleteUsedCount;
10480   }
10481   return E.Str;
10482 }
10483
10484 /// The XCore ABI includes a type information section that communicates symbol
10485 /// type information to the linker. The linker uses this information to verify
10486 /// safety/correctness of things such as array bound and pointers et al.
10487 /// The ABI only requires C (and XC) language modules to emit TypeStrings.
10488 /// This type information (TypeString) is emitted into meta data for all global
10489 /// symbols: definitions, declarations, functions & variables.
10490 ///
10491 /// The TypeString carries type, qualifier, name, size & value details.
10492 /// Please see 'Tools Development Guide' section 2.16.2 for format details:
10493 /// https://www.xmos.com/download/public/Tools-Development-Guide%28X9114A%29.pdf
10494 /// The output is tested by test/CodeGen/xcore-stringtype.c.
10495 ///
10496 static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
10497                           const CodeGen::CodeGenModule &CGM,
10498                           TypeStringCache &TSC);
10499
10500 /// XCore uses emitTargetMD to emit TypeString metadata for global symbols.
10501 void XCoreTargetCodeGenInfo::emitTargetMD(
10502     const Decl *D, llvm::GlobalValue *GV,
10503     const CodeGen::CodeGenModule &CGM) const {
10504   SmallStringEnc Enc;
10505   if (getTypeString(Enc, D, CGM, TSC)) {
10506     llvm::LLVMContext &Ctx = CGM.getModule().getContext();
10507     llvm::Metadata *MDVals[] = {llvm::ConstantAsMetadata::get(GV),
10508                                 llvm::MDString::get(Ctx, Enc.str())};
10509     llvm::NamedMDNode *MD =
10510       CGM.getModule().getOrInsertNamedMetadata("xcore.typestrings");
10511     MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
10512   }
10513 }
10514
10515 void XCoreTargetCodeGenInfo::emitTargetMetadata(
10516     CodeGen::CodeGenModule &CGM,
10517     const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const {
10518   // Warning, new MangledDeclNames may be appended within this loop.
10519   // We rely on MapVector insertions adding new elements to the end
10520   // of the container.
10521   for (unsigned I = 0; I != MangledDeclNames.size(); ++I) {
10522     auto Val = *(MangledDeclNames.begin() + I);
10523     llvm::GlobalValue *GV = CGM.GetGlobalValue(Val.second);
10524     if (GV) {
10525       const Decl *D = Val.first.getDecl()->getMostRecentDecl();
10526       emitTargetMD(D, GV, CGM);
10527     }
10528   }
10529 }
10530
10531 //===----------------------------------------------------------------------===//
10532 // Base ABI and target codegen info implementation common between SPIR and
10533 // SPIR-V.
10534 //===----------------------------------------------------------------------===//
10535
10536 namespace {
10537 class CommonSPIRABIInfo : public DefaultABIInfo {
10538 public:
10539   CommonSPIRABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) { setCCs(); }
10540
10541 private:
10542   void setCCs();
10543 };
10544
10545 class SPIRVABIInfo : public CommonSPIRABIInfo {
10546 public:
10547   SPIRVABIInfo(CodeGenTypes &CGT) : CommonSPIRABIInfo(CGT) {}
10548   void computeInfo(CGFunctionInfo &FI) const override;
10549
10550 private:
10551   ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
10552 };
10553 } // end anonymous namespace
10554 namespace {
10555 class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
10556 public:
10557   CommonSPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
10558       : TargetCodeGenInfo(std::make_unique<CommonSPIRABIInfo>(CGT)) {}
10559   CommonSPIRTargetCodeGenInfo(std::unique_ptr<ABIInfo> ABIInfo)
10560       : TargetCodeGenInfo(std::move(ABIInfo)) {}
10561
10562   LangAS getASTAllocaAddressSpace() const override {
10563     return getLangASFromTargetAS(
10564         getABIInfo().getDataLayout().getAllocaAddrSpace());
10565   }
10566
10567   unsigned getOpenCLKernelCallingConv() const override;
10568   llvm::Type *getOpenCLType(CodeGenModule &CGM, const Type *T) const override;
10569 };
10570 class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
10571 public:
10572   SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
10573       : CommonSPIRTargetCodeGenInfo(std::make_unique<SPIRVABIInfo>(CGT)) {}
10574   void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
10575 };
10576 } // End anonymous namespace.
10577
10578 void CommonSPIRABIInfo::setCCs() {
10579   assert(getRuntimeCC() == llvm::CallingConv::C);
10580   RuntimeCC = llvm::CallingConv::SPIR_FUNC;
10581 }
10582
10583 ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
10584   if (getContext().getLangOpts().CUDAIsDevice) {
10585     // Coerce pointer arguments with default address space to CrossWorkGroup
10586     // pointers for HIPSPV/CUDASPV. When the language mode is HIP/CUDA, the
10587     // SPIRTargetInfo maps cuda_device to SPIR-V's CrossWorkGroup address space.
10588     llvm::Type *LTy = CGT.ConvertType(Ty);
10589     auto DefaultAS = getContext().getTargetAddressSpace(LangAS::Default);
10590     auto GlobalAS = getContext().getTargetAddressSpace(LangAS::cuda_device);
10591     auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(LTy);
10592     if (PtrTy && PtrTy->getAddressSpace() == DefaultAS) {
10593       LTy = llvm::PointerType::get(PtrTy->getContext(), GlobalAS);
10594       return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
10595     }
10596
10597     // Force copying aggregate type in kernel arguments by value when
10598     // compiling CUDA targeting SPIR-V. This is required for the object
10599     // copied to be valid on the device.
10600     // This behavior follows the CUDA spec
10601     // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
10602     // and matches the NVPTX implementation.
10603     if (isAggregateTypeForABI(Ty))
10604       return getNaturalAlignIndirect(Ty, /* byval */ true);
10605   }
10606   return classifyArgumentType(Ty);
10607 }
10608
10609 void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
10610   // The logic is same as in DefaultABIInfo with an exception on the kernel
10611   // arguments handling.
10612   llvm::CallingConv::ID CC = FI.getCallingConvention();
10613
10614   if (!getCXXABI().classifyReturnType(FI))
10615     FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
10616
10617   for (auto &I : FI.arguments()) {
10618     if (CC == llvm::CallingConv::SPIR_KERNEL) {
10619       I.info = classifyKernelArgumentType(I.type);
10620     } else {
10621       I.info = classifyArgumentType(I.type);
10622     }
10623   }
10624 }
10625
10626 namespace clang {
10627 namespace CodeGen {
10628 void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) {
10629   if (CGM.getTarget().getTriple().isSPIRV())
10630     SPIRVABIInfo(CGM.getTypes()).computeInfo(FI);
10631   else
10632     CommonSPIRABIInfo(CGM.getTypes()).computeInfo(FI);
10633 }
10634 }
10635 }
10636
10637 unsigned CommonSPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
10638   return llvm::CallingConv::SPIR_KERNEL;
10639 }
10640
10641 void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
10642     const FunctionType *&FT) const {
10643   // Convert HIP kernels to SPIR-V kernels.
10644   if (getABIInfo().getContext().getLangOpts().HIP) {
10645     FT = getABIInfo().getContext().adjustFunctionType(
10646         FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
10647     return;
10648   }
10649 }
10650
10651 static bool appendType(SmallStringEnc &Enc, QualType QType,
10652                        const CodeGen::CodeGenModule &CGM,
10653                        TypeStringCache &TSC);
10654
10655 /// Helper function for appendRecordType().
10656 /// Builds a SmallVector containing the encoded field types in declaration
10657 /// order.
10658 static bool extractFieldType(SmallVectorImpl<FieldEncoding> &FE,
10659                              const RecordDecl *RD,
10660                              const CodeGen::CodeGenModule &CGM,
10661                              TypeStringCache &TSC) {
10662   for (const auto *Field : RD->fields()) {
10663     SmallStringEnc Enc;
10664     Enc += "m(";
10665     Enc += Field->getName();
10666     Enc += "){";
10667     if (Field->isBitField()) {
10668       Enc += "b(";
10669       llvm::raw_svector_ostream OS(Enc);
10670       OS << Field->getBitWidthValue(CGM.getContext());
10671       Enc += ':';
10672     }
10673     if (!appendType(Enc, Field->getType(), CGM, TSC))
10674       return false;
10675     if (Field->isBitField())
10676       Enc += ')';
10677     Enc += '}';
10678     FE.emplace_back(!Field->getName().empty(), Enc);
10679   }
10680   return true;
10681 }
10682
10683 /// Appends structure and union types to Enc and adds encoding to cache.
10684 /// Recursively calls appendType (via extractFieldType) for each field.
10685 /// Union types have their fields ordered according to the ABI.
10686 static bool appendRecordType(SmallStringEnc &Enc, const RecordType *RT,
10687                              const CodeGen::CodeGenModule &CGM,
10688                              TypeStringCache &TSC, const IdentifierInfo *ID) {
10689   // Append the cached TypeString if we have one.
10690   StringRef TypeString = TSC.lookupStr(ID);
10691   if (!TypeString.empty()) {
10692     Enc += TypeString;
10693     return true;
10694   }
10695
10696   // Start to emit an incomplete TypeString.
10697   size_t Start = Enc.size();
10698   Enc += (RT->isUnionType()? 'u' : 's');
10699   Enc += '(';
10700   if (ID)
10701     Enc += ID->getName();
10702   Enc += "){";
10703
10704   // We collect all encoded fields and order as necessary.
10705   bool IsRecursive = false;
10706   const RecordDecl *RD = RT->getDecl()->getDefinition();
10707   if (RD && !RD->field_empty()) {
10708     // An incomplete TypeString stub is placed in the cache for this RecordType
10709     // so that recursive calls to this RecordType will use it whilst building a
10710     // complete TypeString for this RecordType.
10711     SmallVector<FieldEncoding, 16> FE;
10712     std::string StubEnc(Enc.substr(Start).str());
10713     StubEnc += '}';  // StubEnc now holds a valid incomplete TypeString.
10714     TSC.addIncomplete(ID, std::move(StubEnc));
10715     if (!extractFieldType(FE, RD, CGM, TSC)) {
10716       (void) TSC.removeIncomplete(ID);
10717       return false;
10718     }
10719     IsRecursive = TSC.removeIncomplete(ID);
10720     // The ABI requires unions to be sorted but not structures.
10721     // See FieldEncoding::operator< for sort algorithm.
10722     if (RT->isUnionType())
10723       llvm::sort(FE);
10724     // We can now complete the TypeString.
10725     unsigned E = FE.size();
10726     for (unsigned I = 0; I != E; ++I) {
10727       if (I)
10728         Enc += ',';
10729       Enc += FE[I].str();
10730     }
10731   }
10732   Enc += '}';
10733   TSC.addIfComplete(ID, Enc.substr(Start), IsRecursive);
10734   return true;
10735 }
10736
10737 /// Appends enum types to Enc and adds the encoding to the cache.
10738 static bool appendEnumType(SmallStringEnc &Enc, const EnumType *ET,
10739                            TypeStringCache &TSC,
10740                            const IdentifierInfo *ID) {
10741   // Append the cached TypeString if we have one.
10742   StringRef TypeString = TSC.lookupStr(ID);
10743   if (!TypeString.empty()) {
10744     Enc += TypeString;
10745     return true;
10746   }
10747
10748   size_t Start = Enc.size();
10749   Enc += "e(";
10750   if (ID)
10751     Enc += ID->getName();
10752   Enc += "){";
10753
10754   // We collect all encoded enumerations and order them alphanumerically.
10755   if (const EnumDecl *ED = ET->getDecl()->getDefinition()) {
10756     SmallVector<FieldEncoding, 16> FE;
10757     for (auto I = ED->enumerator_begin(), E = ED->enumerator_end(); I != E;
10758          ++I) {
10759       SmallStringEnc EnumEnc;
10760       EnumEnc += "m(";
10761       EnumEnc += I->getName();
10762       EnumEnc += "){";
10763       I->getInitVal().toString(EnumEnc);
10764       EnumEnc += '}';
10765       FE.push_back(FieldEncoding(!I->getName().empty(), EnumEnc));
10766     }
10767     llvm::sort(FE);
10768     unsigned E = FE.size();
10769     for (unsigned I = 0; I != E; ++I) {
10770       if (I)
10771         Enc += ',';
10772       Enc += FE[I].str();
10773     }
10774   }
10775   Enc += '}';
10776   TSC.addIfComplete(ID, Enc.substr(Start), false);
10777   return true;
10778 }
10779
10780 /// Appends type's qualifier to Enc.
10781 /// This is done prior to appending the type's encoding.
10782 static void appendQualifier(SmallStringEnc &Enc, QualType QT) {
10783   // Qualifiers are emitted in alphabetical order.
10784   static const char *const Table[]={"","c:","r:","cr:","v:","cv:","rv:","crv:"};
10785   int Lookup = 0;
10786   if (QT.isConstQualified())
10787     Lookup += 1<<0;
10788   if (QT.isRestrictQualified())
10789     Lookup += 1<<1;
10790   if (QT.isVolatileQualified())
10791     Lookup += 1<<2;
10792   Enc += Table[Lookup];
10793 }
10794
10795 /// Appends built-in types to Enc.
10796 static bool appendBuiltinType(SmallStringEnc &Enc, const BuiltinType *BT) {
10797   const char *EncType;
10798   switch (BT->getKind()) {
10799     case BuiltinType::Void:
10800       EncType = "0";
10801       break;
10802     case BuiltinType::Bool:
10803       EncType = "b";
10804       break;
10805     case BuiltinType::Char_U:
10806       EncType = "uc";
10807       break;
10808     case BuiltinType::UChar:
10809       EncType = "uc";
10810       break;
10811     case BuiltinType::SChar:
10812       EncType = "sc";
10813       break;
10814     case BuiltinType::UShort:
10815       EncType = "us";
10816       break;
10817     case BuiltinType::Short:
10818       EncType = "ss";
10819       break;
10820     case BuiltinType::UInt:
10821       EncType = "ui";
10822       break;
10823     case BuiltinType::Int:
10824       EncType = "si";
10825       break;
10826     case BuiltinType::ULong:
10827       EncType = "ul";
10828       break;
10829     case BuiltinType::Long:
10830       EncType = "sl";
10831       break;
10832     case BuiltinType::ULongLong:
10833       EncType = "ull";
10834       break;
10835     case BuiltinType::LongLong:
10836       EncType = "sll";
10837       break;
10838     case BuiltinType::Float:
10839       EncType = "ft";
10840       break;
10841     case BuiltinType::Double:
10842       EncType = "d";
10843       break;
10844     case BuiltinType::LongDouble:
10845       EncType = "ld";
10846       break;
10847     default:
10848       return false;
10849   }
10850   Enc += EncType;
10851   return true;
10852 }
10853
10854 /// Appends a pointer encoding to Enc before calling appendType for the pointee.
10855 static bool appendPointerType(SmallStringEnc &Enc, const PointerType *PT,
10856                               const CodeGen::CodeGenModule &CGM,
10857                               TypeStringCache &TSC) {
10858   Enc += "p(";
10859   if (!appendType(Enc, PT->getPointeeType(), CGM, TSC))
10860     return false;
10861   Enc += ')';
10862   return true;
10863 }
10864
10865 /// Appends array encoding to Enc before calling appendType for the element.
10866 static bool appendArrayType(SmallStringEnc &Enc, QualType QT,
10867                             const ArrayType *AT,
10868                             const CodeGen::CodeGenModule &CGM,
10869                             TypeStringCache &TSC, StringRef NoSizeEnc) {
10870   if (AT->getSizeModifier() != ArrayType::Normal)
10871     return false;
10872   Enc += "a(";
10873   if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT))
10874     CAT->getSize().toStringUnsigned(Enc);
10875   else
10876     Enc += NoSizeEnc; // Global arrays use "*", otherwise it is "".
10877   Enc += ':';
10878   // The Qualifiers should be attached to the type rather than the array.
10879   appendQualifier(Enc, QT);
10880   if (!appendType(Enc, AT->getElementType(), CGM, TSC))
10881     return false;
10882   Enc += ')';
10883   return true;
10884 }
10885
10886 /// Appends a function encoding to Enc, calling appendType for the return type
10887 /// and the arguments.
10888 static bool appendFunctionType(SmallStringEnc &Enc, const FunctionType *FT,
10889                              const CodeGen::CodeGenModule &CGM,
10890                              TypeStringCache &TSC) {
10891   Enc += "f{";
10892   if (!appendType(Enc, FT->getReturnType(), CGM, TSC))
10893     return false;
10894   Enc += "}(";
10895   if (const FunctionProtoType *FPT = FT->getAs<FunctionProtoType>()) {
10896     // N.B. we are only interested in the adjusted param types.
10897     auto I = FPT->param_type_begin();
10898     auto E = FPT->param_type_end();
10899     if (I != E) {
10900       do {
10901         if (!appendType(Enc, *I, CGM, TSC))
10902           return false;
10903         ++I;
10904         if (I != E)
10905           Enc += ',';
10906       } while (I != E);
10907       if (FPT->isVariadic())
10908         Enc += ",va";
10909     } else {
10910       if (FPT->isVariadic())
10911         Enc += "va";
10912       else
10913         Enc += '0';
10914     }
10915   }
10916   Enc += ')';
10917   return true;
10918 }
10919
10920 /// Handles the type's qualifier before dispatching a call to handle specific
10921 /// type encodings.
10922 static bool appendType(SmallStringEnc &Enc, QualType QType,
10923                        const CodeGen::CodeGenModule &CGM,
10924                        TypeStringCache &TSC) {
10925
10926   QualType QT = QType.getCanonicalType();
10927
10928   if (const ArrayType *AT = QT->getAsArrayTypeUnsafe())
10929     // The Qualifiers should be attached to the type rather than the array.
10930     // Thus we don't call appendQualifier() here.
10931     return appendArrayType(Enc, QT, AT, CGM, TSC, "");
10932
10933   appendQualifier(Enc, QT);
10934
10935   if (const BuiltinType *BT = QT->getAs<BuiltinType>())
10936     return appendBuiltinType(Enc, BT);
10937
10938   if (const PointerType *PT = QT->getAs<PointerType>())
10939     return appendPointerType(Enc, PT, CGM, TSC);
10940
10941   if (const EnumType *ET = QT->getAs<EnumType>())
10942     return appendEnumType(Enc, ET, TSC, QT.getBaseTypeIdentifier());
10943
10944   if (const RecordType *RT = QT->getAsStructureType())
10945     return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier());
10946
10947   if (const RecordType *RT = QT->getAsUnionType())
10948     return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier());
10949
10950   if (const FunctionType *FT = QT->getAs<FunctionType>())
10951     return appendFunctionType(Enc, FT, CGM, TSC);
10952
10953   return false;
10954 }
10955
10956 static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
10957                           const CodeGen::CodeGenModule &CGM,
10958                           TypeStringCache &TSC) {
10959   if (!D)
10960     return false;
10961
10962   if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
10963     if (FD->getLanguageLinkage() != CLanguageLinkage)
10964       return false;
10965     return appendType(Enc, FD->getType(), CGM, TSC);
10966   }
10967
10968   if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
10969     if (VD->getLanguageLinkage() != CLanguageLinkage)
10970       return false;
10971     QualType QT = VD->getType().getCanonicalType();
10972     if (const ArrayType *AT = QT->getAsArrayTypeUnsafe()) {
10973       // Global ArrayTypes are given a size of '*' if the size is unknown.
10974       // The Qualifiers should be attached to the type rather than the array.
10975       // Thus we don't call appendQualifier() here.
10976       return appendArrayType(Enc, QT, AT, CGM, TSC, "*");
10977     }
10978     return appendType(Enc, QT, CGM, TSC);
10979   }
10980   return false;
10981 }
10982
10983 /// Construct a SPIR-V target extension type for the given OpenCL image type.
10984 static llvm::Type *getSPIRVImageType(llvm::LLVMContext &Ctx, StringRef BaseType,
10985                                      StringRef OpenCLName,
10986                                      unsigned AccessQualifier) {
10987   // These parameters compare to the operands of OpTypeImage (see
10988   // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpTypeImage
10989   // for more details). The first 6 integer parameters all default to 0, and
10990   // will be changed to 1 only for the image type(s) that set the parameter to
10991   // one. The 7th integer parameter is the access qualifier, which is tacked on
10992   // at the end.
10993   SmallVector<unsigned, 7> IntParams = {0, 0, 0, 0, 0, 0};
10994
10995   // Choose the dimension of the image--this corresponds to the Dim enum in
10996   // SPIR-V (first integer parameter of OpTypeImage).
10997   if (OpenCLName.startswith("image2d"))
10998     IntParams[0] = 1; // 1D
10999   else if (OpenCLName.startswith("image3d"))
11000     IntParams[0] = 2; // 2D
11001   else if (OpenCLName == "image1d_buffer")
11002     IntParams[0] = 5; // Buffer
11003   else
11004     assert(OpenCLName.startswith("image1d") && "Unknown image type");
11005
11006   // Set the other integer parameters of OpTypeImage if necessary. Note that the
11007   // OpenCL image types don't provide any information for the Sampled or
11008   // Image Format parameters.
11009   if (OpenCLName.contains("_depth"))
11010     IntParams[1] = 1;
11011   if (OpenCLName.contains("_array"))
11012     IntParams[2] = 1;
11013   if (OpenCLName.contains("_msaa"))
11014     IntParams[3] = 1;
11015
11016   // Access qualifier
11017   IntParams.push_back(AccessQualifier);
11018
11019   return llvm::TargetExtType::get(Ctx, BaseType, {llvm::Type::getVoidTy(Ctx)},
11020                                   IntParams);
11021 }
11022
11023 llvm::Type *CommonSPIRTargetCodeGenInfo::getOpenCLType(CodeGenModule &CGM,
11024                                                        const Type *Ty) const {
11025   llvm::LLVMContext &Ctx = CGM.getLLVMContext();
11026   if (auto *PipeTy = dyn_cast<PipeType>(Ty))
11027     return llvm::TargetExtType::get(Ctx, "spirv.Pipe", {},
11028                                     {!PipeTy->isReadOnly()});
11029   if (auto *BuiltinTy = dyn_cast<BuiltinType>(Ty)) {
11030     enum AccessQualifier : unsigned { AQ_ro = 0, AQ_wo = 1, AQ_rw = 2 };
11031     switch (BuiltinTy->getKind()) {
11032 #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix)                   \
11033     case BuiltinType::Id:                                                      \
11034       return getSPIRVImageType(Ctx, "spirv.Image", #ImgType, AQ_##Suffix);
11035 #include "clang/Basic/OpenCLImageTypes.def"
11036     case BuiltinType::OCLSampler:
11037       return llvm::TargetExtType::get(Ctx, "spirv.Sampler");
11038     case BuiltinType::OCLEvent:
11039       return llvm::TargetExtType::get(Ctx, "spirv.Event");
11040     case BuiltinType::OCLClkEvent:
11041       return llvm::TargetExtType::get(Ctx, "spirv.DeviceEvent");
11042     case BuiltinType::OCLQueue:
11043       return llvm::TargetExtType::get(Ctx, "spirv.Queue");
11044     case BuiltinType::OCLReserveID:
11045       return llvm::TargetExtType::get(Ctx, "spirv.ReserveId");
11046 #define INTEL_SUBGROUP_AVC_TYPE(Name, Id)                                      \
11047     case BuiltinType::OCLIntelSubgroupAVC##Id:                                 \
11048       return llvm::TargetExtType::get(Ctx, "spirv.Avc" #Id "INTEL");
11049 #include "clang/Basic/OpenCLExtensionTypes.def"
11050     default:
11051       return nullptr;
11052     }
11053   }
11054
11055   return nullptr;
11056 }
11057 //===----------------------------------------------------------------------===//
11058 // RISC-V ABI Implementation
11059 //===----------------------------------------------------------------------===//
11060
11061 namespace {
11062 class RISCVABIInfo : public DefaultABIInfo {
11063 private:
11064   // Size of the integer ('x') registers in bits.
11065   unsigned XLen;
11066   // Size of the floating point ('f') registers in bits. Note that the target
11067   // ISA might have a wider FLen than the selected ABI (e.g. an RV32IF target
11068   // with soft float ABI has FLen==0).
11069   unsigned FLen;
11070   static const int NumArgGPRs = 8;
11071   static const int NumArgFPRs = 8;
11072   bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
11073                                       llvm::Type *&Field1Ty,
11074                                       CharUnits &Field1Off,
11075                                       llvm::Type *&Field2Ty,
11076                                       CharUnits &Field2Off) const;
11077
11078 public:
11079   RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen)
11080       : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen) {}
11081
11082   // DefaultABIInfo's classifyReturnType and classifyArgumentType are
11083   // non-virtual, but computeInfo is virtual, so we overload it.
11084   void computeInfo(CGFunctionInfo &FI) const override;
11085
11086   ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft,
11087                                   int &ArgFPRsLeft) const;
11088   ABIArgInfo classifyReturnType(QualType RetTy) const;
11089
11090   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
11091                     QualType Ty) const override;
11092
11093   ABIArgInfo extendType(QualType Ty) const;
11094
11095   bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty,
11096                                 CharUnits &Field1Off, llvm::Type *&Field2Ty,
11097                                 CharUnits &Field2Off, int &NeededArgGPRs,
11098                                 int &NeededArgFPRs) const;
11099   ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty,
11100                                                CharUnits Field1Off,
11101                                                llvm::Type *Field2Ty,
11102                                                CharUnits Field2Off) const;
11103
11104   ABIArgInfo coerceVLSVector(QualType Ty) const;
11105 };
11106 } // end anonymous namespace
11107
11108 void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const {
11109   QualType RetTy = FI.getReturnType();
11110   if (!getCXXABI().classifyReturnType(FI))
11111     FI.getReturnInfo() = classifyReturnType(RetTy);
11112
11113   // IsRetIndirect is true if classifyArgumentType indicated the value should
11114   // be passed indirect, or if the type size is a scalar greater than 2*XLen
11115   // and not a complex type with elements <= FLen. e.g. fp128 is passed direct
11116   // in LLVM IR, relying on the backend lowering code to rewrite the argument
11117   // list and pass indirectly on RV32.
11118   bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect;
11119   if (!IsRetIndirect && RetTy->isScalarType() &&
11120       getContext().getTypeSize(RetTy) > (2 * XLen)) {
11121     if (RetTy->isComplexType() && FLen) {
11122       QualType EltTy = RetTy->castAs<ComplexType>()->getElementType();
11123       IsRetIndirect = getContext().getTypeSize(EltTy) > FLen;
11124     } else {
11125       // This is a normal scalar > 2*XLen, such as fp128 on RV32.
11126       IsRetIndirect = true;
11127     }
11128   }
11129
11130   int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs;
11131   int ArgFPRsLeft = FLen ? NumArgFPRs : 0;
11132   int NumFixedArgs = FI.getNumRequiredArgs();
11133
11134   int ArgNum = 0;
11135   for (auto &ArgInfo : FI.arguments()) {
11136     bool IsFixed = ArgNum < NumFixedArgs;
11137     ArgInfo.info =
11138         classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft);
11139     ArgNum++;
11140   }
11141 }
11142
11143 // Returns true if the struct is a potential candidate for the floating point
11144 // calling convention. If this function returns true, the caller is
11145 // responsible for checking that if there is only a single field then that
11146 // field is a float.
11147 bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
11148                                                   llvm::Type *&Field1Ty,
11149                                                   CharUnits &Field1Off,
11150                                                   llvm::Type *&Field2Ty,
11151                                                   CharUnits &Field2Off) const {
11152   bool IsInt = Ty->isIntegralOrEnumerationType();
11153   bool IsFloat = Ty->isRealFloatingType();
11154
11155   if (IsInt || IsFloat) {
11156     uint64_t Size = getContext().getTypeSize(Ty);
11157     if (IsInt && Size > XLen)
11158       return false;
11159     // Can't be eligible if larger than the FP registers. Handling of half
11160     // precision values has been specified in the ABI, so don't block those.
11161     if (IsFloat && Size > FLen)
11162       return false;
11163     // Can't be eligible if an integer type was already found (int+int pairs
11164     // are not eligible).
11165     if (IsInt && Field1Ty && Field1Ty->isIntegerTy())
11166       return false;
11167     if (!Field1Ty) {
11168       Field1Ty = CGT.ConvertType(Ty);
11169       Field1Off = CurOff;
11170       return true;
11171     }
11172     if (!Field2Ty) {
11173       Field2Ty = CGT.ConvertType(Ty);
11174       Field2Off = CurOff;
11175       return true;
11176     }
11177     return false;
11178   }
11179
11180   if (auto CTy = Ty->getAs<ComplexType>()) {
11181     if (Field1Ty)
11182       return false;
11183     QualType EltTy = CTy->getElementType();
11184     if (getContext().getTypeSize(EltTy) > FLen)
11185       return false;
11186     Field1Ty = CGT.ConvertType(EltTy);
11187     Field1Off = CurOff;
11188     Field2Ty = Field1Ty;
11189     Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy);
11190     return true;
11191   }
11192
11193   if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) {
11194     uint64_t ArraySize = ATy->getSize().getZExtValue();
11195     QualType EltTy = ATy->getElementType();
11196     CharUnits EltSize = getContext().getTypeSizeInChars(EltTy);
11197     for (uint64_t i = 0; i < ArraySize; ++i) {
11198       bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty,
11199                                                 Field1Off, Field2Ty, Field2Off);
11200       if (!Ret)
11201         return false;
11202       CurOff += EltSize;
11203     }
11204     return true;
11205   }
11206
11207   if (const auto *RTy = Ty->getAs<RecordType>()) {
11208     // Structures with either a non-trivial destructor or a non-trivial
11209     // copy constructor are not eligible for the FP calling convention.
11210     if (getRecordArgABI(Ty, CGT.getCXXABI()))
11211       return false;
11212     if (isEmptyRecord(getContext(), Ty, true))
11213       return true;
11214     const RecordDecl *RD = RTy->getDecl();
11215     // Unions aren't eligible unless they're empty (which is caught above).
11216     if (RD->isUnion())
11217       return false;
11218     const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
11219     // If this is a C++ record, check the bases first.
11220     if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
11221       for (const CXXBaseSpecifier &B : CXXRD->bases()) {
11222         const auto *BDecl =
11223             cast<CXXRecordDecl>(B.getType()->castAs<RecordType>()->getDecl());
11224         CharUnits BaseOff = Layout.getBaseClassOffset(BDecl);
11225         bool Ret = detectFPCCEligibleStructHelper(B.getType(), CurOff + BaseOff,
11226                                                   Field1Ty, Field1Off, Field2Ty,
11227                                                   Field2Off);
11228         if (!Ret)
11229           return false;
11230       }
11231     }
11232     int ZeroWidthBitFieldCount = 0;
11233     for (const FieldDecl *FD : RD->fields()) {
11234       uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex());
11235       QualType QTy = FD->getType();
11236       if (FD->isBitField()) {
11237         unsigned BitWidth = FD->getBitWidthValue(getContext());
11238         // Allow a bitfield with a type greater than XLen as long as the
11239         // bitwidth is XLen or less.
11240         if (getContext().getTypeSize(QTy) > XLen && BitWidth <= XLen)
11241           QTy = getContext().getIntTypeForBitwidth(XLen, false);
11242         if (BitWidth == 0) {
11243           ZeroWidthBitFieldCount++;
11244           continue;
11245         }
11246       }
11247
11248       bool Ret = detectFPCCEligibleStructHelper(
11249           QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits),
11250           Field1Ty, Field1Off, Field2Ty, Field2Off);
11251       if (!Ret)
11252         return false;
11253
11254       // As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp
11255       // or int+fp structs, but are ignored for a struct with an fp field and
11256       // any number of zero-width bitfields.
11257       if (Field2Ty && ZeroWidthBitFieldCount > 0)
11258         return false;
11259     }
11260     return Field1Ty != nullptr;
11261   }
11262
11263   return false;
11264 }
11265
11266 // Determine if a struct is eligible for passing according to the floating
11267 // point calling convention (i.e., when flattened it contains a single fp
11268 // value, fp+fp, or int+fp of appropriate size). If so, NeededArgFPRs and
11269 // NeededArgGPRs are incremented appropriately.
11270 bool RISCVABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty,
11271                                             CharUnits &Field1Off,
11272                                             llvm::Type *&Field2Ty,
11273                                             CharUnits &Field2Off,
11274                                             int &NeededArgGPRs,
11275                                             int &NeededArgFPRs) const {
11276   Field1Ty = nullptr;
11277   Field2Ty = nullptr;
11278   NeededArgGPRs = 0;
11279   NeededArgFPRs = 0;
11280   bool IsCandidate = detectFPCCEligibleStructHelper(
11281       Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off);
11282   // Not really a candidate if we have a single int but no float.
11283   if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy())
11284     return false;
11285   if (!IsCandidate)
11286     return false;
11287   if (Field1Ty && Field1Ty->isFloatingPointTy())
11288     NeededArgFPRs++;
11289   else if (Field1Ty)
11290     NeededArgGPRs++;
11291   if (Field2Ty && Field2Ty->isFloatingPointTy())
11292     NeededArgFPRs++;
11293   else if (Field2Ty)
11294     NeededArgGPRs++;
11295   return true;
11296 }
11297
11298 // Call getCoerceAndExpand for the two-element flattened struct described by
11299 // Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an
11300 // appropriate coerceToType and unpaddedCoerceToType.
11301 ABIArgInfo RISCVABIInfo::coerceAndExpandFPCCEligibleStruct(
11302     llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty,
11303     CharUnits Field2Off) const {
11304   SmallVector<llvm::Type *, 3> CoerceElts;
11305   SmallVector<llvm::Type *, 2> UnpaddedCoerceElts;
11306   if (!Field1Off.isZero())
11307     CoerceElts.push_back(llvm::ArrayType::get(
11308         llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity()));
11309
11310   CoerceElts.push_back(Field1Ty);
11311   UnpaddedCoerceElts.push_back(Field1Ty);
11312
11313   if (!Field2Ty) {
11314     return ABIArgInfo::getCoerceAndExpand(
11315         llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()),
11316         UnpaddedCoerceElts[0]);
11317   }
11318
11319   CharUnits Field2Align =
11320       CharUnits::fromQuantity(getDataLayout().getABITypeAlign(Field2Ty));
11321   CharUnits Field1End = Field1Off +
11322       CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty));
11323   CharUnits Field2OffNoPadNoPack = Field1End.alignTo(Field2Align);
11324
11325   CharUnits Padding = CharUnits::Zero();
11326   if (Field2Off > Field2OffNoPadNoPack)
11327     Padding = Field2Off - Field2OffNoPadNoPack;
11328   else if (Field2Off != Field2Align && Field2Off > Field1End)
11329     Padding = Field2Off - Field1End;
11330
11331   bool IsPacked = !Field2Off.isMultipleOf(Field2Align);
11332
11333   if (!Padding.isZero())
11334     CoerceElts.push_back(llvm::ArrayType::get(
11335         llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity()));
11336
11337   CoerceElts.push_back(Field2Ty);
11338   UnpaddedCoerceElts.push_back(Field2Ty);
11339
11340   auto CoerceToType =
11341       llvm::StructType::get(getVMContext(), CoerceElts, IsPacked);
11342   auto UnpaddedCoerceToType =
11343       llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked);
11344
11345   return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType);
11346 }
11347
11348 // Fixed-length RVV vectors are represented as scalable vectors in function
11349 // args/return and must be coerced from fixed vectors.
11350 ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty) const {
11351   assert(Ty->isVectorType() && "expected vector type!");
11352
11353   const auto *VT = Ty->castAs<VectorType>();
11354   assert(VT->getVectorKind() == VectorType::RVVFixedLengthDataVector &&
11355          "Unexpected vector kind");
11356
11357   assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
11358
11359   const auto *BT = VT->getElementType()->castAs<BuiltinType>();
11360   unsigned EltSize = getContext().getTypeSize(BT);
11361   llvm::ScalableVectorType *ResType =
11362         llvm::ScalableVectorType::get(CGT.ConvertType(VT->getElementType()),
11363                                       llvm::RISCV::RVVBitsPerBlock / EltSize);
11364   return ABIArgInfo::getDirect(ResType);
11365 }
11366
11367 ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
11368                                               int &ArgGPRsLeft,
11369                                               int &ArgFPRsLeft) const {
11370   assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow");
11371   Ty = useFirstFieldIfTransparentUnion(Ty);
11372
11373   // Structures with either a non-trivial destructor or a non-trivial
11374   // copy constructor are always passed indirectly.
11375   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
11376     if (ArgGPRsLeft)
11377       ArgGPRsLeft -= 1;
11378     return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
11379                                            CGCXXABI::RAA_DirectInMemory);
11380   }
11381
11382   // Ignore empty structs/unions.
11383   if (isEmptyRecord(getContext(), Ty, true))
11384     return ABIArgInfo::getIgnore();
11385
11386   uint64_t Size = getContext().getTypeSize(Ty);
11387
11388   // Pass floating point values via FPRs if possible.
11389   if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() &&
11390       FLen >= Size && ArgFPRsLeft) {
11391     ArgFPRsLeft--;
11392     return ABIArgInfo::getDirect();
11393   }
11394
11395   // Complex types for the hard float ABI must be passed direct rather than
11396   // using CoerceAndExpand.
11397   if (IsFixed && Ty->isComplexType() && FLen && ArgFPRsLeft >= 2) {
11398     QualType EltTy = Ty->castAs<ComplexType>()->getElementType();
11399     if (getContext().getTypeSize(EltTy) <= FLen) {
11400       ArgFPRsLeft -= 2;
11401       return ABIArgInfo::getDirect();
11402     }
11403   }
11404
11405   if (IsFixed && FLen && Ty->isStructureOrClassType()) {
11406     llvm::Type *Field1Ty = nullptr;
11407     llvm::Type *Field2Ty = nullptr;
11408     CharUnits Field1Off = CharUnits::Zero();
11409     CharUnits Field2Off = CharUnits::Zero();
11410     int NeededArgGPRs = 0;
11411     int NeededArgFPRs = 0;
11412     bool IsCandidate =
11413         detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off,
11414                                  NeededArgGPRs, NeededArgFPRs);
11415     if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft &&
11416         NeededArgFPRs <= ArgFPRsLeft) {
11417       ArgGPRsLeft -= NeededArgGPRs;
11418       ArgFPRsLeft -= NeededArgFPRs;
11419       return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty,
11420                                                Field2Off);
11421     }
11422   }
11423
11424   uint64_t NeededAlign = getContext().getTypeAlign(Ty);
11425   // Determine the number of GPRs needed to pass the current argument
11426   // according to the ABI. 2*XLen-aligned varargs are passed in "aligned"
11427   // register pairs, so may consume 3 registers.
11428   int NeededArgGPRs = 1;
11429   if (!IsFixed && NeededAlign == 2 * XLen)
11430     NeededArgGPRs = 2 + (ArgGPRsLeft % 2);
11431   else if (Size > XLen && Size <= 2 * XLen)
11432     NeededArgGPRs = 2;
11433
11434   if (NeededArgGPRs > ArgGPRsLeft) {
11435     NeededArgGPRs = ArgGPRsLeft;
11436   }
11437
11438   ArgGPRsLeft -= NeededArgGPRs;
11439
11440   if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) {
11441     // Treat an enum type as its underlying type.
11442     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
11443       Ty = EnumTy->getDecl()->getIntegerType();
11444
11445     // All integral types are promoted to XLen width
11446     if (Size < XLen && Ty->isIntegralOrEnumerationType()) {
11447       return extendType(Ty);
11448     }
11449
11450     if (const auto *EIT = Ty->getAs<BitIntType>()) {
11451       if (EIT->getNumBits() < XLen)
11452         return extendType(Ty);
11453       if (EIT->getNumBits() > 128 ||
11454           (!getContext().getTargetInfo().hasInt128Type() &&
11455            EIT->getNumBits() > 64))
11456         return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
11457     }
11458
11459     return ABIArgInfo::getDirect();
11460   }
11461
11462   if (const VectorType *VT = Ty->getAs<VectorType>())
11463     if (VT->getVectorKind() == VectorType::RVVFixedLengthDataVector)
11464       return coerceVLSVector(Ty);
11465
11466   // Aggregates which are <= 2*XLen will be passed in registers if possible,
11467   // so coerce to integers.
11468   if (Size <= 2 * XLen) {
11469     unsigned Alignment = getContext().getTypeAlign(Ty);
11470
11471     // Use a single XLen int if possible, 2*XLen if 2*XLen alignment is
11472     // required, and a 2-element XLen array if only XLen alignment is required.
11473     if (Size <= XLen) {
11474       return ABIArgInfo::getDirect(
11475           llvm::IntegerType::get(getVMContext(), XLen));
11476     } else if (Alignment == 2 * XLen) {
11477       return ABIArgInfo::getDirect(
11478           llvm::IntegerType::get(getVMContext(), 2 * XLen));
11479     } else {
11480       return ABIArgInfo::getDirect(llvm::ArrayType::get(
11481           llvm::IntegerType::get(getVMContext(), XLen), 2));
11482     }
11483   }
11484   return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
11485 }
11486
11487 ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy) const {
11488   if (RetTy->isVoidType())
11489     return ABIArgInfo::getIgnore();
11490
11491   int ArgGPRsLeft = 2;
11492   int ArgFPRsLeft = FLen ? 2 : 0;
11493
11494   // The rules for return and argument types are the same, so defer to
11495   // classifyArgumentType.
11496   return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft,
11497                               ArgFPRsLeft);
11498 }
11499
11500 Address RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
11501                                 QualType Ty) const {
11502   CharUnits SlotSize = CharUnits::fromQuantity(XLen / 8);
11503
11504   // Empty records are ignored for parameter passing purposes.
11505   if (isEmptyRecord(getContext(), Ty, true)) {
11506     Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr),
11507                            getVAListElementType(CGF), SlotSize);
11508     Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
11509     return Addr;
11510   }
11511
11512   auto TInfo = getContext().getTypeInfoInChars(Ty);
11513
11514   // Arguments bigger than 2*Xlen bytes are passed indirectly.
11515   bool IsIndirect = TInfo.Width > 2 * SlotSize;
11516
11517   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TInfo,
11518                           SlotSize, /*AllowHigherAlign=*/true);
11519 }
11520
11521 ABIArgInfo RISCVABIInfo::extendType(QualType Ty) const {
11522   int TySize = getContext().getTypeSize(Ty);
11523   // RV64 ABI requires unsigned 32 bit integers to be sign extended.
11524   if (XLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
11525     return ABIArgInfo::getSignExtend(Ty);
11526   return ABIArgInfo::getExtend(Ty);
11527 }
11528
11529 namespace {
11530 class RISCVTargetCodeGenInfo : public TargetCodeGenInfo {
11531 public:
11532   RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen,
11533                          unsigned FLen)
11534       : TargetCodeGenInfo(std::make_unique<RISCVABIInfo>(CGT, XLen, FLen)) {}
11535
11536   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
11537                            CodeGen::CodeGenModule &CGM) const override {
11538     const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
11539     if (!FD) return;
11540
11541     const auto *Attr = FD->getAttr<RISCVInterruptAttr>();
11542     if (!Attr)
11543       return;
11544
11545     const char *Kind;
11546     switch (Attr->getInterrupt()) {
11547     case RISCVInterruptAttr::supervisor: Kind = "supervisor"; break;
11548     case RISCVInterruptAttr::machine: Kind = "machine"; break;
11549     }
11550
11551     auto *Fn = cast<llvm::Function>(GV);
11552
11553     Fn->addFnAttr("interrupt", Kind);
11554   }
11555 };
11556 } // namespace
11557
11558 //===----------------------------------------------------------------------===//
11559 // VE ABI Implementation.
11560 //
11561 namespace {
11562 class VEABIInfo : public DefaultABIInfo {
11563 public:
11564   VEABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
11565
11566 private:
11567   ABIArgInfo classifyReturnType(QualType RetTy) const;
11568   ABIArgInfo classifyArgumentType(QualType RetTy) const;
11569   void computeInfo(CGFunctionInfo &FI) const override;
11570 };
11571 } // end anonymous namespace
11572
11573 ABIArgInfo VEABIInfo::classifyReturnType(QualType Ty) const {
11574   if (Ty->isAnyComplexType())
11575     return ABIArgInfo::getDirect();
11576   uint64_t Size = getContext().getTypeSize(Ty);
11577   if (Size < 64 && Ty->isIntegerType())
11578     return ABIArgInfo::getExtend(Ty);
11579   return DefaultABIInfo::classifyReturnType(Ty);
11580 }
11581
11582 ABIArgInfo VEABIInfo::classifyArgumentType(QualType Ty) const {
11583   if (Ty->isAnyComplexType())
11584     return ABIArgInfo::getDirect();
11585   uint64_t Size = getContext().getTypeSize(Ty);
11586   if (Size < 64 && Ty->isIntegerType())
11587     return ABIArgInfo::getExtend(Ty);
11588   return DefaultABIInfo::classifyArgumentType(Ty);
11589 }
11590
11591 void VEABIInfo::computeInfo(CGFunctionInfo &FI) const {
11592   FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
11593   for (auto &Arg : FI.arguments())
11594     Arg.info = classifyArgumentType(Arg.type);
11595 }
11596
11597 namespace {
11598 class VETargetCodeGenInfo : public TargetCodeGenInfo {
11599 public:
11600   VETargetCodeGenInfo(CodeGenTypes &CGT)
11601       : TargetCodeGenInfo(std::make_unique<VEABIInfo>(CGT)) {}
11602   // VE ABI requires the arguments of variadic and prototype-less functions
11603   // are passed in both registers and memory.
11604   bool isNoProtoCallVariadic(const CallArgList &args,
11605                              const FunctionNoProtoType *fnType) const override {
11606     return true;
11607   }
11608 };
11609 } // end anonymous namespace
11610
11611 //===----------------------------------------------------------------------===//
11612 // CSKY ABI Implementation
11613 //===----------------------------------------------------------------------===//
11614 namespace {
11615 class CSKYABIInfo : public DefaultABIInfo {
11616   static const int NumArgGPRs = 4;
11617   static const int NumArgFPRs = 4;
11618
11619   static const unsigned XLen = 32;
11620   unsigned FLen;
11621
11622 public:
11623   CSKYABIInfo(CodeGen::CodeGenTypes &CGT, unsigned FLen)
11624       : DefaultABIInfo(CGT), FLen(FLen) {}
11625
11626   void computeInfo(CGFunctionInfo &FI) const override;
11627   ABIArgInfo classifyArgumentType(QualType Ty, int &ArgGPRsLeft,
11628                                   int &ArgFPRsLeft,
11629                                   bool isReturnType = false) const;
11630   ABIArgInfo classifyReturnType(QualType RetTy) const;
11631
11632   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
11633                     QualType Ty) const override;
11634 };
11635
11636 } // end anonymous namespace
11637
11638 void CSKYABIInfo::computeInfo(CGFunctionInfo &FI) const {
11639   QualType RetTy = FI.getReturnType();
11640   if (!getCXXABI().classifyReturnType(FI))
11641     FI.getReturnInfo() = classifyReturnType(RetTy);
11642
11643   bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect;
11644
11645   // We must track the number of GPRs used in order to conform to the CSKY
11646   // ABI, as integer scalars passed in registers should have signext/zeroext
11647   // when promoted.
11648   int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs;
11649   int ArgFPRsLeft = FLen ? NumArgFPRs : 0;
11650
11651   for (auto &ArgInfo : FI.arguments()) {
11652     ArgInfo.info = classifyArgumentType(ArgInfo.type, ArgGPRsLeft, ArgFPRsLeft);
11653   }
11654 }
11655
11656 Address CSKYABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
11657                                QualType Ty) const {
11658   CharUnits SlotSize = CharUnits::fromQuantity(XLen / 8);
11659
11660   // Empty records are ignored for parameter passing purposes.
11661   if (isEmptyRecord(getContext(), Ty, true)) {
11662     Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr),
11663                            getVAListElementType(CGF), SlotSize);
11664     Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
11665     return Addr;
11666   }
11667
11668   auto TInfo = getContext().getTypeInfoInChars(Ty);
11669
11670   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, false, TInfo, SlotSize,
11671                           /*AllowHigherAlign=*/true);
11672 }
11673
11674 ABIArgInfo CSKYABIInfo::classifyArgumentType(QualType Ty, int &ArgGPRsLeft,
11675                                              int &ArgFPRsLeft,
11676                                              bool isReturnType) const {
11677   assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow");
11678   Ty = useFirstFieldIfTransparentUnion(Ty);
11679
11680   // Structures with either a non-trivial destructor or a non-trivial
11681   // copy constructor are always passed indirectly.
11682   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
11683     if (ArgGPRsLeft)
11684       ArgGPRsLeft -= 1;
11685     return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
11686                                            CGCXXABI::RAA_DirectInMemory);
11687   }
11688
11689   // Ignore empty structs/unions.
11690   if (isEmptyRecord(getContext(), Ty, true))
11691     return ABIArgInfo::getIgnore();
11692
11693   if (!Ty->getAsUnionType())
11694     if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
11695       return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
11696
11697   uint64_t Size = getContext().getTypeSize(Ty);
11698   // Pass floating point values via FPRs if possible.
11699   if (Ty->isFloatingType() && !Ty->isComplexType() && FLen >= Size &&
11700       ArgFPRsLeft) {
11701     ArgFPRsLeft--;
11702     return ABIArgInfo::getDirect();
11703   }
11704
11705   // Complex types for the hard float ABI must be passed direct rather than
11706   // using CoerceAndExpand.
11707   if (Ty->isComplexType() && FLen && !isReturnType) {
11708     QualType EltTy = Ty->castAs<ComplexType>()->getElementType();
11709     if (getContext().getTypeSize(EltTy) <= FLen) {
11710       ArgFPRsLeft -= 2;
11711       return ABIArgInfo::getDirect();
11712     }
11713   }
11714
11715   if (!isAggregateTypeForABI(Ty)) {
11716     // Treat an enum type as its underlying type.
11717     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
11718       Ty = EnumTy->getDecl()->getIntegerType();
11719
11720     // All integral types are promoted to XLen width, unless passed on the
11721     // stack.
11722     if (Size < XLen && Ty->isIntegralOrEnumerationType())
11723       return ABIArgInfo::getExtend(Ty);
11724
11725     if (const auto *EIT = Ty->getAs<BitIntType>()) {
11726       if (EIT->getNumBits() < XLen)
11727         return ABIArgInfo::getExtend(Ty);
11728     }
11729
11730     return ABIArgInfo::getDirect();
11731   }
11732
11733   // For argument type, the first 4*XLen parts of aggregate will be passed
11734   // in registers, and the rest will be passed in stack.
11735   // So we can coerce to integers directly and let backend handle it correctly.
11736   // For return type, aggregate which <= 2*XLen will be returned in registers.
11737   // Otherwise, aggregate will be returned indirectly.
11738   if (!isReturnType || (isReturnType && Size <= 2 * XLen)) {
11739     if (Size <= XLen) {
11740       return ABIArgInfo::getDirect(
11741           llvm::IntegerType::get(getVMContext(), XLen));
11742     } else {
11743       return ABIArgInfo::getDirect(llvm::ArrayType::get(
11744           llvm::IntegerType::get(getVMContext(), XLen), (Size + 31) / XLen));
11745     }
11746   }
11747   return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
11748 }
11749
11750 ABIArgInfo CSKYABIInfo::classifyReturnType(QualType RetTy) const {
11751   if (RetTy->isVoidType())
11752     return ABIArgInfo::getIgnore();
11753
11754   int ArgGPRsLeft = 2;
11755   int ArgFPRsLeft = FLen ? 1 : 0;
11756
11757   // The rules for return and argument types are the same, so defer to
11758   // classifyArgumentType.
11759   return classifyArgumentType(RetTy, ArgGPRsLeft, ArgFPRsLeft, true);
11760 }
11761
11762 namespace {
11763 class CSKYTargetCodeGenInfo : public TargetCodeGenInfo {
11764 public:
11765   CSKYTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned FLen)
11766       : TargetCodeGenInfo(std::make_unique<CSKYABIInfo>(CGT, FLen)) {}
11767 };
11768 } // end anonymous namespace
11769
11770 //===----------------------------------------------------------------------===//
11771 // BPF ABI Implementation
11772 //===----------------------------------------------------------------------===//
11773
11774 namespace {
11775
11776 class BPFABIInfo : public DefaultABIInfo {
11777 public:
11778   BPFABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
11779
11780   ABIArgInfo classifyArgumentType(QualType Ty) const {
11781     Ty = useFirstFieldIfTransparentUnion(Ty);
11782
11783     if (isAggregateTypeForABI(Ty)) {
11784       uint64_t Bits = getContext().getTypeSize(Ty);
11785       if (Bits == 0)
11786         return ABIArgInfo::getIgnore();
11787
11788       // If the aggregate needs 1 or 2 registers, do not use reference.
11789       if (Bits <= 128) {
11790         llvm::Type *CoerceTy;
11791         if (Bits <= 64) {
11792           CoerceTy =
11793               llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
11794         } else {
11795           llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), 64);
11796           CoerceTy = llvm::ArrayType::get(RegTy, 2);
11797         }
11798         return ABIArgInfo::getDirect(CoerceTy);
11799       } else {
11800         return getNaturalAlignIndirect(Ty);
11801       }
11802     }
11803
11804     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
11805       Ty = EnumTy->getDecl()->getIntegerType();
11806
11807     ASTContext &Context = getContext();
11808     if (const auto *EIT = Ty->getAs<BitIntType>())
11809       if (EIT->getNumBits() > Context.getTypeSize(Context.Int128Ty))
11810         return getNaturalAlignIndirect(Ty);
11811
11812     return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
11813                                               : ABIArgInfo::getDirect());
11814   }
11815
11816   ABIArgInfo classifyReturnType(QualType RetTy) const {
11817     if (RetTy->isVoidType())
11818       return ABIArgInfo::getIgnore();
11819
11820     if (isAggregateTypeForABI(RetTy))
11821       return getNaturalAlignIndirect(RetTy);
11822
11823     // Treat an enum type as its underlying type.
11824     if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
11825       RetTy = EnumTy->getDecl()->getIntegerType();
11826
11827     ASTContext &Context = getContext();
11828     if (const auto *EIT = RetTy->getAs<BitIntType>())
11829       if (EIT->getNumBits() > Context.getTypeSize(Context.Int128Ty))
11830         return getNaturalAlignIndirect(RetTy);
11831
11832     // Caller will do necessary sign/zero extension.
11833     return ABIArgInfo::getDirect();
11834   }
11835
11836   void computeInfo(CGFunctionInfo &FI) const override {
11837     FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
11838     for (auto &I : FI.arguments())
11839       I.info = classifyArgumentType(I.type);
11840   }
11841
11842 };
11843
11844 class BPFTargetCodeGenInfo : public TargetCodeGenInfo {
11845 public:
11846   BPFTargetCodeGenInfo(CodeGenTypes &CGT)
11847       : TargetCodeGenInfo(std::make_unique<BPFABIInfo>(CGT)) {}
11848 };
11849
11850 }
11851
11852 // LoongArch ABI Implementation. Documented at
11853 // https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html
11854 //
11855 //===----------------------------------------------------------------------===//
11856
11857 namespace {
11858 class LoongArchABIInfo : public DefaultABIInfo {
11859 private:
11860   // Size of the integer ('r') registers in bits.
11861   unsigned GRLen;
11862   // Size of the floating point ('f') registers in bits.
11863   unsigned FRLen;
11864   // Number of general-purpose argument registers.
11865   static const int NumGARs = 8;
11866   // Number of floating-point argument registers.
11867   static const int NumFARs = 8;
11868   bool detectFARsEligibleStructHelper(QualType Ty, CharUnits CurOff,
11869                                       llvm::Type *&Field1Ty,
11870                                       CharUnits &Field1Off,
11871                                       llvm::Type *&Field2Ty,
11872                                       CharUnits &Field2Off) const;
11873
11874 public:
11875   LoongArchABIInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, unsigned FRLen)
11876       : DefaultABIInfo(CGT), GRLen(GRLen), FRLen(FRLen) {}
11877
11878   void computeInfo(CGFunctionInfo &FI) const override;
11879
11880   ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &GARsLeft,
11881                                   int &FARsLeft) const;
11882   ABIArgInfo classifyReturnType(QualType RetTy) const;
11883
11884   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
11885                     QualType Ty) const override;
11886
11887   ABIArgInfo extendType(QualType Ty) const;
11888
11889   bool detectFARsEligibleStruct(QualType Ty, llvm::Type *&Field1Ty,
11890                                 CharUnits &Field1Off, llvm::Type *&Field2Ty,
11891                                 CharUnits &Field2Off, int &NeededArgGPRs,
11892                                 int &NeededArgFPRs) const;
11893   ABIArgInfo coerceAndExpandFARsEligibleStruct(llvm::Type *Field1Ty,
11894                                                CharUnits Field1Off,
11895                                                llvm::Type *Field2Ty,
11896                                                CharUnits Field2Off) const;
11897 };
11898 } // end anonymous namespace
11899
11900 void LoongArchABIInfo::computeInfo(CGFunctionInfo &FI) const {
11901   QualType RetTy = FI.getReturnType();
11902   if (!getCXXABI().classifyReturnType(FI))
11903     FI.getReturnInfo() = classifyReturnType(RetTy);
11904
11905   // IsRetIndirect is true if classifyArgumentType indicated the value should
11906   // be passed indirect, or if the type size is a scalar greater than 2*GRLen
11907   // and not a complex type with elements <= FRLen. e.g. fp128 is passed direct
11908   // in LLVM IR, relying on the backend lowering code to rewrite the argument
11909   // list and pass indirectly on LA32.
11910   bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect;
11911   if (!IsRetIndirect && RetTy->isScalarType() &&
11912       getContext().getTypeSize(RetTy) > (2 * GRLen)) {
11913     if (RetTy->isComplexType() && FRLen) {
11914       QualType EltTy = RetTy->castAs<ComplexType>()->getElementType();
11915       IsRetIndirect = getContext().getTypeSize(EltTy) > FRLen;
11916     } else {
11917       // This is a normal scalar > 2*GRLen, such as fp128 on LA32.
11918       IsRetIndirect = true;
11919     }
11920   }
11921
11922   // We must track the number of GARs and FARs used in order to conform to the
11923   // LoongArch ABI. As GAR usage is different for variadic arguments, we must
11924   // also track whether we are examining a vararg or not.
11925   int GARsLeft = IsRetIndirect ? NumGARs - 1 : NumGARs;
11926   int FARsLeft = FRLen ? NumFARs : 0;
11927   int NumFixedArgs = FI.getNumRequiredArgs();
11928
11929   int ArgNum = 0;
11930   for (auto &ArgInfo : FI.arguments()) {
11931     ArgInfo.info = classifyArgumentType(
11932         ArgInfo.type, /*IsFixed=*/ArgNum < NumFixedArgs, GARsLeft, FARsLeft);
11933     ArgNum++;
11934   }
11935 }
11936
11937 // Returns true if the struct is a potential candidate to be passed in FARs (and
11938 // GARs). If this function returns true, the caller is responsible for checking
11939 // that if there is only a single field then that field is a float.
11940 bool LoongArchABIInfo::detectFARsEligibleStructHelper(
11941     QualType Ty, CharUnits CurOff, llvm::Type *&Field1Ty, CharUnits &Field1Off,
11942     llvm::Type *&Field2Ty, CharUnits &Field2Off) const {
11943   bool IsInt = Ty->isIntegralOrEnumerationType();
11944   bool IsFloat = Ty->isRealFloatingType();
11945
11946   if (IsInt || IsFloat) {
11947     uint64_t Size = getContext().getTypeSize(Ty);
11948     if (IsInt && Size > GRLen)
11949       return false;
11950     // Can't be eligible if larger than the FP registers. Half precision isn't
11951     // currently supported on LoongArch and the ABI hasn't been confirmed, so
11952     // default to the integer ABI in that case.
11953     if (IsFloat && (Size > FRLen || Size < 32))
11954       return false;
11955     // Can't be eligible if an integer type was already found (int+int pairs
11956     // are not eligible).
11957     if (IsInt && Field1Ty && Field1Ty->isIntegerTy())
11958       return false;
11959     if (!Field1Ty) {
11960       Field1Ty = CGT.ConvertType(Ty);
11961       Field1Off = CurOff;
11962       return true;
11963     }
11964     if (!Field2Ty) {
11965       Field2Ty = CGT.ConvertType(Ty);
11966       Field2Off = CurOff;
11967       return true;
11968     }
11969     return false;
11970   }
11971
11972   if (auto CTy = Ty->getAs<ComplexType>()) {
11973     if (Field1Ty)
11974       return false;
11975     QualType EltTy = CTy->getElementType();
11976     if (getContext().getTypeSize(EltTy) > FRLen)
11977       return false;
11978     Field1Ty = CGT.ConvertType(EltTy);
11979     Field1Off = CurOff;
11980     Field2Ty = Field1Ty;
11981     Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy);
11982     return true;
11983   }
11984
11985   if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) {
11986     uint64_t ArraySize = ATy->getSize().getZExtValue();
11987     QualType EltTy = ATy->getElementType();
11988     CharUnits EltSize = getContext().getTypeSizeInChars(EltTy);
11989     for (uint64_t i = 0; i < ArraySize; ++i) {
11990       if (!detectFARsEligibleStructHelper(EltTy, CurOff, Field1Ty, Field1Off,
11991                                           Field2Ty, Field2Off))
11992         return false;
11993       CurOff += EltSize;
11994     }
11995     return true;
11996   }
11997
11998   if (const auto *RTy = Ty->getAs<RecordType>()) {
11999     // Structures with either a non-trivial destructor or a non-trivial
12000     // copy constructor are not eligible for the FP calling convention.
12001     if (getRecordArgABI(Ty, CGT.getCXXABI()))
12002       return false;
12003     if (isEmptyRecord(getContext(), Ty, true))
12004       return true;
12005     const RecordDecl *RD = RTy->getDecl();
12006     // Unions aren't eligible unless they're empty (which is caught above).
12007     if (RD->isUnion())
12008       return false;
12009     const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
12010     // If this is a C++ record, check the bases first.
12011     if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
12012       for (const CXXBaseSpecifier &B : CXXRD->bases()) {
12013         const auto *BDecl =
12014             cast<CXXRecordDecl>(B.getType()->castAs<RecordType>()->getDecl());
12015         if (!detectFARsEligibleStructHelper(
12016                 B.getType(), CurOff + Layout.getBaseClassOffset(BDecl),
12017                 Field1Ty, Field1Off, Field2Ty, Field2Off))
12018           return false;
12019       }
12020     }
12021     for (const FieldDecl *FD : RD->fields()) {
12022       QualType QTy = FD->getType();
12023       if (FD->isBitField()) {
12024         unsigned BitWidth = FD->getBitWidthValue(getContext());
12025         // Zero-width bitfields are ignored.
12026         if (BitWidth == 0)
12027           continue;
12028         // Allow a bitfield with a type greater than GRLen as long as the
12029         // bitwidth is GRLen or less.
12030         if (getContext().getTypeSize(QTy) > GRLen && BitWidth <= GRLen) {
12031           QTy = getContext().getIntTypeForBitwidth(GRLen, false);
12032         }
12033       }
12034
12035       if (!detectFARsEligibleStructHelper(
12036               QTy,
12037               CurOff + getContext().toCharUnitsFromBits(
12038                            Layout.getFieldOffset(FD->getFieldIndex())),
12039               Field1Ty, Field1Off, Field2Ty, Field2Off))
12040         return false;
12041     }
12042     return Field1Ty != nullptr;
12043   }
12044
12045   return false;
12046 }
12047
12048 // Determine if a struct is eligible to be passed in FARs (and GARs) (i.e., when
12049 // flattened it contains a single fp value, fp+fp, or int+fp of appropriate
12050 // size). If so, NeededFARs and NeededGARs are incremented appropriately.
12051 bool LoongArchABIInfo::detectFARsEligibleStruct(
12052     QualType Ty, llvm::Type *&Field1Ty, CharUnits &Field1Off,
12053     llvm::Type *&Field2Ty, CharUnits &Field2Off, int &NeededGARs,
12054     int &NeededFARs) const {
12055   Field1Ty = nullptr;
12056   Field2Ty = nullptr;
12057   NeededGARs = 0;
12058   NeededFARs = 0;
12059   if (!detectFARsEligibleStructHelper(Ty, CharUnits::Zero(), Field1Ty,
12060                                       Field1Off, Field2Ty, Field2Off))
12061     return false;
12062   // Not really a candidate if we have a single int but no float.
12063   if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy())
12064     return false;
12065   if (Field1Ty && Field1Ty->isFloatingPointTy())
12066     NeededFARs++;
12067   else if (Field1Ty)
12068     NeededGARs++;
12069   if (Field2Ty && Field2Ty->isFloatingPointTy())
12070     NeededFARs++;
12071   else if (Field2Ty)
12072     NeededGARs++;
12073   return true;
12074 }
12075
12076 // Call getCoerceAndExpand for the two-element flattened struct described by
12077 // Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an
12078 // appropriate coerceToType and unpaddedCoerceToType.
12079 ABIArgInfo LoongArchABIInfo::coerceAndExpandFARsEligibleStruct(
12080     llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty,
12081     CharUnits Field2Off) const {
12082   SmallVector<llvm::Type *, 3> CoerceElts;
12083   SmallVector<llvm::Type *, 2> UnpaddedCoerceElts;
12084   if (!Field1Off.isZero())
12085     CoerceElts.push_back(llvm::ArrayType::get(
12086         llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity()));
12087
12088   CoerceElts.push_back(Field1Ty);
12089   UnpaddedCoerceElts.push_back(Field1Ty);
12090
12091   if (!Field2Ty) {
12092     return ABIArgInfo::getCoerceAndExpand(
12093         llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()),
12094         UnpaddedCoerceElts[0]);
12095   }
12096
12097   CharUnits Field2Align =
12098       CharUnits::fromQuantity(getDataLayout().getABITypeAlign(Field2Ty));
12099   CharUnits Field1End =
12100       Field1Off +
12101       CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty));
12102   CharUnits Field2OffNoPadNoPack = Field1End.alignTo(Field2Align);
12103
12104   CharUnits Padding = CharUnits::Zero();
12105   if (Field2Off > Field2OffNoPadNoPack)
12106     Padding = Field2Off - Field2OffNoPadNoPack;
12107   else if (Field2Off != Field2Align && Field2Off > Field1End)
12108     Padding = Field2Off - Field1End;
12109
12110   bool IsPacked = !Field2Off.isMultipleOf(Field2Align);
12111
12112   if (!Padding.isZero())
12113     CoerceElts.push_back(llvm::ArrayType::get(
12114         llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity()));
12115
12116   CoerceElts.push_back(Field2Ty);
12117   UnpaddedCoerceElts.push_back(Field2Ty);
12118
12119   return ABIArgInfo::getCoerceAndExpand(
12120       llvm::StructType::get(getVMContext(), CoerceElts, IsPacked),
12121       llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked));
12122 }
12123
12124 ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
12125                                                   int &GARsLeft,
12126                                                   int &FARsLeft) const {
12127   assert(GARsLeft <= NumGARs && "GAR tracking underflow");
12128   Ty = useFirstFieldIfTransparentUnion(Ty);
12129
12130   // Structures with either a non-trivial destructor or a non-trivial
12131   // copy constructor are always passed indirectly.
12132   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
12133     if (GARsLeft)
12134       GARsLeft -= 1;
12135     return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
12136                                            CGCXXABI::RAA_DirectInMemory);
12137   }
12138
12139   // Ignore empty structs/unions.
12140   if (isEmptyRecord(getContext(), Ty, true))
12141     return ABIArgInfo::getIgnore();
12142
12143   uint64_t Size = getContext().getTypeSize(Ty);
12144
12145   // Pass floating point values via FARs if possible.
12146   if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() &&
12147       FRLen >= Size && FARsLeft) {
12148     FARsLeft--;
12149     return ABIArgInfo::getDirect();
12150   }
12151
12152   // Complex types for the *f or *d ABI must be passed directly rather than
12153   // using CoerceAndExpand.
12154   if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) {
12155     QualType EltTy = Ty->castAs<ComplexType>()->getElementType();
12156     if (getContext().getTypeSize(EltTy) <= FRLen) {
12157       FARsLeft -= 2;
12158       return ABIArgInfo::getDirect();
12159     }
12160   }
12161
12162   if (IsFixed && FRLen && Ty->isStructureOrClassType()) {
12163     llvm::Type *Field1Ty = nullptr;
12164     llvm::Type *Field2Ty = nullptr;
12165     CharUnits Field1Off = CharUnits::Zero();
12166     CharUnits Field2Off = CharUnits::Zero();
12167     int NeededGARs = 0;
12168     int NeededFARs = 0;
12169     bool IsCandidate = detectFARsEligibleStruct(
12170         Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, NeededGARs, NeededFARs);
12171     if (IsCandidate && NeededGARs <= GARsLeft && NeededFARs <= FARsLeft) {
12172       GARsLeft -= NeededGARs;
12173       FARsLeft -= NeededFARs;
12174       return coerceAndExpandFARsEligibleStruct(Field1Ty, Field1Off, Field2Ty,
12175                                                Field2Off);
12176     }
12177   }
12178
12179   uint64_t NeededAlign = getContext().getTypeAlign(Ty);
12180   // Determine the number of GARs needed to pass the current argument
12181   // according to the ABI. 2*GRLen-aligned varargs are passed in "aligned"
12182   // register pairs, so may consume 3 registers.
12183   int NeededGARs = 1;
12184   if (!IsFixed && NeededAlign == 2 * GRLen)
12185     NeededGARs = 2 + (GARsLeft % 2);
12186   else if (Size > GRLen && Size <= 2 * GRLen)
12187     NeededGARs = 2;
12188
12189   if (NeededGARs > GARsLeft)
12190     NeededGARs = GARsLeft;
12191
12192   GARsLeft -= NeededGARs;
12193
12194   if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) {
12195     // Treat an enum type as its underlying type.
12196     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
12197       Ty = EnumTy->getDecl()->getIntegerType();
12198
12199     // All integral types are promoted to GRLen width.
12200     if (Size < GRLen && Ty->isIntegralOrEnumerationType())
12201       return extendType(Ty);
12202
12203     if (const auto *EIT = Ty->getAs<BitIntType>()) {
12204       if (EIT->getNumBits() < GRLen)
12205         return extendType(Ty);
12206       if (EIT->getNumBits() > 128 ||
12207           (!getContext().getTargetInfo().hasInt128Type() &&
12208            EIT->getNumBits() > 64))
12209         return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
12210     }
12211
12212     return ABIArgInfo::getDirect();
12213   }
12214
12215   // Aggregates which are <= 2*GRLen will be passed in registers if possible,
12216   // so coerce to integers.
12217   if (Size <= 2 * GRLen) {
12218     // Use a single GRLen int if possible, 2*GRLen if 2*GRLen alignment is
12219     // required, and a 2-element GRLen array if only GRLen alignment is
12220     // required.
12221     if (Size <= GRLen) {
12222       return ABIArgInfo::getDirect(
12223           llvm::IntegerType::get(getVMContext(), GRLen));
12224     }
12225     if (getContext().getTypeAlign(Ty) == 2 * GRLen) {
12226       return ABIArgInfo::getDirect(
12227           llvm::IntegerType::get(getVMContext(), 2 * GRLen));
12228     }
12229     return ABIArgInfo::getDirect(
12230         llvm::ArrayType::get(llvm::IntegerType::get(getVMContext(), GRLen), 2));
12231   }
12232   return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
12233 }
12234
12235 ABIArgInfo LoongArchABIInfo::classifyReturnType(QualType RetTy) const {
12236   if (RetTy->isVoidType())
12237     return ABIArgInfo::getIgnore();
12238   // The rules for return and argument types are the same, so defer to
12239   // classifyArgumentType.
12240   int GARsLeft = 2;
12241   int FARsLeft = FRLen ? 2 : 0;
12242   return classifyArgumentType(RetTy, /*IsFixed=*/true, GARsLeft, FARsLeft);
12243 }
12244
12245 Address LoongArchABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
12246                                     QualType Ty) const {
12247   CharUnits SlotSize = CharUnits::fromQuantity(GRLen / 8);
12248
12249   // Empty records are ignored for parameter passing purposes.
12250   if (isEmptyRecord(getContext(), Ty, true)) {
12251     Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr),
12252                            getVAListElementType(CGF), SlotSize);
12253     Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
12254     return Addr;
12255   }
12256
12257   auto TInfo = getContext().getTypeInfoInChars(Ty);
12258
12259   // Arguments bigger than 2*GRLen bytes are passed indirectly.
12260   return emitVoidPtrVAArg(CGF, VAListAddr, Ty,
12261                           /*IsIndirect=*/TInfo.Width > 2 * SlotSize, TInfo,
12262                           SlotSize,
12263                           /*AllowHigherAlign=*/true);
12264 }
12265
12266 ABIArgInfo LoongArchABIInfo::extendType(QualType Ty) const {
12267   int TySize = getContext().getTypeSize(Ty);
12268   // LA64 ABI requires unsigned 32 bit integers to be sign extended.
12269   if (GRLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
12270     return ABIArgInfo::getSignExtend(Ty);
12271   return ABIArgInfo::getExtend(Ty);
12272 }
12273
12274 namespace {
12275 class LoongArchTargetCodeGenInfo : public TargetCodeGenInfo {
12276 public:
12277   LoongArchTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen,
12278                              unsigned FRLen)
12279       : TargetCodeGenInfo(
12280             std::make_unique<LoongArchABIInfo>(CGT, GRLen, FRLen)) {}
12281 };
12282 } // namespace
12283
12284 //===----------------------------------------------------------------------===//
12285 // Driver code
12286 //===----------------------------------------------------------------------===//
12287
12288 bool CodeGenModule::supportsCOMDAT() const {
12289   return getTriple().supportsCOMDAT();
12290 }
12291
12292 const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
12293   if (TheTargetCodeGenInfo)
12294     return *TheTargetCodeGenInfo;
12295
12296   // Helper to set the unique_ptr while still keeping the return value.
12297   auto SetCGInfo = [&](TargetCodeGenInfo *P) -> const TargetCodeGenInfo & {
12298     this->TheTargetCodeGenInfo.reset(P);
12299     return *P;
12300   };
12301
12302   const llvm::Triple &Triple = getTarget().getTriple();
12303   switch (Triple.getArch()) {
12304   default:
12305     return SetCGInfo(new DefaultTargetCodeGenInfo(Types));
12306
12307   case llvm::Triple::le32:
12308     return SetCGInfo(new PNaClTargetCodeGenInfo(Types));
12309   case llvm::Triple::m68k:
12310     return SetCGInfo(new M68kTargetCodeGenInfo(Types));
12311   case llvm::Triple::mips:
12312   case llvm::Triple::mipsel:
12313     if (Triple.getOS() == llvm::Triple::NaCl)
12314       return SetCGInfo(new PNaClTargetCodeGenInfo(Types));
12315     return SetCGInfo(new MIPSTargetCodeGenInfo(Types, true));
12316
12317   case llvm::Triple::mips64:
12318   case llvm::Triple::mips64el:
12319     return SetCGInfo(new MIPSTargetCodeGenInfo(Types, false));
12320
12321   case llvm::Triple::avr: {
12322     // For passing parameters, R8~R25 are used on avr, and R18~R25 are used
12323     // on avrtiny. For passing return value, R18~R25 are used on avr, and
12324     // R22~R25 are used on avrtiny.
12325     unsigned NPR = getTarget().getABI() == "avrtiny" ? 6 : 18;
12326     unsigned NRR = getTarget().getABI() == "avrtiny" ? 4 : 8;
12327     return SetCGInfo(new AVRTargetCodeGenInfo(Types, NPR, NRR));
12328   }
12329
12330   case llvm::Triple::aarch64:
12331   case llvm::Triple::aarch64_32:
12332   case llvm::Triple::aarch64_be: {
12333     AArch64ABIKind Kind = AArch64ABIKind::AAPCS;
12334     if (getTarget().getABI() == "darwinpcs")
12335       Kind = AArch64ABIKind::DarwinPCS;
12336     else if (Triple.isOSWindows())
12337       return SetCGInfo(
12338           new WindowsAArch64TargetCodeGenInfo(Types, AArch64ABIKind::Win64));
12339
12340     return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind));
12341   }
12342
12343   case llvm::Triple::wasm32:
12344   case llvm::Triple::wasm64: {
12345     WebAssemblyABIKind Kind = WebAssemblyABIKind::MVP;
12346     if (getTarget().getABI() == "experimental-mv")
12347       Kind = WebAssemblyABIKind::ExperimentalMV;
12348     return SetCGInfo(new WebAssemblyTargetCodeGenInfo(Types, Kind));
12349   }
12350
12351   case llvm::Triple::arm:
12352   case llvm::Triple::armeb:
12353   case llvm::Triple::thumb:
12354   case llvm::Triple::thumbeb: {
12355     if (Triple.getOS() == llvm::Triple::Win32) {
12356       return SetCGInfo(
12357           new WindowsARMTargetCodeGenInfo(Types, ARMABIKind::AAPCS_VFP));
12358     }
12359
12360     ARMABIKind Kind = ARMABIKind::AAPCS;
12361     StringRef ABIStr = getTarget().getABI();
12362     if (ABIStr == "apcs-gnu")
12363       Kind = ARMABIKind::APCS;
12364     else if (ABIStr == "aapcs16")
12365       Kind = ARMABIKind::AAPCS16_VFP;
12366     else if (CodeGenOpts.FloatABI == "hard" ||
12367              (CodeGenOpts.FloatABI != "soft" &&
12368               (Triple.getEnvironment() == llvm::Triple::GNUEABIHF ||
12369                Triple.getEnvironment() == llvm::Triple::MuslEABIHF ||
12370                Triple.getEnvironment() == llvm::Triple::EABIHF)))
12371       Kind = ARMABIKind::AAPCS_VFP;
12372
12373     return SetCGInfo(new ARMTargetCodeGenInfo(Types, Kind));
12374   }
12375
12376   case llvm::Triple::ppc: {
12377     if (Triple.isOSAIX())
12378       return SetCGInfo(new AIXTargetCodeGenInfo(Types, /*Is64Bit*/ false));
12379
12380     bool IsSoftFloat =
12381         CodeGenOpts.FloatABI == "soft" || getTarget().hasFeature("spe");
12382     bool RetSmallStructInRegABI =
12383         PPC32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts);
12384     return SetCGInfo(
12385         new PPC32TargetCodeGenInfo(Types, IsSoftFloat, RetSmallStructInRegABI));
12386   }
12387   case llvm::Triple::ppcle: {
12388     bool IsSoftFloat = CodeGenOpts.FloatABI == "soft";
12389     bool RetSmallStructInRegABI =
12390         PPC32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts);
12391     return SetCGInfo(
12392         new PPC32TargetCodeGenInfo(Types, IsSoftFloat, RetSmallStructInRegABI));
12393   }
12394   case llvm::Triple::ppc64:
12395     if (Triple.isOSAIX())
12396       return SetCGInfo(new AIXTargetCodeGenInfo(Types, /*Is64Bit*/ true));
12397
12398     if (Triple.isOSBinFormatELF()) {
12399       PPC64_SVR4_ABIKind Kind = PPC64_SVR4_ABIKind::ELFv1;
12400       if (getTarget().getABI() == "elfv2")
12401         Kind = PPC64_SVR4_ABIKind::ELFv2;
12402       bool IsSoftFloat = CodeGenOpts.FloatABI == "soft";
12403
12404       return SetCGInfo(
12405           new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, IsSoftFloat));
12406     }
12407     return SetCGInfo(new PPC64TargetCodeGenInfo(Types));
12408   case llvm::Triple::ppc64le: {
12409     assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!");
12410     PPC64_SVR4_ABIKind Kind = PPC64_SVR4_ABIKind::ELFv2;
12411     if (getTarget().getABI() == "elfv1")
12412       Kind = PPC64_SVR4_ABIKind::ELFv1;
12413     bool IsSoftFloat = CodeGenOpts.FloatABI == "soft";
12414
12415     return SetCGInfo(
12416         new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, IsSoftFloat));
12417   }
12418
12419   case llvm::Triple::nvptx:
12420   case llvm::Triple::nvptx64:
12421     return SetCGInfo(new NVPTXTargetCodeGenInfo(Types));
12422
12423   case llvm::Triple::msp430:
12424     return SetCGInfo(new MSP430TargetCodeGenInfo(Types));
12425
12426   case llvm::Triple::riscv32:
12427   case llvm::Triple::riscv64: {
12428     StringRef ABIStr = getTarget().getABI();
12429     unsigned XLen = getTarget().getPointerWidth(LangAS::Default);
12430     unsigned ABIFLen = 0;
12431     if (ABIStr.endswith("f"))
12432       ABIFLen = 32;
12433     else if (ABIStr.endswith("d"))
12434       ABIFLen = 64;
12435     return SetCGInfo(new RISCVTargetCodeGenInfo(Types, XLen, ABIFLen));
12436   }
12437
12438   case llvm::Triple::systemz: {
12439     bool SoftFloat = CodeGenOpts.FloatABI == "soft";
12440     bool HasVector = !SoftFloat && getTarget().getABI() == "vector";
12441     return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector, SoftFloat));
12442   }
12443
12444   case llvm::Triple::tce:
12445   case llvm::Triple::tcele:
12446     return SetCGInfo(new TCETargetCodeGenInfo(Types));
12447
12448   case llvm::Triple::x86: {
12449     bool IsDarwinVectorABI = Triple.isOSDarwin();
12450     bool RetSmallStructInRegABI =
12451         X86_32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts);
12452     bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing();
12453
12454     if (Triple.getOS() == llvm::Triple::Win32) {
12455       return SetCGInfo(new WinX86_32TargetCodeGenInfo(
12456           Types, IsDarwinVectorABI, RetSmallStructInRegABI,
12457           IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters));
12458     } else {
12459       return SetCGInfo(new X86_32TargetCodeGenInfo(
12460           Types, IsDarwinVectorABI, RetSmallStructInRegABI,
12461           IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters,
12462           CodeGenOpts.FloatABI == "soft"));
12463     }
12464   }
12465
12466   case llvm::Triple::x86_64: {
12467     StringRef ABI = getTarget().getABI();
12468     X86AVXABILevel AVXLevel =
12469         (ABI == "avx512"
12470              ? X86AVXABILevel::AVX512
12471              : ABI == "avx" ? X86AVXABILevel::AVX : X86AVXABILevel::None);
12472
12473     switch (Triple.getOS()) {
12474     case llvm::Triple::Win32:
12475       return SetCGInfo(new WinX86_64TargetCodeGenInfo(Types, AVXLevel));
12476     default:
12477       return SetCGInfo(new X86_64TargetCodeGenInfo(Types, AVXLevel));
12478     }
12479   }
12480   case llvm::Triple::hexagon:
12481     return SetCGInfo(new HexagonTargetCodeGenInfo(Types));
12482   case llvm::Triple::lanai:
12483     return SetCGInfo(new LanaiTargetCodeGenInfo(Types));
12484   case llvm::Triple::r600:
12485     return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types));
12486   case llvm::Triple::amdgcn:
12487     return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types));
12488   case llvm::Triple::sparc:
12489     return SetCGInfo(new SparcV8TargetCodeGenInfo(Types));
12490   case llvm::Triple::sparcv9:
12491     return SetCGInfo(new SparcV9TargetCodeGenInfo(Types));
12492   case llvm::Triple::xcore:
12493     return SetCGInfo(new XCoreTargetCodeGenInfo(Types));
12494   case llvm::Triple::arc:
12495     return SetCGInfo(new ARCTargetCodeGenInfo(Types));
12496   case llvm::Triple::spir:
12497   case llvm::Triple::spir64:
12498     return SetCGInfo(new CommonSPIRTargetCodeGenInfo(Types));
12499   case llvm::Triple::spirv32:
12500   case llvm::Triple::spirv64:
12501     return SetCGInfo(new SPIRVTargetCodeGenInfo(Types));
12502   case llvm::Triple::ve:
12503     return SetCGInfo(new VETargetCodeGenInfo(Types));
12504   case llvm::Triple::csky: {
12505     bool IsSoftFloat = !getTarget().hasFeature("hard-float-abi");
12506     bool hasFP64 = getTarget().hasFeature("fpuv2_df") ||
12507                    getTarget().hasFeature("fpuv3_df");
12508     return SetCGInfo(new CSKYTargetCodeGenInfo(Types, IsSoftFloat ? 0
12509                                                       : hasFP64   ? 64
12510                                                                   : 32));
12511   }
12512   case llvm::Triple::bpfeb:
12513   case llvm::Triple::bpfel:
12514     return SetCGInfo(new BPFTargetCodeGenInfo(Types));
12515   case llvm::Triple::loongarch32:
12516   case llvm::Triple::loongarch64: {
12517     StringRef ABIStr = getTarget().getABI();
12518     unsigned ABIFRLen = 0;
12519     if (ABIStr.endswith("f"))
12520       ABIFRLen = 32;
12521     else if (ABIStr.endswith("d"))
12522       ABIFRLen = 64;
12523     return SetCGInfo(new LoongArchTargetCodeGenInfo(
12524         Types, getTarget().getPointerWidth(LangAS::Default), ABIFRLen));
12525   }
12526   }
12527 }
12528
12529 /// Create an OpenCL kernel for an enqueued block.
12530 ///
12531 /// The kernel has the same function type as the block invoke function. Its
12532 /// name is the name of the block invoke function postfixed with "_kernel".
12533 /// It simply calls the block invoke function then returns.
12534 llvm::Value *TargetCodeGenInfo::createEnqueuedBlockKernel(
12535     CodeGenFunction &CGF, llvm::Function *Invoke, llvm::Type *BlockTy) const {
12536   auto *InvokeFT = Invoke->getFunctionType();
12537   auto &C = CGF.getLLVMContext();
12538   std::string Name = Invoke->getName().str() + "_kernel";
12539   auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C),
12540                                      InvokeFT->params(), false);
12541   auto *F = llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage, Name,
12542                                    &CGF.CGM.getModule());
12543   llvm::CallingConv::ID KernelCC =
12544       CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel);
12545   F->setCallingConv(KernelCC);
12546
12547   llvm::AttrBuilder KernelAttrs(C);
12548
12549   // FIXME: This is missing setTargetAttributes
12550   CGF.CGM.addDefaultFunctionDefinitionAttributes(KernelAttrs);
12551   F->addFnAttrs(KernelAttrs);
12552
12553   auto IP = CGF.Builder.saveIP();
12554   auto *BB = llvm::BasicBlock::Create(C, "entry", F);
12555   auto &Builder = CGF.Builder;
12556   Builder.SetInsertPoint(BB);
12557   llvm::SmallVector<llvm::Value *, 2> Args(llvm::make_pointer_range(F->args()));
12558   llvm::CallInst *Call = Builder.CreateCall(Invoke, Args);
12559   Call->setCallingConv(Invoke->getCallingConv());
12560
12561   Builder.CreateRetVoid();
12562   Builder.restoreIP(IP);
12563   return F;
12564 }
12565
12566 /// Create an OpenCL kernel for an enqueued block.
12567 ///
12568 /// The type of the first argument (the block literal) is the struct type
12569 /// of the block literal instead of a pointer type. The first argument
12570 /// (block literal) is passed directly by value to the kernel. The kernel
12571 /// allocates the same type of struct on stack and stores the block literal
12572 /// to it and passes its pointer to the block invoke function. The kernel
12573 /// has "enqueued-block" function attribute and kernel argument metadata.
12574 llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
12575     CodeGenFunction &CGF, llvm::Function *Invoke, llvm::Type *BlockTy) const {
12576   auto &Builder = CGF.Builder;
12577   auto &C = CGF.getLLVMContext();
12578
12579   auto *InvokeFT = Invoke->getFunctionType();
12580   llvm::SmallVector<llvm::Type *, 2> ArgTys;
12581   llvm::SmallVector<llvm::Metadata *, 8> AddressQuals;
12582   llvm::SmallVector<llvm::Metadata *, 8> AccessQuals;
12583   llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames;
12584   llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames;
12585   llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals;
12586   llvm::SmallVector<llvm::Metadata *, 8> ArgNames;
12587
12588   ArgTys.push_back(BlockTy);
12589   ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
12590   AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0)));
12591   ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
12592   ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
12593   AccessQuals.push_back(llvm::MDString::get(C, "none"));
12594   ArgNames.push_back(llvm::MDString::get(C, "block_literal"));
12595   for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) {
12596     ArgTys.push_back(InvokeFT->getParamType(I));
12597     ArgTypeNames.push_back(llvm::MDString::get(C, "void*"));
12598     AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3)));
12599     AccessQuals.push_back(llvm::MDString::get(C, "none"));
12600     ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*"));
12601     ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
12602     ArgNames.push_back(
12603         llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str()));
12604   }
12605   std::string Name = Invoke->getName().str() + "_kernel";
12606   auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false);
12607   auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
12608                                    &CGF.CGM.getModule());
12609   F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
12610
12611   llvm::AttrBuilder KernelAttrs(C);
12612   // FIXME: The invoke isn't applying the right attributes either
12613   // FIXME: This is missing setTargetAttributes
12614   CGF.CGM.addDefaultFunctionDefinitionAttributes(KernelAttrs);
12615   KernelAttrs.addAttribute("enqueued-block");
12616   F->addFnAttrs(KernelAttrs);
12617
12618   auto IP = CGF.Builder.saveIP();
12619   auto *BB = llvm::BasicBlock::Create(C, "entry", F);
12620   Builder.SetInsertPoint(BB);
12621   const auto BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlign(BlockTy);
12622   auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr);
12623   BlockPtr->setAlignment(BlockAlign);
12624   Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
12625   auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
12626   llvm::SmallVector<llvm::Value *, 2> Args;
12627   Args.push_back(Cast);
12628   for (llvm::Argument &A : llvm::drop_begin(F->args()))
12629     Args.push_back(&A);
12630   llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
12631   call->setCallingConv(Invoke->getCallingConv());
12632   Builder.CreateRetVoid();
12633   Builder.restoreIP(IP);
12634
12635   F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals));
12636   F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals));
12637   F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames));
12638   F->setMetadata("kernel_arg_base_type",
12639                  llvm::MDNode::get(C, ArgBaseTypeNames));
12640   F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals));
12641   if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata)
12642     F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames));
12643
12644   return F;
12645 }