1 //===- AArch64.cpp --------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "ABIInfoImpl.h"
10 #include "TargetInfo.h"
11 #include "clang/AST/Decl.h"
12 #include "clang/Basic/DiagnosticFrontend.h"
13 #include "llvm/TargetParser/AArch64TargetParser.h"
15 using namespace clang;
16 using namespace clang::CodeGen;
18 //===----------------------------------------------------------------------===//
19 // AArch64 ABI Implementation
20 //===----------------------------------------------------------------------===//
22 namespace {
24 class AArch64ABIInfo : public ABIInfo {
25 AArch64ABIKind Kind;
27 public:
28 AArch64ABIInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
29 : ABIInfo(CGT), Kind(Kind) {}
31 bool isSoftFloat() const { return Kind == AArch64ABIKind::AAPCSSoft; }
33 private:
34 AArch64ABIKind getABIKind() const { return Kind; }
35 bool isDarwinPCS() const { return Kind == AArch64ABIKind::DarwinPCS; }
37 ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadicFn) const;
38 ABIArgInfo classifyArgumentType(QualType RetTy, bool IsVariadicFn,
39 bool IsNamedArg, unsigned CallingConvention,
40 unsigned &NSRN, unsigned &NPRN) const;
41 llvm::Type *convertFixedToScalableVectorType(const VectorType *VT) const;
42 ABIArgInfo coerceIllegalVector(QualType Ty, unsigned &NSRN,
43 unsigned &NPRN) const;
44 ABIArgInfo coerceAndExpandPureScalableAggregate(
45 QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred,
46 const SmallVectorImpl<llvm::Type *> &UnpaddedCoerceToSeq, unsigned &NSRN,
47 unsigned &NPRN) const;
48 bool isHomogeneousAggregateBaseType(QualType Ty) const override;
49 bool isHomogeneousAggregateSmallEnough(const Type *Ty,
50 uint64_t Members) const override;
51 bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override;
53 bool isIllegalVectorType(QualType Ty) const;
55 bool passAsAggregateType(QualType Ty) const;
56 bool passAsPureScalableType(QualType Ty, unsigned &NV, unsigned &NP,
57 SmallVectorImpl<llvm::Type *> &CoerceToSeq) const;
59 void flattenType(llvm::Type *Ty,
60 SmallVectorImpl<llvm::Type *> &Flattened) const;
62 void computeInfo(CGFunctionInfo &FI) const override {
63 if (!::classifyReturnType(getCXXABI(), FI, *this))
64 FI.getReturnInfo() =
65 classifyReturnType(FI.getReturnType(), FI.isVariadic());
67 unsigned ArgNo = 0;
68 unsigned NSRN = 0, NPRN = 0;
69 for (auto &it : FI.arguments()) {
70 const bool IsNamedArg =
71 !FI.isVariadic() || ArgNo < FI.getRequiredArgs().getNumRequiredArgs();
72 ++ArgNo;
73 it.info = classifyArgumentType(it.type, FI.isVariadic(), IsNamedArg,
74 FI.getCallingConvention(), NSRN, NPRN);
78 RValue EmitDarwinVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
79 AggValueSlot Slot) const;
81 RValue EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
82 AArch64ABIKind Kind, AggValueSlot Slot) const;
84 RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
85 AggValueSlot Slot) const override {
86 llvm::Type *BaseTy = CGF.ConvertType(Ty);
87 if (isa<llvm::ScalableVectorType>(BaseTy))
88 llvm::report_fatal_error("Passing SVE types to variadic functions is "
89 "currently not supported");
91 return Kind == AArch64ABIKind::Win64
92 ? EmitMSVAArg(CGF, VAListAddr, Ty, Slot)
93 : isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF, Slot)
94 : EmitAAPCSVAArg(VAListAddr, Ty, CGF, Kind, Slot);
97 RValue EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
98 AggValueSlot Slot) const override;
100 bool allowBFloatArgsAndRet() const override {
101 return getTarget().hasBFloat16Type();
104 using ABIInfo::appendAttributeMangling;
105 void appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
106 raw_ostream &Out) const override;
107 void appendAttributeMangling(StringRef AttrStr,
108 raw_ostream &Out) const override;
111 class AArch64SwiftABIInfo : public SwiftABIInfo {
112 public:
113 explicit AArch64SwiftABIInfo(CodeGenTypes &CGT)
114 : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {}
116 bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
117 unsigned NumElts) const override;
120 class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
121 public:
122 AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
123 : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {
124 SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGT);
127 StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
128 return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue";
131 int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
132 return 31;
135 bool doesReturnSlotInterfereWithArgs() const override { return false; }
137 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
138 CodeGen::CodeGenModule &CGM) const override {
139 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
140 if (!FD)
141 return;
143 TargetInfo::BranchProtectionInfo BPI(CGM.getLangOpts());
145 if (const auto *TA = FD->getAttr<TargetAttr>()) {
146 ParsedTargetAttr Attr =
147 CGM.getTarget().parseTargetAttr(TA->getFeaturesStr());
148 if (!Attr.BranchProtection.empty()) {
149 StringRef Error;
150 (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection,
151 Attr.CPU, BPI, Error);
152 assert(Error.empty());
155 auto *Fn = cast<llvm::Function>(GV);
156 setBranchProtectionFnAttributes(BPI, *Fn);
159 bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF,
160 llvm::Type *Ty) const override {
161 if (CGF.getTarget().hasFeature("ls64")) {
162 auto *ST = dyn_cast<llvm::StructType>(Ty);
163 if (ST && ST->getNumElements() == 1) {
164 auto *AT = dyn_cast<llvm::ArrayType>(ST->getElementType(0));
165 if (AT && AT->getNumElements() == 8 &&
166 AT->getElementType()->isIntegerTy(64))
167 return true;
170 return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty);
173 void checkFunctionABI(CodeGenModule &CGM,
174 const FunctionDecl *Decl) const override;
176 void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
177 const FunctionDecl *Caller,
178 const FunctionDecl *Callee, const CallArgList &Args,
179 QualType ReturnType) const override;
181 bool wouldInliningViolateFunctionCallABI(
182 const FunctionDecl *Caller, const FunctionDecl *Callee) const override;
184 private:
185 // Diagnose calls between functions with incompatible Streaming SVE
186 // attributes.
187 void checkFunctionCallABIStreaming(CodeGenModule &CGM, SourceLocation CallLoc,
188 const FunctionDecl *Caller,
189 const FunctionDecl *Callee) const;
190 // Diagnose calls which must pass arguments in floating-point registers when
191 // the selected target does not have floating-point registers.
192 void checkFunctionCallABISoftFloat(CodeGenModule &CGM, SourceLocation CallLoc,
193 const FunctionDecl *Caller,
194 const FunctionDecl *Callee,
195 const CallArgList &Args,
196 QualType ReturnType) const;
199 class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
200 public:
201 WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind K)
202 : AArch64TargetCodeGenInfo(CGT, K) {}
204 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
205 CodeGen::CodeGenModule &CGM) const override;
207 void getDependentLibraryOption(llvm::StringRef Lib,
208 llvm::SmallString<24> &Opt) const override {
209 Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib);
212 void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value,
213 llvm::SmallString<32> &Opt) const override {
214 Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
218 void WindowsAArch64TargetCodeGenInfo::setTargetAttributes(
219 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
220 AArch64TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
221 if (GV->isDeclaration())
222 return;
223 addStackProbeTargetAttributes(D, GV, CGM);
227 llvm::Type *
228 AArch64ABIInfo::convertFixedToScalableVectorType(const VectorType *VT) const {
229 assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
231 if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
232 assert(VT->getElementType()->castAs<BuiltinType>()->getKind() ==
233 BuiltinType::UChar &&
234 "unexpected builtin type for SVE predicate!");
235 return llvm::ScalableVectorType::get(llvm::Type::getInt1Ty(getVMContext()),
236 16);
239 if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
240 const auto *BT = VT->getElementType()->castAs<BuiltinType>();
241 switch (BT->getKind()) {
242 default:
243 llvm_unreachable("unexpected builtin type for SVE vector!");
245 case BuiltinType::SChar:
246 case BuiltinType::UChar:
247 case BuiltinType::MFloat8:
248 return llvm::ScalableVectorType::get(
249 llvm::Type::getInt8Ty(getVMContext()), 16);
251 case BuiltinType::Short:
252 case BuiltinType::UShort:
253 return llvm::ScalableVectorType::get(
254 llvm::Type::getInt16Ty(getVMContext()), 8);
256 case BuiltinType::Int:
257 case BuiltinType::UInt:
258 return llvm::ScalableVectorType::get(
259 llvm::Type::getInt32Ty(getVMContext()), 4);
261 case BuiltinType::Long:
262 case BuiltinType::ULong:
263 return llvm::ScalableVectorType::get(
264 llvm::Type::getInt64Ty(getVMContext()), 2);
266 case BuiltinType::Half:
267 return llvm::ScalableVectorType::get(
268 llvm::Type::getHalfTy(getVMContext()), 8);
270 case BuiltinType::Float:
271 return llvm::ScalableVectorType::get(
272 llvm::Type::getFloatTy(getVMContext()), 4);
274 case BuiltinType::Double:
275 return llvm::ScalableVectorType::get(
276 llvm::Type::getDoubleTy(getVMContext()), 2);
278 case BuiltinType::BFloat16:
279 return llvm::ScalableVectorType::get(
280 llvm::Type::getBFloatTy(getVMContext()), 8);
284 llvm_unreachable("expected fixed-length SVE vector");
287 ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty, unsigned &NSRN,
288 unsigned &NPRN) const {
289 assert(Ty->isVectorType() && "expected vector type!");
291 const auto *VT = Ty->castAs<VectorType>();
292 if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
293 assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
294 assert(VT->getElementType()->castAs<BuiltinType>()->getKind() ==
295 BuiltinType::UChar &&
296 "unexpected builtin type for SVE predicate!");
297 NPRN = std::min(NPRN + 1, 4u);
298 return ABIArgInfo::getDirect(llvm::ScalableVectorType::get(
299 llvm::Type::getInt1Ty(getVMContext()), 16));
302 if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
303 NSRN = std::min(NSRN + 1, 8u);
304 return ABIArgInfo::getDirect(convertFixedToScalableVectorType(VT));
307 uint64_t Size = getContext().getTypeSize(Ty);
308 // Android promotes <2 x i8> to i16, not i32
309 if ((isAndroid() || isOHOSFamily()) && (Size <= 16)) {
310 llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext());
311 return ABIArgInfo::getDirect(ResType);
313 if (Size <= 32) {
314 llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext());
315 return ABIArgInfo::getDirect(ResType);
317 if (Size == 64) {
318 NSRN = std::min(NSRN + 1, 8u);
319 auto *ResType =
320 llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
321 return ABIArgInfo::getDirect(ResType);
323 if (Size == 128) {
324 NSRN = std::min(NSRN + 1, 8u);
325 auto *ResType =
326 llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
327 return ABIArgInfo::getDirect(ResType);
330 return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
333 ABIArgInfo AArch64ABIInfo::coerceAndExpandPureScalableAggregate(
334 QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred,
335 const SmallVectorImpl<llvm::Type *> &UnpaddedCoerceToSeq, unsigned &NSRN,
336 unsigned &NPRN) const {
337 if (!IsNamedArg || NSRN + NVec > 8 || NPRN + NPred > 4)
338 return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
339 NSRN += NVec;
340 NPRN += NPred;
342 // Handle SVE vector tuples.
343 if (Ty->isSVESizelessBuiltinType())
344 return ABIArgInfo::getDirect();
346 llvm::Type *UnpaddedCoerceToType =
347 UnpaddedCoerceToSeq.size() == 1
348 ? UnpaddedCoerceToSeq[0]
349 : llvm::StructType::get(CGT.getLLVMContext(), UnpaddedCoerceToSeq,
350 true);
352 SmallVector<llvm::Type *> CoerceToSeq;
353 flattenType(CGT.ConvertType(Ty), CoerceToSeq);
354 auto *CoerceToType =
355 llvm::StructType::get(CGT.getLLVMContext(), CoerceToSeq, false);
357 return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType);
360 ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
361 bool IsNamedArg,
362 unsigned CallingConvention,
363 unsigned &NSRN,
364 unsigned &NPRN) const {
365 Ty = useFirstFieldIfTransparentUnion(Ty);
367 // Handle illegal vector types here.
368 if (isIllegalVectorType(Ty))
369 return coerceIllegalVector(Ty, NSRN, NPRN);
371 if (!passAsAggregateType(Ty)) {
372 // Treat an enum type as its underlying type.
373 if (const EnumType *EnumTy = Ty->getAs<EnumType>())
374 Ty = EnumTy->getDecl()->getIntegerType();
376 if (const auto *EIT = Ty->getAs<BitIntType>())
377 if (EIT->getNumBits() > 128)
378 return getNaturalAlignIndirect(Ty, false);
380 if (Ty->isVectorType())
381 NSRN = std::min(NSRN + 1, 8u);
382 else if (const auto *BT = Ty->getAs<BuiltinType>()) {
383 if (BT->isFloatingPoint())
384 NSRN = std::min(NSRN + 1, 8u);
385 else {
386 switch (BT->getKind()) {
387 case BuiltinType::SveBool:
388 case BuiltinType::SveCount:
389 NPRN = std::min(NPRN + 1, 4u);
390 break;
391 case BuiltinType::SveBoolx2:
392 NPRN = std::min(NPRN + 2, 4u);
393 break;
394 case BuiltinType::SveBoolx4:
395 NPRN = std::min(NPRN + 4, 4u);
396 break;
397 default:
398 if (BT->isSVESizelessBuiltinType())
399 NSRN = std::min(
400 NSRN + getContext().getBuiltinVectorTypeInfo(BT).NumVectors,
401 8u);
406 return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS()
407 ? ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty))
408 : ABIArgInfo::getDirect());
411 // Structures with either a non-trivial destructor or a non-trivial
412 // copy constructor are always indirect.
413 if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
414 return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
415 CGCXXABI::RAA_DirectInMemory);
418 // Empty records:
419 uint64_t Size = getContext().getTypeSize(Ty);
420 bool IsEmpty = isEmptyRecord(getContext(), Ty, true);
421 if (!Ty->isSVESizelessBuiltinType() && (IsEmpty || Size == 0)) {
422 // Empty records are ignored in C mode, and in C++ on Darwin.
423 if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
424 return ABIArgInfo::getIgnore();
426 // In C++ mode, arguments which have sizeof() == 0 (which are non-standard
427 // C++) are ignored. This isn't defined by any standard, so we copy GCC's
428 // behaviour here.
429 if (Size == 0)
430 return ABIArgInfo::getIgnore();
432 // Otherwise, they are passed as if they have a size of 1 byte.
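  // For illustration: in C++ (non-Darwin) "struct Empty {};" has sizeof == 1,
  // so it reaches this point and is passed as a single i8.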
433 return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
436 // Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
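  // For illustration, typical HFAs/HVAs under AAPCS64 look like
  //   struct Color { float r, g, b; };        // base type float, 3 members
  //   struct Pair  { float32x2_t lo, hi; };   // base type <2 x float>, 2 members
  // (float32x2_t assumes <arm_neon.h>). With enough free FP/SIMD registers,
  // such a type is passed in consecutive v-registers.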
437 const Type *Base = nullptr;
438 uint64_t Members = 0;
439 bool IsWin64 = Kind == AArch64ABIKind::Win64 ||
440 CallingConvention == llvm::CallingConv::Win64;
441 bool IsWinVariadic = IsWin64 && IsVariadicFn;
 442   // In variadic functions on Windows, all composite types are treated alike;
 443   // there is no special handling of HFAs/HVAs.
444 if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) {
445 NSRN = std::min(NSRN + Members, uint64_t(8));
446 if (Kind != AArch64ABIKind::AAPCS)
447 return ABIArgInfo::getDirect(
448 llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
450 // For HFAs/HVAs, cap the argument alignment to 16, otherwise
451 // set it to 8 according to the AAPCS64 document.
452 unsigned Align =
453 getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
454 Align = (Align >= 16) ? 16 : 8;
455 return ABIArgInfo::getDirect(
456 llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), 0,
457 nullptr, true, Align);
460 // In AAPCS named arguments of a Pure Scalable Type are passed expanded in
461 // registers, or indirectly if there are not enough registers.
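  // For illustration, a Pure Scalable Type is built only from SVE data and
  // predicate types, e.g. (assuming <arm_sve.h>):
  //   struct PST { svfloat32_t v0, v1; svbool_t p; };   // NVec = 2, NPred = 1
  // A named PST argument is expanded into Z- and P-registers when
  // NSRN + NVec <= 8 and NPRN + NPred <= 4, and is passed indirectly otherwise.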
462 if (Kind == AArch64ABIKind::AAPCS) {
463 unsigned NVec = 0, NPred = 0;
464 SmallVector<llvm::Type *> UnpaddedCoerceToSeq;
465 if (passAsPureScalableType(Ty, NVec, NPred, UnpaddedCoerceToSeq) &&
466 (NVec + NPred) > 0)
467 return coerceAndExpandPureScalableAggregate(
468 Ty, IsNamedArg, NVec, NPred, UnpaddedCoerceToSeq, NSRN, NPRN);
471 // Aggregates <= 16 bytes are passed directly in registers or on the stack.
472 if (Size <= 128) {
473 unsigned Alignment;
474 if (Kind == AArch64ABIKind::AAPCS) {
475 Alignment = getContext().getTypeUnadjustedAlign(Ty);
476 Alignment = Alignment < 128 ? 64 : 128;
477 } else {
478 Alignment =
479 std::max(getContext().getTypeAlign(Ty),
480 (unsigned)getTarget().getPointerWidth(LangAS::Default));
482 Size = llvm::alignTo(Size, Alignment);
 484     // We use a pair of i64 for a 16-byte aggregate with 8-byte alignment.
485 // For aggregates with 16-byte alignment, we use i128.
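    // For illustration: "struct S { long a, b; };" (16 bytes, 8-byte aligned)
    // is coerced to [2 x i64], while "struct T { __int128 a; };" (16-byte
    // aligned) is coerced to i128; a 12-byte struct is first rounded up to
    // 16 bytes here.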
486 llvm::Type *BaseTy = llvm::Type::getIntNTy(getVMContext(), Alignment);
487 return ABIArgInfo::getDirect(
488 Size == Alignment ? BaseTy
489 : llvm::ArrayType::get(BaseTy, Size / Alignment));
492 return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
495 ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
496 bool IsVariadicFn) const {
497 if (RetTy->isVoidType())
498 return ABIArgInfo::getIgnore();
500 if (const auto *VT = RetTy->getAs<VectorType>()) {
501 if (VT->getVectorKind() == VectorKind::SveFixedLengthData ||
502 VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
503 unsigned NSRN = 0, NPRN = 0;
504 return coerceIllegalVector(RetTy, NSRN, NPRN);
508 // Large vector types should be returned via memory.
509 if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128)
510 return getNaturalAlignIndirect(RetTy);
512 if (!passAsAggregateType(RetTy)) {
513 // Treat an enum type as its underlying type.
514 if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
515 RetTy = EnumTy->getDecl()->getIntegerType();
517 if (const auto *EIT = RetTy->getAs<BitIntType>())
518 if (EIT->getNumBits() > 128)
519 return getNaturalAlignIndirect(RetTy);
521 return (isPromotableIntegerTypeForABI(RetTy) && isDarwinPCS()
522 ? ABIArgInfo::getExtend(RetTy)
523 : ABIArgInfo::getDirect());
526 uint64_t Size = getContext().getTypeSize(RetTy);
527 if (!RetTy->isSVESizelessBuiltinType() &&
528 (isEmptyRecord(getContext(), RetTy, true) || Size == 0))
529 return ABIArgInfo::getIgnore();
531 const Type *Base = nullptr;
532 uint64_t Members = 0;
533 if (isHomogeneousAggregate(RetTy, Base, Members) &&
534 !(getTarget().getTriple().getArch() == llvm::Triple::aarch64_32 &&
535 IsVariadicFn))
536 // Homogeneous Floating-point Aggregates (HFAs) are returned directly.
537 return ABIArgInfo::getDirect();
539 // In AAPCS return values of a Pure Scalable type are treated as a single
540 // named argument and passed expanded in registers, or indirectly if there are
541 // not enough registers.
542 if (Kind == AArch64ABIKind::AAPCS) {
543 unsigned NSRN = 0, NPRN = 0;
544 unsigned NVec = 0, NPred = 0;
545 SmallVector<llvm::Type *> UnpaddedCoerceToSeq;
546 if (passAsPureScalableType(RetTy, NVec, NPred, UnpaddedCoerceToSeq) &&
547 (NVec + NPred) > 0)
548 return coerceAndExpandPureScalableAggregate(
549 RetTy, /* IsNamedArg */ true, NVec, NPred, UnpaddedCoerceToSeq, NSRN,
550 NPRN);
553 // Aggregates <= 16 bytes are returned directly in registers or on the stack.
554 if (Size <= 128) {
555 if (Size <= 64 && getDataLayout().isLittleEndian()) {
556 // Composite types are returned in lower bits of a 64-bit register for LE,
557 // and in higher bits for BE. However, integer types are always returned
558 // in lower bits for both LE and BE, and they are not rounded up to
559 // 64-bits. We can skip rounding up of composite types for LE, but not for
560 // BE, otherwise composite types will be indistinguishable from integer
561 // types.
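      // For illustration: a 3-byte "struct { short a; char b; };" is returned
      // as i24 in the low bits of w0 here, whereas on big-endian it falls
      // through below and is widened to a full register so that it stays
      // distinguishable from a plain integer return.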
562 return ABIArgInfo::getDirect(
563 llvm::IntegerType::get(getVMContext(), Size));
566 unsigned Alignment = getContext().getTypeAlign(RetTy);
567 Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
 569   // We use a pair of i64 for a 16-byte aggregate with 8-byte alignment.
570 // For aggregates with 16-byte alignment, we use i128.
571 if (Alignment < 128 && Size == 128) {
572 llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext());
573 return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
575 return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
578 return getNaturalAlignIndirect(RetTy);
581 /// isIllegalVectorType - check whether the vector type is legal for AArch64.
582 bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
583 if (const VectorType *VT = Ty->getAs<VectorType>()) {
584 // Check whether VT is a fixed-length SVE vector. These types are
585 // represented as scalable vectors in function args/return and must be
586 // coerced from fixed vectors.
587 if (VT->getVectorKind() == VectorKind::SveFixedLengthData ||
588 VT->getVectorKind() == VectorKind::SveFixedLengthPredicate)
589 return true;
591 // Check whether VT is legal.
592 unsigned NumElements = VT->getNumElements();
593 uint64_t Size = getContext().getTypeSize(VT);
594 // NumElements should be power of 2.
595 if (!llvm::isPowerOf2_32(NumElements))
596 return true;
598 // arm64_32 has to be compatible with the ARM logic here, which allows huge
599 // vectors for some reason.
600 llvm::Triple Triple = getTarget().getTriple();
601 if (Triple.getArch() == llvm::Triple::aarch64_32 &&
602 Triple.isOSBinFormatMachO())
603 return Size <= 32;
605 return Size != 64 && (Size != 128 || NumElements == 1);
607 return false;
610 bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize,
611 llvm::Type *EltTy,
612 unsigned NumElts) const {
613 if (!llvm::isPowerOf2_32(NumElts))
614 return false;
615 if (VectorSize.getQuantity() != 8 &&
616 (VectorSize.getQuantity() != 16 || NumElts == 1))
617 return false;
618 return true;
621 bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
622 // For the soft-float ABI variant, no types are considered to be homogeneous
623 // aggregates.
624 if (isSoftFloat())
625 return false;
627 // Homogeneous aggregates for AAPCS64 must have base types of a floating
628 // point type or a short-vector type. This is the same as the 32-bit ABI,
629 // but with the difference that any floating-point type is allowed,
630 // including __fp16.
631 if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
632 if (BT->isFloatingPoint())
633 return true;
634 } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
635 if (auto Kind = VT->getVectorKind();
636 Kind == VectorKind::SveFixedLengthData ||
637 Kind == VectorKind::SveFixedLengthPredicate)
638 return false;
640 unsigned VecSize = getContext().getTypeSize(VT);
641 if (VecSize == 64 || VecSize == 128)
642 return true;
644 return false;
647 bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
648 uint64_t Members) const {
649 return Members <= 4;
652 bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
653 const {
654 // AAPCS64 says that the rule for whether something is a homogeneous
655 // aggregate is applied to the output of the data layout decision. So
656 // anything that doesn't affect the data layout also does not affect
657 // homogeneity. In particular, zero-length bitfields don't stop a struct
658 // being homogeneous.
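  // For illustration: "struct { float a; int : 0; float b; };" is still an HFA
  // of two floats, since the zero-length bitfield does not change its layout.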
659 return true;
662 bool AArch64ABIInfo::passAsAggregateType(QualType Ty) const {
663 if (Kind == AArch64ABIKind::AAPCS && Ty->isSVESizelessBuiltinType()) {
664 const auto *BT = Ty->castAs<BuiltinType>();
665 return !BT->isSVECount() &&
666 getContext().getBuiltinVectorTypeInfo(BT).NumVectors > 1;
668 return isAggregateTypeForABI(Ty);
671 // Check if a type needs to be passed in registers as a Pure Scalable Type (as
672 // defined by AAPCS64). Return the number of data vectors and the number of
673 // predicate vectors in the type, into `NVec` and `NPred`, respectively. Upon
674 // return `CoerceToSeq` contains an expanded sequence of LLVM IR types, one
 675 // element for each non-composite member. For practical purposes, the length
 676 // of `CoerceToSeq` is limited to about 12 (the maximum that could fit in
 677 // registers); if it would exceed that, return false, which has the effect of
 678 // passing the argument under the rules for a large (> 128 bytes) composite.
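// For illustration (types assume <arm_sve.h>): for
//   struct S { svfloat32x2_t v; svbool_t p; };
// this returns true with NVec = 2, NPred = 1 and CoerceToSeq holding
// { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i1> }, while an
// aggregate needing more than ~12 registers in total is rejected and falls
// back to the large-composite rules.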
679 bool AArch64ABIInfo::passAsPureScalableType(
680 QualType Ty, unsigned &NVec, unsigned &NPred,
681 SmallVectorImpl<llvm::Type *> &CoerceToSeq) const {
682 if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
683 uint64_t NElt = AT->getZExtSize();
684 if (NElt == 0)
685 return false;
687 unsigned NV = 0, NP = 0;
688 SmallVector<llvm::Type *> EltCoerceToSeq;
689 if (!passAsPureScalableType(AT->getElementType(), NV, NP, EltCoerceToSeq))
690 return false;
692 if (CoerceToSeq.size() + NElt * EltCoerceToSeq.size() > 12)
693 return false;
695 for (uint64_t I = 0; I < NElt; ++I)
696 llvm::copy(EltCoerceToSeq, std::back_inserter(CoerceToSeq));
698 NVec += NElt * NV;
699 NPred += NElt * NP;
700 return true;
703 if (const RecordType *RT = Ty->getAs<RecordType>()) {
704 // If the record cannot be passed in registers, then it's not a PST.
705 if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
706 RAA != CGCXXABI::RAA_Default)
707 return false;
709 // Pure scalable types are never unions and never contain unions.
710 const RecordDecl *RD = RT->getDecl();
711 if (RD->isUnion())
712 return false;
714 // If this is a C++ record, check the bases.
715 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
716 for (const auto &I : CXXRD->bases()) {
717 if (isEmptyRecord(getContext(), I.getType(), true))
718 continue;
719 if (!passAsPureScalableType(I.getType(), NVec, NPred, CoerceToSeq))
720 return false;
724 // Check members.
725 for (const auto *FD : RD->fields()) {
726 QualType FT = FD->getType();
727 if (isEmptyField(getContext(), FD, /* AllowArrays */ true))
728 continue;
729 if (!passAsPureScalableType(FT, NVec, NPred, CoerceToSeq))
730 return false;
733 return true;
736 if (const auto *VT = Ty->getAs<VectorType>()) {
737 if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
738 ++NPred;
739 if (CoerceToSeq.size() + 1 > 12)
740 return false;
741 CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
742 return true;
745 if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
746 ++NVec;
747 if (CoerceToSeq.size() + 1 > 12)
748 return false;
749 CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
750 return true;
753 return false;
756 if (!Ty->isBuiltinType())
757 return false;
759 bool isPredicate;
760 switch (Ty->getAs<BuiltinType>()->getKind()) {
761 #define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
762 case BuiltinType::Id: \
763 isPredicate = false; \
764 break;
765 #define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId) \
766 case BuiltinType::Id: \
767 isPredicate = true; \
768 break;
769 #define SVE_TYPE(Name, Id, SingletonId)
770 #include "clang/Basic/AArch64SVEACLETypes.def"
771 default:
772 return false;
775 ASTContext::BuiltinVectorTypeInfo Info =
776 getContext().getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
777 assert(Info.NumVectors > 0 && Info.NumVectors <= 4 &&
778 "Expected 1, 2, 3 or 4 vectors!");
779 if (isPredicate)
780 NPred += Info.NumVectors;
781 else
782 NVec += Info.NumVectors;
783 llvm::Type *EltTy = Info.ElementType->isMFloat8Type()
784 ? llvm::Type::getInt8Ty(getVMContext())
785 : CGT.ConvertType(Info.ElementType);
786 auto *VTy = llvm::ScalableVectorType::get(EltTy, Info.EC.getKnownMinValue());
788 if (CoerceToSeq.size() + Info.NumVectors > 12)
789 return false;
790 std::fill_n(std::back_inserter(CoerceToSeq), Info.NumVectors, VTy);
792 return true;
 795 // Expand an LLVM IR type into a sequence with an element for each non-struct,
796 // non-array member of the type, with the exception of the padding types, which
797 // are retained.
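// For illustration: flattening [2 x { i64, double }] yields the sequence
// i64, double, i64, double, whereas a padding element inserted by
// coerce-and-expand (e.g. an [8 x i8] padding array) is kept as-is.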
798 void AArch64ABIInfo::flattenType(
799 llvm::Type *Ty, SmallVectorImpl<llvm::Type *> &Flattened) const {
801 if (ABIArgInfo::isPaddingForCoerceAndExpand(Ty)) {
802 Flattened.push_back(Ty);
803 return;
806 if (const auto *AT = dyn_cast<llvm::ArrayType>(Ty)) {
807 uint64_t NElt = AT->getNumElements();
808 if (NElt == 0)
809 return;
811 SmallVector<llvm::Type *> EltFlattened;
812 flattenType(AT->getElementType(), EltFlattened);
814 for (uint64_t I = 0; I < NElt; ++I)
815 llvm::copy(EltFlattened, std::back_inserter(Flattened));
816 return;
819 if (const auto *ST = dyn_cast<llvm::StructType>(Ty)) {
820 for (auto *ET : ST->elements())
821 flattenType(ET, Flattened);
822 return;
825 Flattened.push_back(Ty);
828 RValue AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
829 CodeGenFunction &CGF, AArch64ABIKind Kind,
830 AggValueSlot Slot) const {
 831   // These counters are not used for variadic arguments, so it doesn't matter
 832   // that they don't retain their values across multiple calls to
 833   // `classifyArgumentType` here.
834 unsigned NSRN = 0, NPRN = 0;
835 ABIArgInfo AI =
836 classifyArgumentType(Ty, /*IsVariadicFn=*/true, /* IsNamedArg */ false,
837 CGF.CurFnInfo->getCallingConvention(), NSRN, NPRN);
838 // Empty records are ignored for parameter passing purposes.
839 if (AI.isIgnore())
840 return Slot.asRValue();
842 bool IsIndirect = AI.isIndirect();
844 llvm::Type *BaseTy = CGF.ConvertType(Ty);
845 if (IsIndirect)
846 BaseTy = llvm::PointerType::getUnqual(BaseTy);
847 else if (AI.getCoerceToType())
848 BaseTy = AI.getCoerceToType();
850 unsigned NumRegs = 1;
851 if (llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(BaseTy)) {
852 BaseTy = ArrTy->getElementType();
853 NumRegs = ArrTy->getNumElements();
855 bool IsFPR =
856 !isSoftFloat() && (BaseTy->isFloatingPointTy() || BaseTy->isVectorTy());
 858   // The AArch64 va_list type and its handling are specified in the Procedure
 859   // Call Standard (AAPCS64), section B.4:
861 // struct {
862 // void *__stack;
863 // void *__gr_top;
864 // void *__vr_top;
865 // int __gr_offs;
866 // int __vr_offs;
867 // };
869 llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
870 llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
871 llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
872 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
874 CharUnits TySize = getContext().getTypeSizeInChars(Ty);
875 CharUnits TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty);
877 Address reg_offs_p = Address::invalid();
878 llvm::Value *reg_offs = nullptr;
879 int reg_top_index;
880 int RegSize = IsIndirect ? 8 : TySize.getQuantity();
881 if (!IsFPR) {
882 // 3 is the field number of __gr_offs
883 reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
884 reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
885 reg_top_index = 1; // field number for __gr_top
886 RegSize = llvm::alignTo(RegSize, 8);
887 } else {
888 // 4 is the field number of __vr_offs.
889 reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
890 reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
891 reg_top_index = 2; // field number for __vr_top
892 RegSize = 16 * NumRegs;
895 //=======================================
 896   // Find out where the argument was passed
897 //=======================================
899 // If reg_offs >= 0 we're already using the stack for this type of
900 // argument. We don't want to keep updating reg_offs (in case it overflows,
901 // though anyone passing 2GB of arguments, each at most 16 bytes, deserves
902 // whatever they get).
903 llvm::Value *UsingStack = nullptr;
904 UsingStack = CGF.Builder.CreateICmpSGE(
905 reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, 0));
907 CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock);
 909   // Otherwise, at least some kind of argument could go in these registers; the
 910   // question is whether this particular type is too big.
911 CGF.EmitBlock(MaybeRegBlock);
 913   // Integer arguments may need their register alignment corrected (for example
 914   // a "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we
 915   // align __gr_offs before calculating the potential address.
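  // For illustration: with a 16-byte-aligned type and __gr_offs == -56, the
  // two steps below produce (-56 + 15) & -16 == -48, i.e. the next register
  // pair boundary.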
916 if (!IsFPR && !IsIndirect && TyAlign.getQuantity() > 8) {
917 int Align = TyAlign.getQuantity();
919 reg_offs = CGF.Builder.CreateAdd(
920 reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1),
921 "align_regoffs");
922 reg_offs = CGF.Builder.CreateAnd(
923 reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, -Align),
924 "aligned_regoffs");
 927   // Update gr_offs/vr_offs for the next call to va_arg on this va_list.
 928   // This is done unconditionally because allocating an argument to the
 929   // stack also uses up all the remaining registers of the appropriate
 930   // kind.
931 llvm::Value *NewOffset = nullptr;
932 NewOffset = CGF.Builder.CreateAdd(
933 reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs");
934 CGF.Builder.CreateStore(NewOffset, reg_offs_p);
936 // Now we're in a position to decide whether this argument really was in
937 // registers or not.
938 llvm::Value *InRegs = nullptr;
939 InRegs = CGF.Builder.CreateICmpSLE(
940 NewOffset, llvm::ConstantInt::get(CGF.Int32Ty, 0), "inreg");
942 CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock);
944 //=======================================
945 // Argument was in registers
946 //=======================================
948 // Now we emit the code for if the argument was originally passed in
949 // registers. First start the appropriate block:
950 CGF.EmitBlock(InRegBlock);
952 llvm::Value *reg_top = nullptr;
953 Address reg_top_p =
954 CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p");
955 reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top");
956 Address BaseAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, reg_top, reg_offs),
957 CGF.Int8Ty, CharUnits::fromQuantity(IsFPR ? 16 : 8));
958 Address RegAddr = Address::invalid();
959 llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty), *ElementTy = MemTy;
961 if (IsIndirect) {
962 // If it's been passed indirectly (actually a struct), whatever we find from
963 // stored registers or on the stack will actually be a struct **.
964 MemTy = llvm::PointerType::getUnqual(MemTy);
967 const Type *Base = nullptr;
968 uint64_t NumMembers = 0;
969 bool IsHFA = isHomogeneousAggregate(Ty, Base, NumMembers);
970 if (IsHFA && NumMembers > 1) {
971 // Homogeneous aggregates passed in registers will have their elements split
 972     // and stored 16 bytes apart regardless of size (they're notionally in qN,
973 // qN+1, ...). We reload and store into a temporary local variable
974 // contiguously.
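    // For illustration, on little-endian: an HFA of three floats arriving in
    // q0-q2 is loaded from reg_top + reg_offs at offsets 0, 16 and 32 and
    // stored into a contiguous [3 x float] temporary below.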
975 assert(!IsIndirect && "Homogeneous aggregates should be passed directly");
976 auto BaseTyInfo = getContext().getTypeInfoInChars(QualType(Base, 0));
977 llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0));
978 llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers);
979 Address Tmp = CGF.CreateTempAlloca(HFATy,
980 std::max(TyAlign, BaseTyInfo.Align));
982 // On big-endian platforms, the value will be right-aligned in its slot.
983 int Offset = 0;
984 if (CGF.CGM.getDataLayout().isBigEndian() &&
985 BaseTyInfo.Width.getQuantity() < 16)
986 Offset = 16 - BaseTyInfo.Width.getQuantity();
988 for (unsigned i = 0; i < NumMembers; ++i) {
989 CharUnits BaseOffset = CharUnits::fromQuantity(16 * i + Offset);
990 Address LoadAddr =
991 CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, BaseOffset);
992 LoadAddr = LoadAddr.withElementType(BaseTy);
994 Address StoreAddr = CGF.Builder.CreateConstArrayGEP(Tmp, i);
996 llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr);
997 CGF.Builder.CreateStore(Elem, StoreAddr);
1000 RegAddr = Tmp.withElementType(MemTy);
1001 } else {
1002 // Otherwise the object is contiguous in memory.
1004 // It might be right-aligned in its slot.
1005 CharUnits SlotSize = BaseAddr.getAlignment();
1006 if (CGF.CGM.getDataLayout().isBigEndian() && !IsIndirect &&
1007 (IsHFA || !isAggregateTypeForABI(Ty)) &&
1008 TySize < SlotSize) {
1009 CharUnits Offset = SlotSize - TySize;
1010 BaseAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, Offset);
1013 RegAddr = BaseAddr.withElementType(MemTy);
1016 CGF.EmitBranch(ContBlock);
1018 //=======================================
1019 // Argument was on the stack
1020 //=======================================
1021 CGF.EmitBlock(OnStackBlock);
1023 Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p");
1024 llvm::Value *OnStackPtr = CGF.Builder.CreateLoad(stack_p, "stack");
1026 // Again, stack arguments may need realignment. In this case both integer and
1027 // floating-point ones might be affected.
1028 if (!IsIndirect && TyAlign.getQuantity() > 8) {
1029 OnStackPtr = emitRoundPointerUpToAlignment(CGF, OnStackPtr, TyAlign);
1031 Address OnStackAddr = Address(OnStackPtr, CGF.Int8Ty,
1032 std::max(CharUnits::fromQuantity(8), TyAlign));
1034 // All stack slots are multiples of 8 bytes.
1035 CharUnits StackSlotSize = CharUnits::fromQuantity(8);
1036 CharUnits StackSize;
1037 if (IsIndirect)
1038 StackSize = StackSlotSize;
1039 else
1040 StackSize = TySize.alignTo(StackSlotSize);
1042 llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize);
1043 llvm::Value *NewStack = CGF.Builder.CreateInBoundsGEP(
1044 CGF.Int8Ty, OnStackPtr, StackSizeC, "new_stack");
1046 // Write the new value of __stack for the next call to va_arg
1047 CGF.Builder.CreateStore(NewStack, stack_p);
1049 if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) &&
1050 TySize < StackSlotSize) {
1051 CharUnits Offset = StackSlotSize - TySize;
1052 OnStackAddr = CGF.Builder.CreateConstInBoundsByteGEP(OnStackAddr, Offset);
1055 OnStackAddr = OnStackAddr.withElementType(MemTy);
1057 CGF.EmitBranch(ContBlock);
1059 //=======================================
1060 // Tidy up
1061 //=======================================
1062 CGF.EmitBlock(ContBlock);
1064 Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, OnStackAddr,
1065 OnStackBlock, "vaargs.addr");
1067 if (IsIndirect)
1068 return CGF.EmitLoadOfAnyValue(
1069 CGF.MakeAddrLValue(
1070 Address(CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"), ElementTy,
1071 TyAlign),
1072 Ty),
1073 Slot);
1075 return CGF.EmitLoadOfAnyValue(CGF.MakeAddrLValue(ResAddr, Ty), Slot);
1078 RValue AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
1079 CodeGenFunction &CGF,
1080 AggValueSlot Slot) const {
1081 // The backend's lowering doesn't support va_arg for aggregates or
1082 // illegal vector types. Lower VAArg here for these cases and use
1083 // the LLVM va_arg instruction for everything else.
1084 if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty))
1085 return CGF.EmitLoadOfAnyValue(
1086 CGF.MakeAddrLValue(
1087 EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()), Ty),
1088 Slot);
1090 uint64_t PointerSize = getTarget().getPointerWidth(LangAS::Default) / 8;
1091 CharUnits SlotSize = CharUnits::fromQuantity(PointerSize);
1093 // Empty records are ignored for parameter passing purposes.
1094 if (isEmptyRecord(getContext(), Ty, true))
1095 return Slot.asRValue();
1097 // The size of the actual thing passed, which might end up just
1098 // being a pointer for indirect types.
1099 auto TyInfo = getContext().getTypeInfoInChars(Ty);
1101 // Arguments bigger than 16 bytes which aren't homogeneous
1102 // aggregates should be passed indirectly.
1103 bool IsIndirect = false;
1104 if (TyInfo.Width.getQuantity() > 16) {
1105 const Type *Base = nullptr;
1106 uint64_t Members = 0;
1107 IsIndirect = !isHomogeneousAggregate(Ty, Base, Members);
1110 return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo, SlotSize,
1111 /*AllowHigherAlign*/ true, Slot);
1114 RValue AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
1115 QualType Ty, AggValueSlot Slot) const {
1116 bool IsIndirect = false;
1118 // Composites larger than 16 bytes are passed by reference.
1119 if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
1120 IsIndirect = true;
1122 return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
1123 CGF.getContext().getTypeInfoInChars(Ty),
1124 CharUnits::fromQuantity(8),
1125 /*allowHigherAlign*/ false, Slot);
1128 static bool isStreamingCompatible(const FunctionDecl *F) {
1129 if (const auto *T = F->getType()->getAs<FunctionProtoType>())
1130 return T->getAArch64SMEAttributes() &
1131 FunctionType::SME_PStateSMCompatibleMask;
1132 return false;
1135 // Report an error if an argument or return value of type Ty would need to be
1136 // passed in a floating-point register.
1137 static void diagnoseIfNeedsFPReg(DiagnosticsEngine &Diags,
1138 const StringRef ABIName,
1139 const AArch64ABIInfo &ABIInfo,
1140 const QualType &Ty, const NamedDecl *D,
1141 SourceLocation loc) {
1142 const Type *HABase = nullptr;
1143 uint64_t HAMembers = 0;
1144 if (Ty->isFloatingType() || Ty->isVectorType() ||
1145 ABIInfo.isHomogeneousAggregate(Ty, HABase, HAMembers)) {
1146 Diags.Report(loc, diag::err_target_unsupported_type_for_abi)
1147 << D->getDeclName() << Ty << ABIName;
 1151 // If we are using a hard-float ABI but do not have floating-point registers,
 1152 // then report an error for any function arguments or returns which would be
 1153 // passed in floating-point registers.
1154 void AArch64TargetCodeGenInfo::checkFunctionABI(
1155 CodeGenModule &CGM, const FunctionDecl *FuncDecl) const {
1156 const AArch64ABIInfo &ABIInfo = getABIInfo<AArch64ABIInfo>();
1157 const TargetInfo &TI = ABIInfo.getContext().getTargetInfo();
1159 if (!TI.hasFeature("fp") && !ABIInfo.isSoftFloat()) {
1160 diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo,
1161 FuncDecl->getReturnType(), FuncDecl,
1162 FuncDecl->getLocation());
1163 for (ParmVarDecl *PVD : FuncDecl->parameters()) {
1164 diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, PVD->getType(),
1165 PVD, FuncDecl->getLocation());
1170 enum class ArmSMEInlinability : uint8_t {
1171 Ok = 0,
1172 ErrorCalleeRequiresNewZA = 1 << 0,
1173 ErrorCalleeRequiresNewZT0 = 1 << 1,
1174 WarnIncompatibleStreamingModes = 1 << 2,
1175 ErrorIncompatibleStreamingModes = 1 << 3,
1177 IncompatibleStreamingModes =
1178 WarnIncompatibleStreamingModes | ErrorIncompatibleStreamingModes,
1180 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/ErrorIncompatibleStreamingModes),
1183 /// Determines if there are any Arm SME ABI issues with inlining \p Callee into
1184 /// \p Caller. Returns the issue (if any) in the ArmSMEInlinability bit enum.
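/// For illustration: inlining an always_inline __arm_streaming callee into a
/// caller that is neither streaming nor streaming-compatible would switch
/// streaming mode mid-function, so it yields ErrorIncompatibleStreamingModes;
/// a streaming caller inlining a plain non-streaming callee only warns.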
1185 static ArmSMEInlinability GetArmSMEInlinability(const FunctionDecl *Caller,
1186 const FunctionDecl *Callee) {
1187 bool CallerIsStreaming =
1188 IsArmStreamingFunction(Caller, /*IncludeLocallyStreaming=*/true);
1189 bool CalleeIsStreaming =
1190 IsArmStreamingFunction(Callee, /*IncludeLocallyStreaming=*/true);
1191 bool CallerIsStreamingCompatible = isStreamingCompatible(Caller);
1192 bool CalleeIsStreamingCompatible = isStreamingCompatible(Callee);
1194 ArmSMEInlinability Inlinability = ArmSMEInlinability::Ok;
1196 if (!CalleeIsStreamingCompatible &&
1197 (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible)) {
1198 if (CalleeIsStreaming)
1199 Inlinability |= ArmSMEInlinability::ErrorIncompatibleStreamingModes;
1200 else
1201 Inlinability |= ArmSMEInlinability::WarnIncompatibleStreamingModes;
1203 if (auto *NewAttr = Callee->getAttr<ArmNewAttr>()) {
1204 if (NewAttr->isNewZA())
1205 Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZA;
1206 if (NewAttr->isNewZT0())
1207 Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZT0;
1210 return Inlinability;
1213 void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
1214 CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
1215 const FunctionDecl *Callee) const {
1216 if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
1217 return;
1219 ArmSMEInlinability Inlinability = GetArmSMEInlinability(Caller, Callee);
1221 if ((Inlinability & ArmSMEInlinability::IncompatibleStreamingModes) !=
1222 ArmSMEInlinability::Ok)
1223 CGM.getDiags().Report(
1224 CallLoc,
1225 (Inlinability & ArmSMEInlinability::ErrorIncompatibleStreamingModes) ==
1226 ArmSMEInlinability::ErrorIncompatibleStreamingModes
1227 ? diag::err_function_always_inline_attribute_mismatch
1228 : diag::warn_function_always_inline_attribute_mismatch)
1229 << Caller->getDeclName() << Callee->getDeclName() << "streaming";
1231 if ((Inlinability & ArmSMEInlinability::ErrorCalleeRequiresNewZA) ==
1232 ArmSMEInlinability::ErrorCalleeRequiresNewZA)
1233 CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
1234 << Callee->getDeclName();
1236 if ((Inlinability & ArmSMEInlinability::ErrorCalleeRequiresNewZT0) ==
1237 ArmSMEInlinability::ErrorCalleeRequiresNewZT0)
1238 CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_zt0)
1239 << Callee->getDeclName();
1242 // If the target does not have floating-point registers, but we are using a
1243 // hard-float ABI, there is no way to pass floating-point, vector or HFA values
1244 // to functions, so we report an error.
1245 void AArch64TargetCodeGenInfo::checkFunctionCallABISoftFloat(
1246 CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
1247 const FunctionDecl *Callee, const CallArgList &Args,
1248 QualType ReturnType) const {
1249 const AArch64ABIInfo &ABIInfo = getABIInfo<AArch64ABIInfo>();
1250 const TargetInfo &TI = ABIInfo.getContext().getTargetInfo();
1252 if (!Caller || TI.hasFeature("fp") || ABIInfo.isSoftFloat())
1253 return;
1255 diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, ReturnType,
1256 Callee ? Callee : Caller, CallLoc);
1258 for (const CallArg &Arg : Args)
1259 diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, Arg.getType(),
1260 Callee ? Callee : Caller, CallLoc);
1263 void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
1264 SourceLocation CallLoc,
1265 const FunctionDecl *Caller,
1266 const FunctionDecl *Callee,
1267 const CallArgList &Args,
1268 QualType ReturnType) const {
1269 checkFunctionCallABIStreaming(CGM, CallLoc, Caller, Callee);
1270 checkFunctionCallABISoftFloat(CGM, CallLoc, Caller, Callee, Args, ReturnType);
1273 bool AArch64TargetCodeGenInfo::wouldInliningViolateFunctionCallABI(
1274 const FunctionDecl *Caller, const FunctionDecl *Callee) const {
1275 return Caller && Callee &&
1276 GetArmSMEInlinability(Caller, Callee) != ArmSMEInlinability::Ok;
1279 void AArch64ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
1280 unsigned Index,
1281 raw_ostream &Out) const {
1282 appendAttributeMangling(Attr->getFeatureStr(Index), Out);
1285 void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr,
1286 raw_ostream &Out) const {
1287 if (AttrStr == "default") {
1288 Out << ".default";
1289 return;
1292 Out << "._";
1293 SmallVector<StringRef, 8> Features;
1294 AttrStr.split(Features, "+");
1295 for (auto &Feat : Features)
1296 Feat = Feat.trim();
1298 llvm::sort(Features, [](const StringRef LHS, const StringRef RHS) {
1299 return LHS.compare(RHS) < 0;
1302 llvm::SmallDenseSet<StringRef, 8> UniqueFeats;
1303 for (auto &Feat : Features)
1304 if (auto Ext = llvm::AArch64::parseFMVExtension(Feat))
1305 if (UniqueFeats.insert(Ext->Name).second)
1306 Out << 'M' << Ext->Name;
1309 std::unique_ptr<TargetCodeGenInfo>
1310 CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM,
1311 AArch64ABIKind Kind) {
1312 return std::make_unique<AArch64TargetCodeGenInfo>(CGM.getTypes(), Kind);
1315 std::unique_ptr<TargetCodeGenInfo>
1316 CodeGen::createWindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM,
1317 AArch64ABIKind K) {
1318 return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM.getTypes(), K);