[AMDGPU][CodeGen] Do not backtrace invalid -regalloc param (#119687)
[llvm-project.git] / clang / lib / CodeGen / CGBuiltin.cpp
blob49a4c1ecc825e74691e70f192912c074f48d5c09
1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This contains code to emit Builtin calls as LLVM code.
11 //===----------------------------------------------------------------------===//
13 #include "ABIInfo.h"
14 #include "CGCUDARuntime.h"
15 #include "CGCXXABI.h"
16 #include "CGHLSLRuntime.h"
17 #include "CGObjCRuntime.h"
18 #include "CGOpenCLRuntime.h"
19 #include "CGRecordLayout.h"
20 #include "CGValue.h"
21 #include "CodeGenFunction.h"
22 #include "CodeGenModule.h"
23 #include "ConstantEmitter.h"
24 #include "PatternInit.h"
25 #include "TargetInfo.h"
26 #include "clang/AST/ASTContext.h"
27 #include "clang/AST/Attr.h"
28 #include "clang/AST/Decl.h"
29 #include "clang/AST/Expr.h"
30 #include "clang/AST/OSLog.h"
31 #include "clang/AST/OperationKinds.h"
32 #include "clang/AST/Type.h"
33 #include "clang/Basic/TargetBuiltins.h"
34 #include "clang/Basic/TargetInfo.h"
35 #include "clang/Basic/TargetOptions.h"
36 #include "clang/CodeGen/CGFunctionInfo.h"
37 #include "clang/Frontend/FrontendDiagnostic.h"
38 #include "llvm/ADT/APFloat.h"
39 #include "llvm/ADT/APInt.h"
40 #include "llvm/ADT/FloatingPointMode.h"
41 #include "llvm/ADT/SmallPtrSet.h"
42 #include "llvm/ADT/StringExtras.h"
43 #include "llvm/Analysis/ValueTracking.h"
44 #include "llvm/IR/DataLayout.h"
45 #include "llvm/IR/InlineAsm.h"
46 #include "llvm/IR/Intrinsics.h"
47 #include "llvm/IR/IntrinsicsAArch64.h"
48 #include "llvm/IR/IntrinsicsAMDGPU.h"
49 #include "llvm/IR/IntrinsicsARM.h"
50 #include "llvm/IR/IntrinsicsBPF.h"
51 #include "llvm/IR/IntrinsicsDirectX.h"
52 #include "llvm/IR/IntrinsicsHexagon.h"
53 #include "llvm/IR/IntrinsicsNVPTX.h"
54 #include "llvm/IR/IntrinsicsPowerPC.h"
55 #include "llvm/IR/IntrinsicsR600.h"
56 #include "llvm/IR/IntrinsicsRISCV.h"
57 #include "llvm/IR/IntrinsicsS390.h"
58 #include "llvm/IR/IntrinsicsWebAssembly.h"
59 #include "llvm/IR/IntrinsicsX86.h"
60 #include "llvm/IR/MDBuilder.h"
61 #include "llvm/IR/MatrixBuilder.h"
62 #include "llvm/IR/MemoryModelRelaxationAnnotations.h"
63 #include "llvm/Support/AMDGPUAddrSpace.h"
64 #include "llvm/Support/ConvertUTF.h"
65 #include "llvm/Support/MathExtras.h"
66 #include "llvm/Support/ScopedPrinter.h"
67 #include "llvm/TargetParser/AArch64TargetParser.h"
68 #include "llvm/TargetParser/RISCVISAInfo.h"
69 #include "llvm/TargetParser/RISCVTargetParser.h"
70 #include "llvm/TargetParser/X86TargetParser.h"
71 #include <optional>
72 #include <utility>
74 using namespace clang;
75 using namespace CodeGen;
76 using namespace llvm;
78 static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
79 Align AlignmentInBytes) {
80 ConstantInt *Byte;
81 switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
82 case LangOptions::TrivialAutoVarInitKind::Uninitialized:
83 // Nothing to initialize.
84 return;
85 case LangOptions::TrivialAutoVarInitKind::Zero:
86 Byte = CGF.Builder.getInt8(0x00);
87 break;
88 case LangOptions::TrivialAutoVarInitKind::Pattern: {
89 llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
90 Byte = llvm::dyn_cast<llvm::ConstantInt>(
91 initializationPatternFor(CGF.CGM, Int8));
92 break;
95 if (CGF.CGM.stopAutoInit())
96 return;
97 auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
98 I->addAnnotationMetadata("auto-init");
101 static Value *handleHlslClip(const CallExpr *E, CodeGenFunction *CGF) {
102 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
104 Constant *FZeroConst = ConstantFP::getZero(CGF->FloatTy);
105 Value *CMP;
106 Value *LastInstr;
108 if (const auto *VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
109 FZeroConst = ConstantVector::getSplat(
110 ElementCount::getFixed(VecTy->getNumElements()), FZeroConst);
111 auto *FCompInst = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
112 CMP = CGF->Builder.CreateIntrinsic(
113 CGF->Builder.getInt1Ty(), CGF->CGM.getHLSLRuntime().getAnyIntrinsic(),
114 {FCompInst}, nullptr);
115 } else
116 CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
118 if (CGF->CGM.getTarget().getTriple().isDXIL())
119 LastInstr = CGF->Builder.CreateIntrinsic(
120 CGF->VoidTy, llvm::Intrinsic::dx_discard, {CMP}, nullptr);
121 else if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
122 BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn);
123 BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn);
125 CGF->Builder.CreateCondBr(CMP, LT0, End);
127 CGF->Builder.SetInsertPoint(LT0);
129 CGF->Builder.CreateIntrinsic(CGF->VoidTy, llvm::Intrinsic::spv_discard, {},
130 nullptr);
132 LastInstr = CGF->Builder.CreateBr(End);
134 CGF->Builder.SetInsertPoint(End);
135 } else {
136 llvm_unreachable("Backend Codegen not supported.");
139 return LastInstr;
142 static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
143 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
144 const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1));
145 const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2));
147 CallArgList Args;
148 LValue Op1TmpLValue =
149 CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
150 LValue Op2TmpLValue =
151 CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());
153 if (CGF->getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee())
154 Args.reverseWritebacks();
156 Value *LowBits = nullptr;
157 Value *HighBits = nullptr;
159 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
161 llvm::Type *RetElementTy = CGF->Int32Ty;
162 if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>())
163 RetElementTy = llvm::VectorType::get(
164 CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
165 auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy);
167 CallInst *CI = CGF->Builder.CreateIntrinsic(
168 RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble");
170 LowBits = CGF->Builder.CreateExtractValue(CI, 0);
171 HighBits = CGF->Builder.CreateExtractValue(CI, 1);
173 } else {
174 // For Non DXIL targets we generate the instructions.
176 if (!Op0->getType()->isVectorTy()) {
177 FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2);
178 Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy);
180 LowBits = CGF->Builder.CreateExtractElement(Bitcast, (uint64_t)0);
181 HighBits = CGF->Builder.CreateExtractElement(Bitcast, 1);
182 } else {
183 int NumElements = 1;
184 if (const auto *VecTy =
185 E->getArg(0)->getType()->getAs<clang::VectorType>())
186 NumElements = VecTy->getNumElements();
188 FixedVectorType *Uint32VecTy =
189 FixedVectorType::get(CGF->Int32Ty, NumElements * 2);
190 Value *Uint32Vec = CGF->Builder.CreateBitCast(Op0, Uint32VecTy);
191 if (NumElements == 1) {
192 LowBits = CGF->Builder.CreateExtractElement(Uint32Vec, (uint64_t)0);
193 HighBits = CGF->Builder.CreateExtractElement(Uint32Vec, 1);
194 } else {
195 SmallVector<int> EvenMask, OddMask;
196 for (int I = 0, E = NumElements; I != E; ++I) {
197 EvenMask.push_back(I * 2);
198 OddMask.push_back(I * 2 + 1);
200 LowBits = CGF->Builder.CreateShuffleVector(Uint32Vec, EvenMask);
201 HighBits = CGF->Builder.CreateShuffleVector(Uint32Vec, OddMask);
205 CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress());
206 auto *LastInst =
207 CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress());
208 CGF->EmitWritebacks(Args);
209 return LastInst;
212 static Value *handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E) {
213 assert((E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
214 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation()) &&
215 "asdouble operands types mismatch");
216 Value *OpLowBits = CGF.EmitScalarExpr(E->getArg(0));
217 Value *OpHighBits = CGF.EmitScalarExpr(E->getArg(1));
219 llvm::Type *ResultType = CGF.DoubleTy;
220 int N = 1;
221 if (auto *VTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
222 N = VTy->getNumElements();
223 ResultType = llvm::FixedVectorType::get(CGF.DoubleTy, N);
226 if (CGF.CGM.getTarget().getTriple().isDXIL())
227 return CGF.Builder.CreateIntrinsic(
228 /*ReturnType=*/ResultType, Intrinsic::dx_asdouble,
229 ArrayRef<Value *>{OpLowBits, OpHighBits}, nullptr, "hlsl.asdouble");
231 if (!E->getArg(0)->getType()->isVectorType()) {
232 OpLowBits = CGF.Builder.CreateVectorSplat(1, OpLowBits);
233 OpHighBits = CGF.Builder.CreateVectorSplat(1, OpHighBits);
236 llvm::SmallVector<int> Mask;
237 for (int i = 0; i < N; i++) {
238 Mask.push_back(i);
239 Mask.push_back(i + N);
242 Value *BitVec = CGF.Builder.CreateShuffleVector(OpLowBits, OpHighBits, Mask);
244 return CGF.Builder.CreateBitCast(BitVec, ResultType);
247 /// Helper for the read/write/add/inc X18 builtins: read the X18 register and
248 /// return it as an i8 pointer.
249 Value *readX18AsPtr(CodeGenFunction &CGF) {
250 LLVMContext &Context = CGF.CGM.getLLVMContext();
251 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
252 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
253 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
254 llvm::Function *F =
255 CGF.CGM.getIntrinsic(llvm::Intrinsic::read_register, {CGF.Int64Ty});
256 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
257 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
260 /// getBuiltinLibFunction - Given a builtin id for a function like
261 /// "__builtin_fabsf", return a Function* for "fabsf".
262 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
263 unsigned BuiltinID) {
264 assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
266 // Get the name, skip over the __builtin_ prefix (if necessary).
267 StringRef Name;
268 GlobalDecl D(FD);
270 // TODO: This list should be expanded or refactored after all GCC-compatible
271 // std libcall builtins are implemented.
272 static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
273 {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
274 {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
275 {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
276 {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
277 {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
278 {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
279 {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
280 {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
281 {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
282 {Builtin::BI__builtin_printf, "__printfieee128"},
283 {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
284 {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
285 {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
286 {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
287 {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
288 {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
289 {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
290 {Builtin::BI__builtin_scanf, "__scanfieee128"},
291 {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
292 {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
293 {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
294 {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
295 {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
298 // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
299 // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
300 // if it is 64-bit 'long double' mode.
301 static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
302 {Builtin::BI__builtin_frexpl, "frexp"},
303 {Builtin::BI__builtin_ldexpl, "ldexp"},
304 {Builtin::BI__builtin_modfl, "modf"},
307 // If the builtin has been declared explicitly with an assembler label,
308 // use the mangled name. This differs from the plain label on platforms
309 // that prefix labels.
310 if (FD->hasAttr<AsmLabelAttr>())
311 Name = getMangledName(D);
312 else {
313 // TODO: This mutation should also be applied to other targets other than
314 // PPC, after backend supports IEEE 128-bit style libcalls.
315 if (getTriple().isPPC64() &&
316 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
317 F128Builtins.contains(BuiltinID))
318 Name = F128Builtins[BuiltinID];
319 else if (getTriple().isOSAIX() &&
320 &getTarget().getLongDoubleFormat() ==
321 &llvm::APFloat::IEEEdouble() &&
322 AIXLongDouble64Builtins.contains(BuiltinID))
323 Name = AIXLongDouble64Builtins[BuiltinID];
324 else
325 Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
328 llvm::FunctionType *Ty =
329 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
331 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
334 /// Emit the conversions required to turn the given value into an
335 /// integer of the given size.
336 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
337 QualType T, llvm::IntegerType *IntType) {
338 V = CGF.EmitToMemory(V, T);
340 if (V->getType()->isPointerTy())
341 return CGF.Builder.CreatePtrToInt(V, IntType);
343 assert(V->getType() == IntType);
344 return V;
347 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
348 QualType T, llvm::Type *ResultType) {
349 V = CGF.EmitFromMemory(V, T);
351 if (ResultType->isPointerTy())
352 return CGF.Builder.CreateIntToPtr(V, ResultType);
354 assert(V->getType() == ResultType);
355 return V;
358 static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
359 ASTContext &Ctx = CGF.getContext();
360 Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
361 unsigned Bytes = Ptr.getElementType()->isPointerTy()
362 ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
363 : Ptr.getElementType()->getScalarSizeInBits() / 8;
364 unsigned Align = Ptr.getAlignment().getQuantity();
365 if (Align % Bytes != 0) {
366 DiagnosticsEngine &Diags = CGF.CGM.getDiags();
367 Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
368 // Force address to be at least naturally-aligned.
369 return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
371 return Ptr;
374 /// Utility to insert an atomic instruction based on Intrinsic::ID
375 /// and the expression node.
376 static Value *MakeBinaryAtomicValue(
377 CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
378 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
380 QualType T = E->getType();
381 assert(E->getArg(0)->getType()->isPointerType());
382 assert(CGF.getContext().hasSameUnqualifiedType(T,
383 E->getArg(0)->getType()->getPointeeType()));
384 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
386 Address DestAddr = CheckAtomicAlignment(CGF, E);
388 llvm::IntegerType *IntType = llvm::IntegerType::get(
389 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
391 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
392 llvm::Type *ValueType = Val->getType();
393 Val = EmitToInt(CGF, Val, T, IntType);
395 llvm::Value *Result =
396 CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
397 return EmitFromInt(CGF, Result, T, ValueType);
400 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
401 Value *Val = CGF.EmitScalarExpr(E->getArg(0));
402 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
404 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
405 LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
406 LV.setNontemporal(true);
407 CGF.EmitStoreOfScalar(Val, LV, false);
408 return nullptr;
411 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
412 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
414 LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
415 LV.setNontemporal(true);
416 return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
419 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
420 llvm::AtomicRMWInst::BinOp Kind,
421 const CallExpr *E) {
422 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
425 /// Utility to insert an atomic instruction based Intrinsic::ID and
426 /// the expression node, where the return value is the result of the
427 /// operation.
428 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
429 llvm::AtomicRMWInst::BinOp Kind,
430 const CallExpr *E,
431 Instruction::BinaryOps Op,
432 bool Invert = false) {
433 QualType T = E->getType();
434 assert(E->getArg(0)->getType()->isPointerType());
435 assert(CGF.getContext().hasSameUnqualifiedType(T,
436 E->getArg(0)->getType()->getPointeeType()));
437 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
439 Address DestAddr = CheckAtomicAlignment(CGF, E);
441 llvm::IntegerType *IntType = llvm::IntegerType::get(
442 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
444 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
445 llvm::Type *ValueType = Val->getType();
446 Val = EmitToInt(CGF, Val, T, IntType);
448 llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
449 Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
450 Result = CGF.Builder.CreateBinOp(Op, Result, Val);
451 if (Invert)
452 Result =
453 CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
454 llvm::ConstantInt::getAllOnesValue(IntType));
455 Result = EmitFromInt(CGF, Result, T, ValueType);
456 return RValue::get(Result);
459 /// Utility to insert an atomic cmpxchg instruction.
461 /// @param CGF The current codegen function.
462 /// @param E Builtin call expression to convert to cmpxchg.
463 /// arg0 - address to operate on
464 /// arg1 - value to compare with
465 /// arg2 - new value
466 /// @param ReturnBool Specifies whether to return success flag of
467 /// cmpxchg result or the old value.
469 /// @returns result of cmpxchg, according to ReturnBool
471 /// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics
472 /// invoke the function EmitAtomicCmpXchgForMSIntrin.
473 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
474 bool ReturnBool) {
475 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
476 Address DestAddr = CheckAtomicAlignment(CGF, E);
478 llvm::IntegerType *IntType = llvm::IntegerType::get(
479 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
481 Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
482 llvm::Type *ValueType = Cmp->getType();
483 Cmp = EmitToInt(CGF, Cmp, T, IntType);
484 Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
486 Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
487 DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
488 llvm::AtomicOrdering::SequentiallyConsistent);
489 if (ReturnBool)
490 // Extract boolean success flag and zext it to int.
491 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
492 CGF.ConvertType(E->getType()));
493 else
494 // Extract old value and emit it using the same type as compare value.
495 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
496 ValueType);
499 /// This function should be invoked to emit atomic cmpxchg for Microsoft's
500 /// _InterlockedCompareExchange* intrinsics which have the following signature:
501 /// T _InterlockedCompareExchange(T volatile *Destination,
502 /// T Exchange,
503 /// T Comparand);
505 /// Whereas the llvm 'cmpxchg' instruction has the following syntax:
506 /// cmpxchg *Destination, Comparand, Exchange.
507 /// So we need to swap Comparand and Exchange when invoking
508 /// CreateAtomicCmpXchg. That is the reason we could not use the above utility
509 /// function MakeAtomicCmpXchgValue since it expects the arguments to be
510 /// already swapped.
512 static
513 Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
514 AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
515 assert(E->getArg(0)->getType()->isPointerType());
516 assert(CGF.getContext().hasSameUnqualifiedType(
517 E->getType(), E->getArg(0)->getType()->getPointeeType()));
518 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
519 E->getArg(1)->getType()));
520 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
521 E->getArg(2)->getType()));
523 Address DestAddr = CheckAtomicAlignment(CGF, E);
525 auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
526 auto *RTy = Exchange->getType();
528 auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
530 if (RTy->isPointerTy()) {
531 Exchange = CGF.Builder.CreatePtrToInt(Exchange, CGF.IntPtrTy);
532 Comparand = CGF.Builder.CreatePtrToInt(Comparand, CGF.IntPtrTy);
535 // For Release ordering, the failure ordering should be Monotonic.
536 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
537 AtomicOrdering::Monotonic :
538 SuccessOrdering;
540 // The atomic instruction is marked volatile for consistency with MSVC. This
541 // blocks the few atomics optimizations that LLVM has. If we want to optimize
542 // _Interlocked* operations in the future, we will have to remove the volatile
543 // marker.
544 auto *CmpXchg = CGF.Builder.CreateAtomicCmpXchg(
545 DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
546 CmpXchg->setVolatile(true);
548 auto *Result = CGF.Builder.CreateExtractValue(CmpXchg, 0);
549 if (RTy->isPointerTy()) {
550 Result = CGF.Builder.CreateIntToPtr(Result, RTy);
553 return Result;
556 // 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
557 // prototyped like this:
559 // unsigned char _InterlockedCompareExchange128...(
560 // __int64 volatile * _Destination,
561 // __int64 _ExchangeHigh,
562 // __int64 _ExchangeLow,
563 // __int64 * _ComparandResult);
565 // Note that Destination is assumed to be at least 16-byte aligned, despite
566 // being typed int64.
568 static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
569 const CallExpr *E,
570 AtomicOrdering SuccessOrdering) {
571 assert(E->getNumArgs() == 4);
572 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
573 llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
574 llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
575 Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
577 assert(DestPtr->getType()->isPointerTy());
578 assert(!ExchangeHigh->getType()->isPointerTy());
579 assert(!ExchangeLow->getType()->isPointerTy());
581 // For Release ordering, the failure ordering should be Monotonic.
582 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
583 ? AtomicOrdering::Monotonic
584 : SuccessOrdering;
586 // Convert to i128 pointers and values. Alignment is also overridden for
587 // destination pointer.
588 llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
589 Address DestAddr(DestPtr, Int128Ty,
590 CGF.getContext().toCharUnitsFromBits(128));
591 ComparandAddr = ComparandAddr.withElementType(Int128Ty);
593 // (((i128)hi) << 64) | ((i128)lo)
594 ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
595 ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
596 ExchangeHigh =
597 CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
598 llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
600 // Load the comparand for the instruction.
601 llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
603 auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
604 SuccessOrdering, FailureOrdering);
606 // The atomic instruction is marked volatile for consistency with MSVC. This
607 // blocks the few atomics optimizations that LLVM has. If we want to optimize
608 // _Interlocked* operations in the future, we will have to remove the volatile
609 // marker.
610 CXI->setVolatile(true);
612 // Store the result as an outparameter.
613 CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
614 ComparandAddr);
616 // Get the success boolean and zero extend it to i8.
617 Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
618 return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
621 static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
622 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
623 assert(E->getArg(0)->getType()->isPointerType());
625 auto *IntTy = CGF.ConvertType(E->getType());
626 Address DestAddr = CheckAtomicAlignment(CGF, E);
627 auto *Result = CGF.Builder.CreateAtomicRMW(
628 AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
629 return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
632 static Value *EmitAtomicDecrementValue(
633 CodeGenFunction &CGF, const CallExpr *E,
634 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
635 assert(E->getArg(0)->getType()->isPointerType());
637 auto *IntTy = CGF.ConvertType(E->getType());
638 Address DestAddr = CheckAtomicAlignment(CGF, E);
639 auto *Result = CGF.Builder.CreateAtomicRMW(
640 AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
641 return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
644 // Build a plain volatile load.
645 static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
646 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
647 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
648 CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
649 llvm::Type *ITy =
650 llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
651 llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
652 Load->setVolatile(true);
653 return Load;
656 // Build a plain volatile store.
657 static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
658 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
659 Value *Value = CGF.EmitScalarExpr(E->getArg(1));
660 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
661 CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
662 llvm::StoreInst *Store =
663 CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
664 Store->setVolatile(true);
665 return Store;
668 // Emit a simple mangled intrinsic that has 1 argument and a return type
669 // matching the argument type. Depending on mode, this may be a constrained
670 // floating-point intrinsic.
671 static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
672 const CallExpr *E, unsigned IntrinsicID,
673 unsigned ConstrainedIntrinsicID) {
674 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
676 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
677 if (CGF.Builder.getIsFPConstrained()) {
678 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
679 return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
680 } else {
681 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
682 return CGF.Builder.CreateCall(F, Src0);
686 // Emit an intrinsic that has 2 operands of the same type as its result.
687 // Depending on mode, this may be a constrained floating-point intrinsic.
688 static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
689 const CallExpr *E, unsigned IntrinsicID,
690 unsigned ConstrainedIntrinsicID) {
691 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
692 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
694 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
695 if (CGF.Builder.getIsFPConstrained()) {
696 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
697 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
698 } else {
699 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
700 return CGF.Builder.CreateCall(F, { Src0, Src1 });
704 // Has second type mangled argument.
705 static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
706 CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
707 llvm::Intrinsic::ID ConstrainedIntrinsicID) {
708 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
709 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
711 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
712 if (CGF.Builder.getIsFPConstrained()) {
713 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
714 {Src0->getType(), Src1->getType()});
715 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
718 Function *F =
719 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
720 return CGF.Builder.CreateCall(F, {Src0, Src1});
723 // Emit an intrinsic that has 3 operands of the same type as its result.
724 // Depending on mode, this may be a constrained floating-point intrinsic.
725 static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
726 const CallExpr *E, unsigned IntrinsicID,
727 unsigned ConstrainedIntrinsicID) {
728 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
729 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
730 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
732 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
733 if (CGF.Builder.getIsFPConstrained()) {
734 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
735 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
736 } else {
737 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
738 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
742 // Emit an intrinsic where all operands are of the same type as the result.
743 // Depending on mode, this may be a constrained floating-point intrinsic.
744 static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
745 unsigned IntrinsicID,
746 unsigned ConstrainedIntrinsicID,
747 llvm::Type *Ty,
748 ArrayRef<Value *> Args) {
749 Function *F;
750 if (CGF.Builder.getIsFPConstrained())
751 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
752 else
753 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
755 if (CGF.Builder.getIsFPConstrained())
756 return CGF.Builder.CreateConstrainedFPCall(F, Args);
757 else
758 return CGF.Builder.CreateCall(F, Args);
761 // Emit a simple intrinsic that has N scalar arguments and a return type
762 // matching the argument type. It is assumed that only the first argument is
763 // overloaded.
764 template <unsigned N>
765 static Value *emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF,
766 const CallExpr *E,
767 unsigned IntrinsicID,
768 llvm::StringRef Name = "") {
769 static_assert(N, "expect non-empty argument");
770 SmallVector<Value *, N> Args;
771 for (unsigned I = 0; I < N; ++I)
772 Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
773 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Args[0]->getType());
774 return CGF.Builder.CreateCall(F, Args, Name);
777 // Emit an intrinsic that has 4 operands of the same type as its result.
778 static Value *emitQuaternaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
779 unsigned IntrinsicID) {
780 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
781 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
782 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
783 llvm::Value *Src3 = CGF.EmitScalarExpr(E->getArg(3));
785 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
786 return CGF.Builder.CreateCall(F, {Src0, Src1, Src2, Src3});
789 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
790 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
791 const CallExpr *E,
792 unsigned IntrinsicID) {
793 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
794 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
796 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
797 return CGF.Builder.CreateCall(F, {Src0, Src1});
800 // Emit an intrinsic that has overloaded integer result and fp operand.
801 static Value *
802 emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
803 unsigned IntrinsicID,
804 unsigned ConstrainedIntrinsicID) {
805 llvm::Type *ResultType = CGF.ConvertType(E->getType());
806 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
808 if (CGF.Builder.getIsFPConstrained()) {
809 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
810 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
811 {ResultType, Src0->getType()});
812 return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
813 } else {
814 Function *F =
815 CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
816 return CGF.Builder.CreateCall(F, Src0);
820 static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
821 llvm::Intrinsic::ID IntrinsicID) {
822 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
823 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
825 QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
826 llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
827 llvm::Function *F =
828 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
829 llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);
831 llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
832 LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
833 CGF.EmitStoreOfScalar(Exp, LV);
835 return CGF.Builder.CreateExtractValue(Call, 0);
838 /// EmitFAbs - Emit a call to @llvm.fabs().
839 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
840 Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
841 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
842 Call->setDoesNotAccessMemory();
843 return Call;
846 /// Emit the computation of the sign bit for a floating point value. Returns
847 /// the i1 sign bit value.
848 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
849 LLVMContext &C = CGF.CGM.getLLVMContext();
851 llvm::Type *Ty = V->getType();
852 int Width = Ty->getPrimitiveSizeInBits();
853 llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
854 V = CGF.Builder.CreateBitCast(V, IntTy);
855 if (Ty->isPPC_FP128Ty()) {
856 // We want the sign bit of the higher-order double. The bitcast we just
857 // did works as if the double-double was stored to memory and then
858 // read as an i128. The "store" will put the higher-order double in the
859 // lower address in both little- and big-Endian modes, but the "load"
860 // will treat those bits as a different part of the i128: the low bits in
861 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
862 // we need to shift the high bits down to the low before truncating.
863 Width >>= 1;
864 if (CGF.getTarget().isBigEndian()) {
865 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
866 V = CGF.Builder.CreateLShr(V, ShiftCst);
868 // We are truncating value in order to extract the higher-order
869 // double, which we will be using to extract the sign from.
870 IntTy = llvm::IntegerType::get(C, Width);
871 V = CGF.Builder.CreateTrunc(V, IntTy);
873 Value *Zero = llvm::Constant::getNullValue(IntTy);
874 return CGF.Builder.CreateICmpSLT(V, Zero);
877 /// Checks no arguments or results are passed indirectly in the ABI (i.e. via a
878 /// hidden pointer). This is used to check annotating FP libcalls (that could
879 /// set `errno`) with "int" TBAA metadata is safe. If any floating-point
880 /// arguments are passed indirectly, setup for the call could be incorrectly
881 /// optimized out.
882 static bool HasNoIndirectArgumentsOrResults(CGFunctionInfo const &FnInfo) {
883 auto IsIndirect = [&](ABIArgInfo const &info) {
884 return info.isIndirect() || info.isIndirectAliased() || info.isInAlloca();
886 return !IsIndirect(FnInfo.getReturnInfo()) &&
887 llvm::none_of(FnInfo.arguments(),
888 [&](CGFunctionInfoArgInfo const &ArgInfo) {
889 return IsIndirect(ArgInfo.info);
893 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
894 const CallExpr *E, llvm::Constant *calleeValue) {
895 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
896 CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
897 llvm::CallBase *callOrInvoke = nullptr;
898 CGFunctionInfo const *FnInfo = nullptr;
899 RValue Call =
900 CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot(),
901 /*Chain=*/nullptr, &callOrInvoke, &FnInfo);
903 if (unsigned BuiltinID = FD->getBuiltinID()) {
904 // Check whether a FP math builtin function, such as BI__builtin_expf
905 ASTContext &Context = CGF.getContext();
906 bool ConstWithoutErrnoAndExceptions =
907 Context.BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
908 // Restrict to target with errno, for example, MacOS doesn't set errno.
909 // TODO: Support builtin function with complex type returned, eg: cacosh
910 if (ConstWithoutErrnoAndExceptions && CGF.CGM.getLangOpts().MathErrno &&
911 !CGF.Builder.getIsFPConstrained() && Call.isScalar() &&
912 HasNoIndirectArgumentsOrResults(*FnInfo)) {
913 // Emit "int" TBAA metadata on FP math libcalls.
914 clang::QualType IntTy = Context.IntTy;
915 TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy);
916 CGF.CGM.DecorateInstructionWithTBAA(callOrInvoke, TBAAInfo);
919 return Call;
922 /// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
923 /// depending on IntrinsicID.
925 /// \arg CGF The current codegen function.
926 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
927 /// \arg X The first argument to the llvm.*.with.overflow.*.
928 /// \arg Y The second argument to the llvm.*.with.overflow.*.
929 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
930 /// \returns The result (i.e. sum/product) returned by the intrinsic.
931 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
932 const llvm::Intrinsic::ID IntrinsicID,
933 llvm::Value *X, llvm::Value *Y,
934 llvm::Value *&Carry) {
935 // Make sure we have integers of the same width.
936 assert(X->getType() == Y->getType() &&
937 "Arguments must be the same type. (Did you forget to make sure both "
938 "arguments have the same integer width?)");
940 Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
941 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
942 Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
943 return CGF.Builder.CreateExtractValue(Tmp, 0);
946 static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID,
947 int low, int high) {
948 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
949 llvm::CallInst *Call = CGF.Builder.CreateCall(F);
950 llvm::ConstantRange CR(APInt(32, low), APInt(32, high));
951 Call->addRangeRetAttr(CR);
952 Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef);
953 return Call;
namespace {

// Describes an integer type by its bit width and signedness. Kept as a plain
// aggregate so it can be brace-initialized as {Width, Signed}.
struct WidthAndSignedness {
  // Width of the integer type in bits.
  unsigned Width;
  // True if the integer type is signed.
  bool Signed;
};

} // namespace
963 static WidthAndSignedness
964 getIntegerWidthAndSignedness(const clang::ASTContext &context,
965 const clang::QualType Type) {
966 assert(Type->isIntegerType() && "Given type is not an integer.");
967 unsigned Width = context.getIntWidth(Type);
968 bool Signed = Type->isSignedIntegerType();
969 return {Width, Signed};
972 // Given one or more integer types, this function produces an integer type that
973 // encompasses them: any value in one of the given types could be expressed in
974 // the encompassing type.
975 static struct WidthAndSignedness
976 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
977 assert(Types.size() > 0 && "Empty list of types.");
979 // If any of the given types is signed, we must return a signed type.
980 bool Signed = false;
981 for (const auto &Type : Types) {
982 Signed |= Type.Signed;
985 // The encompassing type must have a width greater than or equal to the width
986 // of the specified types. Additionally, if the encompassing type is signed,
987 // its width must be strictly greater than the width of any unsigned types
988 // given.
989 unsigned Width = 0;
990 for (const auto &Type : Types) {
991 unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
992 if (Width < MinWidth) {
993 Width = MinWidth;
997 return {Width, Signed};
1000 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
1001 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
1002 return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
1003 ArgValue);
/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct.
///
/// Identical types are always compatible; besides that only 0 -> 1 and
/// 3 -> 2 are accepted.
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  if (From == To)
    return true;
  if (From == 0)
    return To == 1;
  if (From == 3)
    return To == 2;
  return false;
}
1015 static llvm::Value *
1016 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
1017 return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
1020 llvm::Value *
1021 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
1022 llvm::IntegerType *ResType,
1023 llvm::Value *EmittedE,
1024 bool IsDynamic) {
1025 uint64_t ObjectSize;
1026 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
1027 return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
1028 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
1031 const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset(
1032 ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl,
1033 uint64_t &Offset) {
1034 const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
1035 getLangOpts().getStrictFlexArraysLevel();
1036 uint32_t FieldNo = 0;
1038 if (RD->isImplicit())
1039 return nullptr;
1041 for (const FieldDecl *FD : RD->fields()) {
1042 if ((!FAMDecl || FD == FAMDecl) &&
1043 Decl::isFlexibleArrayMemberLike(
1044 Ctx, FD, FD->getType(), StrictFlexArraysLevel,
1045 /*IgnoreTemplateOrMacroSubstitution=*/true)) {
1046 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
1047 Offset += Layout.getFieldOffset(FieldNo);
1048 return FD;
1051 QualType Ty = FD->getType();
1052 if (Ty->isRecordType()) {
1053 if (const FieldDecl *Field = FindFlexibleArrayMemberFieldAndOffset(
1054 Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) {
1055 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
1056 Offset += Layout.getFieldOffset(FieldNo);
1057 return Field;
1061 if (!RD->isUnion())
1062 ++FieldNo;
1065 return nullptr;
1068 static unsigned CountCountedByAttrs(const RecordDecl *RD) {
1069 unsigned Num = 0;
1071 for (const FieldDecl *FD : RD->fields()) {
1072 if (FD->getType()->isCountAttributedType())
1073 return ++Num;
1075 QualType Ty = FD->getType();
1076 if (Ty->isRecordType())
1077 Num += CountCountedByAttrs(Ty->getAsRecordDecl());
1080 return Num;
1083 llvm::Value *
1084 CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
1085 llvm::IntegerType *ResType) {
1086 // The code generated here calculates the size of a struct with a flexible
1087 // array member that uses the counted_by attribute. There are two instances
1088 // we handle:
1090 // struct s {
1091 // unsigned long flags;
1092 // int count;
1093 // int array[] __attribute__((counted_by(count)));
1094 // }
1096 // 1) bdos of the flexible array itself:
1098 // __builtin_dynamic_object_size(p->array, 1) ==
1099 // p->count * sizeof(*p->array)
1101 // 2) bdos of a pointer into the flexible array:
1103 // __builtin_dynamic_object_size(&p->array[42], 1) ==
1104 // (p->count - 42) * sizeof(*p->array)
1106 // 2) bdos of the whole struct, including the flexible array:
1108 // __builtin_dynamic_object_size(p, 1) ==
1109 // max(sizeof(struct s),
1110 // offsetof(struct s, array) + p->count * sizeof(*p->array))
1112 ASTContext &Ctx = getContext();
1113 const Expr *Base = E->IgnoreParenImpCasts();
1114 const Expr *Idx = nullptr;
1116 if (const auto *UO = dyn_cast<UnaryOperator>(Base);
1117 UO && UO->getOpcode() == UO_AddrOf) {
1118 Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
1119 if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
1120 Base = ASE->getBase()->IgnoreParenImpCasts();
1121 Idx = ASE->getIdx()->IgnoreParenImpCasts();
1123 if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
1124 int64_t Val = IL->getValue().getSExtValue();
1125 if (Val < 0)
1126 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1128 if (Val == 0)
1129 // The index is 0, so we don't need to take it into account.
1130 Idx = nullptr;
1132 } else {
1133 // Potential pointer to another element in the struct.
1134 Base = SubExpr;
1138 // Get the flexible array member Decl.
1139 const RecordDecl *OuterRD = nullptr;
1140 const FieldDecl *FAMDecl = nullptr;
1141 if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
1142 // Check if \p Base is referencing the FAM itself.
1143 const ValueDecl *VD = ME->getMemberDecl();
1144 OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
1145 FAMDecl = dyn_cast<FieldDecl>(VD);
1146 if (!FAMDecl)
1147 return nullptr;
1148 } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
1149 // Check if we're pointing to the whole struct.
1150 QualType Ty = DRE->getDecl()->getType();
1151 if (Ty->isPointerType())
1152 Ty = Ty->getPointeeType();
1153 OuterRD = Ty->getAsRecordDecl();
1155 // If we have a situation like this:
1157 // struct union_of_fams {
1158 // int flags;
1159 // union {
1160 // signed char normal_field;
1161 // struct {
1162 // int count1;
1163 // int arr1[] __counted_by(count1);
1164 // };
1165 // struct {
1166 // signed char count2;
1167 // int arr2[] __counted_by(count2);
1168 // };
1169 // };
1170 // };
1172 // We don't know which 'count' to use in this scenario:
1174 // size_t get_size(struct union_of_fams *p) {
1175 // return __builtin_dynamic_object_size(p, 1);
1176 // }
1178 // Instead of calculating a wrong number, we give up.
1179 if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
1180 return nullptr;
1183 if (!OuterRD)
1184 return nullptr;
1186 // We call FindFlexibleArrayMemberAndOffset even if FAMDecl is non-null to
1187 // get its offset.
1188 uint64_t Offset = 0;
1189 FAMDecl =
1190 FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);
1191 Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
1193 if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
1194 // No flexible array member found or it doesn't have the "counted_by"
1195 // attribute.
1196 return nullptr;
1198 const FieldDecl *CountedByFD = FAMDecl->findCountedByField();
1199 if (!CountedByFD)
1200 // Can't find the field referenced by the "counted_by" attribute.
1201 return nullptr;
1203 if (isa<DeclRefExpr>(Base))
1204 // The whole struct is specificed in the __bdos. The calculation of the
1205 // whole size of the structure can be done in two ways:
1207 // 1) sizeof(struct S) + count * sizeof(typeof(fam))
1208 // 2) offsetof(struct S, fam) + count * sizeof(typeof(fam))
1210 // The first will add additional padding after the end of the array,
1211 // allocation while the second method is more precise, but not quite
1212 // expected from programmers. See
1213 // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a
1214 // discussion of the topic.
1216 // GCC isn't (currently) able to calculate __bdos on a pointer to the whole
1217 // structure. Therefore, because of the above issue, we'll choose to match
1218 // what GCC does for consistency's sake.
1219 return nullptr;
1221 // Build a load of the counted_by field.
1222 bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
1223 Value *CountedByInst = EmitLoadOfCountedByField(Base, FAMDecl, CountedByFD);
1224 if (!CountedByInst)
1225 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1227 CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
1229 // Build a load of the index and subtract it from the count.
1230 Value *IdxInst = nullptr;
1231 if (Idx) {
1232 if (Idx->HasSideEffects(getContext()))
1233 // We can't have side-effects.
1234 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1236 bool IdxSigned = Idx->getType()->isSignedIntegerType();
1237 IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
1238 IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
1240 // We go ahead with the calculation here. If the index turns out to be
1241 // negative, we'll catch it at the end.
1242 CountedByInst =
1243 Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
1246 // Calculate how large the flexible array member is in bytes.
1247 const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
1248 CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
1249 llvm::Constant *ElemSize =
1250 llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
1251 Value *Res =
1252 Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
1253 Res = Builder.CreateIntCast(Res, ResType, IsSigned);
1255 // A negative \p IdxInst or \p CountedByInst means that the index lands
1256 // outside of the flexible array member. If that's the case, we want to
1257 // return 0.
1258 Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
1259 if (IdxInst)
1260 Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);
1262 return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
1265 /// Returns a Value corresponding to the size of the given expression.
1266 /// This Value may be either of the following:
1267 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
1268 /// it)
1269 /// - A call to the @llvm.objectsize intrinsic
1271 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
1272 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
1273 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
1274 llvm::Value *
1275 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
1276 llvm::IntegerType *ResType,
1277 llvm::Value *EmittedE, bool IsDynamic) {
1278 // We need to reference an argument if the pointer is a parameter with the
1279 // pass_object_size attribute.
1280 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
1281 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
1282 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
1283 if (Param != nullptr && PS != nullptr &&
1284 areBOSTypesCompatible(PS->getType(), Type)) {
1285 auto Iter = SizeArguments.find(Param);
1286 assert(Iter != SizeArguments.end());
1288 const ImplicitParamDecl *D = Iter->second;
1289 auto DIter = LocalDeclMap.find(D);
1290 assert(DIter != LocalDeclMap.end());
1292 return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
1293 getContext().getSizeType(), E->getBeginLoc());
1297 if (IsDynamic) {
1298 // Emit special code for a flexible array member with the "counted_by"
1299 // attribute.
1300 if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
1301 return V;
1304 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1305 // evaluate E for side-effects. In either case, we shouldn't lower to
1306 // @llvm.objectsize.
1307 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1308 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1310 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1311 assert(Ptr->getType()->isPointerTy() &&
1312 "Non-pointer passed to __builtin_object_size?");
1314 Function *F =
1315 CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
1317 // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
1318 Value *Min = Builder.getInt1((Type & 2) != 0);
1319 // For GCC compatibility, __builtin_object_size treat NULL as unknown size.
1320 Value *NullIsUnknown = Builder.getTrue();
1321 Value *Dynamic = Builder.getInt1(IsDynamic);
1322 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
namespace {

/// A struct to generically describe a bit test intrinsic: what it does to the
/// tested bit, how the access is ordered, and whether it is a 64-bit variant.
struct BitTest {
  /// What happens to the bit in addition to testing it.
  enum ActionKind : uint8_t {
    TestOnly,
    Complement,
    Reset,
    Set
  };

  /// Interlocking / ordering flavour of the intrinsic.
  enum InterlockingKind : uint8_t {
    Unlocked,
    Sequential,
    Acquire,
    Release,
    NoFence
  };

  ActionKind Action;
  InterlockingKind Interlocking;
  bool Is64Bit;

  /// Maps a bit test builtin ID to its description.
  static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};

} // namespace
1346 BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
1347 switch (BuiltinID) {
1348 // Main portable variants.
1349 case Builtin::BI_bittest:
1350 return {TestOnly, Unlocked, false};
1351 case Builtin::BI_bittestandcomplement:
1352 return {Complement, Unlocked, false};
1353 case Builtin::BI_bittestandreset:
1354 return {Reset, Unlocked, false};
1355 case Builtin::BI_bittestandset:
1356 return {Set, Unlocked, false};
1357 case Builtin::BI_interlockedbittestandreset:
1358 return {Reset, Sequential, false};
1359 case Builtin::BI_interlockedbittestandset:
1360 return {Set, Sequential, false};
1362 // X86-specific 64-bit variants.
1363 case Builtin::BI_bittest64:
1364 return {TestOnly, Unlocked, true};
1365 case Builtin::BI_bittestandcomplement64:
1366 return {Complement, Unlocked, true};
1367 case Builtin::BI_bittestandreset64:
1368 return {Reset, Unlocked, true};
1369 case Builtin::BI_bittestandset64:
1370 return {Set, Unlocked, true};
1371 case Builtin::BI_interlockedbittestandreset64:
1372 return {Reset, Sequential, true};
1373 case Builtin::BI_interlockedbittestandset64:
1374 return {Set, Sequential, true};
1376 // ARM/AArch64-specific ordering variants.
1377 case Builtin::BI_interlockedbittestandset_acq:
1378 return {Set, Acquire, false};
1379 case Builtin::BI_interlockedbittestandset_rel:
1380 return {Set, Release, false};
1381 case Builtin::BI_interlockedbittestandset_nf:
1382 return {Set, NoFence, false};
1383 case Builtin::BI_interlockedbittestandreset_acq:
1384 return {Reset, Acquire, false};
1385 case Builtin::BI_interlockedbittestandreset_rel:
1386 return {Reset, Release, false};
1387 case Builtin::BI_interlockedbittestandreset_nf:
1388 return {Reset, NoFence, false};
1390 llvm_unreachable("expected only bittest intrinsics");
1393 static char bitActionToX86BTCode(BitTest::ActionKind A) {
1394 switch (A) {
1395 case BitTest::TestOnly: return '\0';
1396 case BitTest::Complement: return 'c';
1397 case BitTest::Reset: return 'r';
1398 case BitTest::Set: return 's';
1400 llvm_unreachable("invalid action");
1403 static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
1404 BitTest BT,
1405 const CallExpr *E, Value *BitBase,
1406 Value *BitPos) {
1407 char Action = bitActionToX86BTCode(BT.Action);
1408 char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1410 // Build the assembly.
1411 SmallString<64> Asm;
1412 raw_svector_ostream AsmOS(Asm);
1413 if (BT.Interlocking != BitTest::Unlocked)
1414 AsmOS << "lock ";
1415 AsmOS << "bt";
1416 if (Action)
1417 AsmOS << Action;
1418 AsmOS << SizeSuffix << " $2, ($1)";
1420 // Build the constraints. FIXME: We should support immediates when possible.
1421 std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
1422 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1423 if (!MachineClobbers.empty()) {
1424 Constraints += ',';
1425 Constraints += MachineClobbers;
1427 llvm::IntegerType *IntType = llvm::IntegerType::get(
1428 CGF.getLLVMContext(),
1429 CGF.getContext().getTypeSize(E->getArg(1)->getType()));
1430 llvm::FunctionType *FTy =
1431 llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
1433 llvm::InlineAsm *IA =
1434 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1435 return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
1438 static llvm::AtomicOrdering
1439 getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
1440 switch (I) {
1441 case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
1442 case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
1443 case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
1444 case BitTest::Release: return llvm::AtomicOrdering::Release;
1445 case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
1447 llvm_unreachable("invalid interlocking");
1450 /// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1451 /// bits and a bit position and read and optionally modify the bit at that
1452 /// position. The position index can be arbitrarily large, i.e. it can be larger
1453 /// than 31 or 63, so we need an indexed load in the general case.
1454 static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1455 unsigned BuiltinID,
1456 const CallExpr *E) {
1457 Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1458 Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1460 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1462 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1463 // indexing operation internally. Use them if possible.
1464 if (CGF.getTarget().getTriple().isX86())
1465 return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1467 // Otherwise, use generic code to load one byte and test the bit. Use all but
1468 // the bottom three bits as the array index, and the bottom three bits to form
1469 // a mask.
1470 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1471 Value *ByteIndex = CGF.Builder.CreateAShr(
1472 BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1473 Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBase, ByteIndex,
1474 "bittest.byteaddr"),
1475 CGF.Int8Ty, CharUnits::One());
1476 Value *PosLow =
1477 CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1478 llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1480 // The updating instructions will need a mask.
1481 Value *Mask = nullptr;
1482 if (BT.Action != BitTest::TestOnly) {
1483 Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1484 "bittest.mask");
1487 // Check the action and ordering of the interlocked intrinsics.
1488 llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1490 Value *OldByte = nullptr;
1491 if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1492 // Emit a combined atomicrmw load/store operation for the interlocked
1493 // intrinsics.
1494 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1495 if (BT.Action == BitTest::Reset) {
1496 Mask = CGF.Builder.CreateNot(Mask);
1497 RMWOp = llvm::AtomicRMWInst::And;
1499 OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
1500 } else {
1501 // Emit a plain load for the non-interlocked intrinsics.
1502 OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1503 Value *NewByte = nullptr;
1504 switch (BT.Action) {
1505 case BitTest::TestOnly:
1506 // Don't store anything.
1507 break;
1508 case BitTest::Complement:
1509 NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1510 break;
1511 case BitTest::Reset:
1512 NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1513 break;
1514 case BitTest::Set:
1515 NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1516 break;
1518 if (NewByte)
1519 CGF.Builder.CreateStore(NewByte, ByteAddr);
1522 // However we loaded the old byte, either by plain load or atomicrmw, shift
1523 // the bit into the low position and mask it to 0 or 1.
1524 Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1525 return CGF.Builder.CreateAnd(
1526 ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1529 static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
1530 unsigned BuiltinID,
1531 const CallExpr *E) {
1532 Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1534 SmallString<64> Asm;
1535 raw_svector_ostream AsmOS(Asm);
1536 llvm::IntegerType *RetType = CGF.Int32Ty;
1538 switch (BuiltinID) {
1539 case clang::PPC::BI__builtin_ppc_ldarx:
1540 AsmOS << "ldarx ";
1541 RetType = CGF.Int64Ty;
1542 break;
1543 case clang::PPC::BI__builtin_ppc_lwarx:
1544 AsmOS << "lwarx ";
1545 RetType = CGF.Int32Ty;
1546 break;
1547 case clang::PPC::BI__builtin_ppc_lharx:
1548 AsmOS << "lharx ";
1549 RetType = CGF.Int16Ty;
1550 break;
1551 case clang::PPC::BI__builtin_ppc_lbarx:
1552 AsmOS << "lbarx ";
1553 RetType = CGF.Int8Ty;
1554 break;
1555 default:
1556 llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1559 AsmOS << "$0, ${1:y}";
1561 std::string Constraints = "=r,*Z,~{memory}";
1562 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1563 if (!MachineClobbers.empty()) {
1564 Constraints += ',';
1565 Constraints += MachineClobbers;
1568 llvm::Type *PtrType = CGF.UnqualPtrTy;
1569 llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1571 llvm::InlineAsm *IA =
1572 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1573 llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
1574 CI->addParamAttr(
1575 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1576 return CI;
namespace {

// Selects which MSVC runtime setjmp entry point a call is lowered to.
enum class MSVCSetJmpKind {
  _setjmpex,
  _setjmp3,
  _setjmp
};

} // namespace
1587 /// MSVC handles setjmp a bit differently on different platforms. On every
1588 /// architecture except 32-bit x86, the frame address is passed. On x86, extra
1589 /// parameters can be passed as variadic arguments, but we always pass none.
1590 static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1591 const CallExpr *E) {
1592 llvm::Value *Arg1 = nullptr;
1593 llvm::Type *Arg1Ty = nullptr;
1594 StringRef Name;
1595 bool IsVarArg = false;
1596 if (SJKind == MSVCSetJmpKind::_setjmp3) {
1597 Name = "_setjmp3";
1598 Arg1Ty = CGF.Int32Ty;
1599 Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1600 IsVarArg = true;
1601 } else {
1602 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1603 Arg1Ty = CGF.Int8PtrTy;
1604 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1605 Arg1 = CGF.Builder.CreateCall(
1606 CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1607 } else
1608 Arg1 = CGF.Builder.CreateCall(
1609 CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1610 llvm::ConstantInt::get(CGF.Int32Ty, 0));
1613 // Mark the call site and declaration with ReturnsTwice.
1614 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1615 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1616 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1617 llvm::Attribute::ReturnsTwice);
1618 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1619 llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1620 ReturnsTwiceAttr, /*Local=*/true);
1622 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1623 CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1624 llvm::Value *Args[] = {Buf, Arg1};
1625 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1626 CB->setAttributes(ReturnsTwiceAttr);
1627 return RValue::get(CB);
1630 // Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code,
1631 // we handle them here.
1632 enum class CodeGenFunction::MSVCIntrin {
1633 _BitScanForward,
1634 _BitScanReverse,
1635 _InterlockedAnd,
1636 _InterlockedCompareExchange,
1637 _InterlockedDecrement,
1638 _InterlockedExchange,
1639 _InterlockedExchangeAdd,
1640 _InterlockedExchangeSub,
1641 _InterlockedIncrement,
1642 _InterlockedOr,
1643 _InterlockedXor,
1644 _InterlockedExchangeAdd_acq,
1645 _InterlockedExchangeAdd_rel,
1646 _InterlockedExchangeAdd_nf,
1647 _InterlockedExchange_acq,
1648 _InterlockedExchange_rel,
1649 _InterlockedExchange_nf,
1650 _InterlockedCompareExchange_acq,
1651 _InterlockedCompareExchange_rel,
1652 _InterlockedCompareExchange_nf,
1653 _InterlockedCompareExchange128,
1654 _InterlockedCompareExchange128_acq,
1655 _InterlockedCompareExchange128_rel,
1656 _InterlockedCompareExchange128_nf,
1657 _InterlockedOr_acq,
1658 _InterlockedOr_rel,
1659 _InterlockedOr_nf,
1660 _InterlockedXor_acq,
1661 _InterlockedXor_rel,
1662 _InterlockedXor_nf,
1663 _InterlockedAnd_acq,
1664 _InterlockedAnd_rel,
1665 _InterlockedAnd_nf,
1666 _InterlockedIncrement_acq,
1667 _InterlockedIncrement_rel,
1668 _InterlockedIncrement_nf,
1669 _InterlockedDecrement_acq,
1670 _InterlockedDecrement_rel,
1671 _InterlockedDecrement_nf,
1672 __fastfail,
1675 static std::optional<CodeGenFunction::MSVCIntrin>
1676 translateArmToMsvcIntrin(unsigned BuiltinID) {
1677 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1678 switch (BuiltinID) {
1679 default:
1680 return std::nullopt;
1681 case clang::ARM::BI_BitScanForward:
1682 case clang::ARM::BI_BitScanForward64:
1683 return MSVCIntrin::_BitScanForward;
1684 case clang::ARM::BI_BitScanReverse:
1685 case clang::ARM::BI_BitScanReverse64:
1686 return MSVCIntrin::_BitScanReverse;
1687 case clang::ARM::BI_InterlockedAnd64:
1688 return MSVCIntrin::_InterlockedAnd;
1689 case clang::ARM::BI_InterlockedExchange64:
1690 return MSVCIntrin::_InterlockedExchange;
1691 case clang::ARM::BI_InterlockedExchangeAdd64:
1692 return MSVCIntrin::_InterlockedExchangeAdd;
1693 case clang::ARM::BI_InterlockedExchangeSub64:
1694 return MSVCIntrin::_InterlockedExchangeSub;
1695 case clang::ARM::BI_InterlockedOr64:
1696 return MSVCIntrin::_InterlockedOr;
1697 case clang::ARM::BI_InterlockedXor64:
1698 return MSVCIntrin::_InterlockedXor;
1699 case clang::ARM::BI_InterlockedDecrement64:
1700 return MSVCIntrin::_InterlockedDecrement;
1701 case clang::ARM::BI_InterlockedIncrement64:
1702 return MSVCIntrin::_InterlockedIncrement;
1703 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1704 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1705 case clang::ARM::BI_InterlockedExchangeAdd_acq:
1706 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1707 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1708 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1709 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1710 case clang::ARM::BI_InterlockedExchangeAdd_rel:
1711 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1712 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1713 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1714 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1715 case clang::ARM::BI_InterlockedExchangeAdd_nf:
1716 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1717 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1718 case clang::ARM::BI_InterlockedExchange8_acq:
1719 case clang::ARM::BI_InterlockedExchange16_acq:
1720 case clang::ARM::BI_InterlockedExchange_acq:
1721 case clang::ARM::BI_InterlockedExchange64_acq:
1722 case clang::ARM::BI_InterlockedExchangePointer_acq:
1723 return MSVCIntrin::_InterlockedExchange_acq;
1724 case clang::ARM::BI_InterlockedExchange8_rel:
1725 case clang::ARM::BI_InterlockedExchange16_rel:
1726 case clang::ARM::BI_InterlockedExchange_rel:
1727 case clang::ARM::BI_InterlockedExchange64_rel:
1728 case clang::ARM::BI_InterlockedExchangePointer_rel:
1729 return MSVCIntrin::_InterlockedExchange_rel;
1730 case clang::ARM::BI_InterlockedExchange8_nf:
1731 case clang::ARM::BI_InterlockedExchange16_nf:
1732 case clang::ARM::BI_InterlockedExchange_nf:
1733 case clang::ARM::BI_InterlockedExchange64_nf:
1734 case clang::ARM::BI_InterlockedExchangePointer_nf:
1735 return MSVCIntrin::_InterlockedExchange_nf;
1736 case clang::ARM::BI_InterlockedCompareExchange8_acq:
1737 case clang::ARM::BI_InterlockedCompareExchange16_acq:
1738 case clang::ARM::BI_InterlockedCompareExchange_acq:
1739 case clang::ARM::BI_InterlockedCompareExchange64_acq:
1740 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
1741 return MSVCIntrin::_InterlockedCompareExchange_acq;
1742 case clang::ARM::BI_InterlockedCompareExchange8_rel:
1743 case clang::ARM::BI_InterlockedCompareExchange16_rel:
1744 case clang::ARM::BI_InterlockedCompareExchange_rel:
1745 case clang::ARM::BI_InterlockedCompareExchange64_rel:
1746 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
1747 return MSVCIntrin::_InterlockedCompareExchange_rel;
1748 case clang::ARM::BI_InterlockedCompareExchange8_nf:
1749 case clang::ARM::BI_InterlockedCompareExchange16_nf:
1750 case clang::ARM::BI_InterlockedCompareExchange_nf:
1751 case clang::ARM::BI_InterlockedCompareExchange64_nf:
1752 return MSVCIntrin::_InterlockedCompareExchange_nf;
1753 case clang::ARM::BI_InterlockedOr8_acq:
1754 case clang::ARM::BI_InterlockedOr16_acq:
1755 case clang::ARM::BI_InterlockedOr_acq:
1756 case clang::ARM::BI_InterlockedOr64_acq:
1757 return MSVCIntrin::_InterlockedOr_acq;
1758 case clang::ARM::BI_InterlockedOr8_rel:
1759 case clang::ARM::BI_InterlockedOr16_rel:
1760 case clang::ARM::BI_InterlockedOr_rel:
1761 case clang::ARM::BI_InterlockedOr64_rel:
1762 return MSVCIntrin::_InterlockedOr_rel;
1763 case clang::ARM::BI_InterlockedOr8_nf:
1764 case clang::ARM::BI_InterlockedOr16_nf:
1765 case clang::ARM::BI_InterlockedOr_nf:
1766 case clang::ARM::BI_InterlockedOr64_nf:
1767 return MSVCIntrin::_InterlockedOr_nf;
1768 case clang::ARM::BI_InterlockedXor8_acq:
1769 case clang::ARM::BI_InterlockedXor16_acq:
1770 case clang::ARM::BI_InterlockedXor_acq:
1771 case clang::ARM::BI_InterlockedXor64_acq:
1772 return MSVCIntrin::_InterlockedXor_acq;
1773 case clang::ARM::BI_InterlockedXor8_rel:
1774 case clang::ARM::BI_InterlockedXor16_rel:
1775 case clang::ARM::BI_InterlockedXor_rel:
1776 case clang::ARM::BI_InterlockedXor64_rel:
1777 return MSVCIntrin::_InterlockedXor_rel;
1778 case clang::ARM::BI_InterlockedXor8_nf:
1779 case clang::ARM::BI_InterlockedXor16_nf:
1780 case clang::ARM::BI_InterlockedXor_nf:
1781 case clang::ARM::BI_InterlockedXor64_nf:
1782 return MSVCIntrin::_InterlockedXor_nf;
1783 case clang::ARM::BI_InterlockedAnd8_acq:
1784 case clang::ARM::BI_InterlockedAnd16_acq:
1785 case clang::ARM::BI_InterlockedAnd_acq:
1786 case clang::ARM::BI_InterlockedAnd64_acq:
1787 return MSVCIntrin::_InterlockedAnd_acq;
1788 case clang::ARM::BI_InterlockedAnd8_rel:
1789 case clang::ARM::BI_InterlockedAnd16_rel:
1790 case clang::ARM::BI_InterlockedAnd_rel:
1791 case clang::ARM::BI_InterlockedAnd64_rel:
1792 return MSVCIntrin::_InterlockedAnd_rel;
1793 case clang::ARM::BI_InterlockedAnd8_nf:
1794 case clang::ARM::BI_InterlockedAnd16_nf:
1795 case clang::ARM::BI_InterlockedAnd_nf:
1796 case clang::ARM::BI_InterlockedAnd64_nf:
1797 return MSVCIntrin::_InterlockedAnd_nf;
1798 case clang::ARM::BI_InterlockedIncrement16_acq:
1799 case clang::ARM::BI_InterlockedIncrement_acq:
1800 case clang::ARM::BI_InterlockedIncrement64_acq:
1801 return MSVCIntrin::_InterlockedIncrement_acq;
1802 case clang::ARM::BI_InterlockedIncrement16_rel:
1803 case clang::ARM::BI_InterlockedIncrement_rel:
1804 case clang::ARM::BI_InterlockedIncrement64_rel:
1805 return MSVCIntrin::_InterlockedIncrement_rel;
1806 case clang::ARM::BI_InterlockedIncrement16_nf:
1807 case clang::ARM::BI_InterlockedIncrement_nf:
1808 case clang::ARM::BI_InterlockedIncrement64_nf:
1809 return MSVCIntrin::_InterlockedIncrement_nf;
1810 case clang::ARM::BI_InterlockedDecrement16_acq:
1811 case clang::ARM::BI_InterlockedDecrement_acq:
1812 case clang::ARM::BI_InterlockedDecrement64_acq:
1813 return MSVCIntrin::_InterlockedDecrement_acq;
1814 case clang::ARM::BI_InterlockedDecrement16_rel:
1815 case clang::ARM::BI_InterlockedDecrement_rel:
1816 case clang::ARM::BI_InterlockedDecrement64_rel:
1817 return MSVCIntrin::_InterlockedDecrement_rel;
1818 case clang::ARM::BI_InterlockedDecrement16_nf:
1819 case clang::ARM::BI_InterlockedDecrement_nf:
1820 case clang::ARM::BI_InterlockedDecrement64_nf:
1821 return MSVCIntrin::_InterlockedDecrement_nf;
1823 llvm_unreachable("must return from switch");
1826 static std::optional<CodeGenFunction::MSVCIntrin>
1827 translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1828 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1829 switch (BuiltinID) {
1830 default:
1831 return std::nullopt;
1832 case clang::AArch64::BI_BitScanForward:
1833 case clang::AArch64::BI_BitScanForward64:
1834 return MSVCIntrin::_BitScanForward;
1835 case clang::AArch64::BI_BitScanReverse:
1836 case clang::AArch64::BI_BitScanReverse64:
1837 return MSVCIntrin::_BitScanReverse;
1838 case clang::AArch64::BI_InterlockedAnd64:
1839 return MSVCIntrin::_InterlockedAnd;
1840 case clang::AArch64::BI_InterlockedExchange64:
1841 return MSVCIntrin::_InterlockedExchange;
1842 case clang::AArch64::BI_InterlockedExchangeAdd64:
1843 return MSVCIntrin::_InterlockedExchangeAdd;
1844 case clang::AArch64::BI_InterlockedExchangeSub64:
1845 return MSVCIntrin::_InterlockedExchangeSub;
1846 case clang::AArch64::BI_InterlockedOr64:
1847 return MSVCIntrin::_InterlockedOr;
1848 case clang::AArch64::BI_InterlockedXor64:
1849 return MSVCIntrin::_InterlockedXor;
1850 case clang::AArch64::BI_InterlockedDecrement64:
1851 return MSVCIntrin::_InterlockedDecrement;
1852 case clang::AArch64::BI_InterlockedIncrement64:
1853 return MSVCIntrin::_InterlockedIncrement;
1854 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1855 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1856 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1857 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1858 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1859 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1860 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1861 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1862 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1863 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1864 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1865 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1866 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1867 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1868 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1869 case clang::AArch64::BI_InterlockedExchange8_acq:
1870 case clang::AArch64::BI_InterlockedExchange16_acq:
1871 case clang::AArch64::BI_InterlockedExchange_acq:
1872 case clang::AArch64::BI_InterlockedExchange64_acq:
1873 case clang::AArch64::BI_InterlockedExchangePointer_acq:
1874 return MSVCIntrin::_InterlockedExchange_acq;
1875 case clang::AArch64::BI_InterlockedExchange8_rel:
1876 case clang::AArch64::BI_InterlockedExchange16_rel:
1877 case clang::AArch64::BI_InterlockedExchange_rel:
1878 case clang::AArch64::BI_InterlockedExchange64_rel:
1879 case clang::AArch64::BI_InterlockedExchangePointer_rel:
1880 return MSVCIntrin::_InterlockedExchange_rel;
1881 case clang::AArch64::BI_InterlockedExchange8_nf:
1882 case clang::AArch64::BI_InterlockedExchange16_nf:
1883 case clang::AArch64::BI_InterlockedExchange_nf:
1884 case clang::AArch64::BI_InterlockedExchange64_nf:
1885 case clang::AArch64::BI_InterlockedExchangePointer_nf:
1886 return MSVCIntrin::_InterlockedExchange_nf;
1887 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1888 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1889 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1890 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1891 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
1892 return MSVCIntrin::_InterlockedCompareExchange_acq;
1893 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1894 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1895 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1896 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1897 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
1898 return MSVCIntrin::_InterlockedCompareExchange_rel;
1899 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1900 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1901 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1902 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1903 return MSVCIntrin::_InterlockedCompareExchange_nf;
1904 case clang::AArch64::BI_InterlockedCompareExchange128:
1905 return MSVCIntrin::_InterlockedCompareExchange128;
1906 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1907 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1908 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1909 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1910 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1911 return MSVCIntrin::_InterlockedCompareExchange128_rel;
1912 case clang::AArch64::BI_InterlockedOr8_acq:
1913 case clang::AArch64::BI_InterlockedOr16_acq:
1914 case clang::AArch64::BI_InterlockedOr_acq:
1915 case clang::AArch64::BI_InterlockedOr64_acq:
1916 return MSVCIntrin::_InterlockedOr_acq;
1917 case clang::AArch64::BI_InterlockedOr8_rel:
1918 case clang::AArch64::BI_InterlockedOr16_rel:
1919 case clang::AArch64::BI_InterlockedOr_rel:
1920 case clang::AArch64::BI_InterlockedOr64_rel:
1921 return MSVCIntrin::_InterlockedOr_rel;
1922 case clang::AArch64::BI_InterlockedOr8_nf:
1923 case clang::AArch64::BI_InterlockedOr16_nf:
1924 case clang::AArch64::BI_InterlockedOr_nf:
1925 case clang::AArch64::BI_InterlockedOr64_nf:
1926 return MSVCIntrin::_InterlockedOr_nf;
1927 case clang::AArch64::BI_InterlockedXor8_acq:
1928 case clang::AArch64::BI_InterlockedXor16_acq:
1929 case clang::AArch64::BI_InterlockedXor_acq:
1930 case clang::AArch64::BI_InterlockedXor64_acq:
1931 return MSVCIntrin::_InterlockedXor_acq;
1932 case clang::AArch64::BI_InterlockedXor8_rel:
1933 case clang::AArch64::BI_InterlockedXor16_rel:
1934 case clang::AArch64::BI_InterlockedXor_rel:
1935 case clang::AArch64::BI_InterlockedXor64_rel:
1936 return MSVCIntrin::_InterlockedXor_rel;
1937 case clang::AArch64::BI_InterlockedXor8_nf:
1938 case clang::AArch64::BI_InterlockedXor16_nf:
1939 case clang::AArch64::BI_InterlockedXor_nf:
1940 case clang::AArch64::BI_InterlockedXor64_nf:
1941 return MSVCIntrin::_InterlockedXor_nf;
1942 case clang::AArch64::BI_InterlockedAnd8_acq:
1943 case clang::AArch64::BI_InterlockedAnd16_acq:
1944 case clang::AArch64::BI_InterlockedAnd_acq:
1945 case clang::AArch64::BI_InterlockedAnd64_acq:
1946 return MSVCIntrin::_InterlockedAnd_acq;
1947 case clang::AArch64::BI_InterlockedAnd8_rel:
1948 case clang::AArch64::BI_InterlockedAnd16_rel:
1949 case clang::AArch64::BI_InterlockedAnd_rel:
1950 case clang::AArch64::BI_InterlockedAnd64_rel:
1951 return MSVCIntrin::_InterlockedAnd_rel;
1952 case clang::AArch64::BI_InterlockedAnd8_nf:
1953 case clang::AArch64::BI_InterlockedAnd16_nf:
1954 case clang::AArch64::BI_InterlockedAnd_nf:
1955 case clang::AArch64::BI_InterlockedAnd64_nf:
1956 return MSVCIntrin::_InterlockedAnd_nf;
1957 case clang::AArch64::BI_InterlockedIncrement16_acq:
1958 case clang::AArch64::BI_InterlockedIncrement_acq:
1959 case clang::AArch64::BI_InterlockedIncrement64_acq:
1960 return MSVCIntrin::_InterlockedIncrement_acq;
1961 case clang::AArch64::BI_InterlockedIncrement16_rel:
1962 case clang::AArch64::BI_InterlockedIncrement_rel:
1963 case clang::AArch64::BI_InterlockedIncrement64_rel:
1964 return MSVCIntrin::_InterlockedIncrement_rel;
1965 case clang::AArch64::BI_InterlockedIncrement16_nf:
1966 case clang::AArch64::BI_InterlockedIncrement_nf:
1967 case clang::AArch64::BI_InterlockedIncrement64_nf:
1968 return MSVCIntrin::_InterlockedIncrement_nf;
1969 case clang::AArch64::BI_InterlockedDecrement16_acq:
1970 case clang::AArch64::BI_InterlockedDecrement_acq:
1971 case clang::AArch64::BI_InterlockedDecrement64_acq:
1972 return MSVCIntrin::_InterlockedDecrement_acq;
1973 case clang::AArch64::BI_InterlockedDecrement16_rel:
1974 case clang::AArch64::BI_InterlockedDecrement_rel:
1975 case clang::AArch64::BI_InterlockedDecrement64_rel:
1976 return MSVCIntrin::_InterlockedDecrement_rel;
1977 case clang::AArch64::BI_InterlockedDecrement16_nf:
1978 case clang::AArch64::BI_InterlockedDecrement_nf:
1979 case clang::AArch64::BI_InterlockedDecrement64_nf:
1980 return MSVCIntrin::_InterlockedDecrement_nf;
1982 llvm_unreachable("must return from switch");
1985 static std::optional<CodeGenFunction::MSVCIntrin>
1986 translateX86ToMsvcIntrin(unsigned BuiltinID) {
1987 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1988 switch (BuiltinID) {
1989 default:
1990 return std::nullopt;
1991 case clang::X86::BI_BitScanForward:
1992 case clang::X86::BI_BitScanForward64:
1993 return MSVCIntrin::_BitScanForward;
1994 case clang::X86::BI_BitScanReverse:
1995 case clang::X86::BI_BitScanReverse64:
1996 return MSVCIntrin::_BitScanReverse;
1997 case clang::X86::BI_InterlockedAnd64:
1998 return MSVCIntrin::_InterlockedAnd;
1999 case clang::X86::BI_InterlockedCompareExchange128:
2000 return MSVCIntrin::_InterlockedCompareExchange128;
2001 case clang::X86::BI_InterlockedExchange64:
2002 return MSVCIntrin::_InterlockedExchange;
2003 case clang::X86::BI_InterlockedExchangeAdd64:
2004 return MSVCIntrin::_InterlockedExchangeAdd;
2005 case clang::X86::BI_InterlockedExchangeSub64:
2006 return MSVCIntrin::_InterlockedExchangeSub;
2007 case clang::X86::BI_InterlockedOr64:
2008 return MSVCIntrin::_InterlockedOr;
2009 case clang::X86::BI_InterlockedXor64:
2010 return MSVCIntrin::_InterlockedXor;
2011 case clang::X86::BI_InterlockedDecrement64:
2012 return MSVCIntrin::_InterlockedDecrement;
2013 case clang::X86::BI_InterlockedIncrement64:
2014 return MSVCIntrin::_InterlockedIncrement;
2016 llvm_unreachable("must return from switch");
2019 // Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
2020 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
2021 const CallExpr *E) {
2022 switch (BuiltinID) {
2023 case MSVCIntrin::_BitScanForward:
2024 case MSVCIntrin::_BitScanReverse: {
2025 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
2026 Value *ArgValue = EmitScalarExpr(E->getArg(1));
2028 llvm::Type *ArgType = ArgValue->getType();
2029 llvm::Type *IndexType = IndexAddress.getElementType();
2030 llvm::Type *ResultType = ConvertType(E->getType());
2032 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
2033 Value *ResZero = llvm::Constant::getNullValue(ResultType);
2034 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
2036 BasicBlock *Begin = Builder.GetInsertBlock();
2037 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
2038 Builder.SetInsertPoint(End);
2039 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
2041 Builder.SetInsertPoint(Begin);
2042 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
2043 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
2044 Builder.CreateCondBr(IsZero, End, NotZero);
2045 Result->addIncoming(ResZero, Begin);
2047 Builder.SetInsertPoint(NotZero);
2049 if (BuiltinID == MSVCIntrin::_BitScanForward) {
2050 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
2051 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
2052 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
2053 Builder.CreateStore(ZeroCount, IndexAddress, false);
2054 } else {
2055 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
2056 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
2058 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
2059 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
2060 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
2061 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
2062 Builder.CreateStore(Index, IndexAddress, false);
2064 Builder.CreateBr(End);
2065 Result->addIncoming(ResOne, NotZero);
2067 Builder.SetInsertPoint(End);
2068 return Result;
2070 case MSVCIntrin::_InterlockedAnd:
2071 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
2072 case MSVCIntrin::_InterlockedExchange:
2073 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
2074 case MSVCIntrin::_InterlockedExchangeAdd:
2075 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
2076 case MSVCIntrin::_InterlockedExchangeSub:
2077 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
2078 case MSVCIntrin::_InterlockedOr:
2079 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
2080 case MSVCIntrin::_InterlockedXor:
2081 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
2082 case MSVCIntrin::_InterlockedExchangeAdd_acq:
2083 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2084 AtomicOrdering::Acquire);
2085 case MSVCIntrin::_InterlockedExchangeAdd_rel:
2086 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2087 AtomicOrdering::Release);
2088 case MSVCIntrin::_InterlockedExchangeAdd_nf:
2089 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2090 AtomicOrdering::Monotonic);
2091 case MSVCIntrin::_InterlockedExchange_acq:
2092 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2093 AtomicOrdering::Acquire);
2094 case MSVCIntrin::_InterlockedExchange_rel:
2095 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2096 AtomicOrdering::Release);
2097 case MSVCIntrin::_InterlockedExchange_nf:
2098 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2099 AtomicOrdering::Monotonic);
2100 case MSVCIntrin::_InterlockedCompareExchange:
2101 return EmitAtomicCmpXchgForMSIntrin(*this, E);
2102 case MSVCIntrin::_InterlockedCompareExchange_acq:
2103 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
2104 case MSVCIntrin::_InterlockedCompareExchange_rel:
2105 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
2106 case MSVCIntrin::_InterlockedCompareExchange_nf:
2107 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2108 case MSVCIntrin::_InterlockedCompareExchange128:
2109 return EmitAtomicCmpXchg128ForMSIntrin(
2110 *this, E, AtomicOrdering::SequentiallyConsistent);
2111 case MSVCIntrin::_InterlockedCompareExchange128_acq:
2112 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
2113 case MSVCIntrin::_InterlockedCompareExchange128_rel:
2114 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
2115 case MSVCIntrin::_InterlockedCompareExchange128_nf:
2116 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2117 case MSVCIntrin::_InterlockedOr_acq:
2118 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2119 AtomicOrdering::Acquire);
2120 case MSVCIntrin::_InterlockedOr_rel:
2121 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2122 AtomicOrdering::Release);
2123 case MSVCIntrin::_InterlockedOr_nf:
2124 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2125 AtomicOrdering::Monotonic);
2126 case MSVCIntrin::_InterlockedXor_acq:
2127 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2128 AtomicOrdering::Acquire);
2129 case MSVCIntrin::_InterlockedXor_rel:
2130 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2131 AtomicOrdering::Release);
2132 case MSVCIntrin::_InterlockedXor_nf:
2133 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2134 AtomicOrdering::Monotonic);
2135 case MSVCIntrin::_InterlockedAnd_acq:
2136 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2137 AtomicOrdering::Acquire);
2138 case MSVCIntrin::_InterlockedAnd_rel:
2139 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2140 AtomicOrdering::Release);
2141 case MSVCIntrin::_InterlockedAnd_nf:
2142 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2143 AtomicOrdering::Monotonic);
2144 case MSVCIntrin::_InterlockedIncrement_acq:
2145 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
2146 case MSVCIntrin::_InterlockedIncrement_rel:
2147 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
2148 case MSVCIntrin::_InterlockedIncrement_nf:
2149 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
2150 case MSVCIntrin::_InterlockedDecrement_acq:
2151 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
2152 case MSVCIntrin::_InterlockedDecrement_rel:
2153 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
2154 case MSVCIntrin::_InterlockedDecrement_nf:
2155 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
2157 case MSVCIntrin::_InterlockedDecrement:
2158 return EmitAtomicDecrementValue(*this, E);
2159 case MSVCIntrin::_InterlockedIncrement:
2160 return EmitAtomicIncrementValue(*this, E);
2162 case MSVCIntrin::__fastfail: {
2163 // Request immediate process termination from the kernel. The instruction
2164 // sequences to do this are documented on MSDN:
2165 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
2166 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
2167 StringRef Asm, Constraints;
2168 switch (ISA) {
2169 default:
2170 ErrorUnsupported(E, "__fastfail call for this architecture");
2171 break;
2172 case llvm::Triple::x86:
2173 case llvm::Triple::x86_64:
2174 Asm = "int $$0x29";
2175 Constraints = "{cx}";
2176 break;
2177 case llvm::Triple::thumb:
2178 Asm = "udf #251";
2179 Constraints = "{r0}";
2180 break;
2181 case llvm::Triple::aarch64:
2182 Asm = "brk #0xF003";
2183 Constraints = "{w0}";
2185 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
2186 llvm::InlineAsm *IA =
2187 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
2188 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
2189 getLLVMContext(), llvm::AttributeList::FunctionIndex,
2190 llvm::Attribute::NoReturn);
2191 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
2192 CI->setAttributes(NoReturnAttr);
2193 return CI;
2196 llvm_unreachable("Incorrect MSVC intrinsic!");
2199 namespace {
2200 // ARC cleanup for __builtin_os_log_format
2201 struct CallObjCArcUse final : EHScopeStack::Cleanup {
2202 CallObjCArcUse(llvm::Value *object) : object(object) {}
2203 llvm::Value *object;
2205 void Emit(CodeGenFunction &CGF, Flags flags) override {
2206 CGF.EmitARCIntrinsicUse(object);
2211 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
2212 BuiltinCheckKind Kind) {
2213 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) &&
2214 "Unsupported builtin check kind");
2216 Value *ArgValue = EmitScalarExpr(E);
2217 if (!SanOpts.has(SanitizerKind::Builtin))
2218 return ArgValue;
2220 SanitizerScope SanScope(this);
2221 Value *Cond = Builder.CreateICmpNE(
2222 ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
2223 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
2224 SanitizerHandler::InvalidBuiltin,
2225 {EmitCheckSourceLocation(E->getExprLoc()),
2226 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
2227 {});
2228 return ArgValue;
2231 Value *CodeGenFunction::EmitCheckedArgForAssume(const Expr *E) {
2232 Value *ArgValue = EvaluateExprAsBool(E);
2233 if (!SanOpts.has(SanitizerKind::Builtin))
2234 return ArgValue;
2236 SanitizerScope SanScope(this);
2237 EmitCheck(
2238 std::make_pair(ArgValue, SanitizerKind::Builtin),
2239 SanitizerHandler::InvalidBuiltin,
2240 {EmitCheckSourceLocation(E->getExprLoc()),
2241 llvm::ConstantInt::get(Builder.getInt8Ty(), BCK_AssumePassedFalse)},
2242 std::nullopt);
2243 return ArgValue;
2246 static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2247 return CGF.Builder.CreateBinaryIntrinsic(
2248 Intrinsic::abs, ArgValue,
2249 ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2252 static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,
2253 bool SanitizeOverflow) {
2254 Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
2256 // Try to eliminate overflow check.
2257 if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2258 if (!VCI->isMinSignedValue())
2259 return EmitAbs(CGF, ArgValue, true);
2262 CodeGenFunction::SanitizerScope SanScope(&CGF);
2264 Constant *Zero = Constant::getNullValue(ArgValue->getType());
2265 Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2266 Intrinsic::ssub_with_overflow, Zero, ArgValue);
2267 Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
2268 Value *NotOverflow = CGF.Builder.CreateNot(
2269 CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
2271 // TODO: support -ftrapv-handler.
2272 if (SanitizeOverflow) {
2273 CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}},
2274 SanitizerHandler::NegateOverflow,
2275 {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
2276 CGF.EmitCheckTypeDescriptor(E->getType())},
2277 {ArgValue});
2278 } else
2279 CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
2281 Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2282 return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
2285 /// Get the argument type for arguments to os_log_helper.
2286 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
2287 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
2288 return C.getCanonicalType(UnsignedTy);
2291 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
2292 const analyze_os_log::OSLogBufferLayout &Layout,
2293 CharUnits BufferAlignment) {
2294 ASTContext &Ctx = getContext();
2296 llvm::SmallString<64> Name;
2298 raw_svector_ostream OS(Name);
2299 OS << "__os_log_helper";
2300 OS << "_" << BufferAlignment.getQuantity();
2301 OS << "_" << int(Layout.getSummaryByte());
2302 OS << "_" << int(Layout.getNumArgsByte());
2303 for (const auto &Item : Layout.Items)
2304 OS << "_" << int(Item.getSizeByte()) << "_"
2305 << int(Item.getDescriptorByte());
2308 if (llvm::Function *F = CGM.getModule().getFunction(Name))
2309 return F;
2311 llvm::SmallVector<QualType, 4> ArgTys;
2312 FunctionArgList Args;
2313 Args.push_back(ImplicitParamDecl::Create(
2314 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
2315 ImplicitParamKind::Other));
2316 ArgTys.emplace_back(Ctx.VoidPtrTy);
2318 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2319 char Size = Layout.Items[I].getSizeByte();
2320 if (!Size)
2321 continue;
2323 QualType ArgTy = getOSLogArgType(Ctx, Size);
2324 Args.push_back(ImplicitParamDecl::Create(
2325 Ctx, nullptr, SourceLocation(),
2326 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
2327 ImplicitParamKind::Other));
2328 ArgTys.emplace_back(ArgTy);
2331 QualType ReturnTy = Ctx.VoidTy;
2333 // The helper function has linkonce_odr linkage to enable the linker to merge
2334 // identical functions. To ensure the merging always happens, 'noinline' is
2335 // attached to the function when compiling with -Oz.
2336 const CGFunctionInfo &FI =
2337 CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
2338 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2339 llvm::Function *Fn = llvm::Function::Create(
2340 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2341 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2342 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
2343 CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
2344 Fn->setDoesNotThrow();
2346 // Attach 'noinline' at -Oz.
2347 if (CGM.getCodeGenOpts().OptimizeSize == 2)
2348 Fn->addFnAttr(llvm::Attribute::NoInline);
2350 auto NL = ApplyDebugLocation::CreateEmpty(*this);
2351 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2353 // Create a scope with an artificial location for the body of this function.
2354 auto AL = ApplyDebugLocation::CreateArtificial(*this);
2356 CharUnits Offset;
2357 Address BufAddr = makeNaturalAddressForPointer(
2358 Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy,
2359 BufferAlignment);
2360 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2361 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2362 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2363 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2365 unsigned I = 1;
2366 for (const auto &Item : Layout.Items) {
2367 Builder.CreateStore(
2368 Builder.getInt8(Item.getDescriptorByte()),
2369 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2370 Builder.CreateStore(
2371 Builder.getInt8(Item.getSizeByte()),
2372 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2374 CharUnits Size = Item.size();
2375 if (!Size.getQuantity())
2376 continue;
2378 Address Arg = GetAddrOfLocalVar(Args[I]);
2379 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2380 Addr = Addr.withElementType(Arg.getElementType());
2381 Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
2382 Offset += Size;
2383 ++I;
2386 FinishFunction();
2388 return Fn;
2391 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
2392 assert(E.getNumArgs() >= 2 &&
2393 "__builtin_os_log_format takes at least 2 arguments");
2394 ASTContext &Ctx = getContext();
2395 analyze_os_log::OSLogBufferLayout Layout;
2396 analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
2397 Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2398 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2400 // Ignore argument 1, the format string. It is not currently used.
2401 CallArgList Args;
2402 Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy);
2404 for (const auto &Item : Layout.Items) {
2405 int Size = Item.getSizeByte();
2406 if (!Size)
2407 continue;
2409 llvm::Value *ArgVal;
2411 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2412 uint64_t Val = 0;
2413 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2414 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2415 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2416 } else if (const Expr *TheExpr = Item.getExpr()) {
2417 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2419 // If a temporary object that requires destruction after the full
2420 // expression is passed, push a lifetime-extended cleanup to extend its
2421 // lifetime to the end of the enclosing block scope.
2422 auto LifetimeExtendObject = [&](const Expr *E) {
2423 E = E->IgnoreParenCasts();
2424 // Extend lifetimes of objects returned by function calls and message
2425 // sends.
2427 // FIXME: We should do this in other cases in which temporaries are
2428 // created including arguments of non-ARC types (e.g., C++
2429 // temporaries).
2430 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2431 return true;
2432 return false;
2435 if (TheExpr->getType()->isObjCRetainableType() &&
2436 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2437 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2438 "Only scalar can be a ObjC retainable type");
2439 if (!isa<Constant>(ArgVal)) {
2440 CleanupKind Cleanup = getARCCleanupKind();
2441 QualType Ty = TheExpr->getType();
2442 RawAddress Alloca = RawAddress::invalid();
2443 RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2444 ArgVal = EmitARCRetain(Ty, ArgVal);
2445 Builder.CreateStore(ArgVal, Addr);
2446 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
2447 CodeGenFunction::destroyARCStrongPrecise,
2448 Cleanup & EHCleanup);
2450 // Push a clang.arc.use call to ensure ARC optimizer knows that the
2451 // argument has to be alive.
2452 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2453 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2456 } else {
2457 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2460 unsigned ArgValSize =
2461 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2462 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2463 ArgValSize);
2464 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2465 CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2466 // If ArgVal has type x86_fp80, zero-extend ArgVal.
2467 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2468 Args.add(RValue::get(ArgVal), ArgTy);
2471 const CGFunctionInfo &FI =
2472 CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
2473 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
2474 Layout, BufAddr.getAlignment());
2475 EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
2476 return RValue::get(BufAddr, *this);
2479 static bool isSpecialUnsignedMultiplySignedResult(
2480 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2481 WidthAndSignedness ResultInfo) {
2482 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2483 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2484 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2487 static RValue EmitCheckedUnsignedMultiplySignedResult(
2488 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2489 const clang::Expr *Op2, WidthAndSignedness Op2Info,
2490 const clang::Expr *ResultArg, QualType ResultQTy,
2491 WidthAndSignedness ResultInfo) {
2492 assert(isSpecialUnsignedMultiplySignedResult(
2493 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2494 "Cannot specialize this multiply");
2496 llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2497 llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2499 llvm::Value *HasOverflow;
2500 llvm::Value *Result = EmitOverflowIntrinsic(
2501 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2503 // The intrinsic call will detect overflow when the value is > UINT_MAX,
2504 // however, since the original builtin had a signed result, we need to report
2505 // an overflow when the result is greater than INT_MAX.
2506 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2507 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2509 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2510 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2512 bool isVolatile =
2513 ResultArg->getType()->getPointeeType().isVolatileQualified();
2514 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2515 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2516 isVolatile);
2517 return RValue::get(HasOverflow);
2520 /// Determine if a binop is a checked mixed-sign multiply we can specialize.
2521 static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2522 WidthAndSignedness Op1Info,
2523 WidthAndSignedness Op2Info,
2524 WidthAndSignedness ResultInfo) {
2525 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2526 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2527 Op1Info.Signed != Op2Info.Signed;
2530 /// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2531 /// the generic checked-binop irgen.
2532 static RValue
2533 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
2534 WidthAndSignedness Op1Info, const clang::Expr *Op2,
2535 WidthAndSignedness Op2Info,
2536 const clang::Expr *ResultArg, QualType ResultQTy,
2537 WidthAndSignedness ResultInfo) {
2538 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2539 Op2Info, ResultInfo) &&
2540 "Not a mixed-sign multipliction we can specialize");
2542 // Emit the signed and unsigned operands.
2543 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2544 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2545 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2546 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2547 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2548 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2550 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2551 if (SignedOpWidth < UnsignedOpWidth)
2552 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2553 if (UnsignedOpWidth < SignedOpWidth)
2554 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2556 llvm::Type *OpTy = Signed->getType();
2557 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2558 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2559 llvm::Type *ResTy = ResultPtr.getElementType();
2560 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2562 // Take the absolute value of the signed operand.
2563 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2564 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2565 llvm::Value *AbsSigned =
2566 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2568 // Perform a checked unsigned multiplication.
2569 llvm::Value *UnsignedOverflow;
2570 llvm::Value *UnsignedResult =
2571 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2572 Unsigned, UnsignedOverflow);
2574 llvm::Value *Overflow, *Result;
2575 if (ResultInfo.Signed) {
2576 // Signed overflow occurs if the result is greater than INT_MAX or lesser
2577 // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative).
2578 auto IntMax =
2579 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2580 llvm::Value *MaxResult =
2581 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2582 CGF.Builder.CreateZExt(IsNegative, OpTy));
2583 llvm::Value *SignedOverflow =
2584 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2585 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2587 // Prepare the signed result (possibly by negating it).
2588 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2589 llvm::Value *SignedResult =
2590 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2591 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2592 } else {
2593 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2594 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2595 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2596 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2597 if (ResultInfo.Width < OpWidth) {
2598 auto IntMax =
2599 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2600 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2601 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2602 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2605 // Negate the product if it would be negative in infinite precision.
2606 Result = CGF.Builder.CreateSelect(
2607 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2609 Result = CGF.Builder.CreateTrunc(Result, ResTy);
2611 assert(Overflow && Result && "Missing overflow or result");
2613 bool isVolatile =
2614 ResultArg->getType()->getPointeeType().isVolatileQualified();
2615 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2616 isVolatile);
2617 return RValue::get(Overflow);
2620 static bool
2621 TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
2622 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2623 if (const auto *Arr = Ctx.getAsArrayType(Ty))
2624 Ty = Ctx.getBaseElementType(Arr);
2626 const auto *Record = Ty->getAsCXXRecordDecl();
2627 if (!Record)
2628 return false;
2630 // We've already checked this type, or are in the process of checking it.
2631 if (!Seen.insert(Record).second)
2632 return false;
2634 assert(Record->hasDefinition() &&
2635 "Incomplete types should already be diagnosed");
2637 if (Record->isDynamicClass())
2638 return true;
2640 for (FieldDecl *F : Record->fields()) {
2641 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2642 return true;
2644 return false;
2647 /// Determine if the specified type requires laundering by checking if it is a
2648 /// dynamic class type or contains a subobject which is a dynamic class type.
2649 static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
2650 if (!CGM.getCodeGenOpts().StrictVTablePointers)
2651 return false;
2652 llvm::SmallPtrSet<const Decl *, 16> Seen;
2653 return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2656 RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2657 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2658 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2660 // The builtin's shift arg may have a different type than the source arg and
2661 // result, but the LLVM intrinsic uses the same type for all values.
2662 llvm::Type *Ty = Src->getType();
2663 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2665 // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
2666 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2667 Function *F = CGM.getIntrinsic(IID, Ty);
2668 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2671 // Map math builtins for long-double to f128 version.
2672 static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2673 switch (BuiltinID) {
2674 #define MUTATE_LDBL(func) \
2675 case Builtin::BI__builtin_##func##l: \
2676 return Builtin::BI__builtin_##func##f128;
2677 MUTATE_LDBL(sqrt)
2678 MUTATE_LDBL(cbrt)
2679 MUTATE_LDBL(fabs)
2680 MUTATE_LDBL(log)
2681 MUTATE_LDBL(log2)
2682 MUTATE_LDBL(log10)
2683 MUTATE_LDBL(log1p)
2684 MUTATE_LDBL(logb)
2685 MUTATE_LDBL(exp)
2686 MUTATE_LDBL(exp2)
2687 MUTATE_LDBL(expm1)
2688 MUTATE_LDBL(fdim)
2689 MUTATE_LDBL(hypot)
2690 MUTATE_LDBL(ilogb)
2691 MUTATE_LDBL(pow)
2692 MUTATE_LDBL(fmin)
2693 MUTATE_LDBL(fmax)
2694 MUTATE_LDBL(ceil)
2695 MUTATE_LDBL(trunc)
2696 MUTATE_LDBL(rint)
2697 MUTATE_LDBL(nearbyint)
2698 MUTATE_LDBL(round)
2699 MUTATE_LDBL(floor)
2700 MUTATE_LDBL(lround)
2701 MUTATE_LDBL(llround)
2702 MUTATE_LDBL(lrint)
2703 MUTATE_LDBL(llrint)
2704 MUTATE_LDBL(fmod)
2705 MUTATE_LDBL(modf)
2706 MUTATE_LDBL(nan)
2707 MUTATE_LDBL(nans)
2708 MUTATE_LDBL(inf)
2709 MUTATE_LDBL(fma)
2710 MUTATE_LDBL(sin)
2711 MUTATE_LDBL(cos)
2712 MUTATE_LDBL(tan)
2713 MUTATE_LDBL(sinh)
2714 MUTATE_LDBL(cosh)
2715 MUTATE_LDBL(tanh)
2716 MUTATE_LDBL(asin)
2717 MUTATE_LDBL(acos)
2718 MUTATE_LDBL(atan)
2719 MUTATE_LDBL(asinh)
2720 MUTATE_LDBL(acosh)
2721 MUTATE_LDBL(atanh)
2722 MUTATE_LDBL(atan2)
2723 MUTATE_LDBL(erf)
2724 MUTATE_LDBL(erfc)
2725 MUTATE_LDBL(ldexp)
2726 MUTATE_LDBL(frexp)
2727 MUTATE_LDBL(huge_val)
2728 MUTATE_LDBL(copysign)
2729 MUTATE_LDBL(nextafter)
2730 MUTATE_LDBL(nexttoward)
2731 MUTATE_LDBL(remainder)
2732 MUTATE_LDBL(remquo)
2733 MUTATE_LDBL(scalbln)
2734 MUTATE_LDBL(scalbn)
2735 MUTATE_LDBL(tgamma)
2736 MUTATE_LDBL(lgamma)
2737 #undef MUTATE_LDBL
2738 default:
2739 return BuiltinID;
2743 static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2744 Value *V) {
2745 if (CGF.Builder.getIsFPConstrained() &&
2746 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2747 if (Value *Result =
2748 CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2749 return Result;
2751 return nullptr;
2754 static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
2755 const FunctionDecl *FD) {
2756 auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2757 auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2758 auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2760 SmallVector<Value *, 16> Args;
2761 for (auto &&FormalTy : FnTy->params())
2762 Args.push_back(llvm::PoisonValue::get(FormalTy));
2764 return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2767 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2768 const CallExpr *E,
2769 ReturnValueSlot ReturnValue) {
2770 assert(!getContext().BuiltinInfo.isImmediate(BuiltinID) &&
2771 "Should not codegen for consteval builtins");
2773 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2774 // See if we can constant fold this builtin. If so, don't emit it at all.
2775 // TODO: Extend this handling to all builtin calls that we can constant-fold.
2776 Expr::EvalResult Result;
2777 if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
2778 !Result.hasSideEffects()) {
2779 if (Result.Val.isInt())
2780 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2781 Result.Val.getInt()));
2782 if (Result.Val.isFloat())
2783 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2784 Result.Val.getFloat()));
2787 // If current long-double semantics is IEEE 128-bit, replace math builtins
2788 // of long-double with f128 equivalent.
2789 // TODO: This mutation should also be applied to other targets other than PPC,
2790 // after backend supports IEEE 128-bit style libcalls.
2791 if (getTarget().getTriple().isPPC64() &&
2792 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2793 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2795 // If the builtin has been declared explicitly with an assembler label,
2796 // disable the specialized emitting below. Ideally we should communicate the
2797 // rename in IR, or at least avoid generating the intrinsic calls that are
2798 // likely to get lowered to the renamed library functions.
2799 const unsigned BuiltinIDIfNoAsmLabel =
2800 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2802 std::optional<bool> ErrnoOverriden;
2803 // ErrnoOverriden is true if math-errno is overriden via the
2804 // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2805 // which implies math-errno.
2806 if (E->hasStoredFPFeatures()) {
2807 FPOptionsOverride OP = E->getFPFeatures();
2808 if (OP.hasMathErrnoOverride())
2809 ErrnoOverriden = OP.getMathErrnoOverride();
2811 // True if 'attribute__((optnone))' is used. This attribute overrides
2812 // fast-math which implies math-errno.
2813 bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2815 // True if we are compiling at -O2 and errno has been disabled
2816 // using the '#pragma float_control(precise, off)', and
2817 // attribute opt-none hasn't been seen.
2818 bool ErrnoOverridenToFalseWithOpt =
2819 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2820 CGM.getCodeGenOpts().OptimizationLevel != 0;
2822 // There are LLVM math intrinsics/instructions corresponding to math library
2823 // functions except the LLVM op will never set errno while the math library
2824 // might. Also, math builtins have the same semantics as their math library
2825 // twins. Thus, we can transform math library and builtin calls to their
2826 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2827 // In case FP exceptions are enabled, the experimental versions of the
2828 // intrinsics model those.
2829 bool ConstAlways =
2830 getContext().BuiltinInfo.isConst(BuiltinID);
2832 // There's a special case with the fma builtins where they are always const
2833 // if the target environment is GNU or the target is OS is Windows and we're
2834 // targeting the MSVCRT.dll environment.
2835 // FIXME: This list can be become outdated. Need to find a way to get it some
2836 // other way.
2837 switch (BuiltinID) {
2838 case Builtin::BI__builtin_fma:
2839 case Builtin::BI__builtin_fmaf:
2840 case Builtin::BI__builtin_fmal:
2841 case Builtin::BI__builtin_fmaf16:
2842 case Builtin::BIfma:
2843 case Builtin::BIfmaf:
2844 case Builtin::BIfmal: {
2845 auto &Trip = CGM.getTriple();
2846 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2847 ConstAlways = true;
2848 break;
2850 default:
2851 break;
2854 bool ConstWithoutErrnoAndExceptions =
2855 getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
2856 bool ConstWithoutExceptions =
2857 getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
2859 // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2860 // disabled.
2861 // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2862 // or attributes that affect math-errno should prevent or allow math
2863 // intrincs to be generated. Intrinsics are generated:
2864 // 1- In fast math mode, unless math-errno is overriden
2865 // via '#pragma float_control(precise, on)', or via an
2866 // 'attribute__((optnone))'.
2867 // 2- If math-errno was enabled on command line but overriden
2868 // to false via '#pragma float_control(precise, off))' and
2869 // 'attribute__((optnone))' hasn't been used.
2870 // 3- If we are compiling with optimization and errno has been disabled
2871 // via '#pragma float_control(precise, off)', and
2872 // 'attribute__((optnone))' hasn't been used.
2874 bool ConstWithoutErrnoOrExceptions =
2875 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2876 bool GenerateIntrinsics =
2877 (ConstAlways && !OptNone) ||
2878 (!getLangOpts().MathErrno &&
2879 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2880 if (!GenerateIntrinsics) {
2881 GenerateIntrinsics =
2882 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2883 if (!GenerateIntrinsics)
2884 GenerateIntrinsics =
2885 ConstWithoutErrnoOrExceptions &&
2886 (!getLangOpts().MathErrno &&
2887 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2888 if (!GenerateIntrinsics)
2889 GenerateIntrinsics =
2890 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2892 if (GenerateIntrinsics) {
2893 switch (BuiltinIDIfNoAsmLabel) {
2894 case Builtin::BIacos:
2895 case Builtin::BIacosf:
2896 case Builtin::BIacosl:
2897 case Builtin::BI__builtin_acos:
2898 case Builtin::BI__builtin_acosf:
2899 case Builtin::BI__builtin_acosf16:
2900 case Builtin::BI__builtin_acosl:
2901 case Builtin::BI__builtin_acosf128:
2902 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2903 *this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos));
2905 case Builtin::BIasin:
2906 case Builtin::BIasinf:
2907 case Builtin::BIasinl:
2908 case Builtin::BI__builtin_asin:
2909 case Builtin::BI__builtin_asinf:
2910 case Builtin::BI__builtin_asinf16:
2911 case Builtin::BI__builtin_asinl:
2912 case Builtin::BI__builtin_asinf128:
2913 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2914 *this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin));
2916 case Builtin::BIatan:
2917 case Builtin::BIatanf:
2918 case Builtin::BIatanl:
2919 case Builtin::BI__builtin_atan:
2920 case Builtin::BI__builtin_atanf:
2921 case Builtin::BI__builtin_atanf16:
2922 case Builtin::BI__builtin_atanl:
2923 case Builtin::BI__builtin_atanf128:
2924 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2925 *this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan));
2927 case Builtin::BIatan2:
2928 case Builtin::BIatan2f:
2929 case Builtin::BIatan2l:
2930 case Builtin::BI__builtin_atan2:
2931 case Builtin::BI__builtin_atan2f:
2932 case Builtin::BI__builtin_atan2f16:
2933 case Builtin::BI__builtin_atan2l:
2934 case Builtin::BI__builtin_atan2f128:
2935 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(
2936 *this, E, Intrinsic::atan2,
2937 Intrinsic::experimental_constrained_atan2));
2939 case Builtin::BIceil:
2940 case Builtin::BIceilf:
2941 case Builtin::BIceill:
2942 case Builtin::BI__builtin_ceil:
2943 case Builtin::BI__builtin_ceilf:
2944 case Builtin::BI__builtin_ceilf16:
2945 case Builtin::BI__builtin_ceill:
2946 case Builtin::BI__builtin_ceilf128:
2947 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2948 Intrinsic::ceil,
2949 Intrinsic::experimental_constrained_ceil));
2951 case Builtin::BIcopysign:
2952 case Builtin::BIcopysignf:
2953 case Builtin::BIcopysignl:
2954 case Builtin::BI__builtin_copysign:
2955 case Builtin::BI__builtin_copysignf:
2956 case Builtin::BI__builtin_copysignf16:
2957 case Builtin::BI__builtin_copysignl:
2958 case Builtin::BI__builtin_copysignf128:
2959 return RValue::get(
2960 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign));
2962 case Builtin::BIcos:
2963 case Builtin::BIcosf:
2964 case Builtin::BIcosl:
2965 case Builtin::BI__builtin_cos:
2966 case Builtin::BI__builtin_cosf:
2967 case Builtin::BI__builtin_cosf16:
2968 case Builtin::BI__builtin_cosl:
2969 case Builtin::BI__builtin_cosf128:
2970 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2971 Intrinsic::cos,
2972 Intrinsic::experimental_constrained_cos));
2974 case Builtin::BIcosh:
2975 case Builtin::BIcoshf:
2976 case Builtin::BIcoshl:
2977 case Builtin::BI__builtin_cosh:
2978 case Builtin::BI__builtin_coshf:
2979 case Builtin::BI__builtin_coshf16:
2980 case Builtin::BI__builtin_coshl:
2981 case Builtin::BI__builtin_coshf128:
2982 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2983 *this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh));
2985 case Builtin::BIexp:
2986 case Builtin::BIexpf:
2987 case Builtin::BIexpl:
2988 case Builtin::BI__builtin_exp:
2989 case Builtin::BI__builtin_expf:
2990 case Builtin::BI__builtin_expf16:
2991 case Builtin::BI__builtin_expl:
2992 case Builtin::BI__builtin_expf128:
2993 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2994 Intrinsic::exp,
2995 Intrinsic::experimental_constrained_exp));
2997 case Builtin::BIexp2:
2998 case Builtin::BIexp2f:
2999 case Builtin::BIexp2l:
3000 case Builtin::BI__builtin_exp2:
3001 case Builtin::BI__builtin_exp2f:
3002 case Builtin::BI__builtin_exp2f16:
3003 case Builtin::BI__builtin_exp2l:
3004 case Builtin::BI__builtin_exp2f128:
3005 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3006 Intrinsic::exp2,
3007 Intrinsic::experimental_constrained_exp2));
3008 case Builtin::BI__builtin_exp10:
3009 case Builtin::BI__builtin_exp10f:
3010 case Builtin::BI__builtin_exp10f16:
3011 case Builtin::BI__builtin_exp10l:
3012 case Builtin::BI__builtin_exp10f128: {
3013 // TODO: strictfp support
3014 if (Builder.getIsFPConstrained())
3015 break;
3016 return RValue::get(
3017 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::exp10));
3019 case Builtin::BIfabs:
3020 case Builtin::BIfabsf:
3021 case Builtin::BIfabsl:
3022 case Builtin::BI__builtin_fabs:
3023 case Builtin::BI__builtin_fabsf:
3024 case Builtin::BI__builtin_fabsf16:
3025 case Builtin::BI__builtin_fabsl:
3026 case Builtin::BI__builtin_fabsf128:
3027 return RValue::get(
3028 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs));
3030 case Builtin::BIfloor:
3031 case Builtin::BIfloorf:
3032 case Builtin::BIfloorl:
3033 case Builtin::BI__builtin_floor:
3034 case Builtin::BI__builtin_floorf:
3035 case Builtin::BI__builtin_floorf16:
3036 case Builtin::BI__builtin_floorl:
3037 case Builtin::BI__builtin_floorf128:
3038 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3039 Intrinsic::floor,
3040 Intrinsic::experimental_constrained_floor));
3042 case Builtin::BIfma:
3043 case Builtin::BIfmaf:
3044 case Builtin::BIfmal:
3045 case Builtin::BI__builtin_fma:
3046 case Builtin::BI__builtin_fmaf:
3047 case Builtin::BI__builtin_fmaf16:
3048 case Builtin::BI__builtin_fmal:
3049 case Builtin::BI__builtin_fmaf128:
3050 return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
3051 Intrinsic::fma,
3052 Intrinsic::experimental_constrained_fma));
3054 case Builtin::BIfmax:
3055 case Builtin::BIfmaxf:
3056 case Builtin::BIfmaxl:
3057 case Builtin::BI__builtin_fmax:
3058 case Builtin::BI__builtin_fmaxf:
3059 case Builtin::BI__builtin_fmaxf16:
3060 case Builtin::BI__builtin_fmaxl:
3061 case Builtin::BI__builtin_fmaxf128:
3062 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3063 Intrinsic::maxnum,
3064 Intrinsic::experimental_constrained_maxnum));
3066 case Builtin::BIfmin:
3067 case Builtin::BIfminf:
3068 case Builtin::BIfminl:
3069 case Builtin::BI__builtin_fmin:
3070 case Builtin::BI__builtin_fminf:
3071 case Builtin::BI__builtin_fminf16:
3072 case Builtin::BI__builtin_fminl:
3073 case Builtin::BI__builtin_fminf128:
3074 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3075 Intrinsic::minnum,
3076 Intrinsic::experimental_constrained_minnum));
3078 case Builtin::BIfmaximum_num:
3079 case Builtin::BIfmaximum_numf:
3080 case Builtin::BIfmaximum_numl:
3081 case Builtin::BI__builtin_fmaximum_num:
3082 case Builtin::BI__builtin_fmaximum_numf:
3083 case Builtin::BI__builtin_fmaximum_numf16:
3084 case Builtin::BI__builtin_fmaximum_numl:
3085 case Builtin::BI__builtin_fmaximum_numf128:
3086 return RValue::get(
3087 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::maximumnum));
3089 case Builtin::BIfminimum_num:
3090 case Builtin::BIfminimum_numf:
3091 case Builtin::BIfminimum_numl:
3092 case Builtin::BI__builtin_fminimum_num:
3093 case Builtin::BI__builtin_fminimum_numf:
3094 case Builtin::BI__builtin_fminimum_numf16:
3095 case Builtin::BI__builtin_fminimum_numl:
3096 case Builtin::BI__builtin_fminimum_numf128:
3097 return RValue::get(
3098 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::minimumnum));
3100 // fmod() is a special-case. It maps to the frem instruction rather than an
3101 // LLVM intrinsic.
3102 case Builtin::BIfmod:
3103 case Builtin::BIfmodf:
3104 case Builtin::BIfmodl:
3105 case Builtin::BI__builtin_fmod:
3106 case Builtin::BI__builtin_fmodf:
3107 case Builtin::BI__builtin_fmodf16:
3108 case Builtin::BI__builtin_fmodl:
3109 case Builtin::BI__builtin_fmodf128:
3110 case Builtin::BI__builtin_elementwise_fmod: {
3111 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3112 Value *Arg1 = EmitScalarExpr(E->getArg(0));
3113 Value *Arg2 = EmitScalarExpr(E->getArg(1));
3114 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
3117 case Builtin::BIlog:
3118 case Builtin::BIlogf:
3119 case Builtin::BIlogl:
3120 case Builtin::BI__builtin_log:
3121 case Builtin::BI__builtin_logf:
3122 case Builtin::BI__builtin_logf16:
3123 case Builtin::BI__builtin_logl:
3124 case Builtin::BI__builtin_logf128:
3125 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3126 Intrinsic::log,
3127 Intrinsic::experimental_constrained_log));
3129 case Builtin::BIlog10:
3130 case Builtin::BIlog10f:
3131 case Builtin::BIlog10l:
3132 case Builtin::BI__builtin_log10:
3133 case Builtin::BI__builtin_log10f:
3134 case Builtin::BI__builtin_log10f16:
3135 case Builtin::BI__builtin_log10l:
3136 case Builtin::BI__builtin_log10f128:
3137 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3138 Intrinsic::log10,
3139 Intrinsic::experimental_constrained_log10));
3141 case Builtin::BIlog2:
3142 case Builtin::BIlog2f:
3143 case Builtin::BIlog2l:
3144 case Builtin::BI__builtin_log2:
3145 case Builtin::BI__builtin_log2f:
3146 case Builtin::BI__builtin_log2f16:
3147 case Builtin::BI__builtin_log2l:
3148 case Builtin::BI__builtin_log2f128:
3149 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3150 Intrinsic::log2,
3151 Intrinsic::experimental_constrained_log2));
3153 case Builtin::BInearbyint:
3154 case Builtin::BInearbyintf:
3155 case Builtin::BInearbyintl:
3156 case Builtin::BI__builtin_nearbyint:
3157 case Builtin::BI__builtin_nearbyintf:
3158 case Builtin::BI__builtin_nearbyintl:
3159 case Builtin::BI__builtin_nearbyintf128:
3160 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3161 Intrinsic::nearbyint,
3162 Intrinsic::experimental_constrained_nearbyint));
3164 case Builtin::BIpow:
3165 case Builtin::BIpowf:
3166 case Builtin::BIpowl:
3167 case Builtin::BI__builtin_pow:
3168 case Builtin::BI__builtin_powf:
3169 case Builtin::BI__builtin_powf16:
3170 case Builtin::BI__builtin_powl:
3171 case Builtin::BI__builtin_powf128:
3172 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3173 Intrinsic::pow,
3174 Intrinsic::experimental_constrained_pow));
3176 case Builtin::BIrint:
3177 case Builtin::BIrintf:
3178 case Builtin::BIrintl:
3179 case Builtin::BI__builtin_rint:
3180 case Builtin::BI__builtin_rintf:
3181 case Builtin::BI__builtin_rintf16:
3182 case Builtin::BI__builtin_rintl:
3183 case Builtin::BI__builtin_rintf128:
3184 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3185 Intrinsic::rint,
3186 Intrinsic::experimental_constrained_rint));
3188 case Builtin::BIround:
3189 case Builtin::BIroundf:
3190 case Builtin::BIroundl:
3191 case Builtin::BI__builtin_round:
3192 case Builtin::BI__builtin_roundf:
3193 case Builtin::BI__builtin_roundf16:
3194 case Builtin::BI__builtin_roundl:
3195 case Builtin::BI__builtin_roundf128:
3196 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3197 Intrinsic::round,
3198 Intrinsic::experimental_constrained_round));
3200 case Builtin::BIroundeven:
3201 case Builtin::BIroundevenf:
3202 case Builtin::BIroundevenl:
3203 case Builtin::BI__builtin_roundeven:
3204 case Builtin::BI__builtin_roundevenf:
3205 case Builtin::BI__builtin_roundevenf16:
3206 case Builtin::BI__builtin_roundevenl:
3207 case Builtin::BI__builtin_roundevenf128:
3208 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3209 Intrinsic::roundeven,
3210 Intrinsic::experimental_constrained_roundeven));
3212 case Builtin::BIsin:
3213 case Builtin::BIsinf:
3214 case Builtin::BIsinl:
3215 case Builtin::BI__builtin_sin:
3216 case Builtin::BI__builtin_sinf:
3217 case Builtin::BI__builtin_sinf16:
3218 case Builtin::BI__builtin_sinl:
3219 case Builtin::BI__builtin_sinf128:
3220 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3221 Intrinsic::sin,
3222 Intrinsic::experimental_constrained_sin));
3224 case Builtin::BIsinh:
3225 case Builtin::BIsinhf:
3226 case Builtin::BIsinhl:
3227 case Builtin::BI__builtin_sinh:
3228 case Builtin::BI__builtin_sinhf:
3229 case Builtin::BI__builtin_sinhf16:
3230 case Builtin::BI__builtin_sinhl:
3231 case Builtin::BI__builtin_sinhf128:
3232 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3233 *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));
3235 case Builtin::BIsqrt:
3236 case Builtin::BIsqrtf:
3237 case Builtin::BIsqrtl:
3238 case Builtin::BI__builtin_sqrt:
3239 case Builtin::BI__builtin_sqrtf:
3240 case Builtin::BI__builtin_sqrtf16:
3241 case Builtin::BI__builtin_sqrtl:
3242 case Builtin::BI__builtin_sqrtf128:
3243 case Builtin::BI__builtin_elementwise_sqrt: {
3244 llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
3245 *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
3246 SetSqrtFPAccuracy(Call);
3247 return RValue::get(Call);
3250 case Builtin::BItan:
3251 case Builtin::BItanf:
3252 case Builtin::BItanl:
3253 case Builtin::BI__builtin_tan:
3254 case Builtin::BI__builtin_tanf:
3255 case Builtin::BI__builtin_tanf16:
3256 case Builtin::BI__builtin_tanl:
3257 case Builtin::BI__builtin_tanf128:
3258 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3259 *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan));
3261 case Builtin::BItanh:
3262 case Builtin::BItanhf:
3263 case Builtin::BItanhl:
3264 case Builtin::BI__builtin_tanh:
3265 case Builtin::BI__builtin_tanhf:
3266 case Builtin::BI__builtin_tanhf16:
3267 case Builtin::BI__builtin_tanhl:
3268 case Builtin::BI__builtin_tanhf128:
3269 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3270 *this, E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh));
3272 case Builtin::BItrunc:
3273 case Builtin::BItruncf:
3274 case Builtin::BItruncl:
3275 case Builtin::BI__builtin_trunc:
3276 case Builtin::BI__builtin_truncf:
3277 case Builtin::BI__builtin_truncf16:
3278 case Builtin::BI__builtin_truncl:
3279 case Builtin::BI__builtin_truncf128:
3280 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3281 Intrinsic::trunc,
3282 Intrinsic::experimental_constrained_trunc));
3284 case Builtin::BIlround:
3285 case Builtin::BIlroundf:
3286 case Builtin::BIlroundl:
3287 case Builtin::BI__builtin_lround:
3288 case Builtin::BI__builtin_lroundf:
3289 case Builtin::BI__builtin_lroundl:
3290 case Builtin::BI__builtin_lroundf128:
3291 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3292 *this, E, Intrinsic::lround,
3293 Intrinsic::experimental_constrained_lround));
3295 case Builtin::BIllround:
3296 case Builtin::BIllroundf:
3297 case Builtin::BIllroundl:
3298 case Builtin::BI__builtin_llround:
3299 case Builtin::BI__builtin_llroundf:
3300 case Builtin::BI__builtin_llroundl:
3301 case Builtin::BI__builtin_llroundf128:
3302 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3303 *this, E, Intrinsic::llround,
3304 Intrinsic::experimental_constrained_llround));
3306 case Builtin::BIlrint:
3307 case Builtin::BIlrintf:
3308 case Builtin::BIlrintl:
3309 case Builtin::BI__builtin_lrint:
3310 case Builtin::BI__builtin_lrintf:
3311 case Builtin::BI__builtin_lrintl:
3312 case Builtin::BI__builtin_lrintf128:
3313 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3314 *this, E, Intrinsic::lrint,
3315 Intrinsic::experimental_constrained_lrint));
3317 case Builtin::BIllrint:
3318 case Builtin::BIllrintf:
3319 case Builtin::BIllrintl:
3320 case Builtin::BI__builtin_llrint:
3321 case Builtin::BI__builtin_llrintf:
3322 case Builtin::BI__builtin_llrintl:
3323 case Builtin::BI__builtin_llrintf128:
3324 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3325 *this, E, Intrinsic::llrint,
3326 Intrinsic::experimental_constrained_llrint));
3327 case Builtin::BI__builtin_ldexp:
3328 case Builtin::BI__builtin_ldexpf:
3329 case Builtin::BI__builtin_ldexpl:
3330 case Builtin::BI__builtin_ldexpf16:
3331 case Builtin::BI__builtin_ldexpf128: {
3332 return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
3333 *this, E, Intrinsic::ldexp,
3334 Intrinsic::experimental_constrained_ldexp));
3336 default:
3337 break;
3341 // Check NonnullAttribute/NullabilityArg and Alignment.
3342 auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
3343 unsigned ParmNum) {
3344 Value *Val = A.emitRawPointer(*this);
3345 EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
3346 ParmNum);
3348 if (SanOpts.has(SanitizerKind::Alignment)) {
3349 SanitizerSet SkippedChecks;
3350 SkippedChecks.set(SanitizerKind::All);
3351 SkippedChecks.clear(SanitizerKind::Alignment);
3352 SourceLocation Loc = Arg->getExprLoc();
3353 // Strip an implicit cast.
3354 if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
3355 if (CE->getCastKind() == CK_BitCast)
3356 Arg = CE->getSubExpr();
3357 EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
3358 SkippedChecks);
3362 switch (BuiltinIDIfNoAsmLabel) {
3363 default: break;
3364 case Builtin::BI__builtin___CFStringMakeConstantString:
3365 case Builtin::BI__builtin___NSStringMakeConstantString:
3366 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3367 case Builtin::BI__builtin_stdarg_start:
3368 case Builtin::BI__builtin_va_start:
3369 case Builtin::BI__va_start:
3370 case Builtin::BI__builtin_va_end:
3371 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3372 ? EmitScalarExpr(E->getArg(0))
3373 : EmitVAListRef(E->getArg(0)).emitRawPointer(*this),
3374 BuiltinID != Builtin::BI__builtin_va_end);
3375 return RValue::get(nullptr);
3376 case Builtin::BI__builtin_va_copy: {
3377 Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this);
3378 Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this);
3379 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
3380 {DstPtr, SrcPtr});
3381 return RValue::get(nullptr);
3383 case Builtin::BIabs:
3384 case Builtin::BIlabs:
3385 case Builtin::BIllabs:
3386 case Builtin::BI__builtin_abs:
3387 case Builtin::BI__builtin_labs:
3388 case Builtin::BI__builtin_llabs: {
3389 bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
3391 Value *Result;
3392 switch (getLangOpts().getSignedOverflowBehavior()) {
3393 case LangOptions::SOB_Defined:
3394 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
3395 break;
3396 case LangOptions::SOB_Undefined:
3397 if (!SanitizeOverflow) {
3398 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
3399 break;
3401 [[fallthrough]];
3402 case LangOptions::SOB_Trapping:
3403 // TODO: Somehow handle the corner case when the address of abs is taken.
3404 Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
3405 break;
3407 return RValue::get(Result);
3409 case Builtin::BI__builtin_complex: {
3410 Value *Real = EmitScalarExpr(E->getArg(0));
3411 Value *Imag = EmitScalarExpr(E->getArg(1));
3412 return RValue::getComplex({Real, Imag});
3414 case Builtin::BI__builtin_conj:
3415 case Builtin::BI__builtin_conjf:
3416 case Builtin::BI__builtin_conjl:
3417 case Builtin::BIconj:
3418 case Builtin::BIconjf:
3419 case Builtin::BIconjl: {
3420 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3421 Value *Real = ComplexVal.first;
3422 Value *Imag = ComplexVal.second;
3423 Imag = Builder.CreateFNeg(Imag, "neg");
3424 return RValue::getComplex(std::make_pair(Real, Imag));
3426 case Builtin::BI__builtin_creal:
3427 case Builtin::BI__builtin_crealf:
3428 case Builtin::BI__builtin_creall:
3429 case Builtin::BIcreal:
3430 case Builtin::BIcrealf:
3431 case Builtin::BIcreall: {
3432 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3433 return RValue::get(ComplexVal.first);
3436 case Builtin::BI__builtin_preserve_access_index: {
3437 // Only enable the preserved access index region when debug info is
3438 // available, as debug info is needed to preserve the user-level access
3439 // pattern.
3440 if (!getDebugInfo()) {
3441 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
3442 return RValue::get(EmitScalarExpr(E->getArg(0)));
3445 // Nested builtin_preserve_access_index() not supported
3446 if (IsInPreservedAIRegion) {
3447 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3448 return RValue::get(EmitScalarExpr(E->getArg(0)));
3451 IsInPreservedAIRegion = true;
3452 Value *Res = EmitScalarExpr(E->getArg(0));
3453 IsInPreservedAIRegion = false;
3454 return RValue::get(Res);
3457 case Builtin::BI__builtin_cimag:
3458 case Builtin::BI__builtin_cimagf:
3459 case Builtin::BI__builtin_cimagl:
3460 case Builtin::BIcimag:
3461 case Builtin::BIcimagf:
3462 case Builtin::BIcimagl: {
3463 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3464 return RValue::get(ComplexVal.second);
3467 case Builtin::BI__builtin_clrsb:
3468 case Builtin::BI__builtin_clrsbl:
3469 case Builtin::BI__builtin_clrsbll: {
3470 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
3471 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3473 llvm::Type *ArgType = ArgValue->getType();
3474 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3476 llvm::Type *ResultType = ConvertType(E->getType());
3477 Value *Zero = llvm::Constant::getNullValue(ArgType);
3478 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3479 Value *Inverse = Builder.CreateNot(ArgValue, "not");
3480 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3481 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3482 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3483 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3484 "cast");
3485 return RValue::get(Result);
3487 case Builtin::BI__builtin_ctzs:
3488 case Builtin::BI__builtin_ctz:
3489 case Builtin::BI__builtin_ctzl:
3490 case Builtin::BI__builtin_ctzll:
3491 case Builtin::BI__builtin_ctzg: {
3492 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3493 E->getNumArgs() > 1;
3495 Value *ArgValue =
3496 HasFallback ? EmitScalarExpr(E->getArg(0))
3497 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
3499 llvm::Type *ArgType = ArgValue->getType();
3500 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3502 llvm::Type *ResultType = ConvertType(E->getType());
3503 Value *ZeroUndef =
3504 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3505 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3506 if (Result->getType() != ResultType)
3507 Result =
3508 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3509 if (!HasFallback)
3510 return RValue::get(Result);
3512 Value *Zero = Constant::getNullValue(ArgType);
3513 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3514 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3515 Value *ResultOrFallback =
3516 Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg");
3517 return RValue::get(ResultOrFallback);
3519 case Builtin::BI__builtin_clzs:
3520 case Builtin::BI__builtin_clz:
3521 case Builtin::BI__builtin_clzl:
3522 case Builtin::BI__builtin_clzll:
3523 case Builtin::BI__builtin_clzg: {
3524 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3525 E->getNumArgs() > 1;
3527 Value *ArgValue =
3528 HasFallback ? EmitScalarExpr(E->getArg(0))
3529 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
3531 llvm::Type *ArgType = ArgValue->getType();
3532 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3534 llvm::Type *ResultType = ConvertType(E->getType());
3535 Value *ZeroUndef =
3536 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3537 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3538 if (Result->getType() != ResultType)
3539 Result =
3540 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3541 if (!HasFallback)
3542 return RValue::get(Result);
3544 Value *Zero = Constant::getNullValue(ArgType);
3545 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3546 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3547 Value *ResultOrFallback =
3548 Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg");
3549 return RValue::get(ResultOrFallback);
3551 case Builtin::BI__builtin_ffs:
3552 case Builtin::BI__builtin_ffsl:
3553 case Builtin::BI__builtin_ffsll: {
3554 // ffs(x) -> x ? cttz(x) + 1 : 0
3555 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3557 llvm::Type *ArgType = ArgValue->getType();
3558 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3560 llvm::Type *ResultType = ConvertType(E->getType());
3561 Value *Tmp =
3562 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3563 llvm::ConstantInt::get(ArgType, 1));
3564 Value *Zero = llvm::Constant::getNullValue(ArgType);
3565 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3566 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3567 if (Result->getType() != ResultType)
3568 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3569 "cast");
3570 return RValue::get(Result);
3572 case Builtin::BI__builtin_parity:
3573 case Builtin::BI__builtin_parityl:
3574 case Builtin::BI__builtin_parityll: {
3575 // parity(x) -> ctpop(x) & 1
3576 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3578 llvm::Type *ArgType = ArgValue->getType();
3579 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3581 llvm::Type *ResultType = ConvertType(E->getType());
3582 Value *Tmp = Builder.CreateCall(F, ArgValue);
3583 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3584 if (Result->getType() != ResultType)
3585 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3586 "cast");
3587 return RValue::get(Result);
3589 case Builtin::BI__lzcnt16:
3590 case Builtin::BI__lzcnt:
3591 case Builtin::BI__lzcnt64: {
3592 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3594 llvm::Type *ArgType = ArgValue->getType();
3595 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3597 llvm::Type *ResultType = ConvertType(E->getType());
3598 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3599 if (Result->getType() != ResultType)
3600 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3601 "cast");
3602 return RValue::get(Result);
3604 case Builtin::BI__popcnt16:
3605 case Builtin::BI__popcnt:
3606 case Builtin::BI__popcnt64:
3607 case Builtin::BI__builtin_popcount:
3608 case Builtin::BI__builtin_popcountl:
3609 case Builtin::BI__builtin_popcountll:
3610 case Builtin::BI__builtin_popcountg: {
3611 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3613 llvm::Type *ArgType = ArgValue->getType();
3614 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3616 llvm::Type *ResultType = ConvertType(E->getType());
3617 Value *Result = Builder.CreateCall(F, ArgValue);
3618 if (Result->getType() != ResultType)
3619 Result =
3620 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3621 return RValue::get(Result);
3623 case Builtin::BI__builtin_unpredictable: {
3624 // Always return the argument of __builtin_unpredictable. LLVM does not
3625 // handle this builtin. Metadata for this builtin should be added directly
3626 // to instructions such as branches or switches that use it.
3627 return RValue::get(EmitScalarExpr(E->getArg(0)));
3629 case Builtin::BI__builtin_expect: {
3630 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3631 llvm::Type *ArgType = ArgValue->getType();
3633 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3634 // Don't generate llvm.expect on -O0 as the backend won't use it for
3635 // anything.
3636 // Note, we still IRGen ExpectedValue because it could have side-effects.
3637 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3638 return RValue::get(ArgValue);
3640 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3641 Value *Result =
3642 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3643 return RValue::get(Result);
3645 case Builtin::BI__builtin_expect_with_probability: {
3646 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3647 llvm::Type *ArgType = ArgValue->getType();
3649 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3650 llvm::APFloat Probability(0.0);
3651 const Expr *ProbArg = E->getArg(2);
3652 bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3653 assert(EvalSucceed && "probability should be able to evaluate as float");
3654 (void)EvalSucceed;
3655 bool LoseInfo = false;
3656 Probability.convert(llvm::APFloat::IEEEdouble(),
3657 llvm::RoundingMode::Dynamic, &LoseInfo);
3658 llvm::Type *Ty = ConvertType(ProbArg->getType());
3659 Constant *Confidence = ConstantFP::get(Ty, Probability);
3660 // Don't generate llvm.expect.with.probability on -O0 as the backend
3661 // won't use it for anything.
3662 // Note, we still IRGen ExpectedValue because it could have side-effects.
3663 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3664 return RValue::get(ArgValue);
3666 Function *FnExpect =
3667 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3668 Value *Result = Builder.CreateCall(
3669 FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3670 return RValue::get(Result);
3672 case Builtin::BI__builtin_assume_aligned: {
3673 const Expr *Ptr = E->getArg(0);
3674 Value *PtrValue = EmitScalarExpr(Ptr);
3675 Value *OffsetValue =
3676 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3678 Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3679 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3680 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3681 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3682 llvm::Value::MaximumAlignment);
3684 emitAlignmentAssumption(PtrValue, Ptr,
3685 /*The expr loc is sufficient.*/ SourceLocation(),
3686 AlignmentCI, OffsetValue);
3687 return RValue::get(PtrValue);
3689 case Builtin::BI__assume:
3690 case Builtin::BI__builtin_assume: {
3691 if (E->getArg(0)->HasSideEffects(getContext()))
3692 return RValue::get(nullptr);
3694 Value *ArgValue = EmitCheckedArgForAssume(E->getArg(0));
3695 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3696 Builder.CreateCall(FnAssume, ArgValue);
3697 return RValue::get(nullptr);
3699 case Builtin::BI__builtin_assume_separate_storage: {
3700 const Expr *Arg0 = E->getArg(0);
3701 const Expr *Arg1 = E->getArg(1);
3703 Value *Value0 = EmitScalarExpr(Arg0);
3704 Value *Value1 = EmitScalarExpr(Arg1);
3706 Value *Values[] = {Value0, Value1};
3707 OperandBundleDefT<Value *> OBD("separate_storage", Values);
3708 Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3709 return RValue::get(nullptr);
3711 case Builtin::BI__builtin_allow_runtime_check: {
3712 StringRef Kind =
3713 cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString();
3714 LLVMContext &Ctx = CGM.getLLVMContext();
3715 llvm::Value *Allow = Builder.CreateCall(
3716 CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),
3717 llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
3718 return RValue::get(Allow);
3720 case Builtin::BI__arithmetic_fence: {
3721 // Create the builtin call if FastMath is selected, and the target
3722 // supports the builtin, otherwise just return the argument.
3723 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3724 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3725 bool isArithmeticFenceEnabled =
3726 FMF.allowReassoc() &&
3727 getContext().getTargetInfo().checkArithmeticFenceSupported();
3728 QualType ArgType = E->getArg(0)->getType();
3729 if (ArgType->isComplexType()) {
3730 if (isArithmeticFenceEnabled) {
3731 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3732 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3733 Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3734 ConvertType(ElementType));
3735 Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3736 ConvertType(ElementType));
3737 return RValue::getComplex(std::make_pair(Real, Imag));
3739 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3740 Value *Real = ComplexVal.first;
3741 Value *Imag = ComplexVal.second;
3742 return RValue::getComplex(std::make_pair(Real, Imag));
3744 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3745 if (isArithmeticFenceEnabled)
3746 return RValue::get(
3747 Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3748 return RValue::get(ArgValue);
3750 case Builtin::BI__builtin_bswap16:
3751 case Builtin::BI__builtin_bswap32:
3752 case Builtin::BI__builtin_bswap64:
3753 case Builtin::BI_byteswap_ushort:
3754 case Builtin::BI_byteswap_ulong:
3755 case Builtin::BI_byteswap_uint64: {
3756 return RValue::get(
3757 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bswap));
3759 case Builtin::BI__builtin_bitreverse8:
3760 case Builtin::BI__builtin_bitreverse16:
3761 case Builtin::BI__builtin_bitreverse32:
3762 case Builtin::BI__builtin_bitreverse64: {
3763 return RValue::get(
3764 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bitreverse));
3766 case Builtin::BI__builtin_rotateleft8:
3767 case Builtin::BI__builtin_rotateleft16:
3768 case Builtin::BI__builtin_rotateleft32:
3769 case Builtin::BI__builtin_rotateleft64:
3770 case Builtin::BI_rotl8: // Microsoft variants of rotate left
3771 case Builtin::BI_rotl16:
3772 case Builtin::BI_rotl:
3773 case Builtin::BI_lrotl:
3774 case Builtin::BI_rotl64:
3775 return emitRotate(E, false);
3777 case Builtin::BI__builtin_rotateright8:
3778 case Builtin::BI__builtin_rotateright16:
3779 case Builtin::BI__builtin_rotateright32:
3780 case Builtin::BI__builtin_rotateright64:
3781 case Builtin::BI_rotr8: // Microsoft variants of rotate right
3782 case Builtin::BI_rotr16:
3783 case Builtin::BI_rotr:
3784 case Builtin::BI_lrotr:
3785 case Builtin::BI_rotr64:
3786 return emitRotate(E, true);
3788 case Builtin::BI__builtin_constant_p: {
3789 llvm::Type *ResultType = ConvertType(E->getType());
3791 const Expr *Arg = E->getArg(0);
3792 QualType ArgType = Arg->getType();
3793 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3794 // and likely a mistake.
3795 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3796 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3797 // Per the GCC documentation, only numeric constants are recognized after
3798 // inlining.
3799 return RValue::get(ConstantInt::get(ResultType, 0));
3801 if (Arg->HasSideEffects(getContext()))
3802 // The argument is unevaluated, so be conservative if it might have
3803 // side-effects.
3804 return RValue::get(ConstantInt::get(ResultType, 0));
3806 Value *ArgValue = EmitScalarExpr(Arg);
3807 if (ArgType->isObjCObjectPointerType()) {
3808 // Convert Objective-C objects to id because we cannot distinguish between
3809 // LLVM types for Obj-C classes as they are opaque.
3810 ArgType = CGM.getContext().getObjCIdType();
3811 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3813 Function *F =
3814 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3815 Value *Result = Builder.CreateCall(F, ArgValue);
3816 if (Result->getType() != ResultType)
3817 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3818 return RValue::get(Result);
3820 case Builtin::BI__builtin_dynamic_object_size:
3821 case Builtin::BI__builtin_object_size: {
3822 unsigned Type =
3823 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3824 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3826 // We pass this builtin onto the optimizer so that it can figure out the
3827 // object size in more complex cases.
3828 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3829 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3830 /*EmittedE=*/nullptr, IsDynamic));
3832 case Builtin::BI__builtin_counted_by_ref: {
3833 // Default to returning '(void *) 0'.
3834 llvm::Value *Result = llvm::ConstantPointerNull::get(
3835 llvm::PointerType::getUnqual(getLLVMContext()));
3837 const Expr *Arg = E->getArg(0)->IgnoreParenImpCasts();
3839 if (auto *UO = dyn_cast<UnaryOperator>(Arg);
3840 UO && UO->getOpcode() == UO_AddrOf) {
3841 Arg = UO->getSubExpr()->IgnoreParenImpCasts();
3843 if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Arg))
3844 Arg = ASE->getBase()->IgnoreParenImpCasts();
3847 if (const MemberExpr *ME = dyn_cast_if_present<MemberExpr>(Arg)) {
3848 if (auto *CATy =
3849 ME->getMemberDecl()->getType()->getAs<CountAttributedType>();
3850 CATy && CATy->getKind() == CountAttributedType::CountedBy) {
3851 const auto *FAMDecl = cast<FieldDecl>(ME->getMemberDecl());
3852 if (const FieldDecl *CountFD = FAMDecl->findCountedByField())
3853 Result = GetCountedByFieldExprGEP(Arg, FAMDecl, CountFD);
3854 else
3855 llvm::report_fatal_error("Cannot find the counted_by 'count' field");
3859 return RValue::get(Result);
3861 case Builtin::BI__builtin_prefetch: {
3862 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3863 // FIXME: Technically these constants should be of type 'int', yes?
3864 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3865 llvm::ConstantInt::get(Int32Ty, 0);
3866 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3867 llvm::ConstantInt::get(Int32Ty, 3);
3868 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3869 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3870 Builder.CreateCall(F, {Address, RW, Locality, Data});
3871 return RValue::get(nullptr);
3873 case Builtin::BI__builtin_readcyclecounter: {
3874 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3875 return RValue::get(Builder.CreateCall(F));
3877 case Builtin::BI__builtin_readsteadycounter: {
3878 Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3879 return RValue::get(Builder.CreateCall(F));
3881 case Builtin::BI__builtin___clear_cache: {
3882 Value *Begin = EmitScalarExpr(E->getArg(0));
3883 Value *End = EmitScalarExpr(E->getArg(1));
3884 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3885 return RValue::get(Builder.CreateCall(F, {Begin, End}));
3887 case Builtin::BI__builtin_trap:
3888 EmitTrapCall(Intrinsic::trap);
3889 return RValue::get(nullptr);
3890 case Builtin::BI__builtin_verbose_trap: {
3891 llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation();
3892 if (getDebugInfo()) {
3893 TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor(
3894 TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()),
3895 *E->getArg(1)->tryEvaluateString(getContext()));
3897 ApplyDebugLocation ApplyTrapDI(*this, TrapLocation);
3898 // Currently no attempt is made to prevent traps from being merged.
3899 EmitTrapCall(Intrinsic::trap);
3900 return RValue::get(nullptr);
3902 case Builtin::BI__debugbreak:
3903 EmitTrapCall(Intrinsic::debugtrap);
3904 return RValue::get(nullptr);
3905 case Builtin::BI__builtin_unreachable: {
3906 EmitUnreachable(E->getExprLoc());
3908 // We do need to preserve an insertion point.
3909 EmitBlock(createBasicBlock("unreachable.cont"));
3911 return RValue::get(nullptr);
3914 case Builtin::BI__builtin_powi:
3915 case Builtin::BI__builtin_powif:
3916 case Builtin::BI__builtin_powil: {
3917 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3918 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3920 if (Builder.getIsFPConstrained()) {
3921 // FIXME: llvm.powi has 2 mangling types,
3922 // llvm.experimental.constrained.powi has one.
3923 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3924 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3925 Src0->getType());
3926 return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3929 Function *F = CGM.getIntrinsic(Intrinsic::powi,
3930 { Src0->getType(), Src1->getType() });
3931 return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3933 case Builtin::BI__builtin_frexpl: {
3934 // Linux PPC will not be adding additional PPCDoubleDouble support.
3935 // WIP to switch default to IEEE long double. Will emit libcall for
3936 // frexpl instead of legalizing this type in the BE.
3937 if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3938 break;
3939 [[fallthrough]];
3941 case Builtin::BI__builtin_frexp:
3942 case Builtin::BI__builtin_frexpf:
3943 case Builtin::BI__builtin_frexpf128:
3944 case Builtin::BI__builtin_frexpf16:
3945 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3946 case Builtin::BI__builtin_isgreater:
3947 case Builtin::BI__builtin_isgreaterequal:
3948 case Builtin::BI__builtin_isless:
3949 case Builtin::BI__builtin_islessequal:
3950 case Builtin::BI__builtin_islessgreater:
3951 case Builtin::BI__builtin_isunordered: {
3952 // Ordered comparisons: we know the arguments to these are matching scalar
3953 // floating point values.
3954 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3955 Value *LHS = EmitScalarExpr(E->getArg(0));
3956 Value *RHS = EmitScalarExpr(E->getArg(1));
3958 switch (BuiltinID) {
3959 default: llvm_unreachable("Unknown ordered comparison");
3960 case Builtin::BI__builtin_isgreater:
3961 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3962 break;
3963 case Builtin::BI__builtin_isgreaterequal:
3964 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3965 break;
3966 case Builtin::BI__builtin_isless:
3967 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3968 break;
3969 case Builtin::BI__builtin_islessequal:
3970 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
3971 break;
3972 case Builtin::BI__builtin_islessgreater:
3973 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
3974 break;
3975 case Builtin::BI__builtin_isunordered:
3976 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
3977 break;
3979 // ZExt bool to int type.
3980 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
3983 case Builtin::BI__builtin_isnan: {
3984 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3985 Value *V = EmitScalarExpr(E->getArg(0));
3986 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3987 return RValue::get(Result);
3988 return RValue::get(
3989 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
3990 ConvertType(E->getType())));
3993 case Builtin::BI__builtin_issignaling: {
3994 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3995 Value *V = EmitScalarExpr(E->getArg(0));
3996 return RValue::get(
3997 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
3998 ConvertType(E->getType())));
4001 case Builtin::BI__builtin_isinf: {
4002 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4003 Value *V = EmitScalarExpr(E->getArg(0));
4004 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4005 return RValue::get(Result);
4006 return RValue::get(
4007 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
4008 ConvertType(E->getType())));
4011 case Builtin::BIfinite:
4012 case Builtin::BI__finite:
4013 case Builtin::BIfinitef:
4014 case Builtin::BI__finitef:
4015 case Builtin::BIfinitel:
4016 case Builtin::BI__finitel:
4017 case Builtin::BI__builtin_isfinite: {
4018 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4019 Value *V = EmitScalarExpr(E->getArg(0));
4020 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4021 return RValue::get(Result);
4022 return RValue::get(
4023 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
4024 ConvertType(E->getType())));
4027 case Builtin::BI__builtin_isnormal: {
4028 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4029 Value *V = EmitScalarExpr(E->getArg(0));
4030 return RValue::get(
4031 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
4032 ConvertType(E->getType())));
4035 case Builtin::BI__builtin_issubnormal: {
4036 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4037 Value *V = EmitScalarExpr(E->getArg(0));
4038 return RValue::get(
4039 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
4040 ConvertType(E->getType())));
4043 case Builtin::BI__builtin_iszero: {
4044 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4045 Value *V = EmitScalarExpr(E->getArg(0));
4046 return RValue::get(
4047 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
4048 ConvertType(E->getType())));
4051 case Builtin::BI__builtin_isfpclass: {
4052 Expr::EvalResult Result;
4053 if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
4054 break;
4055 uint64_t Test = Result.Val.getInt().getLimitedValue();
4056 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4057 Value *V = EmitScalarExpr(E->getArg(0));
4058 return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
4059 ConvertType(E->getType())));
// __builtin_nondeterministic_value(x): only the *type* of argument 0 is used
// here; the argument expression itself is not emitted in this case. The
// result is a frozen poison value of that type.
4062 case Builtin::BI__builtin_nondeterministic_value: {
4063 llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
// Start from poison, then freeze it before returning it.
4065 Value *Result = PoisonValue::get(Ty);
4066 Result = Builder.CreateFreeze(Result);
4068 return RValue::get(Result);
4071 case Builtin::BI__builtin_elementwise_abs: {
4072 Value *Result;
4073 QualType QT = E->getArg(0)->getType();
4075 if (auto *VecTy = QT->getAs<VectorType>())
4076 QT = VecTy->getElementType();
4077 if (QT->isIntegerType())
4078 Result = Builder.CreateBinaryIntrinsic(
4079 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
4080 Builder.getFalse(), nullptr, "elt.abs");
4081 else
4082 Result = emitBuiltinWithOneOverloadedType<1>(
4083 *this, E, llvm::Intrinsic::fabs, "elt.abs");
4085 return RValue::get(Result);
4087 case Builtin::BI__builtin_elementwise_acos:
4088 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4089 *this, E, llvm::Intrinsic::acos, "elt.acos"));
4090 case Builtin::BI__builtin_elementwise_asin:
4091 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4092 *this, E, llvm::Intrinsic::asin, "elt.asin"));
4093 case Builtin::BI__builtin_elementwise_atan:
4094 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4095 *this, E, llvm::Intrinsic::atan, "elt.atan"));
4096 case Builtin::BI__builtin_elementwise_atan2:
4097 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4098 *this, E, llvm::Intrinsic::atan2, "elt.atan2"));
4099 case Builtin::BI__builtin_elementwise_ceil:
4100 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4101 *this, E, llvm::Intrinsic::ceil, "elt.ceil"));
4102 case Builtin::BI__builtin_elementwise_exp:
4103 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4104 *this, E, llvm::Intrinsic::exp, "elt.exp"));
4105 case Builtin::BI__builtin_elementwise_exp2:
4106 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4107 *this, E, llvm::Intrinsic::exp2, "elt.exp2"));
4108 case Builtin::BI__builtin_elementwise_log:
4109 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4110 *this, E, llvm::Intrinsic::log, "elt.log"));
4111 case Builtin::BI__builtin_elementwise_log2:
4112 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4113 *this, E, llvm::Intrinsic::log2, "elt.log2"));
4114 case Builtin::BI__builtin_elementwise_log10:
4115 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4116 *this, E, llvm::Intrinsic::log10, "elt.log10"));
4117 case Builtin::BI__builtin_elementwise_pow: {
4118 return RValue::get(
4119 emitBuiltinWithOneOverloadedType<2>(*this, E, llvm::Intrinsic::pow));
4121 case Builtin::BI__builtin_elementwise_bitreverse:
4122 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4123 *this, E, llvm::Intrinsic::bitreverse, "elt.bitreverse"));
4124 case Builtin::BI__builtin_elementwise_cos:
4125 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4126 *this, E, llvm::Intrinsic::cos, "elt.cos"));
4127 case Builtin::BI__builtin_elementwise_cosh:
4128 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4129 *this, E, llvm::Intrinsic::cosh, "elt.cosh"));
4130 case Builtin::BI__builtin_elementwise_floor:
4131 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4132 *this, E, llvm::Intrinsic::floor, "elt.floor"));
4133 case Builtin::BI__builtin_elementwise_popcount:
4134 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4135 *this, E, llvm::Intrinsic::ctpop, "elt.ctpop"));
4136 case Builtin::BI__builtin_elementwise_roundeven:
4137 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4138 *this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));
4139 case Builtin::BI__builtin_elementwise_round:
4140 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4141 *this, E, llvm::Intrinsic::round, "elt.round"));
4142 case Builtin::BI__builtin_elementwise_rint:
4143 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4144 *this, E, llvm::Intrinsic::rint, "elt.rint"));
4145 case Builtin::BI__builtin_elementwise_nearbyint:
4146 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4147 *this, E, llvm::Intrinsic::nearbyint, "elt.nearbyint"));
4148 case Builtin::BI__builtin_elementwise_sin:
4149 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4150 *this, E, llvm::Intrinsic::sin, "elt.sin"));
4151 case Builtin::BI__builtin_elementwise_sinh:
4152 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4153 *this, E, llvm::Intrinsic::sinh, "elt.sinh"));
4154 case Builtin::BI__builtin_elementwise_tan:
4155 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4156 *this, E, llvm::Intrinsic::tan, "elt.tan"));
4157 case Builtin::BI__builtin_elementwise_tanh:
4158 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4159 *this, E, llvm::Intrinsic::tanh, "elt.tanh"));
4160 case Builtin::BI__builtin_elementwise_trunc:
4161 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4162 *this, E, llvm::Intrinsic::trunc, "elt.trunc"));
4163 case Builtin::BI__builtin_elementwise_canonicalize:
4164 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4165 *this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
4166 case Builtin::BI__builtin_elementwise_copysign:
4167 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4168 *this, E, llvm::Intrinsic::copysign));
4169 case Builtin::BI__builtin_elementwise_fma:
4170 return RValue::get(
4171 emitBuiltinWithOneOverloadedType<3>(*this, E, llvm::Intrinsic::fma));
// Saturating element-wise add/sub. Both builtins share this body; the
// intrinsic is chosen from (add vs. sub) x (signed vs. unsigned), giving one
// of sadd_sat / uadd_sat / ssub_sat / usub_sat.
4172 case Builtin::BI__builtin_elementwise_add_sat:
4173 case Builtin::BI__builtin_elementwise_sub_sat: {
4174 Value *Op0 = EmitScalarExpr(E->getArg(0));
4175 Value *Op1 = EmitScalarExpr(E->getArg(1));
4176 Value *Result;
4177 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
// Signedness comes from the AST type of argument 0; for a vector argument
// the element type is inspected instead.
4178 QualType Ty = E->getArg(0)->getType();
4179 if (auto *VecTy = Ty->getAs<VectorType>())
4180 Ty = VecTy->getElementType();
4181 bool IsSigned = Ty->isSignedIntegerType();
4182 unsigned Opc;
// Note: the add/sub decision is keyed on BuiltinIDIfNoAsmLabel, not BuiltinID.
4183 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
4184 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
4185 else
4186 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
4187 Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
4188 return RValue::get(Result);
4191 case Builtin::BI__builtin_elementwise_max: {
4192 Value *Op0 = EmitScalarExpr(E->getArg(0));
4193 Value *Op1 = EmitScalarExpr(E->getArg(1));
4194 Value *Result;
4195 if (Op0->getType()->isIntOrIntVectorTy()) {
4196 QualType Ty = E->getArg(0)->getType();
4197 if (auto *VecTy = Ty->getAs<VectorType>())
4198 Ty = VecTy->getElementType();
4199 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
4200 ? llvm::Intrinsic::smax
4201 : llvm::Intrinsic::umax,
4202 Op0, Op1, nullptr, "elt.max");
4203 } else
4204 Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
4205 return RValue::get(Result);
4207 case Builtin::BI__builtin_elementwise_min: {
4208 Value *Op0 = EmitScalarExpr(E->getArg(0));
4209 Value *Op1 = EmitScalarExpr(E->getArg(1));
4210 Value *Result;
4211 if (Op0->getType()->isIntOrIntVectorTy()) {
4212 QualType Ty = E->getArg(0)->getType();
4213 if (auto *VecTy = Ty->getAs<VectorType>())
4214 Ty = VecTy->getElementType();
4215 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
4216 ? llvm::Intrinsic::smin
4217 : llvm::Intrinsic::umin,
4218 Op0, Op1, nullptr, "elt.min");
4219 } else
4220 Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
4221 return RValue::get(Result);
4224 case Builtin::BI__builtin_elementwise_maximum: {
4225 Value *Op0 = EmitScalarExpr(E->getArg(0));
4226 Value *Op1 = EmitScalarExpr(E->getArg(1));
4227 Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::maximum, Op0,
4228 Op1, nullptr, "elt.maximum");
4229 return RValue::get(Result);
4232 case Builtin::BI__builtin_elementwise_minimum: {
4233 Value *Op0 = EmitScalarExpr(E->getArg(0));
4234 Value *Op1 = EmitScalarExpr(E->getArg(1));
4235 Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::minimum, Op0,
4236 Op1, nullptr, "elt.minimum");
4237 return RValue::get(Result);
// __builtin_reduce_max(v): horizontal maximum over the elements of argument 0.
// The reduction intrinsic is picked from the element type of the argument:
// signed integer -> vector_reduce_smax, unsigned integer -> vector_reduce_umax,
// otherwise (asserted to be floating point) -> vector_reduce_fmax.
4240 case Builtin::BI__builtin_reduce_max: {
4241 auto GetIntrinsicID = [this](QualType QT) {
// Look through the vector type to its element type; fixed VectorType and
// sizeless vector types are unwrapped through different accessors.
4242 if (auto *VecTy = QT->getAs<VectorType>())
4243 QT = VecTy->getElementType();
4244 else if (QT->isSizelessVectorType())
4245 QT = QT->getSizelessVectorEltType(CGM.getContext());
4247 if (QT->isSignedIntegerType())
4248 return llvm::Intrinsic::vector_reduce_smax;
4249 if (QT->isUnsignedIntegerType())
4250 return llvm::Intrinsic::vector_reduce_umax;
4251 assert(QT->isFloatingType() && "must have a float here");
4252 return llvm::Intrinsic::vector_reduce_fmax;
// The result is named "rdx.max" to match the operation; it was previously
// "rdx.min", apparently copied from the reduce_min case.
4254 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4255 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.max"));
// __builtin_reduce_min(v): horizontal minimum over the elements of argument 0.
// The reduction intrinsic is picked from the element type of the argument:
// signed integer -> vector_reduce_smin, unsigned integer -> vector_reduce_umin,
// otherwise (asserted to be floating point) -> vector_reduce_fmin.
4258 case Builtin::BI__builtin_reduce_min: {
4259 auto GetIntrinsicID = [this](QualType QT) {
// Look through the vector type to its element type; fixed VectorType and
// sizeless vector types are unwrapped through different accessors.
4260 if (auto *VecTy = QT->getAs<VectorType>())
4261 QT = VecTy->getElementType();
4262 else if (QT->isSizelessVectorType())
4263 QT = QT->getSizelessVectorEltType(CGM.getContext());
4265 if (QT->isSignedIntegerType())
4266 return llvm::Intrinsic::vector_reduce_smin;
4267 if (QT->isUnsignedIntegerType())
4268 return llvm::Intrinsic::vector_reduce_umin;
4269 assert(QT->isFloatingType() && "must have a float here");
4270 return llvm::Intrinsic::vector_reduce_fmin;
4273 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4274 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
4277 case Builtin::BI__builtin_reduce_add:
4278 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4279 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
4280 case Builtin::BI__builtin_reduce_mul:
4281 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4282 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
4283 case Builtin::BI__builtin_reduce_xor:
4284 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4285 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
4286 case Builtin::BI__builtin_reduce_or:
4287 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4288 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
4289 case Builtin::BI__builtin_reduce_and:
4290 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4291 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
4292 case Builtin::BI__builtin_reduce_maximum:
4293 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4294 *this, E, llvm::Intrinsic::vector_reduce_fmaximum, "rdx.maximum"));
4295 case Builtin::BI__builtin_reduce_minimum:
4296 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4297 *this, E, llvm::Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
4299 case Builtin::BI__builtin_matrix_transpose: {
4300 auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
4301 Value *MatValue = EmitScalarExpr(E->getArg(0));
4302 MatrixBuilder MB(Builder);
4303 Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
4304 MatrixTy->getNumColumns());
4305 return RValue::get(Result);
4308 case Builtin::BI__builtin_matrix_column_major_load: {
4309 MatrixBuilder MB(Builder);
4310 // Emit everything that isn't dependent on the first parameter type
4311 Value *Stride = EmitScalarExpr(E->getArg(3));
4312 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
4313 auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
4314 assert(PtrTy && "arg0 must be of pointer type");
4315 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4317 Address Src = EmitPointerWithAlignment(E->getArg(0));
4318 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4319 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4321 Value *Result = MB.CreateColumnMajorLoad(
4322 Src.getElementType(), Src.emitRawPointer(*this),
4323 Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
4324 ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix");
4325 return RValue::get(Result);
4328 case Builtin::BI__builtin_matrix_column_major_store: {
4329 MatrixBuilder MB(Builder);
4330 Value *Matrix = EmitScalarExpr(E->getArg(0));
4331 Address Dst = EmitPointerWithAlignment(E->getArg(1));
4332 Value *Stride = EmitScalarExpr(E->getArg(2));
4334 const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
4335 auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
4336 assert(PtrTy && "arg1 must be of pointer type");
4337 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4339 EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)),
4340 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4342 Value *Result = MB.CreateColumnMajorStore(
4343 Matrix, Dst.emitRawPointer(*this),
4344 Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
4345 MatrixTy->getNumRows(), MatrixTy->getNumColumns());
4346 return RValue::get(Result);
// __builtin_isinf_sign(x): yields -1 for negative infinity, +1 for positive
// infinity and 0 for everything else, in the builtin's converted result type.
// Implemented branch-free with two selects.
4349 case Builtin::BI__builtin_isinf_sign: {
4350 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
4351 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4352 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4353 Value *Arg = EmitScalarExpr(E->getArg(0));
4354 Value *AbsArg = EmitFAbs(*this, Arg);
// Ordered compare: a NaN input makes IsInf false, so the final result is 0.
4355 Value *IsInf = Builder.CreateFCmpOEQ(
4356 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
4357 Value *IsNeg = EmitSignBit(*this, Arg);
4359 llvm::Type *IntTy = ConvertType(E->getType());
4360 Value *Zero = Constant::getNullValue(IntTy);
4361 Value *One = ConstantInt::get(IntTy, 1);
4362 Value *NegativeOne = ConstantInt::get(IntTy, -1);
// Inner select picks the sign, outer select gates it on "is infinite".
4363 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
4364 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
4365 return RValue::get(Result);
4368 case Builtin::BI__builtin_flt_rounds: {
4369 Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
4371 llvm::Type *ResultType = ConvertType(E->getType());
4372 Value *Result = Builder.CreateCall(F);
4373 if (Result->getType() != ResultType)
4374 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
4375 "cast");
4376 return RValue::get(Result);
4379 case Builtin::BI__builtin_set_flt_rounds: {
4380 Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
4382 Value *V = EmitScalarExpr(E->getArg(0));
4383 Builder.CreateCall(F, V);
4384 return RValue::get(nullptr);
// __builtin_fpclassify: classifies the floating-point value in argument 5 and
// returns one of the five caller-supplied literals. As used below, the
// argument positions are:
//   0 = NaN result, 1 = infinity result, 2 = normal result,
//   3 = subnormal result, 4 = zero result, 5 = value to classify.
// The lowering is a chain of compare-and-branch blocks
// (zero -> NaN -> infinity -> normal/subnormal) that all feed a single PHI in
// the "fpclassify_end" block.
4387 case Builtin::BI__builtin_fpclassify: {
4388 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4389 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4390 Value *V = EmitScalarExpr(E->getArg(5));
4391 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
4393 // Create Result
// The PHI is created up front in the End block (4 incoming edges reserved) so
// each test below can register its incoming value as soon as it branches.
4394 BasicBlock *Begin = Builder.GetInsertBlock();
4395 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
4396 Builder.SetInsertPoint(End);
4397 PHINode *Result =
4398 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
4399 "fpclassify_result");
4401 // if (V==0) return FP_ZERO
4402 Builder.SetInsertPoint(Begin);
4403 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
4404 "iszero");
4405 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
4406 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
4407 Builder.CreateCondBr(IsZero, End, NotZero);
4408 Result->addIncoming(ZeroLiteral, Begin);
4410 // if (V != V) return FP_NAN
4411 Builder.SetInsertPoint(NotZero);
4412 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
4413 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
4414 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
4415 Builder.CreateCondBr(IsNan, End, NotNan);
4416 Result->addIncoming(NanLiteral, NotZero);
4418 // if (fabs(V) == infinity) return FP_INFINITY
4419 Builder.SetInsertPoint(NotNan);
4420 Value *VAbs = EmitFAbs(*this, V);
4421 Value *IsInf =
4422 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
4423 "isinf");
4424 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
4425 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
4426 Builder.CreateCondBr(IsInf, End, NotInf);
4427 Result->addIncoming(InfLiteral, NotNan);
4429 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
// The threshold is the smallest normalized value for the argument's float
// semantics. This last step uses a select rather than another branch, so
// the NotInf block contributes exactly one incoming value to the PHI.
4430 Builder.SetInsertPoint(NotInf);
4431 APFloat Smallest = APFloat::getSmallestNormalized(
4432 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
4433 Value *IsNormal =
4434 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
4435 "isnormal");
4436 Value *NormalResult =
4437 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
4438 EmitScalarExpr(E->getArg(3)));
4439 Builder.CreateBr(End);
4440 Result->addIncoming(NormalResult, NotInf);
4442 // return Result
// Leave the builder positioned in End so subsequent code is emitted after
// the PHI.
4443 Builder.SetInsertPoint(End);
4444 return RValue::get(Result);
4447 // An alloca will always return a pointer to the alloca (stack) address
4448 // space. This address space need not be the same as the AST / Language
4449 // default (e.g. in C / C++ auto vars are in the generic address space). At
4450 // the AST level this is handled within CreateTempAlloca et al., but for the
4451 // builtin / dynamic alloca we have to handle it here. We use an explicit cast
4452 // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
// alloca / _alloca / __builtin_alloca / __builtin_alloca_uninitialized:
// emits a dynamic alloca of `Size` i8 elements, aligned to the target's
// "suitable" alignment, and returns the pointer (address-space cast if the
// expression's pointee address space differs from the alloca address space).
4453 case Builtin::BIalloca:
4454 case Builtin::BI_alloca:
4455 case Builtin::BI__builtin_alloca_uninitialized:
4456 case Builtin::BI__builtin_alloca: {
4457 Value *Size = EmitScalarExpr(E->getArg(0));
4458 const TargetInfo &TI = getContext().getTargetInfo();
4459 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
// getSuitableAlign() is in bits; convert to a byte alignment for the IR.
4460 const Align SuitableAlignmentInBytes =
4461 CGM.getContext()
4462 .toCharUnitsFromBits(TI.getSuitableAlign())
4463 .getAsAlign();
4464 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4465 AI->setAlignment(SuitableAlignmentInBytes);
// Every variant except the explicit *_uninitialized one goes through
// initializeAlloca (defined outside this view; presumably applies the
// configured automatic variable initialization - confirm there).
4466 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
4467 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
// AAS = address space of the alloca itself, EAS = address space the call
// expression's pointer type expects; cast only when they differ.
4468 LangAS AAS = getASTAllocaAddressSpace();
4469 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4470 if (AAS != EAS) {
4471 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4472 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4473 EAS, Ty));
4475 return RValue::get(AI);
// __builtin_alloca_with_align(size, align_in_bits) and its *_uninitialized
// variant: same shape as the plain alloca case, but the alignment comes from
// argument 1 instead of the target's suitable alignment.
4478 case Builtin::BI__builtin_alloca_with_align_uninitialized:
4479 case Builtin::BI__builtin_alloca_with_align: {
4480 Value *Size = EmitScalarExpr(E->getArg(0));
// The alignment operand must already be a ConstantInt after emission; the
// cast<> asserts otherwise. It is expressed in bits and converted to bytes.
4481 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
4482 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
4483 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
4484 const Align AlignmentInBytes =
4485 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
4486 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4487 AI->setAlignment(AlignmentInBytes);
// Only the non-"uninitialized" builtin goes through initializeAlloca.
4488 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
4489 initializeAlloca(*this, AI, Size, AlignmentInBytes);
// Cast from the alloca address space to the one the expression's pointer
// type expects, when they differ.
4490 LangAS AAS = getASTAllocaAddressSpace();
4491 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4492 if (AAS != EAS) {
4493 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4494 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4495 EAS, Ty));
4497 return RValue::get(AI);
4500 case Builtin::BIbzero:
4501 case Builtin::BI__builtin_bzero: {
4502 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4503 Value *SizeVal = EmitScalarExpr(E->getArg(1));
4504 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4505 E->getArg(0)->getExprLoc(), FD, 0);
4506 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4507 return RValue::get(nullptr);
4510 case Builtin::BIbcopy:
4511 case Builtin::BI__builtin_bcopy: {
4512 Address Src = EmitPointerWithAlignment(E->getArg(0));
4513 Address Dest = EmitPointerWithAlignment(E->getArg(1));
4514 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4515 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4516 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4518 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4519 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4521 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4522 return RValue::get(nullptr);
4525 case Builtin::BImemcpy:
4526 case Builtin::BI__builtin_memcpy:
4527 case Builtin::BImempcpy:
4528 case Builtin::BI__builtin_mempcpy: {
4529 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4530 Address Src = EmitPointerWithAlignment(E->getArg(1));
4531 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4532 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4533 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4534 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4535 if (BuiltinID == Builtin::BImempcpy ||
4536 BuiltinID == Builtin::BI__builtin_mempcpy)
4537 return RValue::get(Builder.CreateInBoundsGEP(
4538 Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal));
4539 else
4540 return RValue::get(Dest, *this);
4543 case Builtin::BI__builtin_memcpy_inline: {
4544 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4545 Address Src = EmitPointerWithAlignment(E->getArg(1));
4546 uint64_t Size =
4547 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4548 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4549 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4550 Builder.CreateMemCpyInline(Dest, Src, Size);
4551 return RValue::get(nullptr);
4554 case Builtin::BI__builtin_char_memchr:
4555 BuiltinID = Builtin::BI__builtin_memchr;
4556 break;
// __builtin___memcpy_chk(dst, src, size, dstsize): when both size arguments
// are integer constants and size <= dstsize, the check is statically known to
// pass and a plain memcpy is emitted. In every other situation this case
// `break`s out of the switch without emitting anything here (what happens
// after the switch is outside this view).
4558 case Builtin::BI__builtin___memcpy_chk: {
4559 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
4560 Expr::EvalResult SizeResult, DstSizeResult;
4561 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4562 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4563 break;
4564 llvm::APSInt Size = SizeResult.Val.getInt();
4565 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
// Unsigned comparison: a copy larger than the destination is not folded.
4566 if (Size.ugt(DstSize))
4567 break;
4568 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4569 Address Src = EmitPointerWithAlignment(E->getArg(1));
// The size operand of the emitted memcpy is the evaluated constant, not a
// re-emission of the argument expression.
4570 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4571 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4572 return RValue::get(Dest, *this);
4575 case Builtin::BI__builtin_objc_memmove_collectable: {
4576 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
4577 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
4578 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4579 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
4580 DestAddr, SrcAddr, SizeVal);
4581 return RValue::get(DestAddr, *this);
4584 case Builtin::BI__builtin___memmove_chk: {
4585 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4586 Expr::EvalResult SizeResult, DstSizeResult;
4587 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4588 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4589 break;
4590 llvm::APSInt Size = SizeResult.Val.getInt();
4591 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4592 if (Size.ugt(DstSize))
4593 break;
4594 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4595 Address Src = EmitPointerWithAlignment(E->getArg(1));
4596 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4597 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4598 return RValue::get(Dest, *this);
4601 case Builtin::BImemmove:
4602 case Builtin::BI__builtin_memmove: {
4603 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4604 Address Src = EmitPointerWithAlignment(E->getArg(1));
4605 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4606 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4607 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4608 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4609 return RValue::get(Dest, *this);
4611 case Builtin::BImemset:
4612 case Builtin::BI__builtin_memset: {
4613 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4614 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4615 Builder.getInt8Ty());
4616 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4617 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4618 E->getArg(0)->getExprLoc(), FD, 0);
4619 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4620 return RValue::get(Dest, *this);
4622 case Builtin::BI__builtin_memset_inline: {
4623 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4624 Value *ByteVal =
4625 Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4626 uint64_t Size =
4627 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4628 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4629 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4631 Builder.CreateMemSetInline(Dest, ByteVal, Size);
4632 return RValue::get(nullptr);
4634 case Builtin::BI__builtin___memset_chk: {
4635 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4636 Expr::EvalResult SizeResult, DstSizeResult;
4637 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4638 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4639 break;
4640 llvm::APSInt Size = SizeResult.Val.getInt();
4641 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4642 if (Size.ugt(DstSize))
4643 break;
4644 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4645 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4646 Builder.getInt8Ty());
4647 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4648 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4649 return RValue::get(Dest, *this);
// __builtin_wmemchr(str, chr, size): on MSVCRT targets only, emits an inline
// search loop; on every other target this case `break`s out of the switch.
//
// Emitted control flow:
//   entry        : size == 0 ? -> exit (null) : -> wmemchr.eq
//   wmemchr.eq   : load *str; equal to chr ? -> exit (str) : -> wmemchr.next
//   wmemchr.next : ++str, --size; size == 0 ? -> exit (null) : -> wmemchr.eq
//   wmemchr.exit : PHI of the three possible results
4651 case Builtin::BI__builtin_wmemchr: {
4652 // The MSVC runtime library does not provide a definition of wmemchr, so we
4653 // need an inline implementation.
4654 if (!getTarget().getTriple().isOSMSVCRT())
4655 break;
4657 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4658 Value *Str = EmitScalarExpr(E->getArg(0));
4659 Value *Chr = EmitScalarExpr(E->getArg(1));
4660 Value *Size = EmitScalarExpr(E->getArg(2));
4662 BasicBlock *Entry = Builder.GetInsertBlock();
4663 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4664 BasicBlock *Next = createBasicBlock("wmemchr.next");
4665 BasicBlock *Exit = createBasicBlock("wmemchr.exit");
// A zero-length search never touches memory and goes straight to the exit.
4666 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4667 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
// Loop header: PHIs carry the current pointer and the remaining count; their
// back-edge values are added once the Next block has been emitted.
4669 EmitBlock(CmpEq);
4670 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4671 StrPhi->addIncoming(Str, Entry);
4672 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4673 SizePhi->addIncoming(Size, Entry);
4674 CharUnits WCharAlign =
4675 getContext().getTypeAlignInChars(getContext().WCharTy);
4676 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
// GEP with index 0: the address of the element just loaded, used as the
// "found" result.
4677 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4678 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4679 Builder.CreateCondBr(StrEqChr, Exit, Next);
// Loop latch: advance by one wide character, decrement the count, and stop
// when it reaches zero.
4681 EmitBlock(Next);
4682 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4683 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4684 Value *NextSizeEq0 =
4685 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4686 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4687 StrPhi->addIncoming(NextStr, Next);
4688 SizePhi->addIncoming(NextSize, Next);
// Result: null when the size was zero or the range was exhausted, otherwise
// the pointer to the matching element.
4690 EmitBlock(Exit);
4691 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4692 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4693 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4694 Ret->addIncoming(FoundChr, CmpEq);
4695 return RValue::get(Ret);
// __builtin_wmemcmp(dst, src, size): on MSVCRT targets only, emits an inline
// element-by-element comparison loop; on every other target this case
// `break`s out of the switch.
//
// Emitted control flow:
//   entry        : size == 0 ? -> exit (0) : -> wmemcmp.gt
//   wmemcmp.gt   : load both elements; dst > src ? -> exit (1) : -> wmemcmp.lt
//   wmemcmp.lt   : dst < src ? -> exit (-1) : -> wmemcmp.next
//   wmemcmp.next : advance both pointers, --size;
//                  size == 0 ? -> exit (0) : -> wmemcmp.gt
//   wmemcmp.exit : PHI of the four possible results
4697 case Builtin::BI__builtin_wmemcmp: {
4698 // The MSVC runtime library does not provide a definition of wmemcmp, so we
4699 // need an inline implementation.
4700 if (!getTarget().getTriple().isOSMSVCRT())
4701 break;
4703 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4705 Value *Dst = EmitScalarExpr(E->getArg(0));
4706 Value *Src = EmitScalarExpr(E->getArg(1));
4707 Value *Size = EmitScalarExpr(E->getArg(2));
4709 BasicBlock *Entry = Builder.GetInsertBlock();
4710 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4711 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4712 BasicBlock *Next = createBasicBlock("wmemcmp.next");
4713 BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
// Comparing zero elements yields "equal" without reading memory.
4714 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4715 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
// Loop header: PHIs carry both cursors and the remaining count; their
// back-edge values are added once the Next block has been emitted.
4717 EmitBlock(CmpGT);
4718 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4719 DstPhi->addIncoming(Dst, Entry);
4720 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4721 SrcPhi->addIncoming(Src, Entry);
4722 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4723 SizePhi->addIncoming(Size, Entry);
4724 CharUnits WCharAlign =
4725 getContext().getTypeAlignInChars(getContext().WCharTy);
4726 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4727 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
// Elements are compared as unsigned integers (UGT here, ULT below).
4728 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4729 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4731 EmitBlock(CmpLT);
4732 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4733 Builder.CreateCondBr(DstLtSrc, Exit, Next);
// Loop latch: elements were equal, so step both cursors and the count.
4735 EmitBlock(Next);
4736 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4737 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4738 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4739 Value *NextSizeEq0 =
4740 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4741 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4742 DstPhi->addIncoming(NextDst, Next);
4743 SrcPhi->addIncoming(NextSrc, Next);
4744 SizePhi->addIncoming(NextSize, Next);
// Result: 0 (empty or all equal), 1 (first difference has dst > src),
// -1 (first difference has dst < src).
4746 EmitBlock(Exit);
4747 PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4748 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4749 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4750 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4751 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4752 return RValue::get(Ret);
4754 case Builtin::BI__builtin_dwarf_cfa: {
4755 // The offset in bytes from the first argument to the CFA.
4757 // Why on earth is this in the frontend? Is there any reason at
4758 // all that the backend can't reasonably determine this while
4759 // lowering llvm.eh.dwarf.cfa()?
4761 // TODO: If there's a satisfactory reason, add a target hook for
4762 // this instead of hard-coding 0, which is correct for most targets.
4763 int32_t Offset = 0;
4765 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4766 return RValue::get(Builder.CreateCall(F,
4767 llvm::ConstantInt::get(Int32Ty, Offset)));
4769 case Builtin::BI__builtin_return_address: {
4770 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4771 getContext().UnsignedIntTy);
4772 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4773 return RValue::get(Builder.CreateCall(F, Depth));
4775 case Builtin::BI_ReturnAddress: {
4776 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4777 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4779 case Builtin::BI__builtin_frame_address: {
4780 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4781 getContext().UnsignedIntTy);
4782 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4783 return RValue::get(Builder.CreateCall(F, Depth));
4785 case Builtin::BI__builtin_extract_return_addr: {
4786 Value *Address = EmitScalarExpr(E->getArg(0));
4787 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
4788 return RValue::get(Result);
4790 case Builtin::BI__builtin_frob_return_addr: {
4791 Value *Address = EmitScalarExpr(E->getArg(0));
4792 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
4793 return RValue::get(Result);
4795 case Builtin::BI__builtin_dwarf_sp_column: {
4796 llvm::IntegerType *Ty
4797 = cast<llvm::IntegerType>(ConvertType(E->getType()));
4798 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
4799 if (Column == -1) {
4800 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4801 return RValue::get(llvm::UndefValue::get(Ty));
4803 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4805 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4806 Value *Address = EmitScalarExpr(E->getArg(0));
4807 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4808 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4809 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4811 case Builtin::BI__builtin_eh_return: {
4812 Value *Int = EmitScalarExpr(E->getArg(0));
4813 Value *Ptr = EmitScalarExpr(E->getArg(1));
4815 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4816 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4817 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4818 Function *F =
4819 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4820 : Intrinsic::eh_return_i64);
4821 Builder.CreateCall(F, {Int, Ptr});
4822 Builder.CreateUnreachable();
4824 // We do need to preserve an insertion point.
4825 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4827 return RValue::get(nullptr);
4829 case Builtin::BI__builtin_unwind_init: {
4830 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4831 Builder.CreateCall(F);
4832 return RValue::get(nullptr);
4834 case Builtin::BI__builtin_extend_pointer: {
4835 // Extends a pointer to the size of an _Unwind_Word, which is
4836 // uint64_t on all platforms. Generally this gets poked into a
4837 // register and eventually used as an address, so if the
4838 // addressing registers are wider than pointers and the platform
4839 // doesn't implicitly ignore high-order bits when doing
4840 // addressing, we need to make sure we zext / sext based on
4841 // the platform's expectations.
4843 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
4845 // Cast the pointer to intptr_t.
4846 Value *Ptr = EmitScalarExpr(E->getArg(0));
4847 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4849 // If that's 64 bits, we're done.
4850 if (IntPtrTy->getBitWidth() == 64)
4851 return RValue::get(Result);
4853 // Otherwise, ask the codegen data what to do.
4854 if (getTargetHooks().extendPointerWithSExt())
4855 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4856 else
4857 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4859 case Builtin::BI__builtin_setjmp: {
4860 // Buffer is a void**.
4861 Address Buf = EmitPointerWithAlignment(E->getArg(0));
4863 if (getTarget().getTriple().getArch() == llvm::Triple::systemz) {
4864 // On this target, the back end fills in the context buffer completely.
4865 // It doesn't really matter whether the frontend stores to the buffer before
4866 // calling setjmp; the back end is going to overwrite those stores anyway.
4867 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4868 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4871 // Store the frame pointer to the setjmp buffer.
4872 Value *FrameAddr = Builder.CreateCall(
4873 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4874 ConstantInt::get(Int32Ty, 0));
4875 Builder.CreateStore(FrameAddr, Buf);
4877 // Store the stack pointer to the setjmp buffer.
4878 Value *StackAddr = Builder.CreateStackSave();
4879 assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
4881 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4882 Builder.CreateStore(StackAddr, StackSaveSlot);
4884 // Call LLVM's EH setjmp, which is lightweight.
4885 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4886 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4888 case Builtin::BI__builtin_longjmp: {
4889 Value *Buf = EmitScalarExpr(E->getArg(0));
4891 // Call LLVM's EH longjmp, which is lightweight.
4892 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4894 // longjmp doesn't return; mark this as unreachable.
4895 Builder.CreateUnreachable();
4897 // We do need to preserve an insertion point.
4898 EmitBlock(createBasicBlock("longjmp.cont"));
4900 return RValue::get(nullptr);
4902 case Builtin::BI__builtin_launder: {
4903 const Expr *Arg = E->getArg(0);
4904 QualType ArgTy = Arg->getType()->getPointeeType();
4905 Value *Ptr = EmitScalarExpr(Arg);
4906 if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4907 Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4909 return RValue::get(Ptr);
4911 case Builtin::BI__sync_fetch_and_add:
4912 case Builtin::BI__sync_fetch_and_sub:
4913 case Builtin::BI__sync_fetch_and_or:
4914 case Builtin::BI__sync_fetch_and_and:
4915 case Builtin::BI__sync_fetch_and_xor:
4916 case Builtin::BI__sync_fetch_and_nand:
4917 case Builtin::BI__sync_add_and_fetch:
4918 case Builtin::BI__sync_sub_and_fetch:
4919 case Builtin::BI__sync_and_and_fetch:
4920 case Builtin::BI__sync_or_and_fetch:
4921 case Builtin::BI__sync_xor_and_fetch:
4922 case Builtin::BI__sync_nand_and_fetch:
4923 case Builtin::BI__sync_val_compare_and_swap:
4924 case Builtin::BI__sync_bool_compare_and_swap:
4925 case Builtin::BI__sync_lock_test_and_set:
4926 case Builtin::BI__sync_lock_release:
4927 case Builtin::BI__sync_swap:
4928 llvm_unreachable("Shouldn't make it through sema");
4929 case Builtin::BI__sync_fetch_and_add_1:
4930 case Builtin::BI__sync_fetch_and_add_2:
4931 case Builtin::BI__sync_fetch_and_add_4:
4932 case Builtin::BI__sync_fetch_and_add_8:
4933 case Builtin::BI__sync_fetch_and_add_16:
4934 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4935 case Builtin::BI__sync_fetch_and_sub_1:
4936 case Builtin::BI__sync_fetch_and_sub_2:
4937 case Builtin::BI__sync_fetch_and_sub_4:
4938 case Builtin::BI__sync_fetch_and_sub_8:
4939 case Builtin::BI__sync_fetch_and_sub_16:
4940 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4941 case Builtin::BI__sync_fetch_and_or_1:
4942 case Builtin::BI__sync_fetch_and_or_2:
4943 case Builtin::BI__sync_fetch_and_or_4:
4944 case Builtin::BI__sync_fetch_and_or_8:
4945 case Builtin::BI__sync_fetch_and_or_16:
4946 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4947 case Builtin::BI__sync_fetch_and_and_1:
4948 case Builtin::BI__sync_fetch_and_and_2:
4949 case Builtin::BI__sync_fetch_and_and_4:
4950 case Builtin::BI__sync_fetch_and_and_8:
4951 case Builtin::BI__sync_fetch_and_and_16:
4952 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4953 case Builtin::BI__sync_fetch_and_xor_1:
4954 case Builtin::BI__sync_fetch_and_xor_2:
4955 case Builtin::BI__sync_fetch_and_xor_4:
4956 case Builtin::BI__sync_fetch_and_xor_8:
4957 case Builtin::BI__sync_fetch_and_xor_16:
4958 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4959 case Builtin::BI__sync_fetch_and_nand_1:
4960 case Builtin::BI__sync_fetch_and_nand_2:
4961 case Builtin::BI__sync_fetch_and_nand_4:
4962 case Builtin::BI__sync_fetch_and_nand_8:
4963 case Builtin::BI__sync_fetch_and_nand_16:
4964 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
4966 // Clang extensions: not overloaded yet.
4967 case Builtin::BI__sync_fetch_and_min:
4968 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4969 case Builtin::BI__sync_fetch_and_max:
4970 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
4971 case Builtin::BI__sync_fetch_and_umin:
4972 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
4973 case Builtin::BI__sync_fetch_and_umax:
4974 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
4976 case Builtin::BI__sync_add_and_fetch_1:
4977 case Builtin::BI__sync_add_and_fetch_2:
4978 case Builtin::BI__sync_add_and_fetch_4:
4979 case Builtin::BI__sync_add_and_fetch_8:
4980 case Builtin::BI__sync_add_and_fetch_16:
4981 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
4982 llvm::Instruction::Add);
4983 case Builtin::BI__sync_sub_and_fetch_1:
4984 case Builtin::BI__sync_sub_and_fetch_2:
4985 case Builtin::BI__sync_sub_and_fetch_4:
4986 case Builtin::BI__sync_sub_and_fetch_8:
4987 case Builtin::BI__sync_sub_and_fetch_16:
4988 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
4989 llvm::Instruction::Sub);
4990 case Builtin::BI__sync_and_and_fetch_1:
4991 case Builtin::BI__sync_and_and_fetch_2:
4992 case Builtin::BI__sync_and_and_fetch_4:
4993 case Builtin::BI__sync_and_and_fetch_8:
4994 case Builtin::BI__sync_and_and_fetch_16:
4995 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
4996 llvm::Instruction::And);
4997 case Builtin::BI__sync_or_and_fetch_1:
4998 case Builtin::BI__sync_or_and_fetch_2:
4999 case Builtin::BI__sync_or_and_fetch_4:
5000 case Builtin::BI__sync_or_and_fetch_8:
5001 case Builtin::BI__sync_or_and_fetch_16:
5002 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
5003 llvm::Instruction::Or);
5004 case Builtin::BI__sync_xor_and_fetch_1:
5005 case Builtin::BI__sync_xor_and_fetch_2:
5006 case Builtin::BI__sync_xor_and_fetch_4:
5007 case Builtin::BI__sync_xor_and_fetch_8:
5008 case Builtin::BI__sync_xor_and_fetch_16:
5009 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
5010 llvm::Instruction::Xor);
5011 case Builtin::BI__sync_nand_and_fetch_1:
5012 case Builtin::BI__sync_nand_and_fetch_2:
5013 case Builtin::BI__sync_nand_and_fetch_4:
5014 case Builtin::BI__sync_nand_and_fetch_8:
5015 case Builtin::BI__sync_nand_and_fetch_16:
5016 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
5017 llvm::Instruction::And, true);
5019 case Builtin::BI__sync_val_compare_and_swap_1:
5020 case Builtin::BI__sync_val_compare_and_swap_2:
5021 case Builtin::BI__sync_val_compare_and_swap_4:
5022 case Builtin::BI__sync_val_compare_and_swap_8:
5023 case Builtin::BI__sync_val_compare_and_swap_16:
5024 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
5026 case Builtin::BI__sync_bool_compare_and_swap_1:
5027 case Builtin::BI__sync_bool_compare_and_swap_2:
5028 case Builtin::BI__sync_bool_compare_and_swap_4:
5029 case Builtin::BI__sync_bool_compare_and_swap_8:
5030 case Builtin::BI__sync_bool_compare_and_swap_16:
5031 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
5033 case Builtin::BI__sync_swap_1:
5034 case Builtin::BI__sync_swap_2:
5035 case Builtin::BI__sync_swap_4:
5036 case Builtin::BI__sync_swap_8:
5037 case Builtin::BI__sync_swap_16:
5038 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
5040 case Builtin::BI__sync_lock_test_and_set_1:
5041 case Builtin::BI__sync_lock_test_and_set_2:
5042 case Builtin::BI__sync_lock_test_and_set_4:
5043 case Builtin::BI__sync_lock_test_and_set_8:
5044 case Builtin::BI__sync_lock_test_and_set_16:
5045 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
5047 case Builtin::BI__sync_lock_release_1:
5048 case Builtin::BI__sync_lock_release_2:
5049 case Builtin::BI__sync_lock_release_4:
5050 case Builtin::BI__sync_lock_release_8:
5051 case Builtin::BI__sync_lock_release_16: {
5052 Address Ptr = CheckAtomicAlignment(*this, E);
5053 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5055 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5056 getContext().getTypeSize(ElTy));
5057 llvm::StoreInst *Store =
5058 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
5059 Store->setAtomic(llvm::AtomicOrdering::Release);
5060 return RValue::get(nullptr);
5063 case Builtin::BI__sync_synchronize: {
5064 // We assume this is supposed to correspond to a C++0x-style
5065 // sequentially-consistent fence (i.e. this is only usable for
5066 // synchronization, not device I/O or anything like that). This intrinsic
5067 // is really badly designed in the sense that in theory, there isn't
5068 // any way to safely use it... but in practice, it mostly works
5069 // to use it with non-atomic loads and stores to get acquire/release
5070 // semantics.
5071 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
5072 return RValue::get(nullptr);
5075 case Builtin::BI__builtin_nontemporal_load:
5076 return RValue::get(EmitNontemporalLoad(*this, E));
5077 case Builtin::BI__builtin_nontemporal_store:
5078 return RValue::get(EmitNontemporalStore(*this, E));
5079 case Builtin::BI__c11_atomic_is_lock_free:
5080 case Builtin::BI__atomic_is_lock_free: {
5081 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
5082 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
5083 // _Atomic(T) is always properly-aligned.
5084 const char *LibCallName = "__atomic_is_lock_free";
5085 CallArgList Args;
5086 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
5087 getContext().getSizeType());
5088 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
5089 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
5090 getContext().VoidPtrTy);
5091 else
5092 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
5093 getContext().VoidPtrTy);
5094 const CGFunctionInfo &FuncInfo =
5095 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
5096 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
5097 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
5098 return EmitCall(FuncInfo, CGCallee::forDirect(Func),
5099 ReturnValueSlot(), Args);
5102 case Builtin::BI__atomic_test_and_set: {
5103 // Look at the argument type to determine whether this is a volatile
5104 // operation. The parameter type is always volatile.
5105 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
5106 bool Volatile =
5107 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
5109 Address Ptr =
5110 EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);
5112 Value *NewVal = Builder.getInt8(1);
5113 Value *Order = EmitScalarExpr(E->getArg(1));
5114 if (isa<llvm::ConstantInt>(Order)) {
5115 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5116 AtomicRMWInst *Result = nullptr;
5117 switch (ord) {
5118 case 0: // memory_order_relaxed
5119 default: // invalid order
5120 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5121 llvm::AtomicOrdering::Monotonic);
5122 break;
5123 case 1: // memory_order_consume
5124 case 2: // memory_order_acquire
5125 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5126 llvm::AtomicOrdering::Acquire);
5127 break;
5128 case 3: // memory_order_release
5129 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5130 llvm::AtomicOrdering::Release);
5131 break;
5132 case 4: // memory_order_acq_rel
5134 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5135 llvm::AtomicOrdering::AcquireRelease);
5136 break;
5137 case 5: // memory_order_seq_cst
5138 Result = Builder.CreateAtomicRMW(
5139 llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5140 llvm::AtomicOrdering::SequentiallyConsistent);
5141 break;
5143 Result->setVolatile(Volatile);
5144 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
5147 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5149 llvm::BasicBlock *BBs[5] = {
5150 createBasicBlock("monotonic", CurFn),
5151 createBasicBlock("acquire", CurFn),
5152 createBasicBlock("release", CurFn),
5153 createBasicBlock("acqrel", CurFn),
5154 createBasicBlock("seqcst", CurFn)
5156 llvm::AtomicOrdering Orders[5] = {
5157 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
5158 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
5159 llvm::AtomicOrdering::SequentiallyConsistent};
5161 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5162 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
5164 Builder.SetInsertPoint(ContBB);
5165 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
5167 for (unsigned i = 0; i < 5; ++i) {
5168 Builder.SetInsertPoint(BBs[i]);
5169 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
5170 Ptr, NewVal, Orders[i]);
5171 RMW->setVolatile(Volatile);
5172 Result->addIncoming(RMW, BBs[i]);
5173 Builder.CreateBr(ContBB);
5176 SI->addCase(Builder.getInt32(0), BBs[0]);
5177 SI->addCase(Builder.getInt32(1), BBs[1]);
5178 SI->addCase(Builder.getInt32(2), BBs[1]);
5179 SI->addCase(Builder.getInt32(3), BBs[2]);
5180 SI->addCase(Builder.getInt32(4), BBs[3]);
5181 SI->addCase(Builder.getInt32(5), BBs[4]);
5183 Builder.SetInsertPoint(ContBB);
5184 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
5187 case Builtin::BI__atomic_clear: {
5188 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
5189 bool Volatile =
5190 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
5192 Address Ptr = EmitPointerWithAlignment(E->getArg(0));
5193 Ptr = Ptr.withElementType(Int8Ty);
5194 Value *NewVal = Builder.getInt8(0);
5195 Value *Order = EmitScalarExpr(E->getArg(1));
5196 if (isa<llvm::ConstantInt>(Order)) {
5197 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5198 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
5199 switch (ord) {
5200 case 0: // memory_order_relaxed
5201 default: // invalid order
5202 Store->setOrdering(llvm::AtomicOrdering::Monotonic);
5203 break;
5204 case 3: // memory_order_release
5205 Store->setOrdering(llvm::AtomicOrdering::Release);
5206 break;
5207 case 5: // memory_order_seq_cst
5208 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
5209 break;
5211 return RValue::get(nullptr);
5214 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5216 llvm::BasicBlock *BBs[3] = {
5217 createBasicBlock("monotonic", CurFn),
5218 createBasicBlock("release", CurFn),
5219 createBasicBlock("seqcst", CurFn)
5221 llvm::AtomicOrdering Orders[3] = {
5222 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
5223 llvm::AtomicOrdering::SequentiallyConsistent};
5225 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5226 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
5228 for (unsigned i = 0; i < 3; ++i) {
5229 Builder.SetInsertPoint(BBs[i]);
5230 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
5231 Store->setOrdering(Orders[i]);
5232 Builder.CreateBr(ContBB);
5235 SI->addCase(Builder.getInt32(0), BBs[0]);
5236 SI->addCase(Builder.getInt32(3), BBs[1]);
5237 SI->addCase(Builder.getInt32(5), BBs[2]);
5239 Builder.SetInsertPoint(ContBB);
5240 return RValue::get(nullptr);
5243 case Builtin::BI__atomic_thread_fence:
5244 case Builtin::BI__atomic_signal_fence:
5245 case Builtin::BI__c11_atomic_thread_fence:
5246 case Builtin::BI__c11_atomic_signal_fence: {
5247 llvm::SyncScope::ID SSID;
5248 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
5249 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
5250 SSID = llvm::SyncScope::SingleThread;
5251 else
5252 SSID = llvm::SyncScope::System;
5253 Value *Order = EmitScalarExpr(E->getArg(0));
5254 if (isa<llvm::ConstantInt>(Order)) {
5255 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5256 switch (ord) {
5257 case 0: // memory_order_relaxed
5258 default: // invalid order
5259 break;
5260 case 1: // memory_order_consume
5261 case 2: // memory_order_acquire
5262 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5263 break;
5264 case 3: // memory_order_release
5265 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5266 break;
5267 case 4: // memory_order_acq_rel
5268 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5269 break;
5270 case 5: // memory_order_seq_cst
5271 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5272 break;
5274 return RValue::get(nullptr);
5277 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
5278 AcquireBB = createBasicBlock("acquire", CurFn);
5279 ReleaseBB = createBasicBlock("release", CurFn);
5280 AcqRelBB = createBasicBlock("acqrel", CurFn);
5281 SeqCstBB = createBasicBlock("seqcst", CurFn);
5282 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5284 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5285 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
5287 Builder.SetInsertPoint(AcquireBB);
5288 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5289 Builder.CreateBr(ContBB);
5290 SI->addCase(Builder.getInt32(1), AcquireBB);
5291 SI->addCase(Builder.getInt32(2), AcquireBB);
5293 Builder.SetInsertPoint(ReleaseBB);
5294 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5295 Builder.CreateBr(ContBB);
5296 SI->addCase(Builder.getInt32(3), ReleaseBB);
5298 Builder.SetInsertPoint(AcqRelBB);
5299 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5300 Builder.CreateBr(ContBB);
5301 SI->addCase(Builder.getInt32(4), AcqRelBB);
5303 Builder.SetInsertPoint(SeqCstBB);
5304 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5305 Builder.CreateBr(ContBB);
5306 SI->addCase(Builder.getInt32(5), SeqCstBB);
5308 Builder.SetInsertPoint(ContBB);
5309 return RValue::get(nullptr);
5311 case Builtin::BI__scoped_atomic_thread_fence: {
5312 auto ScopeModel = AtomicScopeModel::create(AtomicScopeModelKind::Generic);
5314 Value *Order = EmitScalarExpr(E->getArg(0));
5315 Value *Scope = EmitScalarExpr(E->getArg(1));
5316 auto Ord = dyn_cast<llvm::ConstantInt>(Order);
5317 auto Scp = dyn_cast<llvm::ConstantInt>(Scope);
5318 if (Ord && Scp) {
5319 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5320 ? ScopeModel->map(Scp->getZExtValue())
5321 : ScopeModel->map(ScopeModel->getFallBackValue());
5322 switch (Ord->getZExtValue()) {
5323 case 0: // memory_order_relaxed
5324 default: // invalid order
5325 break;
5326 case 1: // memory_order_consume
5327 case 2: // memory_order_acquire
5328 Builder.CreateFence(
5329 llvm::AtomicOrdering::Acquire,
5330 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
5331 llvm::AtomicOrdering::Acquire,
5332 getLLVMContext()));
5333 break;
5334 case 3: // memory_order_release
5335 Builder.CreateFence(
5336 llvm::AtomicOrdering::Release,
5337 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
5338 llvm::AtomicOrdering::Release,
5339 getLLVMContext()));
5340 break;
5341 case 4: // memory_order_acq_rel
5342 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease,
5343 getTargetHooks().getLLVMSyncScopeID(
5344 getLangOpts(), SS,
5345 llvm::AtomicOrdering::AcquireRelease,
5346 getLLVMContext()));
5347 break;
5348 case 5: // memory_order_seq_cst
5349 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5350 getTargetHooks().getLLVMSyncScopeID(
5351 getLangOpts(), SS,
5352 llvm::AtomicOrdering::SequentiallyConsistent,
5353 getLLVMContext()));
5354 break;
5356 return RValue::get(nullptr);
5359 llvm::BasicBlock *ContBB = createBasicBlock("atomic.scope.continue", CurFn);
5361 llvm::SmallVector<std::pair<llvm::BasicBlock *, llvm::AtomicOrdering>>
5362 OrderBBs;
5363 if (Ord) {
5364 switch (Ord->getZExtValue()) {
5365 case 0: // memory_order_relaxed
5366 default: // invalid order
5367 ContBB->eraseFromParent();
5368 return RValue::get(nullptr);
5369 case 1: // memory_order_consume
5370 case 2: // memory_order_acquire
5371 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5372 llvm::AtomicOrdering::Acquire);
5373 break;
5374 case 3: // memory_order_release
5375 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5376 llvm::AtomicOrdering::Release);
5377 break;
5378 case 4: // memory_order_acq_rel
5379 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5380 llvm::AtomicOrdering::AcquireRelease);
5381 break;
5382 case 5: // memory_order_seq_cst
5383 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5384 llvm::AtomicOrdering::SequentiallyConsistent);
5385 break;
5387 } else {
5388 llvm::BasicBlock *AcquireBB = createBasicBlock("acquire", CurFn);
5389 llvm::BasicBlock *ReleaseBB = createBasicBlock("release", CurFn);
5390 llvm::BasicBlock *AcqRelBB = createBasicBlock("acqrel", CurFn);
5391 llvm::BasicBlock *SeqCstBB = createBasicBlock("seqcst", CurFn);
5393 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5394 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
5395 SI->addCase(Builder.getInt32(1), AcquireBB);
5396 SI->addCase(Builder.getInt32(2), AcquireBB);
5397 SI->addCase(Builder.getInt32(3), ReleaseBB);
5398 SI->addCase(Builder.getInt32(4), AcqRelBB);
5399 SI->addCase(Builder.getInt32(5), SeqCstBB);
5401 OrderBBs.emplace_back(AcquireBB, llvm::AtomicOrdering::Acquire);
5402 OrderBBs.emplace_back(ReleaseBB, llvm::AtomicOrdering::Release);
5403 OrderBBs.emplace_back(AcqRelBB, llvm::AtomicOrdering::AcquireRelease);
5404 OrderBBs.emplace_back(SeqCstBB,
5405 llvm::AtomicOrdering::SequentiallyConsistent);
5408 for (auto &[OrderBB, Ordering] : OrderBBs) {
5409 Builder.SetInsertPoint(OrderBB);
5410 if (Scp) {
5411 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5412 ? ScopeModel->map(Scp->getZExtValue())
5413 : ScopeModel->map(ScopeModel->getFallBackValue());
5414 Builder.CreateFence(Ordering,
5415 getTargetHooks().getLLVMSyncScopeID(
5416 getLangOpts(), SS, Ordering, getLLVMContext()));
5417 Builder.CreateBr(ContBB);
5418 } else {
5419 llvm::DenseMap<unsigned, llvm::BasicBlock *> BBs;
5420 for (unsigned Scp : ScopeModel->getRuntimeValues())
5421 BBs[Scp] = createBasicBlock(getAsString(ScopeModel->map(Scp)), CurFn);
5423 auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false);
5424 llvm::SwitchInst *SI = Builder.CreateSwitch(SC, ContBB);
5425 for (unsigned Scp : ScopeModel->getRuntimeValues()) {
5426 auto *B = BBs[Scp];
5427 SI->addCase(Builder.getInt32(Scp), B);
5429 Builder.SetInsertPoint(B);
5430 Builder.CreateFence(Ordering, getTargetHooks().getLLVMSyncScopeID(
5431 getLangOpts(), ScopeModel->map(Scp),
5432 Ordering, getLLVMContext()));
5433 Builder.CreateBr(ContBB);
5438 Builder.SetInsertPoint(ContBB);
5439 return RValue::get(nullptr);
5442 case Builtin::BI__builtin_signbit:
5443 case Builtin::BI__builtin_signbitf:
5444 case Builtin::BI__builtin_signbitl: {
5445 return RValue::get(
5446 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
5447 ConvertType(E->getType())));
5449 case Builtin::BI__warn_memset_zero_len:
5450 return RValue::getIgnored();
5451 case Builtin::BI__annotation: {
5452 // Re-encode each wide string to UTF8 and make an MDString.
5453 SmallVector<Metadata *, 1> Strings;
5454 for (const Expr *Arg : E->arguments()) {
5455 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
5456 assert(Str->getCharByteWidth() == 2);
5457 StringRef WideBytes = Str->getBytes();
5458 std::string StrUtf8;
5459 if (!convertUTF16ToUTF8String(
5460 ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
5461 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
5462 continue;
5464 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
5467 // Build an MDTuple of MDStrings and emit the intrinsic call.
5468 llvm::Function *F =
5469 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
5470 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
5471 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
5472 return RValue::getIgnored();
5474 case Builtin::BI__builtin_annotation: {
5475 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
5476 llvm::Function *F =
5477 CGM.getIntrinsic(llvm::Intrinsic::annotation,
5478 {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
5480 // Get the annotation string, go through casts. Sema requires this to be a
5481 // non-wide string literal, potentially cast, so the cast<> is safe.
5482 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
5483 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
5484 return RValue::get(
5485 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
5487 case Builtin::BI__builtin_addcb:
5488 case Builtin::BI__builtin_addcs:
5489 case Builtin::BI__builtin_addc:
5490 case Builtin::BI__builtin_addcl:
5491 case Builtin::BI__builtin_addcll:
5492 case Builtin::BI__builtin_subcb:
5493 case Builtin::BI__builtin_subcs:
5494 case Builtin::BI__builtin_subc:
5495 case Builtin::BI__builtin_subcl:
5496 case Builtin::BI__builtin_subcll: {
5498 // We translate all of these builtins from expressions of the form:
5499 // int x = ..., y = ..., carryin = ..., carryout, result;
5500 // result = __builtin_addc(x, y, carryin, &carryout);
5502 // to LLVM IR of the form:
5504 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
5505 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
5506 // %carry1 = extractvalue {i32, i1} %tmp1, 1
5507 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
5508 // i32 %carryin)
5509 // %result = extractvalue {i32, i1} %tmp2, 0
5510 // %carry2 = extractvalue {i32, i1} %tmp2, 1
5511 // %tmp3 = or i1 %carry1, %carry2
5512 // %tmp4 = zext i1 %tmp3 to i32
5513 // store i32 %tmp4, i32* %carryout
5515 // Scalarize our inputs.
5516 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5517 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5518 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
5519 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
5521 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
5522 llvm::Intrinsic::ID IntrinsicId;
5523 switch (BuiltinID) {
5524 default: llvm_unreachable("Unknown multiprecision builtin id.");
5525 case Builtin::BI__builtin_addcb:
5526 case Builtin::BI__builtin_addcs:
5527 case Builtin::BI__builtin_addc:
5528 case Builtin::BI__builtin_addcl:
5529 case Builtin::BI__builtin_addcll:
5530 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5531 break;
5532 case Builtin::BI__builtin_subcb:
5533 case Builtin::BI__builtin_subcs:
5534 case Builtin::BI__builtin_subc:
5535 case Builtin::BI__builtin_subcl:
5536 case Builtin::BI__builtin_subcll:
5537 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5538 break;
5541 // Construct our resulting LLVM IR expression.
5542 llvm::Value *Carry1;
5543 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
5544 X, Y, Carry1);
5545 llvm::Value *Carry2;
5546 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
5547 Sum1, Carryin, Carry2);
5548 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
5549 X->getType());
5550 Builder.CreateStore(CarryOut, CarryOutPtr);
5551 return RValue::get(Sum2);
5554 case Builtin::BI__builtin_add_overflow:
5555 case Builtin::BI__builtin_sub_overflow:
5556 case Builtin::BI__builtin_mul_overflow: {
5557 const clang::Expr *LeftArg = E->getArg(0);
5558 const clang::Expr *RightArg = E->getArg(1);
5559 const clang::Expr *ResultArg = E->getArg(2);
5561 clang::QualType ResultQTy =
5562 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
5564 WidthAndSignedness LeftInfo =
5565 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
5566 WidthAndSignedness RightInfo =
5567 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
5568 WidthAndSignedness ResultInfo =
5569 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
5571 // Handle mixed-sign multiplication as a special case, because adding
5572 // runtime or backend support for our generic irgen would be too expensive.
5573 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
5574 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
5575 RightInfo, ResultArg, ResultQTy,
5576 ResultInfo);
5578 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
5579 ResultInfo))
5580 return EmitCheckedUnsignedMultiplySignedResult(
5581 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
5582 ResultInfo);
5584 WidthAndSignedness EncompassingInfo =
5585 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
5587 llvm::Type *EncompassingLLVMTy =
5588 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
5590 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
5592 llvm::Intrinsic::ID IntrinsicId;
5593 switch (BuiltinID) {
5594 default:
5595 llvm_unreachable("Unknown overflow builtin id.");
5596 case Builtin::BI__builtin_add_overflow:
5597 IntrinsicId = EncompassingInfo.Signed
5598 ? llvm::Intrinsic::sadd_with_overflow
5599 : llvm::Intrinsic::uadd_with_overflow;
5600 break;
5601 case Builtin::BI__builtin_sub_overflow:
5602 IntrinsicId = EncompassingInfo.Signed
5603 ? llvm::Intrinsic::ssub_with_overflow
5604 : llvm::Intrinsic::usub_with_overflow;
5605 break;
5606 case Builtin::BI__builtin_mul_overflow:
5607 IntrinsicId = EncompassingInfo.Signed
5608 ? llvm::Intrinsic::smul_with_overflow
5609 : llvm::Intrinsic::umul_with_overflow;
5610 break;
5613 llvm::Value *Left = EmitScalarExpr(LeftArg);
5614 llvm::Value *Right = EmitScalarExpr(RightArg);
5615 Address ResultPtr = EmitPointerWithAlignment(ResultArg);
5617 // Extend each operand to the encompassing type.
5618 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
5619 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
5621 // Perform the operation on the extended values.
5622 llvm::Value *Overflow, *Result;
5623 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
5625 if (EncompassingInfo.Width > ResultInfo.Width) {
5626 // The encompassing type is wider than the result type, so we need to
5627 // truncate it.
5628 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
5630 // To see if the truncation caused an overflow, we will extend
5631 // the result and then compare it to the original result.
5632 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
5633 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
5634 llvm::Value *TruncationOverflow =
5635 Builder.CreateICmpNE(Result, ResultTruncExt);
5637 Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
5638 Result = ResultTrunc;
5641 // Finally, store the result using the pointer.
5642 bool isVolatile =
5643 ResultArg->getType()->getPointeeType().isVolatileQualified();
5644 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
5646 return RValue::get(Overflow);
5649 case Builtin::BI__builtin_uadd_overflow:
5650 case Builtin::BI__builtin_uaddl_overflow:
5651 case Builtin::BI__builtin_uaddll_overflow:
5652 case Builtin::BI__builtin_usub_overflow:
5653 case Builtin::BI__builtin_usubl_overflow:
5654 case Builtin::BI__builtin_usubll_overflow:
5655 case Builtin::BI__builtin_umul_overflow:
5656 case Builtin::BI__builtin_umull_overflow:
5657 case Builtin::BI__builtin_umulll_overflow:
5658 case Builtin::BI__builtin_sadd_overflow:
5659 case Builtin::BI__builtin_saddl_overflow:
5660 case Builtin::BI__builtin_saddll_overflow:
5661 case Builtin::BI__builtin_ssub_overflow:
5662 case Builtin::BI__builtin_ssubl_overflow:
5663 case Builtin::BI__builtin_ssubll_overflow:
5664 case Builtin::BI__builtin_smul_overflow:
5665 case Builtin::BI__builtin_smull_overflow:
5666 case Builtin::BI__builtin_smulll_overflow: {
5668 // We translate all of these builtins directly to the relevant llvm IR node.
5670 // Scalarize our inputs.
5671 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5672 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5673 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
5675 // Decide which of the overflow intrinsics we are lowering to:
5676 llvm::Intrinsic::ID IntrinsicId;
5677 switch (BuiltinID) {
5678 default: llvm_unreachable("Unknown overflow builtin id.");
5679 case Builtin::BI__builtin_uadd_overflow:
5680 case Builtin::BI__builtin_uaddl_overflow:
5681 case Builtin::BI__builtin_uaddll_overflow:
5682 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5683 break;
5684 case Builtin::BI__builtin_usub_overflow:
5685 case Builtin::BI__builtin_usubl_overflow:
5686 case Builtin::BI__builtin_usubll_overflow:
5687 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5688 break;
5689 case Builtin::BI__builtin_umul_overflow:
5690 case Builtin::BI__builtin_umull_overflow:
5691 case Builtin::BI__builtin_umulll_overflow:
5692 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5693 break;
5694 case Builtin::BI__builtin_sadd_overflow:
5695 case Builtin::BI__builtin_saddl_overflow:
5696 case Builtin::BI__builtin_saddll_overflow:
5697 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5698 break;
5699 case Builtin::BI__builtin_ssub_overflow:
5700 case Builtin::BI__builtin_ssubl_overflow:
5701 case Builtin::BI__builtin_ssubll_overflow:
5702 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5703 break;
5704 case Builtin::BI__builtin_smul_overflow:
5705 case Builtin::BI__builtin_smull_overflow:
5706 case Builtin::BI__builtin_smulll_overflow:
5707 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5708 break;
5712 llvm::Value *Carry;
5713 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
5714 Builder.CreateStore(Sum, SumOutPtr);
5716 return RValue::get(Carry);
5718 case Builtin::BIaddressof:
5719 case Builtin::BI__addressof:
5720 case Builtin::BI__builtin_addressof:
5721 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5722 case Builtin::BI__builtin_function_start:
5723 return RValue::get(CGM.GetFunctionStart(
5724 E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
5725 case Builtin::BI__builtin_operator_new:
5726 return EmitBuiltinNewDeleteCall(
5727 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
5728 case Builtin::BI__builtin_operator_delete:
5729 EmitBuiltinNewDeleteCall(
5730 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
5731 return RValue::get(nullptr);
5733 case Builtin::BI__builtin_is_aligned:
5734 return EmitBuiltinIsAligned(E);
5735 case Builtin::BI__builtin_align_up:
5736 return EmitBuiltinAlignTo(E, true);
5737 case Builtin::BI__builtin_align_down:
5738 return EmitBuiltinAlignTo(E, false);
5740 case Builtin::BI__noop:
5741 // __noop always evaluates to an integer literal zero.
5742 return RValue::get(ConstantInt::get(IntTy, 0));
5743 case Builtin::BI__builtin_call_with_static_chain: {
5744 const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5745 const Expr *Chain = E->getArg(1);
5746 return EmitCall(Call->getCallee()->getType(),
5747 EmitCallee(Call->getCallee()), Call, ReturnValue,
5748 EmitScalarExpr(Chain));
5750 case Builtin::BI_InterlockedExchange8:
5751 case Builtin::BI_InterlockedExchange16:
5752 case Builtin::BI_InterlockedExchange:
5753 case Builtin::BI_InterlockedExchangePointer:
5754 return RValue::get(
5755 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5756 case Builtin::BI_InterlockedCompareExchangePointer:
5757 return RValue::get(
5758 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange, E));
5759 case Builtin::BI_InterlockedCompareExchangePointer_nf:
5760 return RValue::get(
5761 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E));
5762 case Builtin::BI_InterlockedCompareExchange8:
5763 case Builtin::BI_InterlockedCompareExchange16:
5764 case Builtin::BI_InterlockedCompareExchange:
5765 case Builtin::BI_InterlockedCompareExchange64:
5766 return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
5767 case Builtin::BI_InterlockedIncrement16:
5768 case Builtin::BI_InterlockedIncrement:
5769 return RValue::get(
5770 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5771 case Builtin::BI_InterlockedDecrement16:
5772 case Builtin::BI_InterlockedDecrement:
5773 return RValue::get(
5774 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5775 case Builtin::BI_InterlockedAnd8:
5776 case Builtin::BI_InterlockedAnd16:
5777 case Builtin::BI_InterlockedAnd:
5778 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5779 case Builtin::BI_InterlockedExchangeAdd8:
5780 case Builtin::BI_InterlockedExchangeAdd16:
5781 case Builtin::BI_InterlockedExchangeAdd:
5782 return RValue::get(
5783 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5784 case Builtin::BI_InterlockedExchangeSub8:
5785 case Builtin::BI_InterlockedExchangeSub16:
5786 case Builtin::BI_InterlockedExchangeSub:
5787 return RValue::get(
5788 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5789 case Builtin::BI_InterlockedOr8:
5790 case Builtin::BI_InterlockedOr16:
5791 case Builtin::BI_InterlockedOr:
5792 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5793 case Builtin::BI_InterlockedXor8:
5794 case Builtin::BI_InterlockedXor16:
5795 case Builtin::BI_InterlockedXor:
5796 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5798 case Builtin::BI_bittest64:
5799 case Builtin::BI_bittest:
5800 case Builtin::BI_bittestandcomplement64:
5801 case Builtin::BI_bittestandcomplement:
5802 case Builtin::BI_bittestandreset64:
5803 case Builtin::BI_bittestandreset:
5804 case Builtin::BI_bittestandset64:
5805 case Builtin::BI_bittestandset:
5806 case Builtin::BI_interlockedbittestandreset:
5807 case Builtin::BI_interlockedbittestandreset64:
5808 case Builtin::BI_interlockedbittestandset64:
5809 case Builtin::BI_interlockedbittestandset:
5810 case Builtin::BI_interlockedbittestandset_acq:
5811 case Builtin::BI_interlockedbittestandset_rel:
5812 case Builtin::BI_interlockedbittestandset_nf:
5813 case Builtin::BI_interlockedbittestandreset_acq:
5814 case Builtin::BI_interlockedbittestandreset_rel:
5815 case Builtin::BI_interlockedbittestandreset_nf:
5816 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5818 // These builtins exist to emit regular volatile loads and stores not
5819 // affected by the -fms-volatile setting.
5820 case Builtin::BI__iso_volatile_load8:
5821 case Builtin::BI__iso_volatile_load16:
5822 case Builtin::BI__iso_volatile_load32:
5823 case Builtin::BI__iso_volatile_load64:
5824 return RValue::get(EmitISOVolatileLoad(*this, E));
5825 case Builtin::BI__iso_volatile_store8:
5826 case Builtin::BI__iso_volatile_store16:
5827 case Builtin::BI__iso_volatile_store32:
5828 case Builtin::BI__iso_volatile_store64:
5829 return RValue::get(EmitISOVolatileStore(*this, E));
5831 case Builtin::BI__builtin_ptrauth_sign_constant:
5832 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
5834 case Builtin::BI__builtin_ptrauth_auth:
5835 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5836 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5837 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5838 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5839 case Builtin::BI__builtin_ptrauth_strip: {
5840 // Emit the arguments.
5841 SmallVector<llvm::Value *, 5> Args;
5842 for (auto argExpr : E->arguments())
5843 Args.push_back(EmitScalarExpr(argExpr));
5845 // Cast the value to intptr_t, saving its original type.
5846 llvm::Type *OrigValueType = Args[0]->getType();
5847 if (OrigValueType->isPointerTy())
5848 Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);
5850 switch (BuiltinID) {
5851 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5852 if (Args[4]->getType()->isPointerTy())
5853 Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);
5854 [[fallthrough]];
5856 case Builtin::BI__builtin_ptrauth_auth:
5857 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5858 if (Args[2]->getType()->isPointerTy())
5859 Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy);
5860 break;
5862 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5863 if (Args[1]->getType()->isPointerTy())
5864 Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);
5865 break;
5867 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5868 case Builtin::BI__builtin_ptrauth_strip:
5869 break;
5872 // Call the intrinsic.
5873 auto IntrinsicID = [&]() -> unsigned {
5874 switch (BuiltinID) {
5875 case Builtin::BI__builtin_ptrauth_auth:
5876 return llvm::Intrinsic::ptrauth_auth;
5877 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5878 return llvm::Intrinsic::ptrauth_resign;
5879 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5880 return llvm::Intrinsic::ptrauth_blend;
5881 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5882 return llvm::Intrinsic::ptrauth_sign_generic;
5883 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5884 return llvm::Intrinsic::ptrauth_sign;
5885 case Builtin::BI__builtin_ptrauth_strip:
5886 return llvm::Intrinsic::ptrauth_strip;
5888 llvm_unreachable("bad ptrauth intrinsic");
5889 }();
5890 auto Intrinsic = CGM.getIntrinsic(IntrinsicID);
5891 llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);
5893 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5894 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5895 OrigValueType->isPointerTy()) {
5896 Result = Builder.CreateIntToPtr(Result, OrigValueType);
5898 return RValue::get(Result);
5901 case Builtin::BI__exception_code:
5902 case Builtin::BI_exception_code:
5903 return RValue::get(EmitSEHExceptionCode());
5904 case Builtin::BI__exception_info:
5905 case Builtin::BI_exception_info:
5906 return RValue::get(EmitSEHExceptionInfo());
5907 case Builtin::BI__abnormal_termination:
5908 case Builtin::BI_abnormal_termination:
5909 return RValue::get(EmitSEHAbnormalTermination());
5910 case Builtin::BI_setjmpex:
5911 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5912 E->getArg(0)->getType()->isPointerType())
5913 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5914 break;
5915 case Builtin::BI_setjmp:
5916 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5917 E->getArg(0)->getType()->isPointerType()) {
5918 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5919 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5920 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5921 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5922 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5924 break;
5926 // C++ std:: builtins.
5927 case Builtin::BImove:
5928 case Builtin::BImove_if_noexcept:
5929 case Builtin::BIforward:
5930 case Builtin::BIforward_like:
5931 case Builtin::BIas_const:
5932 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5933 case Builtin::BI__GetExceptionInfo: {
5934 if (llvm::GlobalVariable *GV =
5935 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
5936 return RValue::get(GV);
5937 break;
5940 case Builtin::BI__fastfail:
5941 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5943 case Builtin::BI__builtin_coro_id:
5944 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5945 case Builtin::BI__builtin_coro_promise:
5946 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5947 case Builtin::BI__builtin_coro_resume:
5948 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5949 return RValue::get(nullptr);
5950 case Builtin::BI__builtin_coro_frame:
5951 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5952 case Builtin::BI__builtin_coro_noop:
5953 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5954 case Builtin::BI__builtin_coro_free:
5955 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5956 case Builtin::BI__builtin_coro_destroy:
5957 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5958 return RValue::get(nullptr);
5959 case Builtin::BI__builtin_coro_done:
5960 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5961 case Builtin::BI__builtin_coro_alloc:
5962 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5963 case Builtin::BI__builtin_coro_begin:
5964 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5965 case Builtin::BI__builtin_coro_end:
5966 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5967 case Builtin::BI__builtin_coro_suspend:
5968 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5969 case Builtin::BI__builtin_coro_size:
5970 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
5971 case Builtin::BI__builtin_coro_align:
5972 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
5974 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
5975 case Builtin::BIread_pipe:
5976 case Builtin::BIwrite_pipe: {
5977 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5978 *Arg1 = EmitScalarExpr(E->getArg(1));
5979 CGOpenCLRuntime OpenCLRT(CGM);
5980 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5981 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5983 // Type of the generic packet parameter.
5984 unsigned GenericAS =
5985 getContext().getTargetAddressSpace(LangAS::opencl_generic);
5986 llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
5988 // Testing which overloaded version we should generate the call for.
5989 if (2U == E->getNumArgs()) {
5990 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
5991 : "__write_pipe_2";
5992 // Creating a generic function type to be able to call with any builtin or
5993 // user defined type.
5994 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
5995 llvm::FunctionType *FTy = llvm::FunctionType::get(
5996 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5997 Value *ACast = Builder.CreateAddrSpaceCast(Arg1, I8PTy);
5998 return RValue::get(
5999 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6000 {Arg0, ACast, PacketSize, PacketAlign}));
6001 } else {
6002 assert(4 == E->getNumArgs() &&
6003 "Illegal number of parameters to pipe function");
6004 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
6005 : "__write_pipe_4";
6007 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
6008 Int32Ty, Int32Ty};
6009 Value *Arg2 = EmitScalarExpr(E->getArg(2)),
6010 *Arg3 = EmitScalarExpr(E->getArg(3));
6011 llvm::FunctionType *FTy = llvm::FunctionType::get(
6012 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6013 Value *ACast = Builder.CreateAddrSpaceCast(Arg3, I8PTy);
6014 // We know the third argument is an integer type, but we may need to cast
6015 // it to i32.
6016 if (Arg2->getType() != Int32Ty)
6017 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
6018 return RValue::get(
6019 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6020 {Arg0, Arg1, Arg2, ACast, PacketSize, PacketAlign}));
6023 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
6024 // functions
6025 case Builtin::BIreserve_read_pipe:
6026 case Builtin::BIreserve_write_pipe:
6027 case Builtin::BIwork_group_reserve_read_pipe:
6028 case Builtin::BIwork_group_reserve_write_pipe:
6029 case Builtin::BIsub_group_reserve_read_pipe:
6030 case Builtin::BIsub_group_reserve_write_pipe: {
6031 // Composing the mangled name for the function.
6032 const char *Name;
6033 if (BuiltinID == Builtin::BIreserve_read_pipe)
6034 Name = "__reserve_read_pipe";
6035 else if (BuiltinID == Builtin::BIreserve_write_pipe)
6036 Name = "__reserve_write_pipe";
6037 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
6038 Name = "__work_group_reserve_read_pipe";
6039 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
6040 Name = "__work_group_reserve_write_pipe";
6041 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
6042 Name = "__sub_group_reserve_read_pipe";
6043 else
6044 Name = "__sub_group_reserve_write_pipe";
6046 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
6047 *Arg1 = EmitScalarExpr(E->getArg(1));
6048 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
6049 CGOpenCLRuntime OpenCLRT(CGM);
6050 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6051 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6053 // Building the generic function prototype.
6054 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
6055 llvm::FunctionType *FTy = llvm::FunctionType::get(
6056 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6057 // We know the second argument is an integer type, but we may need to cast
6058 // it to i32.
6059 if (Arg1->getType() != Int32Ty)
6060 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
6061 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6062 {Arg0, Arg1, PacketSize, PacketAlign}));
6064 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
6065 // functions
6066 case Builtin::BIcommit_read_pipe:
6067 case Builtin::BIcommit_write_pipe:
6068 case Builtin::BIwork_group_commit_read_pipe:
6069 case Builtin::BIwork_group_commit_write_pipe:
6070 case Builtin::BIsub_group_commit_read_pipe:
6071 case Builtin::BIsub_group_commit_write_pipe: {
6072 const char *Name;
6073 if (BuiltinID == Builtin::BIcommit_read_pipe)
6074 Name = "__commit_read_pipe";
6075 else if (BuiltinID == Builtin::BIcommit_write_pipe)
6076 Name = "__commit_write_pipe";
6077 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
6078 Name = "__work_group_commit_read_pipe";
6079 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
6080 Name = "__work_group_commit_write_pipe";
6081 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
6082 Name = "__sub_group_commit_read_pipe";
6083 else
6084 Name = "__sub_group_commit_write_pipe";
6086 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
6087 *Arg1 = EmitScalarExpr(E->getArg(1));
6088 CGOpenCLRuntime OpenCLRT(CGM);
6089 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6090 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6092 // Building the generic function prototype.
6093 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
6094 llvm::FunctionType *FTy =
6095 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
6096 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6098 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6099 {Arg0, Arg1, PacketSize, PacketAlign}));
6101 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
6102 case Builtin::BIget_pipe_num_packets:
6103 case Builtin::BIget_pipe_max_packets: {
6104 const char *BaseName;
6105 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
6106 if (BuiltinID == Builtin::BIget_pipe_num_packets)
6107 BaseName = "__get_pipe_num_packets";
6108 else
6109 BaseName = "__get_pipe_max_packets";
6110 std::string Name = std::string(BaseName) +
6111 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
6113 // Building the generic function prototype.
6114 Value *Arg0 = EmitScalarExpr(E->getArg(0));
6115 CGOpenCLRuntime OpenCLRT(CGM);
6116 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6117 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6118 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
6119 llvm::FunctionType *FTy = llvm::FunctionType::get(
6120 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6122 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6123 {Arg0, PacketSize, PacketAlign}));
6126 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
6127 case Builtin::BIto_global:
6128 case Builtin::BIto_local:
6129 case Builtin::BIto_private: {
6130 auto Arg0 = EmitScalarExpr(E->getArg(0));
6131 auto NewArgT = llvm::PointerType::get(
6132 getLLVMContext(),
6133 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
6134 auto NewRetT = llvm::PointerType::get(
6135 getLLVMContext(),
6136 CGM.getContext().getTargetAddressSpace(
6137 E->getType()->getPointeeType().getAddressSpace()));
6138 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
6139 llvm::Value *NewArg;
6140 if (Arg0->getType()->getPointerAddressSpace() !=
6141 NewArgT->getPointerAddressSpace())
6142 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
6143 else
6144 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
6145 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
6146 auto NewCall =
6147 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
6148 return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
6149 ConvertType(E->getType())));
6152 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
6153 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
6154 // The code below expands the builtin call to a call to one of the following
6155 // functions that an OpenCL runtime library will have to provide:
6156 // __enqueue_kernel_basic
6157 // __enqueue_kernel_varargs
6158 // __enqueue_kernel_basic_events
6159 // __enqueue_kernel_events_varargs
6160 case Builtin::BIenqueue_kernel: {
6161 StringRef Name; // Generated function call name
6162 unsigned NumArgs = E->getNumArgs();
6164 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
6165 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6166 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6168 llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
6169 llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
6170 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
6171 llvm::Value *Range = NDRangeL.getAddress().emitRawPointer(*this);
6172 llvm::Type *RangeTy = NDRangeL.getAddress().getType();
6174 if (NumArgs == 4) {
6175 // The most basic form of the call with parameters:
6176 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
6177 Name = "__enqueue_kernel_basic";
6178 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
6179 GenericVoidPtrTy};
6180 llvm::FunctionType *FTy = llvm::FunctionType::get(
6181 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6183 auto Info =
6184 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
6185 llvm::Value *Kernel =
6186 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6187 llvm::Value *Block =
6188 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6190 auto RTCall = EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6191 {Queue, Flags, Range, Kernel, Block});
6192 return RValue::get(RTCall);
6194 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
6196 // Create a temporary array to hold the sizes of local pointer arguments
6197 // for the block. \p First is the position of the first size argument.
6198 auto CreateArrayForSizeVar = [=](unsigned First)
6199 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
6200 llvm::APInt ArraySize(32, NumArgs - First);
6201 QualType SizeArrayTy = getContext().getConstantArrayType(
6202 getContext().getSizeType(), ArraySize, nullptr,
6203 ArraySizeModifier::Normal,
6204 /*IndexTypeQuals=*/0);
6205 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
6206 llvm::Value *TmpPtr = Tmp.getPointer();
6207 // The EmitLifetime* pair expects a naked alloca as its last argument.
6208 // However, when the default AS is not the alloca AS, Tmp is actually
6209 // the alloca address-space-cast to the default AS, hence the
6210 // stripPointerCasts().
6211 llvm::Value *Alloca = TmpPtr->stripPointerCasts();
6212 llvm::Value *TmpSize = EmitLifetimeStart(
6213 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
6214 llvm::Value *ElemPtr;
6215 // Each of the following arguments specifies the size of the corresponding
6216 // argument passed to the enqueued block.
6217 auto *Zero = llvm::ConstantInt::get(IntTy, 0);
6218 for (unsigned I = First; I < NumArgs; ++I) {
6219 auto *Index = llvm::ConstantInt::get(IntTy, I - First);
6220 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
6221 {Zero, Index});
6222 if (I == First)
6223 ElemPtr = GEP;
6224 auto *V =
6225 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
6226 Builder.CreateAlignedStore(
6227 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
6229 // Return the Alloca itself rather than a potential ascast as this is only
6230 // used by the paired EmitLifetimeEnd.
6231 return std::tie(ElemPtr, TmpSize, Alloca);
6234 // Could have events and/or varargs.
6235 if (E->getArg(3)->getType()->isBlockPointerType()) {
6236 // No events passed, but has variadic arguments.
6237 Name = "__enqueue_kernel_varargs";
6238 auto Info =
6239 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
6240 llvm::Value *Kernel =
6241 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6242 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6243 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6244 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
6246 // Create a vector of the arguments, as well as a constant value to
6247 // express to the runtime the number of variadic arguments.
6248 llvm::Value *const Args[] = {Queue, Flags,
6249 Range, Kernel,
6250 Block, ConstantInt::get(IntTy, NumArgs - 4),
6251 ElemPtr};
6252 llvm::Type *const ArgTys[] = {
6253 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
6254 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
6256 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
6257 auto Call = RValue::get(
6258 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
6259 if (TmpSize)
6260 EmitLifetimeEnd(TmpSize, TmpPtr);
6261 return Call;
6263 // Any calls now have event arguments passed.
6264 if (NumArgs >= 7) {
6265 llvm::PointerType *PtrTy = llvm::PointerType::get(
6266 CGM.getLLVMContext(),
6267 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
6269 llvm::Value *NumEvents =
6270 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
6272 // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments
6273 // to be a null pointer constant (including `0` literal), we can take it
6274 // into account and emit null pointer directly.
6275 llvm::Value *EventWaitList = nullptr;
6276 if (E->getArg(4)->isNullPointerConstant(
6277 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
6278 EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
6279 } else {
6280 EventWaitList =
6281 E->getArg(4)->getType()->isArrayType()
6282 ? EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this)
6283 : EmitScalarExpr(E->getArg(4));
6284 // Convert to generic address space.
6285 EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
6287 llvm::Value *EventRet = nullptr;
6288 if (E->getArg(5)->isNullPointerConstant(
6289 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
6290 EventRet = llvm::ConstantPointerNull::get(PtrTy);
6291 } else {
6292 EventRet =
6293 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
6296 auto Info =
6297 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
6298 llvm::Value *Kernel =
6299 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6300 llvm::Value *Block =
6301 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6303 std::vector<llvm::Type *> ArgTys = {
6304 QueueTy, Int32Ty, RangeTy, Int32Ty,
6305 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
6307 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
6308 NumEvents, EventWaitList, EventRet,
6309 Kernel, Block};
6311 if (NumArgs == 7) {
6312 // Has events but no variadics.
6313 Name = "__enqueue_kernel_basic_events";
6314 llvm::FunctionType *FTy = llvm::FunctionType::get(
6315 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6316 return RValue::get(
6317 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6318 llvm::ArrayRef<llvm::Value *>(Args)));
6320 // Has event info and variadics
6321 // Pass the number of variadics to the runtime function too.
6322 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
6323 ArgTys.push_back(Int32Ty);
6324 Name = "__enqueue_kernel_events_varargs";
6326 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6327 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
6328 Args.push_back(ElemPtr);
6329 ArgTys.push_back(ElemPtr->getType());
6331 llvm::FunctionType *FTy = llvm::FunctionType::get(
6332 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6333 auto Call =
6334 RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6335 llvm::ArrayRef<llvm::Value *>(Args)));
6336 if (TmpSize)
6337 EmitLifetimeEnd(TmpSize, TmpPtr);
6338 return Call;
6340 llvm_unreachable("Unexpected enqueue_kernel signature");
6342 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
6343 // parameter.
6344 case Builtin::BIget_kernel_work_group_size: {
6345 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6346 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6347 auto Info =
6348 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
6349 Value *Kernel =
6350 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6351 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6352 return RValue::get(EmitRuntimeCall(
6353 CGM.CreateRuntimeFunction(
6354 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6355 false),
6356 "__get_kernel_work_group_size_impl"),
6357 {Kernel, Arg}));
6359 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
6360 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6361 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6362 auto Info =
6363 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
6364 Value *Kernel =
6365 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6366 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6367 return RValue::get(EmitRuntimeCall(
6368 CGM.CreateRuntimeFunction(
6369 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6370 false),
6371 "__get_kernel_preferred_work_group_size_multiple_impl"),
6372 {Kernel, Arg}));
6374 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
6375 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
6376 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6377 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6378 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
6379 llvm::Value *NDRange = NDRangeL.getAddress().emitRawPointer(*this);
6380 auto Info =
6381 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
6382 Value *Kernel =
6383 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6384 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6385 const char *Name =
6386 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
6387 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
6388 : "__get_kernel_sub_group_count_for_ndrange_impl";
6389 return RValue::get(EmitRuntimeCall(
6390 CGM.CreateRuntimeFunction(
6391 llvm::FunctionType::get(
6392 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
6393 false),
6394 Name),
6395 {NDRange, Kernel, Block}));
6397 case Builtin::BI__builtin_store_half:
6398 case Builtin::BI__builtin_store_halff: {
6399 Value *Val = EmitScalarExpr(E->getArg(0));
6400 Address Address = EmitPointerWithAlignment(E->getArg(1));
6401 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
6402 Builder.CreateStore(HalfVal, Address);
6403 return RValue::get(nullptr);
6405 case Builtin::BI__builtin_load_half: {
6406 Address Address = EmitPointerWithAlignment(E->getArg(0));
6407 Value *HalfVal = Builder.CreateLoad(Address);
6408 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
6410 case Builtin::BI__builtin_load_halff: {
6411 Address Address = EmitPointerWithAlignment(E->getArg(0));
6412 Value *HalfVal = Builder.CreateLoad(Address);
6413 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
6415 case Builtin::BI__builtin_printf:
6416 case Builtin::BIprintf:
6417 if (getTarget().getTriple().isNVPTX() ||
6418 getTarget().getTriple().isAMDGCN() ||
6419 (getTarget().getTriple().isSPIRV() &&
6420 getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
6421 if (getTarget().getTriple().isNVPTX())
6422 return EmitNVPTXDevicePrintfCallExpr(E);
6423 if ((getTarget().getTriple().isAMDGCN() ||
6424 getTarget().getTriple().isSPIRV()) &&
6425 getLangOpts().HIP)
6426 return EmitAMDGPUDevicePrintfCallExpr(E);
6429 break;
6430 case Builtin::BI__builtin_canonicalize:
6431 case Builtin::BI__builtin_canonicalizef:
6432 case Builtin::BI__builtin_canonicalizef16:
6433 case Builtin::BI__builtin_canonicalizel:
6434 return RValue::get(
6435 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::canonicalize));
6437 case Builtin::BI__builtin_thread_pointer: {
6438 if (!getContext().getTargetInfo().isTLSSupported())
6439 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
6440 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
6441 break;
6443 case Builtin::BI__builtin_os_log_format:
6444 return emitBuiltinOSLogFormat(*E);
6446 case Builtin::BI__xray_customevent: {
6447 if (!ShouldXRayInstrumentFunction())
6448 return RValue::getIgnored();
6450 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6451 XRayInstrKind::Custom))
6452 return RValue::getIgnored();
6454 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6455 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
6456 return RValue::getIgnored();
6458 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
6459 auto FTy = F->getFunctionType();
6460 auto Arg0 = E->getArg(0);
6461 auto Arg0Val = EmitScalarExpr(Arg0);
6462 auto Arg0Ty = Arg0->getType();
6463 auto PTy0 = FTy->getParamType(0);
6464 if (PTy0 != Arg0Val->getType()) {
6465 if (Arg0Ty->isArrayType())
6466 Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this);
6467 else
6468 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
6470 auto Arg1 = EmitScalarExpr(E->getArg(1));
6471 auto PTy1 = FTy->getParamType(1);
6472 if (PTy1 != Arg1->getType())
6473 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
6474 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
6477 case Builtin::BI__xray_typedevent: {
6478 // TODO: There should be a way to always emit events even if the current
6479 // function is not instrumented. Losing events in a stream can cripple
6480 // a trace.
6481 if (!ShouldXRayInstrumentFunction())
6482 return RValue::getIgnored();
6484 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6485 XRayInstrKind::Typed))
6486 return RValue::getIgnored();
6488 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6489 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
6490 return RValue::getIgnored();
6492 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
6493 auto FTy = F->getFunctionType();
6494 auto Arg0 = EmitScalarExpr(E->getArg(0));
6495 auto PTy0 = FTy->getParamType(0);
6496 if (PTy0 != Arg0->getType())
6497 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
6498 auto Arg1 = E->getArg(1);
6499 auto Arg1Val = EmitScalarExpr(Arg1);
6500 auto Arg1Ty = Arg1->getType();
6501 auto PTy1 = FTy->getParamType(1);
6502 if (PTy1 != Arg1Val->getType()) {
6503 if (Arg1Ty->isArrayType())
6504 Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this);
6505 else
6506 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
6508 auto Arg2 = EmitScalarExpr(E->getArg(2));
6509 auto PTy2 = FTy->getParamType(2);
6510 if (PTy2 != Arg2->getType())
6511 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
6512 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
6515 case Builtin::BI__builtin_ms_va_start:
6516 case Builtin::BI__builtin_ms_va_end:
6517 return RValue::get(
6518 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this),
6519 BuiltinID == Builtin::BI__builtin_ms_va_start));
6521 case Builtin::BI__builtin_ms_va_copy: {
6522 // Lower this manually. We can't reliably determine whether or not any
6523 // given va_copy() is for a Win64 va_list from the calling convention
6524 // alone, because it's legal to do this from a System V ABI function.
6525 // With opaque pointer types, we won't have enough information in LLVM
6526 // IR to determine this from the argument types, either. Best to do it
6527 // now, while we have enough information.
6528 Address DestAddr = EmitMSVAListRef(E->getArg(0));
6529 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6531 DestAddr = DestAddr.withElementType(Int8PtrTy);
6532 SrcAddr = SrcAddr.withElementType(Int8PtrTy);
6534 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6535 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
6538 case Builtin::BI__builtin_get_device_side_mangled_name: {
6539 auto Name = CGM.getCUDARuntime().getDeviceSideName(
6540 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
6541 auto Str = CGM.GetAddrOfConstantCString(Name, "");
6542 return RValue::get(Str.getPointer());
6546 // If this is an alias for a lib function (e.g. __builtin_sin), emit
6547 // the call using the normal call path, but using the unmangled
6548 // version of the function name.
6549 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6550 return emitLibraryCall(*this, FD, E,
6551 CGM.getBuiltinLibFunction(FD, BuiltinID));
6553 // If this is a predefined lib function (e.g. malloc), emit the call
6554 // using exactly the normal call path.
6555 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6556 return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));
6558 // Check that a call to a target specific builtin has the correct target
6559 // features.
6560 // This is down here to avoid non-target specific builtins, however, if
6561 // generic builtins start to require generic target features then we
6562 // can move this up to the beginning of the function.
6563 checkTargetFeatures(E, FD);
6565 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
6566 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
6568 // See if we have a target specific intrinsic.
6569 StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
6570 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6571 StringRef Prefix =
6572 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
6573 if (!Prefix.empty()) {
6574 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
6575 if (IntrinsicID == Intrinsic::not_intrinsic && Prefix == "spv" &&
6576 getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA)
6577 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin("amdgcn", Name);
6578 // NOTE we don't need to perform a compatibility flag check here since the
6579 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
6580 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
6581 if (IntrinsicID == Intrinsic::not_intrinsic)
6582 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
6585 if (IntrinsicID != Intrinsic::not_intrinsic) {
6586 SmallVector<Value*, 16> Args;
6588 // Find out if any arguments are required to be integer constant
6589 // expressions.
6590 unsigned ICEArguments = 0;
6591 ASTContext::GetBuiltinTypeError Error;
6592 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6593 assert(Error == ASTContext::GE_None && "Should not codegen an error");
6595 Function *F = CGM.getIntrinsic(IntrinsicID);
6596 llvm::FunctionType *FTy = F->getFunctionType();
6598 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
6599 Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
6600 // If the intrinsic arg type is different from the builtin arg type
6601 // we need to do a bit cast.
6602 llvm::Type *PTy = FTy->getParamType(i);
6603 if (PTy != ArgValue->getType()) {
6604 // XXX - vector of pointers?
6605 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
6606 if (PtrTy->getAddressSpace() !=
6607 ArgValue->getType()->getPointerAddressSpace()) {
6608 ArgValue = Builder.CreateAddrSpaceCast(
6609 ArgValue, llvm::PointerType::get(getLLVMContext(),
6610 PtrTy->getAddressSpace()));
6614 // Cast vector type (e.g., v256i32) to x86_amx, this only happen
6615 // in amx intrinsics.
6616 if (PTy->isX86_AMXTy())
6617 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6618 {ArgValue->getType()}, {ArgValue});
6619 else
6620 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
6623 Args.push_back(ArgValue);
6626 Value *V = Builder.CreateCall(F, Args);
6627 QualType BuiltinRetType = E->getType();
6629 llvm::Type *RetTy = VoidTy;
6630 if (!BuiltinRetType->isVoidType())
6631 RetTy = ConvertType(BuiltinRetType);
6633 if (RetTy != V->getType()) {
6634 // XXX - vector of pointers?
6635 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6636 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6637 V = Builder.CreateAddrSpaceCast(
6638 V, llvm::PointerType::get(getLLVMContext(),
6639 PtrTy->getAddressSpace()));
6643 // Cast x86_amx to vector type (e.g., v256i32), this only happen
6644 // in amx intrinsics.
6645 if (V->getType()->isX86_AMXTy())
6646 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6647 {V});
6648 else
6649 V = Builder.CreateBitCast(V, RetTy);
6652 if (RetTy->isVoidTy())
6653 return RValue::get(nullptr);
6655 return RValue::get(V);
6658 // Some target-specific builtins can have aggregate return values, e.g.
6659 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6660 // ReturnValue to be non-null, so that the target-specific emission code can
6661 // always just emit into it.
6662 TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
6663 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6664 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6665 ReturnValue = ReturnValueSlot(DestPtr, false);
6668 // Now see if we can emit a target-specific builtin.
6669 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6670 switch (EvalKind) {
6671 case TEK_Scalar:
6672 if (V->getType()->isVoidTy())
6673 return RValue::get(nullptr);
6674 return RValue::get(V);
6675 case TEK_Aggregate:
6676 return RValue::getAggregate(ReturnValue.getAddress(),
6677 ReturnValue.isVolatile());
6678 case TEK_Complex:
6679 llvm_unreachable("No current target builtin returns complex");
6681 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6684 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6685 if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E, ReturnValue)) {
6686 switch (EvalKind) {
6687 case TEK_Scalar:
6688 if (V->getType()->isVoidTy())
6689 return RValue::get(nullptr);
6690 return RValue::get(V);
6691 case TEK_Aggregate:
6692 return RValue::getAggregate(ReturnValue.getAddress(),
6693 ReturnValue.isVolatile());
6694 case TEK_Complex:
6695 llvm_unreachable("No current hlsl builtin returns complex");
6697 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6700 if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6701 return EmitHipStdParUnsupportedBuiltin(this, FD);
6703 ErrorUnsupported(E, "builtin function");
6705 // Unknown builtin, for now just dump it out and return undef.
6706 return GetUndefRValue(E->getType());
6709 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
6710 unsigned BuiltinID, const CallExpr *E,
6711 ReturnValueSlot ReturnValue,
6712 llvm::Triple::ArchType Arch) {
6713 // When compiling in HipStdPar mode we have to be conservative in rejecting
6714 // target specific features in the FE, and defer the possible error to the
6715 // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
6716 // referenced by an accelerator executable function, we emit an error.
6717 // Returning nullptr here leads to the builtin being handled in
6718 // EmitStdParUnsupportedBuiltin.
6719 if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6720 Arch != CGF->getTarget().getTriple().getArch())
6721 return nullptr;
6723 switch (Arch) {
6724 case llvm::Triple::arm:
6725 case llvm::Triple::armeb:
6726 case llvm::Triple::thumb:
6727 case llvm::Triple::thumbeb:
6728 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6729 case llvm::Triple::aarch64:
6730 case llvm::Triple::aarch64_32:
6731 case llvm::Triple::aarch64_be:
6732 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6733 case llvm::Triple::bpfeb:
6734 case llvm::Triple::bpfel:
6735 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6736 case llvm::Triple::x86:
6737 case llvm::Triple::x86_64:
6738 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6739 case llvm::Triple::ppc:
6740 case llvm::Triple::ppcle:
6741 case llvm::Triple::ppc64:
6742 case llvm::Triple::ppc64le:
6743 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6744 case llvm::Triple::r600:
6745 case llvm::Triple::amdgcn:
6746 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6747 case llvm::Triple::systemz:
6748 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6749 case llvm::Triple::nvptx:
6750 case llvm::Triple::nvptx64:
6751 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6752 case llvm::Triple::wasm32:
6753 case llvm::Triple::wasm64:
6754 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6755 case llvm::Triple::hexagon:
6756 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6757 case llvm::Triple::riscv32:
6758 case llvm::Triple::riscv64:
6759 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6760 case llvm::Triple::spirv64:
6761 if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA)
6762 return nullptr;
6763 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6764 default:
6765 return nullptr;
6769 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
6770 const CallExpr *E,
6771 ReturnValueSlot ReturnValue) {
6772 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
6773 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6774 return EmitTargetArchBuiltinExpr(
6775 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
6776 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
6779 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
6780 getTarget().getTriple().getArch());
6783 static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6784 NeonTypeFlags TypeFlags,
6785 bool HasLegalHalfType = true,
6786 bool V1Ty = false,
6787 bool AllowBFloatArgsAndRet = true) {
6788 int IsQuad = TypeFlags.isQuad();
6789 switch (TypeFlags.getEltType()) {
6790 case NeonTypeFlags::Int8:
6791 case NeonTypeFlags::Poly8:
6792 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6793 case NeonTypeFlags::Int16:
6794 case NeonTypeFlags::Poly16:
6795 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6796 case NeonTypeFlags::BFloat16:
6797 if (AllowBFloatArgsAndRet)
6798 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6799 else
6800 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6801 case NeonTypeFlags::Float16:
6802 if (HasLegalHalfType)
6803 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6804 else
6805 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6806 case NeonTypeFlags::Int32:
6807 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6808 case NeonTypeFlags::Int64:
6809 case NeonTypeFlags::Poly64:
6810 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6811 case NeonTypeFlags::Poly128:
6812 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
6813 // There is a lot of i128 and f128 API missing.
6814 // so we use v16i8 to represent poly128 and get pattern matched.
6815 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6816 case NeonTypeFlags::Float32:
6817 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6818 case NeonTypeFlags::Float64:
6819 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6821 llvm_unreachable("Unknown vector element type!");
6824 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6825 NeonTypeFlags IntTypeFlags) {
6826 int IsQuad = IntTypeFlags.isQuad();
6827 switch (IntTypeFlags.getEltType()) {
6828 case NeonTypeFlags::Int16:
6829 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6830 case NeonTypeFlags::Int32:
6831 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6832 case NeonTypeFlags::Int64:
6833 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6834 default:
6835 llvm_unreachable("Type can't be converted to floating-point!");
6839 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6840 const ElementCount &Count) {
6841 Value *SV = llvm::ConstantVector::getSplat(Count, C);
6842 return Builder.CreateShuffleVector(V, V, SV, "lane");
6845 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6846 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6847 return EmitNeonSplat(V, C, EC);
6850 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
6851 const char *name,
6852 unsigned shift, bool rightshift) {
6853 unsigned j = 0;
6854 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6855 ai != ae; ++ai, ++j) {
6856 if (F->isConstrainedFPIntrinsic())
6857 if (ai->getType()->isMetadataTy())
6858 continue;
6859 if (shift > 0 && shift == j)
6860 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6861 else
6862 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6865 if (F->isConstrainedFPIntrinsic())
6866 return Builder.CreateConstrainedFPCall(F, Ops, name);
6867 else
6868 return Builder.CreateCall(F, Ops, name);
6871 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6872 bool neg) {
6873 int SV = cast<ConstantInt>(V)->getSExtValue();
6874 return ConstantInt::get(Ty, neg ? -SV : SV);
6877 // Right-shift a vector by a constant.
6878 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6879 llvm::Type *Ty, bool usgn,
6880 const char *name) {
6881 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6883 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6884 int EltSize = VTy->getScalarSizeInBits();
6886 Vec = Builder.CreateBitCast(Vec, Ty);
6888 // lshr/ashr are undefined when the shift amount is equal to the vector
6889 // element size.
6890 if (ShiftAmt == EltSize) {
6891 if (usgn) {
6892 // Right-shifting an unsigned value by its size yields 0.
6893 return llvm::ConstantAggregateZero::get(VTy);
6894 } else {
6895 // Right-shifting a signed value by its size is equivalent
6896 // to a shift of size-1.
6897 --ShiftAmt;
6898 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6902 Shift = EmitNeonShiftVector(Shift, Ty, false);
6903 if (usgn)
6904 return Builder.CreateLShr(Vec, Shift, name);
6905 else
6906 return Builder.CreateAShr(Vec, Shift, name);
// Bit flags combined into the TypeModifier field of the NEON intrinsic map
// entries (see the NEONMAP* macros). The first nine enumerators are
// independent single-bit flags; the remaining ones are named combinations.
enum {
  AddRetType = (1 << 0),
  Add1ArgType = (1 << 1),
  Add2ArgTypes = (1 << 2),

  VectorizeRetType = (1 << 3),
  VectorizeArgTypes = (1 << 4),

  InventFloatType = (1 << 5),
  UnsignedAlts = (1 << 6),

  Use64BitVectors = (1 << 7),
  Use128BitVectors = (1 << 8),

  // Named combinations of the flags above.
  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  VectorRet = AddRetType | VectorizeRetType,
  VectorRetGetArgs01 =
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
  FpCmpzModifiers =
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
};
namespace {
/// One row of a builtin-to-intrinsic mapping table, as produced by the
/// NEONMAP0/NEONMAP1/NEONMAP2 macros.
///
/// Rows order by BuiltinID only, both against each other and against a bare
/// builtin ID.
struct ARMVectorIntrinsicInfo {
  const char *NameHint;      // Stringified builtin base name.
  unsigned BuiltinID;        // Builtin ID; the ordering key.
  unsigned LLVMIntrinsic;    // Primary intrinsic ID (0 in NEONMAP0 rows).
  unsigned AltLLVMIntrinsic; // Alternate intrinsic ID (0 unless NEONMAP2).
  uint64_t TypeModifier;     // Bitwise OR of type-modifier flags.

  /// Compare this row's key against a bare builtin ID.
  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }
  /// Compare two rows by builtin ID.
  bool operator<(const ARMVectorIntrinsicInfo &TE) const {
    return BuiltinID < TE.BuiltinID;
  }
};
} // end anonymous namespace
// Table-row builders for ARMVectorIntrinsicInfo initializers. Each expands to
// a braced initializer { name, builtin ID, intrinsic, alt intrinsic,
// type modifier } for the builtin NEON::BI__builtin_neon_<NameBase>.

// Row with no mapped intrinsic and no type modifier.
#define NEONMAP0(NameBase) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }

// Row mapping the builtin to a single intrinsic.
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
      Intrinsic::LLVMIntrinsic, 0, TypeModifier }

// Row mapping the builtin to a primary and an alternate intrinsic.
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
      Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
      TypeModifier }
6960 static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
6961 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6962 NEONMAP0(splat_lane_v),
6963 NEONMAP0(splat_laneq_v),
6964 NEONMAP0(splatq_lane_v),
6965 NEONMAP0(splatq_laneq_v),
6966 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6967 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6968 NEONMAP1(vabs_v, arm_neon_vabs, 0),
6969 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
6970 NEONMAP0(vadd_v),
6971 NEONMAP0(vaddhn_v),
6972 NEONMAP0(vaddq_v),
6973 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
6974 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
6975 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
6976 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
6977 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
6978 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
6979 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
6980 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
6981 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
6982 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
6983 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
6984 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6985 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6986 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6987 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6988 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6989 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6990 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
6991 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6992 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6993 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
6994 NEONMAP1(vcage_v, arm_neon_vacge, 0),
6995 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
6996 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
6997 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
6998 NEONMAP1(vcale_v, arm_neon_vacge, 0),
6999 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
7000 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
7001 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
7002 NEONMAP0(vceqz_v),
7003 NEONMAP0(vceqzq_v),
7004 NEONMAP0(vcgez_v),
7005 NEONMAP0(vcgezq_v),
7006 NEONMAP0(vcgtz_v),
7007 NEONMAP0(vcgtzq_v),
7008 NEONMAP0(vclez_v),
7009 NEONMAP0(vclezq_v),
7010 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
7011 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
7012 NEONMAP0(vcltz_v),
7013 NEONMAP0(vcltzq_v),
7014 NEONMAP1(vclz_v, ctlz, Add1ArgType),
7015 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
7016 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
7017 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
7018 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
7019 NEONMAP0(vcvt_f16_s16),
7020 NEONMAP0(vcvt_f16_u16),
7021 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
7022 NEONMAP0(vcvt_f32_v),
7023 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
7024 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
7025 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
7026 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
7027 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
7028 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
7029 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
7030 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
7031 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
7032 NEONMAP0(vcvt_s16_f16),
7033 NEONMAP0(vcvt_s32_v),
7034 NEONMAP0(vcvt_s64_v),
7035 NEONMAP0(vcvt_u16_f16),
7036 NEONMAP0(vcvt_u32_v),
7037 NEONMAP0(vcvt_u64_v),
7038 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
7039 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
7040 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
7041 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
7042 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
7043 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
7044 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
7045 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
7046 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
7047 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
7048 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
7049 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
7050 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
7051 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
7052 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
7053 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
7054 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
7055 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
7056 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
7057 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
7058 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
7059 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
7060 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
7061 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
7062 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
7063 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
7064 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
7065 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
7066 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
7067 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
7068 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
7069 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
7070 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
7071 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
7072 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
7073 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
7074 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
7075 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
7076 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
7077 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
7078 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
7079 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
7080 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
7081 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
7082 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
7083 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
7084 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
7085 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
7086 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
7087 NEONMAP0(vcvtq_f16_s16),
7088 NEONMAP0(vcvtq_f16_u16),
7089 NEONMAP0(vcvtq_f32_v),
7090 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
7091 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
7092 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
7093 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
7094 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
7095 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
7096 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
7097 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
7098 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
7099 NEONMAP0(vcvtq_s16_f16),
7100 NEONMAP0(vcvtq_s32_v),
7101 NEONMAP0(vcvtq_s64_v),
7102 NEONMAP0(vcvtq_u16_f16),
7103 NEONMAP0(vcvtq_u32_v),
7104 NEONMAP0(vcvtq_u64_v),
7105 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
7106 NEONMAP1(vdot_u32, arm_neon_udot, 0),
7107 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
7108 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
7109 NEONMAP0(vext_v),
7110 NEONMAP0(vextq_v),
7111 NEONMAP0(vfma_v),
7112 NEONMAP0(vfmaq_v),
7113 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
7114 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
7115 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
7116 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
7117 NEONMAP0(vld1_dup_v),
7118 NEONMAP1(vld1_v, arm_neon_vld1, 0),
7119 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
7120 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
7121 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
7122 NEONMAP0(vld1q_dup_v),
7123 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
7124 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
7125 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
7126 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
7127 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
7128 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
7129 NEONMAP1(vld2_v, arm_neon_vld2, 0),
7130 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
7131 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
7132 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
7133 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
7134 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
7135 NEONMAP1(vld3_v, arm_neon_vld3, 0),
7136 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
7137 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
7138 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
7139 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
7140 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
7141 NEONMAP1(vld4_v, arm_neon_vld4, 0),
7142 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
7143 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
7144 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
7145 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
7146 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
7147 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
7148 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
7149 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
7150 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
7151 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
7152 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
7153 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
7154 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
7155 NEONMAP0(vmovl_v),
7156 NEONMAP0(vmovn_v),
7157 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
7158 NEONMAP0(vmull_v),
7159 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
7160 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
7161 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
7162 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
7163 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
7164 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
7165 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
7166 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
7167 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
7168 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
7169 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
7170 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
7171 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
7172 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
7173 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
7174 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
7175 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
7176 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
7177 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
7178 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
7179 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
7180 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
7181 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
7182 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
7183 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
7184 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
7185 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
7186 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
7187 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
7188 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
7189 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
7190 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
7191 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
7192 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
7193 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
7194 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
7195 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
7196 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
7197 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
7198 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
7199 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
7200 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
7201 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
7202 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7203 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7204 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
7205 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
7206 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
7207 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
7208 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
7209 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
7210 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
7211 NEONMAP0(vrndi_v),
7212 NEONMAP0(vrndiq_v),
7213 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
7214 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
7215 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
7216 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
7217 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
7218 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
7219 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
7220 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
7221 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
7222 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
7223 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
7224 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
7225 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
7226 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7227 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7228 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
7229 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
7230 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
7231 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
7232 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
7233 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
7234 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
7235 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
7236 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
7237 NEONMAP0(vshl_n_v),
7238 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
7239 NEONMAP0(vshll_n_v),
7240 NEONMAP0(vshlq_n_v),
7241 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
7242 NEONMAP0(vshr_n_v),
7243 NEONMAP0(vshrn_n_v),
7244 NEONMAP0(vshrq_n_v),
7245 NEONMAP1(vst1_v, arm_neon_vst1, 0),
7246 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
7247 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
7248 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
7249 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
7250 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
7251 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
7252 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
7253 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
7254 NEONMAP1(vst2_v, arm_neon_vst2, 0),
7255 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
7256 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
7257 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
7258 NEONMAP1(vst3_v, arm_neon_vst3, 0),
7259 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
7260 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
7261 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
7262 NEONMAP1(vst4_v, arm_neon_vst4, 0),
7263 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
7264 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
7265 NEONMAP0(vsubhn_v),
7266 NEONMAP0(vtrn_v),
7267 NEONMAP0(vtrnq_v),
7268 NEONMAP0(vtst_v),
7269 NEONMAP0(vtstq_v),
7270 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
7271 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
7272 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
7273 NEONMAP0(vuzp_v),
7274 NEONMAP0(vuzpq_v),
7275 NEONMAP0(vzip_v),
7276 NEONMAP0(vzipq_v)
// AArch64SIMDIntrinsicMap: table of ARMVectorIntrinsicInfo records describing
// how the NEON vector builtins listed here are mapped for AArch64.
//
// Each record is produced by one of the NEONMAP* macros (defined outside this
// chunk). As used in this table:
//   NEONMAP0(name)                      - builtin name only, no intrinsic.
//   NEONMAP1(name, intrinsic, flags)    - one intrinsic plus a flags value.
//   NEONMAP2(name, intr, altintr, flags) - two intrinsics plus a flags value.
// The flags argument is either 0 or an OR of the modifier constants that
// appear below (AddRetType, Add1ArgType, UnsignedAlts).
//
// Entries are listed in ascending order of builtin name.
// NOTE(review): presumably the lookup code relies on this ordering (e.g. a
// binary search) -- confirm before adding an entry out of order.
7279 static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
7280 NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
7281 NEONMAP0(splat_lane_v),
7282 NEONMAP0(splat_laneq_v),
7283 NEONMAP0(splatq_lane_v),
7284 NEONMAP0(splatq_laneq_v),
7285 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
7286 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
7287 NEONMAP0(vadd_v),
7288 NEONMAP0(vaddhn_v),
7289 NEONMAP0(vaddq_p128),
7290 NEONMAP0(vaddq_v),
7291 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
7292 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
7293 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
7294 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
7295 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7296 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7297 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7298 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7299 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7300 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7301 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7302 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7303 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
7304 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
7305 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
7306 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
7307 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
7308 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
7309 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
7310 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
7311 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
7312 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
7313 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
7314 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
7315 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
7316 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
7317 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
7318 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
7319 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
7320 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
7321 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
// NOTE(review): vcale*/vcalt* use the same facge/facgt intrinsics as
// vcage*/vcagt*; presumably the operands are swapped where these entries are
// consumed -- confirm.
7322 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
7323 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
7324 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
7325 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
7326 NEONMAP0(vceqz_v),
7327 NEONMAP0(vceqzq_v),
7328 NEONMAP0(vcgez_v),
7329 NEONMAP0(vcgezq_v),
7330 NEONMAP0(vcgtz_v),
7331 NEONMAP0(vcgtzq_v),
7332 NEONMAP0(vclez_v),
7333 NEONMAP0(vclezq_v),
7334 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
7335 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
7336 NEONMAP0(vcltz_v),
7337 NEONMAP0(vcltzq_v),
7338 NEONMAP1(vclz_v, ctlz, Add1ArgType),
7339 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
7340 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
7341 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
7342 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
7343 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
7344 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
7345 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
7346 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
7347 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
7348 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
7349 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
7350 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
7351 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
7352 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
7353 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
7354 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
7355 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
7356 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
7357 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
7358 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
7359 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
7360 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
7361 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
7362 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
7363 NEONMAP0(vcvt_f16_s16),
7364 NEONMAP0(vcvt_f16_u16),
7365 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
7366 NEONMAP0(vcvt_f32_v),
7367 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7368 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7369 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7370 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7371 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7372 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7373 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7374 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7375 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7376 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7377 NEONMAP0(vcvtq_f16_s16),
7378 NEONMAP0(vcvtq_f16_u16),
7379 NEONMAP0(vcvtq_f32_v),
7380 NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
7381 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7382 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7383 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7384 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7385 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7386 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7387 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7388 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7389 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7390 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7391 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
7392 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
7393 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
7394 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
7395 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
7396 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7397 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7398 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7399 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7400 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7401 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7402 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7403 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7404 NEONMAP0(vext_v),
7405 NEONMAP0(vextq_v),
7406 NEONMAP0(vfma_v),
7407 NEONMAP0(vfmaq_v),
7408 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
7409 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
7410 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
7411 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
7412 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
7413 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
7414 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
7415 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
7416 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7417 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7418 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7419 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7420 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
7421 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
7422 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
7423 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
7424 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
7425 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
7426 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
7427 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
7428 NEONMAP0(vmovl_v),
7429 NEONMAP0(vmovn_v),
7430 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
7431 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
7432 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
7433 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7434 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7435 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
7436 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
7437 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
7438 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7439 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7440 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
7441 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
7442 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
7443 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7444 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
7445 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
7446 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7447 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
7448 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
7449 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
7450 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
7451 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
7452 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
7453 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7454 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7455 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7456 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7457 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7458 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7459 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7460 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7461 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7462 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7463 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
7464 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7465 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7466 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
7467 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7468 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7469 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7470 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7471 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
7472 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7473 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
7474 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
7475 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7476 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7477 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
7478 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
7479 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7480 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7481 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
7482 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
7483 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7484 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7485 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
7486 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
7487 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
7488 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
7489 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
7490 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
7491 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
7492 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
7493 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
7494 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
7495 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
7496 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
7497 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
7498 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
7499 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
7500 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
7501 NEONMAP0(vrndi_v),
7502 NEONMAP0(vrndiq_v),
7503 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7504 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
// NOTE(review): the vrshr*_n (shift-right) builtins are mapped to the same
// urshl/srshl intrinsics as vrshl*; presumably the shift amount is adjusted
// where these entries are consumed -- confirm.
7505 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7506 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7507 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7508 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7509 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
7510 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
7511 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
7512 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
7513 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
7514 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
7515 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
7516 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
7517 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
7518 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
7519 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
7520 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
7521 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
7522 NEONMAP0(vshl_n_v),
7523 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7524 NEONMAP0(vshll_n_v),
7525 NEONMAP0(vshlq_n_v),
7526 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7527 NEONMAP0(vshr_n_v),
7528 NEONMAP0(vshrn_n_v),
7529 NEONMAP0(vshrq_n_v),
7530 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
7531 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
7532 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
7533 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
7534 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
7535 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
7536 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
7537 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
7538 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
7539 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
7540 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
7541 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
7542 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
7543 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
7544 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
7545 NEONMAP0(vsubhn_v),
7546 NEONMAP0(vtst_v),
7547 NEONMAP0(vtstq_v),
7548 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
7549 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
7550 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
7551 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
7554 static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
7555 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
7556 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
7557 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
7558 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7559 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7560 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7561 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7562 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7563 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7564 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7565 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7566 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
7567 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7568 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
7569 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7570 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7571 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7572 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7573 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7574 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7575 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7576 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7577 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7578 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7579 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7580 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7581 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7582 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7583 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7584 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7585 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7586 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7587 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7588 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7589 NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
7590 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7591 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7592 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7593 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7594 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7595 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7596 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7597 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7598 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7599 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7600 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7601 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7602 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7603 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7604 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7605 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7606 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7607 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7608 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
7609 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7610 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7611 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7612 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7613 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7614 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7615 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7616 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7617 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7618 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7619 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7620 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7621 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7622 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7623 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7624 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7625 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7626 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7627 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7628 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7629 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
7630 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
7631 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
7632 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7633 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7634 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7635 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7636 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7637 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7638 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7639 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7640 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7641 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7642 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7643 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
7644 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7645 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
7646 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7647 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7648 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
7649 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
7650 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7651 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7652 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
7653 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
7654 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
7655 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
7656 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
7657 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7658 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
7659 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
7660 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7661 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7662 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7663 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7664 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
7665 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7666 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7667 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7668 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
7669 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7670 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
7671 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
7672 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7673 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
7674 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7675 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
7676 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
7677 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7678 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7679 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
7680 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
7681 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7682 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7683 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
7684 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
7685 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
7686 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
7687 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7688 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7689 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7690 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7691 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
7692 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7693 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7694 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7695 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7696 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7697 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7698 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
7699 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
7700 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7701 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7702 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7703 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7704 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
7705 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
7706 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
7707 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
7708 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7709 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7710 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
7711 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
7712 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
7713 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7714 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7715 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7716 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7717 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
7718 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7719 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7720 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7721 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7722 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7723 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7724 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7725 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7726 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7727 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7728 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7729 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7730 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7731 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7732 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7733 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7734 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7735 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7736 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7737 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7738 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7739 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7740 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7741 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7742 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7743 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7744 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7745 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7746 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7747 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7748 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7749 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7750 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7751 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7752 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7753 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7754 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7755 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7756 // FP16 scalar intrinsics go here.
7757 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7758 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7759 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7760 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7761 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7762 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7763 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7764 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7765 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7766 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7767 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7768 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7769 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7770 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7771 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7772 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7773 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7774 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7775 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7776 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7777 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7778 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7779 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7780 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7781 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7782 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7783 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7784 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7785 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7786 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7787 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7788 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7789 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7790 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7793 // Some intrinsics are equivalent for codegen.
// Each entry is a pair of NEON builtin IDs: the first is a type-specific
// builtin and the second is the builtin whose codegen it shares. In the
// entries below, the f16/bf16-suffixed builtins are paired with the generic
// `_v` form of the same operation, and the vldap1(q)_lane / vstl1(q)_lane
// u64/f64/p64 builtins are paired with their s64 counterpart.
// NOTE(review): the code that consults this table is outside this chunk;
// presumably it rewrites the first ID to the second before dispatching --
// confirm at the use site.
7794 static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7795 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7796 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7797 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7798 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7799 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7800 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7801 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7802 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7803 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7804 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7805 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7806 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7807 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7808 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7809 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7810 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7811 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7812 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7813 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7814 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7815 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7816 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7817 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7818 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7819 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7820 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7821 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7822 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7823 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7824 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7825 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7826 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7827 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7828 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7829 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7830 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7831 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7832 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7833 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7834 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7835 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7836 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7837 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7838 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7839 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7840 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7841 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7842 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7843 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7844 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7845 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7846 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7847 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7848 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7849 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7850 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7851 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7852 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7853 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7854 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7855 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7856 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7857 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7858 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7859 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7860 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7861 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7862 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7863 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7864 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7865 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7866 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7867 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7868 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7869 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7870 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7871 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7872 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7873 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7874 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7875 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7876 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7877 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7878 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7879 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7880 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7881 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7882 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7883 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7884 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7885 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7886 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7887 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7888 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7889 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7890 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7891 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7892 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7893 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7894 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7895 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7896 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7897 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7898 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7899 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7900 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7901 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7902 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7903 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7904 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7905 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7906 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7907 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7908 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7909 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7910 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7911 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7912 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7913 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7914 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7915 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7916 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7917 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7918 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7919 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7920 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7921 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7922 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7923 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7924 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7925 // arbitrary one to be handled as the canonical variation.
7926 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7927 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7928 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7929 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7930 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7931 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7932 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7933 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7934 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7935 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7936 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7937 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7940 #undef NEONMAP0
7941 #undef NEONMAP1
7942 #undef NEONMAP2
// Table describing the SVE builtins. The two helper macros below produce one
// ARMVectorIntrinsicInfo initializer each and exist only while the table is
// being populated; both are #undef'd right after it.
//  - SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier): stringified name, the
//    SVE::BI__builtin_sve_<NameBase> builtin ID, Intrinsic::<LLVMIntrinsic>,
//    a literal 0, and the type modifier.
//  - SVEMAP2(NameBase, TypeModifier): same shape, but both intrinsic slots
//    are 0, i.e. the entry names no LLVM intrinsic.
// NOTE(review): ARMVectorIntrinsicInfo is declared outside this chunk; the
// field order is presumably NameHint, BuiltinID, LLVMIntrinsic,
// AltLLVMIntrinsic, TypeModifier -- confirm against the struct definition.
7944 #define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7946 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7947 TypeModifier \
7950 #define SVEMAP2(NameBase, TypeModifier) \
7951 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
// The entries themselves come from the included files, which are expanded
// with GET_SVE_LLVM_INTRINSIC_MAP defined.
7952 static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7953 #define GET_SVE_LLVM_INTRINSIC_MAP
7954 #include "clang/Basic/arm_sve_builtin_cg.inc"
7955 #include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7956 #undef GET_SVE_LLVM_INTRINSIC_MAP
7959 #undef SVEMAP1
7960 #undef SVEMAP2
// Table describing the SME builtins. The two helper macros below produce one
// ARMVectorIntrinsicInfo initializer each and exist only while the table is
// being populated; both are #undef'd right after it.
//  - SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier): stringified name, the
//    SME::BI__builtin_sme_<NameBase> builtin ID, Intrinsic::<LLVMIntrinsic>,
//    a literal 0, and the type modifier.
//  - SMEMAP2(NameBase, TypeModifier): same shape, but both intrinsic slots
//    are 0, i.e. the entry names no LLVM intrinsic.
// NOTE(review): ARMVectorIntrinsicInfo is declared outside this chunk; the
// field order is presumably NameHint, BuiltinID, LLVMIntrinsic,
// AltLLVMIntrinsic, TypeModifier -- confirm against the struct definition.
7962 #define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7964 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7965 TypeModifier \
7968 #define SMEMAP2(NameBase, TypeModifier) \
7969 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
// The entries themselves come from the included file, which is expanded with
// GET_SME_LLVM_INTRINSIC_MAP defined.
7970 static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
7971 #define GET_SME_LLVM_INTRINSIC_MAP
7972 #include "clang/Basic/arm_sme_builtin_cg.inc"
7973 #undef GET_SME_LLVM_INTRINSIC_MAP
7976 #undef SMEMAP1
7977 #undef SMEMAP2
// "Already verified" flags for the intrinsic tables. A flag is passed by
// reference to findARMVectorIntrinsicInMap, which (only when NDEBUG is not
// defined) asserts the table is sorted on first use and then sets the flag
// so the check is not repeated.
// NOTE(review): the pairing of each flag with its table is inferred from the
// names; the call sites are outside this chunk.
7979 static bool NEONSIMDIntrinsicsProvenSorted = false;
7981 static bool AArch64SIMDIntrinsicsProvenSorted = false;
7982 static bool AArch64SISDIntrinsicsProvenSorted = false;
7983 static bool AArch64SVEIntrinsicsProvenSorted = false;
7984 static bool AArch64SMEIntrinsicsProvenSorted = false;
7986 static const ARMVectorIntrinsicInfo *
7987 findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
7988 unsigned BuiltinID, bool &MapProvenSorted) {
7990 #ifndef NDEBUG
7991 if (!MapProvenSorted) {
7992 assert(llvm::is_sorted(IntrinsicMap));
7993 MapProvenSorted = true;
7995 #endif
7997 const ARMVectorIntrinsicInfo *Builtin =
7998 llvm::lower_bound(IntrinsicMap, BuiltinID);
8000 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
8001 return Builtin;
8003 return nullptr;
8006 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
8007 unsigned Modifier,
8008 llvm::Type *ArgType,
8009 const CallExpr *E) {
8010 int VectorSize = 0;
8011 if (Modifier & Use64BitVectors)
8012 VectorSize = 64;
8013 else if (Modifier & Use128BitVectors)
8014 VectorSize = 128;
8016 // Return type.
8017 SmallVector<llvm::Type *, 3> Tys;
8018 if (Modifier & AddRetType) {
8019 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
8020 if (Modifier & VectorizeRetType)
8021 Ty = llvm::FixedVectorType::get(
8022 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
8024 Tys.push_back(Ty);
8027 // Arguments.
8028 if (Modifier & VectorizeArgTypes) {
8029 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
8030 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
8033 if (Modifier & (Add1ArgType | Add2ArgTypes))
8034 Tys.push_back(ArgType);
8036 if (Modifier & Add2ArgTypes)
8037 Tys.push_back(ArgType);
8039 if (Modifier & InventFloatType)
8040 Tys.push_back(FloatTy);
8042 return CGM.getIntrinsic(IntrinsicID, Tys);
8045 static Value *EmitCommonNeonSISDBuiltinExpr(
8046 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
8047 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
8048 unsigned BuiltinID = SISDInfo.BuiltinID;
8049 unsigned int Int = SISDInfo.LLVMIntrinsic;
8050 unsigned Modifier = SISDInfo.TypeModifier;
8051 const char *s = SISDInfo.NameHint;
8053 switch (BuiltinID) {
8054 case NEON::BI__builtin_neon_vcled_s64:
8055 case NEON::BI__builtin_neon_vcled_u64:
8056 case NEON::BI__builtin_neon_vcles_f32:
8057 case NEON::BI__builtin_neon_vcled_f64:
8058 case NEON::BI__builtin_neon_vcltd_s64:
8059 case NEON::BI__builtin_neon_vcltd_u64:
8060 case NEON::BI__builtin_neon_vclts_f32:
8061 case NEON::BI__builtin_neon_vcltd_f64:
8062 case NEON::BI__builtin_neon_vcales_f32:
8063 case NEON::BI__builtin_neon_vcaled_f64:
8064 case NEON::BI__builtin_neon_vcalts_f32:
8065 case NEON::BI__builtin_neon_vcaltd_f64:
8066 // Only one direction of comparisons actually exist, cmle is actually a cmge
8067 // with swapped operands. The table gives us the right intrinsic but we
8068 // still need to do the swap.
8069 std::swap(Ops[0], Ops[1]);
8070 break;
8073 assert(Int && "Generic code assumes a valid intrinsic");
8075 // Determine the type(s) of this overloaded AArch64 intrinsic.
8076 const Expr *Arg = E->getArg(0);
8077 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
8078 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
8080 int j = 0;
8081 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
8082 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
8083 ai != ae; ++ai, ++j) {
8084 llvm::Type *ArgTy = ai->getType();
8085 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
8086 ArgTy->getPrimitiveSizeInBits())
8087 continue;
8089 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
8090 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
8091 // it before inserting.
8092 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
8093 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
8094 Ops[j] =
8095 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
8098 Value *Result = CGF.EmitNeonCall(F, Ops, s);
8099 llvm::Type *ResultType = CGF.ConvertType(E->getType());
8100 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
8101 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
8102 return CGF.Builder.CreateExtractElement(Result, C0);
8104 return CGF.Builder.CreateBitCast(Result, ResultType, s);
8107 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
8108 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
8109 const char *NameHint, unsigned Modifier, const CallExpr *E,
8110 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
8111 llvm::Triple::ArchType Arch) {
8112 // Get the last argument, which specifies the vector type.
8113 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
8114 std::optional<llvm::APSInt> NeonTypeConst =
8115 Arg->getIntegerConstantExpr(getContext());
8116 if (!NeonTypeConst)
8117 return nullptr;
8119 // Determine the type of this overloaded NEON intrinsic.
8120 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
8121 bool Usgn = Type.isUnsigned();
8122 bool Quad = Type.isQuad();
8123 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
8124 const bool AllowBFloatArgsAndRet =
8125 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
8127 llvm::FixedVectorType *VTy =
8128 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
8129 llvm::Type *Ty = VTy;
8130 if (!Ty)
8131 return nullptr;
8133 auto getAlignmentValue32 = [&](Address addr) -> Value* {
8134 return Builder.getInt32(addr.getAlignment().getQuantity());
8137 unsigned Int = LLVMIntrinsic;
8138 if ((Modifier & UnsignedAlts) && !Usgn)
8139 Int = AltLLVMIntrinsic;
8141 switch (BuiltinID) {
8142 default: break;
8143 case NEON::BI__builtin_neon_splat_lane_v:
8144 case NEON::BI__builtin_neon_splat_laneq_v:
8145 case NEON::BI__builtin_neon_splatq_lane_v:
8146 case NEON::BI__builtin_neon_splatq_laneq_v: {
8147 auto NumElements = VTy->getElementCount();
8148 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
8149 NumElements = NumElements * 2;
8150 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
8151 NumElements = NumElements.divideCoefficientBy(2);
8153 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8154 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
8156 case NEON::BI__builtin_neon_vpadd_v:
8157 case NEON::BI__builtin_neon_vpaddq_v:
8158 // We don't allow fp/int overloading of intrinsics.
8159 if (VTy->getElementType()->isFloatingPointTy() &&
8160 Int == Intrinsic::aarch64_neon_addp)
8161 Int = Intrinsic::aarch64_neon_faddp;
8162 break;
8163 case NEON::BI__builtin_neon_vabs_v:
8164 case NEON::BI__builtin_neon_vabsq_v:
8165 if (VTy->getElementType()->isFloatingPointTy())
8166 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
8167 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
8168 case NEON::BI__builtin_neon_vadd_v:
8169 case NEON::BI__builtin_neon_vaddq_v: {
8170 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
8171 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8172 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
8173 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
8174 return Builder.CreateBitCast(Ops[0], Ty);
8176 case NEON::BI__builtin_neon_vaddhn_v: {
8177 llvm::FixedVectorType *SrcTy =
8178 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8180 // %sum = add <4 x i32> %lhs, %rhs
8181 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8182 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8183 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
8185 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
8186 Constant *ShiftAmt =
8187 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8188 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
8190 // %res = trunc <4 x i32> %high to <4 x i16>
8191 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
8193 case NEON::BI__builtin_neon_vcale_v:
8194 case NEON::BI__builtin_neon_vcaleq_v:
8195 case NEON::BI__builtin_neon_vcalt_v:
8196 case NEON::BI__builtin_neon_vcaltq_v:
8197 std::swap(Ops[0], Ops[1]);
8198 [[fallthrough]];
8199 case NEON::BI__builtin_neon_vcage_v:
8200 case NEON::BI__builtin_neon_vcageq_v:
8201 case NEON::BI__builtin_neon_vcagt_v:
8202 case NEON::BI__builtin_neon_vcagtq_v: {
8203 llvm::Type *Ty;
8204 switch (VTy->getScalarSizeInBits()) {
8205 default: llvm_unreachable("unexpected type");
8206 case 32:
8207 Ty = FloatTy;
8208 break;
8209 case 64:
8210 Ty = DoubleTy;
8211 break;
8212 case 16:
8213 Ty = HalfTy;
8214 break;
8216 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
8217 llvm::Type *Tys[] = { VTy, VecFlt };
8218 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8219 return EmitNeonCall(F, Ops, NameHint);
8221 case NEON::BI__builtin_neon_vceqz_v:
8222 case NEON::BI__builtin_neon_vceqzq_v:
8223 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
8224 ICmpInst::ICMP_EQ, "vceqz");
8225 case NEON::BI__builtin_neon_vcgez_v:
8226 case NEON::BI__builtin_neon_vcgezq_v:
8227 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
8228 ICmpInst::ICMP_SGE, "vcgez");
8229 case NEON::BI__builtin_neon_vclez_v:
8230 case NEON::BI__builtin_neon_vclezq_v:
8231 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
8232 ICmpInst::ICMP_SLE, "vclez");
8233 case NEON::BI__builtin_neon_vcgtz_v:
8234 case NEON::BI__builtin_neon_vcgtzq_v:
8235 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
8236 ICmpInst::ICMP_SGT, "vcgtz");
8237 case NEON::BI__builtin_neon_vcltz_v:
8238 case NEON::BI__builtin_neon_vcltzq_v:
8239 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
8240 ICmpInst::ICMP_SLT, "vcltz");
8241 case NEON::BI__builtin_neon_vclz_v:
8242 case NEON::BI__builtin_neon_vclzq_v:
8243 // We generate target-independent intrinsic, which needs a second argument
8244 // for whether or not clz of zero is undefined; on ARM it isn't.
8245 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
8246 break;
8247 case NEON::BI__builtin_neon_vcvt_f32_v:
8248 case NEON::BI__builtin_neon_vcvtq_f32_v:
8249 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8250 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
8251 HasLegalHalfType);
8252 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8253 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8254 case NEON::BI__builtin_neon_vcvt_f16_s16:
8255 case NEON::BI__builtin_neon_vcvt_f16_u16:
8256 case NEON::BI__builtin_neon_vcvtq_f16_s16:
8257 case NEON::BI__builtin_neon_vcvtq_f16_u16:
8258 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8259 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
8260 HasLegalHalfType);
8261 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8262 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8263 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
8264 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
8265 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
8266 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
8267 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
8268 Function *F = CGM.getIntrinsic(Int, Tys);
8269 return EmitNeonCall(F, Ops, "vcvt_n");
8271 case NEON::BI__builtin_neon_vcvt_n_f32_v:
8272 case NEON::BI__builtin_neon_vcvt_n_f64_v:
8273 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
8274 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
8275 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
8276 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
8277 Function *F = CGM.getIntrinsic(Int, Tys);
8278 return EmitNeonCall(F, Ops, "vcvt_n");
8280 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
8281 case NEON::BI__builtin_neon_vcvt_n_s32_v:
8282 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
8283 case NEON::BI__builtin_neon_vcvt_n_u32_v:
8284 case NEON::BI__builtin_neon_vcvt_n_s64_v:
8285 case NEON::BI__builtin_neon_vcvt_n_u64_v:
8286 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
8287 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
8288 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
8289 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
8290 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
8291 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
8292 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8293 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8294 return EmitNeonCall(F, Ops, "vcvt_n");
8296 case NEON::BI__builtin_neon_vcvt_s32_v:
8297 case NEON::BI__builtin_neon_vcvt_u32_v:
8298 case NEON::BI__builtin_neon_vcvt_s64_v:
8299 case NEON::BI__builtin_neon_vcvt_u64_v:
8300 case NEON::BI__builtin_neon_vcvt_s16_f16:
8301 case NEON::BI__builtin_neon_vcvt_u16_f16:
8302 case NEON::BI__builtin_neon_vcvtq_s32_v:
8303 case NEON::BI__builtin_neon_vcvtq_u32_v:
8304 case NEON::BI__builtin_neon_vcvtq_s64_v:
8305 case NEON::BI__builtin_neon_vcvtq_u64_v:
8306 case NEON::BI__builtin_neon_vcvtq_s16_f16:
8307 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
8308 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
8309 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
8310 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
8312 case NEON::BI__builtin_neon_vcvta_s16_f16:
8313 case NEON::BI__builtin_neon_vcvta_s32_v:
8314 case NEON::BI__builtin_neon_vcvta_s64_v:
8315 case NEON::BI__builtin_neon_vcvta_u16_f16:
8316 case NEON::BI__builtin_neon_vcvta_u32_v:
8317 case NEON::BI__builtin_neon_vcvta_u64_v:
8318 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
8319 case NEON::BI__builtin_neon_vcvtaq_s32_v:
8320 case NEON::BI__builtin_neon_vcvtaq_s64_v:
8321 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
8322 case NEON::BI__builtin_neon_vcvtaq_u32_v:
8323 case NEON::BI__builtin_neon_vcvtaq_u64_v:
8324 case NEON::BI__builtin_neon_vcvtn_s16_f16:
8325 case NEON::BI__builtin_neon_vcvtn_s32_v:
8326 case NEON::BI__builtin_neon_vcvtn_s64_v:
8327 case NEON::BI__builtin_neon_vcvtn_u16_f16:
8328 case NEON::BI__builtin_neon_vcvtn_u32_v:
8329 case NEON::BI__builtin_neon_vcvtn_u64_v:
8330 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
8331 case NEON::BI__builtin_neon_vcvtnq_s32_v:
8332 case NEON::BI__builtin_neon_vcvtnq_s64_v:
8333 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
8334 case NEON::BI__builtin_neon_vcvtnq_u32_v:
8335 case NEON::BI__builtin_neon_vcvtnq_u64_v:
8336 case NEON::BI__builtin_neon_vcvtp_s16_f16:
8337 case NEON::BI__builtin_neon_vcvtp_s32_v:
8338 case NEON::BI__builtin_neon_vcvtp_s64_v:
8339 case NEON::BI__builtin_neon_vcvtp_u16_f16:
8340 case NEON::BI__builtin_neon_vcvtp_u32_v:
8341 case NEON::BI__builtin_neon_vcvtp_u64_v:
8342 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
8343 case NEON::BI__builtin_neon_vcvtpq_s32_v:
8344 case NEON::BI__builtin_neon_vcvtpq_s64_v:
8345 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
8346 case NEON::BI__builtin_neon_vcvtpq_u32_v:
8347 case NEON::BI__builtin_neon_vcvtpq_u64_v:
8348 case NEON::BI__builtin_neon_vcvtm_s16_f16:
8349 case NEON::BI__builtin_neon_vcvtm_s32_v:
8350 case NEON::BI__builtin_neon_vcvtm_s64_v:
8351 case NEON::BI__builtin_neon_vcvtm_u16_f16:
8352 case NEON::BI__builtin_neon_vcvtm_u32_v:
8353 case NEON::BI__builtin_neon_vcvtm_u64_v:
8354 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
8355 case NEON::BI__builtin_neon_vcvtmq_s32_v:
8356 case NEON::BI__builtin_neon_vcvtmq_s64_v:
8357 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
8358 case NEON::BI__builtin_neon_vcvtmq_u32_v:
8359 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
8360 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8361 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
8363 case NEON::BI__builtin_neon_vcvtx_f32_v: {
8364 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
8365 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
8368 case NEON::BI__builtin_neon_vext_v:
8369 case NEON::BI__builtin_neon_vextq_v: {
8370 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
8371 SmallVector<int, 16> Indices;
8372 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8373 Indices.push_back(i+CV);
8375 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8376 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8377 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
8379 case NEON::BI__builtin_neon_vfma_v:
8380 case NEON::BI__builtin_neon_vfmaq_v: {
8381 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8382 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8383 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8385 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
8386 return emitCallMaybeConstrainedFPBuiltin(
8387 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
8388 {Ops[1], Ops[2], Ops[0]});
8390 case NEON::BI__builtin_neon_vld1_v:
8391 case NEON::BI__builtin_neon_vld1q_v: {
8392 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8393 Ops.push_back(getAlignmentValue32(PtrOp0));
8394 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
8396 case NEON::BI__builtin_neon_vld1_x2_v:
8397 case NEON::BI__builtin_neon_vld1q_x2_v:
8398 case NEON::BI__builtin_neon_vld1_x3_v:
8399 case NEON::BI__builtin_neon_vld1q_x3_v:
8400 case NEON::BI__builtin_neon_vld1_x4_v:
8401 case NEON::BI__builtin_neon_vld1q_x4_v: {
8402 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8403 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8404 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
8405 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8407 case NEON::BI__builtin_neon_vld2_v:
8408 case NEON::BI__builtin_neon_vld2q_v:
8409 case NEON::BI__builtin_neon_vld3_v:
8410 case NEON::BI__builtin_neon_vld3q_v:
8411 case NEON::BI__builtin_neon_vld4_v:
8412 case NEON::BI__builtin_neon_vld4q_v:
8413 case NEON::BI__builtin_neon_vld2_dup_v:
8414 case NEON::BI__builtin_neon_vld2q_dup_v:
8415 case NEON::BI__builtin_neon_vld3_dup_v:
8416 case NEON::BI__builtin_neon_vld3q_dup_v:
8417 case NEON::BI__builtin_neon_vld4_dup_v:
8418 case NEON::BI__builtin_neon_vld4q_dup_v: {
8419 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8420 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8421 Value *Align = getAlignmentValue32(PtrOp1);
8422 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
8423 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8425 case NEON::BI__builtin_neon_vld1_dup_v:
8426 case NEON::BI__builtin_neon_vld1q_dup_v: {
8427 Value *V = PoisonValue::get(Ty);
8428 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
8429 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
8430 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
8431 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
8432 return EmitNeonSplat(Ops[0], CI);
8434 case NEON::BI__builtin_neon_vld2_lane_v:
8435 case NEON::BI__builtin_neon_vld2q_lane_v:
8436 case NEON::BI__builtin_neon_vld3_lane_v:
8437 case NEON::BI__builtin_neon_vld3q_lane_v:
8438 case NEON::BI__builtin_neon_vld4_lane_v:
8439 case NEON::BI__builtin_neon_vld4q_lane_v: {
8440 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8441 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8442 for (unsigned I = 2; I < Ops.size() - 1; ++I)
8443 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
8444 Ops.push_back(getAlignmentValue32(PtrOp1));
8445 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
8446 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8448 case NEON::BI__builtin_neon_vmovl_v: {
8449 llvm::FixedVectorType *DTy =
8450 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8451 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
8452 if (Usgn)
8453 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
8454 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
8456 case NEON::BI__builtin_neon_vmovn_v: {
8457 llvm::FixedVectorType *QTy =
8458 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8459 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
8460 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
8462 case NEON::BI__builtin_neon_vmull_v:
8463 // FIXME: the integer vmull operations could be emitted in terms of pure
8464 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
8465 // hoisting the exts outside loops. Until global ISel comes along that can
8466 // see through such movement this leads to bad CodeGen. So we need an
8467 // intrinsic for now.
8468 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
8469 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
8470 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
8471 case NEON::BI__builtin_neon_vpadal_v:
8472 case NEON::BI__builtin_neon_vpadalq_v: {
8473 // The source operand type has twice as many elements of half the size.
8474 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8475 llvm::Type *EltTy =
8476 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8477 auto *NarrowTy =
8478 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8479 llvm::Type *Tys[2] = { Ty, NarrowTy };
8480 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8482 case NEON::BI__builtin_neon_vpaddl_v:
8483 case NEON::BI__builtin_neon_vpaddlq_v: {
8484 // The source operand type has twice as many elements of half the size.
8485 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8486 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8487 auto *NarrowTy =
8488 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8489 llvm::Type *Tys[2] = { Ty, NarrowTy };
8490 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
8492 case NEON::BI__builtin_neon_vqdmlal_v:
8493 case NEON::BI__builtin_neon_vqdmlsl_v: {
8494 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
8495 Ops[1] =
8496 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
8497 Ops.resize(2);
8498 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
8500 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
8501 case NEON::BI__builtin_neon_vqdmulh_lane_v:
8502 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
8503 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
8504 auto *RTy = cast<llvm::FixedVectorType>(Ty);
8505 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
8506 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
8507 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
8508 RTy->getNumElements() * 2);
8509 llvm::Type *Tys[2] = {
8510 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8511 /*isQuad*/ false))};
8512 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8514 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
8515 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
8516 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
8517 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
8518 llvm::Type *Tys[2] = {
8519 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8520 /*isQuad*/ true))};
8521 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8523 case NEON::BI__builtin_neon_vqshl_n_v:
8524 case NEON::BI__builtin_neon_vqshlq_n_v:
8525 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
8526 1, false);
8527 case NEON::BI__builtin_neon_vqshlu_n_v:
8528 case NEON::BI__builtin_neon_vqshluq_n_v:
8529 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
8530 1, false);
8531 case NEON::BI__builtin_neon_vrecpe_v:
8532 case NEON::BI__builtin_neon_vrecpeq_v:
8533 case NEON::BI__builtin_neon_vrsqrte_v:
8534 case NEON::BI__builtin_neon_vrsqrteq_v:
8535 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
8536 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8537 case NEON::BI__builtin_neon_vrndi_v:
8538 case NEON::BI__builtin_neon_vrndiq_v:
8539 Int = Builder.getIsFPConstrained()
8540 ? Intrinsic::experimental_constrained_nearbyint
8541 : Intrinsic::nearbyint;
8542 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8543 case NEON::BI__builtin_neon_vrshr_n_v:
8544 case NEON::BI__builtin_neon_vrshrq_n_v:
8545 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
8546 1, true);
8547 case NEON::BI__builtin_neon_vsha512hq_u64:
8548 case NEON::BI__builtin_neon_vsha512h2q_u64:
8549 case NEON::BI__builtin_neon_vsha512su0q_u64:
8550 case NEON::BI__builtin_neon_vsha512su1q_u64: {
8551 Function *F = CGM.getIntrinsic(Int);
8552 return EmitNeonCall(F, Ops, "");
8554 case NEON::BI__builtin_neon_vshl_n_v:
8555 case NEON::BI__builtin_neon_vshlq_n_v:
8556 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
8557 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
8558 "vshl_n");
8559 case NEON::BI__builtin_neon_vshll_n_v: {
8560 llvm::FixedVectorType *SrcTy =
8561 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8562 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8563 if (Usgn)
8564 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
8565 else
8566 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
8567 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
8568 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
8570 case NEON::BI__builtin_neon_vshrn_n_v: {
8571 llvm::FixedVectorType *SrcTy =
8572 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8573 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8574 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
8575 if (Usgn)
8576 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
8577 else
8578 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
8579 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
8581 case NEON::BI__builtin_neon_vshr_n_v:
8582 case NEON::BI__builtin_neon_vshrq_n_v:
8583 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
8584 case NEON::BI__builtin_neon_vst1_v:
8585 case NEON::BI__builtin_neon_vst1q_v:
8586 case NEON::BI__builtin_neon_vst2_v:
8587 case NEON::BI__builtin_neon_vst2q_v:
8588 case NEON::BI__builtin_neon_vst3_v:
8589 case NEON::BI__builtin_neon_vst3q_v:
8590 case NEON::BI__builtin_neon_vst4_v:
8591 case NEON::BI__builtin_neon_vst4q_v:
8592 case NEON::BI__builtin_neon_vst2_lane_v:
8593 case NEON::BI__builtin_neon_vst2q_lane_v:
8594 case NEON::BI__builtin_neon_vst3_lane_v:
8595 case NEON::BI__builtin_neon_vst3q_lane_v:
8596 case NEON::BI__builtin_neon_vst4_lane_v:
8597 case NEON::BI__builtin_neon_vst4q_lane_v: {
8598 llvm::Type *Tys[] = {Int8PtrTy, Ty};
8599 Ops.push_back(getAlignmentValue32(PtrOp0));
8600 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
8602 case NEON::BI__builtin_neon_vsm3partw1q_u32:
8603 case NEON::BI__builtin_neon_vsm3partw2q_u32:
8604 case NEON::BI__builtin_neon_vsm3ss1q_u32:
8605 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
8606 case NEON::BI__builtin_neon_vsm4eq_u32: {
8607 Function *F = CGM.getIntrinsic(Int);
8608 return EmitNeonCall(F, Ops, "");
8610 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
8611 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
8612 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
8613 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
8614 Function *F = CGM.getIntrinsic(Int);
8615 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8616 return EmitNeonCall(F, Ops, "");
8618 case NEON::BI__builtin_neon_vst1_x2_v:
8619 case NEON::BI__builtin_neon_vst1q_x2_v:
8620 case NEON::BI__builtin_neon_vst1_x3_v:
8621 case NEON::BI__builtin_neon_vst1q_x3_v:
8622 case NEON::BI__builtin_neon_vst1_x4_v:
8623 case NEON::BI__builtin_neon_vst1q_x4_v: {
8624 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
8625 // in AArch64 it comes last. We may want to stick to one or another.
8626 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
8627 Arch == llvm::Triple::aarch64_32) {
8628 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8629 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
8630 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8632 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
8633 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8635 case NEON::BI__builtin_neon_vsubhn_v: {
8636 llvm::FixedVectorType *SrcTy =
8637 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8639 // %sum = add <4 x i32> %lhs, %rhs
8640 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8641 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8642 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
8644 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
8645 Constant *ShiftAmt =
8646 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8647 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
8649 // %res = trunc <4 x i32> %high to <4 x i16>
8650 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
8652 case NEON::BI__builtin_neon_vtrn_v:
8653 case NEON::BI__builtin_neon_vtrnq_v: {
8654 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8655 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8656 Value *SV = nullptr;
8658 for (unsigned vi = 0; vi != 2; ++vi) {
8659 SmallVector<int, 16> Indices;
8660 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8661 Indices.push_back(i+vi);
8662 Indices.push_back(i+e+vi);
8664 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8665 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8666 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8668 return SV;
8670 case NEON::BI__builtin_neon_vtst_v:
8671 case NEON::BI__builtin_neon_vtstq_v: {
8672 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8673 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8674 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
8675 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
8676 ConstantAggregateZero::get(Ty));
8677 return Builder.CreateSExt(Ops[0], Ty, "vtst");
8679 case NEON::BI__builtin_neon_vuzp_v:
8680 case NEON::BI__builtin_neon_vuzpq_v: {
8681 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8682 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8683 Value *SV = nullptr;
8685 for (unsigned vi = 0; vi != 2; ++vi) {
8686 SmallVector<int, 16> Indices;
8687 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8688 Indices.push_back(2*i+vi);
8690 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8691 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8692 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8694 return SV;
8696 case NEON::BI__builtin_neon_vxarq_u64: {
8697 Function *F = CGM.getIntrinsic(Int);
8698 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8699 return EmitNeonCall(F, Ops, "");
8701 case NEON::BI__builtin_neon_vzip_v:
8702 case NEON::BI__builtin_neon_vzipq_v: {
8703 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8704 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8705 Value *SV = nullptr;
8707 for (unsigned vi = 0; vi != 2; ++vi) {
8708 SmallVector<int, 16> Indices;
8709 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8710 Indices.push_back((i + vi*e) >> 1);
8711 Indices.push_back(((i + vi*e) >> 1)+e);
8713 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8714 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8715 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8717 return SV;
8719 case NEON::BI__builtin_neon_vdot_s32:
8720 case NEON::BI__builtin_neon_vdot_u32:
8721 case NEON::BI__builtin_neon_vdotq_s32:
8722 case NEON::BI__builtin_neon_vdotq_u32: {
8723 auto *InputTy =
8724 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8725 llvm::Type *Tys[2] = { Ty, InputTy };
8726 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
8728 case NEON::BI__builtin_neon_vfmlal_low_f16:
8729 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8730 auto *InputTy =
8731 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8732 llvm::Type *Tys[2] = { Ty, InputTy };
8733 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
8735 case NEON::BI__builtin_neon_vfmlsl_low_f16:
8736 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8737 auto *InputTy =
8738 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8739 llvm::Type *Tys[2] = { Ty, InputTy };
8740 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
8742 case NEON::BI__builtin_neon_vfmlal_high_f16:
8743 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8744 auto *InputTy =
8745 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8746 llvm::Type *Tys[2] = { Ty, InputTy };
8747 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
8749 case NEON::BI__builtin_neon_vfmlsl_high_f16:
8750 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8751 auto *InputTy =
8752 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8753 llvm::Type *Tys[2] = { Ty, InputTy };
8754 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
8756 case NEON::BI__builtin_neon_vmmlaq_s32:
8757 case NEON::BI__builtin_neon_vmmlaq_u32: {
8758 auto *InputTy =
8759 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8760 llvm::Type *Tys[2] = { Ty, InputTy };
8761 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
8763 case NEON::BI__builtin_neon_vusmmlaq_s32: {
8764 auto *InputTy =
8765 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8766 llvm::Type *Tys[2] = { Ty, InputTy };
8767 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
8769 case NEON::BI__builtin_neon_vusdot_s32:
8770 case NEON::BI__builtin_neon_vusdotq_s32: {
8771 auto *InputTy =
8772 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8773 llvm::Type *Tys[2] = { Ty, InputTy };
8774 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
8776 case NEON::BI__builtin_neon_vbfdot_f32:
8777 case NEON::BI__builtin_neon_vbfdotq_f32: {
8778 llvm::Type *InputTy =
8779 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8780 llvm::Type *Tys[2] = { Ty, InputTy };
8781 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
8783 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8784 llvm::Type *Tys[1] = { Ty };
8785 Function *F = CGM.getIntrinsic(Int, Tys);
8786 return EmitNeonCall(F, Ops, "vcvtfp2bf");
8791 assert(Int && "Expected valid intrinsic number");
8793 // Determine the type(s) of this overloaded AArch64 intrinsic.
8794 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
8796 Value *Result = EmitNeonCall(F, Ops, NameHint);
8797 llvm::Type *ResultType = ConvertType(E->getType());
8798 // AArch64 intrinsic one-element vector type cast to
8799 // scalar type expected by the builtin
8800 return Builder.CreateBitCast(Result, ResultType, NameHint);
8803 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8804 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8805 const CmpInst::Predicate Ip, const Twine &Name) {
8806 llvm::Type *OTy = Op->getType();
8808 // FIXME: this is utterly horrific. We should not be looking at previous
8809 // codegen context to find out what needs doing. Unfortunately TableGen
8810 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8811 // (etc).
8812 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8813 OTy = BI->getOperand(0)->getType();
8815 Op = Builder.CreateBitCast(Op, OTy);
8816 if (OTy->getScalarType()->isFloatingPointTy()) {
8817 if (Fp == CmpInst::FCMP_OEQ)
8818 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8819 else
8820 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8821 } else {
8822 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8824 return Builder.CreateSExt(Op, Ty, Name);
8827 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8828 Value *ExtOp, Value *IndexOp,
8829 llvm::Type *ResTy, unsigned IntID,
8830 const char *Name) {
8831 SmallVector<Value *, 2> TblOps;
8832 if (ExtOp)
8833 TblOps.push_back(ExtOp);
8835 // Build a vector containing sequential number like (0, 1, 2, ..., 15)
8836 SmallVector<int, 16> Indices;
8837 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8838 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8839 Indices.push_back(2*i);
8840 Indices.push_back(2*i+1);
8843 int PairPos = 0, End = Ops.size() - 1;
8844 while (PairPos < End) {
8845 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8846 Ops[PairPos+1], Indices,
8847 Name));
8848 PairPos += 2;
8851 // If there's an odd number of 64-bit lookup table, fill the high 64-bit
8852 // of the 128-bit lookup table with zero.
8853 if (PairPos == End) {
8854 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8855 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8856 ZeroTbl, Indices, Name));
8859 Function *TblF;
8860 TblOps.push_back(IndexOp);
8861 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8863 return CGF.EmitNeonCall(TblF, TblOps, Name);
8866 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8867 unsigned Value;
8868 switch (BuiltinID) {
8869 default:
8870 return nullptr;
8871 case clang::ARM::BI__builtin_arm_nop:
8872 Value = 0;
8873 break;
8874 case clang::ARM::BI__builtin_arm_yield:
8875 case clang::ARM::BI__yield:
8876 Value = 1;
8877 break;
8878 case clang::ARM::BI__builtin_arm_wfe:
8879 case clang::ARM::BI__wfe:
8880 Value = 2;
8881 break;
8882 case clang::ARM::BI__builtin_arm_wfi:
8883 case clang::ARM::BI__wfi:
8884 Value = 3;
8885 break;
8886 case clang::ARM::BI__builtin_arm_sev:
8887 case clang::ARM::BI__sev:
8888 Value = 4;
8889 break;
8890 case clang::ARM::BI__builtin_arm_sevl:
8891 case clang::ARM::BI__sevl:
8892 Value = 5;
8893 break;
8896 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8897 llvm::ConstantInt::get(Int32Ty, Value));
/// Selects which register intrinsic EmitSpecialRegisterBuiltin emits.
enum SpecialRegisterAccessKind {
  /// Read via Intrinsic::read_register.
  NormalRead,
  /// Read via Intrinsic::read_volatile_register.
  VolatileRead,
  /// Write via Intrinsic::write_register.
  Write,
};
8906 // Generates the IR for __builtin_read_exec_*.
8907 // Lowers the builtin to amdgcn_ballot intrinsic.
8908 static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8909 llvm::Type *RegisterType,
8910 llvm::Type *ValueType, bool isExecHi) {
8911 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8912 CodeGen::CodeGenModule &CGM = CGF.CGM;
8914 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8915 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8917 if (isExecHi) {
8918 Value *Rt2 = Builder.CreateLShr(Call, 32);
8919 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8920 return Rt2;
8923 return Call;
8926 // Generates the IR for the read/write special register builtin,
8927 // ValueType is the type of the value that is to be written or read,
8928 // RegisterType is the type of the register being written to or read from.
8929 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8930 const CallExpr *E,
8931 llvm::Type *RegisterType,
8932 llvm::Type *ValueType,
8933 SpecialRegisterAccessKind AccessKind,
8934 StringRef SysReg = "") {
8935 // write and register intrinsics only support 32, 64 and 128 bit operations.
8936 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8937 RegisterType->isIntegerTy(128)) &&
8938 "Unsupported size for register.");
8940 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8941 CodeGen::CodeGenModule &CGM = CGF.CGM;
8942 LLVMContext &Context = CGM.getLLVMContext();
8944 if (SysReg.empty()) {
8945 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8946 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8949 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8950 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8951 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8953 llvm::Type *Types[] = { RegisterType };
8955 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8956 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8957 && "Can't fit 64-bit value in 32-bit register");
8959 if (AccessKind != Write) {
8960 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8961 llvm::Function *F = CGM.getIntrinsic(
8962 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8963 : llvm::Intrinsic::read_register,
8964 Types);
8965 llvm::Value *Call = Builder.CreateCall(F, Metadata);
8967 if (MixedTypes)
8968 // Read into 64 bit register and then truncate result to 32 bit.
8969 return Builder.CreateTrunc(Call, ValueType);
8971 if (ValueType->isPointerTy())
8972 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
8973 return Builder.CreateIntToPtr(Call, ValueType);
8975 return Call;
8978 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
8979 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
8980 if (MixedTypes) {
8981 // Extend 32 bit write value to 64 bit to pass to write.
8982 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
8983 return Builder.CreateCall(F, { Metadata, ArgValue });
8986 if (ValueType->isPointerTy()) {
8987 // Have VoidPtrTy ArgValue but want to return an i32/i64.
8988 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
8989 return Builder.CreateCall(F, { Metadata, ArgValue });
8992 return Builder.CreateCall(F, { Metadata, ArgValue });
8995 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
8996 /// argument that specifies the vector type.
8997 static bool HasExtraNeonArgument(unsigned BuiltinID) {
8998 switch (BuiltinID) {
8999 default: break;
9000 case NEON::BI__builtin_neon_vget_lane_i8:
9001 case NEON::BI__builtin_neon_vget_lane_i16:
9002 case NEON::BI__builtin_neon_vget_lane_bf16:
9003 case NEON::BI__builtin_neon_vget_lane_i32:
9004 case NEON::BI__builtin_neon_vget_lane_i64:
9005 case NEON::BI__builtin_neon_vget_lane_f32:
9006 case NEON::BI__builtin_neon_vgetq_lane_i8:
9007 case NEON::BI__builtin_neon_vgetq_lane_i16:
9008 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9009 case NEON::BI__builtin_neon_vgetq_lane_i32:
9010 case NEON::BI__builtin_neon_vgetq_lane_i64:
9011 case NEON::BI__builtin_neon_vgetq_lane_f32:
9012 case NEON::BI__builtin_neon_vduph_lane_bf16:
9013 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9014 case NEON::BI__builtin_neon_vset_lane_i8:
9015 case NEON::BI__builtin_neon_vset_lane_i16:
9016 case NEON::BI__builtin_neon_vset_lane_bf16:
9017 case NEON::BI__builtin_neon_vset_lane_i32:
9018 case NEON::BI__builtin_neon_vset_lane_i64:
9019 case NEON::BI__builtin_neon_vset_lane_f32:
9020 case NEON::BI__builtin_neon_vsetq_lane_i8:
9021 case NEON::BI__builtin_neon_vsetq_lane_i16:
9022 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9023 case NEON::BI__builtin_neon_vsetq_lane_i32:
9024 case NEON::BI__builtin_neon_vsetq_lane_i64:
9025 case NEON::BI__builtin_neon_vsetq_lane_f32:
9026 case NEON::BI__builtin_neon_vsha1h_u32:
9027 case NEON::BI__builtin_neon_vsha1cq_u32:
9028 case NEON::BI__builtin_neon_vsha1pq_u32:
9029 case NEON::BI__builtin_neon_vsha1mq_u32:
9030 case NEON::BI__builtin_neon_vcvth_bf16_f32:
9031 case clang::ARM::BI_MoveToCoprocessor:
9032 case clang::ARM::BI_MoveToCoprocessor2:
9033 return false;
9035 return true;
/// Emit LLVM IR for a call to an ARM target builtin.
///
/// The builtin is matched against a sequence of groups, in this order: ARM
/// hint builtins, __emit, dbg, prefetch, rbit / clz / cls, __clear_cache,
/// mcrr / mrrc coprocessor transfers, exclusive loads and stores (ldrex,
/// ldaex, strex, stlex and their 64-bit forms) plus clrex, CRC32,
/// special-register reads and writes, sponentry, MSVC intrinsics, MVE
/// builtins, CDE builtins and finally NEON builtins.
///
/// \param BuiltinID   ID of the builtin being called.
/// \param E           The call expression; its arguments are emitted here.
/// \param ReturnValue Forwarded to the MVE and CDE emitters.
/// \param Arch        Forwarded to the MVE, CDE and common NEON emitters.
/// \returns the value emitted for the builtin, or nullptr when no case below
///          handles it (or the NEON type argument cannot be resolved).
9038 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
9039 const CallExpr *E,
9040 ReturnValueSlot ReturnValue,
9041 llvm::Triple::ArchType Arch) {
9042 if (auto Hint = GetValueForARMHint(BuiltinID))
9043 return Hint;
// __emit: place the constant argument in the instruction stream through an
// inline-asm .inst directive (.inst.n with a 16-bit value when targeting
// Thumb, .inst with a 32-bit value otherwise).
9045 if (BuiltinID == clang::ARM::BI__emit) {
9046 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
9047 llvm::FunctionType *FTy =
9048 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
9050 Expr::EvalResult Result;
9051 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
9052 llvm_unreachable("Sema will ensure that the parameter is constant");
9054 llvm::APSInt Value = Result.Val.getInt();
9055 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
9057 llvm::InlineAsm *Emit =
9058 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
9059 /*hasSideEffects=*/true)
9060 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
9061 /*hasSideEffects=*/true);
9063 return Builder.CreateCall(Emit);
9066 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
9067 Value *Option = EmitScalarExpr(E->getArg(0));
9068 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
9071 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
9072 Value *Address = EmitScalarExpr(E->getArg(0));
9073 Value *RW = EmitScalarExpr(E->getArg(1));
9074 Value *IsData = EmitScalarExpr(E->getArg(2));
9076 // Locality is not supported on ARM target
9077 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
9079 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
9080 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
9083 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
9084 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9085 return Builder.CreateCall(
9086 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
9089 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
9090 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
9091 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9092 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
9093 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
// The clz64 result is truncated to i32 before it is returned.
9094 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
9095 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
9096 return Res;
9100 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
9101 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9102 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
9104 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
9105 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9106 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
9107 "cls");
// __clear_cache is emitted as a nounwind call to a runtime function that has
// the callee's own name and type.
9110 if (BuiltinID == clang::ARM::BI__clear_cache) {
9111 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
9112 const FunctionDecl *FD = E->getDirectCallee();
9113 Value *Ops[2];
9114 for (unsigned i = 0; i < 2; i++)
9115 Ops[i] = EmitScalarExpr(E->getArg(i));
9116 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
9117 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
9118 StringRef Name = FD->getName();
9119 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
9122 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
9123 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
9124 Function *F;
9126 switch (BuiltinID) {
9127 default: llvm_unreachable("unexpected builtin");
9128 case clang::ARM::BI__builtin_arm_mcrr:
9129 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
9130 break;
9131 case clang::ARM::BI__builtin_arm_mcrr2:
9132 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
9133 break;
9136 // MCRR{2} instruction has 5 operands but
9137 // the intrinsic has 4 because Rt and Rt2
9138 // are represented as a single unsigned 64
9139 // bit integer in the intrinsic definition
9140 // but internally it's represented as 2 32
9141 // bit integers.
9143 Value *Coproc = EmitScalarExpr(E->getArg(0));
9144 Value *Opc1 = EmitScalarExpr(E->getArg(1));
9145 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
9146 Value *CRm = EmitScalarExpr(E->getArg(3));
// Rt is the low 32 bits of the combined argument, Rt2 the high 32 bits.
9148 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
9149 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
9150 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
9151 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
9153 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
9156 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
9157 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
9158 Function *F;
9160 switch (BuiltinID) {
9161 default: llvm_unreachable("unexpected builtin");
9162 case clang::ARM::BI__builtin_arm_mrrc:
9163 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
9164 break;
9165 case clang::ARM::BI__builtin_arm_mrrc2:
9166 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
9167 break;
9170 Value *Coproc = EmitScalarExpr(E->getArg(0));
9171 Value *Opc1 = EmitScalarExpr(E->getArg(1));
9172 Value *CRm = EmitScalarExpr(E->getArg(2));
9173 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
9175 // Returns an unsigned 64 bit integer, represented
9176 // as two 32 bit integers.
// Element 1 of the result becomes the high half and element 0 the low half
// of the recombined 64-bit value.
9178 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
9179 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
9180 Rt = Builder.CreateZExt(Rt, Int64Ty);
9181 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
9183 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
9184 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
9185 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
9187 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
// 64-bit exclusive loads: ldrexd / __ldrexd, and ldrex / ldaex whose result
// type is 64 bits wide.
9190 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
9191 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9192 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
9193 getContext().getTypeSize(E->getType()) == 64) ||
9194 BuiltinID == clang::ARM::BI__ldrexd) {
9195 Function *F;
9197 switch (BuiltinID) {
9198 default: llvm_unreachable("unexpected builtin");
9199 case clang::ARM::BI__builtin_arm_ldaex:
9200 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
9201 break;
9202 case clang::ARM::BI__builtin_arm_ldrexd:
9203 case clang::ARM::BI__builtin_arm_ldrex:
9204 case clang::ARM::BI__ldrexd:
9205 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
9206 break;
9209 Value *LdPtr = EmitScalarExpr(E->getArg(0));
9210 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
// Element 1 of the result becomes the high half and element 0 the low half
// of the recombined 64-bit value.
9212 Value *Val0 = Builder.CreateExtractValue(Val, 1);
9213 Value *Val1 = Builder.CreateExtractValue(Val, 0);
9214 Val0 = Builder.CreateZExt(Val0, Int64Ty);
9215 Val1 = Builder.CreateZExt(Val1, Int64Ty);
9217 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
9218 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
9219 Val = Builder.CreateOr(Val, Val1);
9220 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
// Remaining (non-64-bit) ldrex / ldaex.
9223 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9224 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
9225 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
9227 QualType Ty = E->getType();
9228 llvm::Type *RealResTy = ConvertType(Ty);
9229 llvm::Type *IntTy =
9230 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9232 Function *F = CGM.getIntrinsic(
9233 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
9234 : Intrinsic::arm_ldrex,
9235 UnqualPtrTy);
9236 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
// Record the accessed integer type as the pointer operand's elementtype
// attribute.
9237 Val->addParamAttr(
9238 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
9240 if (RealResTy->isPointerTy())
9241 return Builder.CreateIntToPtr(Val, RealResTy);
9242 else {
9243 llvm::Type *IntResTy = llvm::IntegerType::get(
9244 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
9245 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
9246 RealResTy);
// 64-bit exclusive stores: strexd, and strex / stlex whose stored value type
// is 64 bits wide.
9250 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
9251 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
9252 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
9253 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
9254 Function *F = CGM.getIntrinsic(
9255 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
9256 : Intrinsic::arm_strexd);
9257 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
// Store the value to a temporary and reload it as {i32, i32} to obtain the
// two 32-bit operands the intrinsic takes.
9259 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
9260 Value *Val = EmitScalarExpr(E->getArg(0));
9261 Builder.CreateStore(Val, Tmp);
9263 Address LdPtr = Tmp.withElementType(STy);
9264 Val = Builder.CreateLoad(LdPtr);
9266 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
9267 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
9268 Value *StPtr = EmitScalarExpr(E->getArg(1));
9269 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
// Remaining (non-64-bit) strex / stlex: the stored value is converted to i32
// before the call.
9272 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
9273 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
9274 Value *StoreVal = EmitScalarExpr(E->getArg(0));
9275 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
9277 QualType Ty = E->getArg(0)->getType();
9278 llvm::Type *StoreTy =
9279 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9281 if (StoreVal->getType()->isPointerTy())
9282 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
9283 else {
9284 llvm::Type *IntTy = llvm::IntegerType::get(
9285 getLLVMContext(),
9286 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
9287 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
9288 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
9291 Function *F = CGM.getIntrinsic(
9292 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
9293 : Intrinsic::arm_strex,
9294 StoreAddr->getType());
9296 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
9297 CI->addParamAttr(
9298 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
9299 return CI;
9302 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
9303 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
9304 return Builder.CreateCall(F);
9307 // CRC32
9308 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
9309 switch (BuiltinID) {
9310 case clang::ARM::BI__builtin_arm_crc32b:
9311 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
9312 case clang::ARM::BI__builtin_arm_crc32cb:
9313 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
9314 case clang::ARM::BI__builtin_arm_crc32h:
9315 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
9316 case clang::ARM::BI__builtin_arm_crc32ch:
9317 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
9318 case clang::ARM::BI__builtin_arm_crc32w:
9319 case clang::ARM::BI__builtin_arm_crc32d:
9320 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
9321 case clang::ARM::BI__builtin_arm_crc32cw:
9322 case clang::ARM::BI__builtin_arm_crc32cd:
9323 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
9326 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
9327 Value *Arg0 = EmitScalarExpr(E->getArg(0));
9328 Value *Arg1 = EmitScalarExpr(E->getArg(1));
9330 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
9331 // intrinsics, hence we need different codegen for these cases.
9332 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
9333 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
9334 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
9335 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
9336 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
9337 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
// The low word is fed in first, then the high word.
9339 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
9340 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
9341 return Builder.CreateCall(F, {Res, Arg1b});
9342 } else {
9343 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
9345 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
9346 return Builder.CreateCall(F, {Arg0, Arg1});
// Special-register builtins: rsr* are emitted as volatile reads, wsr* as
// writes.
9350 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9351 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9352 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9353 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
9354 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
9355 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
9357 SpecialRegisterAccessKind AccessKind = Write;
9358 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9359 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9360 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
9361 AccessKind = VolatileRead;
9363 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9364 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
9366 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9367 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
9369 llvm::Type *ValueType;
9370 llvm::Type *RegisterType;
9371 if (IsPointerBuiltin) {
9372 ValueType = VoidPtrTy;
9373 RegisterType = Int32Ty;
9374 } else if (Is64Bit) {
9375 ValueType = RegisterType = Int64Ty;
9376 } else {
9377 ValueType = RegisterType = Int32Ty;
9380 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
9381 AccessKind);
9384 if (BuiltinID == ARM::BI__builtin_sponentry) {
9385 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
9386 return Builder.CreateCall(F);
9389 // Handle MSVC intrinsics before argument evaluation to prevent double
9390 // evaluation.
9391 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
9392 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
9394 // Deal with MVE builtins
9395 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
9396 return Result;
9397 // Handle CDE builtins
9398 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
9399 return Result;
9401 // Some intrinsics are equivalent - if they are use the base intrinsic ID.
9402 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
9403 return P.first == BuiltinID;
9405 if (It != end(NEONEquivalentIntrinsicMap))
9406 BuiltinID = It->second;
9408 // Find out if any arguments are required to be integer constant
9409 // expressions.
9410 unsigned ICEArguments = 0;
9411 ASTContext::GetBuiltinTypeError Error;
9412 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
9413 assert(Error == ASTContext::GE_None && "Should not codegen an error");
9415 auto getAlignmentValue32 = [&](Address addr) -> Value* {
9416 return Builder.getInt32(addr.getAlignment().getQuantity());
// Emit the call operands into Ops. When the builtin has a trailing
// type-specifier argument, that argument is left out of NumArgs. For the
// listed load/store builtins the pointer operand is emitted together with
// its alignment and remembered in PtrOp0 / PtrOp1.
9419 Address PtrOp0 = Address::invalid();
9420 Address PtrOp1 = Address::invalid();
9421 SmallVector<Value*, 4> Ops;
9422 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
9423 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
9424 for (unsigned i = 0, e = NumArgs; i != e; i++) {
9425 if (i == 0) {
9426 switch (BuiltinID) {
9427 case NEON::BI__builtin_neon_vld1_v:
9428 case NEON::BI__builtin_neon_vld1q_v:
9429 case NEON::BI__builtin_neon_vld1q_lane_v:
9430 case NEON::BI__builtin_neon_vld1_lane_v:
9431 case NEON::BI__builtin_neon_vld1_dup_v:
9432 case NEON::BI__builtin_neon_vld1q_dup_v:
9433 case NEON::BI__builtin_neon_vst1_v:
9434 case NEON::BI__builtin_neon_vst1q_v:
9435 case NEON::BI__builtin_neon_vst1q_lane_v:
9436 case NEON::BI__builtin_neon_vst1_lane_v:
9437 case NEON::BI__builtin_neon_vst2_v:
9438 case NEON::BI__builtin_neon_vst2q_v:
9439 case NEON::BI__builtin_neon_vst2_lane_v:
9440 case NEON::BI__builtin_neon_vst2q_lane_v:
9441 case NEON::BI__builtin_neon_vst3_v:
9442 case NEON::BI__builtin_neon_vst3q_v:
9443 case NEON::BI__builtin_neon_vst3_lane_v:
9444 case NEON::BI__builtin_neon_vst3q_lane_v:
9445 case NEON::BI__builtin_neon_vst4_v:
9446 case NEON::BI__builtin_neon_vst4q_v:
9447 case NEON::BI__builtin_neon_vst4_lane_v:
9448 case NEON::BI__builtin_neon_vst4q_lane_v:
9449 // Get the alignment for the argument in addition to the value;
9450 // we'll use it later.
9451 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
9452 Ops.push_back(PtrOp0.emitRawPointer(*this));
9453 continue;
9456 if (i == 1) {
9457 switch (BuiltinID) {
9458 case NEON::BI__builtin_neon_vld2_v:
9459 case NEON::BI__builtin_neon_vld2q_v:
9460 case NEON::BI__builtin_neon_vld3_v:
9461 case NEON::BI__builtin_neon_vld3q_v:
9462 case NEON::BI__builtin_neon_vld4_v:
9463 case NEON::BI__builtin_neon_vld4q_v:
9464 case NEON::BI__builtin_neon_vld2_lane_v:
9465 case NEON::BI__builtin_neon_vld2q_lane_v:
9466 case NEON::BI__builtin_neon_vld3_lane_v:
9467 case NEON::BI__builtin_neon_vld3q_lane_v:
9468 case NEON::BI__builtin_neon_vld4_lane_v:
9469 case NEON::BI__builtin_neon_vld4q_lane_v:
9470 case NEON::BI__builtin_neon_vld2_dup_v:
9471 case NEON::BI__builtin_neon_vld2q_dup_v:
9472 case NEON::BI__builtin_neon_vld3_dup_v:
9473 case NEON::BI__builtin_neon_vld3q_dup_v:
9474 case NEON::BI__builtin_neon_vld4_dup_v:
9475 case NEON::BI__builtin_neon_vld4q_dup_v:
9476 // Get the alignment for the argument in addition to the value;
9477 // we'll use it later.
9478 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
9479 Ops.push_back(PtrOp1.emitRawPointer(*this));
9480 continue;
9484 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
// Builtins emitted directly from Ops, before the type-specifier argument is
// read.
9487 switch (BuiltinID) {
9488 default: break;
9490 case NEON::BI__builtin_neon_vget_lane_i8:
9491 case NEON::BI__builtin_neon_vget_lane_i16:
9492 case NEON::BI__builtin_neon_vget_lane_i32:
9493 case NEON::BI__builtin_neon_vget_lane_i64:
9494 case NEON::BI__builtin_neon_vget_lane_bf16:
9495 case NEON::BI__builtin_neon_vget_lane_f32:
9496 case NEON::BI__builtin_neon_vgetq_lane_i8:
9497 case NEON::BI__builtin_neon_vgetq_lane_i16:
9498 case NEON::BI__builtin_neon_vgetq_lane_i32:
9499 case NEON::BI__builtin_neon_vgetq_lane_i64:
9500 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9501 case NEON::BI__builtin_neon_vgetq_lane_f32:
9502 case NEON::BI__builtin_neon_vduph_lane_bf16:
9503 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9504 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
9506 case NEON::BI__builtin_neon_vrndns_f32: {
9507 Value *Arg = EmitScalarExpr(E->getArg(0));
9508 llvm::Type *Tys[] = {Arg->getType()};
9509 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
9510 return Builder.CreateCall(F, {Arg}, "vrndn"); }
9512 case NEON::BI__builtin_neon_vset_lane_i8:
9513 case NEON::BI__builtin_neon_vset_lane_i16:
9514 case NEON::BI__builtin_neon_vset_lane_i32:
9515 case NEON::BI__builtin_neon_vset_lane_i64:
9516 case NEON::BI__builtin_neon_vset_lane_bf16:
9517 case NEON::BI__builtin_neon_vset_lane_f32:
9518 case NEON::BI__builtin_neon_vsetq_lane_i8:
9519 case NEON::BI__builtin_neon_vsetq_lane_i16:
9520 case NEON::BI__builtin_neon_vsetq_lane_i32:
9521 case NEON::BI__builtin_neon_vsetq_lane_i64:
9522 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9523 case NEON::BI__builtin_neon_vsetq_lane_f32:
9524 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
// NOTE(review): the sha1c / sha1p / sha1m calls below all use the "vsha1h"
// name hint; this only affects the name of the emitted value.
9526 case NEON::BI__builtin_neon_vsha1h_u32:
9527 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
9528 "vsha1h");
9529 case NEON::BI__builtin_neon_vsha1cq_u32:
9530 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
9531 "vsha1h");
9532 case NEON::BI__builtin_neon_vsha1pq_u32:
9533 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
9534 "vsha1h");
9535 case NEON::BI__builtin_neon_vsha1mq_u32:
9536 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
9537 "vsha1h");
9539 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
9540 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
9541 "vcvtbfp2bf");
9544 // The ARM _MoveToCoprocessor builtins put the input register value as
9545 // the first argument, but the LLVM intrinsic expects it as the third one.
9546 case clang::ARM::BI_MoveToCoprocessor:
9547 case clang::ARM::BI_MoveToCoprocessor2: {
9548 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
9549 ? Intrinsic::arm_mcr
9550 : Intrinsic::arm_mcr2);
9551 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
9552 Ops[3], Ops[4], Ops[5]});
9556 // Get the last argument, which specifies the vector type.
9557 assert(HasExtraArg);
9558 const Expr *Arg = E->getArg(E->getNumArgs()-1);
9559 std::optional<llvm::APSInt> Result =
9560 Arg->getIntegerConstantExpr(getContext());
9561 if (!Result)
9562 return nullptr;
9564 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
9565 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
9566 // Determine the overloaded type of this builtin.
9567 llvm::Type *Ty;
9568 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
9569 Ty = FloatTy;
9570 else
9571 Ty = DoubleTy;
9573 // Determine whether this is an unsigned conversion or not.
9574 bool usgn = Result->getZExtValue() == 1;
9575 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
9577 // Call the appropriate intrinsic.
9578 Function *F = CGM.getIntrinsic(Int, Ty);
9579 return Builder.CreateCall(F, Ops, "vcvtr");
9582 // Determine the type of this overloaded NEON intrinsic.
9583 NeonTypeFlags Type = Result->getZExtValue();
9584 bool usgn = Type.isUnsigned();
9585 bool rightShift = false;
9587 llvm::FixedVectorType *VTy =
9588 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
9589 getTarget().hasBFloat16Type());
9590 llvm::Type *Ty = VTy;
9591 if (!Ty)
9592 return nullptr;
9594 // Many NEON builtins have identical semantics and uses in ARM and
9595 // AArch64. Emit these in a single function.
9596 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
9597 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
9598 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
9599 if (Builtin)
9600 return EmitCommonNeonBuiltinExpr(
9601 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
9602 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
// Builtins not found in ARMSIMDIntrinsicMap are handled one by one below;
// anything unrecognised yields nullptr.
9604 unsigned Int;
9605 switch (BuiltinID) {
9606 default: return nullptr;
9607 case NEON::BI__builtin_neon_vld1q_lane_v:
9608 // Handle 64-bit integer elements as a special case. Use shuffles of
9609 // one-element vectors to avoid poor code for i64 in the backend.
9610 if (VTy->getElementType()->isIntegerTy(64)) {
9611 // Extract the other lane.
9612 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9613 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
9614 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
9615 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9616 // Load the value as a one-element vector.
9617 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
9618 llvm::Type *Tys[] = {Ty, Int8PtrTy};
9619 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
9620 Value *Align = getAlignmentValue32(PtrOp0);
9621 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
9622 // Combine them.
9623 int Indices[] = {1 - Lane, Lane};
9624 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
9626 [[fallthrough]];
9627 case NEON::BI__builtin_neon_vld1_lane_v: {
9628 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9629 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
9630 Value *Ld = Builder.CreateLoad(PtrOp0);
9631 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
9633 case NEON::BI__builtin_neon_vqrshrn_n_v:
9634 Int =
9635 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9636 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
9637 1, true);
9638 case NEON::BI__builtin_neon_vqrshrun_n_v:
9639 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
9640 Ops, "vqrshrun_n", 1, true);
9641 case NEON::BI__builtin_neon_vqshrn_n_v:
9642 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9643 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
9644 1, true);
9645 case NEON::BI__builtin_neon_vqshrun_n_v:
9646 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
9647 Ops, "vqshrun_n", 1, true);
9648 case NEON::BI__builtin_neon_vrecpe_v:
9649 case NEON::BI__builtin_neon_vrecpeq_v:
9650 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
9651 Ops, "vrecpe");
9652 case NEON::BI__builtin_neon_vrshrn_n_v:
9653 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
9654 Ops, "vrshrn_n", 1, true);
9655 case NEON::BI__builtin_neon_vrsra_n_v:
9656 case NEON::BI__builtin_neon_vrsraq_n_v:
9657 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9658 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9659 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
9660 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9661 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
9662 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
9663 case NEON::BI__builtin_neon_vsri_n_v:
9664 case NEON::BI__builtin_neon_vsriq_n_v:
9665 rightShift = true;
9666 [[fallthrough]];
9667 case NEON::BI__builtin_neon_vsli_n_v:
9668 case NEON::BI__builtin_neon_vsliq_n_v:
9669 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
9670 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
9671 Ops, "vsli_n");
9672 case NEON::BI__builtin_neon_vsra_n_v:
9673 case NEON::BI__builtin_neon_vsraq_n_v:
9674 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9675 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
9676 return Builder.CreateAdd(Ops[0], Ops[1]);
9677 case NEON::BI__builtin_neon_vst1q_lane_v:
9678 // Handle 64-bit integer elements as a special case. Use a shuffle to get
9679 // a one-element vector and avoid poor code for i64 in the backend.
9680 if (VTy->getElementType()->isIntegerTy(64)) {
9681 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9682 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
9683 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9684 Ops[2] = getAlignmentValue32(PtrOp0);
9685 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
9686 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
9687 Tys), Ops);
9689 [[fallthrough]];
9690 case NEON::BI__builtin_neon_vst1_lane_v: {
9691 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9692 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
9693 return Builder.CreateStore(Ops[1],
9694 PtrOp0.withElementType(Ops[1]->getType()));
9696 case NEON::BI__builtin_neon_vtbl1_v:
9697 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
9698 Ops, "vtbl1");
9699 case NEON::BI__builtin_neon_vtbl2_v:
9700 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
9701 Ops, "vtbl2");
9702 case NEON::BI__builtin_neon_vtbl3_v:
9703 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
9704 Ops, "vtbl3");
9705 case NEON::BI__builtin_neon_vtbl4_v:
9706 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
9707 Ops, "vtbl4");
9708 case NEON::BI__builtin_neon_vtbx1_v:
9709 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9710 Ops, "vtbx1");
9711 case NEON::BI__builtin_neon_vtbx2_v:
9712 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9713 Ops, "vtbx2");
9714 case NEON::BI__builtin_neon_vtbx3_v:
9715 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9716 Ops, "vtbx3");
9717 case NEON::BI__builtin_neon_vtbx4_v:
9718 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9719 Ops, "vtbx4");
9723 template<typename Integer>
9724 static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
9725 return E->getIntegerConstantExpr(Context)->getExtValue();
9728 static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9729 llvm::Type *T, bool Unsigned) {
9730 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9731 // which finds it convenient to specify signed/unsigned as a boolean flag.
9732 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
9735 static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9736 uint32_t Shift, bool Unsigned) {
9737 // MVE helper function for integer shift right. This must handle signed vs
9738 // unsigned, and also deal specially with the case where the shift count is
9739 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9740 // undefined behavior, but in MVE it's legal, so we must convert it to code
9741 // that is not undefined in IR.
9742 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
9743 ->getElementType()
9744 ->getPrimitiveSizeInBits();
9745 if (Shift == LaneBits) {
9746 // An unsigned shift of the full lane size always generates zero, so we can
9747 // simply emit a zero vector. A signed shift of the full lane size does the
9748 // same thing as shifting by one bit fewer.
9749 if (Unsigned)
9750 return llvm::Constant::getNullValue(V->getType());
9751 else
9752 --Shift;
9754 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
9757 static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9758 // MVE-specific helper function for a vector splat, which infers the element
9759 // count of the output vector by knowing that MVE vectors are all 128 bits
9760 // wide.
9761 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
9762 return Builder.CreateVectorSplat(Elements, V);
9765 static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9766 CodeGenFunction *CGF,
9767 llvm::Value *V,
9768 llvm::Type *DestType) {
9769 // Convert one MVE vector type into another by reinterpreting its in-register
9770 // format.
9772 // Little-endian, this is identical to a bitcast (which reinterprets the
9773 // memory format). But big-endian, they're not necessarily the same, because
9774 // the register and memory formats map to each other differently depending on
9775 // the lane size.
9777 // We generate a bitcast whenever we can (if we're little-endian, or if the
9778 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9779 // that performs the different kind of reinterpretation.
9780 if (CGF->getTarget().isBigEndian() &&
9781 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9782 return Builder.CreateCall(
9783 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9784 {DestType, V->getType()}),
9786 } else {
9787 return Builder.CreateBitCast(V, DestType);
9791 static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9792 // Make a shufflevector that extracts every other element of a vector (evens
9793 // or odds, as desired).
9794 SmallVector<int, 16> Indices;
9795 unsigned InputElements =
9796 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
9797 for (unsigned i = 0; i < InputElements; i += 2)
9798 Indices.push_back(i + Odd);
9799 return Builder.CreateShuffleVector(V, Indices);
9802 static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9803 llvm::Value *V1) {
9804 // Make a shufflevector that interleaves two vectors element by element.
9805 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9806 SmallVector<int, 16> Indices;
9807 unsigned InputElements =
9808 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9809 for (unsigned i = 0; i < InputElements; i++) {
9810 Indices.push_back(i);
9811 Indices.push_back(i + InputElements);
9813 return Builder.CreateShuffleVector(V0, V1, Indices);
9816 template<unsigned HighBit, unsigned OtherBits>
9817 static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9818 // MVE-specific helper function to make a vector splat of a constant such as
9819 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
9820 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9821 unsigned LaneBits = T->getPrimitiveSizeInBits();
9822 uint32_t Value = HighBit << (LaneBits - 1);
9823 if (OtherBits)
9824 Value |= (1UL << (LaneBits - 1)) - 1;
9825 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9826 return ARMMVEVectorSplat(Builder, Lane);
9829 static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9830 llvm::Value *V,
9831 unsigned ReverseWidth) {
9832 // MVE-specific helper function which reverses the elements of a
9833 // vector within every (ReverseWidth)-bit collection of lanes.
9834 SmallVector<int, 16> Indices;
9835 unsigned LaneSize = V->getType()->getScalarSizeInBits();
9836 unsigned Elements = 128 / LaneSize;
9837 unsigned Mask = ReverseWidth / LaneSize - 1;
9838 for (unsigned i = 0; i < Elements; i++)
9839 Indices.push_back(i ^ Mask);
9840 return Builder.CreateShuffleVector(V, Indices);
9843 Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9844 const CallExpr *E,
9845 ReturnValueSlot ReturnValue,
9846 llvm::Triple::ArchType Arch) {
9847 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9848 Intrinsic::ID IRIntr;
9849 unsigned NumVectors;
9851 // Code autogenerated by Tablegen will handle all the simple builtins.
9852 switch (BuiltinID) {
9853 #include "clang/Basic/arm_mve_builtin_cg.inc"
9855 // If we didn't match an MVE builtin id at all, go back to the
9856 // main EmitARMBuiltinExpr.
9857 default:
9858 return nullptr;
9861 // Anything that breaks from that switch is an MVE builtin that
9862 // needs handwritten code to generate.
9864 switch (CustomCodeGenType) {
9866 case CustomCodeGen::VLD24: {
9867 llvm::SmallVector<Value *, 4> Ops;
9868 llvm::SmallVector<llvm::Type *, 4> Tys;
9870 auto MvecCType = E->getType();
9871 auto MvecLType = ConvertType(MvecCType);
9872 assert(MvecLType->isStructTy() &&
9873 "Return type for vld[24]q should be a struct");
9874 assert(MvecLType->getStructNumElements() == 1 &&
9875 "Return-type struct for vld[24]q should have one element");
9876 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9877 assert(MvecLTypeInner->isArrayTy() &&
9878 "Return-type struct for vld[24]q should contain an array");
9879 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9880 "Array member of return-type struct vld[24]q has wrong length");
9881 auto VecLType = MvecLTypeInner->getArrayElementType();
9883 Tys.push_back(VecLType);
9885 auto Addr = E->getArg(0);
9886 Ops.push_back(EmitScalarExpr(Addr));
9887 Tys.push_back(ConvertType(Addr->getType()));
9889 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9890 Value *LoadResult = Builder.CreateCall(F, Ops);
9891 Value *MvecOut = PoisonValue::get(MvecLType);
9892 for (unsigned i = 0; i < NumVectors; ++i) {
9893 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9894 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9897 if (ReturnValue.isNull())
9898 return MvecOut;
9899 else
9900 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
9903 case CustomCodeGen::VST24: {
9904 llvm::SmallVector<Value *, 4> Ops;
9905 llvm::SmallVector<llvm::Type *, 4> Tys;
9907 auto Addr = E->getArg(0);
9908 Ops.push_back(EmitScalarExpr(Addr));
9909 Tys.push_back(ConvertType(Addr->getType()));
9911 auto MvecCType = E->getArg(1)->getType();
9912 auto MvecLType = ConvertType(MvecCType);
9913 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9914 assert(MvecLType->getStructNumElements() == 1 &&
9915 "Data-type struct for vst2q should have one element");
9916 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9917 assert(MvecLTypeInner->isArrayTy() &&
9918 "Data-type struct for vst2q should contain an array");
9919 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9920 "Array member of return-type struct vld[24]q has wrong length");
9921 auto VecLType = MvecLTypeInner->getArrayElementType();
9923 Tys.push_back(VecLType);
9925 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9926 EmitAggExpr(E->getArg(1), MvecSlot);
9927 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9928 for (unsigned i = 0; i < NumVectors; i++)
9929 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9931 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9932 Value *ToReturn = nullptr;
9933 for (unsigned i = 0; i < NumVectors; i++) {
9934 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9935 ToReturn = Builder.CreateCall(F, Ops);
9936 Ops.pop_back();
9938 return ToReturn;
9941 llvm_unreachable("unknown custom codegen type.");
9944 Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9945 const CallExpr *E,
9946 ReturnValueSlot ReturnValue,
9947 llvm::Triple::ArchType Arch) {
9948 switch (BuiltinID) {
9949 default:
9950 return nullptr;
9951 #include "clang/Basic/arm_cde_builtin_cg.inc"
9955 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9956 const CallExpr *E,
9957 SmallVectorImpl<Value *> &Ops,
9958 llvm::Triple::ArchType Arch) {
9959 unsigned int Int = 0;
9960 const char *s = nullptr;
9962 switch (BuiltinID) {
9963 default:
9964 return nullptr;
9965 case NEON::BI__builtin_neon_vtbl1_v:
9966 case NEON::BI__builtin_neon_vqtbl1_v:
9967 case NEON::BI__builtin_neon_vqtbl1q_v:
9968 case NEON::BI__builtin_neon_vtbl2_v:
9969 case NEON::BI__builtin_neon_vqtbl2_v:
9970 case NEON::BI__builtin_neon_vqtbl2q_v:
9971 case NEON::BI__builtin_neon_vtbl3_v:
9972 case NEON::BI__builtin_neon_vqtbl3_v:
9973 case NEON::BI__builtin_neon_vqtbl3q_v:
9974 case NEON::BI__builtin_neon_vtbl4_v:
9975 case NEON::BI__builtin_neon_vqtbl4_v:
9976 case NEON::BI__builtin_neon_vqtbl4q_v:
9977 break;
9978 case NEON::BI__builtin_neon_vtbx1_v:
9979 case NEON::BI__builtin_neon_vqtbx1_v:
9980 case NEON::BI__builtin_neon_vqtbx1q_v:
9981 case NEON::BI__builtin_neon_vtbx2_v:
9982 case NEON::BI__builtin_neon_vqtbx2_v:
9983 case NEON::BI__builtin_neon_vqtbx2q_v:
9984 case NEON::BI__builtin_neon_vtbx3_v:
9985 case NEON::BI__builtin_neon_vqtbx3_v:
9986 case NEON::BI__builtin_neon_vqtbx3q_v:
9987 case NEON::BI__builtin_neon_vtbx4_v:
9988 case NEON::BI__builtin_neon_vqtbx4_v:
9989 case NEON::BI__builtin_neon_vqtbx4q_v:
9990 break;
9993 assert(E->getNumArgs() >= 3);
9995 // Get the last argument, which specifies the vector type.
9996 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
9997 std::optional<llvm::APSInt> Result =
9998 Arg->getIntegerConstantExpr(CGF.getContext());
9999 if (!Result)
10000 return nullptr;
10002 // Determine the type of this overloaded NEON intrinsic.
10003 NeonTypeFlags Type = Result->getZExtValue();
10004 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
10005 if (!Ty)
10006 return nullptr;
10008 CodeGen::CGBuilderTy &Builder = CGF.Builder;
10010 // AArch64 scalar builtins are not overloaded, they do not have an extra
10011 // argument that specifies the vector type, need to handle each case.
10012 switch (BuiltinID) {
10013 case NEON::BI__builtin_neon_vtbl1_v: {
10014 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
10015 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
10017 case NEON::BI__builtin_neon_vtbl2_v: {
10018 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
10019 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
10021 case NEON::BI__builtin_neon_vtbl3_v: {
10022 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
10023 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
10025 case NEON::BI__builtin_neon_vtbl4_v: {
10026 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
10027 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
10029 case NEON::BI__builtin_neon_vtbx1_v: {
10030 Value *TblRes =
10031 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
10032 Intrinsic::aarch64_neon_tbl1, "vtbl1");
10034 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
10035 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
10036 CmpRes = Builder.CreateSExt(CmpRes, Ty);
10038 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
10039 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
10040 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
10042 case NEON::BI__builtin_neon_vtbx2_v: {
10043 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
10044 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
10046 case NEON::BI__builtin_neon_vtbx3_v: {
10047 Value *TblRes =
10048 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
10049 Intrinsic::aarch64_neon_tbl2, "vtbl2");
10051 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
10052 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
10053 TwentyFourV);
10054 CmpRes = Builder.CreateSExt(CmpRes, Ty);
10056 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
10057 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
10058 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
10060 case NEON::BI__builtin_neon_vtbx4_v: {
10061 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
10062 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
10064 case NEON::BI__builtin_neon_vqtbl1_v:
10065 case NEON::BI__builtin_neon_vqtbl1q_v:
10066 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
10067 case NEON::BI__builtin_neon_vqtbl2_v:
10068 case NEON::BI__builtin_neon_vqtbl2q_v: {
10069 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
10070 case NEON::BI__builtin_neon_vqtbl3_v:
10071 case NEON::BI__builtin_neon_vqtbl3q_v:
10072 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
10073 case NEON::BI__builtin_neon_vqtbl4_v:
10074 case NEON::BI__builtin_neon_vqtbl4q_v:
10075 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
10076 case NEON::BI__builtin_neon_vqtbx1_v:
10077 case NEON::BI__builtin_neon_vqtbx1q_v:
10078 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
10079 case NEON::BI__builtin_neon_vqtbx2_v:
10080 case NEON::BI__builtin_neon_vqtbx2q_v:
10081 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
10082 case NEON::BI__builtin_neon_vqtbx3_v:
10083 case NEON::BI__builtin_neon_vqtbx3q_v:
10084 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
10085 case NEON::BI__builtin_neon_vqtbx4_v:
10086 case NEON::BI__builtin_neon_vqtbx4q_v:
10087 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
10091 if (!Int)
10092 return nullptr;
10094 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
10095 return CGF.EmitNeonCall(F, Ops, s);
10098 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
10099 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
10100 Op = Builder.CreateBitCast(Op, Int16Ty);
10101 Value *V = PoisonValue::get(VTy);
10102 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
10103 Op = Builder.CreateInsertElement(V, Op, CI);
10104 return Op;
10107 /// SVEBuiltinMemEltTy - Returns the memory element type for this memory
10108 /// access builtin. Only required if it can't be inferred from the base pointer
10109 /// operand.
10110 llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
10111 switch (TypeFlags.getMemEltType()) {
10112 case SVETypeFlags::MemEltTyDefault:
10113 return getEltType(TypeFlags);
10114 case SVETypeFlags::MemEltTyInt8:
10115 return Builder.getInt8Ty();
10116 case SVETypeFlags::MemEltTyInt16:
10117 return Builder.getInt16Ty();
10118 case SVETypeFlags::MemEltTyInt32:
10119 return Builder.getInt32Ty();
10120 case SVETypeFlags::MemEltTyInt64:
10121 return Builder.getInt64Ty();
10123 llvm_unreachable("Unknown MemEltType");
10126 llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
10127 switch (TypeFlags.getEltType()) {
10128 default:
10129 llvm_unreachable("Invalid SVETypeFlag!");
10131 case SVETypeFlags::EltTyInt8:
10132 return Builder.getInt8Ty();
10133 case SVETypeFlags::EltTyInt16:
10134 return Builder.getInt16Ty();
10135 case SVETypeFlags::EltTyInt32:
10136 return Builder.getInt32Ty();
10137 case SVETypeFlags::EltTyInt64:
10138 return Builder.getInt64Ty();
10139 case SVETypeFlags::EltTyInt128:
10140 return Builder.getInt128Ty();
10142 case SVETypeFlags::EltTyFloat16:
10143 return Builder.getHalfTy();
10144 case SVETypeFlags::EltTyFloat32:
10145 return Builder.getFloatTy();
10146 case SVETypeFlags::EltTyFloat64:
10147 return Builder.getDoubleTy();
10149 case SVETypeFlags::EltTyBFloat16:
10150 return Builder.getBFloatTy();
10152 case SVETypeFlags::EltTyBool8:
10153 case SVETypeFlags::EltTyBool16:
10154 case SVETypeFlags::EltTyBool32:
10155 case SVETypeFlags::EltTyBool64:
10156 return Builder.getInt1Ty();
10160 // Return the llvm predicate vector type corresponding to the specified element
10161 // TypeFlags.
10162 llvm::ScalableVectorType *
10163 CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
10164 switch (TypeFlags.getEltType()) {
10165 default: llvm_unreachable("Unhandled SVETypeFlag!");
10167 case SVETypeFlags::EltTyInt8:
10168 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10169 case SVETypeFlags::EltTyInt16:
10170 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10171 case SVETypeFlags::EltTyInt32:
10172 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10173 case SVETypeFlags::EltTyInt64:
10174 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10176 case SVETypeFlags::EltTyBFloat16:
10177 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10178 case SVETypeFlags::EltTyFloat16:
10179 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10180 case SVETypeFlags::EltTyFloat32:
10181 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10182 case SVETypeFlags::EltTyFloat64:
10183 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10185 case SVETypeFlags::EltTyBool8:
10186 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10187 case SVETypeFlags::EltTyBool16:
10188 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10189 case SVETypeFlags::EltTyBool32:
10190 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10191 case SVETypeFlags::EltTyBool64:
10192 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10196 // Return the llvm vector type corresponding to the specified element TypeFlags.
10197 llvm::ScalableVectorType *
10198 CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
10199 switch (TypeFlags.getEltType()) {
10200 default:
10201 llvm_unreachable("Invalid SVETypeFlag!");
10203 case SVETypeFlags::EltTyInt8:
10204 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
10205 case SVETypeFlags::EltTyInt16:
10206 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
10207 case SVETypeFlags::EltTyInt32:
10208 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
10209 case SVETypeFlags::EltTyInt64:
10210 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
10212 case SVETypeFlags::EltTyMFloat8:
10213 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
10214 case SVETypeFlags::EltTyFloat16:
10215 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
10216 case SVETypeFlags::EltTyBFloat16:
10217 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
10218 case SVETypeFlags::EltTyFloat32:
10219 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
10220 case SVETypeFlags::EltTyFloat64:
10221 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
10223 case SVETypeFlags::EltTyBool8:
10224 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10225 case SVETypeFlags::EltTyBool16:
10226 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10227 case SVETypeFlags::EltTyBool32:
10228 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10229 case SVETypeFlags::EltTyBool64:
10230 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10234 llvm::Value *
10235 CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
10236 Function *Ptrue =
10237 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
10238 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
10241 constexpr unsigned SVEBitsPerBlock = 128;
10243 static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
10244 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
10245 return llvm::ScalableVectorType::get(EltTy, NumElts);
10248 // Reinterpret the input predicate so that it can be used to correctly isolate
10249 // the elements of the specified datatype.
10250 Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
10251 llvm::ScalableVectorType *VTy) {
10253 if (isa<TargetExtType>(Pred->getType()) &&
10254 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
10255 return Pred;
10257 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
10258 if (Pred->getType() == RTy)
10259 return Pred;
10261 unsigned IntID;
10262 llvm::Type *IntrinsicTy;
10263 switch (VTy->getMinNumElements()) {
10264 default:
10265 llvm_unreachable("unsupported element count!");
10266 case 1:
10267 case 2:
10268 case 4:
10269 case 8:
10270 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
10271 IntrinsicTy = RTy;
10272 break;
10273 case 16:
10274 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
10275 IntrinsicTy = Pred->getType();
10276 break;
10279 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
10280 Value *C = Builder.CreateCall(F, Pred);
10281 assert(C->getType() == RTy && "Unexpected return type!");
10282 return C;
10285 Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
10286 llvm::StructType *Ty) {
10287 if (PredTuple->getType() == Ty)
10288 return PredTuple;
10290 Value *Ret = llvm::PoisonValue::get(Ty);
10291 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
10292 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
10293 Pred = EmitSVEPredicateCast(
10294 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
10295 Ret = Builder.CreateInsertValue(Ret, Pred, I);
10298 return Ret;
10301 Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
10302 SmallVectorImpl<Value *> &Ops,
10303 unsigned IntID) {
10304 auto *ResultTy = getSVEType(TypeFlags);
10305 auto *OverloadedTy =
10306 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
10308 Function *F = nullptr;
10309 if (Ops[1]->getType()->isVectorTy())
10310 // This is the "vector base, scalar offset" case. In order to uniquely
10311 // map this built-in to an LLVM IR intrinsic, we need both the return type
10312 // and the type of the vector base.
10313 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
10314 else
10315 // This is the "scalar base, vector offset case". The type of the offset
10316 // is encoded in the name of the intrinsic. We only need to specify the
10317 // return type in order to uniquely map this built-in to an LLVM IR
10318 // intrinsic.
10319 F = CGM.getIntrinsic(IntID, OverloadedTy);
10321 // At the ACLE level there's only one predicate type, svbool_t, which is
10322 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10323 // actual type being loaded. For example, when loading doubles (i64) the
10324 // predicate should be <n x 2 x i1> instead. At the IR level the type of
10325 // the predicate and the data being loaded must match. Cast to the type
10326 // expected by the intrinsic. The intrinsic itself should be defined in
10327 // a way than enforces relations between parameter types.
10328 Ops[0] = EmitSVEPredicateCast(
10329 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
10331 // Pass 0 when the offset is missing. This can only be applied when using
10332 // the "vector base" addressing mode for which ACLE allows no offset. The
10333 // corresponding LLVM IR always requires an offset.
10334 if (Ops.size() == 2) {
10335 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10336 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10339 // For "vector base, scalar index" scale the index so that it becomes a
10340 // scalar offset.
10341 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
10342 unsigned BytesPerElt =
10343 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10344 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10347 Value *Call = Builder.CreateCall(F, Ops);
10349 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
10350 // other cases it's folded into a nop.
10351 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
10352 : Builder.CreateSExt(Call, ResultTy);
10355 Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
10356 SmallVectorImpl<Value *> &Ops,
10357 unsigned IntID) {
10358 auto *SrcDataTy = getSVEType(TypeFlags);
10359 auto *OverloadedTy =
10360 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
10362 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
10363 // it's the first argument. Move it accordingly.
10364 Ops.insert(Ops.begin(), Ops.pop_back_val());
10366 Function *F = nullptr;
10367 if (Ops[2]->getType()->isVectorTy())
10368 // This is the "vector base, scalar offset" case. In order to uniquely
10369 // map this built-in to an LLVM IR intrinsic, we need both the return type
10370 // and the type of the vector base.
10371 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
10372 else
10373 // This is the "scalar base, vector offset case". The type of the offset
10374 // is encoded in the name of the intrinsic. We only need to specify the
10375 // return type in order to uniquely map this built-in to an LLVM IR
10376 // intrinsic.
10377 F = CGM.getIntrinsic(IntID, OverloadedTy);
10379 // Pass 0 when the offset is missing. This can only be applied when using
10380 // the "vector base" addressing mode for which ACLE allows no offset. The
10381 // corresponding LLVM IR always requires an offset.
10382 if (Ops.size() == 3) {
10383 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10384 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10387 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
10388 // folded into a nop.
10389 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
10391 // At the ACLE level there's only one predicate type, svbool_t, which is
10392 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10393 // actual type being stored. For example, when storing doubles (i64) the
10394 // predicated should be <n x 2 x i1> instead. At the IR level the type of
10395 // the predicate and the data being stored must match. Cast to the type
10396 // expected by the intrinsic. The intrinsic itself should be defined in
10397 // a way that enforces relations between parameter types.
10398 Ops[1] = EmitSVEPredicateCast(
10399 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
10401 // For "vector base, scalar index" scale the index so that it becomes a
10402 // scalar offset.
10403 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
10404 unsigned BytesPerElt =
10405 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10406 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
10409 return Builder.CreateCall(F, Ops);
10412 Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
10413 SmallVectorImpl<Value *> &Ops,
10414 unsigned IntID) {
10415 // The gather prefetches are overloaded on the vector input - this can either
10416 // be the vector of base addresses or vector of offsets.
10417 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
10418 if (!OverloadedTy)
10419 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
10421 // Cast the predicate from svbool_t to the right number of elements.
10422 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
10424 // vector + imm addressing modes
10425 if (Ops[1]->getType()->isVectorTy()) {
10426 if (Ops.size() == 3) {
10427 // Pass 0 for 'vector+imm' when the index is omitted.
10428 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10430 // The sv_prfop is the last operand in the builtin and IR intrinsic.
10431 std::swap(Ops[2], Ops[3]);
10432 } else {
10433 // Index needs to be passed as scaled offset.
10434 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10435 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
10436 if (BytesPerElt > 1)
10437 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10441 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
10442 return Builder.CreateCall(F, Ops);
10445 Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
10446 SmallVectorImpl<Value*> &Ops,
10447 unsigned IntID) {
10448 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10449 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10450 Value *BasePtr = Ops[1];
10452 // Does the load have an offset?
10453 if (Ops.size() > 2)
10454 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10456 Function *F = CGM.getIntrinsic(IntID, {VTy});
10457 return Builder.CreateCall(F, {Predicate, BasePtr});
10460 Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
10461 SmallVectorImpl<Value*> &Ops,
10462 unsigned IntID) {
10463 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10465 unsigned N;
10466 switch (IntID) {
10467 case Intrinsic::aarch64_sve_st2:
10468 case Intrinsic::aarch64_sve_st1_pn_x2:
10469 case Intrinsic::aarch64_sve_stnt1_pn_x2:
10470 case Intrinsic::aarch64_sve_st2q:
10471 N = 2;
10472 break;
10473 case Intrinsic::aarch64_sve_st3:
10474 case Intrinsic::aarch64_sve_st3q:
10475 N = 3;
10476 break;
10477 case Intrinsic::aarch64_sve_st4:
10478 case Intrinsic::aarch64_sve_st1_pn_x4:
10479 case Intrinsic::aarch64_sve_stnt1_pn_x4:
10480 case Intrinsic::aarch64_sve_st4q:
10481 N = 4;
10482 break;
10483 default:
10484 llvm_unreachable("unknown intrinsic!");
10487 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10488 Value *BasePtr = Ops[1];
10490 // Does the store have an offset?
10491 if (Ops.size() > (2 + N))
10492 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10494 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
10495 // need to break up the tuple vector.
10496 SmallVector<llvm::Value*, 5> Operands;
10497 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
10498 Operands.push_back(Ops[I]);
10499 Operands.append({Predicate, BasePtr});
10500 Function *F = CGM.getIntrinsic(IntID, { VTy });
10502 return Builder.CreateCall(F, Operands);
10505 // SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
10506 // svpmullt_pair intrinsics, with the exception that their results are bitcast
10507 // to a wider type.
10508 Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
10509 SmallVectorImpl<Value *> &Ops,
10510 unsigned BuiltinID) {
10511 // Splat scalar operand to vector (intrinsics with _n infix)
10512 if (TypeFlags.hasSplatOperand()) {
10513 unsigned OpNo = TypeFlags.getSplatOperand();
10514 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10517 // The pair-wise function has a narrower overloaded type.
10518 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
10519 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
10521 // Now bitcast to the wider result type.
10522 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
10523 return EmitSVEReinterpret(Call, Ty);
10526 Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
10527 ArrayRef<Value *> Ops, unsigned BuiltinID) {
10528 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
10529 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
10530 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
10533 Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
10534 SmallVectorImpl<Value *> &Ops,
10535 unsigned BuiltinID) {
10536 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10537 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
10538 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10540 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
10541 Value *BasePtr = Ops[1];
10543 // Implement the index operand if not omitted.
10544 if (Ops.size() > 3)
10545 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10547 Value *PrfOp = Ops.back();
10549 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
10550 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
10553 Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
10554 llvm::Type *ReturnTy,
10555 SmallVectorImpl<Value *> &Ops,
10556 unsigned IntrinsicID,
10557 bool IsZExtReturn) {
10558 QualType LangPTy = E->getArg(1)->getType();
10559 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10560 LangPTy->castAs<PointerType>()->getPointeeType());
10562 // The vector type that is returned may be different from the
10563 // eventual type loaded from memory.
10564 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
10565 llvm::ScalableVectorType *MemoryTy = nullptr;
10566 llvm::ScalableVectorType *PredTy = nullptr;
10567 bool IsQuadLoad = false;
10568 switch (IntrinsicID) {
10569 case Intrinsic::aarch64_sve_ld1uwq:
10570 case Intrinsic::aarch64_sve_ld1udq:
10571 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10572 PredTy = llvm::ScalableVectorType::get(
10573 llvm::Type::getInt1Ty(getLLVMContext()), 1);
10574 IsQuadLoad = true;
10575 break;
10576 default:
10577 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10578 PredTy = MemoryTy;
10579 break;
10582 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10583 Value *BasePtr = Ops[1];
10585 // Does the load have an offset?
10586 if (Ops.size() > 2)
10587 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10589 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
10590 auto *Load =
10591 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
10592 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10593 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
10595 if (IsQuadLoad)
10596 return Load;
10598 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
10599 : Builder.CreateSExt(Load, VectorTy);
10602 Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
10603 SmallVectorImpl<Value *> &Ops,
10604 unsigned IntrinsicID) {
10605 QualType LangPTy = E->getArg(1)->getType();
10606 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10607 LangPTy->castAs<PointerType>()->getPointeeType());
10609 // The vector type that is stored may be different from the
10610 // eventual type stored to memory.
10611 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
10612 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10614 auto PredTy = MemoryTy;
10615 auto AddrMemoryTy = MemoryTy;
10616 bool IsQuadStore = false;
10618 switch (IntrinsicID) {
10619 case Intrinsic::aarch64_sve_st1wq:
10620 case Intrinsic::aarch64_sve_st1dq:
10621 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10622 PredTy =
10623 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
10624 IsQuadStore = true;
10625 break;
10626 default:
10627 break;
10629 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10630 Value *BasePtr = Ops[1];
10632 // Does the store have an offset?
10633 if (Ops.size() == 4)
10634 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
10636 // Last value is always the data
10637 Value *Val =
10638 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
10640 Function *F =
10641 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
10642 auto *Store =
10643 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
10644 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10645 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
10646 return Store;
10649 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
10650 SmallVectorImpl<Value *> &Ops,
10651 unsigned IntID) {
10652 Ops[2] = EmitSVEPredicateCast(
10653 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
10655 SmallVector<Value *> NewOps;
10656 NewOps.push_back(Ops[2]);
10658 llvm::Value *BasePtr = Ops[3];
10659 llvm::Value *RealSlice = Ops[1];
10660 // If the intrinsic contains the vnum parameter, multiply it with the vector
10661 // size in bytes.
10662 if (Ops.size() == 5) {
10663 Function *StreamingVectorLength =
10664 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
10665 llvm::Value *StreamingVectorLengthCall =
10666 Builder.CreateCall(StreamingVectorLength);
10667 llvm::Value *Mulvl =
10668 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
10669 // The type of the ptr parameter is void *, so use Int8Ty here.
10670 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
10671 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
10672 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
10673 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
10675 NewOps.push_back(BasePtr);
10676 NewOps.push_back(Ops[0]);
10677 NewOps.push_back(RealSlice);
10678 Function *F = CGM.getIntrinsic(IntID);
10679 return Builder.CreateCall(F, NewOps);
10682 Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
10683 SmallVectorImpl<Value *> &Ops,
10684 unsigned IntID) {
10685 auto *VecTy = getSVEType(TypeFlags);
10686 Function *F = CGM.getIntrinsic(IntID, VecTy);
10687 if (TypeFlags.isReadZA())
10688 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
10689 else if (TypeFlags.isWriteZA())
10690 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
10691 return Builder.CreateCall(F, Ops);
10694 Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
10695 SmallVectorImpl<Value *> &Ops,
10696 unsigned IntID) {
10697 // svzero_za() intrinsic zeros the entire za tile and has no paramters.
10698 if (Ops.size() == 0)
10699 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
10700 Function *F = CGM.getIntrinsic(IntID, {});
10701 return Builder.CreateCall(F, Ops);
10704 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
10705 SmallVectorImpl<Value *> &Ops,
10706 unsigned IntID) {
10707 if (Ops.size() == 2)
10708 Ops.push_back(Builder.getInt32(0));
10709 else
10710 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
10711 Function *F = CGM.getIntrinsic(IntID, {});
10712 return Builder.CreateCall(F, Ops);
10715 // Limit the usage of scalable llvm IR generated by the ACLE by using the
10716 // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
10717 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10718 return Builder.CreateVectorSplat(
10719 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10722 Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) {
10723 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
10726 Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10727 // FIXME: For big endian this needs an additional REV, or needs a separate
10728 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10729 // instruction is defined as 'bitwise' equivalent from memory point of
10730 // view (when storing/reloading), whereas the svreinterpret builtin
10731 // implements bitwise equivalent cast from register point of view.
10732 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10734 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
10735 Value *Tuple = llvm::PoisonValue::get(Ty);
10737 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
10738 Value *In = Builder.CreateExtractValue(Val, I);
10739 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
10740 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
10743 return Tuple;
10746 return Builder.CreateBitCast(Val, Ty);
10749 static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10750 SmallVectorImpl<Value *> &Ops) {
10751 auto *SplatZero = Constant::getNullValue(Ty);
10752 Ops.insert(Ops.begin(), SplatZero);
10755 static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10756 SmallVectorImpl<Value *> &Ops) {
10757 auto *SplatUndef = UndefValue::get(Ty);
10758 Ops.insert(Ops.begin(), SplatUndef);
10761 SmallVector<llvm::Type *, 2>
10762 CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10763 llvm::Type *ResultType,
10764 ArrayRef<Value *> Ops) {
10765 if (TypeFlags.isOverloadNone())
10766 return {};
10768 llvm::Type *DefaultType = getSVEType(TypeFlags);
10770 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
10771 return {DefaultType, Ops[1]->getType()};
10773 if (TypeFlags.isOverloadWhileRW())
10774 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10776 if (TypeFlags.isOverloadCvt())
10777 return {Ops[0]->getType(), Ops.back()->getType()};
10779 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10780 ResultType->isVectorTy())
10781 return {ResultType, Ops[1]->getType()};
10783 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10784 return {DefaultType};
10787 Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10788 ArrayRef<Value *> Ops) {
10789 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10790 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
10791 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
10793 if (TypeFlags.isTupleSet())
10794 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
10795 return Builder.CreateExtractValue(Ops[0], Idx);
10798 Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10799 llvm::Type *Ty,
10800 ArrayRef<Value *> Ops) {
10801 assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate");
10803 Value *Tuple = llvm::PoisonValue::get(Ty);
10804 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
10805 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
10807 return Tuple;
10810 void CodeGenFunction::GetAArch64SVEProcessedOperands(
10811 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10812 SVETypeFlags TypeFlags) {
10813 // Find out if any arguments are required to be integer constant expressions.
10814 unsigned ICEArguments = 0;
10815 ASTContext::GetBuiltinTypeError Error;
10816 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10817 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10819 // Tuple set/get only requires one insert/extract vector, which is
10820 // created by EmitSVETupleSetOrGet.
10821 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10823 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10824 bool IsICE = ICEArguments & (1 << i);
10825 Value *Arg = EmitScalarExpr(E->getArg(i));
10827 if (IsICE) {
10828 // If this is required to be a constant, constant fold it so that we know
10829 // that the generated intrinsic gets a ConstantInt.
10830 std::optional<llvm::APSInt> Result =
10831 E->getArg(i)->getIntegerConstantExpr(getContext());
10832 assert(Result && "Expected argument to be a constant");
10834 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
10835 // truncate because the immediate has been range checked and no valid
10836 // immediate requires more than a handful of bits.
10837 *Result = Result->extOrTrunc(32);
10838 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10839 continue;
10842 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
10843 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
10844 Ops.push_back(Builder.CreateExtractValue(Arg, I));
10846 continue;
10849 Ops.push_back(Arg);
// Emits LLVM IR for a call to an AArch64 SVE builtin.
//
// Builtins in the svreinterpret range are handled first via
// EmitSVEReinterpret. Otherwise the builtin is looked up in
// AArch64SVEIntrinsicMap, its operands are collected with
// GetAArch64SVEProcessedOperands, and codegen is dispatched on the builtin's
// SVETypeFlags (loads, stores, gathers/scatters, prefetches, struct
// loads/stores, tuple operations, undef). Builtins with a non-zero
// LLVMIntrinsic take the generic path, which adjusts the operand list and
// then calls that intrinsic. The remaining builtins are expanded one by one
// in the switch on BuiltinID. Returns nullptr for builtins not handled here.
//
// NOTE(review): Builtin is dereferenced without a null check; presumably
// every SVE builtin outside the reinterpret range has a map entry -- confirm.
10853 Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10854 const CallExpr *E) {
10855 llvm::Type *Ty = ConvertType(E->getType());
10856 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10857 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10858 Value *Val = EmitScalarExpr(E->getArg(0));
10859 return EmitSVEReinterpret(Val, Ty);
10862 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10863 AArch64SVEIntrinsicsProvenSorted);
10865 llvm::SmallVector<Value *, 4> Ops;
10866 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10867 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
// Dispatch on the kind of operation recorded in the type flags.
10869 if (TypeFlags.isLoad())
10870 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10871 TypeFlags.isZExtReturn());
10872 else if (TypeFlags.isStore())
10873 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10874 else if (TypeFlags.isGatherLoad())
10875 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10876 else if (TypeFlags.isScatterStore())
10877 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10878 else if (TypeFlags.isPrefetch())
10879 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10880 else if (TypeFlags.isGatherPrefetch())
10881 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10882 else if (TypeFlags.isStructLoad())
10883 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10884 else if (TypeFlags.isStructStore())
10885 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10886 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10887 return EmitSVETupleSetOrGet(TypeFlags, Ops);
10888 else if (TypeFlags.isTupleCreate())
10889 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10890 else if (TypeFlags.isUndef())
10891 return UndefValue::get(Ty);
10892 else if (Builtin->LLVMIntrinsic != 0) {
10893 // Emit set FPMR for intrinsics that require it
// The FPMR value is the last operand; it is removed from Ops here.
10894 if (TypeFlags.setsFPMR())
10895 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
10896 Ops.pop_back_val());
// For the explicit merge forms, prepend a zero or undef operand of the
// result type.
10897 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10898 InsertExplicitZeroOperand(Builder, Ty, Ops);
10900 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10901 InsertExplicitUndefOperand(Builder, Ty, Ops);
10903 // Some ACLE builtins leave out the argument to specify the predicate
10904 // pattern, which is expected to be expanded to an SV_ALL pattern.
10905 if (TypeFlags.isAppendSVALL())
10906 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10907 if (TypeFlags.isInsertOp1SVALL())
10908 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10910 // Predicates must match the main datatype.
10911 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10912 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10913 if (PredTy->getElementType()->isIntegerTy(1))
10914 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10916 // Splat scalar operand to vector (intrinsics with _n infix)
10917 if (TypeFlags.hasSplatOperand()) {
10918 unsigned OpNo = TypeFlags.getSplatOperand();
10919 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
// Swap operands for builtins flagged as taking them in reversed order.
10922 if (TypeFlags.isReverseCompare())
10923 std::swap(Ops[1], Ops[2]);
10924 else if (TypeFlags.isReverseUSDOT())
10925 std::swap(Ops[1], Ops[2]);
10926 else if (TypeFlags.isReverseMergeAnyBinOp() &&
10927 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10928 std::swap(Ops[1], Ops[2]);
10929 else if (TypeFlags.isReverseMergeAnyAccOp() &&
10930 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10931 std::swap(Ops[1], Ops[3]);
10933 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10934 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10935 llvm::Type *OpndTy = Ops[1]->getType();
10936 auto *SplatZero = Constant::getNullValue(OpndTy);
10937 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10940 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10941 getSVEOverloadTypes(TypeFlags, Ty, Ops));
10942 Value *Call = Builder.CreateCall(F, Ops);
// Done if the intrinsic already produces the builtin's result type.
10944 if (Call->getType() == Ty)
10945 return Call;
10947 // Predicate results must be converted to svbool_t.
10948 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
10949 return EmitSVEPredicateCast(Call, PredTy);
10950 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
10951 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
10953 llvm_unreachable("unsupported element count!");
// Builtins that reach this point are expanded individually.
10956 switch (BuiltinID) {
10957 default:
10958 return nullptr;
10960 case SVE::BI__builtin_sve_svreinterpret_b: {
10961 auto SVCountTy =
10962 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10963 Function *CastFromSVCountF =
10964 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10965 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
10967 case SVE::BI__builtin_sve_svreinterpret_c: {
10968 auto SVCountTy =
10969 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10970 Function *CastToSVCountF =
10971 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10972 return Builder.CreateCall(CastToSVCountF, Ops[0]);
// psel: when the first operand is an svcount it is converted to svbool for
// the psel call and the result is converted back to svcount.
10975 case SVE::BI__builtin_sve_svpsel_lane_b8:
10976 case SVE::BI__builtin_sve_svpsel_lane_b16:
10977 case SVE::BI__builtin_sve_svpsel_lane_b32:
10978 case SVE::BI__builtin_sve_svpsel_lane_b64:
10979 case SVE::BI__builtin_sve_svpsel_lane_c8:
10980 case SVE::BI__builtin_sve_svpsel_lane_c16:
10981 case SVE::BI__builtin_sve_svpsel_lane_c32:
10982 case SVE::BI__builtin_sve_svpsel_lane_c64: {
10983 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
10984 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
10985 "aarch64.svcount")) &&
10986 "Unexpected TargetExtType");
10987 auto SVCountTy =
10988 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10989 Function *CastFromSVCountF =
10990 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10991 Function *CastToSVCountF =
10992 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10994 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
10995 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
10996 llvm::Value *Ops0 =
10997 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
10998 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
10999 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
11000 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
11002 case SVE::BI__builtin_sve_svmov_b_z: {
11003 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
11004 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11005 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
11006 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
11007 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
11010 case SVE::BI__builtin_sve_svnot_b_z: {
11011 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
11012 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11013 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
11014 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
11015 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
// svmovlb/svmovlt are emitted through EmitSVEMovl with the matching
// unsigned/signed shll[b|t] intrinsic.
11018 case SVE::BI__builtin_sve_svmovlb_u16:
11019 case SVE::BI__builtin_sve_svmovlb_u32:
11020 case SVE::BI__builtin_sve_svmovlb_u64:
11021 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
11023 case SVE::BI__builtin_sve_svmovlb_s16:
11024 case SVE::BI__builtin_sve_svmovlb_s32:
11025 case SVE::BI__builtin_sve_svmovlb_s64:
11026 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
11028 case SVE::BI__builtin_sve_svmovlt_u16:
11029 case SVE::BI__builtin_sve_svmovlt_u32:
11030 case SVE::BI__builtin_sve_svmovlt_u64:
11031 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
11033 case SVE::BI__builtin_sve_svmovlt_s16:
11034 case SVE::BI__builtin_sve_svmovlt_s32:
11035 case SVE::BI__builtin_sve_svmovlt_s64:
11036 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
11038 case SVE::BI__builtin_sve_svpmullt_u16:
11039 case SVE::BI__builtin_sve_svpmullt_u64:
11040 case SVE::BI__builtin_sve_svpmullt_n_u16:
11041 case SVE::BI__builtin_sve_svpmullt_n_u64:
11042 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
11044 case SVE::BI__builtin_sve_svpmullb_u16:
11045 case SVE::BI__builtin_sve_svpmullb_u64:
11046 case SVE::BI__builtin_sve_svpmullb_n_u16:
11047 case SVE::BI__builtin_sve_svpmullb_n_u64:
11048 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
// svdup_n_b*: compare the scalar against zero, splat the i1 result and cast
// it to the builtin's result type.
11050 case SVE::BI__builtin_sve_svdup_n_b8:
11051 case SVE::BI__builtin_sve_svdup_n_b16:
11052 case SVE::BI__builtin_sve_svdup_n_b32:
11053 case SVE::BI__builtin_sve_svdup_n_b64: {
11054 Value *CmpNE =
11055 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
11056 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
11057 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
11058 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
11061 case SVE::BI__builtin_sve_svdupq_n_b8:
11062 case SVE::BI__builtin_sve_svdupq_n_b16:
11063 case SVE::BI__builtin_sve_svdupq_n_b32:
11064 case SVE::BI__builtin_sve_svdupq_n_b64:
11065 case SVE::BI__builtin_sve_svdupq_n_u8:
11066 case SVE::BI__builtin_sve_svdupq_n_s8:
11067 case SVE::BI__builtin_sve_svdupq_n_u64:
11068 case SVE::BI__builtin_sve_svdupq_n_f64:
11069 case SVE::BI__builtin_sve_svdupq_n_s64:
11070 case SVE::BI__builtin_sve_svdupq_n_u16:
11071 case SVE::BI__builtin_sve_svdupq_n_f16:
11072 case SVE::BI__builtin_sve_svdupq_n_bf16:
11073 case SVE::BI__builtin_sve_svdupq_n_s16:
11074 case SVE::BI__builtin_sve_svdupq_n_u32:
11075 case SVE::BI__builtin_sve_svdupq_n_f32:
11076 case SVE::BI__builtin_sve_svdupq_n_s32: {
11077 // These builtins are implemented by storing each element to an array and using
11078 // ld1rq to materialize a vector.
// NOTE(review): the code below builds a vector with BuildVector, inserts it
// into a poison scalable vector and calls the dupq_lane intrinsic; the
// store/ld1rq description above does not match what is emitted here.
11079 unsigned NumOpnds = Ops.size();
11081 bool IsBoolTy =
11082 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
11084 // For svdupq_n_b* the element type is an integer of width 128/numelts,
11085 // so that the compare can use the width that is natural for the expected
11086 // number of predicate lanes.
11087 llvm::Type *EltTy = Ops[0]->getType();
11088 if (IsBoolTy)
11089 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
11091 SmallVector<llvm::Value *, 16> VecOps;
11092 for (unsigned I = 0; I < NumOpnds; ++I)
11093 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
11094 Value *Vec = BuildVector(VecOps);
11096 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
11097 Value *InsertSubVec = Builder.CreateInsertVector(
11098 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
11100 Function *F =
11101 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
11102 Value *DupQLane =
11103 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
11105 if (!IsBoolTy)
11106 return DupQLane;
11108 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11109 Value *Pred = EmitSVEAllTruePred(TypeFlags);
11111 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
11112 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
11113 : Intrinsic::aarch64_sve_cmpne_wide,
11114 OverloadedTy);
11115 Value *Call = Builder.CreateCall(
11116 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
11117 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
11120 case SVE::BI__builtin_sve_svpfalse_b:
11121 return ConstantInt::getFalse(Ty);
11123 case SVE::BI__builtin_sve_svpfalse_c: {
11124 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
11125 Function *CastToSVCountF =
11126 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
11127 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
// svlen: the known minimum element count of the builtin's vector type
// multiplied by the result of the vscale intrinsic.
11130 case SVE::BI__builtin_sve_svlen_bf16:
11131 case SVE::BI__builtin_sve_svlen_f16:
11132 case SVE::BI__builtin_sve_svlen_f32:
11133 case SVE::BI__builtin_sve_svlen_f64:
11134 case SVE::BI__builtin_sve_svlen_s8:
11135 case SVE::BI__builtin_sve_svlen_s16:
11136 case SVE::BI__builtin_sve_svlen_s32:
11137 case SVE::BI__builtin_sve_svlen_s64:
11138 case SVE::BI__builtin_sve_svlen_u8:
11139 case SVE::BI__builtin_sve_svlen_u16:
11140 case SVE::BI__builtin_sve_svlen_u32:
11141 case SVE::BI__builtin_sve_svlen_u64: {
11142 SVETypeFlags TF(Builtin->TypeModifier);
11143 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
11144 auto *NumEls =
11145 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
11147 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
11148 return Builder.CreateMul(NumEls, Builder.CreateCall(F));
11151 case SVE::BI__builtin_sve_svtbl2_u8:
11152 case SVE::BI__builtin_sve_svtbl2_s8:
11153 case SVE::BI__builtin_sve_svtbl2_u16:
11154 case SVE::BI__builtin_sve_svtbl2_s16:
11155 case SVE::BI__builtin_sve_svtbl2_u32:
11156 case SVE::BI__builtin_sve_svtbl2_s32:
11157 case SVE::BI__builtin_sve_svtbl2_u64:
11158 case SVE::BI__builtin_sve_svtbl2_s64:
11159 case SVE::BI__builtin_sve_svtbl2_f16:
11160 case SVE::BI__builtin_sve_svtbl2_bf16:
11161 case SVE::BI__builtin_sve_svtbl2_f32:
11162 case SVE::BI__builtin_sve_svtbl2_f64: {
11163 SVETypeFlags TF(Builtin->TypeModifier);
11164 auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
11165 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
11166 return Builder.CreateCall(F, Ops);
// svset_neonq: insert the second operand into the first at element index 0.
11169 case SVE::BI__builtin_sve_svset_neonq_s8:
11170 case SVE::BI__builtin_sve_svset_neonq_s16:
11171 case SVE::BI__builtin_sve_svset_neonq_s32:
11172 case SVE::BI__builtin_sve_svset_neonq_s64:
11173 case SVE::BI__builtin_sve_svset_neonq_u8:
11174 case SVE::BI__builtin_sve_svset_neonq_u16:
11175 case SVE::BI__builtin_sve_svset_neonq_u32:
11176 case SVE::BI__builtin_sve_svset_neonq_u64:
11177 case SVE::BI__builtin_sve_svset_neonq_f16:
11178 case SVE::BI__builtin_sve_svset_neonq_f32:
11179 case SVE::BI__builtin_sve_svset_neonq_f64:
11180 case SVE::BI__builtin_sve_svset_neonq_bf16: {
11181 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
// svget_neonq: extract a vector of the result type from the operand,
// starting at element index 0.
11184 case SVE::BI__builtin_sve_svget_neonq_s8:
11185 case SVE::BI__builtin_sve_svget_neonq_s16:
11186 case SVE::BI__builtin_sve_svget_neonq_s32:
11187 case SVE::BI__builtin_sve_svget_neonq_s64:
11188 case SVE::BI__builtin_sve_svget_neonq_u8:
11189 case SVE::BI__builtin_sve_svget_neonq_u16:
11190 case SVE::BI__builtin_sve_svget_neonq_u32:
11191 case SVE::BI__builtin_sve_svget_neonq_u64:
11192 case SVE::BI__builtin_sve_svget_neonq_f16:
11193 case SVE::BI__builtin_sve_svget_neonq_f32:
11194 case SVE::BI__builtin_sve_svget_neonq_f64:
11195 case SVE::BI__builtin_sve_svget_neonq_bf16: {
11196 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
// svdup_neonq: insert the operand into a poison vector of the result type at
// index 0, then call dupq_lane with lane 0.
11199 case SVE::BI__builtin_sve_svdup_neonq_s8:
11200 case SVE::BI__builtin_sve_svdup_neonq_s16:
11201 case SVE::BI__builtin_sve_svdup_neonq_s32:
11202 case SVE::BI__builtin_sve_svdup_neonq_s64:
11203 case SVE::BI__builtin_sve_svdup_neonq_u8:
11204 case SVE::BI__builtin_sve_svdup_neonq_u16:
11205 case SVE::BI__builtin_sve_svdup_neonq_u32:
11206 case SVE::BI__builtin_sve_svdup_neonq_u64:
11207 case SVE::BI__builtin_sve_svdup_neonq_f16:
11208 case SVE::BI__builtin_sve_svdup_neonq_f32:
11209 case SVE::BI__builtin_sve_svdup_neonq_f64:
11210 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
11211 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
11212 Builder.getInt64(0));
11213 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
11214 {Insert, Builder.getInt64(0)});
11218 /// Should not happen
11219 return nullptr;
11222 static void swapCommutativeSMEOperands(unsigned BuiltinID,
11223 SmallVectorImpl<Value *> &Ops) {
11224 unsigned MultiVec;
11225 switch (BuiltinID) {
11226 default:
11227 return;
11228 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
11229 MultiVec = 1;
11230 break;
11231 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
11232 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
11233 MultiVec = 2;
11234 break;
11235 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
11236 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
11237 MultiVec = 4;
11238 break;
11241 if (MultiVec > 0)
11242 for (unsigned I = 0; I < MultiVec; ++I)
11243 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
11246 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
11247 const CallExpr *E) {
11248 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
11249 AArch64SMEIntrinsicsProvenSorted);
11251 llvm::SmallVector<Value *, 4> Ops;
11252 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11253 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
11255 if (TypeFlags.isLoad() || TypeFlags.isStore())
11256 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11257 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
11258 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11259 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
11260 BuiltinID == SME::BI__builtin_sme_svzero_za)
11261 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11262 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
11263 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
11264 BuiltinID == SME::BI__builtin_sme_svldr_za ||
11265 BuiltinID == SME::BI__builtin_sme_svstr_za)
11266 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11268 // Emit set FPMR for intrinsics that require it
11269 if (TypeFlags.setsFPMR())
11270 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
11271 Ops.pop_back_val());
11272 // Handle builtins which require their multi-vector operands to be swapped
11273 swapCommutativeSMEOperands(BuiltinID, Ops);
11275 // Should not happen!
11276 if (Builtin->LLVMIntrinsic == 0)
11277 return nullptr;
11279 // Predicates must match the main datatype.
11280 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
11281 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
11282 if (PredTy->getElementType()->isIntegerTy(1))
11283 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
11285 Function *F =
11286 TypeFlags.isOverloadNone()
11287 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
11288 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
11290 return Builder.CreateCall(F, Ops);
11293 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
11294 const CallExpr *E,
11295 llvm::Triple::ArchType Arch) {
11296 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
11297 BuiltinID <= clang::AArch64::LastSVEBuiltin)
11298 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
11300 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
11301 BuiltinID <= clang::AArch64::LastSMEBuiltin)
11302 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
11304 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
11305 return EmitAArch64CpuSupports(E);
11307 unsigned HintID = static_cast<unsigned>(-1);
11308 switch (BuiltinID) {
11309 default: break;
11310 case clang::AArch64::BI__builtin_arm_nop:
11311 HintID = 0;
11312 break;
11313 case clang::AArch64::BI__builtin_arm_yield:
11314 case clang::AArch64::BI__yield:
11315 HintID = 1;
11316 break;
11317 case clang::AArch64::BI__builtin_arm_wfe:
11318 case clang::AArch64::BI__wfe:
11319 HintID = 2;
11320 break;
11321 case clang::AArch64::BI__builtin_arm_wfi:
11322 case clang::AArch64::BI__wfi:
11323 HintID = 3;
11324 break;
11325 case clang::AArch64::BI__builtin_arm_sev:
11326 case clang::AArch64::BI__sev:
11327 HintID = 4;
11328 break;
11329 case clang::AArch64::BI__builtin_arm_sevl:
11330 case clang::AArch64::BI__sevl:
11331 HintID = 5;
11332 break;
11335 if (HintID != static_cast<unsigned>(-1)) {
11336 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
11337 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
11340 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
11341 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
11342 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11343 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
11346 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
11347 // Create call to __arm_sme_state and store the results to the two pointers.
11348 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
11349 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
11350 false),
11351 "__arm_sme_state"));
11352 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
11353 "aarch64_pstate_sm_compatible");
11354 CI->setAttributes(Attrs);
11355 CI->setCallingConv(
11356 llvm::CallingConv::
11357 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
11358 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
11359 EmitPointerWithAlignment(E->getArg(0)));
11360 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
11361 EmitPointerWithAlignment(E->getArg(1)));
11364 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
11365 assert((getContext().getTypeSize(E->getType()) == 32) &&
11366 "rbit of unusual size!");
11367 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11368 return Builder.CreateCall(
11369 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11371 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
11372 assert((getContext().getTypeSize(E->getType()) == 64) &&
11373 "rbit of unusual size!");
11374 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11375 return Builder.CreateCall(
11376 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11379 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
11380 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
11381 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11382 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
11383 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11384 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
11385 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
11386 return Res;
11389 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
11390 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11391 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
11392 "cls");
11394 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
11395 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11396 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
11397 "cls");
11400 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
11401 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
11402 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11403 llvm::Type *Ty = Arg->getType();
11404 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
11405 Arg, "frint32z");
11408 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
11409 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
11410 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11411 llvm::Type *Ty = Arg->getType();
11412 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
11413 Arg, "frint64z");
11416 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
11417 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
11418 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11419 llvm::Type *Ty = Arg->getType();
11420 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
11421 Arg, "frint32x");
11424 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
11425 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
11426 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11427 llvm::Type *Ty = Arg->getType();
11428 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
11429 Arg, "frint64x");
11432 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
11433 assert((getContext().getTypeSize(E->getType()) == 32) &&
11434 "__jcvt of unusual size!");
11435 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11436 return Builder.CreateCall(
11437 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
11440 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
11441 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
11442 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
11443 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
11444 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
11445 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
11447 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
11448 // Load from the address via an LLVM intrinsic, receiving a
11449 // tuple of 8 i64 words, and store each one to ValPtr.
11450 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
11451 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
11452 llvm::Value *ToRet;
11453 for (size_t i = 0; i < 8; i++) {
11454 llvm::Value *ValOffsetPtr =
11455 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11456 Address Addr =
11457 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11458 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
11460 return ToRet;
11461 } else {
11462 // Load 8 i64 words from ValPtr, and store them to the address
11463 // via an LLVM intrinsic.
11464 SmallVector<llvm::Value *, 9> Args;
11465 Args.push_back(MemAddr);
11466 for (size_t i = 0; i < 8; i++) {
11467 llvm::Value *ValOffsetPtr =
11468 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11469 Address Addr =
11470 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11471 Args.push_back(Builder.CreateLoad(Addr));
11474 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
11475 ? Intrinsic::aarch64_st64b
11476 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
11477 ? Intrinsic::aarch64_st64bv
11478 : Intrinsic::aarch64_st64bv0);
11479 Function *F = CGM.getIntrinsic(Intr);
11480 return Builder.CreateCall(F, Args);
11484 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
11485 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
11487 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
11488 ? Intrinsic::aarch64_rndr
11489 : Intrinsic::aarch64_rndrrs);
11490 Function *F = CGM.getIntrinsic(Intr);
11491 llvm::Value *Val = Builder.CreateCall(F);
11492 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
11493 Value *Status = Builder.CreateExtractValue(Val, 1);
11495 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
11496 Builder.CreateStore(RandomValue, MemAddress);
11497 Status = Builder.CreateZExt(Status, Int32Ty);
11498 return Status;
11501 if (BuiltinID == clang::AArch64::BI__clear_cache) {
11502 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
11503 const FunctionDecl *FD = E->getDirectCallee();
11504 Value *Ops[2];
11505 for (unsigned i = 0; i < 2; i++)
11506 Ops[i] = EmitScalarExpr(E->getArg(i));
11507 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
11508 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
11509 StringRef Name = FD->getName();
11510 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
11513 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11514 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
11515 getContext().getTypeSize(E->getType()) == 128) {
11516 Function *F =
11517 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11518 ? Intrinsic::aarch64_ldaxp
11519 : Intrinsic::aarch64_ldxp);
11521 Value *LdPtr = EmitScalarExpr(E->getArg(0));
11522 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
11524 Value *Val0 = Builder.CreateExtractValue(Val, 1);
11525 Value *Val1 = Builder.CreateExtractValue(Val, 0);
11526 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11527 Val0 = Builder.CreateZExt(Val0, Int128Ty);
11528 Val1 = Builder.CreateZExt(Val1, Int128Ty);
11530 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
11531 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
11532 Val = Builder.CreateOr(Val, Val1);
11533 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
11534 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11535 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
11536 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
11538 QualType Ty = E->getType();
11539 llvm::Type *RealResTy = ConvertType(Ty);
11540 llvm::Type *IntTy =
11541 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11543 Function *F =
11544 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11545 ? Intrinsic::aarch64_ldaxr
11546 : Intrinsic::aarch64_ldxr,
11547 UnqualPtrTy);
11548 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
11549 Val->addParamAttr(
11550 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
11552 if (RealResTy->isPointerTy())
11553 return Builder.CreateIntToPtr(Val, RealResTy);
11555 llvm::Type *IntResTy = llvm::IntegerType::get(
11556 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
11557 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
11558 RealResTy);
11561 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11562 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
11563 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
11564 Function *F =
11565 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11566 ? Intrinsic::aarch64_stlxp
11567 : Intrinsic::aarch64_stxp);
11568 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
11570 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
11571 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
11573 Tmp = Tmp.withElementType(STy);
11574 llvm::Value *Val = Builder.CreateLoad(Tmp);
11576 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
11577 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
11578 Value *StPtr = EmitScalarExpr(E->getArg(1));
11579 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
11582 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11583 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11584 Value *StoreVal = EmitScalarExpr(E->getArg(0));
11585 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
11587 QualType Ty = E->getArg(0)->getType();
11588 llvm::Type *StoreTy =
11589 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11591 if (StoreVal->getType()->isPointerTy())
11592 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
11593 else {
11594 llvm::Type *IntTy = llvm::IntegerType::get(
11595 getLLVMContext(),
11596 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
11597 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
11598 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
11601 Function *F =
11602 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11603 ? Intrinsic::aarch64_stlxr
11604 : Intrinsic::aarch64_stxr,
11605 StoreAddr->getType());
11606 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
11607 CI->addParamAttr(
11608 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
11609 return CI;
11612 if (BuiltinID == clang::AArch64::BI__getReg) {
11613 Expr::EvalResult Result;
11614 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11615 llvm_unreachable("Sema will ensure that the parameter is constant");
11617 llvm::APSInt Value = Result.Val.getInt();
11618 LLVMContext &Context = CGM.getLLVMContext();
11619 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
11621 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
11622 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11623 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11625 llvm::Function *F =
11626 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11627 return Builder.CreateCall(F, Metadata);
11630 if (BuiltinID == clang::AArch64::BI__break) {
11631 Expr::EvalResult Result;
11632 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11633 llvm_unreachable("Sema will ensure that the parameter is constant");
11635 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
11636 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11639 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11640 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
11641 return Builder.CreateCall(F);
11644 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11645 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11646 llvm::SyncScope::SingleThread);
11648 // CRC32
11649 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11650 switch (BuiltinID) {
11651 case clang::AArch64::BI__builtin_arm_crc32b:
11652 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
11653 case clang::AArch64::BI__builtin_arm_crc32cb:
11654 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
11655 case clang::AArch64::BI__builtin_arm_crc32h:
11656 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
11657 case clang::AArch64::BI__builtin_arm_crc32ch:
11658 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
11659 case clang::AArch64::BI__builtin_arm_crc32w:
11660 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
11661 case clang::AArch64::BI__builtin_arm_crc32cw:
11662 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
11663 case clang::AArch64::BI__builtin_arm_crc32d:
11664 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
11665 case clang::AArch64::BI__builtin_arm_crc32cd:
11666 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
11669 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11670 Value *Arg0 = EmitScalarExpr(E->getArg(0));
11671 Value *Arg1 = EmitScalarExpr(E->getArg(1));
11672 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
11674 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11675 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
11677 return Builder.CreateCall(F, {Arg0, Arg1});
11680 // Memory Operations (MOPS)
11681 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11682 Value *Dst = EmitScalarExpr(E->getArg(0));
11683 Value *Val = EmitScalarExpr(E->getArg(1));
11684 Value *Size = EmitScalarExpr(E->getArg(2));
11685 Val = Builder.CreateTrunc(Val, Int8Ty);
11686 Size = Builder.CreateIntCast(Size, Int64Ty, false);
11687 return Builder.CreateCall(
11688 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11691 // Memory Tagging Extensions (MTE) Intrinsics
11692 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11693 switch (BuiltinID) {
11694 case clang::AArch64::BI__builtin_arm_irg:
11695 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11696 case clang::AArch64::BI__builtin_arm_addg:
11697 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11698 case clang::AArch64::BI__builtin_arm_gmi:
11699 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11700 case clang::AArch64::BI__builtin_arm_ldg:
11701 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11702 case clang::AArch64::BI__builtin_arm_stg:
11703 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11704 case clang::AArch64::BI__builtin_arm_subp:
11705 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11708 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11709 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11710 Value *Pointer = EmitScalarExpr(E->getArg(0));
11711 Value *Mask = EmitScalarExpr(E->getArg(1));
11713 Mask = Builder.CreateZExt(Mask, Int64Ty);
11714 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11715 {Pointer, Mask});
11717 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11718 Value *Pointer = EmitScalarExpr(E->getArg(0));
11719 Value *TagOffset = EmitScalarExpr(E->getArg(1));
11721 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
11722 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11723 {Pointer, TagOffset});
11725 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11726 Value *Pointer = EmitScalarExpr(E->getArg(0));
11727 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
11729 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
11730 return Builder.CreateCall(
11731 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
11733 // Although it is possible to supply a different return
11734 // address (first arg) to this intrinsic, for now we set
11735 // return address same as input address.
11736 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11737 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11738 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11739 {TagAddress, TagAddress});
11741 // Although it is possible to supply a different tag (to set)
11742 // to this intrinsic (as first arg), for now we supply
11743 // the tag that is in input address arg (common use case).
11744 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11745 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11746 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11747 {TagAddress, TagAddress});
11749 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11750 Value *PointerA = EmitScalarExpr(E->getArg(0));
11751 Value *PointerB = EmitScalarExpr(E->getArg(1));
11752 return Builder.CreateCall(
11753 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
11757 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11758 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11759 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11760 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11761 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11762 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11763 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11764 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11766 SpecialRegisterAccessKind AccessKind = Write;
11767 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11768 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11769 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11770 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11771 AccessKind = VolatileRead;
11773 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11774 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11776 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11777 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11779 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11780 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11782 llvm::Type *ValueType;
11783 llvm::Type *RegisterType = Int64Ty;
11784 if (Is32Bit) {
11785 ValueType = Int32Ty;
11786 } else if (Is128Bit) {
11787 llvm::Type *Int128Ty =
11788 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
11789 ValueType = Int128Ty;
11790 RegisterType = Int128Ty;
11791 } else if (IsPointerBuiltin) {
11792 ValueType = VoidPtrTy;
11793 } else {
11794 ValueType = Int64Ty;
11797 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
11798 AccessKind);
11801 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11802 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11803 LLVMContext &Context = CGM.getLLVMContext();
11805 unsigned SysReg =
11806 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11808 std::string SysRegStr;
11809 llvm::raw_string_ostream(SysRegStr) <<
11810 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
11811 ((SysReg >> 11) & 7) << ":" <<
11812 ((SysReg >> 7) & 15) << ":" <<
11813 ((SysReg >> 3) & 15) << ":" <<
11814 ( SysReg & 7);
11816 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11817 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11818 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11820 llvm::Type *RegisterType = Int64Ty;
11821 llvm::Type *Types[] = { RegisterType };
11823 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11824 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11826 return Builder.CreateCall(F, Metadata);
11829 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11830 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
11832 return Builder.CreateCall(F, { Metadata, ArgValue });
11835 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11836 llvm::Function *F =
11837 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11838 return Builder.CreateCall(F);
11841 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11842 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11843 return Builder.CreateCall(F);
11846 if (BuiltinID == clang::AArch64::BI__mulh ||
11847 BuiltinID == clang::AArch64::BI__umulh) {
11848 llvm::Type *ResType = ConvertType(E->getType());
11849 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11851 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11852 Value *LHS =
11853 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11854 Value *RHS =
11855 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
11857 Value *MulResult, *HigherBits;
11858 if (IsSigned) {
11859 MulResult = Builder.CreateNSWMul(LHS, RHS);
11860 HigherBits = Builder.CreateAShr(MulResult, 64);
11861 } else {
11862 MulResult = Builder.CreateNUWMul(LHS, RHS);
11863 HigherBits = Builder.CreateLShr(MulResult, 64);
11865 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11867 return HigherBits;
11870 if (BuiltinID == AArch64::BI__writex18byte ||
11871 BuiltinID == AArch64::BI__writex18word ||
11872 BuiltinID == AArch64::BI__writex18dword ||
11873 BuiltinID == AArch64::BI__writex18qword) {
11874 // Process the args first
11875 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11876 Value *DataArg = EmitScalarExpr(E->getArg(1));
11878 // Read x18 as i8*
11879 llvm::Value *X18 = readX18AsPtr(*this);
11881 // Store val at x18 + offset
11882 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11883 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11884 StoreInst *Store =
11885 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
11886 return Store;
11889 if (BuiltinID == AArch64::BI__readx18byte ||
11890 BuiltinID == AArch64::BI__readx18word ||
11891 BuiltinID == AArch64::BI__readx18dword ||
11892 BuiltinID == AArch64::BI__readx18qword) {
11893 // Process the args first
11894 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11896 // Read x18 as i8*
11897 llvm::Value *X18 = readX18AsPtr(*this);
11899 // Load x18 + offset
11900 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11901 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11902 llvm::Type *IntTy = ConvertType(E->getType());
11903 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11904 return Load;
11907 if (BuiltinID == AArch64::BI__addx18byte ||
11908 BuiltinID == AArch64::BI__addx18word ||
11909 BuiltinID == AArch64::BI__addx18dword ||
11910 BuiltinID == AArch64::BI__addx18qword ||
11911 BuiltinID == AArch64::BI__incx18byte ||
11912 BuiltinID == AArch64::BI__incx18word ||
11913 BuiltinID == AArch64::BI__incx18dword ||
11914 BuiltinID == AArch64::BI__incx18qword) {
11915 llvm::Type *IntTy;
11916 bool isIncrement;
11917 switch (BuiltinID) {
11918 case AArch64::BI__incx18byte:
11919 IntTy = Int8Ty;
11920 isIncrement = true;
11921 break;
11922 case AArch64::BI__incx18word:
11923 IntTy = Int16Ty;
11924 isIncrement = true;
11925 break;
11926 case AArch64::BI__incx18dword:
11927 IntTy = Int32Ty;
11928 isIncrement = true;
11929 break;
11930 case AArch64::BI__incx18qword:
11931 IntTy = Int64Ty;
11932 isIncrement = true;
11933 break;
11934 default:
11935 IntTy = ConvertType(E->getArg(1)->getType());
11936 isIncrement = false;
11937 break;
11939 // Process the args first
11940 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11941 Value *ValToAdd =
11942 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
11944 // Read x18 as i8*
11945 llvm::Value *X18 = readX18AsPtr(*this);
11947 // Load x18 + offset
11948 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11949 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11950 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11952 // Add values
11953 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
11955 // Store val at x18 + offset
11956 StoreInst *Store =
11957 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
11958 return Store;
11961 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
11962 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11963 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11964 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11965 Value *Arg = EmitScalarExpr(E->getArg(0));
11966 llvm::Type *RetTy = ConvertType(E->getType());
11967 return Builder.CreateBitCast(Arg, RetTy);
11970 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11971 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11972 BuiltinID == AArch64::BI_CountLeadingZeros ||
11973 BuiltinID == AArch64::BI_CountLeadingZeros64) {
11974 Value *Arg = EmitScalarExpr(E->getArg(0));
11975 llvm::Type *ArgType = Arg->getType();
11977 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11978 BuiltinID == AArch64::BI_CountLeadingOnes64)
11979 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
11981 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
11982 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11984 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11985 BuiltinID == AArch64::BI_CountLeadingZeros64)
11986 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11987 return Result;
11990 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
11991 BuiltinID == AArch64::BI_CountLeadingSigns64) {
11992 Value *Arg = EmitScalarExpr(E->getArg(0));
11994 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
11995 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
11996 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
11998 Value *Result = Builder.CreateCall(F, Arg, "cls");
11999 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
12000 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
12001 return Result;
12004 if (BuiltinID == AArch64::BI_CountOneBits ||
12005 BuiltinID == AArch64::BI_CountOneBits64) {
12006 Value *ArgValue = EmitScalarExpr(E->getArg(0));
12007 llvm::Type *ArgType = ArgValue->getType();
12008 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
12010 Value *Result = Builder.CreateCall(F, ArgValue);
12011 if (BuiltinID == AArch64::BI_CountOneBits64)
12012 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
12013 return Result;
12016 if (BuiltinID == AArch64::BI__prefetch) {
12017 Value *Address = EmitScalarExpr(E->getArg(0));
12018 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
12019 Value *Locality = ConstantInt::get(Int32Ty, 3);
12020 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
12021 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
12022 return Builder.CreateCall(F, {Address, RW, Locality, Data});
12025 if (BuiltinID == AArch64::BI__hlt) {
12026 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
12027 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
12029 // Return 0 for convenience, even though MSVC returns some other undefined
12030 // value.
12031 return ConstantInt::get(Builder.getInt32Ty(), 0);
12034 // Handle MSVC intrinsics before argument evaluation to prevent double
12035 // evaluation.
12036 if (std::optional<MSVCIntrin> MsvcIntId =
12037 translateAarch64ToMsvcIntrin(BuiltinID))
12038 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
12040 // Some intrinsics are equivalent - if they are use the base intrinsic ID.
12041 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
12042 return P.first == BuiltinID;
12044 if (It != end(NEONEquivalentIntrinsicMap))
12045 BuiltinID = It->second;
12047 // Find out if any arguments are required to be integer constant
12048 // expressions.
12049 unsigned ICEArguments = 0;
12050 ASTContext::GetBuiltinTypeError Error;
12051 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
12052 assert(Error == ASTContext::GE_None && "Should not codegen an error");
12054 llvm::SmallVector<Value*, 4> Ops;
12055 Address PtrOp0 = Address::invalid();
12056 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
12057 if (i == 0) {
12058 switch (BuiltinID) {
12059 case NEON::BI__builtin_neon_vld1_v:
12060 case NEON::BI__builtin_neon_vld1q_v:
12061 case NEON::BI__builtin_neon_vld1_dup_v:
12062 case NEON::BI__builtin_neon_vld1q_dup_v:
12063 case NEON::BI__builtin_neon_vld1_lane_v:
12064 case NEON::BI__builtin_neon_vld1q_lane_v:
12065 case NEON::BI__builtin_neon_vst1_v:
12066 case NEON::BI__builtin_neon_vst1q_v:
12067 case NEON::BI__builtin_neon_vst1_lane_v:
12068 case NEON::BI__builtin_neon_vst1q_lane_v:
12069 case NEON::BI__builtin_neon_vldap1_lane_s64:
12070 case NEON::BI__builtin_neon_vldap1q_lane_s64:
12071 case NEON::BI__builtin_neon_vstl1_lane_s64:
12072 case NEON::BI__builtin_neon_vstl1q_lane_s64:
12073 // Get the alignment for the argument in addition to the value;
12074 // we'll use it later.
12075 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
12076 Ops.push_back(PtrOp0.emitRawPointer(*this));
12077 continue;
12080 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
12083 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
12084 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
12085 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
12087 if (Builtin) {
12088 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
12089 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
12090 assert(Result && "SISD intrinsic should have been handled");
12091 return Result;
12094 const Expr *Arg = E->getArg(E->getNumArgs()-1);
12095 NeonTypeFlags Type(0);
12096 if (std::optional<llvm::APSInt> Result =
12097 Arg->getIntegerConstantExpr(getContext()))
12098 // Determine the type of this overloaded NEON intrinsic.
12099 Type = NeonTypeFlags(Result->getZExtValue());
12101 bool usgn = Type.isUnsigned();
12102 bool quad = Type.isQuad();
12104 // Handle non-overloaded intrinsics first.
12105 switch (BuiltinID) {
12106 default: break;
12107 case NEON::BI__builtin_neon_vabsh_f16:
12108 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12109 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
12110 case NEON::BI__builtin_neon_vaddq_p128: {
12111 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
12112 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12113 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12114 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12115 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
12116 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
12117 return Builder.CreateBitCast(Ops[0], Int128Ty);
12119 case NEON::BI__builtin_neon_vldrq_p128: {
12120 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
12121 Value *Ptr = EmitScalarExpr(E->getArg(0));
12122 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
12123 CharUnits::fromQuantity(16));
12125 case NEON::BI__builtin_neon_vstrq_p128: {
12126 Value *Ptr = Ops[0];
12127 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
12129 case NEON::BI__builtin_neon_vcvts_f32_u32:
12130 case NEON::BI__builtin_neon_vcvtd_f64_u64:
12131 usgn = true;
12132 [[fallthrough]];
12133 case NEON::BI__builtin_neon_vcvts_f32_s32:
12134 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
12135 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12136 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
12137 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
12138 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
12139 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
12140 if (usgn)
12141 return Builder.CreateUIToFP(Ops[0], FTy);
12142 return Builder.CreateSIToFP(Ops[0], FTy);
12144 case NEON::BI__builtin_neon_vcvth_f16_u16:
12145 case NEON::BI__builtin_neon_vcvth_f16_u32:
12146 case NEON::BI__builtin_neon_vcvth_f16_u64:
12147 usgn = true;
12148 [[fallthrough]];
12149 case NEON::BI__builtin_neon_vcvth_f16_s16:
12150 case NEON::BI__builtin_neon_vcvth_f16_s32:
12151 case NEON::BI__builtin_neon_vcvth_f16_s64: {
12152 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12153 llvm::Type *FTy = HalfTy;
12154 llvm::Type *InTy;
12155 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
12156 InTy = Int64Ty;
12157 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
12158 InTy = Int32Ty;
12159 else
12160 InTy = Int16Ty;
12161 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
12162 if (usgn)
12163 return Builder.CreateUIToFP(Ops[0], FTy);
12164 return Builder.CreateSIToFP(Ops[0], FTy);
12166 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12167 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12168 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12169 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12170 case NEON::BI__builtin_neon_vcvth_u16_f16:
12171 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12172 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12173 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12174 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12175 case NEON::BI__builtin_neon_vcvth_s16_f16: {
12176 unsigned Int;
12177 llvm::Type* InTy = Int32Ty;
12178 llvm::Type* FTy = HalfTy;
12179 llvm::Type *Tys[2] = {InTy, FTy};
12180 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12181 switch (BuiltinID) {
12182 default: llvm_unreachable("missing builtin ID in switch!");
12183 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12184 Int = Intrinsic::aarch64_neon_fcvtau; break;
12185 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12186 Int = Intrinsic::aarch64_neon_fcvtmu; break;
12187 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12188 Int = Intrinsic::aarch64_neon_fcvtnu; break;
12189 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12190 Int = Intrinsic::aarch64_neon_fcvtpu; break;
12191 case NEON::BI__builtin_neon_vcvth_u16_f16:
12192 Int = Intrinsic::aarch64_neon_fcvtzu; break;
12193 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12194 Int = Intrinsic::aarch64_neon_fcvtas; break;
12195 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12196 Int = Intrinsic::aarch64_neon_fcvtms; break;
12197 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12198 Int = Intrinsic::aarch64_neon_fcvtns; break;
12199 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12200 Int = Intrinsic::aarch64_neon_fcvtps; break;
12201 case NEON::BI__builtin_neon_vcvth_s16_f16:
12202 Int = Intrinsic::aarch64_neon_fcvtzs; break;
12204 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
12205 return Builder.CreateTrunc(Ops[0], Int16Ty);
12207 case NEON::BI__builtin_neon_vcaleh_f16:
12208 case NEON::BI__builtin_neon_vcalth_f16:
12209 case NEON::BI__builtin_neon_vcageh_f16:
12210 case NEON::BI__builtin_neon_vcagth_f16: {
12211 unsigned Int;
12212 llvm::Type* InTy = Int32Ty;
12213 llvm::Type* FTy = HalfTy;
12214 llvm::Type *Tys[2] = {InTy, FTy};
12215 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12216 switch (BuiltinID) {
12217 default: llvm_unreachable("missing builtin ID in switch!");
12218 case NEON::BI__builtin_neon_vcageh_f16:
12219 Int = Intrinsic::aarch64_neon_facge; break;
12220 case NEON::BI__builtin_neon_vcagth_f16:
12221 Int = Intrinsic::aarch64_neon_facgt; break;
12222 case NEON::BI__builtin_neon_vcaleh_f16:
12223 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
12224 case NEON::BI__builtin_neon_vcalth_f16:
12225 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
12227 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
12228 return Builder.CreateTrunc(Ops[0], Int16Ty);
12230 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12231 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
12232 unsigned Int;
12233 llvm::Type* InTy = Int32Ty;
12234 llvm::Type* FTy = HalfTy;
12235 llvm::Type *Tys[2] = {InTy, FTy};
12236 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12237 switch (BuiltinID) {
12238 default: llvm_unreachable("missing builtin ID in switch!");
12239 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12240 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
12241 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
12242 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
12244 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
12245 return Builder.CreateTrunc(Ops[0], Int16Ty);
12247 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12248 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
12249 unsigned Int;
12250 llvm::Type* FTy = HalfTy;
12251 llvm::Type* InTy = Int32Ty;
12252 llvm::Type *Tys[2] = {FTy, InTy};
12253 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12254 switch (BuiltinID) {
12255 default: llvm_unreachable("missing builtin ID in switch!");
12256 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12257 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
12258 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
12259 break;
12260 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
12261 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
12262 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
12263 break;
12265 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
12267 case NEON::BI__builtin_neon_vpaddd_s64: {
12268 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
12269 Value *Vec = EmitScalarExpr(E->getArg(0));
12270 // The vector is v2i64, so make sure it's bitcast to that.
12271 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
12272 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12273 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12274 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12275 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12276 // Pairwise addition of a v2i64 into a scalar i64.
12277 return Builder.CreateAdd(Op0, Op1, "vpaddd");
12279 case NEON::BI__builtin_neon_vpaddd_f64: {
12280 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
12281 Value *Vec = EmitScalarExpr(E->getArg(0));
12282 // The vector is v2f64, so make sure it's bitcast to that.
12283 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
12284 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12285 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12286 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12287 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12288 // Pairwise addition of a v2f64 into a scalar f64.
12289 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
12291 case NEON::BI__builtin_neon_vpadds_f32: {
12292 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
12293 Value *Vec = EmitScalarExpr(E->getArg(0));
12294 // The vector is v2f32, so make sure it's bitcast to that.
12295 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
12296 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12297 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12298 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12299 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12300 // Pairwise addition of a v2f32 into a scalar f32.
12301 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
12303 case NEON::BI__builtin_neon_vceqzd_s64:
12304 case NEON::BI__builtin_neon_vceqzd_f64:
12305 case NEON::BI__builtin_neon_vceqzs_f32:
12306 case NEON::BI__builtin_neon_vceqzh_f16:
12307 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12308 return EmitAArch64CompareBuiltinExpr(
12309 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12310 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
12311 case NEON::BI__builtin_neon_vcgezd_s64:
12312 case NEON::BI__builtin_neon_vcgezd_f64:
12313 case NEON::BI__builtin_neon_vcgezs_f32:
12314 case NEON::BI__builtin_neon_vcgezh_f16:
12315 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12316 return EmitAArch64CompareBuiltinExpr(
12317 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12318 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
12319 case NEON::BI__builtin_neon_vclezd_s64:
12320 case NEON::BI__builtin_neon_vclezd_f64:
12321 case NEON::BI__builtin_neon_vclezs_f32:
12322 case NEON::BI__builtin_neon_vclezh_f16:
12323 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12324 return EmitAArch64CompareBuiltinExpr(
12325 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12326 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
12327 case NEON::BI__builtin_neon_vcgtzd_s64:
12328 case NEON::BI__builtin_neon_vcgtzd_f64:
12329 case NEON::BI__builtin_neon_vcgtzs_f32:
12330 case NEON::BI__builtin_neon_vcgtzh_f16:
12331 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12332 return EmitAArch64CompareBuiltinExpr(
12333 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12334 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
12335 case NEON::BI__builtin_neon_vcltzd_s64:
12336 case NEON::BI__builtin_neon_vcltzd_f64:
12337 case NEON::BI__builtin_neon_vcltzs_f32:
12338 case NEON::BI__builtin_neon_vcltzh_f16:
12339 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12340 return EmitAArch64CompareBuiltinExpr(
12341 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12342 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
12344 case NEON::BI__builtin_neon_vceqzd_u64: {
12345 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12346 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12347 Ops[0] =
12348 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
12349 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
12351 case NEON::BI__builtin_neon_vceqd_f64:
12352 case NEON::BI__builtin_neon_vcled_f64:
12353 case NEON::BI__builtin_neon_vcltd_f64:
12354 case NEON::BI__builtin_neon_vcged_f64:
12355 case NEON::BI__builtin_neon_vcgtd_f64: {
12356 llvm::CmpInst::Predicate P;
12357 switch (BuiltinID) {
12358 default: llvm_unreachable("missing builtin ID in switch!");
12359 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
12360 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
12361 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
12362 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
12363 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
12365 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12366 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12367 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12368 if (P == llvm::FCmpInst::FCMP_OEQ)
12369 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12370 else
12371 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12372 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
12374 case NEON::BI__builtin_neon_vceqs_f32:
12375 case NEON::BI__builtin_neon_vcles_f32:
12376 case NEON::BI__builtin_neon_vclts_f32:
12377 case NEON::BI__builtin_neon_vcges_f32:
12378 case NEON::BI__builtin_neon_vcgts_f32: {
12379 llvm::CmpInst::Predicate P;
12380 switch (BuiltinID) {
12381 default: llvm_unreachable("missing builtin ID in switch!");
12382 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
12383 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
12384 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
12385 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
12386 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
12388 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12389 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
12390 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
12391 if (P == llvm::FCmpInst::FCMP_OEQ)
12392 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12393 else
12394 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12395 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
12397 case NEON::BI__builtin_neon_vceqh_f16:
12398 case NEON::BI__builtin_neon_vcleh_f16:
12399 case NEON::BI__builtin_neon_vclth_f16:
12400 case NEON::BI__builtin_neon_vcgeh_f16:
12401 case NEON::BI__builtin_neon_vcgth_f16: {
12402 llvm::CmpInst::Predicate P;
12403 switch (BuiltinID) {
12404 default: llvm_unreachable("missing builtin ID in switch!");
12405 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
12406 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
12407 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
12408 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
12409 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
12411 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12412 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
12413 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
12414 if (P == llvm::FCmpInst::FCMP_OEQ)
12415 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12416 else
12417 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12418 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
12420 case NEON::BI__builtin_neon_vceqd_s64:
12421 case NEON::BI__builtin_neon_vceqd_u64:
12422 case NEON::BI__builtin_neon_vcgtd_s64:
12423 case NEON::BI__builtin_neon_vcgtd_u64:
12424 case NEON::BI__builtin_neon_vcltd_s64:
12425 case NEON::BI__builtin_neon_vcltd_u64:
12426 case NEON::BI__builtin_neon_vcged_u64:
12427 case NEON::BI__builtin_neon_vcged_s64:
12428 case NEON::BI__builtin_neon_vcled_u64:
12429 case NEON::BI__builtin_neon_vcled_s64: {
12430 llvm::CmpInst::Predicate P;
12431 switch (BuiltinID) {
12432 default: llvm_unreachable("missing builtin ID in switch!");
12433 case NEON::BI__builtin_neon_vceqd_s64:
12434 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
12435 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
12436 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
12437 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
12438 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
12439 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
12440 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
12441 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
12442 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
12444 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12445 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12446 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12447 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
12448 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
12450 case NEON::BI__builtin_neon_vtstd_s64:
12451 case NEON::BI__builtin_neon_vtstd_u64: {
12452 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12453 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12454 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12455 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
12456 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
12457 llvm::Constant::getNullValue(Int64Ty));
12458 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
12460 case NEON::BI__builtin_neon_vset_lane_i8:
12461 case NEON::BI__builtin_neon_vset_lane_i16:
12462 case NEON::BI__builtin_neon_vset_lane_i32:
12463 case NEON::BI__builtin_neon_vset_lane_i64:
12464 case NEON::BI__builtin_neon_vset_lane_bf16:
12465 case NEON::BI__builtin_neon_vset_lane_f32:
12466 case NEON::BI__builtin_neon_vsetq_lane_i8:
12467 case NEON::BI__builtin_neon_vsetq_lane_i16:
12468 case NEON::BI__builtin_neon_vsetq_lane_i32:
12469 case NEON::BI__builtin_neon_vsetq_lane_i64:
12470 case NEON::BI__builtin_neon_vsetq_lane_bf16:
12471 case NEON::BI__builtin_neon_vsetq_lane_f32:
12472 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12473 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12474 case NEON::BI__builtin_neon_vset_lane_f64:
12475 // The vector type needs a cast for the v1f64 variant.
12476 Ops[1] =
12477 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
12478 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12479 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12480 case NEON::BI__builtin_neon_vsetq_lane_f64:
12481 // The vector type needs a cast for the v2f64 variant.
12482 Ops[1] =
12483 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
12484 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12485 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12487 case NEON::BI__builtin_neon_vget_lane_i8:
12488 case NEON::BI__builtin_neon_vdupb_lane_i8:
12489 Ops[0] =
12490 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
12491 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12492 "vget_lane");
12493 case NEON::BI__builtin_neon_vgetq_lane_i8:
12494 case NEON::BI__builtin_neon_vdupb_laneq_i8:
12495 Ops[0] =
12496 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
12497 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12498 "vgetq_lane");
12499 case NEON::BI__builtin_neon_vget_lane_i16:
12500 case NEON::BI__builtin_neon_vduph_lane_i16:
12501 Ops[0] =
12502 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
12503 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12504 "vget_lane");
12505 case NEON::BI__builtin_neon_vgetq_lane_i16:
12506 case NEON::BI__builtin_neon_vduph_laneq_i16:
12507 Ops[0] =
12508 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
12509 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12510 "vgetq_lane");
12511 case NEON::BI__builtin_neon_vget_lane_i32:
12512 case NEON::BI__builtin_neon_vdups_lane_i32:
12513 Ops[0] =
12514 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
12515 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12516 "vget_lane");
12517 case NEON::BI__builtin_neon_vdups_lane_f32:
12518 Ops[0] =
12519 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12520 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12521 "vdups_lane");
12522 case NEON::BI__builtin_neon_vgetq_lane_i32:
12523 case NEON::BI__builtin_neon_vdups_laneq_i32:
12524 Ops[0] =
12525 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
12526 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12527 "vgetq_lane");
12528 case NEON::BI__builtin_neon_vget_lane_i64:
12529 case NEON::BI__builtin_neon_vdupd_lane_i64:
12530 Ops[0] =
12531 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
12532 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12533 "vget_lane");
12534 case NEON::BI__builtin_neon_vdupd_lane_f64:
12535 Ops[0] =
12536 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12537 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12538 "vdupd_lane");
12539 case NEON::BI__builtin_neon_vgetq_lane_i64:
12540 case NEON::BI__builtin_neon_vdupd_laneq_i64:
12541 Ops[0] =
12542 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
12543 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12544 "vgetq_lane");
12545 case NEON::BI__builtin_neon_vget_lane_f32:
12546 Ops[0] =
12547 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12548 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12549 "vget_lane");
12550 case NEON::BI__builtin_neon_vget_lane_f64:
12551 Ops[0] =
12552 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12553 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12554 "vget_lane");
12555 case NEON::BI__builtin_neon_vgetq_lane_f32:
12556 case NEON::BI__builtin_neon_vdups_laneq_f32:
12557 Ops[0] =
12558 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
12559 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12560 "vgetq_lane");
12561 case NEON::BI__builtin_neon_vgetq_lane_f64:
12562 case NEON::BI__builtin_neon_vdupd_laneq_f64:
12563 Ops[0] =
12564 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
12565 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12566 "vgetq_lane");
12567 case NEON::BI__builtin_neon_vaddh_f16:
12568 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12569 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
12570 case NEON::BI__builtin_neon_vsubh_f16:
12571 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12572 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
12573 case NEON::BI__builtin_neon_vmulh_f16:
12574 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12575 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
12576 case NEON::BI__builtin_neon_vdivh_f16:
12577 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12578 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
12579 case NEON::BI__builtin_neon_vfmah_f16:
12580 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12581 return emitCallMaybeConstrainedFPBuiltin(
12582 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12583 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
12584 case NEON::BI__builtin_neon_vfmsh_f16: {
12585 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
12587 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12588 return emitCallMaybeConstrainedFPBuiltin(
12589 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12590 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
12592 case NEON::BI__builtin_neon_vaddd_s64:
12593 case NEON::BI__builtin_neon_vaddd_u64:
12594 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
12595 case NEON::BI__builtin_neon_vsubd_s64:
12596 case NEON::BI__builtin_neon_vsubd_u64:
12597 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
12598 case NEON::BI__builtin_neon_vqdmlalh_s16:
12599 case NEON::BI__builtin_neon_vqdmlslh_s16: {
12600 SmallVector<Value *, 2> ProductOps;
12601 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12602 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
12603 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12604 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12605 ProductOps, "vqdmlXl");
12606 Constant *CI = ConstantInt::get(SizeTy, 0);
12607 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12609 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
12610 ? Intrinsic::aarch64_neon_sqadd
12611 : Intrinsic::aarch64_neon_sqsub;
12612 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
12614 case NEON::BI__builtin_neon_vqshlud_n_s64: {
12615 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12616 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12617 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
12618 Ops, "vqshlu_n");
12620 case NEON::BI__builtin_neon_vqshld_n_u64:
12621 case NEON::BI__builtin_neon_vqshld_n_s64: {
12622 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
12623 ? Intrinsic::aarch64_neon_uqshl
12624 : Intrinsic::aarch64_neon_sqshl;
12625 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12626 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12627 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
12629 case NEON::BI__builtin_neon_vrshrd_n_u64:
12630 case NEON::BI__builtin_neon_vrshrd_n_s64: {
12631 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12632 ? Intrinsic::aarch64_neon_urshl
12633 : Intrinsic::aarch64_neon_srshl;
12634 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12635 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
12636 Ops[1] = ConstantInt::get(Int64Ty, -SV);
12637 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
12639 case NEON::BI__builtin_neon_vrsrad_n_u64:
12640 case NEON::BI__builtin_neon_vrsrad_n_s64: {
12641 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12642 ? Intrinsic::aarch64_neon_urshl
12643 : Intrinsic::aarch64_neon_srshl;
12644 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12645 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
12646 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
12647 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
12648 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
12650 case NEON::BI__builtin_neon_vshld_n_s64:
12651 case NEON::BI__builtin_neon_vshld_n_u64: {
12652 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12653 return Builder.CreateShl(
12654 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
12656 case NEON::BI__builtin_neon_vshrd_n_s64: {
12657 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12658 return Builder.CreateAShr(
12659 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12660 Amt->getZExtValue())),
12661 "shrd_n");
12663 case NEON::BI__builtin_neon_vshrd_n_u64: {
12664 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12665 uint64_t ShiftAmt = Amt->getZExtValue();
12666 // Right-shifting an unsigned value by its size yields 0.
12667 if (ShiftAmt == 64)
12668 return ConstantInt::get(Int64Ty, 0);
12669 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
12670 "shrd_n");
12672 case NEON::BI__builtin_neon_vsrad_n_s64: {
12673 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12674 Ops[1] = Builder.CreateAShr(
12675 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12676 Amt->getZExtValue())),
12677 "shrd_n");
12678 return Builder.CreateAdd(Ops[0], Ops[1]);
12680 case NEON::BI__builtin_neon_vsrad_n_u64: {
12681 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12682 uint64_t ShiftAmt = Amt->getZExtValue();
12683 // Right-shifting an unsigned value by its size yields 0.
12684 // As Op + 0 = Op, return Ops[0] directly.
12685 if (ShiftAmt == 64)
12686 return Ops[0];
12687 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
12688 "shrd_n");
12689 return Builder.CreateAdd(Ops[0], Ops[1]);
12691 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12692 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12693 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12694 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12695 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12696 "lane");
12697 SmallVector<Value *, 2> ProductOps;
12698 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12699 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
12700 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12701 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12702 ProductOps, "vqdmlXl");
12703 Constant *CI = ConstantInt::get(SizeTy, 0);
12704 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12705 Ops.pop_back();
12707 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12708 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12709 ? Intrinsic::aarch64_neon_sqadd
12710 : Intrinsic::aarch64_neon_sqsub;
12711 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
12713 case NEON::BI__builtin_neon_vqdmlals_s32:
12714 case NEON::BI__builtin_neon_vqdmlsls_s32: {
12715 SmallVector<Value *, 2> ProductOps;
12716 ProductOps.push_back(Ops[1]);
12717 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
12718 Ops[1] =
12719 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12720 ProductOps, "vqdmlXl");
12722 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12723 ? Intrinsic::aarch64_neon_sqadd
12724 : Intrinsic::aarch64_neon_sqsub;
12725 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
12727 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12728 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12729 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12730 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12731 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12732 "lane");
12733 SmallVector<Value *, 2> ProductOps;
12734 ProductOps.push_back(Ops[1]);
12735 ProductOps.push_back(Ops[2]);
12736 Ops[1] =
12737 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12738 ProductOps, "vqdmlXl");
12739 Ops.pop_back();
12741 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12742 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12743 ? Intrinsic::aarch64_neon_sqadd
12744 : Intrinsic::aarch64_neon_sqsub;
12745 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
12747 case NEON::BI__builtin_neon_vget_lane_bf16:
12748 case NEON::BI__builtin_neon_vduph_lane_bf16:
12749 case NEON::BI__builtin_neon_vduph_lane_f16: {
12750 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12751 "vget_lane");
12753 case NEON::BI__builtin_neon_vgetq_lane_bf16:
12754 case NEON::BI__builtin_neon_vduph_laneq_bf16:
12755 case NEON::BI__builtin_neon_vduph_laneq_f16: {
12756 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12757 "vgetq_lane");
12760 case clang::AArch64::BI_InterlockedAdd:
12761 case clang::AArch64::BI_InterlockedAdd64: {
12762 Address DestAddr = CheckAtomicAlignment(*this, E);
12763 Value *Val = EmitScalarExpr(E->getArg(1));
12764 AtomicRMWInst *RMWI =
12765 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
12766 llvm::AtomicOrdering::SequentiallyConsistent);
12767 return Builder.CreateAdd(RMWI, Val);
12771 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
12772 llvm::Type *Ty = VTy;
12773 if (!Ty)
12774 return nullptr;
12776 // Not all intrinsics handled by the common case work for AArch64 yet, so only
12777 // defer to common code if it's been added to our special map.
12778 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
12779 AArch64SIMDIntrinsicsProvenSorted);
12781 if (Builtin)
12782 return EmitCommonNeonBuiltinExpr(
12783 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12784 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
12785 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
12787 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
12788 return V;
12790 unsigned Int;
12791 switch (BuiltinID) {
12792 default: return nullptr;
12793 case NEON::BI__builtin_neon_vbsl_v:
12794 case NEON::BI__builtin_neon_vbslq_v: {
12795 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12796 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
12797 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
12798 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
12800 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
12801 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
12802 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
12803 return Builder.CreateBitCast(Ops[0], Ty);
12805 case NEON::BI__builtin_neon_vfma_lane_v:
12806 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12807 // The ARM builtins (and instructions) have the addend as the first
12808 // operand, but the 'fma' intrinsics have it last. Swap it around here.
12809 Value *Addend = Ops[0];
12810 Value *Multiplicand = Ops[1];
12811 Value *LaneSource = Ops[2];
12812 Ops[0] = Multiplicand;
12813 Ops[1] = LaneSource;
12814 Ops[2] = Addend;
12816 // Now adjust things to handle the lane access.
12817 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12818 ? llvm::FixedVectorType::get(VTy->getElementType(),
12819 VTy->getNumElements() / 2)
12820 : VTy;
12821 llvm::Constant *cst = cast<Constant>(Ops[3]);
12822 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12823 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
12824 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
12826 Ops.pop_back();
12827 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12828 : Intrinsic::fma;
12829 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
12831 case NEON::BI__builtin_neon_vfma_laneq_v: {
12832 auto *VTy = cast<llvm::FixedVectorType>(Ty);
12833 // v1f64 fma should be mapped to Neon scalar f64 fma
12834 if (VTy && VTy->getElementType() == DoubleTy) {
12835 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12836 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12837 llvm::FixedVectorType *VTy =
12838 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
12839 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
12840 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12841 Value *Result;
12842 Result = emitCallMaybeConstrainedFPBuiltin(
12843 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12844 DoubleTy, {Ops[1], Ops[2], Ops[0]});
12845 return Builder.CreateBitCast(Result, Ty);
12847 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12848 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12850 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12851 VTy->getNumElements() * 2);
12852 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
12853 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12854 cast<ConstantInt>(Ops[3]));
12855 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
12857 return emitCallMaybeConstrainedFPBuiltin(
12858 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12859 {Ops[2], Ops[1], Ops[0]});
12861 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12862 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12863 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12865 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12866 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
12867 return emitCallMaybeConstrainedFPBuiltin(
12868 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12869 {Ops[2], Ops[1], Ops[0]});
12871 case NEON::BI__builtin_neon_vfmah_lane_f16:
12872 case NEON::BI__builtin_neon_vfmas_lane_f32:
12873 case NEON::BI__builtin_neon_vfmah_laneq_f16:
12874 case NEON::BI__builtin_neon_vfmas_laneq_f32:
12875 case NEON::BI__builtin_neon_vfmad_lane_f64:
12876 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12877 Ops.push_back(EmitScalarExpr(E->getArg(3)));
12878 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
12879 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12880 return emitCallMaybeConstrainedFPBuiltin(
12881 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12882 {Ops[1], Ops[2], Ops[0]});
12884 case NEON::BI__builtin_neon_vmull_v:
12885 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12886 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12887 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12888 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
12889 case NEON::BI__builtin_neon_vmax_v:
12890 case NEON::BI__builtin_neon_vmaxq_v:
12891 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12892 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12893 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12894 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
12895 case NEON::BI__builtin_neon_vmaxh_f16: {
12896 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12897 Int = Intrinsic::aarch64_neon_fmax;
12898 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
12900 case NEON::BI__builtin_neon_vmin_v:
12901 case NEON::BI__builtin_neon_vminq_v:
12902 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12903 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12904 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12905 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
12906 case NEON::BI__builtin_neon_vminh_f16: {
12907 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12908 Int = Intrinsic::aarch64_neon_fmin;
12909 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12911 case NEON::BI__builtin_neon_vabd_v:
12912 case NEON::BI__builtin_neon_vabdq_v:
12913 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12914 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12915 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12916 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
12917 case NEON::BI__builtin_neon_vpadal_v:
12918 case NEON::BI__builtin_neon_vpadalq_v: {
12919 unsigned ArgElts = VTy->getNumElements();
12920 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12921 unsigned BitWidth = EltTy->getBitWidth();
12922 auto *ArgTy = llvm::FixedVectorType::get(
12923 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12924 llvm::Type* Tys[2] = { VTy, ArgTy };
12925 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12926 SmallVector<llvm::Value*, 1> TmpOps;
12927 TmpOps.push_back(Ops[1]);
12928 Function *F = CGM.getIntrinsic(Int, Tys);
12929 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
12930 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
12931 return Builder.CreateAdd(tmp, addend);
12933 case NEON::BI__builtin_neon_vpmin_v:
12934 case NEON::BI__builtin_neon_vpminq_v:
12935 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12936 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12937 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12938 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
12939 case NEON::BI__builtin_neon_vpmax_v:
12940 case NEON::BI__builtin_neon_vpmaxq_v:
12941 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12942 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12943 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12944 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
12945 case NEON::BI__builtin_neon_vminnm_v:
12946 case NEON::BI__builtin_neon_vminnmq_v:
12947 Int = Intrinsic::aarch64_neon_fminnm;
12948 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
12949 case NEON::BI__builtin_neon_vminnmh_f16:
12950 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12951 Int = Intrinsic::aarch64_neon_fminnm;
12952 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
12953 case NEON::BI__builtin_neon_vmaxnm_v:
12954 case NEON::BI__builtin_neon_vmaxnmq_v:
12955 Int = Intrinsic::aarch64_neon_fmaxnm;
12956 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
12957 case NEON::BI__builtin_neon_vmaxnmh_f16:
12958 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12959 Int = Intrinsic::aarch64_neon_fmaxnm;
12960 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
12961 case NEON::BI__builtin_neon_vrecpss_f32: {
12962 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12963 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
12964 Ops, "vrecps");
12966 case NEON::BI__builtin_neon_vrecpsd_f64:
12967 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12968 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
12969 Ops, "vrecps");
12970 case NEON::BI__builtin_neon_vrecpsh_f16:
12971 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12972 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
12973 Ops, "vrecps");
12974 case NEON::BI__builtin_neon_vqshrun_n_v:
12975 Int = Intrinsic::aarch64_neon_sqshrun;
12976 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
12977 case NEON::BI__builtin_neon_vqrshrun_n_v:
12978 Int = Intrinsic::aarch64_neon_sqrshrun;
12979 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
12980 case NEON::BI__builtin_neon_vqshrn_n_v:
12981 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12982 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
12983 case NEON::BI__builtin_neon_vrshrn_n_v:
12984 Int = Intrinsic::aarch64_neon_rshrn;
12985 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
12986 case NEON::BI__builtin_neon_vqrshrn_n_v:
12987 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12988 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
12989 case NEON::BI__builtin_neon_vrndah_f16: {
12990 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12991 Int = Builder.getIsFPConstrained()
12992 ? Intrinsic::experimental_constrained_round
12993 : Intrinsic::round;
12994 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
12996 case NEON::BI__builtin_neon_vrnda_v:
12997 case NEON::BI__builtin_neon_vrndaq_v: {
12998 Int = Builder.getIsFPConstrained()
12999 ? Intrinsic::experimental_constrained_round
13000 : Intrinsic::round;
13001 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
13003 case NEON::BI__builtin_neon_vrndih_f16: {
13004 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13005 Int = Builder.getIsFPConstrained()
13006 ? Intrinsic::experimental_constrained_nearbyint
13007 : Intrinsic::nearbyint;
13008 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
13010 case NEON::BI__builtin_neon_vrndmh_f16: {
13011 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13012 Int = Builder.getIsFPConstrained()
13013 ? Intrinsic::experimental_constrained_floor
13014 : Intrinsic::floor;
13015 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
13017 case NEON::BI__builtin_neon_vrndm_v:
13018 case NEON::BI__builtin_neon_vrndmq_v: {
13019 Int = Builder.getIsFPConstrained()
13020 ? Intrinsic::experimental_constrained_floor
13021 : Intrinsic::floor;
13022 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
13024 case NEON::BI__builtin_neon_vrndnh_f16: {
13025 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13026 Int = Builder.getIsFPConstrained()
13027 ? Intrinsic::experimental_constrained_roundeven
13028 : Intrinsic::roundeven;
13029 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
13031 case NEON::BI__builtin_neon_vrndn_v:
13032 case NEON::BI__builtin_neon_vrndnq_v: {
13033 Int = Builder.getIsFPConstrained()
13034 ? Intrinsic::experimental_constrained_roundeven
13035 : Intrinsic::roundeven;
13036 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
13038 case NEON::BI__builtin_neon_vrndns_f32: {
13039 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13040 Int = Builder.getIsFPConstrained()
13041 ? Intrinsic::experimental_constrained_roundeven
13042 : Intrinsic::roundeven;
13043 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
13045 case NEON::BI__builtin_neon_vrndph_f16: {
13046 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13047 Int = Builder.getIsFPConstrained()
13048 ? Intrinsic::experimental_constrained_ceil
13049 : Intrinsic::ceil;
13050 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
13052 case NEON::BI__builtin_neon_vrndp_v:
13053 case NEON::BI__builtin_neon_vrndpq_v: {
13054 Int = Builder.getIsFPConstrained()
13055 ? Intrinsic::experimental_constrained_ceil
13056 : Intrinsic::ceil;
13057 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
13059 case NEON::BI__builtin_neon_vrndxh_f16: {
13060 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13061 Int = Builder.getIsFPConstrained()
13062 ? Intrinsic::experimental_constrained_rint
13063 : Intrinsic::rint;
13064 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
13066 case NEON::BI__builtin_neon_vrndx_v:
13067 case NEON::BI__builtin_neon_vrndxq_v: {
13068 Int = Builder.getIsFPConstrained()
13069 ? Intrinsic::experimental_constrained_rint
13070 : Intrinsic::rint;
13071 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
13073 case NEON::BI__builtin_neon_vrndh_f16: {
13074 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13075 Int = Builder.getIsFPConstrained()
13076 ? Intrinsic::experimental_constrained_trunc
13077 : Intrinsic::trunc;
13078 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
13080 case NEON::BI__builtin_neon_vrnd32x_f32:
13081 case NEON::BI__builtin_neon_vrnd32xq_f32:
13082 case NEON::BI__builtin_neon_vrnd32x_f64:
13083 case NEON::BI__builtin_neon_vrnd32xq_f64: {
13084 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13085 Int = Intrinsic::aarch64_neon_frint32x;
13086 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
13088 case NEON::BI__builtin_neon_vrnd32z_f32:
13089 case NEON::BI__builtin_neon_vrnd32zq_f32:
13090 case NEON::BI__builtin_neon_vrnd32z_f64:
13091 case NEON::BI__builtin_neon_vrnd32zq_f64: {
13092 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13093 Int = Intrinsic::aarch64_neon_frint32z;
13094 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
13096 case NEON::BI__builtin_neon_vrnd64x_f32:
13097 case NEON::BI__builtin_neon_vrnd64xq_f32:
13098 case NEON::BI__builtin_neon_vrnd64x_f64:
13099 case NEON::BI__builtin_neon_vrnd64xq_f64: {
13100 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13101 Int = Intrinsic::aarch64_neon_frint64x;
13102 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
13104 case NEON::BI__builtin_neon_vrnd64z_f32:
13105 case NEON::BI__builtin_neon_vrnd64zq_f32:
13106 case NEON::BI__builtin_neon_vrnd64z_f64:
13107 case NEON::BI__builtin_neon_vrnd64zq_f64: {
13108 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13109 Int = Intrinsic::aarch64_neon_frint64z;
13110 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
13112 case NEON::BI__builtin_neon_vrnd_v:
13113 case NEON::BI__builtin_neon_vrndq_v: {
13114 Int = Builder.getIsFPConstrained()
13115 ? Intrinsic::experimental_constrained_trunc
13116 : Intrinsic::trunc;
13117 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
13119 case NEON::BI__builtin_neon_vcvt_f64_v:
13120 case NEON::BI__builtin_neon_vcvtq_f64_v:
13121 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13122 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
13123 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
13124 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
13125 case NEON::BI__builtin_neon_vcvt_f64_f32: {
13126 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
13127 "unexpected vcvt_f64_f32 builtin");
13128 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
13129 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
13131 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
13133 case NEON::BI__builtin_neon_vcvt_f32_f64: {
13134 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
13135 "unexpected vcvt_f32_f64 builtin");
13136 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
13137 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
13139 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
13141 case NEON::BI__builtin_neon_vcvt_s32_v:
13142 case NEON::BI__builtin_neon_vcvt_u32_v:
13143 case NEON::BI__builtin_neon_vcvt_s64_v:
13144 case NEON::BI__builtin_neon_vcvt_u64_v:
13145 case NEON::BI__builtin_neon_vcvt_s16_f16:
13146 case NEON::BI__builtin_neon_vcvt_u16_f16:
13147 case NEON::BI__builtin_neon_vcvtq_s32_v:
13148 case NEON::BI__builtin_neon_vcvtq_u32_v:
13149 case NEON::BI__builtin_neon_vcvtq_s64_v:
13150 case NEON::BI__builtin_neon_vcvtq_u64_v:
13151 case NEON::BI__builtin_neon_vcvtq_s16_f16:
13152 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
13153 Int =
13154 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
13155 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
13156 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
13158 case NEON::BI__builtin_neon_vcvta_s16_f16:
13159 case NEON::BI__builtin_neon_vcvta_u16_f16:
13160 case NEON::BI__builtin_neon_vcvta_s32_v:
13161 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
13162 case NEON::BI__builtin_neon_vcvtaq_s32_v:
13163 case NEON::BI__builtin_neon_vcvta_u32_v:
13164 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
13165 case NEON::BI__builtin_neon_vcvtaq_u32_v:
13166 case NEON::BI__builtin_neon_vcvta_s64_v:
13167 case NEON::BI__builtin_neon_vcvtaq_s64_v:
13168 case NEON::BI__builtin_neon_vcvta_u64_v:
13169 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
13170 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
13171 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13172 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
13174 case NEON::BI__builtin_neon_vcvtm_s16_f16:
13175 case NEON::BI__builtin_neon_vcvtm_s32_v:
13176 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
13177 case NEON::BI__builtin_neon_vcvtmq_s32_v:
13178 case NEON::BI__builtin_neon_vcvtm_u16_f16:
13179 case NEON::BI__builtin_neon_vcvtm_u32_v:
13180 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
13181 case NEON::BI__builtin_neon_vcvtmq_u32_v:
13182 case NEON::BI__builtin_neon_vcvtm_s64_v:
13183 case NEON::BI__builtin_neon_vcvtmq_s64_v:
13184 case NEON::BI__builtin_neon_vcvtm_u64_v:
13185 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
13186 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
13187 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13188 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
13190 case NEON::BI__builtin_neon_vcvtn_s16_f16:
13191 case NEON::BI__builtin_neon_vcvtn_s32_v:
13192 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
13193 case NEON::BI__builtin_neon_vcvtnq_s32_v:
13194 case NEON::BI__builtin_neon_vcvtn_u16_f16:
13195 case NEON::BI__builtin_neon_vcvtn_u32_v:
13196 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
13197 case NEON::BI__builtin_neon_vcvtnq_u32_v:
13198 case NEON::BI__builtin_neon_vcvtn_s64_v:
13199 case NEON::BI__builtin_neon_vcvtnq_s64_v:
13200 case NEON::BI__builtin_neon_vcvtn_u64_v:
13201 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
13202 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
13203 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13204 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
13206 case NEON::BI__builtin_neon_vcvtp_s16_f16:
13207 case NEON::BI__builtin_neon_vcvtp_s32_v:
13208 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
13209 case NEON::BI__builtin_neon_vcvtpq_s32_v:
13210 case NEON::BI__builtin_neon_vcvtp_u16_f16:
13211 case NEON::BI__builtin_neon_vcvtp_u32_v:
13212 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
13213 case NEON::BI__builtin_neon_vcvtpq_u32_v:
13214 case NEON::BI__builtin_neon_vcvtp_s64_v:
13215 case NEON::BI__builtin_neon_vcvtpq_s64_v:
13216 case NEON::BI__builtin_neon_vcvtp_u64_v:
13217 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
13218 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
13219 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13220 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
13222 case NEON::BI__builtin_neon_vmulx_v:
13223 case NEON::BI__builtin_neon_vmulxq_v: {
13224 Int = Intrinsic::aarch64_neon_fmulx;
13225 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
13227 case NEON::BI__builtin_neon_vmulxh_lane_f16:
13228 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
13229 // vmulx_lane should be mapped to Neon scalar mulx after
13230 // extracting the scalar element
13231 Ops.push_back(EmitScalarExpr(E->getArg(2)));
13232 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
13233 Ops.pop_back();
13234 Int = Intrinsic::aarch64_neon_fmulx;
13235 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
13237 case NEON::BI__builtin_neon_vmul_lane_v:
13238 case NEON::BI__builtin_neon_vmul_laneq_v: {
13239 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
13240 bool Quad = false;
13241 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
13242 Quad = true;
13243 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13244 llvm::FixedVectorType *VTy =
13245 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
13246 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13247 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
13248 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
13249 return Builder.CreateBitCast(Result, Ty);
13251 case NEON::BI__builtin_neon_vnegd_s64:
13252 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
13253 case NEON::BI__builtin_neon_vnegh_f16:
13254 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
13255 case NEON::BI__builtin_neon_vpmaxnm_v:
13256 case NEON::BI__builtin_neon_vpmaxnmq_v: {
13257 Int = Intrinsic::aarch64_neon_fmaxnmp;
13258 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
13260 case NEON::BI__builtin_neon_vpminnm_v:
13261 case NEON::BI__builtin_neon_vpminnmq_v: {
13262 Int = Intrinsic::aarch64_neon_fminnmp;
13263 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
13265 case NEON::BI__builtin_neon_vsqrth_f16: {
13266 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13267 Int = Builder.getIsFPConstrained()
13268 ? Intrinsic::experimental_constrained_sqrt
13269 : Intrinsic::sqrt;
13270 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
13272 case NEON::BI__builtin_neon_vsqrt_v:
13273 case NEON::BI__builtin_neon_vsqrtq_v: {
13274 Int = Builder.getIsFPConstrained()
13275 ? Intrinsic::experimental_constrained_sqrt
13276 : Intrinsic::sqrt;
13277 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13278 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
13280 case NEON::BI__builtin_neon_vrbit_v:
13281 case NEON::BI__builtin_neon_vrbitq_v: {
13282 Int = Intrinsic::bitreverse;
13283 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
13285 case NEON::BI__builtin_neon_vaddv_u8:
13286 // FIXME: These are handled by the AArch64 scalar code.
13287 usgn = true;
13288 [[fallthrough]];
13289 case NEON::BI__builtin_neon_vaddv_s8: {
13290 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13291 Ty = Int32Ty;
13292 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13293 llvm::Type *Tys[2] = { Ty, VTy };
13294 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13295 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13296 return Builder.CreateTrunc(Ops[0], Int8Ty);
13298 case NEON::BI__builtin_neon_vaddv_u16:
13299 usgn = true;
13300 [[fallthrough]];
13301 case NEON::BI__builtin_neon_vaddv_s16: {
13302 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13303 Ty = Int32Ty;
13304 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13305 llvm::Type *Tys[2] = { Ty, VTy };
13306 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13307 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13308 return Builder.CreateTrunc(Ops[0], Int16Ty);
13310 case NEON::BI__builtin_neon_vaddvq_u8:
13311 usgn = true;
13312 [[fallthrough]];
13313 case NEON::BI__builtin_neon_vaddvq_s8: {
13314 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13315 Ty = Int32Ty;
13316 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13317 llvm::Type *Tys[2] = { Ty, VTy };
13318 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13319 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13320 return Builder.CreateTrunc(Ops[0], Int8Ty);
13322 case NEON::BI__builtin_neon_vaddvq_u16:
13323 usgn = true;
13324 [[fallthrough]];
13325 case NEON::BI__builtin_neon_vaddvq_s16: {
13326 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13327 Ty = Int32Ty;
13328 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13329 llvm::Type *Tys[2] = { Ty, VTy };
13330 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13331 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13332 return Builder.CreateTrunc(Ops[0], Int16Ty);
13334 case NEON::BI__builtin_neon_vmaxv_u8: {
13335 Int = Intrinsic::aarch64_neon_umaxv;
13336 Ty = Int32Ty;
13337 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13338 llvm::Type *Tys[2] = { Ty, VTy };
13339 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13340 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13341 return Builder.CreateTrunc(Ops[0], Int8Ty);
13343 case NEON::BI__builtin_neon_vmaxv_u16: {
13344 Int = Intrinsic::aarch64_neon_umaxv;
13345 Ty = Int32Ty;
13346 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13347 llvm::Type *Tys[2] = { Ty, VTy };
13348 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13349 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13350 return Builder.CreateTrunc(Ops[0], Int16Ty);
13352 case NEON::BI__builtin_neon_vmaxvq_u8: {
13353 Int = Intrinsic::aarch64_neon_umaxv;
13354 Ty = Int32Ty;
13355 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13356 llvm::Type *Tys[2] = { Ty, VTy };
13357 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13358 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13359 return Builder.CreateTrunc(Ops[0], Int8Ty);
13361 case NEON::BI__builtin_neon_vmaxvq_u16: {
13362 Int = Intrinsic::aarch64_neon_umaxv;
13363 Ty = Int32Ty;
13364 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13365 llvm::Type *Tys[2] = { Ty, VTy };
13366 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13367 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13368 return Builder.CreateTrunc(Ops[0], Int16Ty);
13370 case NEON::BI__builtin_neon_vmaxv_s8: {
13371 Int = Intrinsic::aarch64_neon_smaxv;
13372 Ty = Int32Ty;
13373 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13374 llvm::Type *Tys[2] = { Ty, VTy };
13375 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13376 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13377 return Builder.CreateTrunc(Ops[0], Int8Ty);
13379 case NEON::BI__builtin_neon_vmaxv_s16: {
13380 Int = Intrinsic::aarch64_neon_smaxv;
13381 Ty = Int32Ty;
13382 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13383 llvm::Type *Tys[2] = { Ty, VTy };
13384 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13385 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13386 return Builder.CreateTrunc(Ops[0], Int16Ty);
13388 case NEON::BI__builtin_neon_vmaxvq_s8: {
13389 Int = Intrinsic::aarch64_neon_smaxv;
13390 Ty = Int32Ty;
13391 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13392 llvm::Type *Tys[2] = { Ty, VTy };
13393 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13394 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13395 return Builder.CreateTrunc(Ops[0], Int8Ty);
13397 case NEON::BI__builtin_neon_vmaxvq_s16: {
13398 Int = Intrinsic::aarch64_neon_smaxv;
13399 Ty = Int32Ty;
13400 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13401 llvm::Type *Tys[2] = { Ty, VTy };
13402 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13403 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13404 return Builder.CreateTrunc(Ops[0], Int16Ty);
13406 case NEON::BI__builtin_neon_vmaxv_f16: {
13407 Int = Intrinsic::aarch64_neon_fmaxv;
13408 Ty = HalfTy;
13409 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13410 llvm::Type *Tys[2] = { Ty, VTy };
13411 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13412 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13413 return Builder.CreateTrunc(Ops[0], HalfTy);
13415 case NEON::BI__builtin_neon_vmaxvq_f16: {
13416 Int = Intrinsic::aarch64_neon_fmaxv;
13417 Ty = HalfTy;
13418 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13419 llvm::Type *Tys[2] = { Ty, VTy };
13420 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13421 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13422 return Builder.CreateTrunc(Ops[0], HalfTy);
13424 case NEON::BI__builtin_neon_vminv_u8: {
13425 Int = Intrinsic::aarch64_neon_uminv;
13426 Ty = Int32Ty;
13427 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13428 llvm::Type *Tys[2] = { Ty, VTy };
13429 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13430 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13431 return Builder.CreateTrunc(Ops[0], Int8Ty);
13433 case NEON::BI__builtin_neon_vminv_u16: {
13434 Int = Intrinsic::aarch64_neon_uminv;
13435 Ty = Int32Ty;
13436 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13437 llvm::Type *Tys[2] = { Ty, VTy };
13438 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13439 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13440 return Builder.CreateTrunc(Ops[0], Int16Ty);
13442 case NEON::BI__builtin_neon_vminvq_u8: {
13443 Int = Intrinsic::aarch64_neon_uminv;
13444 Ty = Int32Ty;
13445 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13446 llvm::Type *Tys[2] = { Ty, VTy };
13447 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13448 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13449 return Builder.CreateTrunc(Ops[0], Int8Ty);
13451 case NEON::BI__builtin_neon_vminvq_u16: {
13452 Int = Intrinsic::aarch64_neon_uminv;
13453 Ty = Int32Ty;
13454 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13455 llvm::Type *Tys[2] = { Ty, VTy };
13456 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13457 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13458 return Builder.CreateTrunc(Ops[0], Int16Ty);
13460 case NEON::BI__builtin_neon_vminv_s8: {
13461 Int = Intrinsic::aarch64_neon_sminv;
13462 Ty = Int32Ty;
13463 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13464 llvm::Type *Tys[2] = { Ty, VTy };
13465 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13466 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13467 return Builder.CreateTrunc(Ops[0], Int8Ty);
13469 case NEON::BI__builtin_neon_vminv_s16: {
13470 Int = Intrinsic::aarch64_neon_sminv;
13471 Ty = Int32Ty;
13472 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13473 llvm::Type *Tys[2] = { Ty, VTy };
13474 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13475 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13476 return Builder.CreateTrunc(Ops[0], Int16Ty);
13478 case NEON::BI__builtin_neon_vminvq_s8: {
13479 Int = Intrinsic::aarch64_neon_sminv;
13480 Ty = Int32Ty;
13481 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13482 llvm::Type *Tys[2] = { Ty, VTy };
13483 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13484 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13485 return Builder.CreateTrunc(Ops[0], Int8Ty);
13487 case NEON::BI__builtin_neon_vminvq_s16: {
13488 Int = Intrinsic::aarch64_neon_sminv;
13489 Ty = Int32Ty;
13490 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13491 llvm::Type *Tys[2] = { Ty, VTy };
13492 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13493 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13494 return Builder.CreateTrunc(Ops[0], Int16Ty);
13496 case NEON::BI__builtin_neon_vminv_f16: {
13497 Int = Intrinsic::aarch64_neon_fminv;
13498 Ty = HalfTy;
13499 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13500 llvm::Type *Tys[2] = { Ty, VTy };
13501 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13502 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13503 return Builder.CreateTrunc(Ops[0], HalfTy);
13505 case NEON::BI__builtin_neon_vminvq_f16: {
13506 Int = Intrinsic::aarch64_neon_fminv;
13507 Ty = HalfTy;
13508 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13509 llvm::Type *Tys[2] = { Ty, VTy };
13510 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13511 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13512 return Builder.CreateTrunc(Ops[0], HalfTy);
13514 case NEON::BI__builtin_neon_vmaxnmv_f16: {
13515 Int = Intrinsic::aarch64_neon_fmaxnmv;
13516 Ty = HalfTy;
13517 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13518 llvm::Type *Tys[2] = { Ty, VTy };
13519 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13520 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13521 return Builder.CreateTrunc(Ops[0], HalfTy);
13523 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
13524 Int = Intrinsic::aarch64_neon_fmaxnmv;
13525 Ty = HalfTy;
13526 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13527 llvm::Type *Tys[2] = { Ty, VTy };
13528 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13529 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13530 return Builder.CreateTrunc(Ops[0], HalfTy);
13532 case NEON::BI__builtin_neon_vminnmv_f16: {
13533 Int = Intrinsic::aarch64_neon_fminnmv;
13534 Ty = HalfTy;
13535 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13536 llvm::Type *Tys[2] = { Ty, VTy };
13537 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13538 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13539 return Builder.CreateTrunc(Ops[0], HalfTy);
13541 case NEON::BI__builtin_neon_vminnmvq_f16: {
13542 Int = Intrinsic::aarch64_neon_fminnmv;
13543 Ty = HalfTy;
13544 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13545 llvm::Type *Tys[2] = { Ty, VTy };
13546 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13547 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13548 return Builder.CreateTrunc(Ops[0], HalfTy);
13550 case NEON::BI__builtin_neon_vmul_n_f64: {
13551 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13552 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
13553 return Builder.CreateFMul(Ops[0], RHS);
13555 case NEON::BI__builtin_neon_vaddlv_u8: {
13556 Int = Intrinsic::aarch64_neon_uaddlv;
13557 Ty = Int32Ty;
13558 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13559 llvm::Type *Tys[2] = { Ty, VTy };
13560 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13561 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13562 return Builder.CreateTrunc(Ops[0], Int16Ty);
13564 case NEON::BI__builtin_neon_vaddlv_u16: {
13565 Int = Intrinsic::aarch64_neon_uaddlv;
13566 Ty = Int32Ty;
13567 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13568 llvm::Type *Tys[2] = { Ty, VTy };
13569 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13570 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13572 case NEON::BI__builtin_neon_vaddlvq_u8: {
13573 Int = Intrinsic::aarch64_neon_uaddlv;
13574 Ty = Int32Ty;
13575 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13576 llvm::Type *Tys[2] = { Ty, VTy };
13577 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13578 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13579 return Builder.CreateTrunc(Ops[0], Int16Ty);
13581 case NEON::BI__builtin_neon_vaddlvq_u16: {
13582 Int = Intrinsic::aarch64_neon_uaddlv;
13583 Ty = Int32Ty;
13584 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13585 llvm::Type *Tys[2] = { Ty, VTy };
13586 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13587 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13589 case NEON::BI__builtin_neon_vaddlv_s8: {
13590 Int = Intrinsic::aarch64_neon_saddlv;
13591 Ty = Int32Ty;
13592 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13593 llvm::Type *Tys[2] = { Ty, VTy };
13594 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13595 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13596 return Builder.CreateTrunc(Ops[0], Int16Ty);
13598 case NEON::BI__builtin_neon_vaddlv_s16: {
13599 Int = Intrinsic::aarch64_neon_saddlv;
13600 Ty = Int32Ty;
13601 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13602 llvm::Type *Tys[2] = { Ty, VTy };
13603 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13604 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13606 case NEON::BI__builtin_neon_vaddlvq_s8: {
13607 Int = Intrinsic::aarch64_neon_saddlv;
13608 Ty = Int32Ty;
13609 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13610 llvm::Type *Tys[2] = { Ty, VTy };
13611 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13612 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13613 return Builder.CreateTrunc(Ops[0], Int16Ty);
13615 case NEON::BI__builtin_neon_vaddlvq_s16: {
13616 Int = Intrinsic::aarch64_neon_saddlv;
13617 Ty = Int32Ty;
13618 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13619 llvm::Type *Tys[2] = { Ty, VTy };
13620 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13621 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13623 case NEON::BI__builtin_neon_vsri_n_v:
13624 case NEON::BI__builtin_neon_vsriq_n_v: {
13625 Int = Intrinsic::aarch64_neon_vsri;
13626 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13627 return EmitNeonCall(Intrin, Ops, "vsri_n");
13629 case NEON::BI__builtin_neon_vsli_n_v:
13630 case NEON::BI__builtin_neon_vsliq_n_v: {
13631 Int = Intrinsic::aarch64_neon_vsli;
13632 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13633 return EmitNeonCall(Intrin, Ops, "vsli_n");
13635 case NEON::BI__builtin_neon_vsra_n_v:
13636 case NEON::BI__builtin_neon_vsraq_n_v:
13637 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13638 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
13639 return Builder.CreateAdd(Ops[0], Ops[1]);
13640 case NEON::BI__builtin_neon_vrsra_n_v:
13641 case NEON::BI__builtin_neon_vrsraq_n_v: {
13642 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13643 SmallVector<llvm::Value*,2> TmpOps;
13644 TmpOps.push_back(Ops[1]);
13645 TmpOps.push_back(Ops[2]);
13646 Function* F = CGM.getIntrinsic(Int, Ty);
13647 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
13648 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
13649 return Builder.CreateAdd(Ops[0], tmp);
13651 case NEON::BI__builtin_neon_vld1_v:
13652 case NEON::BI__builtin_neon_vld1q_v: {
13653 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
13655 case NEON::BI__builtin_neon_vst1_v:
13656 case NEON::BI__builtin_neon_vst1q_v:
13657 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13658 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13659 case NEON::BI__builtin_neon_vld1_lane_v:
13660 case NEON::BI__builtin_neon_vld1q_lane_v: {
13661 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13662 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13663 PtrOp0.getAlignment());
13664 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
13666 case NEON::BI__builtin_neon_vldap1_lane_s64:
13667 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13668 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13669 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
13670 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
13671 LI->setAtomic(llvm::AtomicOrdering::Acquire);
13672 Ops[0] = LI;
13673 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
13675 case NEON::BI__builtin_neon_vld1_dup_v:
13676 case NEON::BI__builtin_neon_vld1q_dup_v: {
13677 Value *V = PoisonValue::get(Ty);
13678 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13679 PtrOp0.getAlignment());
13680 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
13681 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
13682 return EmitNeonSplat(Ops[0], CI);
13684 case NEON::BI__builtin_neon_vst1_lane_v:
13685 case NEON::BI__builtin_neon_vst1q_lane_v:
13686 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13687 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13688 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13689 case NEON::BI__builtin_neon_vstl1_lane_s64:
13690 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13691 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13692 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13693 llvm::StoreInst *SI =
13694 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13695 SI->setAtomic(llvm::AtomicOrdering::Release);
13696 return SI;
13698 case NEON::BI__builtin_neon_vld2_v:
13699 case NEON::BI__builtin_neon_vld2q_v: {
13700 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13701 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
13702 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13703 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13705 case NEON::BI__builtin_neon_vld3_v:
13706 case NEON::BI__builtin_neon_vld3q_v: {
13707 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13708 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
13709 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13710 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13712 case NEON::BI__builtin_neon_vld4_v:
13713 case NEON::BI__builtin_neon_vld4q_v: {
13714 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13715 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13716 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13717 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13719 case NEON::BI__builtin_neon_vld2_dup_v:
13720 case NEON::BI__builtin_neon_vld2q_dup_v: {
13721 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13722 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13723 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13724 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13726 case NEON::BI__builtin_neon_vld3_dup_v:
13727 case NEON::BI__builtin_neon_vld3q_dup_v: {
13728 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13729 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13730 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13731 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13733 case NEON::BI__builtin_neon_vld4_dup_v:
13734 case NEON::BI__builtin_neon_vld4q_dup_v: {
13735 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13736 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13737 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13738 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13740 case NEON::BI__builtin_neon_vld2_lane_v:
13741 case NEON::BI__builtin_neon_vld2q_lane_v: {
13742 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13743 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
13744 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13745 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13746 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13747 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13748 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
13749 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13751 case NEON::BI__builtin_neon_vld3_lane_v:
13752 case NEON::BI__builtin_neon_vld3q_lane_v: {
13753 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13754 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13755 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13756 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13757 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13758 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13759 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13760 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
13761 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13763 case NEON::BI__builtin_neon_vld4_lane_v:
13764 case NEON::BI__builtin_neon_vld4q_lane_v: {
13765 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13766 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13767 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13768 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13769 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13770 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13771 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
13772 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
13773 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
13774 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13776 case NEON::BI__builtin_neon_vst2_v:
13777 case NEON::BI__builtin_neon_vst2q_v: {
13778 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13779 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13780 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13781 Ops, "");
13783 case NEON::BI__builtin_neon_vst2_lane_v:
13784 case NEON::BI__builtin_neon_vst2q_lane_v: {
13785 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13786 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
13787 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13788 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13789 Ops, "");
13791 case NEON::BI__builtin_neon_vst3_v:
13792 case NEON::BI__builtin_neon_vst3q_v: {
13793 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13794 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13795 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13796 Ops, "");
13798 case NEON::BI__builtin_neon_vst3_lane_v:
13799 case NEON::BI__builtin_neon_vst3q_lane_v: {
13800 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13801 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13802 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13803 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13804 Ops, "");
13806 case NEON::BI__builtin_neon_vst4_v:
13807 case NEON::BI__builtin_neon_vst4q_v: {
13808 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13809 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13810 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13811 Ops, "");
13813 case NEON::BI__builtin_neon_vst4_lane_v:
13814 case NEON::BI__builtin_neon_vst4q_lane_v: {
13815 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13816 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13817 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13818 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13819 Ops, "");
13821 case NEON::BI__builtin_neon_vtrn_v:
13822 case NEON::BI__builtin_neon_vtrnq_v: {
13823 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13824 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13825 Value *SV = nullptr;
13827 for (unsigned vi = 0; vi != 2; ++vi) {
13828 SmallVector<int, 16> Indices;
13829 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13830 Indices.push_back(i+vi);
13831 Indices.push_back(i+e+vi);
13833 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13834 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
13835 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13837 return SV;
13839 case NEON::BI__builtin_neon_vuzp_v:
13840 case NEON::BI__builtin_neon_vuzpq_v: {
13841 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13842 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13843 Value *SV = nullptr;
13845 for (unsigned vi = 0; vi != 2; ++vi) {
13846 SmallVector<int, 16> Indices;
13847 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13848 Indices.push_back(2*i+vi);
13850 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13851 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
13852 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13854 return SV;
13856 case NEON::BI__builtin_neon_vzip_v:
13857 case NEON::BI__builtin_neon_vzipq_v: {
13858 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13859 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13860 Value *SV = nullptr;
13862 for (unsigned vi = 0; vi != 2; ++vi) {
13863 SmallVector<int, 16> Indices;
13864 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13865 Indices.push_back((i + vi*e) >> 1);
13866 Indices.push_back(((i + vi*e) >> 1)+e);
13868 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13869 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
13870 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13872 return SV;
13874 case NEON::BI__builtin_neon_vqtbl1q_v: {
13875 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13876 Ops, "vtbl1");
13878 case NEON::BI__builtin_neon_vqtbl2q_v: {
13879 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13880 Ops, "vtbl2");
13882 case NEON::BI__builtin_neon_vqtbl3q_v: {
13883 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13884 Ops, "vtbl3");
13886 case NEON::BI__builtin_neon_vqtbl4q_v: {
13887 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13888 Ops, "vtbl4");
13890 case NEON::BI__builtin_neon_vqtbx1q_v: {
13891 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13892 Ops, "vtbx1");
13894 case NEON::BI__builtin_neon_vqtbx2q_v: {
13895 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13896 Ops, "vtbx2");
13898 case NEON::BI__builtin_neon_vqtbx3q_v: {
13899 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13900 Ops, "vtbx3");
13902 case NEON::BI__builtin_neon_vqtbx4q_v: {
13903 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13904 Ops, "vtbx4");
13906 case NEON::BI__builtin_neon_vsqadd_v:
13907 case NEON::BI__builtin_neon_vsqaddq_v: {
13908 Int = Intrinsic::aarch64_neon_usqadd;
13909 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13911 case NEON::BI__builtin_neon_vuqadd_v:
13912 case NEON::BI__builtin_neon_vuqaddq_v: {
13913 Int = Intrinsic::aarch64_neon_suqadd;
13914 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
13917 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
13918 case NEON::BI__builtin_neon_vluti2_laneq_f16:
13919 case NEON::BI__builtin_neon_vluti2_laneq_p16:
13920 case NEON::BI__builtin_neon_vluti2_laneq_p8:
13921 case NEON::BI__builtin_neon_vluti2_laneq_s16:
13922 case NEON::BI__builtin_neon_vluti2_laneq_s8:
13923 case NEON::BI__builtin_neon_vluti2_laneq_u16:
13924 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
13925 Int = Intrinsic::aarch64_neon_vluti2_laneq;
13926 llvm::Type *Tys[2];
13927 Tys[0] = Ty;
13928 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13929 /*isQuad*/ false));
13930 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
13932 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
13933 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
13934 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
13935 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
13936 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
13937 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
13938 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
13939 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
13940 Int = Intrinsic::aarch64_neon_vluti2_laneq;
13941 llvm::Type *Tys[2];
13942 Tys[0] = Ty;
13943 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13944 /*isQuad*/ true));
13945 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
13947 case NEON::BI__builtin_neon_vluti2_lane_bf16:
13948 case NEON::BI__builtin_neon_vluti2_lane_f16:
13949 case NEON::BI__builtin_neon_vluti2_lane_p16:
13950 case NEON::BI__builtin_neon_vluti2_lane_p8:
13951 case NEON::BI__builtin_neon_vluti2_lane_s16:
13952 case NEON::BI__builtin_neon_vluti2_lane_s8:
13953 case NEON::BI__builtin_neon_vluti2_lane_u16:
13954 case NEON::BI__builtin_neon_vluti2_lane_u8: {
13955 Int = Intrinsic::aarch64_neon_vluti2_lane;
13956 llvm::Type *Tys[2];
13957 Tys[0] = Ty;
13958 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13959 /*isQuad*/ false));
13960 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
13962 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
13963 case NEON::BI__builtin_neon_vluti2q_lane_f16:
13964 case NEON::BI__builtin_neon_vluti2q_lane_p16:
13965 case NEON::BI__builtin_neon_vluti2q_lane_p8:
13966 case NEON::BI__builtin_neon_vluti2q_lane_s16:
13967 case NEON::BI__builtin_neon_vluti2q_lane_s8:
13968 case NEON::BI__builtin_neon_vluti2q_lane_u16:
13969 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
13970 Int = Intrinsic::aarch64_neon_vluti2_lane;
13971 llvm::Type *Tys[2];
13972 Tys[0] = Ty;
13973 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13974 /*isQuad*/ true));
13975 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
13977 case NEON::BI__builtin_neon_vluti4q_lane_p8:
13978 case NEON::BI__builtin_neon_vluti4q_lane_s8:
13979 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
13980 Int = Intrinsic::aarch64_neon_vluti4q_lane;
13981 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
13983 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
13984 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
13985 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
13986 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
13987 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
13989 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
13990 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
13991 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
13992 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
13993 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
13994 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
13995 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
13997 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
13998 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
13999 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
14000 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
14001 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
14002 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
14003 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
14006 case NEON::BI__builtin_neon_vamin_f16:
14007 case NEON::BI__builtin_neon_vaminq_f16:
14008 case NEON::BI__builtin_neon_vamin_f32:
14009 case NEON::BI__builtin_neon_vaminq_f32:
14010 case NEON::BI__builtin_neon_vaminq_f64: {
14011 Int = Intrinsic::aarch64_neon_famin;
14012 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
14014 case NEON::BI__builtin_neon_vamax_f16:
14015 case NEON::BI__builtin_neon_vamaxq_f16:
14016 case NEON::BI__builtin_neon_vamax_f32:
14017 case NEON::BI__builtin_neon_vamaxq_f32:
14018 case NEON::BI__builtin_neon_vamaxq_f64: {
14019 Int = Intrinsic::aarch64_neon_famax;
14020 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
14022 case NEON::BI__builtin_neon_vscale_f16:
14023 case NEON::BI__builtin_neon_vscaleq_f16:
14024 case NEON::BI__builtin_neon_vscale_f32:
14025 case NEON::BI__builtin_neon_vscaleq_f32:
14026 case NEON::BI__builtin_neon_vscaleq_f64: {
14027 Int = Intrinsic::aarch64_neon_fp8_fscale;
14028 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
14033 Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
14034 const CallExpr *E) {
14035 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
14036 BuiltinID == BPF::BI__builtin_btf_type_id ||
14037 BuiltinID == BPF::BI__builtin_preserve_type_info ||
14038 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
14039 "unexpected BPF builtin");
14041 // A sequence number, injected into IR builtin functions, to
14042 // prevent CSE given the only difference of the function
14043 // may just be the debuginfo metadata.
14044 static uint32_t BuiltinSeqNum;
14046 switch (BuiltinID) {
14047 default:
14048 llvm_unreachable("Unexpected BPF builtin");
14049 case BPF::BI__builtin_preserve_field_info: {
14050 const Expr *Arg = E->getArg(0);
14051 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
14053 if (!getDebugInfo()) {
14054 CGM.Error(E->getExprLoc(),
14055 "using __builtin_preserve_field_info() without -g");
14056 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
14057 : EmitLValue(Arg).emitRawPointer(*this);
14060 // Enable underlying preserve_*_access_index() generation.
14061 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
14062 IsInPreservedAIRegion = true;
14063 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
14064 : EmitLValue(Arg).emitRawPointer(*this);
14065 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
14067 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14068 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
14070 // Built the IR for the preserve_field_info intrinsic.
14071 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getOrInsertDeclaration(
14072 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
14073 {FieldAddr->getType()});
14074 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
14076 case BPF::BI__builtin_btf_type_id:
14077 case BPF::BI__builtin_preserve_type_info: {
14078 if (!getDebugInfo()) {
14079 CGM.Error(E->getExprLoc(), "using builtin function without -g");
14080 return nullptr;
14083 const Expr *Arg0 = E->getArg(0);
14084 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
14085 Arg0->getType(), Arg0->getExprLoc());
14087 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14088 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
14089 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
14091 llvm::Function *FnDecl;
14092 if (BuiltinID == BPF::BI__builtin_btf_type_id)
14093 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14094 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
14095 else
14096 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14097 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
14098 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
14099 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
14100 return Fn;
14102 case BPF::BI__builtin_preserve_enum_value: {
14103 if (!getDebugInfo()) {
14104 CGM.Error(E->getExprLoc(), "using builtin function without -g");
14105 return nullptr;
14108 const Expr *Arg0 = E->getArg(0);
14109 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
14110 Arg0->getType(), Arg0->getExprLoc());
14112 // Find enumerator
14113 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
14114 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
14115 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
14116 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
14118 auto InitVal = Enumerator->getInitVal();
14119 std::string InitValStr;
14120 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
14121 InitValStr = std::to_string(InitVal.getSExtValue());
14122 else
14123 InitValStr = std::to_string(InitVal.getZExtValue());
14124 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
14125 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
14127 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14128 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
14129 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
14131 llvm::Function *IntrinsicFn = llvm::Intrinsic::getOrInsertDeclaration(
14132 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
14133 CallInst *Fn =
14134 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
14135 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
14136 return Fn;
14141 llvm::Value *CodeGenFunction::
14142 BuildVector(ArrayRef<llvm::Value*> Ops) {
14143 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
14144 "Not a power-of-two sized vector!");
14145 bool AllConstants = true;
14146 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
14147 AllConstants &= isa<Constant>(Ops[i]);
14149 // If this is a constant vector, create a ConstantVector.
14150 if (AllConstants) {
14151 SmallVector<llvm::Constant*, 16> CstOps;
14152 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
14153 CstOps.push_back(cast<Constant>(Ops[i]));
14154 return llvm::ConstantVector::get(CstOps);
14157 // Otherwise, insertelement the values to build the vector.
14158 Value *Result = llvm::PoisonValue::get(
14159 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
14161 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
14162 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
14164 return Result;
14167 // Convert the mask from an integer type to a vector of i1.
14168 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
14169 unsigned NumElts) {
14171 auto *MaskTy = llvm::FixedVectorType::get(
14172 CGF.Builder.getInt1Ty(),
14173 cast<IntegerType>(Mask->getType())->getBitWidth());
14174 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
14176 // If we have less than 8 elements, then the starting mask was an i8 and
14177 // we need to extract down to the right number of elements.
14178 if (NumElts < 8) {
14179 int Indices[4];
14180 for (unsigned i = 0; i != NumElts; ++i)
14181 Indices[i] = i;
14182 MaskVec = CGF.Builder.CreateShuffleVector(
14183 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
14185 return MaskVec;
14188 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14189 Align Alignment) {
14190 Value *Ptr = Ops[0];
14192 Value *MaskVec = getMaskVecValue(
14193 CGF, Ops[2],
14194 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
14196 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
14199 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14200 Align Alignment) {
14201 llvm::Type *Ty = Ops[1]->getType();
14202 Value *Ptr = Ops[0];
14204 Value *MaskVec = getMaskVecValue(
14205 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
14207 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
14210 static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
14211 ArrayRef<Value *> Ops) {
14212 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
14213 Value *Ptr = Ops[0];
14215 Value *MaskVec = getMaskVecValue(
14216 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
14218 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
14219 ResultTy);
14220 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
14223 static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
14224 ArrayRef<Value *> Ops,
14225 bool IsCompress) {
14226 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14228 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
14230 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
14231 : Intrinsic::x86_avx512_mask_expand;
14232 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
14233 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
14236 static Value *EmitX86CompressStore(CodeGenFunction &CGF,
14237 ArrayRef<Value *> Ops) {
14238 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14239 Value *Ptr = Ops[0];
14241 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
14243 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
14244 ResultTy);
14245 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
14248 static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
14249 ArrayRef<Value *> Ops,
14250 bool InvertLHS = false) {
14251 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14252 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
14253 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
14255 if (InvertLHS)
14256 LHS = CGF.Builder.CreateNot(LHS);
14258 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
14259 Ops[0]->getType());
14262 static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
14263 Value *Amt, bool IsRight) {
14264 llvm::Type *Ty = Op0->getType();
14266 // Amount may be scalar immediate, in which case create a splat vector.
14267 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
14268 // we only care about the lowest log2 bits anyway.
14269 if (Amt->getType() != Ty) {
14270 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
14271 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
14272 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
14275 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
14276 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
14277 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
14280 static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14281 bool IsSigned) {
14282 Value *Op0 = Ops[0];
14283 Value *Op1 = Ops[1];
14284 llvm::Type *Ty = Op0->getType();
14285 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
14287 CmpInst::Predicate Pred;
14288 switch (Imm) {
14289 case 0x0:
14290 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
14291 break;
14292 case 0x1:
14293 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
14294 break;
14295 case 0x2:
14296 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
14297 break;
14298 case 0x3:
14299 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
14300 break;
14301 case 0x4:
14302 Pred = ICmpInst::ICMP_EQ;
14303 break;
14304 case 0x5:
14305 Pred = ICmpInst::ICMP_NE;
14306 break;
14307 case 0x6:
14308 return llvm::Constant::getNullValue(Ty); // FALSE
14309 case 0x7:
14310 return llvm::Constant::getAllOnesValue(Ty); // TRUE
14311 default:
14312 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
14315 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
14316 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
14317 return Res;
14320 static Value *EmitX86Select(CodeGenFunction &CGF,
14321 Value *Mask, Value *Op0, Value *Op1) {
14323 // If the mask is all ones just return first argument.
14324 if (const auto *C = dyn_cast<Constant>(Mask))
14325 if (C->isAllOnesValue())
14326 return Op0;
14328 Mask = getMaskVecValue(
14329 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
14331 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
14334 static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
14335 Value *Mask, Value *Op0, Value *Op1) {
14336 // If the mask is all ones just return first argument.
14337 if (const auto *C = dyn_cast<Constant>(Mask))
14338 if (C->isAllOnesValue())
14339 return Op0;
14341 auto *MaskTy = llvm::FixedVectorType::get(
14342 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
14343 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
14344 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
14345 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
14348 static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
14349 unsigned NumElts, Value *MaskIn) {
14350 if (MaskIn) {
14351 const auto *C = dyn_cast<Constant>(MaskIn);
14352 if (!C || !C->isAllOnesValue())
14353 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
14356 if (NumElts < 8) {
14357 int Indices[8];
14358 for (unsigned i = 0; i != NumElts; ++i)
14359 Indices[i] = i;
14360 for (unsigned i = NumElts; i != 8; ++i)
14361 Indices[i] = i % NumElts + NumElts;
14362 Cmp = CGF.Builder.CreateShuffleVector(
14363 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
14366 return CGF.Builder.CreateBitCast(Cmp,
14367 IntegerType::get(CGF.getLLVMContext(),
14368 std::max(NumElts, 8U)));
14371 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
14372 bool Signed, ArrayRef<Value *> Ops) {
14373 assert((Ops.size() == 2 || Ops.size() == 4) &&
14374 "Unexpected number of arguments");
14375 unsigned NumElts =
14376 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14377 Value *Cmp;
14379 if (CC == 3) {
14380 Cmp = Constant::getNullValue(
14381 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
14382 } else if (CC == 7) {
14383 Cmp = Constant::getAllOnesValue(
14384 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
14385 } else {
14386 ICmpInst::Predicate Pred;
14387 switch (CC) {
14388 default: llvm_unreachable("Unknown condition code");
14389 case 0: Pred = ICmpInst::ICMP_EQ; break;
14390 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
14391 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
14392 case 4: Pred = ICmpInst::ICMP_NE; break;
14393 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
14394 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
14396 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
14399 Value *MaskIn = nullptr;
14400 if (Ops.size() == 4)
14401 MaskIn = Ops[3];
14403 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
14406 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
14407 Value *Zero = Constant::getNullValue(In->getType());
14408 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
14411 static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
14412 ArrayRef<Value *> Ops, bool IsSigned) {
14413 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
14414 llvm::Type *Ty = Ops[1]->getType();
14416 Value *Res;
14417 if (Rnd != 4) {
14418 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
14419 : Intrinsic::x86_avx512_uitofp_round;
14420 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
14421 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
14422 } else {
14423 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14424 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
14425 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
14428 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14431 // Lowers X86 FMA intrinsics to IR.
14432 static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14433 ArrayRef<Value *> Ops, unsigned BuiltinID,
14434 bool IsAddSub) {
14436 bool Subtract = false;
14437 Intrinsic::ID IID = Intrinsic::not_intrinsic;
14438 switch (BuiltinID) {
14439 default: break;
14440 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14441 Subtract = true;
14442 [[fallthrough]];
14443 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14444 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14445 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14446 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
14447 break;
14448 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14449 Subtract = true;
14450 [[fallthrough]];
14451 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14452 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14453 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14454 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
14455 break;
14456 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14457 Subtract = true;
14458 [[fallthrough]];
14459 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14460 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14461 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14462 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
14463 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14464 Subtract = true;
14465 [[fallthrough]];
14466 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14467 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14468 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14469 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
14470 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14471 Subtract = true;
14472 [[fallthrough]];
14473 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14474 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14475 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14476 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
14477 break;
14478 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14479 Subtract = true;
14480 [[fallthrough]];
14481 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14482 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14483 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14484 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
14485 break;
14486 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14487 Subtract = true;
14488 LLVM_FALLTHROUGH;
14489 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14490 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14491 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14492 IID = llvm::Intrinsic::x86_avx10_vfmaddph256;
14493 break;
14494 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14495 Subtract = true;
14496 LLVM_FALLTHROUGH;
14497 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14498 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14499 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14500 IID = llvm::Intrinsic::x86_avx10_vfmaddsubph256;
14501 break;
14502 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14503 Subtract = true;
14504 LLVM_FALLTHROUGH;
14505 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14506 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14507 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14508 IID = llvm::Intrinsic::x86_avx10_vfmaddps256;
14509 break;
14510 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14511 Subtract = true;
14512 LLVM_FALLTHROUGH;
14513 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14514 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14515 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14516 IID = llvm::Intrinsic::x86_avx10_vfmaddpd256;
14517 break;
14518 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14519 Subtract = true;
14520 LLVM_FALLTHROUGH;
14521 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14522 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14523 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14524 IID = llvm::Intrinsic::x86_avx10_vfmaddsubps256;
14525 break;
14526 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14527 Subtract = true;
14528 LLVM_FALLTHROUGH;
14529 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14530 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14531 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14532 IID = llvm::Intrinsic::x86_avx10_vfmaddsubpd256;
14533 break;
14536 Value *A = Ops[0];
14537 Value *B = Ops[1];
14538 Value *C = Ops[2];
14540 if (Subtract)
14541 C = CGF.Builder.CreateFNeg(C);
14543 Value *Res;
14545 // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
14546 if (IID != Intrinsic::not_intrinsic &&
14547 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
14548 IsAddSub)) {
14549 Function *Intr = CGF.CGM.getIntrinsic(IID);
14550 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
14551 } else {
14552 llvm::Type *Ty = A->getType();
14553 Function *FMA;
14554 if (CGF.Builder.getIsFPConstrained()) {
14555 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14556 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
14557 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
14558 } else {
14559 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
14560 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
14564 // Handle any required masking.
14565 Value *MaskFalseVal = nullptr;
14566 switch (BuiltinID) {
14567 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14568 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14569 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14570 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14571 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14572 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14573 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14574 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14575 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14576 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14577 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14578 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14579 MaskFalseVal = Ops[0];
14580 break;
14581 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14582 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14583 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14584 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14585 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14586 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14587 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14588 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14589 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14590 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14591 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14592 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14593 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
14594 break;
14595 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14596 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14597 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14598 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14599 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14600 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14601 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14602 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14603 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14604 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14605 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14606 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14607 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14608 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14609 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14610 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14611 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14612 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14613 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14614 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14615 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14616 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14617 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14618 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14619 MaskFalseVal = Ops[2];
14620 break;
14623 if (MaskFalseVal)
14624 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
14626 return Res;
14629 static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14630 MutableArrayRef<Value *> Ops, Value *Upper,
14631 bool ZeroMask = false, unsigned PTIdx = 0,
14632 bool NegAcc = false) {
14633 unsigned Rnd = 4;
14634 if (Ops.size() > 4)
14635 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
14637 if (NegAcc)
14638 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
14640 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
14641 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14642 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
14643 Value *Res;
14644 if (Rnd != 4) {
14645 Intrinsic::ID IID;
14647 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
14648 case 16:
14649 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
14650 break;
14651 case 32:
14652 IID = Intrinsic::x86_avx512_vfmadd_f32;
14653 break;
14654 case 64:
14655 IID = Intrinsic::x86_avx512_vfmadd_f64;
14656 break;
14657 default:
14658 llvm_unreachable("Unexpected size");
14660 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14661 {Ops[0], Ops[1], Ops[2], Ops[4]});
14662 } else if (CGF.Builder.getIsFPConstrained()) {
14663 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14664 Function *FMA = CGF.CGM.getIntrinsic(
14665 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
14666 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
14667 } else {
14668 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
14669 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
14671 // If we have more than 3 arguments, we need to do masking.
14672 if (Ops.size() > 3) {
14673 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
14674 : Ops[PTIdx];
14676 // If we negated the accumulator and the its the PassThru value we need to
14677 // bypass the negate. Conveniently Upper should be the same thing in this
14678 // case.
14679 if (NegAcc && PTIdx == 2)
14680 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
14682 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
14684 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
14687 static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
14688 ArrayRef<Value *> Ops) {
14689 llvm::Type *Ty = Ops[0]->getType();
14690 // Arguments have a vXi32 type so cast to vXi64.
14691 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
14692 Ty->getPrimitiveSizeInBits() / 64);
14693 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
14694 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
14696 if (IsSigned) {
14697 // Shift left then arithmetic shift right.
14698 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
14699 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
14700 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
14701 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
14702 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
14703 } else {
14704 // Clear the upper bits.
14705 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
14706 LHS = CGF.Builder.CreateAnd(LHS, Mask);
14707 RHS = CGF.Builder.CreateAnd(RHS, Mask);
14710 return CGF.Builder.CreateMul(LHS, RHS);
14713 // Emit a masked pternlog intrinsic. This only exists because the header has to
14714 // use a macro and we aren't able to pass the input argument to a pternlog
14715 // builtin and a select builtin without evaluating it twice.
14716 static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
14717 ArrayRef<Value *> Ops) {
14718 llvm::Type *Ty = Ops[0]->getType();
14720 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
14721 unsigned EltWidth = Ty->getScalarSizeInBits();
14722 Intrinsic::ID IID;
14723 if (VecWidth == 128 && EltWidth == 32)
14724 IID = Intrinsic::x86_avx512_pternlog_d_128;
14725 else if (VecWidth == 256 && EltWidth == 32)
14726 IID = Intrinsic::x86_avx512_pternlog_d_256;
14727 else if (VecWidth == 512 && EltWidth == 32)
14728 IID = Intrinsic::x86_avx512_pternlog_d_512;
14729 else if (VecWidth == 128 && EltWidth == 64)
14730 IID = Intrinsic::x86_avx512_pternlog_q_128;
14731 else if (VecWidth == 256 && EltWidth == 64)
14732 IID = Intrinsic::x86_avx512_pternlog_q_256;
14733 else if (VecWidth == 512 && EltWidth == 64)
14734 IID = Intrinsic::x86_avx512_pternlog_q_512;
14735 else
14736 llvm_unreachable("Unexpected intrinsic");
14738 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14739 Ops.drop_back());
14740 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
14741 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
14744 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
14745 llvm::Type *DstTy) {
14746 unsigned NumberOfElements =
14747 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14748 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
14749 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
14752 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
14753 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
14754 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
14755 return EmitX86CpuIs(CPUStr);
14758 // Convert F16 halfs to floats.
14759 static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
14760 ArrayRef<Value *> Ops,
14761 llvm::Type *DstTy) {
14762 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
14763 "Unknown cvtph2ps intrinsic");
14765 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
14766 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
14767 Function *F =
14768 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
14769 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
14772 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14773 Value *Src = Ops[0];
14775 // Extract the subvector.
14776 if (NumDstElts !=
14777 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
14778 assert(NumDstElts == 4 && "Unexpected vector size");
14779 Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
14782 // Bitcast from vXi16 to vXf16.
14783 auto *HalfTy = llvm::FixedVectorType::get(
14784 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
14785 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
14787 // Perform the fp-extension.
14788 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
14790 if (Ops.size() >= 3)
14791 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14792 return Res;
14795 Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
14797 llvm::Type *Int32Ty = Builder.getInt32Ty();
14799 // Matching the struct layout from the compiler-rt/libgcc structure that is
14800 // filled in:
14801 // unsigned int __cpu_vendor;
14802 // unsigned int __cpu_type;
14803 // unsigned int __cpu_subtype;
14804 // unsigned int __cpu_features[1];
14805 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14806 llvm::ArrayType::get(Int32Ty, 1));
14808 // Grab the global __cpu_model.
14809 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14810 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14812 // Calculate the index needed to access the correct field based on the
14813 // range. Also adjust the expected value.
14814 unsigned Index;
14815 unsigned Value;
14816 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14817 #define X86_VENDOR(ENUM, STRING) \
14818 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14819 #define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14820 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14821 #define X86_CPU_TYPE(ENUM, STR) \
14822 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14823 #define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14824 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14825 #define X86_CPU_SUBTYPE(ENUM, STR) \
14826 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14827 #include "llvm/TargetParser/X86TargetParser.def"
14828 .Default({0, 0});
14829 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
14831 // Grab the appropriate field from __cpu_model.
14832 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
14833 ConstantInt::get(Int32Ty, Index)};
14834 llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14835 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
14836 CharUnits::fromQuantity(4));
14838 // Check the value of the field against the requested value.
14839 return Builder.CreateICmpEQ(CpuValue,
14840 llvm::ConstantInt::get(Int32Ty, Value));
14843 Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
14844 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14845 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14846 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14847 return Builder.getFalse();
14848 return EmitX86CpuSupports(FeatureStr);
14851 Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
14852 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
14855 llvm::Value *
14856 CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
14857 Value *Result = Builder.getTrue();
14858 if (FeatureMask[0] != 0) {
14859 // Matching the struct layout from the compiler-rt/libgcc structure that is
14860 // filled in:
14861 // unsigned int __cpu_vendor;
14862 // unsigned int __cpu_type;
14863 // unsigned int __cpu_subtype;
14864 // unsigned int __cpu_features[1];
14865 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14866 llvm::ArrayType::get(Int32Ty, 1));
14868 // Grab the global __cpu_model.
14869 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14870 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14872 // Grab the first (0th) element from the field __cpu_features off of the
14873 // global in the struct STy.
14874 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
14875 Builder.getInt32(0)};
14876 Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14877 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
14878 CharUnits::fromQuantity(4));
14880 // Check the value of the bit corresponding to the feature requested.
14881 Value *Mask = Builder.getInt32(FeatureMask[0]);
14882 Value *Bitset = Builder.CreateAnd(Features, Mask);
14883 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14884 Result = Builder.CreateAnd(Result, Cmp);
14887 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
14888 llvm::Constant *CpuFeatures2 =
14889 CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
14890 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
14891 for (int i = 1; i != 4; ++i) {
14892 const uint32_t M = FeatureMask[i];
14893 if (!M)
14894 continue;
14895 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
14896 Value *Features = Builder.CreateAlignedLoad(
14897 Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs),
14898 CharUnits::fromQuantity(4));
14899 // Check the value of the bit corresponding to the feature requested.
14900 Value *Mask = Builder.getInt32(M);
14901 Value *Bitset = Builder.CreateAnd(Features, Mask);
14902 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14903 Result = Builder.CreateAnd(Result, Cmp);
14906 return Result;
14909 Value *CodeGenFunction::EmitAArch64CpuInit() {
14910 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
14911 llvm::FunctionCallee Func =
14912 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
14913 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14914 cast<llvm::GlobalValue>(Func.getCallee())
14915 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14916 return Builder.CreateCall(Func);
14919 Value *CodeGenFunction::EmitRISCVCpuInit() {
14920 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {VoidPtrTy}, false);
14921 llvm::FunctionCallee Func =
14922 CGM.CreateRuntimeFunction(FTy, "__init_riscv_feature_bits");
14923 auto *CalleeGV = cast<llvm::GlobalValue>(Func.getCallee());
14924 CalleeGV->setDSOLocal(true);
14925 CalleeGV->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14926 return Builder.CreateCall(Func, {llvm::ConstantPointerNull::get(VoidPtrTy)});
14929 Value *CodeGenFunction::EmitX86CpuInit() {
14930 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
14931 /*Variadic*/ false);
14932 llvm::FunctionCallee Func =
14933 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
14934 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14935 cast<llvm::GlobalValue>(Func.getCallee())
14936 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14937 return Builder.CreateCall(Func);
14940 Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
14941 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
14942 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
14943 llvm::SmallVector<StringRef, 8> Features;
14944 ArgStr.split(Features, "+");
14945 for (auto &Feature : Features) {
14946 Feature = Feature.trim();
14947 if (!llvm::AArch64::parseFMVExtension(Feature))
14948 return Builder.getFalse();
14949 if (Feature != "default")
14950 Features.push_back(Feature);
14952 return EmitAArch64CpuSupports(Features);
14955 llvm::Value *
14956 CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
14957 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
14958 Value *Result = Builder.getTrue();
14959 if (FeaturesMask != 0) {
14960 // Get features from structure in runtime library
14961 // struct {
14962 // unsigned long long features;
14963 // } __aarch64_cpu_features;
14964 llvm::Type *STy = llvm::StructType::get(Int64Ty);
14965 llvm::Constant *AArch64CPUFeatures =
14966 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
14967 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
14968 llvm::Value *CpuFeatures = Builder.CreateGEP(
14969 STy, AArch64CPUFeatures,
14970 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
14971 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
14972 CharUnits::fromQuantity(8));
14973 Value *Mask = Builder.getInt64(FeaturesMask);
14974 Value *Bitset = Builder.CreateAnd(Features, Mask);
14975 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14976 Result = Builder.CreateAnd(Result, Cmp);
14978 return Result;
14981 Value *CodeGenFunction::EmitRISCVCpuSupports(const CallExpr *E) {
14983 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14984 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14985 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14986 return Builder.getFalse();
14988 return EmitRISCVCpuSupports(ArrayRef<StringRef>(FeatureStr));
14991 static Value *loadRISCVFeatureBits(unsigned Index, CGBuilderTy &Builder,
14992 CodeGenModule &CGM) {
14993 llvm::Type *Int32Ty = Builder.getInt32Ty();
14994 llvm::Type *Int64Ty = Builder.getInt64Ty();
14995 llvm::ArrayType *ArrayOfInt64Ty =
14996 llvm::ArrayType::get(Int64Ty, llvm::RISCVISAInfo::FeatureBitSize);
14997 llvm::Type *StructTy = llvm::StructType::get(Int32Ty, ArrayOfInt64Ty);
14998 llvm::Constant *RISCVFeaturesBits =
14999 CGM.CreateRuntimeVariable(StructTy, "__riscv_feature_bits");
15000 cast<llvm::GlobalValue>(RISCVFeaturesBits)->setDSOLocal(true);
15001 Value *IndexVal = llvm::ConstantInt::get(Int32Ty, Index);
15002 llvm::Value *GEPIndices[] = {Builder.getInt32(0), Builder.getInt32(1),
15003 IndexVal};
15004 Value *Ptr =
15005 Builder.CreateInBoundsGEP(StructTy, RISCVFeaturesBits, GEPIndices);
15006 Value *FeaturesBit =
15007 Builder.CreateAlignedLoad(Int64Ty, Ptr, CharUnits::fromQuantity(8));
15008 return FeaturesBit;
15011 Value *CodeGenFunction::EmitRISCVCpuSupports(ArrayRef<StringRef> FeaturesStrs) {
15012 const unsigned RISCVFeatureLength = llvm::RISCVISAInfo::FeatureBitSize;
15013 uint64_t RequireBitMasks[RISCVFeatureLength] = {0};
15015 for (auto Feat : FeaturesStrs) {
15016 auto [GroupID, BitPos] = RISCVISAInfo::getRISCVFeaturesBitsInfo(Feat);
15018 // If there isn't BitPos for this feature, skip this version.
15019 // It also report the warning to user during compilation.
15020 if (BitPos == -1)
15021 return Builder.getFalse();
15023 RequireBitMasks[GroupID] |= (1ULL << BitPos);
15026 Value *Result = nullptr;
15027 for (unsigned Idx = 0; Idx < RISCVFeatureLength; Idx++) {
15028 if (RequireBitMasks[Idx] == 0)
15029 continue;
15031 Value *Mask = Builder.getInt64(RequireBitMasks[Idx]);
15032 Value *Bitset =
15033 Builder.CreateAnd(loadRISCVFeatureBits(Idx, Builder, CGM), Mask);
15034 Value *CmpV = Builder.CreateICmpEQ(Bitset, Mask);
15035 Result = (!Result) ? CmpV : Builder.CreateAnd(Result, CmpV);
15038 assert(Result && "Should have value here.");
15040 return Result;
15043 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
15044 const CallExpr *E) {
15045 if (BuiltinID == Builtin::BI__builtin_cpu_is)
15046 return EmitX86CpuIs(E);
15047 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
15048 return EmitX86CpuSupports(E);
15049 if (BuiltinID == Builtin::BI__builtin_cpu_init)
15050 return EmitX86CpuInit();
15052 // Handle MSVC intrinsics before argument evaluation to prevent double
15053 // evaluation.
15054 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
15055 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
15057 SmallVector<Value*, 4> Ops;
15058 bool IsMaskFCmp = false;
15059 bool IsConjFMA = false;
15061 // Find out if any arguments are required to be integer constant expressions.
15062 unsigned ICEArguments = 0;
15063 ASTContext::GetBuiltinTypeError Error;
15064 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
15065 assert(Error == ASTContext::GE_None && "Should not codegen an error");
15067 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
15068 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
15071 // These exist so that the builtin that takes an immediate can be bounds
15072 // checked by clang to avoid passing bad immediates to the backend. Since
15073 // AVX has a larger immediate than SSE we would need separate builtins to
15074 // do the different bounds checking. Rather than create a clang specific
15075 // SSE only builtin, this implements eight separate builtins to match gcc
15076 // implementation.
15077 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
15078 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
15079 llvm::Function *F = CGM.getIntrinsic(ID);
15080 return Builder.CreateCall(F, Ops);
15083 // For the vector forms of FP comparisons, translate the builtins directly to
15084 // IR.
15085 // TODO: The builtins could be removed if the SSE header files used vector
15086 // extension comparisons directly (vector ordered/unordered may need
15087 // additional support via __builtin_isnan()).
15088 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
15089 bool IsSignaling) {
15090 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15091 Value *Cmp;
15092 if (IsSignaling)
15093 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
15094 else
15095 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
15096 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
15097 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
15098 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
15099 return Builder.CreateBitCast(Sext, FPVecTy);
15102 switch (BuiltinID) {
15103 default: return nullptr;
15104 case X86::BI_mm_prefetch: {
15105 Value *Address = Ops[0];
15106 ConstantInt *C = cast<ConstantInt>(Ops[1]);
15107 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
15108 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
15109 Value *Data = ConstantInt::get(Int32Ty, 1);
15110 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
15111 return Builder.CreateCall(F, {Address, RW, Locality, Data});
15113 case X86::BI_mm_clflush: {
15114 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
15115 Ops[0]);
15117 case X86::BI_mm_lfence: {
15118 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
15120 case X86::BI_mm_mfence: {
15121 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
15123 case X86::BI_mm_sfence: {
15124 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
15126 case X86::BI_mm_pause: {
15127 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
15129 case X86::BI__rdtsc: {
15130 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
15132 case X86::BI__builtin_ia32_rdtscp: {
15133 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
15134 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
15135 Ops[0]);
15136 return Builder.CreateExtractValue(Call, 0);
15138 case X86::BI__builtin_ia32_lzcnt_u16:
15139 case X86::BI__builtin_ia32_lzcnt_u32:
15140 case X86::BI__builtin_ia32_lzcnt_u64: {
15141 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15142 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
15144 case X86::BI__builtin_ia32_tzcnt_u16:
15145 case X86::BI__builtin_ia32_tzcnt_u32:
15146 case X86::BI__builtin_ia32_tzcnt_u64: {
15147 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
15148 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
15150 case X86::BI__builtin_ia32_undef128:
15151 case X86::BI__builtin_ia32_undef256:
15152 case X86::BI__builtin_ia32_undef512:
15153 // The x86 definition of "undef" is not the same as the LLVM definition
15154 // (PR32176). We leave optimizing away an unnecessary zero constant to the
15155 // IR optimizer and backend.
15156 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
15157 // value, we should use that here instead of a zero.
15158 return llvm::Constant::getNullValue(ConvertType(E->getType()));
15159 case X86::BI__builtin_ia32_vec_ext_v4hi:
15160 case X86::BI__builtin_ia32_vec_ext_v16qi:
15161 case X86::BI__builtin_ia32_vec_ext_v8hi:
15162 case X86::BI__builtin_ia32_vec_ext_v4si:
15163 case X86::BI__builtin_ia32_vec_ext_v4sf:
15164 case X86::BI__builtin_ia32_vec_ext_v2di:
15165 case X86::BI__builtin_ia32_vec_ext_v32qi:
15166 case X86::BI__builtin_ia32_vec_ext_v16hi:
15167 case X86::BI__builtin_ia32_vec_ext_v8si:
15168 case X86::BI__builtin_ia32_vec_ext_v4di: {
15169 unsigned NumElts =
15170 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15171 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15172 Index &= NumElts - 1;
15173 // These builtins exist so we can ensure the index is an ICE and in range.
15174 // Otherwise we could just do this in the header file.
15175 return Builder.CreateExtractElement(Ops[0], Index);
15177 case X86::BI__builtin_ia32_vec_set_v4hi:
15178 case X86::BI__builtin_ia32_vec_set_v16qi:
15179 case X86::BI__builtin_ia32_vec_set_v8hi:
15180 case X86::BI__builtin_ia32_vec_set_v4si:
15181 case X86::BI__builtin_ia32_vec_set_v2di:
15182 case X86::BI__builtin_ia32_vec_set_v32qi:
15183 case X86::BI__builtin_ia32_vec_set_v16hi:
15184 case X86::BI__builtin_ia32_vec_set_v8si:
15185 case X86::BI__builtin_ia32_vec_set_v4di: {
15186 unsigned NumElts =
15187 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15188 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15189 Index &= NumElts - 1;
15190 // These builtins exist so we can ensure the index is an ICE and in range.
15191 // Otherwise we could just do this in the header file.
15192 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
15194 case X86::BI_mm_setcsr:
15195 case X86::BI__builtin_ia32_ldmxcsr: {
15196 RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());
15197 Builder.CreateStore(Ops[0], Tmp);
15198 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
15199 Tmp.getPointer());
15201 case X86::BI_mm_getcsr:
15202 case X86::BI__builtin_ia32_stmxcsr: {
15203 RawAddress Tmp = CreateMemTemp(E->getType());
15204 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
15205 Tmp.getPointer());
15206 return Builder.CreateLoad(Tmp, "stmxcsr");
15208 case X86::BI__builtin_ia32_xsave:
15209 case X86::BI__builtin_ia32_xsave64:
15210 case X86::BI__builtin_ia32_xrstor:
15211 case X86::BI__builtin_ia32_xrstor64:
15212 case X86::BI__builtin_ia32_xsaveopt:
15213 case X86::BI__builtin_ia32_xsaveopt64:
15214 case X86::BI__builtin_ia32_xrstors:
15215 case X86::BI__builtin_ia32_xrstors64:
15216 case X86::BI__builtin_ia32_xsavec:
15217 case X86::BI__builtin_ia32_xsavec64:
15218 case X86::BI__builtin_ia32_xsaves:
15219 case X86::BI__builtin_ia32_xsaves64:
15220 case X86::BI__builtin_ia32_xsetbv:
15221 case X86::BI_xsetbv: {
15222 Intrinsic::ID ID;
15223 #define INTRINSIC_X86_XSAVE_ID(NAME) \
15224 case X86::BI__builtin_ia32_##NAME: \
15225 ID = Intrinsic::x86_##NAME; \
15226 break
15227 switch (BuiltinID) {
15228 default: llvm_unreachable("Unsupported intrinsic!");
15229 INTRINSIC_X86_XSAVE_ID(xsave);
15230 INTRINSIC_X86_XSAVE_ID(xsave64);
15231 INTRINSIC_X86_XSAVE_ID(xrstor);
15232 INTRINSIC_X86_XSAVE_ID(xrstor64);
15233 INTRINSIC_X86_XSAVE_ID(xsaveopt);
15234 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
15235 INTRINSIC_X86_XSAVE_ID(xrstors);
15236 INTRINSIC_X86_XSAVE_ID(xrstors64);
15237 INTRINSIC_X86_XSAVE_ID(xsavec);
15238 INTRINSIC_X86_XSAVE_ID(xsavec64);
15239 INTRINSIC_X86_XSAVE_ID(xsaves);
15240 INTRINSIC_X86_XSAVE_ID(xsaves64);
15241 INTRINSIC_X86_XSAVE_ID(xsetbv);
15242 case X86::BI_xsetbv:
15243 ID = Intrinsic::x86_xsetbv;
15244 break;
15246 #undef INTRINSIC_X86_XSAVE_ID
15247 Value *Mhi = Builder.CreateTrunc(
15248 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
15249 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
15250 Ops[1] = Mhi;
15251 Ops.push_back(Mlo);
15252 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15254 case X86::BI__builtin_ia32_xgetbv:
15255 case X86::BI_xgetbv:
15256 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
15257 case X86::BI__builtin_ia32_storedqudi128_mask:
15258 case X86::BI__builtin_ia32_storedqusi128_mask:
15259 case X86::BI__builtin_ia32_storedquhi128_mask:
15260 case X86::BI__builtin_ia32_storedquqi128_mask:
15261 case X86::BI__builtin_ia32_storeupd128_mask:
15262 case X86::BI__builtin_ia32_storeups128_mask:
15263 case X86::BI__builtin_ia32_storedqudi256_mask:
15264 case X86::BI__builtin_ia32_storedqusi256_mask:
15265 case X86::BI__builtin_ia32_storedquhi256_mask:
15266 case X86::BI__builtin_ia32_storedquqi256_mask:
15267 case X86::BI__builtin_ia32_storeupd256_mask:
15268 case X86::BI__builtin_ia32_storeups256_mask:
15269 case X86::BI__builtin_ia32_storedqudi512_mask:
15270 case X86::BI__builtin_ia32_storedqusi512_mask:
15271 case X86::BI__builtin_ia32_storedquhi512_mask:
15272 case X86::BI__builtin_ia32_storedquqi512_mask:
15273 case X86::BI__builtin_ia32_storeupd512_mask:
15274 case X86::BI__builtin_ia32_storeups512_mask:
15275 return EmitX86MaskedStore(*this, Ops, Align(1));
15277 case X86::BI__builtin_ia32_storesbf16128_mask:
15278 case X86::BI__builtin_ia32_storesh128_mask:
15279 case X86::BI__builtin_ia32_storess128_mask:
15280 case X86::BI__builtin_ia32_storesd128_mask:
15281 return EmitX86MaskedStore(*this, Ops, Align(1));
15283 case X86::BI__builtin_ia32_cvtmask2b128:
15284 case X86::BI__builtin_ia32_cvtmask2b256:
15285 case X86::BI__builtin_ia32_cvtmask2b512:
15286 case X86::BI__builtin_ia32_cvtmask2w128:
15287 case X86::BI__builtin_ia32_cvtmask2w256:
15288 case X86::BI__builtin_ia32_cvtmask2w512:
15289 case X86::BI__builtin_ia32_cvtmask2d128:
15290 case X86::BI__builtin_ia32_cvtmask2d256:
15291 case X86::BI__builtin_ia32_cvtmask2d512:
15292 case X86::BI__builtin_ia32_cvtmask2q128:
15293 case X86::BI__builtin_ia32_cvtmask2q256:
15294 case X86::BI__builtin_ia32_cvtmask2q512:
15295 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
15297 case X86::BI__builtin_ia32_cvtb2mask128:
15298 case X86::BI__builtin_ia32_cvtb2mask256:
15299 case X86::BI__builtin_ia32_cvtb2mask512:
15300 case X86::BI__builtin_ia32_cvtw2mask128:
15301 case X86::BI__builtin_ia32_cvtw2mask256:
15302 case X86::BI__builtin_ia32_cvtw2mask512:
15303 case X86::BI__builtin_ia32_cvtd2mask128:
15304 case X86::BI__builtin_ia32_cvtd2mask256:
15305 case X86::BI__builtin_ia32_cvtd2mask512:
15306 case X86::BI__builtin_ia32_cvtq2mask128:
15307 case X86::BI__builtin_ia32_cvtq2mask256:
15308 case X86::BI__builtin_ia32_cvtq2mask512:
15309 return EmitX86ConvertToMask(*this, Ops[0]);
15311 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
15312 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
15313 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
15314 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
15315 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
15316 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
15317 case X86::BI__builtin_ia32_vcvtdq2ph256_round_mask:
15318 case X86::BI__builtin_ia32_vcvtdq2ps256_round_mask:
15319 case X86::BI__builtin_ia32_vcvtqq2pd256_round_mask:
15320 case X86::BI__builtin_ia32_vcvtqq2ph256_round_mask:
15321 case X86::BI__builtin_ia32_vcvtqq2ps256_round_mask:
15322 case X86::BI__builtin_ia32_vcvtw2ph256_round_mask:
15323 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
15324 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
15325 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
15326 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
15327 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
15328 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
15329 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
15330 case X86::BI__builtin_ia32_vcvtudq2ph256_round_mask:
15331 case X86::BI__builtin_ia32_vcvtudq2ps256_round_mask:
15332 case X86::BI__builtin_ia32_vcvtuqq2pd256_round_mask:
15333 case X86::BI__builtin_ia32_vcvtuqq2ph256_round_mask:
15334 case X86::BI__builtin_ia32_vcvtuqq2ps256_round_mask:
15335 case X86::BI__builtin_ia32_vcvtuw2ph256_round_mask:
15336 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
15338 case X86::BI__builtin_ia32_vfmaddss3:
15339 case X86::BI__builtin_ia32_vfmaddsd3:
15340 case X86::BI__builtin_ia32_vfmaddsh3_mask:
15341 case X86::BI__builtin_ia32_vfmaddss3_mask:
15342 case X86::BI__builtin_ia32_vfmaddsd3_mask:
15343 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
15344 case X86::BI__builtin_ia32_vfmaddss:
15345 case X86::BI__builtin_ia32_vfmaddsd:
15346 return EmitScalarFMAExpr(*this, E, Ops,
15347 Constant::getNullValue(Ops[0]->getType()));
15348 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
15349 case X86::BI__builtin_ia32_vfmaddss3_maskz:
15350 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
15351 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
15352 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
15353 case X86::BI__builtin_ia32_vfmaddss3_mask3:
15354 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
15355 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
15356 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
15357 case X86::BI__builtin_ia32_vfmsubss3_mask3:
15358 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
15359 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
15360 /*NegAcc*/ true);
15361 case X86::BI__builtin_ia32_vfmaddph:
15362 case X86::BI__builtin_ia32_vfmaddps:
15363 case X86::BI__builtin_ia32_vfmaddpd:
15364 case X86::BI__builtin_ia32_vfmaddph256:
15365 case X86::BI__builtin_ia32_vfmaddps256:
15366 case X86::BI__builtin_ia32_vfmaddpd256:
15367 case X86::BI__builtin_ia32_vfmaddph512_mask:
15368 case X86::BI__builtin_ia32_vfmaddph512_maskz:
15369 case X86::BI__builtin_ia32_vfmaddph512_mask3:
15370 case X86::BI__builtin_ia32_vfmaddnepbh128:
15371 case X86::BI__builtin_ia32_vfmaddnepbh256:
15372 case X86::BI__builtin_ia32_vfmaddnepbh512:
15373 case X86::BI__builtin_ia32_vfmaddps512_mask:
15374 case X86::BI__builtin_ia32_vfmaddps512_maskz:
15375 case X86::BI__builtin_ia32_vfmaddps512_mask3:
15376 case X86::BI__builtin_ia32_vfmsubps512_mask3:
15377 case X86::BI__builtin_ia32_vfmaddpd512_mask:
15378 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
15379 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
15380 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
15381 case X86::BI__builtin_ia32_vfmsubph512_mask3:
15382 case X86::BI__builtin_ia32_vfmaddph256_round_mask:
15383 case X86::BI__builtin_ia32_vfmaddph256_round_maskz:
15384 case X86::BI__builtin_ia32_vfmaddph256_round_mask3:
15385 case X86::BI__builtin_ia32_vfmaddps256_round_mask:
15386 case X86::BI__builtin_ia32_vfmaddps256_round_maskz:
15387 case X86::BI__builtin_ia32_vfmaddps256_round_mask3:
15388 case X86::BI__builtin_ia32_vfmsubps256_round_mask3:
15389 case X86::BI__builtin_ia32_vfmaddpd256_round_mask:
15390 case X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
15391 case X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
15392 case X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
15393 case X86::BI__builtin_ia32_vfmsubph256_round_mask3:
15394 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
15395 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
15396 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
15397 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
15398 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
15399 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
15400 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
15401 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
15402 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
15403 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
15404 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
15405 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
15406 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
15407 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
15408 case X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
15409 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
15410 case X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
15411 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
15412 case X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
15413 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
15414 case X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
15415 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
15416 case X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
15417 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
15418 case X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
15419 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
15421 case X86::BI__builtin_ia32_movdqa32store128_mask:
15422 case X86::BI__builtin_ia32_movdqa64store128_mask:
15423 case X86::BI__builtin_ia32_storeaps128_mask:
15424 case X86::BI__builtin_ia32_storeapd128_mask:
15425 case X86::BI__builtin_ia32_movdqa32store256_mask:
15426 case X86::BI__builtin_ia32_movdqa64store256_mask:
15427 case X86::BI__builtin_ia32_storeaps256_mask:
15428 case X86::BI__builtin_ia32_storeapd256_mask:
15429 case X86::BI__builtin_ia32_movdqa32store512_mask:
15430 case X86::BI__builtin_ia32_movdqa64store512_mask:
15431 case X86::BI__builtin_ia32_storeaps512_mask:
15432 case X86::BI__builtin_ia32_storeapd512_mask:
15433 return EmitX86MaskedStore(
15434 *this, Ops,
15435 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
15437 case X86::BI__builtin_ia32_loadups128_mask:
15438 case X86::BI__builtin_ia32_loadups256_mask:
15439 case X86::BI__builtin_ia32_loadups512_mask:
15440 case X86::BI__builtin_ia32_loadupd128_mask:
15441 case X86::BI__builtin_ia32_loadupd256_mask:
15442 case X86::BI__builtin_ia32_loadupd512_mask:
15443 case X86::BI__builtin_ia32_loaddquqi128_mask:
15444 case X86::BI__builtin_ia32_loaddquqi256_mask:
15445 case X86::BI__builtin_ia32_loaddquqi512_mask:
15446 case X86::BI__builtin_ia32_loaddquhi128_mask:
15447 case X86::BI__builtin_ia32_loaddquhi256_mask:
15448 case X86::BI__builtin_ia32_loaddquhi512_mask:
15449 case X86::BI__builtin_ia32_loaddqusi128_mask:
15450 case X86::BI__builtin_ia32_loaddqusi256_mask:
15451 case X86::BI__builtin_ia32_loaddqusi512_mask:
15452 case X86::BI__builtin_ia32_loaddqudi128_mask:
15453 case X86::BI__builtin_ia32_loaddqudi256_mask:
15454 case X86::BI__builtin_ia32_loaddqudi512_mask:
15455 return EmitX86MaskedLoad(*this, Ops, Align(1));
15457 case X86::BI__builtin_ia32_loadsbf16128_mask:
15458 case X86::BI__builtin_ia32_loadsh128_mask:
15459 case X86::BI__builtin_ia32_loadss128_mask:
15460 case X86::BI__builtin_ia32_loadsd128_mask:
15461 return EmitX86MaskedLoad(*this, Ops, Align(1));
15463 case X86::BI__builtin_ia32_loadaps128_mask:
15464 case X86::BI__builtin_ia32_loadaps256_mask:
15465 case X86::BI__builtin_ia32_loadaps512_mask:
15466 case X86::BI__builtin_ia32_loadapd128_mask:
15467 case X86::BI__builtin_ia32_loadapd256_mask:
15468 case X86::BI__builtin_ia32_loadapd512_mask:
15469 case X86::BI__builtin_ia32_movdqa32load128_mask:
15470 case X86::BI__builtin_ia32_movdqa32load256_mask:
15471 case X86::BI__builtin_ia32_movdqa32load512_mask:
15472 case X86::BI__builtin_ia32_movdqa64load128_mask:
15473 case X86::BI__builtin_ia32_movdqa64load256_mask:
15474 case X86::BI__builtin_ia32_movdqa64load512_mask:
15475 return EmitX86MaskedLoad(
15476 *this, Ops,
15477 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
15479 case X86::BI__builtin_ia32_expandloaddf128_mask:
15480 case X86::BI__builtin_ia32_expandloaddf256_mask:
15481 case X86::BI__builtin_ia32_expandloaddf512_mask:
15482 case X86::BI__builtin_ia32_expandloadsf128_mask:
15483 case X86::BI__builtin_ia32_expandloadsf256_mask:
15484 case X86::BI__builtin_ia32_expandloadsf512_mask:
15485 case X86::BI__builtin_ia32_expandloaddi128_mask:
15486 case X86::BI__builtin_ia32_expandloaddi256_mask:
15487 case X86::BI__builtin_ia32_expandloaddi512_mask:
15488 case X86::BI__builtin_ia32_expandloadsi128_mask:
15489 case X86::BI__builtin_ia32_expandloadsi256_mask:
15490 case X86::BI__builtin_ia32_expandloadsi512_mask:
15491 case X86::BI__builtin_ia32_expandloadhi128_mask:
15492 case X86::BI__builtin_ia32_expandloadhi256_mask:
15493 case X86::BI__builtin_ia32_expandloadhi512_mask:
15494 case X86::BI__builtin_ia32_expandloadqi128_mask:
15495 case X86::BI__builtin_ia32_expandloadqi256_mask:
15496 case X86::BI__builtin_ia32_expandloadqi512_mask:
15497 return EmitX86ExpandLoad(*this, Ops);
15499 case X86::BI__builtin_ia32_compressstoredf128_mask:
15500 case X86::BI__builtin_ia32_compressstoredf256_mask:
15501 case X86::BI__builtin_ia32_compressstoredf512_mask:
15502 case X86::BI__builtin_ia32_compressstoresf128_mask:
15503 case X86::BI__builtin_ia32_compressstoresf256_mask:
15504 case X86::BI__builtin_ia32_compressstoresf512_mask:
15505 case X86::BI__builtin_ia32_compressstoredi128_mask:
15506 case X86::BI__builtin_ia32_compressstoredi256_mask:
15507 case X86::BI__builtin_ia32_compressstoredi512_mask:
15508 case X86::BI__builtin_ia32_compressstoresi128_mask:
15509 case X86::BI__builtin_ia32_compressstoresi256_mask:
15510 case X86::BI__builtin_ia32_compressstoresi512_mask:
15511 case X86::BI__builtin_ia32_compressstorehi128_mask:
15512 case X86::BI__builtin_ia32_compressstorehi256_mask:
15513 case X86::BI__builtin_ia32_compressstorehi512_mask:
15514 case X86::BI__builtin_ia32_compressstoreqi128_mask:
15515 case X86::BI__builtin_ia32_compressstoreqi256_mask:
15516 case X86::BI__builtin_ia32_compressstoreqi512_mask:
15517 return EmitX86CompressStore(*this, Ops);
15519 case X86::BI__builtin_ia32_expanddf128_mask:
15520 case X86::BI__builtin_ia32_expanddf256_mask:
15521 case X86::BI__builtin_ia32_expanddf512_mask:
15522 case X86::BI__builtin_ia32_expandsf128_mask:
15523 case X86::BI__builtin_ia32_expandsf256_mask:
15524 case X86::BI__builtin_ia32_expandsf512_mask:
15525 case X86::BI__builtin_ia32_expanddi128_mask:
15526 case X86::BI__builtin_ia32_expanddi256_mask:
15527 case X86::BI__builtin_ia32_expanddi512_mask:
15528 case X86::BI__builtin_ia32_expandsi128_mask:
15529 case X86::BI__builtin_ia32_expandsi256_mask:
15530 case X86::BI__builtin_ia32_expandsi512_mask:
15531 case X86::BI__builtin_ia32_expandhi128_mask:
15532 case X86::BI__builtin_ia32_expandhi256_mask:
15533 case X86::BI__builtin_ia32_expandhi512_mask:
15534 case X86::BI__builtin_ia32_expandqi128_mask:
15535 case X86::BI__builtin_ia32_expandqi256_mask:
15536 case X86::BI__builtin_ia32_expandqi512_mask:
15537 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
15539 case X86::BI__builtin_ia32_compressdf128_mask:
15540 case X86::BI__builtin_ia32_compressdf256_mask:
15541 case X86::BI__builtin_ia32_compressdf512_mask:
15542 case X86::BI__builtin_ia32_compresssf128_mask:
15543 case X86::BI__builtin_ia32_compresssf256_mask:
15544 case X86::BI__builtin_ia32_compresssf512_mask:
15545 case X86::BI__builtin_ia32_compressdi128_mask:
15546 case X86::BI__builtin_ia32_compressdi256_mask:
15547 case X86::BI__builtin_ia32_compressdi512_mask:
15548 case X86::BI__builtin_ia32_compresssi128_mask:
15549 case X86::BI__builtin_ia32_compresssi256_mask:
15550 case X86::BI__builtin_ia32_compresssi512_mask:
15551 case X86::BI__builtin_ia32_compresshi128_mask:
15552 case X86::BI__builtin_ia32_compresshi256_mask:
15553 case X86::BI__builtin_ia32_compresshi512_mask:
15554 case X86::BI__builtin_ia32_compressqi128_mask:
15555 case X86::BI__builtin_ia32_compressqi256_mask:
15556 case X86::BI__builtin_ia32_compressqi512_mask:
15557 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
15559 case X86::BI__builtin_ia32_gather3div2df:
15560 case X86::BI__builtin_ia32_gather3div2di:
15561 case X86::BI__builtin_ia32_gather3div4df:
15562 case X86::BI__builtin_ia32_gather3div4di:
15563 case X86::BI__builtin_ia32_gather3div4sf:
15564 case X86::BI__builtin_ia32_gather3div4si:
15565 case X86::BI__builtin_ia32_gather3div8sf:
15566 case X86::BI__builtin_ia32_gather3div8si:
15567 case X86::BI__builtin_ia32_gather3siv2df:
15568 case X86::BI__builtin_ia32_gather3siv2di:
15569 case X86::BI__builtin_ia32_gather3siv4df:
15570 case X86::BI__builtin_ia32_gather3siv4di:
15571 case X86::BI__builtin_ia32_gather3siv4sf:
15572 case X86::BI__builtin_ia32_gather3siv4si:
15573 case X86::BI__builtin_ia32_gather3siv8sf:
15574 case X86::BI__builtin_ia32_gather3siv8si:
15575 case X86::BI__builtin_ia32_gathersiv8df:
15576 case X86::BI__builtin_ia32_gathersiv16sf:
15577 case X86::BI__builtin_ia32_gatherdiv8df:
15578 case X86::BI__builtin_ia32_gatherdiv16sf:
15579 case X86::BI__builtin_ia32_gathersiv8di:
15580 case X86::BI__builtin_ia32_gathersiv16si:
15581 case X86::BI__builtin_ia32_gatherdiv8di:
15582 case X86::BI__builtin_ia32_gatherdiv16si: {
15583 Intrinsic::ID IID;
15584 switch (BuiltinID) {
15585 default: llvm_unreachable("Unexpected builtin");
15586 case X86::BI__builtin_ia32_gather3div2df:
15587 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
15588 break;
15589 case X86::BI__builtin_ia32_gather3div2di:
15590 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
15591 break;
15592 case X86::BI__builtin_ia32_gather3div4df:
15593 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
15594 break;
15595 case X86::BI__builtin_ia32_gather3div4di:
15596 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
15597 break;
15598 case X86::BI__builtin_ia32_gather3div4sf:
15599 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
15600 break;
15601 case X86::BI__builtin_ia32_gather3div4si:
15602 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
15603 break;
15604 case X86::BI__builtin_ia32_gather3div8sf:
15605 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
15606 break;
15607 case X86::BI__builtin_ia32_gather3div8si:
15608 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
15609 break;
15610 case X86::BI__builtin_ia32_gather3siv2df:
15611 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
15612 break;
15613 case X86::BI__builtin_ia32_gather3siv2di:
15614 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
15615 break;
15616 case X86::BI__builtin_ia32_gather3siv4df:
15617 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
15618 break;
15619 case X86::BI__builtin_ia32_gather3siv4di:
15620 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
15621 break;
15622 case X86::BI__builtin_ia32_gather3siv4sf:
15623 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
15624 break;
15625 case X86::BI__builtin_ia32_gather3siv4si:
15626 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
15627 break;
15628 case X86::BI__builtin_ia32_gather3siv8sf:
15629 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
15630 break;
15631 case X86::BI__builtin_ia32_gather3siv8si:
15632 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
15633 break;
15634 case X86::BI__builtin_ia32_gathersiv8df:
15635 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
15636 break;
15637 case X86::BI__builtin_ia32_gathersiv16sf:
15638 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
15639 break;
15640 case X86::BI__builtin_ia32_gatherdiv8df:
15641 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
15642 break;
15643 case X86::BI__builtin_ia32_gatherdiv16sf:
15644 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
15645 break;
15646 case X86::BI__builtin_ia32_gathersiv8di:
15647 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
15648 break;
15649 case X86::BI__builtin_ia32_gathersiv16si:
15650 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
15651 break;
15652 case X86::BI__builtin_ia32_gatherdiv8di:
15653 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
15654 break;
15655 case X86::BI__builtin_ia32_gatherdiv16si:
15656 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
15657 break;
15660 unsigned MinElts = std::min(
15661 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
15662 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
15663 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
15664 Function *Intr = CGM.getIntrinsic(IID);
15665 return Builder.CreateCall(Intr, Ops);
15668 case X86::BI__builtin_ia32_scattersiv8df:
15669 case X86::BI__builtin_ia32_scattersiv16sf:
15670 case X86::BI__builtin_ia32_scatterdiv8df:
15671 case X86::BI__builtin_ia32_scatterdiv16sf:
15672 case X86::BI__builtin_ia32_scattersiv8di:
15673 case X86::BI__builtin_ia32_scattersiv16si:
15674 case X86::BI__builtin_ia32_scatterdiv8di:
15675 case X86::BI__builtin_ia32_scatterdiv16si:
15676 case X86::BI__builtin_ia32_scatterdiv2df:
15677 case X86::BI__builtin_ia32_scatterdiv2di:
15678 case X86::BI__builtin_ia32_scatterdiv4df:
15679 case X86::BI__builtin_ia32_scatterdiv4di:
15680 case X86::BI__builtin_ia32_scatterdiv4sf:
15681 case X86::BI__builtin_ia32_scatterdiv4si:
15682 case X86::BI__builtin_ia32_scatterdiv8sf:
15683 case X86::BI__builtin_ia32_scatterdiv8si:
15684 case X86::BI__builtin_ia32_scattersiv2df:
15685 case X86::BI__builtin_ia32_scattersiv2di:
15686 case X86::BI__builtin_ia32_scattersiv4df:
15687 case X86::BI__builtin_ia32_scattersiv4di:
15688 case X86::BI__builtin_ia32_scattersiv4sf:
15689 case X86::BI__builtin_ia32_scattersiv4si:
15690 case X86::BI__builtin_ia32_scattersiv8sf:
15691 case X86::BI__builtin_ia32_scattersiv8si: {
15692 Intrinsic::ID IID;
15693 switch (BuiltinID) {
15694 default: llvm_unreachable("Unexpected builtin");
15695 case X86::BI__builtin_ia32_scattersiv8df:
15696 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
15697 break;
15698 case X86::BI__builtin_ia32_scattersiv16sf:
15699 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
15700 break;
15701 case X86::BI__builtin_ia32_scatterdiv8df:
15702 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
15703 break;
15704 case X86::BI__builtin_ia32_scatterdiv16sf:
15705 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
15706 break;
15707 case X86::BI__builtin_ia32_scattersiv8di:
15708 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
15709 break;
15710 case X86::BI__builtin_ia32_scattersiv16si:
15711 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
15712 break;
15713 case X86::BI__builtin_ia32_scatterdiv8di:
15714 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
15715 break;
15716 case X86::BI__builtin_ia32_scatterdiv16si:
15717 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
15718 break;
15719 case X86::BI__builtin_ia32_scatterdiv2df:
15720 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
15721 break;
15722 case X86::BI__builtin_ia32_scatterdiv2di:
15723 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
15724 break;
15725 case X86::BI__builtin_ia32_scatterdiv4df:
15726 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
15727 break;
15728 case X86::BI__builtin_ia32_scatterdiv4di:
15729 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
15730 break;
15731 case X86::BI__builtin_ia32_scatterdiv4sf:
15732 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
15733 break;
15734 case X86::BI__builtin_ia32_scatterdiv4si:
15735 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
15736 break;
15737 case X86::BI__builtin_ia32_scatterdiv8sf:
15738 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
15739 break;
15740 case X86::BI__builtin_ia32_scatterdiv8si:
15741 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
15742 break;
15743 case X86::BI__builtin_ia32_scattersiv2df:
15744 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
15745 break;
15746 case X86::BI__builtin_ia32_scattersiv2di:
15747 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
15748 break;
15749 case X86::BI__builtin_ia32_scattersiv4df:
15750 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
15751 break;
15752 case X86::BI__builtin_ia32_scattersiv4di:
15753 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
15754 break;
15755 case X86::BI__builtin_ia32_scattersiv4sf:
15756 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
15757 break;
15758 case X86::BI__builtin_ia32_scattersiv4si:
15759 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
15760 break;
15761 case X86::BI__builtin_ia32_scattersiv8sf:
15762 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
15763 break;
15764 case X86::BI__builtin_ia32_scattersiv8si:
15765 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
15766 break;
15769 unsigned MinElts = std::min(
15770 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
15771 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
15772 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
15773 Function *Intr = CGM.getIntrinsic(IID);
15774 return Builder.CreateCall(Intr, Ops);
15777 case X86::BI__builtin_ia32_vextractf128_pd256:
15778 case X86::BI__builtin_ia32_vextractf128_ps256:
15779 case X86::BI__builtin_ia32_vextractf128_si256:
15780 case X86::BI__builtin_ia32_extract128i256:
15781 case X86::BI__builtin_ia32_extractf64x4_mask:
15782 case X86::BI__builtin_ia32_extractf32x4_mask:
15783 case X86::BI__builtin_ia32_extracti64x4_mask:
15784 case X86::BI__builtin_ia32_extracti32x4_mask:
15785 case X86::BI__builtin_ia32_extractf32x8_mask:
15786 case X86::BI__builtin_ia32_extracti32x8_mask:
15787 case X86::BI__builtin_ia32_extractf32x4_256_mask:
15788 case X86::BI__builtin_ia32_extracti32x4_256_mask:
15789 case X86::BI__builtin_ia32_extractf64x2_256_mask:
15790 case X86::BI__builtin_ia32_extracti64x2_256_mask:
15791 case X86::BI__builtin_ia32_extractf64x2_512_mask:
15792 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
15793 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
15794 unsigned NumElts = DstTy->getNumElements();
15795 unsigned SrcNumElts =
15796 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15797 unsigned SubVectors = SrcNumElts / NumElts;
15798 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15799 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15800 Index &= SubVectors - 1; // Remove any extra bits.
15801 Index *= NumElts;
15803 int Indices[16];
15804 for (unsigned i = 0; i != NumElts; ++i)
15805 Indices[i] = i + Index;
15807 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15808 "extract");
15810 if (Ops.size() == 4)
15811 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
15813 return Res;
15815 case X86::BI__builtin_ia32_vinsertf128_pd256:
15816 case X86::BI__builtin_ia32_vinsertf128_ps256:
15817 case X86::BI__builtin_ia32_vinsertf128_si256:
15818 case X86::BI__builtin_ia32_insert128i256:
15819 case X86::BI__builtin_ia32_insertf64x4:
15820 case X86::BI__builtin_ia32_insertf32x4:
15821 case X86::BI__builtin_ia32_inserti64x4:
15822 case X86::BI__builtin_ia32_inserti32x4:
15823 case X86::BI__builtin_ia32_insertf32x8:
15824 case X86::BI__builtin_ia32_inserti32x8:
15825 case X86::BI__builtin_ia32_insertf32x4_256:
15826 case X86::BI__builtin_ia32_inserti32x4_256:
15827 case X86::BI__builtin_ia32_insertf64x2_256:
15828 case X86::BI__builtin_ia32_inserti64x2_256:
15829 case X86::BI__builtin_ia32_insertf64x2_512:
15830 case X86::BI__builtin_ia32_inserti64x2_512: {
15831 unsigned DstNumElts =
15832 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15833 unsigned SrcNumElts =
15834 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
15835 unsigned SubVectors = DstNumElts / SrcNumElts;
15836 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15837 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15838 Index &= SubVectors - 1; // Remove any extra bits.
15839 Index *= SrcNumElts;
15841 int Indices[16];
15842 for (unsigned i = 0; i != DstNumElts; ++i)
15843 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
15845 Value *Op1 = Builder.CreateShuffleVector(
15846 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
15848 for (unsigned i = 0; i != DstNumElts; ++i) {
15849 if (i >= Index && i < (Index + SrcNumElts))
15850 Indices[i] = (i - Index) + DstNumElts;
15851 else
15852 Indices[i] = i;
15855 return Builder.CreateShuffleVector(Ops[0], Op1,
15856 ArrayRef(Indices, DstNumElts), "insert");
15858 case X86::BI__builtin_ia32_pmovqd512_mask:
15859 case X86::BI__builtin_ia32_pmovwb512_mask: {
15860 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15861 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
15863 case X86::BI__builtin_ia32_pmovdb512_mask:
15864 case X86::BI__builtin_ia32_pmovdw512_mask:
15865 case X86::BI__builtin_ia32_pmovqw512_mask: {
15866 if (const auto *C = dyn_cast<Constant>(Ops[2]))
15867 if (C->isAllOnesValue())
15868 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15870 Intrinsic::ID IID;
15871 switch (BuiltinID) {
15872 default: llvm_unreachable("Unsupported intrinsic!");
15873 case X86::BI__builtin_ia32_pmovdb512_mask:
15874 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
15875 break;
15876 case X86::BI__builtin_ia32_pmovdw512_mask:
15877 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
15878 break;
15879 case X86::BI__builtin_ia32_pmovqw512_mask:
15880 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
15881 break;
15884 Function *Intr = CGM.getIntrinsic(IID);
15885 return Builder.CreateCall(Intr, Ops);
15887 case X86::BI__builtin_ia32_pblendw128:
15888 case X86::BI__builtin_ia32_blendpd:
15889 case X86::BI__builtin_ia32_blendps:
15890 case X86::BI__builtin_ia32_blendpd256:
15891 case X86::BI__builtin_ia32_blendps256:
15892 case X86::BI__builtin_ia32_pblendw256:
15893 case X86::BI__builtin_ia32_pblendd128:
15894 case X86::BI__builtin_ia32_pblendd256: {
15895 unsigned NumElts =
15896 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15897 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15899 int Indices[16];
15900 // If there are more than 8 elements, the immediate is used twice so make
15901 // sure we handle that.
15902 for (unsigned i = 0; i != NumElts; ++i)
15903 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
15905 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15906 ArrayRef(Indices, NumElts), "blend");
15908 case X86::BI__builtin_ia32_pshuflw:
15909 case X86::BI__builtin_ia32_pshuflw256:
15910 case X86::BI__builtin_ia32_pshuflw512: {
15911 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15912 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15913 unsigned NumElts = Ty->getNumElements();
15915 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15916 Imm = (Imm & 0xff) * 0x01010101;
15918 int Indices[32];
15919 for (unsigned l = 0; l != NumElts; l += 8) {
15920 for (unsigned i = 0; i != 4; ++i) {
15921 Indices[l + i] = l + (Imm & 3);
15922 Imm >>= 2;
15924 for (unsigned i = 4; i != 8; ++i)
15925 Indices[l + i] = l + i;
15928 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15929 "pshuflw");
15931 case X86::BI__builtin_ia32_pshufhw:
15932 case X86::BI__builtin_ia32_pshufhw256:
15933 case X86::BI__builtin_ia32_pshufhw512: {
15934 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15935 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15936 unsigned NumElts = Ty->getNumElements();
15938 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15939 Imm = (Imm & 0xff) * 0x01010101;
15941 int Indices[32];
15942 for (unsigned l = 0; l != NumElts; l += 8) {
15943 for (unsigned i = 0; i != 4; ++i)
15944 Indices[l + i] = l + i;
15945 for (unsigned i = 4; i != 8; ++i) {
15946 Indices[l + i] = l + 4 + (Imm & 3);
15947 Imm >>= 2;
15951 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15952 "pshufhw");
15954 case X86::BI__builtin_ia32_pshufd:
15955 case X86::BI__builtin_ia32_pshufd256:
15956 case X86::BI__builtin_ia32_pshufd512:
15957 case X86::BI__builtin_ia32_vpermilpd:
15958 case X86::BI__builtin_ia32_vpermilps:
15959 case X86::BI__builtin_ia32_vpermilpd256:
15960 case X86::BI__builtin_ia32_vpermilps256:
15961 case X86::BI__builtin_ia32_vpermilpd512:
15962 case X86::BI__builtin_ia32_vpermilps512: {
15963 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15964 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15965 unsigned NumElts = Ty->getNumElements();
15966 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15967 unsigned NumLaneElts = NumElts / NumLanes;
15969 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15970 Imm = (Imm & 0xff) * 0x01010101;
15972 int Indices[16];
15973 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15974 for (unsigned i = 0; i != NumLaneElts; ++i) {
15975 Indices[i + l] = (Imm % NumLaneElts) + l;
15976 Imm /= NumLaneElts;
15980 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15981 "permil");
15983 case X86::BI__builtin_ia32_shufpd:
15984 case X86::BI__builtin_ia32_shufpd256:
15985 case X86::BI__builtin_ia32_shufpd512:
15986 case X86::BI__builtin_ia32_shufps:
15987 case X86::BI__builtin_ia32_shufps256:
15988 case X86::BI__builtin_ia32_shufps512: {
15989 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15990 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15991 unsigned NumElts = Ty->getNumElements();
15992 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15993 unsigned NumLaneElts = NumElts / NumLanes;
15995 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15996 Imm = (Imm & 0xff) * 0x01010101;
15998 int Indices[16];
15999 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16000 for (unsigned i = 0; i != NumLaneElts; ++i) {
16001 unsigned Index = Imm % NumLaneElts;
16002 Imm /= NumLaneElts;
16003 if (i >= (NumLaneElts / 2))
16004 Index += NumElts;
16005 Indices[l + i] = l + Index;
16009 return Builder.CreateShuffleVector(Ops[0], Ops[1],
16010 ArrayRef(Indices, NumElts), "shufp");
16012 case X86::BI__builtin_ia32_permdi256:
16013 case X86::BI__builtin_ia32_permdf256:
16014 case X86::BI__builtin_ia32_permdi512:
16015 case X86::BI__builtin_ia32_permdf512: {
16016 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16017 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16018 unsigned NumElts = Ty->getNumElements();
16020 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
16021 int Indices[8];
16022 for (unsigned l = 0; l != NumElts; l += 4)
16023 for (unsigned i = 0; i != 4; ++i)
16024 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
16026 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
16027 "perm");
16029 case X86::BI__builtin_ia32_palignr128:
16030 case X86::BI__builtin_ia32_palignr256:
16031 case X86::BI__builtin_ia32_palignr512: {
16032 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
16034 unsigned NumElts =
16035 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16036 assert(NumElts % 16 == 0);
16038 // If palignr is shifting the pair of vectors more than the size of two
16039 // lanes, emit zero.
16040 if (ShiftVal >= 32)
16041 return llvm::Constant::getNullValue(ConvertType(E->getType()));
16043 // If palignr is shifting the pair of input vectors more than one lane,
16044 // but less than two lanes, convert to shifting in zeroes.
16045 if (ShiftVal > 16) {
16046 ShiftVal -= 16;
16047 Ops[1] = Ops[0];
16048 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
16051 int Indices[64];
16052 // 256-bit palignr operates on 128-bit lanes so we need to handle that
16053 for (unsigned l = 0; l != NumElts; l += 16) {
16054 for (unsigned i = 0; i != 16; ++i) {
16055 unsigned Idx = ShiftVal + i;
16056 if (Idx >= 16)
16057 Idx += NumElts - 16; // End of lane, switch operand.
16058 Indices[l + i] = Idx + l;
16062 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16063 ArrayRef(Indices, NumElts), "palignr");
16065 case X86::BI__builtin_ia32_alignd128:
16066 case X86::BI__builtin_ia32_alignd256:
16067 case X86::BI__builtin_ia32_alignd512:
16068 case X86::BI__builtin_ia32_alignq128:
16069 case X86::BI__builtin_ia32_alignq256:
16070 case X86::BI__builtin_ia32_alignq512: {
16071 unsigned NumElts =
16072 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16073 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
16075 // Mask the shift amount to width of a vector.
16076 ShiftVal &= NumElts - 1;
16078 int Indices[16];
16079 for (unsigned i = 0; i != NumElts; ++i)
16080 Indices[i] = i + ShiftVal;
16082 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16083 ArrayRef(Indices, NumElts), "valign");
16085 case X86::BI__builtin_ia32_shuf_f32x4_256:
16086 case X86::BI__builtin_ia32_shuf_f64x2_256:
16087 case X86::BI__builtin_ia32_shuf_i32x4_256:
16088 case X86::BI__builtin_ia32_shuf_i64x2_256:
16089 case X86::BI__builtin_ia32_shuf_f32x4:
16090 case X86::BI__builtin_ia32_shuf_f64x2:
16091 case X86::BI__builtin_ia32_shuf_i32x4:
16092 case X86::BI__builtin_ia32_shuf_i64x2: {
16093 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16094 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16095 unsigned NumElts = Ty->getNumElements();
16096 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
16097 unsigned NumLaneElts = NumElts / NumLanes;
16099 int Indices[16];
16100 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16101 unsigned Index = (Imm % NumLanes) * NumLaneElts;
16102 Imm /= NumLanes; // Discard the bits we just used.
16103 if (l >= (NumElts / 2))
16104 Index += NumElts; // Switch to other source.
16105 for (unsigned i = 0; i != NumLaneElts; ++i) {
16106 Indices[l + i] = Index + i;
16110 return Builder.CreateShuffleVector(Ops[0], Ops[1],
16111 ArrayRef(Indices, NumElts), "shuf");
16114 case X86::BI__builtin_ia32_vperm2f128_pd256:
16115 case X86::BI__builtin_ia32_vperm2f128_ps256:
16116 case X86::BI__builtin_ia32_vperm2f128_si256:
16117 case X86::BI__builtin_ia32_permti256: {
16118 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16119 unsigned NumElts =
16120 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16122 // This takes a very simple approach since there are two lanes and a
16123 // shuffle can have 2 inputs. So we reserve the first input for the first
16124 // lane and the second input for the second lane. This may result in
16125 // duplicate sources, but this can be dealt with in the backend.
16127 Value *OutOps[2];
16128 int Indices[8];
16129 for (unsigned l = 0; l != 2; ++l) {
16130 // Determine the source for this lane.
16131 if (Imm & (1 << ((l * 4) + 3)))
16132 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
16133 else if (Imm & (1 << ((l * 4) + 1)))
16134 OutOps[l] = Ops[1];
16135 else
16136 OutOps[l] = Ops[0];
16138 for (unsigned i = 0; i != NumElts/2; ++i) {
16139 // Start with ith element of the source for this lane.
16140 unsigned Idx = (l * NumElts) + i;
16141 // If bit 0 of the immediate half is set, switch to the high half of
16142 // the source.
16143 if (Imm & (1 << (l * 4)))
16144 Idx += NumElts/2;
16145 Indices[(l * (NumElts/2)) + i] = Idx;
16149 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
16150 ArrayRef(Indices, NumElts), "vperm");
16153 case X86::BI__builtin_ia32_pslldqi128_byteshift:
16154 case X86::BI__builtin_ia32_pslldqi256_byteshift:
16155 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
16156 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16157 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16158 // Builtin type is vXi64 so multiply by 8 to get bytes.
16159 unsigned NumElts = ResultType->getNumElements() * 8;
16161 // If pslldq is shifting the vector more than 15 bytes, emit zero.
16162 if (ShiftVal >= 16)
16163 return llvm::Constant::getNullValue(ResultType);
16165 int Indices[64];
16166 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
16167 for (unsigned l = 0; l != NumElts; l += 16) {
16168 for (unsigned i = 0; i != 16; ++i) {
16169 unsigned Idx = NumElts + i - ShiftVal;
16170 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
16171 Indices[l + i] = Idx + l;
16175 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
16176 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
16177 Value *Zero = llvm::Constant::getNullValue(VecTy);
16178 Value *SV = Builder.CreateShuffleVector(
16179 Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
16180 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
16182 case X86::BI__builtin_ia32_psrldqi128_byteshift:
16183 case X86::BI__builtin_ia32_psrldqi256_byteshift:
16184 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
16185 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16186 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16187 // Builtin type is vXi64 so multiply by 8 to get bytes.
16188 unsigned NumElts = ResultType->getNumElements() * 8;
16190 // If psrldq is shifting the vector more than 15 bytes, emit zero.
16191 if (ShiftVal >= 16)
16192 return llvm::Constant::getNullValue(ResultType);
16194 int Indices[64];
16195 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
16196 for (unsigned l = 0; l != NumElts; l += 16) {
16197 for (unsigned i = 0; i != 16; ++i) {
16198 unsigned Idx = i + ShiftVal;
16199 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
16200 Indices[l + i] = Idx + l;
16204 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
16205 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
16206 Value *Zero = llvm::Constant::getNullValue(VecTy);
16207 Value *SV = Builder.CreateShuffleVector(
16208 Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
16209 return Builder.CreateBitCast(SV, ResultType, "cast");
16211 case X86::BI__builtin_ia32_kshiftliqi:
16212 case X86::BI__builtin_ia32_kshiftlihi:
16213 case X86::BI__builtin_ia32_kshiftlisi:
16214 case X86::BI__builtin_ia32_kshiftlidi: {
16215 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16216 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16218 if (ShiftVal >= NumElts)
16219 return llvm::Constant::getNullValue(Ops[0]->getType());
16221 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
16223 int Indices[64];
16224 for (unsigned i = 0; i != NumElts; ++i)
16225 Indices[i] = NumElts + i - ShiftVal;
16227 Value *Zero = llvm::Constant::getNullValue(In->getType());
16228 Value *SV = Builder.CreateShuffleVector(
16229 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
16230 return Builder.CreateBitCast(SV, Ops[0]->getType());
16232 case X86::BI__builtin_ia32_kshiftriqi:
16233 case X86::BI__builtin_ia32_kshiftrihi:
16234 case X86::BI__builtin_ia32_kshiftrisi:
16235 case X86::BI__builtin_ia32_kshiftridi: {
16236 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16237 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16239 if (ShiftVal >= NumElts)
16240 return llvm::Constant::getNullValue(Ops[0]->getType());
16242 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
16244 int Indices[64];
16245 for (unsigned i = 0; i != NumElts; ++i)
16246 Indices[i] = i + ShiftVal;
16248 Value *Zero = llvm::Constant::getNullValue(In->getType());
16249 Value *SV = Builder.CreateShuffleVector(
16250 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
16251 return Builder.CreateBitCast(SV, Ops[0]->getType());
16253 case X86::BI__builtin_ia32_movnti:
16254 case X86::BI__builtin_ia32_movnti64:
16255 case X86::BI__builtin_ia32_movntsd:
16256 case X86::BI__builtin_ia32_movntss: {
16257 llvm::MDNode *Node = llvm::MDNode::get(
16258 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
16260 Value *Ptr = Ops[0];
16261 Value *Src = Ops[1];
16263 // Extract the 0'th element of the source vector.
16264 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
16265 BuiltinID == X86::BI__builtin_ia32_movntss)
16266 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
16268 // Unaligned nontemporal store of the scalar value.
16269 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
16270 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
16271 SI->setAlignment(llvm::Align(1));
16272 return SI;
16274 // Rotate is a special case of funnel shift - 1st 2 args are the same.
16275 case X86::BI__builtin_ia32_vprotb:
16276 case X86::BI__builtin_ia32_vprotw:
16277 case X86::BI__builtin_ia32_vprotd:
16278 case X86::BI__builtin_ia32_vprotq:
16279 case X86::BI__builtin_ia32_vprotbi:
16280 case X86::BI__builtin_ia32_vprotwi:
16281 case X86::BI__builtin_ia32_vprotdi:
16282 case X86::BI__builtin_ia32_vprotqi:
16283 case X86::BI__builtin_ia32_prold128:
16284 case X86::BI__builtin_ia32_prold256:
16285 case X86::BI__builtin_ia32_prold512:
16286 case X86::BI__builtin_ia32_prolq128:
16287 case X86::BI__builtin_ia32_prolq256:
16288 case X86::BI__builtin_ia32_prolq512:
16289 case X86::BI__builtin_ia32_prolvd128:
16290 case X86::BI__builtin_ia32_prolvd256:
16291 case X86::BI__builtin_ia32_prolvd512:
16292 case X86::BI__builtin_ia32_prolvq128:
16293 case X86::BI__builtin_ia32_prolvq256:
16294 case X86::BI__builtin_ia32_prolvq512:
16295 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
16296 case X86::BI__builtin_ia32_prord128:
16297 case X86::BI__builtin_ia32_prord256:
16298 case X86::BI__builtin_ia32_prord512:
16299 case X86::BI__builtin_ia32_prorq128:
16300 case X86::BI__builtin_ia32_prorq256:
16301 case X86::BI__builtin_ia32_prorq512:
16302 case X86::BI__builtin_ia32_prorvd128:
16303 case X86::BI__builtin_ia32_prorvd256:
16304 case X86::BI__builtin_ia32_prorvd512:
16305 case X86::BI__builtin_ia32_prorvq128:
16306 case X86::BI__builtin_ia32_prorvq256:
16307 case X86::BI__builtin_ia32_prorvq512:
16308 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
16309 case X86::BI__builtin_ia32_selectb_128:
16310 case X86::BI__builtin_ia32_selectb_256:
16311 case X86::BI__builtin_ia32_selectb_512:
16312 case X86::BI__builtin_ia32_selectw_128:
16313 case X86::BI__builtin_ia32_selectw_256:
16314 case X86::BI__builtin_ia32_selectw_512:
16315 case X86::BI__builtin_ia32_selectd_128:
16316 case X86::BI__builtin_ia32_selectd_256:
16317 case X86::BI__builtin_ia32_selectd_512:
16318 case X86::BI__builtin_ia32_selectq_128:
16319 case X86::BI__builtin_ia32_selectq_256:
16320 case X86::BI__builtin_ia32_selectq_512:
16321 case X86::BI__builtin_ia32_selectph_128:
16322 case X86::BI__builtin_ia32_selectph_256:
16323 case X86::BI__builtin_ia32_selectph_512:
16324 case X86::BI__builtin_ia32_selectpbf_128:
16325 case X86::BI__builtin_ia32_selectpbf_256:
16326 case X86::BI__builtin_ia32_selectpbf_512:
16327 case X86::BI__builtin_ia32_selectps_128:
16328 case X86::BI__builtin_ia32_selectps_256:
16329 case X86::BI__builtin_ia32_selectps_512:
16330 case X86::BI__builtin_ia32_selectpd_128:
16331 case X86::BI__builtin_ia32_selectpd_256:
16332 case X86::BI__builtin_ia32_selectpd_512:
16333 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
16334 case X86::BI__builtin_ia32_selectsh_128:
16335 case X86::BI__builtin_ia32_selectsbf_128:
16336 case X86::BI__builtin_ia32_selectss_128:
16337 case X86::BI__builtin_ia32_selectsd_128: {
16338 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16339 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16340 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
16341 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
16343 case X86::BI__builtin_ia32_cmpb128_mask:
16344 case X86::BI__builtin_ia32_cmpb256_mask:
16345 case X86::BI__builtin_ia32_cmpb512_mask:
16346 case X86::BI__builtin_ia32_cmpw128_mask:
16347 case X86::BI__builtin_ia32_cmpw256_mask:
16348 case X86::BI__builtin_ia32_cmpw512_mask:
16349 case X86::BI__builtin_ia32_cmpd128_mask:
16350 case X86::BI__builtin_ia32_cmpd256_mask:
16351 case X86::BI__builtin_ia32_cmpd512_mask:
16352 case X86::BI__builtin_ia32_cmpq128_mask:
16353 case X86::BI__builtin_ia32_cmpq256_mask:
16354 case X86::BI__builtin_ia32_cmpq512_mask: {
16355 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16356 return EmitX86MaskedCompare(*this, CC, true, Ops);
16358 case X86::BI__builtin_ia32_ucmpb128_mask:
16359 case X86::BI__builtin_ia32_ucmpb256_mask:
16360 case X86::BI__builtin_ia32_ucmpb512_mask:
16361 case X86::BI__builtin_ia32_ucmpw128_mask:
16362 case X86::BI__builtin_ia32_ucmpw256_mask:
16363 case X86::BI__builtin_ia32_ucmpw512_mask:
16364 case X86::BI__builtin_ia32_ucmpd128_mask:
16365 case X86::BI__builtin_ia32_ucmpd256_mask:
16366 case X86::BI__builtin_ia32_ucmpd512_mask:
16367 case X86::BI__builtin_ia32_ucmpq128_mask:
16368 case X86::BI__builtin_ia32_ucmpq256_mask:
16369 case X86::BI__builtin_ia32_ucmpq512_mask: {
16370 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16371 return EmitX86MaskedCompare(*this, CC, false, Ops);
16373 case X86::BI__builtin_ia32_vpcomb:
16374 case X86::BI__builtin_ia32_vpcomw:
16375 case X86::BI__builtin_ia32_vpcomd:
16376 case X86::BI__builtin_ia32_vpcomq:
16377 return EmitX86vpcom(*this, Ops, true);
16378 case X86::BI__builtin_ia32_vpcomub:
16379 case X86::BI__builtin_ia32_vpcomuw:
16380 case X86::BI__builtin_ia32_vpcomud:
16381 case X86::BI__builtin_ia32_vpcomuq:
16382 return EmitX86vpcom(*this, Ops, false);
16384 case X86::BI__builtin_ia32_kortestcqi:
16385 case X86::BI__builtin_ia32_kortestchi:
16386 case X86::BI__builtin_ia32_kortestcsi:
16387 case X86::BI__builtin_ia32_kortestcdi: {
16388 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
16389 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
16390 Value *Cmp = Builder.CreateICmpEQ(Or, C);
16391 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
16393 case X86::BI__builtin_ia32_kortestzqi:
16394 case X86::BI__builtin_ia32_kortestzhi:
16395 case X86::BI__builtin_ia32_kortestzsi:
16396 case X86::BI__builtin_ia32_kortestzdi: {
16397 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
16398 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
16399 Value *Cmp = Builder.CreateICmpEQ(Or, C);
16400 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
16403 case X86::BI__builtin_ia32_ktestcqi:
16404 case X86::BI__builtin_ia32_ktestzqi:
16405 case X86::BI__builtin_ia32_ktestchi:
16406 case X86::BI__builtin_ia32_ktestzhi:
16407 case X86::BI__builtin_ia32_ktestcsi:
16408 case X86::BI__builtin_ia32_ktestzsi:
16409 case X86::BI__builtin_ia32_ktestcdi:
16410 case X86::BI__builtin_ia32_ktestzdi: {
16411 Intrinsic::ID IID;
16412 switch (BuiltinID) {
16413 default: llvm_unreachable("Unsupported intrinsic!");
16414 case X86::BI__builtin_ia32_ktestcqi:
16415 IID = Intrinsic::x86_avx512_ktestc_b;
16416 break;
16417 case X86::BI__builtin_ia32_ktestzqi:
16418 IID = Intrinsic::x86_avx512_ktestz_b;
16419 break;
16420 case X86::BI__builtin_ia32_ktestchi:
16421 IID = Intrinsic::x86_avx512_ktestc_w;
16422 break;
16423 case X86::BI__builtin_ia32_ktestzhi:
16424 IID = Intrinsic::x86_avx512_ktestz_w;
16425 break;
16426 case X86::BI__builtin_ia32_ktestcsi:
16427 IID = Intrinsic::x86_avx512_ktestc_d;
16428 break;
16429 case X86::BI__builtin_ia32_ktestzsi:
16430 IID = Intrinsic::x86_avx512_ktestz_d;
16431 break;
16432 case X86::BI__builtin_ia32_ktestcdi:
16433 IID = Intrinsic::x86_avx512_ktestc_q;
16434 break;
16435 case X86::BI__builtin_ia32_ktestzdi:
16436 IID = Intrinsic::x86_avx512_ktestz_q;
16437 break;
16440 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16441 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16442 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16443 Function *Intr = CGM.getIntrinsic(IID);
16444 return Builder.CreateCall(Intr, {LHS, RHS});
16447 case X86::BI__builtin_ia32_kaddqi:
16448 case X86::BI__builtin_ia32_kaddhi:
16449 case X86::BI__builtin_ia32_kaddsi:
16450 case X86::BI__builtin_ia32_kadddi: {
16451 Intrinsic::ID IID;
16452 switch (BuiltinID) {
16453 default: llvm_unreachable("Unsupported intrinsic!");
16454 case X86::BI__builtin_ia32_kaddqi:
16455 IID = Intrinsic::x86_avx512_kadd_b;
16456 break;
16457 case X86::BI__builtin_ia32_kaddhi:
16458 IID = Intrinsic::x86_avx512_kadd_w;
16459 break;
16460 case X86::BI__builtin_ia32_kaddsi:
16461 IID = Intrinsic::x86_avx512_kadd_d;
16462 break;
16463 case X86::BI__builtin_ia32_kadddi:
16464 IID = Intrinsic::x86_avx512_kadd_q;
16465 break;
16468 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16469 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16470 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16471 Function *Intr = CGM.getIntrinsic(IID);
16472 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
16473 return Builder.CreateBitCast(Res, Ops[0]->getType());
16475 case X86::BI__builtin_ia32_kandqi:
16476 case X86::BI__builtin_ia32_kandhi:
16477 case X86::BI__builtin_ia32_kandsi:
16478 case X86::BI__builtin_ia32_kanddi:
16479 return EmitX86MaskLogic(*this, Instruction::And, Ops);
16480 case X86::BI__builtin_ia32_kandnqi:
16481 case X86::BI__builtin_ia32_kandnhi:
16482 case X86::BI__builtin_ia32_kandnsi:
16483 case X86::BI__builtin_ia32_kandndi:
16484 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
16485 case X86::BI__builtin_ia32_korqi:
16486 case X86::BI__builtin_ia32_korhi:
16487 case X86::BI__builtin_ia32_korsi:
16488 case X86::BI__builtin_ia32_kordi:
16489 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
16490 case X86::BI__builtin_ia32_kxnorqi:
16491 case X86::BI__builtin_ia32_kxnorhi:
16492 case X86::BI__builtin_ia32_kxnorsi:
16493 case X86::BI__builtin_ia32_kxnordi:
16494 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
16495 case X86::BI__builtin_ia32_kxorqi:
16496 case X86::BI__builtin_ia32_kxorhi:
16497 case X86::BI__builtin_ia32_kxorsi:
16498 case X86::BI__builtin_ia32_kxordi:
16499 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
16500 case X86::BI__builtin_ia32_knotqi:
16501 case X86::BI__builtin_ia32_knothi:
16502 case X86::BI__builtin_ia32_knotsi:
16503 case X86::BI__builtin_ia32_knotdi: {
16504 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16505 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
16506 return Builder.CreateBitCast(Builder.CreateNot(Res),
16507 Ops[0]->getType());
16509 case X86::BI__builtin_ia32_kmovb:
16510 case X86::BI__builtin_ia32_kmovw:
16511 case X86::BI__builtin_ia32_kmovd:
16512 case X86::BI__builtin_ia32_kmovq: {
16513 // Bitcast to vXi1 type and then back to integer. This gets the mask
16514 // register type into the IR, but might be optimized out depending on
16515 // what's around it.
16516 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16517 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
16518 return Builder.CreateBitCast(Res, Ops[0]->getType());
16521 case X86::BI__builtin_ia32_kunpckdi:
16522 case X86::BI__builtin_ia32_kunpcksi:
16523 case X86::BI__builtin_ia32_kunpckhi: {
16524 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16525 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16526 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16527 int Indices[64];
16528 for (unsigned i = 0; i != NumElts; ++i)
16529 Indices[i] = i;
16531 // First extract half of each vector. This gives better codegen than
16532 // doing it in a single shuffle.
16533 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
16534 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
16535 // Concat the vectors.
16536 // NOTE: Operands are swapped to match the intrinsic definition.
16537 Value *Res =
16538 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
16539 return Builder.CreateBitCast(Res, Ops[0]->getType());
16542 case X86::BI__builtin_ia32_vplzcntd_128:
16543 case X86::BI__builtin_ia32_vplzcntd_256:
16544 case X86::BI__builtin_ia32_vplzcntd_512:
16545 case X86::BI__builtin_ia32_vplzcntq_128:
16546 case X86::BI__builtin_ia32_vplzcntq_256:
16547 case X86::BI__builtin_ia32_vplzcntq_512: {
16548 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
16549 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
16551 case X86::BI__builtin_ia32_sqrtss:
16552 case X86::BI__builtin_ia32_sqrtsd: {
16553 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
16554 Function *F;
16555 if (Builder.getIsFPConstrained()) {
16556 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16557 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16558 A->getType());
16559 A = Builder.CreateConstrainedFPCall(F, {A});
16560 } else {
16561 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16562 A = Builder.CreateCall(F, {A});
16564 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16566 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16567 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16568 case X86::BI__builtin_ia32_sqrtss_round_mask: {
16569 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
16570 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
16571 // otherwise keep the intrinsic.
16572 if (CC != 4) {
16573 Intrinsic::ID IID;
16575 switch (BuiltinID) {
16576 default:
16577 llvm_unreachable("Unsupported intrinsic!");
16578 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16579 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
16580 break;
16581 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16582 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
16583 break;
16584 case X86::BI__builtin_ia32_sqrtss_round_mask:
16585 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
16586 break;
16588 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16590 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16591 Function *F;
16592 if (Builder.getIsFPConstrained()) {
16593 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16594 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16595 A->getType());
16596 A = Builder.CreateConstrainedFPCall(F, A);
16597 } else {
16598 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16599 A = Builder.CreateCall(F, A);
16601 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16602 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
16603 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16605 case X86::BI__builtin_ia32_sqrtpd256:
16606 case X86::BI__builtin_ia32_sqrtpd:
16607 case X86::BI__builtin_ia32_sqrtps256:
16608 case X86::BI__builtin_ia32_sqrtps:
16609 case X86::BI__builtin_ia32_sqrtph256:
16610 case X86::BI__builtin_ia32_sqrtph:
16611 case X86::BI__builtin_ia32_sqrtph512:
16612 case X86::BI__builtin_ia32_vsqrtnepbf16256:
16613 case X86::BI__builtin_ia32_vsqrtnepbf16:
16614 case X86::BI__builtin_ia32_vsqrtnepbf16512:
16615 case X86::BI__builtin_ia32_sqrtps512:
16616 case X86::BI__builtin_ia32_sqrtpd512: {
16617 if (Ops.size() == 2) {
16618 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16619 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
16620 // otherwise keep the intrinsic.
16621 if (CC != 4) {
16622 Intrinsic::ID IID;
16624 switch (BuiltinID) {
16625 default:
16626 llvm_unreachable("Unsupported intrinsic!");
16627 case X86::BI__builtin_ia32_sqrtph512:
16628 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
16629 break;
16630 case X86::BI__builtin_ia32_sqrtps512:
16631 IID = Intrinsic::x86_avx512_sqrt_ps_512;
16632 break;
16633 case X86::BI__builtin_ia32_sqrtpd512:
16634 IID = Intrinsic::x86_avx512_sqrt_pd_512;
16635 break;
16637 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16640 if (Builder.getIsFPConstrained()) {
16641 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16642 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16643 Ops[0]->getType());
16644 return Builder.CreateConstrainedFPCall(F, Ops[0]);
16645 } else {
16646 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
16647 return Builder.CreateCall(F, Ops[0]);
16651 case X86::BI__builtin_ia32_pmuludq128:
16652 case X86::BI__builtin_ia32_pmuludq256:
16653 case X86::BI__builtin_ia32_pmuludq512:
16654 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
16656 case X86::BI__builtin_ia32_pmuldq128:
16657 case X86::BI__builtin_ia32_pmuldq256:
16658 case X86::BI__builtin_ia32_pmuldq512:
16659 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
16661 case X86::BI__builtin_ia32_pternlogd512_mask:
16662 case X86::BI__builtin_ia32_pternlogq512_mask:
16663 case X86::BI__builtin_ia32_pternlogd128_mask:
16664 case X86::BI__builtin_ia32_pternlogd256_mask:
16665 case X86::BI__builtin_ia32_pternlogq128_mask:
16666 case X86::BI__builtin_ia32_pternlogq256_mask:
16667 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
16669 case X86::BI__builtin_ia32_pternlogd512_maskz:
16670 case X86::BI__builtin_ia32_pternlogq512_maskz:
16671 case X86::BI__builtin_ia32_pternlogd128_maskz:
16672 case X86::BI__builtin_ia32_pternlogd256_maskz:
16673 case X86::BI__builtin_ia32_pternlogq128_maskz:
16674 case X86::BI__builtin_ia32_pternlogq256_maskz:
16675 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
16677 case X86::BI__builtin_ia32_vpshldd128:
16678 case X86::BI__builtin_ia32_vpshldd256:
16679 case X86::BI__builtin_ia32_vpshldd512:
16680 case X86::BI__builtin_ia32_vpshldq128:
16681 case X86::BI__builtin_ia32_vpshldq256:
16682 case X86::BI__builtin_ia32_vpshldq512:
16683 case X86::BI__builtin_ia32_vpshldw128:
16684 case X86::BI__builtin_ia32_vpshldw256:
16685 case X86::BI__builtin_ia32_vpshldw512:
16686 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16688 case X86::BI__builtin_ia32_vpshrdd128:
16689 case X86::BI__builtin_ia32_vpshrdd256:
16690 case X86::BI__builtin_ia32_vpshrdd512:
16691 case X86::BI__builtin_ia32_vpshrdq128:
16692 case X86::BI__builtin_ia32_vpshrdq256:
16693 case X86::BI__builtin_ia32_vpshrdq512:
16694 case X86::BI__builtin_ia32_vpshrdw128:
16695 case X86::BI__builtin_ia32_vpshrdw256:
16696 case X86::BI__builtin_ia32_vpshrdw512:
16697 // Ops 0 and 1 are swapped.
16698 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16700 case X86::BI__builtin_ia32_vpshldvd128:
16701 case X86::BI__builtin_ia32_vpshldvd256:
16702 case X86::BI__builtin_ia32_vpshldvd512:
16703 case X86::BI__builtin_ia32_vpshldvq128:
16704 case X86::BI__builtin_ia32_vpshldvq256:
16705 case X86::BI__builtin_ia32_vpshldvq512:
16706 case X86::BI__builtin_ia32_vpshldvw128:
16707 case X86::BI__builtin_ia32_vpshldvw256:
16708 case X86::BI__builtin_ia32_vpshldvw512:
16709 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16711 case X86::BI__builtin_ia32_vpshrdvd128:
16712 case X86::BI__builtin_ia32_vpshrdvd256:
16713 case X86::BI__builtin_ia32_vpshrdvd512:
16714 case X86::BI__builtin_ia32_vpshrdvq128:
16715 case X86::BI__builtin_ia32_vpshrdvq256:
16716 case X86::BI__builtin_ia32_vpshrdvq512:
16717 case X86::BI__builtin_ia32_vpshrdvw128:
16718 case X86::BI__builtin_ia32_vpshrdvw256:
16719 case X86::BI__builtin_ia32_vpshrdvw512:
16720 // Ops 0 and 1 are swapped.
16721 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16723 // Reductions
16724 case X86::BI__builtin_ia32_reduce_fadd_pd512:
16725 case X86::BI__builtin_ia32_reduce_fadd_ps512:
16726 case X86::BI__builtin_ia32_reduce_fadd_ph512:
16727 case X86::BI__builtin_ia32_reduce_fadd_ph256:
16728 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
16729 Function *F =
16730 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
16731 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16732 Builder.getFastMathFlags().setAllowReassoc();
16733 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16735 case X86::BI__builtin_ia32_reduce_fmul_pd512:
16736 case X86::BI__builtin_ia32_reduce_fmul_ps512:
16737 case X86::BI__builtin_ia32_reduce_fmul_ph512:
16738 case X86::BI__builtin_ia32_reduce_fmul_ph256:
16739 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
16740 Function *F =
16741 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
16742 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16743 Builder.getFastMathFlags().setAllowReassoc();
16744 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16746 case X86::BI__builtin_ia32_reduce_fmax_pd512:
16747 case X86::BI__builtin_ia32_reduce_fmax_ps512:
16748 case X86::BI__builtin_ia32_reduce_fmax_ph512:
16749 case X86::BI__builtin_ia32_reduce_fmax_ph256:
16750 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
16751 Function *F =
16752 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
16753 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16754 Builder.getFastMathFlags().setNoNaNs();
16755 return Builder.CreateCall(F, {Ops[0]});
16757 case X86::BI__builtin_ia32_reduce_fmin_pd512:
16758 case X86::BI__builtin_ia32_reduce_fmin_ps512:
16759 case X86::BI__builtin_ia32_reduce_fmin_ph512:
16760 case X86::BI__builtin_ia32_reduce_fmin_ph256:
16761 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
16762 Function *F =
16763 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
16764 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16765 Builder.getFastMathFlags().setNoNaNs();
16766 return Builder.CreateCall(F, {Ops[0]});
16769 case X86::BI__builtin_ia32_rdrand16_step:
16770 case X86::BI__builtin_ia32_rdrand32_step:
16771 case X86::BI__builtin_ia32_rdrand64_step:
16772 case X86::BI__builtin_ia32_rdseed16_step:
16773 case X86::BI__builtin_ia32_rdseed32_step:
16774 case X86::BI__builtin_ia32_rdseed64_step: {
16775 Intrinsic::ID ID;
16776 switch (BuiltinID) {
16777 default: llvm_unreachable("Unsupported intrinsic!");
16778 case X86::BI__builtin_ia32_rdrand16_step:
16779 ID = Intrinsic::x86_rdrand_16;
16780 break;
16781 case X86::BI__builtin_ia32_rdrand32_step:
16782 ID = Intrinsic::x86_rdrand_32;
16783 break;
16784 case X86::BI__builtin_ia32_rdrand64_step:
16785 ID = Intrinsic::x86_rdrand_64;
16786 break;
16787 case X86::BI__builtin_ia32_rdseed16_step:
16788 ID = Intrinsic::x86_rdseed_16;
16789 break;
16790 case X86::BI__builtin_ia32_rdseed32_step:
16791 ID = Intrinsic::x86_rdseed_32;
16792 break;
16793 case X86::BI__builtin_ia32_rdseed64_step:
16794 ID = Intrinsic::x86_rdseed_64;
16795 break;
16798 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
16799 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
16800 Ops[0]);
16801 return Builder.CreateExtractValue(Call, 1);
16803 case X86::BI__builtin_ia32_addcarryx_u32:
16804 case X86::BI__builtin_ia32_addcarryx_u64:
16805 case X86::BI__builtin_ia32_subborrow_u32:
16806 case X86::BI__builtin_ia32_subborrow_u64: {
16807 Intrinsic::ID IID;
16808 switch (BuiltinID) {
16809 default: llvm_unreachable("Unsupported intrinsic!");
16810 case X86::BI__builtin_ia32_addcarryx_u32:
16811 IID = Intrinsic::x86_addcarry_32;
16812 break;
16813 case X86::BI__builtin_ia32_addcarryx_u64:
16814 IID = Intrinsic::x86_addcarry_64;
16815 break;
16816 case X86::BI__builtin_ia32_subborrow_u32:
16817 IID = Intrinsic::x86_subborrow_32;
16818 break;
16819 case X86::BI__builtin_ia32_subborrow_u64:
16820 IID = Intrinsic::x86_subborrow_64;
16821 break;
16824 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
16825 { Ops[0], Ops[1], Ops[2] });
16826 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
16827 Ops[3]);
16828 return Builder.CreateExtractValue(Call, 0);
16831 case X86::BI__builtin_ia32_fpclassps128_mask:
16832 case X86::BI__builtin_ia32_fpclassps256_mask:
16833 case X86::BI__builtin_ia32_fpclassps512_mask:
16834 case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16835 case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16836 case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16837 case X86::BI__builtin_ia32_fpclassph128_mask:
16838 case X86::BI__builtin_ia32_fpclassph256_mask:
16839 case X86::BI__builtin_ia32_fpclassph512_mask:
16840 case X86::BI__builtin_ia32_fpclasspd128_mask:
16841 case X86::BI__builtin_ia32_fpclasspd256_mask:
16842 case X86::BI__builtin_ia32_fpclasspd512_mask: {
16843 unsigned NumElts =
16844 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16845 Value *MaskIn = Ops[2];
16846 Ops.erase(&Ops[2]);
16848 Intrinsic::ID ID;
16849 switch (BuiltinID) {
16850 default: llvm_unreachable("Unsupported intrinsic!");
16851 case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16852 ID = Intrinsic::x86_avx10_fpclass_nepbf16_128;
16853 break;
16854 case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16855 ID = Intrinsic::x86_avx10_fpclass_nepbf16_256;
16856 break;
16857 case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16858 ID = Intrinsic::x86_avx10_fpclass_nepbf16_512;
16859 break;
16860 case X86::BI__builtin_ia32_fpclassph128_mask:
16861 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
16862 break;
16863 case X86::BI__builtin_ia32_fpclassph256_mask:
16864 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
16865 break;
16866 case X86::BI__builtin_ia32_fpclassph512_mask:
16867 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
16868 break;
16869 case X86::BI__builtin_ia32_fpclassps128_mask:
16870 ID = Intrinsic::x86_avx512_fpclass_ps_128;
16871 break;
16872 case X86::BI__builtin_ia32_fpclassps256_mask:
16873 ID = Intrinsic::x86_avx512_fpclass_ps_256;
16874 break;
16875 case X86::BI__builtin_ia32_fpclassps512_mask:
16876 ID = Intrinsic::x86_avx512_fpclass_ps_512;
16877 break;
16878 case X86::BI__builtin_ia32_fpclasspd128_mask:
16879 ID = Intrinsic::x86_avx512_fpclass_pd_128;
16880 break;
16881 case X86::BI__builtin_ia32_fpclasspd256_mask:
16882 ID = Intrinsic::x86_avx512_fpclass_pd_256;
16883 break;
16884 case X86::BI__builtin_ia32_fpclasspd512_mask:
16885 ID = Intrinsic::x86_avx512_fpclass_pd_512;
16886 break;
16889 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16890 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
16893 case X86::BI__builtin_ia32_vp2intersect_q_512:
16894 case X86::BI__builtin_ia32_vp2intersect_q_256:
16895 case X86::BI__builtin_ia32_vp2intersect_q_128:
16896 case X86::BI__builtin_ia32_vp2intersect_d_512:
16897 case X86::BI__builtin_ia32_vp2intersect_d_256:
16898 case X86::BI__builtin_ia32_vp2intersect_d_128: {
16899 unsigned NumElts =
16900 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16901 Intrinsic::ID ID;
16903 switch (BuiltinID) {
16904 default: llvm_unreachable("Unsupported intrinsic!");
16905 case X86::BI__builtin_ia32_vp2intersect_q_512:
16906 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
16907 break;
16908 case X86::BI__builtin_ia32_vp2intersect_q_256:
16909 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
16910 break;
16911 case X86::BI__builtin_ia32_vp2intersect_q_128:
16912 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
16913 break;
16914 case X86::BI__builtin_ia32_vp2intersect_d_512:
16915 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
16916 break;
16917 case X86::BI__builtin_ia32_vp2intersect_d_256:
16918 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
16919 break;
16920 case X86::BI__builtin_ia32_vp2intersect_d_128:
16921 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
16922 break;
16925 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
16926 Value *Result = Builder.CreateExtractValue(Call, 0);
16927 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16928 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
16930 Result = Builder.CreateExtractValue(Call, 1);
16931 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16932 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
16935 case X86::BI__builtin_ia32_vpmultishiftqb128:
16936 case X86::BI__builtin_ia32_vpmultishiftqb256:
16937 case X86::BI__builtin_ia32_vpmultishiftqb512: {
16938 Intrinsic::ID ID;
16939 switch (BuiltinID) {
16940 default: llvm_unreachable("Unsupported intrinsic!");
16941 case X86::BI__builtin_ia32_vpmultishiftqb128:
16942 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
16943 break;
16944 case X86::BI__builtin_ia32_vpmultishiftqb256:
16945 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
16946 break;
16947 case X86::BI__builtin_ia32_vpmultishiftqb512:
16948 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
16949 break;
16952 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16955 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16956 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16957 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
16958 unsigned NumElts =
16959 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16960 Value *MaskIn = Ops[2];
16961 Ops.erase(&Ops[2]);
16963 Intrinsic::ID ID;
16964 switch (BuiltinID) {
16965 default: llvm_unreachable("Unsupported intrinsic!");
16966 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16967 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
16968 break;
16969 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16970 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
16971 break;
16972 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
16973 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
16974 break;
16977 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16978 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
16981 // packed comparison intrinsics
16982 case X86::BI__builtin_ia32_cmpeqps:
16983 case X86::BI__builtin_ia32_cmpeqpd:
16984 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
16985 case X86::BI__builtin_ia32_cmpltps:
16986 case X86::BI__builtin_ia32_cmpltpd:
16987 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
16988 case X86::BI__builtin_ia32_cmpleps:
16989 case X86::BI__builtin_ia32_cmplepd:
16990 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
16991 case X86::BI__builtin_ia32_cmpunordps:
16992 case X86::BI__builtin_ia32_cmpunordpd:
16993 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
16994 case X86::BI__builtin_ia32_cmpneqps:
16995 case X86::BI__builtin_ia32_cmpneqpd:
16996 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
16997 case X86::BI__builtin_ia32_cmpnltps:
16998 case X86::BI__builtin_ia32_cmpnltpd:
16999 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
17000 case X86::BI__builtin_ia32_cmpnleps:
17001 case X86::BI__builtin_ia32_cmpnlepd:
17002 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
17003 case X86::BI__builtin_ia32_cmpordps:
17004 case X86::BI__builtin_ia32_cmpordpd:
17005 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
17006 case X86::BI__builtin_ia32_cmpph128_mask:
17007 case X86::BI__builtin_ia32_cmpph256_mask:
17008 case X86::BI__builtin_ia32_cmpph512_mask:
17009 case X86::BI__builtin_ia32_cmpps128_mask:
17010 case X86::BI__builtin_ia32_cmpps256_mask:
17011 case X86::BI__builtin_ia32_cmpps512_mask:
17012 case X86::BI__builtin_ia32_cmppd128_mask:
17013 case X86::BI__builtin_ia32_cmppd256_mask:
17014 case X86::BI__builtin_ia32_cmppd512_mask:
17015 case X86::BI__builtin_ia32_vcmppd256_round_mask:
17016 case X86::BI__builtin_ia32_vcmpps256_round_mask:
17017 case X86::BI__builtin_ia32_vcmpph256_round_mask:
17018 case X86::BI__builtin_ia32_vcmppbf16512_mask:
17019 case X86::BI__builtin_ia32_vcmppbf16256_mask:
17020 case X86::BI__builtin_ia32_vcmppbf16128_mask:
17021 IsMaskFCmp = true;
17022 [[fallthrough]];
17023 case X86::BI__builtin_ia32_cmpps:
17024 case X86::BI__builtin_ia32_cmpps256:
17025 case X86::BI__builtin_ia32_cmppd:
17026 case X86::BI__builtin_ia32_cmppd256: {
17027 // Lowering vector comparisons to fcmp instructions, while
17028 // ignoring signalling behaviour requested
17029 // ignoring rounding mode requested
17030 // This is only possible if fp-model is not strict and FENV_ACCESS is off.
17032 // The third argument is the comparison condition, an integer in the
17033 // range [0, 31].
17034 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
17036 // Lowering to IR fcmp instruction.
17037 // Ignoring requested signaling behaviour,
17038 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
17039 FCmpInst::Predicate Pred;
17040 bool IsSignaling;
17041 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
17042 // behavior is inverted. We'll handle that after the switch.
17043 switch (CC & 0xf) {
17044 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
17045 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
17046 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
17047 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
17048 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
17049 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
17050 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
17051 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
17052 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
17053 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
17054 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
17055 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
17056 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
17057 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
17058 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
17059 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
17060 default: llvm_unreachable("Unhandled CC");
17063 // Invert the signalling behavior for 16-31.
17064 if (CC & 0x10)
17065 IsSignaling = !IsSignaling;
17067 // If the predicate is true or false and we're using constrained intrinsics,
17068 // we don't have a compare intrinsic we can use. Just use the legacy X86
17069 // specific intrinsic.
17070 // If the intrinsic is mask enabled and we're using constrained intrinsics,
17071 // use the legacy X86 specific intrinsic.
17072 if (Builder.getIsFPConstrained() &&
17073 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
17074 IsMaskFCmp)) {
17076 Intrinsic::ID IID;
17077 switch (BuiltinID) {
17078 default: llvm_unreachable("Unexpected builtin");
17079 case X86::BI__builtin_ia32_cmpps:
17080 IID = Intrinsic::x86_sse_cmp_ps;
17081 break;
17082 case X86::BI__builtin_ia32_cmpps256:
17083 IID = Intrinsic::x86_avx_cmp_ps_256;
17084 break;
17085 case X86::BI__builtin_ia32_cmppd:
17086 IID = Intrinsic::x86_sse2_cmp_pd;
17087 break;
17088 case X86::BI__builtin_ia32_cmppd256:
17089 IID = Intrinsic::x86_avx_cmp_pd_256;
17090 break;
17091 case X86::BI__builtin_ia32_cmpph128_mask:
17092 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
17093 break;
17094 case X86::BI__builtin_ia32_cmpph256_mask:
17095 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
17096 break;
17097 case X86::BI__builtin_ia32_cmpph512_mask:
17098 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
17099 break;
17100 case X86::BI__builtin_ia32_cmpps512_mask:
17101 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
17102 break;
17103 case X86::BI__builtin_ia32_cmppd512_mask:
17104 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
17105 break;
17106 case X86::BI__builtin_ia32_cmpps128_mask:
17107 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
17108 break;
17109 case X86::BI__builtin_ia32_cmpps256_mask:
17110 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
17111 break;
17112 case X86::BI__builtin_ia32_cmppd128_mask:
17113 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
17114 break;
17115 case X86::BI__builtin_ia32_cmppd256_mask:
17116 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
17117 break;
17120 Function *Intr = CGM.getIntrinsic(IID);
17121 if (IsMaskFCmp) {
17122 unsigned NumElts =
17123 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17124 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
17125 Value *Cmp = Builder.CreateCall(Intr, Ops);
17126 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
17129 return Builder.CreateCall(Intr, Ops);
17132 // Builtins without the _mask suffix return a vector of integers
17133 // of the same width as the input vectors
17134 if (IsMaskFCmp) {
17135 // We ignore SAE if strict FP is disabled. We only keep precise
17136 // exception behavior under strict FP.
17137 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
17138 // object will be required.
17139 unsigned NumElts =
17140 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17141 Value *Cmp;
17142 if (IsSignaling)
17143 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
17144 else
17145 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
17146 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
17149 return getVectorFCmpIR(Pred, IsSignaling);
17152 // SSE scalar comparison intrinsics
17153 case X86::BI__builtin_ia32_cmpeqss:
17154 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
17155 case X86::BI__builtin_ia32_cmpltss:
17156 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
17157 case X86::BI__builtin_ia32_cmpless:
17158 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
17159 case X86::BI__builtin_ia32_cmpunordss:
17160 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
17161 case X86::BI__builtin_ia32_cmpneqss:
17162 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
17163 case X86::BI__builtin_ia32_cmpnltss:
17164 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
17165 case X86::BI__builtin_ia32_cmpnless:
17166 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
17167 case X86::BI__builtin_ia32_cmpordss:
17168 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
17169 case X86::BI__builtin_ia32_cmpeqsd:
17170 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
17171 case X86::BI__builtin_ia32_cmpltsd:
17172 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
17173 case X86::BI__builtin_ia32_cmplesd:
17174 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
17175 case X86::BI__builtin_ia32_cmpunordsd:
17176 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
17177 case X86::BI__builtin_ia32_cmpneqsd:
17178 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
17179 case X86::BI__builtin_ia32_cmpnltsd:
17180 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
17181 case X86::BI__builtin_ia32_cmpnlesd:
17182 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
17183 case X86::BI__builtin_ia32_cmpordsd:
17184 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
17186 // f16c half2float intrinsics
17187 case X86::BI__builtin_ia32_vcvtph2ps:
17188 case X86::BI__builtin_ia32_vcvtph2ps256:
17189 case X86::BI__builtin_ia32_vcvtph2ps_mask:
17190 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
17191 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
17192 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
17193 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
17196 // AVX512 bf16 intrinsics
17197 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
17198 Ops[2] = getMaskVecValue(
17199 *this, Ops[2],
17200 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
17201 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
17202 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17204 case X86::BI__builtin_ia32_cvtsbf162ss_32:
17205 return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
17207 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17208 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
17209 Intrinsic::ID IID;
17210 switch (BuiltinID) {
17211 default: llvm_unreachable("Unsupported intrinsic!");
17212 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17213 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
17214 break;
17215 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
17216 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
17217 break;
17219 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
17220 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
17223 case X86::BI__cpuid:
17224 case X86::BI__cpuidex: {
17225 Value *FuncId = EmitScalarExpr(E->getArg(1));
17226 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
17227 ? EmitScalarExpr(E->getArg(2))
17228 : llvm::ConstantInt::get(Int32Ty, 0);
17230 llvm::StructType *CpuidRetTy =
17231 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
17232 llvm::FunctionType *FTy =
17233 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
17235 StringRef Asm, Constraints;
17236 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
17237 Asm = "cpuid";
17238 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
17239 } else {
17240 // x86-64 uses %rbx as the base register, so preserve it.
17241 Asm = "xchgq %rbx, ${1:q}\n"
17242 "cpuid\n"
17243 "xchgq %rbx, ${1:q}";
17244 Constraints = "={ax},=r,={cx},={dx},0,2";
17247 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
17248 /*hasSideEffects=*/false);
17249 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
17250 Value *BasePtr = EmitScalarExpr(E->getArg(0));
17251 Value *Store = nullptr;
17252 for (unsigned i = 0; i < 4; i++) {
17253 Value *Extracted = Builder.CreateExtractValue(IACall, i);
17254 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
17255 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
17258 // Return the last store instruction to signal that we have emitted
17259 // the intrinsic.
17260 return Store;
17263 case X86::BI__emul:
17264 case X86::BI__emulu: {
17265 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
17266 bool isSigned = (BuiltinID == X86::BI__emul);
17267 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
17268 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
17269 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
17271 case X86::BI__mulh:
17272 case X86::BI__umulh:
17273 case X86::BI_mul128:
17274 case X86::BI_umul128: {
17275 llvm::Type *ResType = ConvertType(E->getType());
17276 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
17278 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
17279 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
17280 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
17282 Value *MulResult, *HigherBits;
17283 if (IsSigned) {
17284 MulResult = Builder.CreateNSWMul(LHS, RHS);
17285 HigherBits = Builder.CreateAShr(MulResult, 64);
17286 } else {
17287 MulResult = Builder.CreateNUWMul(LHS, RHS);
17288 HigherBits = Builder.CreateLShr(MulResult, 64);
17290 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
17292 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
17293 return HigherBits;
17295 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
17296 Builder.CreateStore(HigherBits, HighBitsAddress);
17297 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
17300 case X86::BI__faststorefence: {
17301 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17302 llvm::SyncScope::System);
17304 case X86::BI__shiftleft128:
17305 case X86::BI__shiftright128: {
17306 llvm::Function *F = CGM.getIntrinsic(
17307 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
17308 Int64Ty);
17309 // Flip low/high ops and zero-extend amount to matching type.
17310 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
17311 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
17312 std::swap(Ops[0], Ops[1]);
17313 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
17314 return Builder.CreateCall(F, Ops);
17316 case X86::BI_ReadWriteBarrier:
17317 case X86::BI_ReadBarrier:
17318 case X86::BI_WriteBarrier: {
17319 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17320 llvm::SyncScope::SingleThread);
17323 case X86::BI_AddressOfReturnAddress: {
17324 Function *F =
17325 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
17326 return Builder.CreateCall(F);
17328 case X86::BI__stosb: {
17329 // We treat __stosb as a volatile memset - it may not generate "rep stosb"
17330 // instruction, but it will create a memset that won't be optimized away.
17331 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
17333 // Corresponding to intrinsics which will return 2 tiles (tile0_tile1).
17334 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17335 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17336 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17337 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17338 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17339 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17340 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17341 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: {
17342 Intrinsic::ID IID;
17343 switch (BuiltinID) {
17344 default:
17345 llvm_unreachable("Unsupported intrinsic!");
17346 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17347 IID = Intrinsic::x86_t2rpntlvwz0_internal;
17348 break;
17349 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17350 IID = Intrinsic::x86_t2rpntlvwz0rs_internal;
17351 break;
17352 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17353 IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
17354 break;
17355 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17356 IID = Intrinsic::x86_t2rpntlvwz0rst1_internal;
17357 break;
17358 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17359 IID = Intrinsic::x86_t2rpntlvwz1_internal;
17360 break;
17361 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17362 IID = Intrinsic::x86_t2rpntlvwz1rs_internal;
17363 break;
17364 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17365 IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
17366 break;
17367 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
17368 IID = Intrinsic::x86_t2rpntlvwz1rst1_internal;
17369 break;
17372 // Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)
17373 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
17374 {Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]});
17376 auto *PtrTy = E->getArg(3)->getType()->getAs<PointerType>();
17377 assert(PtrTy && "arg3 must be of pointer type");
17378 QualType PtreeTy = PtrTy->getPointeeType();
17379 llvm::Type *TyPtee = ConvertType(PtreeTy);
17381 // Bitcast amx type (x86_amx) to vector type (256 x i32)
17382 // Then store tile0 into DstPtr0
17383 Value *T0 = Builder.CreateExtractValue(Call, 0);
17384 Value *VecT0 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17385 {TyPtee}, {T0});
17386 Builder.CreateDefaultAlignedStore(VecT0, Ops[3]);
17388 // Then store tile1 into DstPtr1
17389 Value *T1 = Builder.CreateExtractValue(Call, 1);
17390 Value *VecT1 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17391 {TyPtee}, {T1});
17392 Value *Store = Builder.CreateDefaultAlignedStore(VecT1, Ops[4]);
17394 // Note: Here we avoid directly using x86_tilestored64_internal to store
17395 // the results because it can't guarantee the scope of the memory written.
17396 // This may cause shape reloads after the first amx intrinsic, which the
17397 // current amx register allocation has no ability to handle.
17399 return Store;
17401 case X86::BI__ud2:
17402 // llvm.trap makes a ud2a instruction on x86.
17403 return EmitTrapCall(Intrinsic::trap);
17404 case X86::BI__int2c: {
17405 // This syscall signals a driver assertion failure in x86 NT kernels.
17406 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
17407 llvm::InlineAsm *IA =
17408 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
17409 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
17410 getLLVMContext(), llvm::AttributeList::FunctionIndex,
17411 llvm::Attribute::NoReturn);
17412 llvm::CallInst *CI = Builder.CreateCall(IA);
17413 CI->setAttributes(NoReturnAttr);
17414 return CI;
17416 case X86::BI__readfsbyte:
17417 case X86::BI__readfsword:
17418 case X86::BI__readfsdword:
17419 case X86::BI__readfsqword: {
17420 llvm::Type *IntTy = ConvertType(E->getType());
17421 Value *Ptr = Builder.CreateIntToPtr(
17422 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
17423 LoadInst *Load = Builder.CreateAlignedLoad(
17424 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
17425 Load->setVolatile(true);
17426 return Load;
17428 case X86::BI__readgsbyte:
17429 case X86::BI__readgsword:
17430 case X86::BI__readgsdword:
17431 case X86::BI__readgsqword: {
17432 llvm::Type *IntTy = ConvertType(E->getType());
17433 Value *Ptr = Builder.CreateIntToPtr(
17434 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
17435 LoadInst *Load = Builder.CreateAlignedLoad(
17436 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
17437 Load->setVolatile(true);
17438 return Load;
17440 case X86::BI__builtin_ia32_encodekey128_u32: {
17441 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
17443 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
17445 for (int i = 0; i < 3; ++i) {
17446 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17447 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
17448 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
17451 return Builder.CreateExtractValue(Call, 0);
17453 case X86::BI__builtin_ia32_encodekey256_u32: {
17454 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
17456 Value *Call =
17457 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
17459 for (int i = 0; i < 4; ++i) {
17460 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17461 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
17462 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
17465 return Builder.CreateExtractValue(Call, 0);
17467 case X86::BI__builtin_ia32_aesenc128kl_u8:
17468 case X86::BI__builtin_ia32_aesdec128kl_u8:
17469 case X86::BI__builtin_ia32_aesenc256kl_u8:
17470 case X86::BI__builtin_ia32_aesdec256kl_u8: {
17471 Intrinsic::ID IID;
17472 StringRef BlockName;
17473 switch (BuiltinID) {
17474 default:
17475 llvm_unreachable("Unexpected builtin");
17476 case X86::BI__builtin_ia32_aesenc128kl_u8:
17477 IID = Intrinsic::x86_aesenc128kl;
17478 BlockName = "aesenc128kl";
17479 break;
17480 case X86::BI__builtin_ia32_aesdec128kl_u8:
17481 IID = Intrinsic::x86_aesdec128kl;
17482 BlockName = "aesdec128kl";
17483 break;
17484 case X86::BI__builtin_ia32_aesenc256kl_u8:
17485 IID = Intrinsic::x86_aesenc256kl;
17486 BlockName = "aesenc256kl";
17487 break;
17488 case X86::BI__builtin_ia32_aesdec256kl_u8:
17489 IID = Intrinsic::x86_aesdec256kl;
17490 BlockName = "aesdec256kl";
17491 break;
17494 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
17496 BasicBlock *NoError =
17497 createBasicBlock(BlockName + "_no_error", this->CurFn);
17498 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
17499 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
17501 Value *Ret = Builder.CreateExtractValue(Call, 0);
17502 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
17503 Value *Out = Builder.CreateExtractValue(Call, 1);
17504 Builder.CreateCondBr(Succ, NoError, Error);
17506 Builder.SetInsertPoint(NoError);
17507 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
17508 Builder.CreateBr(End);
17510 Builder.SetInsertPoint(Error);
17511 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
17512 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
17513 Builder.CreateBr(End);
17515 Builder.SetInsertPoint(End);
17516 return Builder.CreateExtractValue(Call, 0);
17518 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17519 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17520 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17521 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
17522 Intrinsic::ID IID;
17523 StringRef BlockName;
17524 switch (BuiltinID) {
17525 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17526 IID = Intrinsic::x86_aesencwide128kl;
17527 BlockName = "aesencwide128kl";
17528 break;
17529 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17530 IID = Intrinsic::x86_aesdecwide128kl;
17531 BlockName = "aesdecwide128kl";
17532 break;
17533 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17534 IID = Intrinsic::x86_aesencwide256kl;
17535 BlockName = "aesencwide256kl";
17536 break;
17537 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
17538 IID = Intrinsic::x86_aesdecwide256kl;
17539 BlockName = "aesdecwide256kl";
17540 break;
17543 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
17544 Value *InOps[9];
17545 InOps[0] = Ops[2];
17546 for (int i = 0; i != 8; ++i) {
17547 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
17548 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
17551 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
17553 BasicBlock *NoError =
17554 createBasicBlock(BlockName + "_no_error", this->CurFn);
17555 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
17556 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
17558 Value *Ret = Builder.CreateExtractValue(Call, 0);
17559 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
17560 Builder.CreateCondBr(Succ, NoError, Error);
17562 Builder.SetInsertPoint(NoError);
17563 for (int i = 0; i != 8; ++i) {
17564 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17565 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
17566 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
17568 Builder.CreateBr(End);
17570 Builder.SetInsertPoint(Error);
17571 for (int i = 0; i != 8; ++i) {
17572 Value *Out = Builder.CreateExtractValue(Call, i + 1);
17573 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
17574 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
17575 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
17577 Builder.CreateBr(End);
17579 Builder.SetInsertPoint(End);
17580 return Builder.CreateExtractValue(Call, 0);
17582 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
17583 IsConjFMA = true;
17584 [[fallthrough]];
17585 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
17586 Intrinsic::ID IID = IsConjFMA
17587 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
17588 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
17589 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17590 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
17592 case X86::BI__builtin_ia32_vfcmaddcph256_round_mask:
17593 IsConjFMA = true;
17594 LLVM_FALLTHROUGH;
17595 case X86::BI__builtin_ia32_vfmaddcph256_round_mask: {
17596 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx10_mask_vfcmaddcph256
17597 : Intrinsic::x86_avx10_mask_vfmaddcph256;
17598 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17599 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
17601 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
17602 IsConjFMA = true;
17603 [[fallthrough]];
17604 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
17605 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17606 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17607 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17608 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
17609 return EmitX86Select(*this, And, Call, Ops[0]);
17611 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
17612 IsConjFMA = true;
17613 [[fallthrough]];
17614 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
17615 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17616 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17617 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17618 static constexpr int Mask[] = {0, 5, 6, 7};
17619 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
17621 case X86::BI__builtin_ia32_prefetchi:
17622 return Builder.CreateCall(
17623 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
17624 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
17625 llvm::ConstantInt::get(Int32Ty, 0)});
17629 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
17630 const CallExpr *E) {
17631 // Do not emit the builtin arguments in the arguments of a function call,
17632 // because the evaluation order of function arguments is not specified in C++.
17633 // This is important when testing to ensure the arguments are emitted in the
17634 // same order every time. E.g.:
17635 // Instead of:
17636 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
17637 // EmitScalarExpr(E->getArg(1)), "swdiv");
17638 // Use:
17639 // Value *Op0 = EmitScalarExpr(E->getArg(0));
17640 // Value *Op1 = EmitScalarExpr(E->getArg(1));
17641 // return Builder.CreateFDiv(Op0, Op1, "swdiv");
17643 Intrinsic::ID ID = Intrinsic::not_intrinsic;
17645 #include "llvm/TargetParser/PPCTargetParser.def"
// Helper for the AIX lowering of __builtin_cpu_is / __builtin_cpu_supports.
// Emits IR that evaluates one AIX CPU query and returns the resulting value:
//  - BUILTIN_PPC_FALSE / BUILTIN_PPC_TRUE: a constant false / true of the
//    builtin call's converted result type, with no runtime check.
//  - USE_SYS_CONF: loads field number FieldIdx of the external
//    `_system_configuration` variable.
//  - SYS_CALL: calls the runtime function `getsystemcfg(FieldIdx)`.
// For the last two, the fetched value is ANDed with Mask (when Mask is
// non-zero) and then compared against OpValue using predicate CompOp.
// NOTE(review): the BUILTIN_PPC_*, USE_SYS_CONF, SYS_CALL and
// PPC_SYSTEMCONFIG_TYPE names are presumably provided by the
// PPCTargetParser.def include just above -- confirm there.
17646 auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
17647 unsigned Mask, CmpInst::Predicate CompOp,
17648 unsigned OpValue) -> Value * {
// Statically known answers: fold to a constant and emit no runtime query.
17649 if (SupportMethod == BUILTIN_PPC_FALSE)
17650 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17652 if (SupportMethod == BUILTIN_PPC_TRUE)
17653 return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
// The remaining methods are handled below; FieldValue stays null if neither
// branch matches, which the second assert catches.
17655 assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");
17657 llvm::Value *FieldValue = nullptr;
17658 if (SupportMethod == USE_SYS_CONF) {
17659 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
17660 llvm::Constant *SysConf =
17661 CGM.CreateRuntimeVariable(STy, "_system_configuration");
17663 // Grab the appropriate field from _system_configuration.
17664 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
17665 ConstantInt::get(Int32Ty, FieldIdx)};
// Compute the field's address, then read it as a 32-bit integer with
// 4-byte alignment.
17667 FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs);
17668 FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,
17669 CharUnits::fromQuantity(4));
17670 } else if (SupportMethod == SYS_CALL) {
// Declared here as a non-variadic function taking one i32 and returning i64.
17671 llvm::FunctionType *FTy =
17672 llvm::FunctionType::get(Int64Ty, Int32Ty, false);
17673 llvm::FunctionCallee Func =
17674 CGM.CreateRuntimeFunction(FTy, "getsystemcfg");
17676 FieldValue =
17677 Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)});
17679 assert(FieldValue &&
17680 "SupportMethod value is not defined in PPCTargetParser.def.");
// A zero Mask means "compare the whole value"; no AND is emitted.
17682 if (Mask)
17683 FieldValue = Builder.CreateAnd(FieldValue, Mask);
// The fetched value is i32 on the load path and i64 on the call path; build
// the comparison constant with the matching width.
17685 llvm::Type *ValueType = FieldValue->getType();
17686 bool IsValueType64Bit = ValueType->isIntegerTy(64);
17687 assert(
17688 (IsValueType64Bit || ValueType->isIntegerTy(32)) &&
17689 "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
17691 return Builder.CreateICmp(
17692 CompOp, FieldValue,
17693 ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
17696 switch (BuiltinID) {
17697 default: return nullptr;
17699 case Builtin::BI__builtin_cpu_is: {
17700 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17701 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17702 llvm::Triple Triple = getTarget().getTriple();
17704 unsigned LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue;
17705 typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;
17707 std::tie(LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue) =
17708 static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr)
17709 #define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \
17710 AIXID) \
17711 .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID})
17712 #include "llvm/TargetParser/PPCTargetParser.def"
17713 .Default({BUILTIN_PPC_UNSUPPORTED, 0,
17714 BUILTIN_PPC_UNSUPPORTED, 0}));
17716 if (Triple.isOSAIX()) {
17717 assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17718 "Invalid CPU name. Missed by SemaChecking?");
17719 return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0,
17720 ICmpInst::ICMP_EQ, AIXIDValue);
17723 assert(Triple.isOSLinux() &&
17724 "__builtin_cpu_is() is only supported for AIX and Linux.");
17726 assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17727 "Invalid CPU name. Missed by SemaChecking?");
17729 if (LinuxSupportMethod == BUILTIN_PPC_FALSE)
17730 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17732 Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
17733 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17734 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
17735 return Builder.CreateICmpEQ(TheCall,
17736 llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
17738 case Builtin::BI__builtin_cpu_supports: {
17739 llvm::Triple Triple = getTarget().getTriple();
17740 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17741 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17742 if (Triple.isOSAIX()) {
17743 unsigned SupportMethod, FieldIdx, Mask, Value;
17744 CmpInst::Predicate CompOp;
17745 typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
17746 unsigned>
17747 CPUSupportType;
17748 std::tie(SupportMethod, FieldIdx, Mask, CompOp, Value) =
17749 static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)
17750 #define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
17751 VALUE) \
17752 .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
17753 #include "llvm/TargetParser/PPCTargetParser.def"
17754 .Default({BUILTIN_PPC_FALSE, 0, 0,
17755 CmpInst::Predicate(), 0}));
17756 return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
17757 Value);
17760 assert(Triple.isOSLinux() &&
17761 "__builtin_cpu_supports() is only supported for AIX and Linux.");
17762 unsigned FeatureWord;
17763 unsigned BitMask;
17764 std::tie(FeatureWord, BitMask) =
17765 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
17766 #define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
17767 .Case(Name, {FA_WORD, Bitmask})
17768 #include "llvm/TargetParser/PPCTargetParser.def"
17769 .Default({0, 0});
17770 if (!BitMask)
17771 return Builder.getFalse();
17772 Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
17773 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17774 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
17775 Value *Mask =
17776 Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
17777 return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
17778 #undef PPC_FAWORD_HWCAP
17779 #undef PPC_FAWORD_HWCAP2
17780 #undef PPC_FAWORD_CPUID
17783 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
17784 // call __builtin_readcyclecounter.
17785 case PPC::BI__builtin_ppc_get_timebase:
17786 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
17788 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
17789 case PPC::BI__builtin_altivec_lvx:
17790 case PPC::BI__builtin_altivec_lvxl:
17791 case PPC::BI__builtin_altivec_lvebx:
17792 case PPC::BI__builtin_altivec_lvehx:
17793 case PPC::BI__builtin_altivec_lvewx:
17794 case PPC::BI__builtin_altivec_lvsl:
17795 case PPC::BI__builtin_altivec_lvsr:
17796 case PPC::BI__builtin_vsx_lxvd2x:
17797 case PPC::BI__builtin_vsx_lxvw4x:
17798 case PPC::BI__builtin_vsx_lxvd2x_be:
17799 case PPC::BI__builtin_vsx_lxvw4x_be:
17800 case PPC::BI__builtin_vsx_lxvl:
17801 case PPC::BI__builtin_vsx_lxvll:
17803 SmallVector<Value *, 2> Ops;
17804 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17805 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17806 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
17807 BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
17808 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17809 Ops.pop_back();
17812 switch (BuiltinID) {
17813 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
17814 case PPC::BI__builtin_altivec_lvx:
17815 ID = Intrinsic::ppc_altivec_lvx;
17816 break;
17817 case PPC::BI__builtin_altivec_lvxl:
17818 ID = Intrinsic::ppc_altivec_lvxl;
17819 break;
17820 case PPC::BI__builtin_altivec_lvebx:
17821 ID = Intrinsic::ppc_altivec_lvebx;
17822 break;
17823 case PPC::BI__builtin_altivec_lvehx:
17824 ID = Intrinsic::ppc_altivec_lvehx;
17825 break;
17826 case PPC::BI__builtin_altivec_lvewx:
17827 ID = Intrinsic::ppc_altivec_lvewx;
17828 break;
17829 case PPC::BI__builtin_altivec_lvsl:
17830 ID = Intrinsic::ppc_altivec_lvsl;
17831 break;
17832 case PPC::BI__builtin_altivec_lvsr:
17833 ID = Intrinsic::ppc_altivec_lvsr;
17834 break;
17835 case PPC::BI__builtin_vsx_lxvd2x:
17836 ID = Intrinsic::ppc_vsx_lxvd2x;
17837 break;
17838 case PPC::BI__builtin_vsx_lxvw4x:
17839 ID = Intrinsic::ppc_vsx_lxvw4x;
17840 break;
17841 case PPC::BI__builtin_vsx_lxvd2x_be:
17842 ID = Intrinsic::ppc_vsx_lxvd2x_be;
17843 break;
17844 case PPC::BI__builtin_vsx_lxvw4x_be:
17845 ID = Intrinsic::ppc_vsx_lxvw4x_be;
17846 break;
17847 case PPC::BI__builtin_vsx_lxvl:
17848 ID = Intrinsic::ppc_vsx_lxvl;
17849 break;
17850 case PPC::BI__builtin_vsx_lxvll:
17851 ID = Intrinsic::ppc_vsx_lxvll;
17852 break;
17854 llvm::Function *F = CGM.getIntrinsic(ID);
17855 return Builder.CreateCall(F, Ops, "");
17858 // vec_st, vec_xst_be
17859 case PPC::BI__builtin_altivec_stvx:
17860 case PPC::BI__builtin_altivec_stvxl:
17861 case PPC::BI__builtin_altivec_stvebx:
17862 case PPC::BI__builtin_altivec_stvehx:
17863 case PPC::BI__builtin_altivec_stvewx:
17864 case PPC::BI__builtin_vsx_stxvd2x:
17865 case PPC::BI__builtin_vsx_stxvw4x:
17866 case PPC::BI__builtin_vsx_stxvd2x_be:
17867 case PPC::BI__builtin_vsx_stxvw4x_be:
17868 case PPC::BI__builtin_vsx_stxvl:
17869 case PPC::BI__builtin_vsx_stxvll:
17871 SmallVector<Value *, 3> Ops;
17872 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17873 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17874 Ops.push_back(EmitScalarExpr(E->getArg(2)));
17875 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
17876 BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
17877 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17878 Ops.pop_back();
17881 switch (BuiltinID) {
17882 default: llvm_unreachable("Unsupported st intrinsic!");
17883 case PPC::BI__builtin_altivec_stvx:
17884 ID = Intrinsic::ppc_altivec_stvx;
17885 break;
17886 case PPC::BI__builtin_altivec_stvxl:
17887 ID = Intrinsic::ppc_altivec_stvxl;
17888 break;
17889 case PPC::BI__builtin_altivec_stvebx:
17890 ID = Intrinsic::ppc_altivec_stvebx;
17891 break;
17892 case PPC::BI__builtin_altivec_stvehx:
17893 ID = Intrinsic::ppc_altivec_stvehx;
17894 break;
17895 case PPC::BI__builtin_altivec_stvewx:
17896 ID = Intrinsic::ppc_altivec_stvewx;
17897 break;
17898 case PPC::BI__builtin_vsx_stxvd2x:
17899 ID = Intrinsic::ppc_vsx_stxvd2x;
17900 break;
17901 case PPC::BI__builtin_vsx_stxvw4x:
17902 ID = Intrinsic::ppc_vsx_stxvw4x;
17903 break;
17904 case PPC::BI__builtin_vsx_stxvd2x_be:
17905 ID = Intrinsic::ppc_vsx_stxvd2x_be;
17906 break;
17907 case PPC::BI__builtin_vsx_stxvw4x_be:
17908 ID = Intrinsic::ppc_vsx_stxvw4x_be;
17909 break;
17910 case PPC::BI__builtin_vsx_stxvl:
17911 ID = Intrinsic::ppc_vsx_stxvl;
17912 break;
17913 case PPC::BI__builtin_vsx_stxvll:
17914 ID = Intrinsic::ppc_vsx_stxvll;
17915 break;
17917 llvm::Function *F = CGM.getIntrinsic(ID);
17918 return Builder.CreateCall(F, Ops, "");
17920 case PPC::BI__builtin_vsx_ldrmb: {
17921 // Essentially boils down to performing an unaligned VMX load sequence so
17922 // as to avoid crossing a page boundary and then shuffling the elements
17923 // into the right side of the vector register.
17924 Value *Op0 = EmitScalarExpr(E->getArg(0));
17925 Value *Op1 = EmitScalarExpr(E->getArg(1));
17926 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17927 llvm::Type *ResTy = ConvertType(E->getType());
17928 bool IsLE = getTarget().isLittleEndian();
17930 // If the user wants the entire vector, just load the entire vector.
17931 if (NumBytes == 16) {
17932 Value *LD =
17933 Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
17934 if (!IsLE)
17935 return LD;
17937 // Reverse the bytes on LE.
17938 SmallVector<int, 16> RevMask;
17939 for (int Idx = 0; Idx < 16; Idx++)
17940 RevMask.push_back(15 - Idx);
17941 return Builder.CreateShuffleVector(LD, LD, RevMask);
17944 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
17945 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
17946 : Intrinsic::ppc_altivec_lvsl);
17947 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
17948 Value *HiMem = Builder.CreateGEP(
17949 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
17950 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
17951 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
17952 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
17954 Op0 = IsLE ? HiLd : LoLd;
17955 Op1 = IsLE ? LoLd : HiLd;
17956 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
17957 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
17959 if (IsLE) {
17960 SmallVector<int, 16> Consts;
17961 for (int Idx = 0; Idx < 16; Idx++) {
17962 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
17963 : 16 - (NumBytes - Idx);
17964 Consts.push_back(Val);
17966 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
17967 Zero, Consts);
17969 SmallVector<Constant *, 16> Consts;
17970 for (int Idx = 0; Idx < 16; Idx++)
17971 Consts.push_back(Builder.getInt8(NumBytes + Idx));
17972 Value *Mask2 = ConstantVector::get(Consts);
17973 return Builder.CreateBitCast(
17974 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
17976 case PPC::BI__builtin_vsx_strmb: {
17977 Value *Op0 = EmitScalarExpr(E->getArg(0));
17978 Value *Op1 = EmitScalarExpr(E->getArg(1));
17979 Value *Op2 = EmitScalarExpr(E->getArg(2));
17980 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17981 bool IsLE = getTarget().isLittleEndian();
17982 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
17983 // When storing the whole vector, store it as-is on BE; on LE, reverse the
17984 // bytes first and then store.
17985 if (Width == 16) {
17986 Value *StVec = Op2;
17987 if (IsLE) {
17988 SmallVector<int, 16> RevMask;
17989 for (int Idx = 0; Idx < 16; Idx++)
17990 RevMask.push_back(15 - Idx);
17991 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
17993 return Builder.CreateStore(
17994 StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
17996 auto *ConvTy = Int64Ty;
17997 unsigned NumElts = 0;
17998 switch (Width) {
17999 default:
18000 llvm_unreachable("width for stores must be a power of 2");
18001 case 8:
18002 ConvTy = Int64Ty;
18003 NumElts = 2;
18004 break;
18005 case 4:
18006 ConvTy = Int32Ty;
18007 NumElts = 4;
18008 break;
18009 case 2:
18010 ConvTy = Int16Ty;
18011 NumElts = 8;
18012 break;
18013 case 1:
18014 ConvTy = Int8Ty;
18015 NumElts = 16;
18016 break;
18018 Value *Vec = Builder.CreateBitCast(
18019 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
18020 Value *Ptr =
18021 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
18022 Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
18023 if (IsLE && Width > 1) {
18024 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
18025 Elt = Builder.CreateCall(F, Elt);
18027 return Builder.CreateStore(
18028 Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
18030 unsigned Stored = 0;
18031 unsigned RemainingBytes = NumBytes;
18032 Value *Result;
18033 if (NumBytes == 16)
18034 return StoreSubVec(16, 0, 0);
18035 if (NumBytes >= 8) {
18036 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
18037 RemainingBytes -= 8;
18038 Stored += 8;
18040 if (RemainingBytes >= 4) {
18041 Result = StoreSubVec(4, NumBytes - Stored - 4,
18042 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
18043 RemainingBytes -= 4;
18044 Stored += 4;
18046 if (RemainingBytes >= 2) {
18047 Result = StoreSubVec(2, NumBytes - Stored - 2,
18048 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
18049 RemainingBytes -= 2;
18050 Stored += 2;
18052 if (RemainingBytes)
18053 Result =
18054 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
18055 return Result;
18057 // Square root
18058 case PPC::BI__builtin_vsx_xvsqrtsp:
18059 case PPC::BI__builtin_vsx_xvsqrtdp: {
18060 llvm::Type *ResultType = ConvertType(E->getType());
18061 Value *X = EmitScalarExpr(E->getArg(0));
18062 if (Builder.getIsFPConstrained()) {
18063 llvm::Function *F = CGM.getIntrinsic(
18064 Intrinsic::experimental_constrained_sqrt, ResultType);
18065 return Builder.CreateConstrainedFPCall(F, X);
18066 } else {
18067 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18068 return Builder.CreateCall(F, X);
18071 // Count leading zeros
18072 case PPC::BI__builtin_altivec_vclzb:
18073 case PPC::BI__builtin_altivec_vclzh:
18074 case PPC::BI__builtin_altivec_vclzw:
18075 case PPC::BI__builtin_altivec_vclzd: {
18076 llvm::Type *ResultType = ConvertType(E->getType());
18077 Value *X = EmitScalarExpr(E->getArg(0));
18078 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18079 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
18080 return Builder.CreateCall(F, {X, Undef});
18082 case PPC::BI__builtin_altivec_vctzb:
18083 case PPC::BI__builtin_altivec_vctzh:
18084 case PPC::BI__builtin_altivec_vctzw:
18085 case PPC::BI__builtin_altivec_vctzd: {
18086 llvm::Type *ResultType = ConvertType(E->getType());
18087 Value *X = EmitScalarExpr(E->getArg(0));
18088 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18089 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
18090 return Builder.CreateCall(F, {X, Undef});
18092 case PPC::BI__builtin_altivec_vinsd:
18093 case PPC::BI__builtin_altivec_vinsw:
18094 case PPC::BI__builtin_altivec_vinsd_elt:
18095 case PPC::BI__builtin_altivec_vinsw_elt: {
18096 llvm::Type *ResultType = ConvertType(E->getType());
18097 Value *Op0 = EmitScalarExpr(E->getArg(0));
18098 Value *Op1 = EmitScalarExpr(E->getArg(1));
18099 Value *Op2 = EmitScalarExpr(E->getArg(2));
18101 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18102 BuiltinID == PPC::BI__builtin_altivec_vinsd);
18104 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18105 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
18107 // The third argument must be a compile time constant.
18108 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18109 assert(ArgCI &&
18110 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
18112 // Valid value for the third argument is dependent on the input type and
18113 // builtin called.
18114 int ValidMaxValue = 0;
18115 if (IsUnaligned)
18116 ValidMaxValue = (Is32bit) ? 12 : 8;
18117 else
18118 ValidMaxValue = (Is32bit) ? 3 : 1;
18120 // Get value of third argument.
18121 int64_t ConstArg = ArgCI->getSExtValue();
18123 // Compose range checking error message.
18124 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
18125 RangeErrMsg += " number " + llvm::to_string(ConstArg);
18126 RangeErrMsg += " is outside of the valid range [0, ";
18127 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
18129 // Issue error if third argument is not within the valid range.
18130 if (ConstArg < 0 || ConstArg > ValidMaxValue)
18131 CGM.Error(E->getExprLoc(), RangeErrMsg);
18133 // Input to vec_replace_elt is an element index, convert to byte index.
18134 if (!IsUnaligned) {
18135 ConstArg *= Is32bit ? 4 : 8;
18136 // Fix the constant according to endianness.
18137 if (getTarget().isLittleEndian())
18138 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
18141 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
18142 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
18143 // Casting input to vector int as per intrinsic definition.
18144 Op0 =
18145 Is32bit
18146 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
18147 : Builder.CreateBitCast(Op0,
18148 llvm::FixedVectorType::get(Int64Ty, 2));
18149 return Builder.CreateBitCast(
18150 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
18152 case PPC::BI__builtin_altivec_vadduqm:
18153 case PPC::BI__builtin_altivec_vsubuqm: {
18154 Value *Op0 = EmitScalarExpr(E->getArg(0));
18155 Value *Op1 = EmitScalarExpr(E->getArg(1));
18156 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
18157 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
18158 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
18159 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
18160 return Builder.CreateAdd(Op0, Op1, "vadduqm");
18161 else
18162 return Builder.CreateSub(Op0, Op1, "vsubuqm");
18164 case PPC::BI__builtin_altivec_vaddcuq_c:
18165 case PPC::BI__builtin_altivec_vsubcuq_c: {
18166 SmallVector<Value *, 2> Ops;
18167 Value *Op0 = EmitScalarExpr(E->getArg(0));
18168 Value *Op1 = EmitScalarExpr(E->getArg(1));
18169 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18170 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18171 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
18172 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
18173 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
18174 ? Intrinsic::ppc_altivec_vaddcuq
18175 : Intrinsic::ppc_altivec_vsubcuq;
18176 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
18178 case PPC::BI__builtin_altivec_vaddeuqm_c:
18179 case PPC::BI__builtin_altivec_vaddecuq_c:
18180 case PPC::BI__builtin_altivec_vsubeuqm_c:
18181 case PPC::BI__builtin_altivec_vsubecuq_c: {
18182 SmallVector<Value *, 3> Ops;
18183 Value *Op0 = EmitScalarExpr(E->getArg(0));
18184 Value *Op1 = EmitScalarExpr(E->getArg(1));
18185 Value *Op2 = EmitScalarExpr(E->getArg(2));
18186 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18187 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18188 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
18189 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
18190 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
18191 switch (BuiltinID) {
18192 default:
18193 llvm_unreachable("Unsupported intrinsic!");
18194 case PPC::BI__builtin_altivec_vaddeuqm_c:
18195 ID = Intrinsic::ppc_altivec_vaddeuqm;
18196 break;
18197 case PPC::BI__builtin_altivec_vaddecuq_c:
18198 ID = Intrinsic::ppc_altivec_vaddecuq;
18199 break;
18200 case PPC::BI__builtin_altivec_vsubeuqm_c:
18201 ID = Intrinsic::ppc_altivec_vsubeuqm;
18202 break;
18203 case PPC::BI__builtin_altivec_vsubecuq_c:
18204 ID = Intrinsic::ppc_altivec_vsubecuq;
18205 break;
18207 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
18209 case PPC::BI__builtin_ppc_rldimi:
18210 case PPC::BI__builtin_ppc_rlwimi: {
18211 Value *Op0 = EmitScalarExpr(E->getArg(0));
18212 Value *Op1 = EmitScalarExpr(E->getArg(1));
18213 Value *Op2 = EmitScalarExpr(E->getArg(2));
18214 Value *Op3 = EmitScalarExpr(E->getArg(3));
18215 // rldimi is a 64-bit instruction; expand the intrinsic before isel to
18216 // leverage peephole optimizations and avoid legalization effort.
18217 if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
18218 !getTarget().getTriple().isPPC64()) {
18219 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
18220 Op2 = Builder.CreateZExt(Op2, Int64Ty);
18221 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
18222 return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
18223 Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
18225 return Builder.CreateCall(
18226 CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
18227 ? Intrinsic::ppc_rldimi
18228 : Intrinsic::ppc_rlwimi),
18229 {Op0, Op1, Op2, Op3});
18231 case PPC::BI__builtin_ppc_rlwnm: {
18232 Value *Op0 = EmitScalarExpr(E->getArg(0));
18233 Value *Op1 = EmitScalarExpr(E->getArg(1));
18234 Value *Op2 = EmitScalarExpr(E->getArg(2));
18235 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
18236 {Op0, Op1, Op2});
18238 case PPC::BI__builtin_ppc_poppar4:
18239 case PPC::BI__builtin_ppc_poppar8: {
18240 Value *Op0 = EmitScalarExpr(E->getArg(0));
18241 llvm::Type *ArgType = Op0->getType();
18242 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
18243 Value *Tmp = Builder.CreateCall(F, Op0);
18245 llvm::Type *ResultType = ConvertType(E->getType());
18246 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
18247 if (Result->getType() != ResultType)
18248 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
18249 "cast");
18250 return Result;
18252 case PPC::BI__builtin_ppc_cmpb: {
18253 Value *Op0 = EmitScalarExpr(E->getArg(0));
18254 Value *Op1 = EmitScalarExpr(E->getArg(1));
18255 if (getTarget().getTriple().isPPC64()) {
18256 Function *F =
18257 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
18258 return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
18260 // For 32 bit, emit the code as below:
18261 // %conv = trunc i64 %a to i32
18262 // %conv1 = trunc i64 %b to i32
18263 // %shr = lshr i64 %a, 32
18264 // %conv2 = trunc i64 %shr to i32
18265 // %shr3 = lshr i64 %b, 32
18266 // %conv4 = trunc i64 %shr3 to i32
18267 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
18268 // %conv5 = zext i32 %0 to i64
18269 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
18270 // %conv614 = zext i32 %1 to i64
18271 // %shl = shl nuw i64 %conv614, 32
18272 // %or = or i64 %shl, %conv5
18273 // ret i64 %or
18274 Function *F =
18275 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
18276 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
18277 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
18278 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
18279 Value *ArgOneHi =
18280 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
18281 Value *ArgTwoHi =
18282 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
18283 Value *ResLo = Builder.CreateZExt(
18284 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
18285 Value *ResHiShift = Builder.CreateZExt(
18286 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
18287 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
18288 return Builder.CreateOr(ResLo, ResHi);
18290 // Copy sign
18291 case PPC::BI__builtin_vsx_xvcpsgnsp:
18292 case PPC::BI__builtin_vsx_xvcpsgndp: {
18293 llvm::Type *ResultType = ConvertType(E->getType());
18294 Value *X = EmitScalarExpr(E->getArg(0));
18295 Value *Y = EmitScalarExpr(E->getArg(1));
18296 ID = Intrinsic::copysign;
18297 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
18298 return Builder.CreateCall(F, {X, Y});
18300 // Rounding/truncation
18301 case PPC::BI__builtin_vsx_xvrspip:
18302 case PPC::BI__builtin_vsx_xvrdpip:
18303 case PPC::BI__builtin_vsx_xvrdpim:
18304 case PPC::BI__builtin_vsx_xvrspim:
18305 case PPC::BI__builtin_vsx_xvrdpi:
18306 case PPC::BI__builtin_vsx_xvrspi:
18307 case PPC::BI__builtin_vsx_xvrdpic:
18308 case PPC::BI__builtin_vsx_xvrspic:
18309 case PPC::BI__builtin_vsx_xvrdpiz:
18310 case PPC::BI__builtin_vsx_xvrspiz: {
18311 llvm::Type *ResultType = ConvertType(E->getType());
18312 Value *X = EmitScalarExpr(E->getArg(0));
18313 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
18314 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
18315 ID = Builder.getIsFPConstrained()
18316 ? Intrinsic::experimental_constrained_floor
18317 : Intrinsic::floor;
18318 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
18319 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
18320 ID = Builder.getIsFPConstrained()
18321 ? Intrinsic::experimental_constrained_round
18322 : Intrinsic::round;
18323 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
18324 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
18325 ID = Builder.getIsFPConstrained()
18326 ? Intrinsic::experimental_constrained_rint
18327 : Intrinsic::rint;
18328 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
18329 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
18330 ID = Builder.getIsFPConstrained()
18331 ? Intrinsic::experimental_constrained_ceil
18332 : Intrinsic::ceil;
18333 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
18334 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
18335 ID = Builder.getIsFPConstrained()
18336 ? Intrinsic::experimental_constrained_trunc
18337 : Intrinsic::trunc;
18338 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
18339 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
18340 : Builder.CreateCall(F, X);
18343 // Absolute value
18344 case PPC::BI__builtin_vsx_xvabsdp:
18345 case PPC::BI__builtin_vsx_xvabssp: {
18346 llvm::Type *ResultType = ConvertType(E->getType());
18347 Value *X = EmitScalarExpr(E->getArg(0));
18348 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
18349 return Builder.CreateCall(F, X);
18352 // Fastmath by default
18353 case PPC::BI__builtin_ppc_recipdivf:
18354 case PPC::BI__builtin_ppc_recipdivd:
18355 case PPC::BI__builtin_ppc_rsqrtf:
18356 case PPC::BI__builtin_ppc_rsqrtd: {
18357 FastMathFlags FMF = Builder.getFastMathFlags();
18358 Builder.getFastMathFlags().setFast();
18359 llvm::Type *ResultType = ConvertType(E->getType());
18360 Value *X = EmitScalarExpr(E->getArg(0));
18362 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
18363 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
18364 Value *Y = EmitScalarExpr(E->getArg(1));
18365 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
18366 Builder.getFastMathFlags() &= (FMF);
18367 return FDiv;
18369 auto *One = ConstantFP::get(ResultType, 1.0);
18370 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18371 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
18372 Builder.getFastMathFlags() &= (FMF);
18373 return FDiv;
18375 case PPC::BI__builtin_ppc_alignx: {
18376 Value *Op0 = EmitScalarExpr(E->getArg(0));
18377 Value *Op1 = EmitScalarExpr(E->getArg(1));
18378 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
18379 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
18380 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
18381 llvm::Value::MaximumAlignment);
18383 emitAlignmentAssumption(Op1, E->getArg(1),
18384 /*The expr loc is sufficient.*/ SourceLocation(),
18385 AlignmentCI, nullptr);
18386 return Op1;
18388 case PPC::BI__builtin_ppc_rdlam: {
18389 Value *Op0 = EmitScalarExpr(E->getArg(0));
18390 Value *Op1 = EmitScalarExpr(E->getArg(1));
18391 Value *Op2 = EmitScalarExpr(E->getArg(2));
18392 llvm::Type *Ty = Op0->getType();
18393 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
18394 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
18395 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
18396 return Builder.CreateAnd(Rotate, Op2);
18398 case PPC::BI__builtin_ppc_load2r: {
18399 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
18400 Value *Op0 = EmitScalarExpr(E->getArg(0));
18401 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
18402 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
18404 // FMA variations
18405 case PPC::BI__builtin_ppc_fnmsub:
18406 case PPC::BI__builtin_ppc_fnmsubs:
18407 case PPC::BI__builtin_vsx_xvmaddadp:
18408 case PPC::BI__builtin_vsx_xvmaddasp:
18409 case PPC::BI__builtin_vsx_xvnmaddadp:
18410 case PPC::BI__builtin_vsx_xvnmaddasp:
18411 case PPC::BI__builtin_vsx_xvmsubadp:
18412 case PPC::BI__builtin_vsx_xvmsubasp:
18413 case PPC::BI__builtin_vsx_xvnmsubadp:
18414 case PPC::BI__builtin_vsx_xvnmsubasp: {
18415 llvm::Type *ResultType = ConvertType(E->getType());
18416 Value *X = EmitScalarExpr(E->getArg(0));
18417 Value *Y = EmitScalarExpr(E->getArg(1));
18418 Value *Z = EmitScalarExpr(E->getArg(2));
18419 llvm::Function *F;
18420 if (Builder.getIsFPConstrained())
18421 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
18422 else
18423 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
18424 switch (BuiltinID) {
18425 case PPC::BI__builtin_vsx_xvmaddadp:
18426 case PPC::BI__builtin_vsx_xvmaddasp:
18427 if (Builder.getIsFPConstrained())
18428 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
18429 else
18430 return Builder.CreateCall(F, {X, Y, Z});
18431 case PPC::BI__builtin_vsx_xvnmaddadp:
18432 case PPC::BI__builtin_vsx_xvnmaddasp:
18433 if (Builder.getIsFPConstrained())
18434 return Builder.CreateFNeg(
18435 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
18436 else
18437 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
18438 case PPC::BI__builtin_vsx_xvmsubadp:
18439 case PPC::BI__builtin_vsx_xvmsubasp:
18440 if (Builder.getIsFPConstrained())
18441 return Builder.CreateConstrainedFPCall(
18442 F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18443 else
18444 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18445 case PPC::BI__builtin_ppc_fnmsub:
18446 case PPC::BI__builtin_ppc_fnmsubs:
18447 case PPC::BI__builtin_vsx_xvnmsubadp:
18448 case PPC::BI__builtin_vsx_xvnmsubasp:
18449 if (Builder.getIsFPConstrained())
18450 return Builder.CreateFNeg(
18451 Builder.CreateConstrainedFPCall(
18452 F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
18453 "neg");
18454 else
18455 return Builder.CreateCall(
18456 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
18458 llvm_unreachable("Unknown FMA operation");
18459 return nullptr; // Suppress no-return warning
18462 case PPC::BI__builtin_vsx_insertword: {
18463 Value *Op0 = EmitScalarExpr(E->getArg(0));
18464 Value *Op1 = EmitScalarExpr(E->getArg(1));
18465 Value *Op2 = EmitScalarExpr(E->getArg(2));
18466 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
18468 // Third argument is a compile time constant int. It must be clamped to
18469 // the range [0, 12].
18470 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18471 assert(ArgCI &&
18472 "Third arg to xxinsertw intrinsic must be constant integer");
18473 const int64_t MaxIndex = 12;
18474 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18476 // The builtin semantics don't exactly match the xxinsertw instructions
18477 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
18478 // word from the first argument, and inserts it in the second argument. The
18479 // instruction extracts the word from its second input register and inserts
18480 // it into its first input register, so swap the first and second arguments.
18481 std::swap(Op0, Op1);
18483 // Need to cast the second argument from a vector of unsigned int to a
18484 // vector of long long.
18485 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
18487 if (getTarget().isLittleEndian()) {
18488 // Reverse the double words in the vector we will extract from.
18489 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18490 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
18492 // Reverse the index.
18493 Index = MaxIndex - Index;
18496 // Intrinsic expects the first arg to be a vector of int.
18497 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
18498 Op2 = ConstantInt::getSigned(Int32Ty, Index);
18499 return Builder.CreateCall(F, {Op0, Op1, Op2});
18502 case PPC::BI__builtin_vsx_extractuword: {
18503 Value *Op0 = EmitScalarExpr(E->getArg(0));
18504 Value *Op1 = EmitScalarExpr(E->getArg(1));
18505 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
18507 // Intrinsic expects the first argument to be a vector of doublewords.
18508 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18510 // The second argument is a compile time constant int that needs to
18511 // be clamped to the range [0, 12].
18512 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
18513 assert(ArgCI &&
18514 "Second Arg to xxextractuw intrinsic must be a constant integer!");
18515 const int64_t MaxIndex = 12;
18516 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18518 if (getTarget().isLittleEndian()) {
18519 // Reverse the index.
18520 Index = MaxIndex - Index;
18521 Op1 = ConstantInt::getSigned(Int32Ty, Index);
18523 // Emit the call, then reverse the double words of the results vector.
18524 Value *Call = Builder.CreateCall(F, {Op0, Op1});
18526 Value *ShuffleCall =
18527 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
18528 return ShuffleCall;
18529 } else {
18530 Op1 = ConstantInt::getSigned(Int32Ty, Index);
18531 return Builder.CreateCall(F, {Op0, Op1});
18535 case PPC::BI__builtin_vsx_xxpermdi: {
18536 Value *Op0 = EmitScalarExpr(E->getArg(0));
18537 Value *Op1 = EmitScalarExpr(E->getArg(1));
18538 Value *Op2 = EmitScalarExpr(E->getArg(2));
18539 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18540 assert(ArgCI && "Third arg must be constant integer!");
18542 unsigned Index = ArgCI->getZExtValue();
18543 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18544 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
18546 // Account for endianness by treating this as just a shuffle. So we use the
18547 // same indices for both LE and BE in order to produce expected results in
18548 // both cases.
18549 int ElemIdx0 = (Index & 2) >> 1;
18550 int ElemIdx1 = 2 + (Index & 1);
18552 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
18553 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18554 QualType BIRetType = E->getType();
18555 auto RetTy = ConvertType(BIRetType);
18556 return Builder.CreateBitCast(ShuffleCall, RetTy);
18559 case PPC::BI__builtin_vsx_xxsldwi: {
18560 Value *Op0 = EmitScalarExpr(E->getArg(0));
18561 Value *Op1 = EmitScalarExpr(E->getArg(1));
18562 Value *Op2 = EmitScalarExpr(E->getArg(2));
18563 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18564 assert(ArgCI && "Third argument must be a compile time constant");
18565 unsigned Index = ArgCI->getZExtValue() & 0x3;
18566 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
18567 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
18569 // Create a shuffle mask
18570 int ElemIdx0;
18571 int ElemIdx1;
18572 int ElemIdx2;
18573 int ElemIdx3;
18574 if (getTarget().isLittleEndian()) {
18575 // Little endian element N comes from element 8+N-Index of the
18576 // concatenated wide vector (of course, using modulo arithmetic on
18577 // the total number of elements).
18578 ElemIdx0 = (8 - Index) % 8;
18579 ElemIdx1 = (9 - Index) % 8;
18580 ElemIdx2 = (10 - Index) % 8;
18581 ElemIdx3 = (11 - Index) % 8;
18582 } else {
18583 // Big endian ElemIdx<N> = Index + N
18584 ElemIdx0 = Index;
18585 ElemIdx1 = Index + 1;
18586 ElemIdx2 = Index + 2;
18587 ElemIdx3 = Index + 3;
18590 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
18591 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18592 QualType BIRetType = E->getType();
18593 auto RetTy = ConvertType(BIRetType);
18594 return Builder.CreateBitCast(ShuffleCall, RetTy);
18597 case PPC::BI__builtin_pack_vector_int128: {
18598 Value *Op0 = EmitScalarExpr(E->getArg(0));
18599 Value *Op1 = EmitScalarExpr(E->getArg(1));
18600 bool isLittleEndian = getTarget().isLittleEndian();
18601 Value *PoisonValue =
18602 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
18603 Value *Res = Builder.CreateInsertElement(
18604 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
18605 Res = Builder.CreateInsertElement(Res, Op1,
18606 (uint64_t)(isLittleEndian ? 0 : 1));
18607 return Builder.CreateBitCast(Res, ConvertType(E->getType()));
18610 case PPC::BI__builtin_unpack_vector_int128: {
18611 Value *Op0 = EmitScalarExpr(E->getArg(0));
18612 Value *Op1 = EmitScalarExpr(E->getArg(1));
18613 ConstantInt *Index = cast<ConstantInt>(Op1);
18614 Value *Unpacked = Builder.CreateBitCast(
18615 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
18617 if (getTarget().isLittleEndian())
18618 Index =
18619 ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
18621 return Builder.CreateExtractElement(Unpacked, Index);
18624 case PPC::BI__builtin_ppc_sthcx: {
18625 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
18626 Value *Op0 = EmitScalarExpr(E->getArg(0));
18627 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
18628 return Builder.CreateCall(F, {Op0, Op1});
18631 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
18632 // Some of the MMA instructions accumulate their result into an existing
18633 // accumulator whereas the others generate a new accumulator. So we need to
18634 // use custom code generation to expand a builtin call with a pointer to a
18635 // load (if the corresponding instruction accumulates its result) followed by
18636 // the call to the intrinsic and a store of the result.
18637 #define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
18638 case PPC::BI__builtin_##Name:
18639 #include "clang/Basic/BuiltinsPPC.def"
18641 SmallVector<Value *, 4> Ops;
18642 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
18643 if (E->getArg(i)->getType()->isArrayType())
18644 Ops.push_back(
18645 EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this));
18646 else
18647 Ops.push_back(EmitScalarExpr(E->getArg(i)));
18648 // The first argument of these two builtins is a pointer used to store their
18649 // result. However, the llvm intrinsics return their result in multiple
18650 // return values. So, here we emit code extracting these values from the
18651 // intrinsic results and storing them using that pointer.
18652 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
18653 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
18654 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
18655 unsigned NumVecs = 2;
18656 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
18657 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
18658 NumVecs = 4;
18659 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
18661 llvm::Function *F = CGM.getIntrinsic(Intrinsic);
18662 Address Addr = EmitPointerWithAlignment(E->getArg(1));
18663 Value *Vec = Builder.CreateLoad(Addr);
18664 Value *Call = Builder.CreateCall(F, {Vec});
18665 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
18666 Value *Ptr = Ops[0];
18667 for (unsigned i=0; i<NumVecs; i++) {
18668 Value *Vec = Builder.CreateExtractValue(Call, i);
18669 llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
18670 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
18671 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
18673 return Call;
18675 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
18676 BuiltinID == PPC::BI__builtin_mma_build_acc) {
18677 // Reverse the order of the operands for LE, so the
18678 // same builtin call can be used on both LE and BE
18679 // without the need for the programmer to swap operands.
18680 // The operands are reversed starting from the second argument,
18681 // the first operand is the pointer to the pair/accumulator
18682 // that is being built.
18683 if (getTarget().isLittleEndian())
18684 std::reverse(Ops.begin() + 1, Ops.end());
18686 bool Accumulate;
18687 switch (BuiltinID) {
18688 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
18689 case PPC::BI__builtin_##Name: \
18690 ID = Intrinsic::ppc_##Intr; \
18691 Accumulate = Acc; \
18692 break;
18693 #include "clang/Basic/BuiltinsPPC.def"
18695 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18696 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
18697 BuiltinID == PPC::BI__builtin_mma_lxvp ||
18698 BuiltinID == PPC::BI__builtin_mma_stxvp) {
18699 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18700 BuiltinID == PPC::BI__builtin_mma_lxvp) {
18701 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
18702 } else {
18703 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
18705 Ops.pop_back();
18706 llvm::Function *F = CGM.getIntrinsic(ID);
18707 return Builder.CreateCall(F, Ops, "");
18709 SmallVector<Value*, 4> CallOps;
18710 if (Accumulate) {
18711 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18712 Value *Acc = Builder.CreateLoad(Addr);
18713 CallOps.push_back(Acc);
18715 for (unsigned i=1; i<Ops.size(); i++)
18716 CallOps.push_back(Ops[i]);
18717 llvm::Function *F = CGM.getIntrinsic(ID);
18718 Value *Call = Builder.CreateCall(F, CallOps);
18719 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign());
18722 case PPC::BI__builtin_ppc_compare_and_swap:
18723 case PPC::BI__builtin_ppc_compare_and_swaplp: {
18724 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18725 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
18726 Value *OldVal = Builder.CreateLoad(OldValAddr);
18727 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
18728 LValue LV = MakeAddrLValue(Addr, AtomicTy);
18729 Value *Op2 = EmitScalarExpr(E->getArg(2));
18730 auto Pair = EmitAtomicCompareExchange(
18731 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
18732 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
18733 // Unlike c11's atomic_compare_exchange, according to
18734 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
18735 // > In either case, the contents of the memory location specified by addr
18736 // > are copied into the memory location specified by old_val_addr.
18737 // But it hasn't specified storing to OldValAddr is atomic or not and
18738 // which order to use. Now following XL's codegen, treat it as a normal
18739 // store.
18740 Value *LoadedVal = Pair.first.getScalarVal();
18741 Builder.CreateStore(LoadedVal, OldValAddr);
18742 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
18744 case PPC::BI__builtin_ppc_fetch_and_add:
18745 case PPC::BI__builtin_ppc_fetch_and_addlp: {
18746 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
18747 llvm::AtomicOrdering::Monotonic);
18749 case PPC::BI__builtin_ppc_fetch_and_and:
18750 case PPC::BI__builtin_ppc_fetch_and_andlp: {
18751 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
18752 llvm::AtomicOrdering::Monotonic);
18755 case PPC::BI__builtin_ppc_fetch_and_or:
18756 case PPC::BI__builtin_ppc_fetch_and_orlp: {
18757 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
18758 llvm::AtomicOrdering::Monotonic);
18760 case PPC::BI__builtin_ppc_fetch_and_swap:
18761 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
18762 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
18763 llvm::AtomicOrdering::Monotonic);
18765 case PPC::BI__builtin_ppc_ldarx:
18766 case PPC::BI__builtin_ppc_lwarx:
18767 case PPC::BI__builtin_ppc_lharx:
18768 case PPC::BI__builtin_ppc_lbarx:
18769 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
18770 case PPC::BI__builtin_ppc_mfspr: {
18771 Value *Op0 = EmitScalarExpr(E->getArg(0));
18772 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18773 ? Int32Ty
18774 : Int64Ty;
18775 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
18776 return Builder.CreateCall(F, {Op0});
18778 case PPC::BI__builtin_ppc_mtspr: {
18779 Value *Op0 = EmitScalarExpr(E->getArg(0));
18780 Value *Op1 = EmitScalarExpr(E->getArg(1));
18781 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18782 ? Int32Ty
18783 : Int64Ty;
18784 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
18785 return Builder.CreateCall(F, {Op0, Op1});
18787 case PPC::BI__builtin_ppc_popcntb: {
18788 Value *ArgValue = EmitScalarExpr(E->getArg(0));
18789 llvm::Type *ArgType = ArgValue->getType();
18790 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
18791 return Builder.CreateCall(F, {ArgValue}, "popcntb");
18793 case PPC::BI__builtin_ppc_mtfsf: {
18794 // The builtin takes a uint32 that needs to be cast to an
18795 // f64 to be passed to the intrinsic.
18796 Value *Op0 = EmitScalarExpr(E->getArg(0));
18797 Value *Op1 = EmitScalarExpr(E->getArg(1));
18798 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
18799 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
18800 return Builder.CreateCall(F, {Op0, Cast}, "");
18803 case PPC::BI__builtin_ppc_swdiv_nochk:
18804 case PPC::BI__builtin_ppc_swdivs_nochk: {
18805 Value *Op0 = EmitScalarExpr(E->getArg(0));
18806 Value *Op1 = EmitScalarExpr(E->getArg(1));
18807 FastMathFlags FMF = Builder.getFastMathFlags();
18808 Builder.getFastMathFlags().setFast();
18809 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
18810 Builder.getFastMathFlags() &= (FMF);
18811 return FDiv;
18813 case PPC::BI__builtin_ppc_fric:
18814 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18815 *this, E, Intrinsic::rint,
18816 Intrinsic::experimental_constrained_rint))
18817 .getScalarVal();
18818 case PPC::BI__builtin_ppc_frim:
18819 case PPC::BI__builtin_ppc_frims:
18820 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18821 *this, E, Intrinsic::floor,
18822 Intrinsic::experimental_constrained_floor))
18823 .getScalarVal();
18824 case PPC::BI__builtin_ppc_frin:
18825 case PPC::BI__builtin_ppc_frins:
18826 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18827 *this, E, Intrinsic::round,
18828 Intrinsic::experimental_constrained_round))
18829 .getScalarVal();
18830 case PPC::BI__builtin_ppc_frip:
18831 case PPC::BI__builtin_ppc_frips:
18832 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18833 *this, E, Intrinsic::ceil,
18834 Intrinsic::experimental_constrained_ceil))
18835 .getScalarVal();
18836 case PPC::BI__builtin_ppc_friz:
18837 case PPC::BI__builtin_ppc_frizs:
18838 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18839 *this, E, Intrinsic::trunc,
18840 Intrinsic::experimental_constrained_trunc))
18841 .getScalarVal();
18842 case PPC::BI__builtin_ppc_fsqrt:
18843 case PPC::BI__builtin_ppc_fsqrts:
18844 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18845 *this, E, Intrinsic::sqrt,
18846 Intrinsic::experimental_constrained_sqrt))
18847 .getScalarVal();
18848 case PPC::BI__builtin_ppc_test_data_class: {
18849 Value *Op0 = EmitScalarExpr(E->getArg(0));
18850 Value *Op1 = EmitScalarExpr(E->getArg(1));
18851 return Builder.CreateCall(
18852 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
18853 {Op0, Op1}, "test_data_class");
18855 case PPC::BI__builtin_ppc_maxfe: {
18856 Value *Op0 = EmitScalarExpr(E->getArg(0));
18857 Value *Op1 = EmitScalarExpr(E->getArg(1));
18858 Value *Op2 = EmitScalarExpr(E->getArg(2));
18859 Value *Op3 = EmitScalarExpr(E->getArg(3));
18860 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
18861 {Op0, Op1, Op2, Op3});
18863 case PPC::BI__builtin_ppc_maxfl: {
18864 Value *Op0 = EmitScalarExpr(E->getArg(0));
18865 Value *Op1 = EmitScalarExpr(E->getArg(1));
18866 Value *Op2 = EmitScalarExpr(E->getArg(2));
18867 Value *Op3 = EmitScalarExpr(E->getArg(3));
18868 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
18869 {Op0, Op1, Op2, Op3});
18871 case PPC::BI__builtin_ppc_maxfs: {
18872 Value *Op0 = EmitScalarExpr(E->getArg(0));
18873 Value *Op1 = EmitScalarExpr(E->getArg(1));
18874 Value *Op2 = EmitScalarExpr(E->getArg(2));
18875 Value *Op3 = EmitScalarExpr(E->getArg(3));
18876 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
18877 {Op0, Op1, Op2, Op3});
18879 case PPC::BI__builtin_ppc_minfe: {
18880 Value *Op0 = EmitScalarExpr(E->getArg(0));
18881 Value *Op1 = EmitScalarExpr(E->getArg(1));
18882 Value *Op2 = EmitScalarExpr(E->getArg(2));
18883 Value *Op3 = EmitScalarExpr(E->getArg(3));
18884 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
18885 {Op0, Op1, Op2, Op3});
18887 case PPC::BI__builtin_ppc_minfl: {
18888 Value *Op0 = EmitScalarExpr(E->getArg(0));
18889 Value *Op1 = EmitScalarExpr(E->getArg(1));
18890 Value *Op2 = EmitScalarExpr(E->getArg(2));
18891 Value *Op3 = EmitScalarExpr(E->getArg(3));
18892 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
18893 {Op0, Op1, Op2, Op3});
18895 case PPC::BI__builtin_ppc_minfs: {
18896 Value *Op0 = EmitScalarExpr(E->getArg(0));
18897 Value *Op1 = EmitScalarExpr(E->getArg(1));
18898 Value *Op2 = EmitScalarExpr(E->getArg(2));
18899 Value *Op3 = EmitScalarExpr(E->getArg(3));
18900 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
18901 {Op0, Op1, Op2, Op3});
18903 case PPC::BI__builtin_ppc_swdiv:
18904 case PPC::BI__builtin_ppc_swdivs: {
18905 Value *Op0 = EmitScalarExpr(E->getArg(0));
18906 Value *Op1 = EmitScalarExpr(E->getArg(1));
18907 return Builder.CreateFDiv(Op0, Op1, "swdiv");
18909 case PPC::BI__builtin_ppc_set_fpscr_rn:
18910 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
18911 {EmitScalarExpr(E->getArg(0))});
18912 case PPC::BI__builtin_ppc_mffs:
18913 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
18917 namespace {
18918 // If \p E is not null pointer, insert address space cast to match return
18919 // type of \p E if necessary.
18920 Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
18921 const CallExpr *E = nullptr) {
18922 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
18923 auto *Call = CGF.Builder.CreateCall(F);
18924 Call->addRetAttr(
18925 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
18926 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
18927 if (!E)
18928 return Call;
18929 QualType BuiltinRetType = E->getType();
18930 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
18931 if (RetTy == Call->getType())
18932 return Call;
18933 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
18936 Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
18937 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
18938 auto *Call = CGF.Builder.CreateCall(F);
18939 Call->addRetAttr(
18940 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
18941 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
18942 return Call;
18945 // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
18946 /// Emit code based on Code Object ABI version.
18947 /// COV_4 : Emit code to use dispatch ptr
18948 /// COV_5+ : Emit code to use implicitarg ptr
18949 /// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
18950 /// and use its value for COV_4 or COV_5+ approach. It is used for
18951 /// compiling device libraries in an ABI-agnostic way.
18953 /// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by
18954 /// clang during compilation of user code.
18955 Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
18956 llvm::LoadInst *LD;
18958 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
18960 if (Cov == CodeObjectVersionKind::COV_None) {
18961 StringRef Name = "__oclc_ABI_version";
18962 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
18963 if (!ABIVersionC)
18964 ABIVersionC = new llvm::GlobalVariable(
18965 CGF.CGM.getModule(), CGF.Int32Ty, false,
18966 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
18967 llvm::GlobalVariable::NotThreadLocal,
18968 CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
18970 // This load will be eliminated by the IPSCCP because it is constant
18971 // weak_odr without externally_initialized. Either changing it to weak or
18972 // adding externally_initialized will keep the load.
18973 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
18974 CGF.CGM.getIntAlign());
18976 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
18977 ABIVersion,
18978 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
18980 // Indexing the implicit kernarg segment.
18981 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
18982 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18984 // Indexing the HSA kernel_dispatch_packet struct.
18985 Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
18986 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18988 auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
18989 LD = CGF.Builder.CreateLoad(
18990 Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
18991 } else {
18992 Value *GEP = nullptr;
18993 if (Cov >= CodeObjectVersionKind::COV_5) {
18994 // Indexing the implicit kernarg segment.
18995 GEP = CGF.Builder.CreateConstGEP1_32(
18996 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18997 } else {
18998 // Indexing the HSA kernel_dispatch_packet struct.
18999 GEP = CGF.Builder.CreateConstGEP1_32(
19000 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
19002 LD = CGF.Builder.CreateLoad(
19003 Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
19006 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
19007 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
19008 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
19009 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
19010 LD->setMetadata(llvm::LLVMContext::MD_noundef,
19011 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19012 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
19013 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19014 return LD;
19017 // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
19018 Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
19019 const unsigned XOffset = 12;
19020 auto *DP = EmitAMDGPUDispatchPtr(CGF);
19021 // Indexing the HSA kernel_dispatch_packet struct.
19022 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
19023 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
19024 auto *LD = CGF.Builder.CreateLoad(
19025 Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
19027 llvm::MDBuilder MDB(CGF.getLLVMContext());
19029 // Known non-zero.
19030 LD->setMetadata(llvm::LLVMContext::MD_range,
19031 MDB.createRange(APInt(32, 1), APInt::getZero(32)));
19032 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
19033 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19034 return LD;
19036 } // namespace
19038 // For processing memory ordering and memory scope arguments of various
19039 // amdgcn builtins.
19040 // \p Order takes a C++11 comptabile memory-ordering specifier and converts
19041 // it into LLVM's memory ordering specifier using atomic C ABI, and writes
19042 // to \p AO. \p Scope takes a const char * and converts it into AMDGCN
19043 // specific SyncScopeID and writes it to \p SSID.
19044 void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
19045 llvm::AtomicOrdering &AO,
19046 llvm::SyncScope::ID &SSID) {
19047 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
19049 // Map C11/C++11 memory ordering to LLVM memory ordering
19050 assert(llvm::isValidAtomicOrderingCABI(ord));
19051 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
19052 case llvm::AtomicOrderingCABI::acquire:
19053 case llvm::AtomicOrderingCABI::consume:
19054 AO = llvm::AtomicOrdering::Acquire;
19055 break;
19056 case llvm::AtomicOrderingCABI::release:
19057 AO = llvm::AtomicOrdering::Release;
19058 break;
19059 case llvm::AtomicOrderingCABI::acq_rel:
19060 AO = llvm::AtomicOrdering::AcquireRelease;
19061 break;
19062 case llvm::AtomicOrderingCABI::seq_cst:
19063 AO = llvm::AtomicOrdering::SequentiallyConsistent;
19064 break;
19065 case llvm::AtomicOrderingCABI::relaxed:
19066 AO = llvm::AtomicOrdering::Monotonic;
19067 break;
19070 // Some of the atomic builtins take the scope as a string name.
19071 StringRef scp;
19072 if (llvm::getConstantStringInfo(Scope, scp)) {
19073 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
19074 return;
19077 // Older builtins had an enum argument for the memory scope.
19078 int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
19079 switch (scope) {
19080 case 0: // __MEMORY_SCOPE_SYSTEM
19081 SSID = llvm::SyncScope::System;
19082 break;
19083 case 1: // __MEMORY_SCOPE_DEVICE
19084 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
19085 break;
19086 case 2: // __MEMORY_SCOPE_WRKGRP
19087 SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
19088 break;
19089 case 3: // __MEMORY_SCOPE_WVFRNT
19090 SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
19091 break;
19092 case 4: // __MEMORY_SCOPE_SINGLE
19093 SSID = llvm::SyncScope::SingleThread;
19094 break;
19095 default:
19096 SSID = llvm::SyncScope::System;
19097 break;
19101 llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
19102 unsigned Idx,
19103 const CallExpr *E) {
19104 llvm::Value *Arg = nullptr;
19105 if ((ICEArguments & (1 << Idx)) == 0) {
19106 Arg = EmitScalarExpr(E->getArg(Idx));
19107 } else {
19108 // If this is required to be a constant, constant fold it so that we
19109 // know that the generated intrinsic gets a ConstantInt.
19110 std::optional<llvm::APSInt> Result =
19111 E->getArg(Idx)->getIntegerConstantExpr(getContext());
19112 assert(Result && "Expected argument to be a constant");
19113 Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
19115 return Arg;
19118 // Return dot product intrinsic that corresponds to the QT scalar type
19119 static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
19120 if (QT->isFloatingType())
19121 return RT.getFDotIntrinsic();
19122 if (QT->isSignedIntegerType())
19123 return RT.getSDotIntrinsic();
19124 assert(QT->isUnsignedIntegerType());
19125 return RT.getUDotIntrinsic();
19128 static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) {
19129 if (QT->hasSignedIntegerRepresentation()) {
19130 return RT.getFirstBitSHighIntrinsic();
19133 assert(QT->hasUnsignedIntegerRepresentation());
19134 return RT.getFirstBitUHighIntrinsic();
19137 Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
19138 const CallExpr *E,
19139 ReturnValueSlot ReturnValue) {
19140 if (!getLangOpts().HLSL)
19141 return nullptr;
19143 switch (BuiltinID) {
19144 case Builtin::BI__builtin_hlsl_resource_getpointer: {
19145 Value *HandleOp = EmitScalarExpr(E->getArg(0));
19146 Value *IndexOp = EmitScalarExpr(E->getArg(1));
19148 // TODO: Map to an hlsl_device address space.
19149 llvm::Type *RetTy = llvm::PointerType::getUnqual(getLLVMContext());
19151 return Builder.CreateIntrinsic(RetTy, Intrinsic::dx_resource_getpointer,
19152 ArrayRef<Value *>{HandleOp, IndexOp});
19154 case Builtin::BI__builtin_hlsl_all: {
19155 Value *Op0 = EmitScalarExpr(E->getArg(0));
19156 return Builder.CreateIntrinsic(
19157 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19158 CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
19159 "hlsl.all");
19161 case Builtin::BI__builtin_hlsl_any: {
19162 Value *Op0 = EmitScalarExpr(E->getArg(0));
19163 return Builder.CreateIntrinsic(
19164 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19165 CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
19166 "hlsl.any");
19168 case Builtin::BI__builtin_hlsl_asdouble:
19169 return handleAsDoubleBuiltin(*this, E);
19170 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
19171 Value *OpX = EmitScalarExpr(E->getArg(0));
19172 Value *OpMin = EmitScalarExpr(E->getArg(1));
19173 Value *OpMax = EmitScalarExpr(E->getArg(2));
19175 QualType Ty = E->getArg(0)->getType();
19176 if (auto *VecTy = Ty->getAs<VectorType>())
19177 Ty = VecTy->getElementType();
19179 Intrinsic::ID Intr;
19180 if (Ty->isFloatingType()) {
19181 Intr = CGM.getHLSLRuntime().getNClampIntrinsic();
19182 } else if (Ty->isUnsignedIntegerType()) {
19183 Intr = CGM.getHLSLRuntime().getUClampIntrinsic();
19184 } else {
19185 assert(Ty->isSignedIntegerType());
19186 Intr = CGM.getHLSLRuntime().getSClampIntrinsic();
19188 return Builder.CreateIntrinsic(
19189 /*ReturnType=*/OpX->getType(), Intr,
19190 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "hlsl.clamp");
19192 case Builtin::BI__builtin_hlsl_cross: {
19193 Value *Op0 = EmitScalarExpr(E->getArg(0));
19194 Value *Op1 = EmitScalarExpr(E->getArg(1));
19195 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19196 E->getArg(1)->getType()->hasFloatingRepresentation() &&
19197 "cross operands must have a float representation");
19198 // make sure each vector has exactly 3 elements
19199 assert(
19200 E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
19201 E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
19202 "input vectors must have 3 elements each");
19203 return Builder.CreateIntrinsic(
19204 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getCrossIntrinsic(),
19205 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.cross");
19207 case Builtin::BI__builtin_hlsl_dot: {
19208 Value *Op0 = EmitScalarExpr(E->getArg(0));
19209 Value *Op1 = EmitScalarExpr(E->getArg(1));
19210 llvm::Type *T0 = Op0->getType();
19211 llvm::Type *T1 = Op1->getType();
19213 // If the arguments are scalars, just emit a multiply
19214 if (!T0->isVectorTy() && !T1->isVectorTy()) {
19215 if (T0->isFloatingPointTy())
19216 return Builder.CreateFMul(Op0, Op1, "hlsl.dot");
19218 if (T0->isIntegerTy())
19219 return Builder.CreateMul(Op0, Op1, "hlsl.dot");
19221 llvm_unreachable(
19222 "Scalar dot product is only supported on ints and floats.");
19224 // For vectors, validate types and emit the appropriate intrinsic
19226 // A VectorSplat should have happened
19227 assert(T0->isVectorTy() && T1->isVectorTy() &&
19228 "Dot product of vector and scalar is not supported.");
19230 auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>();
19231 [[maybe_unused]] auto *VecTy1 =
19232 E->getArg(1)->getType()->getAs<VectorType>();
19234 assert(VecTy0->getElementType() == VecTy1->getElementType() &&
19235 "Dot product of vectors need the same element types.");
19237 assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
19238 "Dot product requires vectors to be of the same size.");
19240 return Builder.CreateIntrinsic(
19241 /*ReturnType=*/T0->getScalarType(),
19242 getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()),
19243 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
19245 case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
19246 Value *A = EmitScalarExpr(E->getArg(0));
19247 Value *B = EmitScalarExpr(E->getArg(1));
19248 Value *C = EmitScalarExpr(E->getArg(2));
19250 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
19251 return Builder.CreateIntrinsic(
19252 /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
19253 "hlsl.dot4add.i8packed");
19255 case Builtin::BI__builtin_hlsl_dot4add_u8packed: {
19256 Value *A = EmitScalarExpr(E->getArg(0));
19257 Value *B = EmitScalarExpr(E->getArg(1));
19258 Value *C = EmitScalarExpr(E->getArg(2));
19260 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic();
19261 return Builder.CreateIntrinsic(
19262 /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
19263 "hlsl.dot4add.u8packed");
19265 case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
19267 Value *X = EmitScalarExpr(E->getArg(0));
19269 return Builder.CreateIntrinsic(
19270 /*ReturnType=*/ConvertType(E->getType()),
19271 getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
19272 ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
19274 case Builtin::BI__builtin_hlsl_lerp: {
19275 Value *X = EmitScalarExpr(E->getArg(0));
19276 Value *Y = EmitScalarExpr(E->getArg(1));
19277 Value *S = EmitScalarExpr(E->getArg(2));
19278 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19279 llvm_unreachable("lerp operand must have a float representation");
19280 return Builder.CreateIntrinsic(
19281 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
19282 ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
19284 case Builtin::BI__builtin_hlsl_length: {
19285 Value *X = EmitScalarExpr(E->getArg(0));
19287 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19288 "length operand must have a float representation");
19289 // if the operand is a scalar, we can use the fabs llvm intrinsic directly
19290 if (!E->getArg(0)->getType()->isVectorType())
19291 return EmitFAbs(*this, X);
19293 return Builder.CreateIntrinsic(
19294 /*ReturnType=*/X->getType()->getScalarType(),
19295 CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef<Value *>{X},
19296 nullptr, "hlsl.length");
19298 case Builtin::BI__builtin_hlsl_normalize: {
19299 Value *X = EmitScalarExpr(E->getArg(0));
19301 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19302 "normalize operand must have a float representation");
19304 return Builder.CreateIntrinsic(
19305 /*ReturnType=*/X->getType(),
19306 CGM.getHLSLRuntime().getNormalizeIntrinsic(), ArrayRef<Value *>{X},
19307 nullptr, "hlsl.normalize");
19309 case Builtin::BI__builtin_hlsl_elementwise_degrees: {
19310 Value *X = EmitScalarExpr(E->getArg(0));
19312 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19313 "degree operand must have a float representation");
19315 return Builder.CreateIntrinsic(
19316 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
19317 ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
19319 case Builtin::BI__builtin_hlsl_elementwise_frac: {
19320 Value *Op0 = EmitScalarExpr(E->getArg(0));
19321 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19322 llvm_unreachable("frac operand must have a float representation");
19323 return Builder.CreateIntrinsic(
19324 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(),
19325 ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac");
19327 case Builtin::BI__builtin_hlsl_elementwise_isinf: {
19328 Value *Op0 = EmitScalarExpr(E->getArg(0));
19329 llvm::Type *Xty = Op0->getType();
19330 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
19331 if (Xty->isVectorTy()) {
19332 auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();
19333 retType = llvm::VectorType::get(
19334 retType, ElementCount::getFixed(XVecTy->getNumElements()));
19336 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19337 llvm_unreachable("isinf operand must have a float representation");
19338 return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
19339 ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
19341 case Builtin::BI__builtin_hlsl_mad: {
19342 Value *M = EmitScalarExpr(E->getArg(0));
19343 Value *A = EmitScalarExpr(E->getArg(1));
19344 Value *B = EmitScalarExpr(E->getArg(2));
19345 if (E->getArg(0)->getType()->hasFloatingRepresentation())
19346 return Builder.CreateIntrinsic(
19347 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
19348 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
19350 if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
19351 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
19352 return Builder.CreateIntrinsic(
19353 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
19354 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
19356 Value *Mul = Builder.CreateNSWMul(M, A);
19357 return Builder.CreateNSWAdd(Mul, B);
19359 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
19360 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
19361 return Builder.CreateIntrinsic(
19362 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
19363 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
19365 Value *Mul = Builder.CreateNUWMul(M, A);
19366 return Builder.CreateNUWAdd(Mul, B);
19368 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
19369 Value *Op0 = EmitScalarExpr(E->getArg(0));
19370 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19371 llvm_unreachable("rcp operand must have a float representation");
19372 llvm::Type *Ty = Op0->getType();
19373 llvm::Type *EltTy = Ty->getScalarType();
19374 Constant *One = Ty->isVectorTy()
19375 ? ConstantVector::getSplat(
19376 ElementCount::getFixed(
19377 cast<FixedVectorType>(Ty)->getNumElements()),
19378 ConstantFP::get(EltTy, 1.0))
19379 : ConstantFP::get(EltTy, 1.0);
19380 return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
19382 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
19383 Value *Op0 = EmitScalarExpr(E->getArg(0));
19384 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19385 llvm_unreachable("rsqrt operand must have a float representation");
19386 return Builder.CreateIntrinsic(
19387 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),
19388 ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");
19390 case Builtin::BI__builtin_hlsl_elementwise_saturate: {
19391 Value *Op0 = EmitScalarExpr(E->getArg(0));
19392 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19393 "saturate operand must have a float representation");
19394 return Builder.CreateIntrinsic(
19395 /*ReturnType=*/Op0->getType(),
19396 CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
19397 nullptr, "hlsl.saturate");
19399 case Builtin::BI__builtin_hlsl_select: {
19400 Value *OpCond = EmitScalarExpr(E->getArg(0));
19401 RValue RValTrue = EmitAnyExpr(E->getArg(1));
19402 Value *OpTrue =
19403 RValTrue.isScalar()
19404 ? RValTrue.getScalarVal()
19405 : RValTrue.getAggregatePointer(E->getArg(1)->getType(), *this);
19406 RValue RValFalse = EmitAnyExpr(E->getArg(2));
19407 Value *OpFalse =
19408 RValFalse.isScalar()
19409 ? RValFalse.getScalarVal()
19410 : RValFalse.getAggregatePointer(E->getArg(2)->getType(), *this);
19412 Value *SelectVal =
19413 Builder.CreateSelect(OpCond, OpTrue, OpFalse, "hlsl.select");
19414 if (!RValTrue.isScalar())
19415 Builder.CreateStore(SelectVal, ReturnValue.getAddress(),
19416 ReturnValue.isVolatile());
19418 return SelectVal;
19420 case Builtin::BI__builtin_hlsl_step: {
19421 Value *Op0 = EmitScalarExpr(E->getArg(0));
19422 Value *Op1 = EmitScalarExpr(E->getArg(1));
19423 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19424 E->getArg(1)->getType()->hasFloatingRepresentation() &&
19425 "step operands must have a float representation");
19426 return Builder.CreateIntrinsic(
19427 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getStepIntrinsic(),
19428 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.step");
19430 case Builtin::BI__builtin_hlsl_wave_active_any_true: {
19431 Value *Op = EmitScalarExpr(E->getArg(0));
19432 assert(Op->getType()->isIntegerTy(1) &&
19433 "Intrinsic WaveActiveAnyTrue operand must be a bool");
19435 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic();
19436 return EmitRuntimeCall(
19437 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
19439 case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
19440 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19441 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic();
19442 return EmitRuntimeCall(
19443 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID),
19444 ArrayRef{OpExpr});
19446 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
19447 // We don't define a SPIR-V intrinsic, instead it is a SPIR-V built-in
19448 // defined in SPIRVBuiltins.td. So instead we manually get the matching name
19449 // for the DirectX intrinsic and the demangled builtin name
19450 switch (CGM.getTarget().getTriple().getArch()) {
19451 case llvm::Triple::dxil:
19452 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
19453 &CGM.getModule(), Intrinsic::dx_wave_getlaneindex));
19454 case llvm::Triple::spirv:
19455 return EmitRuntimeCall(CGM.CreateRuntimeFunction(
19456 llvm::FunctionType::get(IntTy, {}, false),
19457 "__hlsl_wave_get_lane_index", {}, false, true));
19458 default:
19459 llvm_unreachable(
19460 "Intrinsic WaveGetLaneIndex not supported by target architecture");
19463 case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
19464 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
19465 return EmitRuntimeCall(
19466 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
19468 case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
19469 // Due to the use of variadic arguments we must explicitly retreive them and
19470 // create our function type.
19471 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19472 Value *OpIndex = EmitScalarExpr(E->getArg(1));
19473 llvm::FunctionType *FT = llvm::FunctionType::get(
19474 OpExpr->getType(), ArrayRef{OpExpr->getType(), OpIndex->getType()},
19475 false);
19477 // Get overloaded name
19478 std::string Name =
19479 Intrinsic::getName(CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
19480 ArrayRef{OpExpr->getType()}, &CGM.getModule());
19481 return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
19482 /*Local=*/false,
19483 /*AssumeConvergent=*/true),
19484 ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane");
19486 case Builtin::BI__builtin_hlsl_elementwise_sign: {
19487 auto *Arg0 = E->getArg(0);
19488 Value *Op0 = EmitScalarExpr(Arg0);
19489 llvm::Type *Xty = Op0->getType();
19490 llvm::Type *retType = llvm::Type::getInt32Ty(this->getLLVMContext());
19491 if (Xty->isVectorTy()) {
19492 auto *XVecTy = Arg0->getType()->getAs<VectorType>();
19493 retType = llvm::VectorType::get(
19494 retType, ElementCount::getFixed(XVecTy->getNumElements()));
19496 assert((Arg0->getType()->hasFloatingRepresentation() ||
19497 Arg0->getType()->hasIntegerRepresentation()) &&
19498 "sign operand must have a float or int representation");
19500 if (Arg0->getType()->hasUnsignedIntegerRepresentation()) {
19501 Value *Cmp = Builder.CreateICmpEQ(Op0, ConstantInt::get(Xty, 0));
19502 return Builder.CreateSelect(Cmp, ConstantInt::get(retType, 0),
19503 ConstantInt::get(retType, 1), "hlsl.sign");
19506 return Builder.CreateIntrinsic(
19507 retType, CGM.getHLSLRuntime().getSignIntrinsic(),
19508 ArrayRef<Value *>{Op0}, nullptr, "hlsl.sign");
19510 case Builtin::BI__builtin_hlsl_elementwise_radians: {
19511 Value *Op0 = EmitScalarExpr(E->getArg(0));
19512 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19513 "radians operand must have a float representation");
19514 return Builder.CreateIntrinsic(
19515 /*ReturnType=*/Op0->getType(),
19516 CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
19517 nullptr, "hlsl.radians");
19519 case Builtin::BI__builtin_hlsl_buffer_update_counter: {
19520 Value *ResHandle = EmitScalarExpr(E->getArg(0));
19521 Value *Offset = EmitScalarExpr(E->getArg(1));
19522 Value *OffsetI8 = Builder.CreateIntCast(Offset, Int8Ty, true);
19523 return Builder.CreateIntrinsic(
19524 /*ReturnType=*/Offset->getType(),
19525 CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(),
19526 ArrayRef<Value *>{ResHandle, OffsetI8}, nullptr);
19528 case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
19530 assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
19531 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
19532 E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
19533 "asuint operands types mismatch");
19534 return handleHlslSplitdouble(E, this);
19536 case Builtin::BI__builtin_hlsl_elementwise_clip:
19537 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19538 "clip operands types mismatch");
19539 return handleHlslClip(E, this);
19540 case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
19541 Intrinsic::ID ID =
19542 CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
19543 return EmitRuntimeCall(
19544 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
19547 return nullptr;
19550 void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
19551 const CallExpr *E) {
19552 constexpr const char *Tag = "amdgpu-as";
19554 LLVMContext &Ctx = Inst->getContext();
19555 SmallVector<MMRAMetadata::TagT, 3> MMRAs;
19556 for (unsigned K = 2; K < E->getNumArgs(); ++K) {
19557 llvm::Value *V = EmitScalarExpr(E->getArg(K));
19558 StringRef AS;
19559 if (llvm::getConstantStringInfo(V, AS)) {
19560 MMRAs.push_back({Tag, AS});
19561 // TODO: Delete the resulting unused constant?
19562 continue;
19564 CGM.Error(E->getExprLoc(),
19565 "expected an address space name as a string literal");
19568 llvm::sort(MMRAs);
19569 MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
19570 Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
19573 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
19574 const CallExpr *E) {
19575 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
19576 llvm::SyncScope::ID SSID;
19577 switch (BuiltinID) {
19578 case AMDGPU::BI__builtin_amdgcn_div_scale:
19579 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
19580 // Translate from the intrinsics's struct return to the builtin's out
19581 // argument.
19583 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
19585 llvm::Value *X = EmitScalarExpr(E->getArg(0));
19586 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
19587 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
19589 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
19590 X->getType());
19592 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
19594 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
19595 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
19597 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
19599 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
19600 Builder.CreateStore(FlagExt, FlagOutPtr);
19601 return Result;
19603 case AMDGPU::BI__builtin_amdgcn_div_fmas:
19604 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
19605 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19606 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19607 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19608 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
19610 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
19611 Src0->getType());
19612 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
19613 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
19616 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
19617 return emitBuiltinWithOneOverloadedType<2>(*this, E,
19618 Intrinsic::amdgcn_ds_swizzle);
19619 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
19620 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
19621 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
19622 llvm::SmallVector<llvm::Value *, 6> Args;
19623 // Find out if any arguments are required to be integer constant
19624 // expressions.
19625 unsigned ICEArguments = 0;
19626 ASTContext::GetBuiltinTypeError Error;
19627 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
19628 assert(Error == ASTContext::GE_None && "Should not codegen an error");
19629 llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
19630 unsigned Size = DataTy->getPrimitiveSizeInBits();
19631 llvm::Type *IntTy =
19632 llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
19633 Function *F =
19634 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
19635 ? Intrinsic::amdgcn_mov_dpp8
19636 : Intrinsic::amdgcn_update_dpp,
19637 IntTy);
19638 assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 ||
19639 E->getNumArgs() == 2);
19640 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
19641 if (InsertOld)
19642 Args.push_back(llvm::PoisonValue::get(IntTy));
19643 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
19644 llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
19645 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
19646 Size < 32) {
19647 if (!DataTy->isIntegerTy())
19648 V = Builder.CreateBitCast(
19649 V, llvm::IntegerType::get(Builder.getContext(), Size));
19650 V = Builder.CreateZExtOrBitCast(V, IntTy);
19652 llvm::Type *ExpTy =
19653 F->getFunctionType()->getFunctionParamType(I + InsertOld);
19654 Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
19656 Value *V = Builder.CreateCall(F, Args);
19657 if (Size < 32 && !DataTy->isIntegerTy())
19658 V = Builder.CreateTrunc(
19659 V, llvm::IntegerType::get(Builder.getContext(), Size));
19660 return Builder.CreateTruncOrBitCast(V, DataTy);
19662 case AMDGPU::BI__builtin_amdgcn_permlane16:
19663 case AMDGPU::BI__builtin_amdgcn_permlanex16:
19664 return emitBuiltinWithOneOverloadedType<6>(
19665 *this, E,
19666 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
19667 ? Intrinsic::amdgcn_permlane16
19668 : Intrinsic::amdgcn_permlanex16);
19669 case AMDGPU::BI__builtin_amdgcn_permlane64:
19670 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19671 Intrinsic::amdgcn_permlane64);
19672 case AMDGPU::BI__builtin_amdgcn_readlane:
19673 return emitBuiltinWithOneOverloadedType<2>(*this, E,
19674 Intrinsic::amdgcn_readlane);
19675 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
19676 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19677 Intrinsic::amdgcn_readfirstlane);
19678 case AMDGPU::BI__builtin_amdgcn_div_fixup:
19679 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
19680 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
19681 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19682 Intrinsic::amdgcn_div_fixup);
19683 case AMDGPU::BI__builtin_amdgcn_trig_preop:
19684 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
19685 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
19686 case AMDGPU::BI__builtin_amdgcn_rcp:
19687 case AMDGPU::BI__builtin_amdgcn_rcpf:
19688 case AMDGPU::BI__builtin_amdgcn_rcph:
19689 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
19690 case AMDGPU::BI__builtin_amdgcn_sqrt:
19691 case AMDGPU::BI__builtin_amdgcn_sqrtf:
19692 case AMDGPU::BI__builtin_amdgcn_sqrth:
19693 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19694 Intrinsic::amdgcn_sqrt);
19695 case AMDGPU::BI__builtin_amdgcn_rsq:
19696 case AMDGPU::BI__builtin_amdgcn_rsqf:
19697 case AMDGPU::BI__builtin_amdgcn_rsqh:
19698 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
19699 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
19700 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
19701 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19702 Intrinsic::amdgcn_rsq_clamp);
19703 case AMDGPU::BI__builtin_amdgcn_sinf:
19704 case AMDGPU::BI__builtin_amdgcn_sinh:
19705 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
19706 case AMDGPU::BI__builtin_amdgcn_cosf:
19707 case AMDGPU::BI__builtin_amdgcn_cosh:
19708 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
19709 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
19710 return EmitAMDGPUDispatchPtr(*this, E);
19711 case AMDGPU::BI__builtin_amdgcn_logf:
19712 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
19713 case AMDGPU::BI__builtin_amdgcn_exp2f:
19714 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19715 Intrinsic::amdgcn_exp2);
19716 case AMDGPU::BI__builtin_amdgcn_log_clampf:
19717 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19718 Intrinsic::amdgcn_log_clamp);
19719 case AMDGPU::BI__builtin_amdgcn_ldexp:
19720 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
19721 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19722 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19723 llvm::Function *F =
19724 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
19725 return Builder.CreateCall(F, {Src0, Src1});
19727 case AMDGPU::BI__builtin_amdgcn_ldexph: {
19728 // The raw instruction has a different behavior for out of bounds exponent
19729 // values (implicit truncation instead of saturate to short_min/short_max).
19730 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19731 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19732 llvm::Function *F =
19733 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
19734 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
19736 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
19737 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
19738 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
19739 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19740 Intrinsic::amdgcn_frexp_mant);
19741 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
19742 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
19743 Value *Src0 = EmitScalarExpr(E->getArg(0));
19744 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
19745 { Builder.getInt32Ty(), Src0->getType() });
19746 return Builder.CreateCall(F, Src0);
19748 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
19749 Value *Src0 = EmitScalarExpr(E->getArg(0));
19750 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
19751 { Builder.getInt16Ty(), Src0->getType() });
19752 return Builder.CreateCall(F, Src0);
19754 case AMDGPU::BI__builtin_amdgcn_fract:
19755 case AMDGPU::BI__builtin_amdgcn_fractf:
19756 case AMDGPU::BI__builtin_amdgcn_fracth:
19757 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19758 Intrinsic::amdgcn_fract);
19759 case AMDGPU::BI__builtin_amdgcn_lerp:
19760 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19761 Intrinsic::amdgcn_lerp);
19762 case AMDGPU::BI__builtin_amdgcn_ubfe:
19763 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19764 Intrinsic::amdgcn_ubfe);
19765 case AMDGPU::BI__builtin_amdgcn_sbfe:
19766 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19767 Intrinsic::amdgcn_sbfe);
19768 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
19769 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
19770 llvm::Type *ResultType = ConvertType(E->getType());
19771 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19772 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
19773 return Builder.CreateCall(F, { Src });
19775 case AMDGPU::BI__builtin_amdgcn_uicmp:
19776 case AMDGPU::BI__builtin_amdgcn_uicmpl:
19777 case AMDGPU::BI__builtin_amdgcn_sicmp:
19778 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
19779 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19780 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19781 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19783 // FIXME-GFX10: How should 32 bit mask be handled?
19784 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
19785 { Builder.getInt64Ty(), Src0->getType() });
19786 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19788 case AMDGPU::BI__builtin_amdgcn_fcmp:
19789 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
19790 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19791 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19792 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19794 // FIXME-GFX10: How should 32 bit mask be handled?
19795 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
19796 { Builder.getInt64Ty(), Src0->getType() });
19797 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19799 case AMDGPU::BI__builtin_amdgcn_class:
19800 case AMDGPU::BI__builtin_amdgcn_classf:
19801 case AMDGPU::BI__builtin_amdgcn_classh:
19802 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
19803 case AMDGPU::BI__builtin_amdgcn_fmed3f:
19804 case AMDGPU::BI__builtin_amdgcn_fmed3h:
19805 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19806 Intrinsic::amdgcn_fmed3);
19807 case AMDGPU::BI__builtin_amdgcn_ds_append:
19808 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
19809 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
19810 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
19811 Value *Src0 = EmitScalarExpr(E->getArg(0));
19812 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
19813 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
19815 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
19816 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
19817 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
19818 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
19819 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
19820 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
19821 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
19822 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
19823 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
19824 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
19825 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
19826 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
19827 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
19828 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
19829 Intrinsic::ID IID;
19830 switch (BuiltinID) {
19831 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
19832 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
19833 IID = Intrinsic::amdgcn_global_load_tr_b64;
19834 break;
19835 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
19836 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
19837 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
19838 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
19839 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
19840 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
19841 IID = Intrinsic::amdgcn_global_load_tr_b128;
19842 break;
19843 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
19844 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
19845 break;
19846 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
19847 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
19848 break;
19849 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
19850 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
19851 break;
19852 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
19853 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
19854 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
19855 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
19856 break;
19858 llvm::Type *LoadTy = ConvertType(E->getType());
19859 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
19860 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
19861 return Builder.CreateCall(F, {Addr});
19863 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
19864 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
19865 {llvm::Type::getInt64Ty(getLLVMContext())});
19866 return Builder.CreateCall(F);
19868 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
19869 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
19870 {llvm::Type::getInt64Ty(getLLVMContext())});
19871 llvm::Value *Env = EmitScalarExpr(E->getArg(0));
19872 return Builder.CreateCall(F, {Env});
19874 case AMDGPU::BI__builtin_amdgcn_read_exec:
19875 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
19876 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
19877 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
19878 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
19879 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
19880 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
19881 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
19882 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
19883 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
19884 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
19885 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
19886 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
19887 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
19888 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
19889 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
19891 // The builtins take these arguments as vec4 where the last element is
19892 // ignored. The intrinsic takes them as vec3.
19893 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
19894 ArrayRef<int>{0, 1, 2});
19895 RayDir =
19896 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
19897 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
19898 ArrayRef<int>{0, 1, 2});
19900 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
19901 {NodePtr->getType(), RayDir->getType()});
19902 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
19903 RayInverseDir, TextureDescr});
19906 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
19907 SmallVector<Value *, 4> Args;
19908 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
19909 Args.push_back(EmitScalarExpr(E->getArg(i)));
19911 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
19912 Value *Call = Builder.CreateCall(F, Args);
19913 Value *Rtn = Builder.CreateExtractValue(Call, 0);
19914 Value *A = Builder.CreateExtractValue(Call, 1);
19915 llvm::Type *RetTy = ConvertType(E->getType());
19916 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
19917 (uint64_t)0);
19918 return Builder.CreateInsertElement(I0, A, 1);
19920 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
19921 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
19922 llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
19923 Function *F = CGM.getIntrinsic(
19924 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
19925 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
19926 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
19927 {VT, VT});
19929 SmallVector<Value *, 9> Args;
19930 for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
19931 Args.push_back(EmitScalarExpr(E->getArg(I)));
19932 return Builder.CreateCall(F, Args);
19934 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
19935 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
19936 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
19937 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
19938 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
19939 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
19940 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
19941 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
19942 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
19943 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
19944 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
19945 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
19946 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
19947 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
19948 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
19949 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
19950 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
19951 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
19952 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
19953 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
19954 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
19955 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
19956 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
19957 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
19958 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
19959 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
19960 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
19961 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
19962 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
19963 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
19964 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
19965 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
19966 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
19967 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
19968 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
19969 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
19970 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
19971 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
19972 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
19973 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
19974 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
19975 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
19976 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
19977 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
19978 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
19979 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
19980 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
19981 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
19982 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
19983 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
19984 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
19985 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
19986 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
19987 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
19988 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
19989 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
19990 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
19991 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
19992 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
19993 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
19995 // These operations perform a matrix multiplication and accumulation of
19996 // the form:
19997 // D = A * B + C
19998 // We need to specify one type for matrices AB and one for matrices CD.
19999 // Sparse matrix operations can have different types for A and B as well as
20000 // an additional type for sparsity index.
20001 // Destination type should be put before types used for source operands.
20002 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
20003 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
20004 // There is no need for the variable opsel argument, so always set it to
20005 // "false".
20006 bool AppendFalseForOpselArg = false;
20007 unsigned BuiltinWMMAOp;
20009 switch (BuiltinID) {
20010 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
20011 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
20012 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
20013 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
20014 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20015 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
20016 break;
20017 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
20018 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
20019 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
20020 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
20021 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20022 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
20023 break;
20024 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
20025 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
20026 AppendFalseForOpselArg = true;
20027 [[fallthrough]];
20028 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
20029 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
20030 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20031 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
20032 break;
20033 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
20034 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
20035 AppendFalseForOpselArg = true;
20036 [[fallthrough]];
20037 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
20038 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
20039 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20040 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
20041 break;
20042 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
20043 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
20044 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20045 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
20046 break;
20047 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
20048 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
20049 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20050 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
20051 break;
20052 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
20053 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
20054 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
20055 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
20056 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20057 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
20058 break;
20059 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
20060 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
20061 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
20062 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
20063 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20064 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
20065 break;
20066 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
20067 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
20068 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20069 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
20070 break;
20071 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
20072 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
20073 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20074 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
20075 break;
20076 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
20077 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
20078 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20079 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
20080 break;
20081 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
20082 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
20083 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20084 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
20085 break;
20086 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
20087 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
20088 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20089 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
20090 break;
20091 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
20092 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
20093 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20094 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
20095 break;
20096 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
20097 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
20098 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20099 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
20100 break;
20101 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
20102 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
20103 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20104 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
20105 break;
20106 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
20107 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
20108 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20109 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
20110 break;
20111 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
20112 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
20113 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20114 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
20115 break;
20116 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
20117 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
20118 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20119 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
20120 break;
20121 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
20122 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
20123 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20124 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
20125 break;
20126 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
20127 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
20128 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20129 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
20130 break;
20131 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
20132 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
20133 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20134 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
20135 break;
20136 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
20137 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
20138 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20139 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
20140 break;
20141 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
20142 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
20143 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20144 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
20145 break;
20148 SmallVector<Value *, 6> Args;
20149 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
20150 Args.push_back(EmitScalarExpr(E->getArg(i)));
20151 if (AppendFalseForOpselArg)
20152 Args.push_back(Builder.getFalse());
20154 SmallVector<llvm::Type *, 6> ArgTypes;
20155 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
20156 ArgTypes.push_back(Args[ArgIdx]->getType());
20158 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
20159 return Builder.CreateCall(F, Args);
20162 // amdgcn workitem
20163 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
20164 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
20165 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
20166 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
20167 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
20168 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
20170 // amdgcn workgroup size
20171 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
20172 return EmitAMDGPUWorkGroupSize(*this, 0);
20173 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
20174 return EmitAMDGPUWorkGroupSize(*this, 1);
20175 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
20176 return EmitAMDGPUWorkGroupSize(*this, 2);
20178 // amdgcn grid size
20179 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
20180 return EmitAMDGPUGridSize(*this, 0);
20181 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
20182 return EmitAMDGPUGridSize(*this, 1);
20183 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
20184 return EmitAMDGPUGridSize(*this, 2);
20186 // r600 intrinsics
20187 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
20188 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
20189 return emitBuiltinWithOneOverloadedType<1>(*this, E,
20190 Intrinsic::r600_recipsqrt_ieee);
20191 case AMDGPU::BI__builtin_r600_read_tidig_x:
20192 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
20193 case AMDGPU::BI__builtin_r600_read_tidig_y:
20194 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
20195 case AMDGPU::BI__builtin_r600_read_tidig_z:
20196 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
20197 case AMDGPU::BI__builtin_amdgcn_alignbit: {
20198 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
20199 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
20200 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
20201 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
20202 return Builder.CreateCall(F, { Src0, Src1, Src2 });
20204 case AMDGPU::BI__builtin_amdgcn_fence: {
20205 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
20206 EmitScalarExpr(E->getArg(1)), AO, SSID);
20207 FenceInst *Fence = Builder.CreateFence(AO, SSID);
20208 if (E->getNumArgs() > 2)
20209 AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
20210 return Fence;
20212 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20213 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20214 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20215 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20216 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20217 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20218 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20219 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20220 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20221 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20222 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20223 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20224 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20225 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20226 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20227 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20228 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20229 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20230 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20231 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20232 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20233 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20234 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
20235 llvm::AtomicRMWInst::BinOp BinOp;
20236 switch (BuiltinID) {
20237 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20238 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20239 BinOp = llvm::AtomicRMWInst::UIncWrap;
20240 break;
20241 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20242 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20243 BinOp = llvm::AtomicRMWInst::UDecWrap;
20244 break;
20245 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20246 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20247 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20248 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20249 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20250 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20251 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20252 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20253 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20254 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20255 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20256 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20257 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20258 BinOp = llvm::AtomicRMWInst::FAdd;
20259 break;
20260 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20261 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20262 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20263 BinOp = llvm::AtomicRMWInst::FMin;
20264 break;
20265 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20266 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
20267 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20268 BinOp = llvm::AtomicRMWInst::FMax;
20269 break;
20272 Address Ptr = CheckAtomicAlignment(*this, E);
20273 Value *Val = EmitScalarExpr(E->getArg(1));
20274 llvm::Type *OrigTy = Val->getType();
20275 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
20277 bool Volatile;
20279 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
20280 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
20281 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
20282 // __builtin_amdgcn_ds_faddf/fminf/fmaxf has an explicit volatile argument
20283 Volatile =
20284 cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
20285 } else {
20286 // Infer volatile from the passed type.
20287 Volatile =
20288 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
20291 if (E->getNumArgs() >= 4) {
20292 // Some of the builtins have explicit ordering and scope arguments.
20293 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
20294 EmitScalarExpr(E->getArg(3)), AO, SSID);
20295 } else {
20296 // Most of the builtins do not have syncscope/order arguments. For DS
20297 // atomics the scope doesn't really matter, as they implicitly operate at
20298 // workgroup scope.
20300 // The global/flat cases need to use agent scope to consistently produce
20301 // the native instruction instead of a cmpxchg expansion.
20302 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
20303 AO = AtomicOrdering::Monotonic;
20305 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
20306 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
20307 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
20308 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
20309 llvm::Type *V2BF16Ty = FixedVectorType::get(
20310 llvm::Type::getBFloatTy(Builder.getContext()), 2);
20311 Val = Builder.CreateBitCast(Val, V2BF16Ty);
20315 llvm::AtomicRMWInst *RMW =
20316 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
20317 if (Volatile)
20318 RMW->setVolatile(true);
20320 unsigned AddrSpace = Ptr.getType()->getAddressSpace();
20321 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
20322 // Most targets require "amdgpu.no.fine.grained.memory" to emit the native
20323 // instruction for flat and global operations.
20324 llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {});
20325 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
20327 // Most targets require "amdgpu.ignore.denormal.mode" to emit the native
20328 // instruction, but this only matters for float fadd.
20329 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())
20330 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
20333 return Builder.CreateBitCast(RMW, OrigTy);
20335 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
20336 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
20337 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
20338 llvm::Type *ResultType = ConvertType(E->getType());
20339 // s_sendmsg_rtn is mangled using return type only.
20340 Function *F =
20341 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
20342 return Builder.CreateCall(F, {Arg});
20344 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
20345 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
20346 // Because builtin types are limited, and the intrinsic uses a struct/pair
20347 // output, marshal the pair-of-i32 to <2 x i32>.
20348 Value *VDstOld = EmitScalarExpr(E->getArg(0));
20349 Value *VSrcOld = EmitScalarExpr(E->getArg(1));
20350 Value *FI = EmitScalarExpr(E->getArg(2));
20351 Value *BoundCtrl = EmitScalarExpr(E->getArg(3));
20352 Function *F =
20353 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
20354 ? Intrinsic::amdgcn_permlane16_swap
20355 : Intrinsic::amdgcn_permlane32_swap);
20356 llvm::CallInst *Call =
20357 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
20359 llvm::Value *Elt0 = Builder.CreateExtractValue(Call, 0);
20360 llvm::Value *Elt1 = Builder.CreateExtractValue(Call, 1);
20362 llvm::Type *ResultType = ConvertType(E->getType());
20364 llvm::Value *Insert0 = Builder.CreateInsertElement(
20365 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
20366 llvm::Value *AsVector =
20367 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
20368 return AsVector;
20370 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
20371 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
20372 return emitQuaternaryBuiltin(*this, E, Intrinsic::amdgcn_bitop3);
20373 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:
20374 return emitBuiltinWithOneOverloadedType<4>(
20375 *this, E, Intrinsic::amdgcn_make_buffer_rsrc);
20376 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
20377 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
20378 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
20379 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
20380 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
20381 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
20382 return emitBuiltinWithOneOverloadedType<5>(
20383 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
20384 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20385 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20386 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20387 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20388 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20389 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
20390 llvm::Type *RetTy = nullptr;
20391 switch (BuiltinID) {
20392 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20393 RetTy = Int8Ty;
20394 break;
20395 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20396 RetTy = Int16Ty;
20397 break;
20398 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20399 RetTy = Int32Ty;
20400 break;
20401 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20402 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
20403 break;
20404 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20405 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
20406 break;
20407 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
20408 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
20409 break;
20411 Function *F =
20412 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
20413 return Builder.CreateCall(
20414 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
20415 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
20417 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
20418 return emitBuiltinWithOneOverloadedType<2>(
20419 *this, E, Intrinsic::amdgcn_s_prefetch_data);
20420 default:
20421 return nullptr;
20425 /// Handle a SystemZ function in which the final argument is a pointer
20426 /// to an int that receives the post-instruction CC value. At the LLVM level
20427 /// this is represented as a function that returns a {result, cc} pair.
20428 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
20429 unsigned IntrinsicID,
20430 const CallExpr *E) {
20431 unsigned NumArgs = E->getNumArgs() - 1;
20432 SmallVector<Value *, 8> Args(NumArgs);
20433 for (unsigned I = 0; I < NumArgs; ++I)
20434 Args[I] = CGF.EmitScalarExpr(E->getArg(I));
20435 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
20436 Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
20437 Value *Call = CGF.Builder.CreateCall(F, Args);
20438 Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
20439 CGF.Builder.CreateStore(CC, CCPtr);
20440 return CGF.Builder.CreateExtractValue(Call, 0);
/// Emit LLVM IR for a call to a SystemZ target builtin.
///
/// Dispatches on BuiltinID. The transactional-execution builtins are lowered
/// to the matching s390 intrinsics. Selected vector builtins are lowered to
/// generic LLVM intrinsics (ctlz, cttz, fshl, sqrt, fma, fabs, bswap and the
/// rounding / minnum / maxnum families), using the experimental constrained
/// variants when the builder is in FP-constrained mode. Builtins that also
/// produce a condition code are forwarded to EmitSystemZIntrinsicWithCC.
///
/// \param BuiltinID  the SystemZ builtin being called.
/// \param E          the call expression supplying the arguments.
/// \returns the emitted value, or nullptr when BuiltinID is not handled here.
20443 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
20444 const CallExpr *E) {
20445 switch (BuiltinID) {
// Transactional-execution builtins. The control operand is a fixed constant
// chosen here, not something taken from the call expression.
20446 case SystemZ::BI__builtin_tbegin: {
20447 Value *TDB = EmitScalarExpr(E->getArg(0));
20448 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
20449 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
20450 return Builder.CreateCall(F, {TDB, Control});
20452 case SystemZ::BI__builtin_tbegin_nofloat: {
20453 Value *TDB = EmitScalarExpr(E->getArg(0));
20454 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
20455 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
20456 return Builder.CreateCall(F, {TDB, Control});
20458 case SystemZ::BI__builtin_tbeginc: {
// No argument is read for tbeginc: a null pointer is passed as the TDB and
// the control constant is 0xff08.
20459 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
20460 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
20461 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
20462 return Builder.CreateCall(F, {TDB, Control});
20464 case SystemZ::BI__builtin_tabort: {
// The argument is sign-extended to i64 before being passed on.
20465 Value *Data = EmitScalarExpr(E->getArg(0));
20466 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
20467 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
20469 case SystemZ::BI__builtin_non_tx_store: {
// Operand order is swapped relative to the builtin: the builtin takes
// (address, data) while the intrinsic is called with (data, address).
20470 Value *Address = EmitScalarExpr(E->getArg(0));
20471 Value *Data = EmitScalarExpr(E->getArg(1));
20472 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
20473 return Builder.CreateCall(F, {Data, Address});
20476 // Vector builtins. Note that most vector builtins are mapped automatically
20477 // to target-specific LLVM intrinsics. The ones handled specially here can
20478 // be represented via standard LLVM IR, which is preferable to enable common
20479 // LLVM optimizations.
20481 case SystemZ::BI__builtin_s390_vclzb:
20482 case SystemZ::BI__builtin_s390_vclzh:
20483 case SystemZ::BI__builtin_s390_vclzf:
20484 case SystemZ::BI__builtin_s390_vclzg: {
20485 llvm::Type *ResultType = ConvertType(E->getType());
20486 Value *X = EmitScalarExpr(E->getArg(0));
// Despite its name, Undef is the constant i1 false passed as the second
// operand of ctlz; it is not an undef value.
20487 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
20488 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
20489 return Builder.CreateCall(F, {X, Undef});
20492 case SystemZ::BI__builtin_s390_vctzb:
20493 case SystemZ::BI__builtin_s390_vctzh:
20494 case SystemZ::BI__builtin_s390_vctzf:
20495 case SystemZ::BI__builtin_s390_vctzg: {
20496 llvm::Type *ResultType = ConvertType(E->getType());
20497 Value *X = EmitScalarExpr(E->getArg(0));
// As for ctlz above: the second operand of cttz is the constant i1 false.
20498 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
20499 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
20500 return Builder.CreateCall(F, {X, Undef});
// Rotate by a scalar amount: emitted as fshl(Src, Src, Amt) after the amount
// has been cast (as unsigned) to the element type and splatted.
20503 case SystemZ::BI__builtin_s390_verllb:
20504 case SystemZ::BI__builtin_s390_verllh:
20505 case SystemZ::BI__builtin_s390_verllf:
20506 case SystemZ::BI__builtin_s390_verllg: {
20507 llvm::Type *ResultType = ConvertType(E->getType());
20508 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
20509 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
20510 // Splat scalar rotate amount to vector type.
20511 unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
20512 Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
20513 Amt = Builder.CreateVectorSplat(NumElts, Amt);
20514 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
20515 return Builder.CreateCall(F, { Src, Src, Amt });
// Rotate by a per-element amount: the second argument is used directly as the
// fshl shift operand, with no cast or splat.
20518 case SystemZ::BI__builtin_s390_verllvb:
20519 case SystemZ::BI__builtin_s390_verllvh:
20520 case SystemZ::BI__builtin_s390_verllvf:
20521 case SystemZ::BI__builtin_s390_verllvg: {
20522 llvm::Type *ResultType = ConvertType(E->getType());
20523 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
20524 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
20525 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
20526 return Builder.CreateCall(F, { Src, Src, Amt });
// Square root: sqrt, or experimental_constrained_sqrt in FP-constrained mode.
20529 case SystemZ::BI__builtin_s390_vfsqsb:
20530 case SystemZ::BI__builtin_s390_vfsqdb: {
20531 llvm::Type *ResultType = ConvertType(E->getType());
20532 Value *X = EmitScalarExpr(E->getArg(0));
20533 if (Builder.getIsFPConstrained()) {
20534 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
20535 return Builder.CreateConstrainedFPCall(F, { X });
20536 } else {
20537 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
20538 return Builder.CreateCall(F, X);
// Emitted as fma(X, Y, Z).
20541 case SystemZ::BI__builtin_s390_vfmasb:
20542 case SystemZ::BI__builtin_s390_vfmadb: {
20543 llvm::Type *ResultType = ConvertType(E->getType());
20544 Value *X = EmitScalarExpr(E->getArg(0));
20545 Value *Y = EmitScalarExpr(E->getArg(1));
20546 Value *Z = EmitScalarExpr(E->getArg(2));
20547 if (Builder.getIsFPConstrained()) {
20548 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20549 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
20550 } else {
20551 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20552 return Builder.CreateCall(F, {X, Y, Z});
// Emitted as fma(X, Y, -Z).
20555 case SystemZ::BI__builtin_s390_vfmssb:
20556 case SystemZ::BI__builtin_s390_vfmsdb: {
20557 llvm::Type *ResultType = ConvertType(E->getType());
20558 Value *X = EmitScalarExpr(E->getArg(0));
20559 Value *Y = EmitScalarExpr(E->getArg(1));
20560 Value *Z = EmitScalarExpr(E->getArg(2));
20561 if (Builder.getIsFPConstrained()) {
20562 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20563 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
20564 } else {
20565 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20566 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
// Emitted as -fma(X, Y, Z).
20569 case SystemZ::BI__builtin_s390_vfnmasb:
20570 case SystemZ::BI__builtin_s390_vfnmadb: {
20571 llvm::Type *ResultType = ConvertType(E->getType());
20572 Value *X = EmitScalarExpr(E->getArg(0));
20573 Value *Y = EmitScalarExpr(E->getArg(1));
20574 Value *Z = EmitScalarExpr(E->getArg(2));
20575 if (Builder.getIsFPConstrained()) {
20576 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20577 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
20578 } else {
20579 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20580 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
// Emitted as -fma(X, Y, -Z). The two paths differ only in the name given to
// the inner negation ("sub" when constrained, "neg" otherwise); the outer
// negation is unnamed in both.
20583 case SystemZ::BI__builtin_s390_vfnmssb:
20584 case SystemZ::BI__builtin_s390_vfnmsdb: {
20585 llvm::Type *ResultType = ConvertType(E->getType());
20586 Value *X = EmitScalarExpr(E->getArg(0));
20587 Value *Y = EmitScalarExpr(E->getArg(1));
20588 Value *Z = EmitScalarExpr(E->getArg(2));
20589 if (Builder.getIsFPConstrained()) {
20590 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20591 Value *NegZ = Builder.CreateFNeg(Z, "sub");
20592 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
20593 } else {
20594 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20595 Value *NegZ = Builder.CreateFNeg(Z, "neg");
20596 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
// Emitted as fabs(X). There is no constrained variant on this path.
20599 case SystemZ::BI__builtin_s390_vflpsb:
20600 case SystemZ::BI__builtin_s390_vflpdb: {
20601 llvm::Type *ResultType = ConvertType(E->getType());
20602 Value *X = EmitScalarExpr(E->getArg(0));
20603 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
20604 return Builder.CreateCall(F, X);
// Emitted as -fabs(X).
20606 case SystemZ::BI__builtin_s390_vflnsb:
20607 case SystemZ::BI__builtin_s390_vflndb: {
20608 llvm::Type *ResultType = ConvertType(E->getType());
20609 Value *X = EmitScalarExpr(E->getArg(0));
20610 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
20611 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
// Rounding to integral value. Recognised (M4, M5) pairs become a generic
// rounding intrinsic; everything else falls back to the target intrinsic.
20613 case SystemZ::BI__builtin_s390_vfisb:
20614 case SystemZ::BI__builtin_s390_vfidb: {
20615 llvm::Type *ResultType = ConvertType(E->getType());
20616 Value *X = EmitScalarExpr(E->getArg(0));
20617 // Constant-fold the M4 and M5 mask arguments.
20618 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
20619 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20620 // Check whether this instance can be represented via a LLVM standard
20621 // intrinsic. We only support some combinations of M4 and M5.
20622 Intrinsic::ID ID = Intrinsic::not_intrinsic;
// CI is left uninitialized on purpose: it is only read when ID has been set,
// and every case below that sets ID also sets CI.
20623 Intrinsic::ID CI;
20624 switch (M4.getZExtValue()) {
20625 default: break;
20626 case 0: // IEEE-inexact exception allowed
20627 switch (M5.getZExtValue()) {
20628 default: break;
20629 case 0: ID = Intrinsic::rint;
20630 CI = Intrinsic::experimental_constrained_rint; break;
20632 break;
20633 case 4: // IEEE-inexact exception suppressed
20634 switch (M5.getZExtValue()) {
20635 default: break;
20636 case 0: ID = Intrinsic::nearbyint;
20637 CI = Intrinsic::experimental_constrained_nearbyint; break;
20638 case 1: ID = Intrinsic::round;
20639 CI = Intrinsic::experimental_constrained_round; break;
20640 case 5: ID = Intrinsic::trunc;
20641 CI = Intrinsic::experimental_constrained_trunc; break;
20642 case 6: ID = Intrinsic::ceil;
20643 CI = Intrinsic::experimental_constrained_ceil; break;
20644 case 7: ID = Intrinsic::floor;
20645 CI = Intrinsic::experimental_constrained_floor; break;
20647 break;
20649 if (ID != Intrinsic::not_intrinsic) {
20650 if (Builder.getIsFPConstrained()) {
20651 Function *F = CGM.getIntrinsic(CI, ResultType);
20652 return Builder.CreateConstrainedFPCall(F, X);
20653 } else {
20654 Function *F = CGM.getIntrinsic(ID, ResultType);
20655 return Builder.CreateCall(F, X);
// No generic equivalent for this M4/M5 combination: call the target intrinsic
// and pass both masks through as constants.
20658 switch (BuiltinID) { // FIXME: constrained version?
20659 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
20660 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
20661 default: llvm_unreachable("Unknown BuiltinID");
20663 Function *F = CGM.getIntrinsic(ID);
20664 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20665 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
20666 return Builder.CreateCall(F, {X, M4Value, M5Value});
// Maximum. Only M4 == 4 is mapped to maxnum (or its constrained variant);
// any other M4 uses the target intrinsic with M4 passed through.
20668 case SystemZ::BI__builtin_s390_vfmaxsb:
20669 case SystemZ::BI__builtin_s390_vfmaxdb: {
20670 llvm::Type *ResultType = ConvertType(E->getType());
20671 Value *X = EmitScalarExpr(E->getArg(0));
20672 Value *Y = EmitScalarExpr(E->getArg(1));
20673 // Constant-fold the M4 mask argument.
20674 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20675 // Check whether this instance can be represented via a LLVM standard
20676 // intrinsic. We only support some values of M4.
20677 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20678 Intrinsic::ID CI;
20679 switch (M4.getZExtValue()) {
20680 default: break;
20681 case 4: ID = Intrinsic::maxnum;
20682 CI = Intrinsic::experimental_constrained_maxnum; break;
20684 if (ID != Intrinsic::not_intrinsic) {
20685 if (Builder.getIsFPConstrained()) {
20686 Function *F = CGM.getIntrinsic(CI, ResultType);
20687 return Builder.CreateConstrainedFPCall(F, {X, Y});
20688 } else {
20689 Function *F = CGM.getIntrinsic(ID, ResultType);
20690 return Builder.CreateCall(F, {X, Y});
20693 switch (BuiltinID) {
20694 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
20695 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
20696 default: llvm_unreachable("Unknown BuiltinID");
20698 Function *F = CGM.getIntrinsic(ID);
20699 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20700 return Builder.CreateCall(F, {X, Y, M4Value});
// Minimum. Same structure as the maximum case above, with minnum.
20702 case SystemZ::BI__builtin_s390_vfminsb:
20703 case SystemZ::BI__builtin_s390_vfmindb: {
20704 llvm::Type *ResultType = ConvertType(E->getType());
20705 Value *X = EmitScalarExpr(E->getArg(0));
20706 Value *Y = EmitScalarExpr(E->getArg(1));
20707 // Constant-fold the M4 mask argument.
20708 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20709 // Check whether this instance can be represented via a LLVM standard
20710 // intrinsic. We only support some values of M4.
20711 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20712 Intrinsic::ID CI;
20713 switch (M4.getZExtValue()) {
20714 default: break;
20715 case 4: ID = Intrinsic::minnum;
20716 CI = Intrinsic::experimental_constrained_minnum; break;
20718 if (ID != Intrinsic::not_intrinsic) {
20719 if (Builder.getIsFPConstrained()) {
20720 Function *F = CGM.getIntrinsic(CI, ResultType);
20721 return Builder.CreateConstrainedFPCall(F, {X, Y});
20722 } else {
20723 Function *F = CGM.getIntrinsic(ID, ResultType);
20724 return Builder.CreateCall(F, {X, Y});
20727 switch (BuiltinID) {
20728 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
20729 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
20730 default: llvm_unreachable("Unknown BuiltinID");
20732 Function *F = CGM.getIntrinsic(ID);
20733 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20734 return Builder.CreateCall(F, {X, Y, M4Value});
// Emitted as bswap on the result type.
20737 case SystemZ::BI__builtin_s390_vlbrh:
20738 case SystemZ::BI__builtin_s390_vlbrf:
20739 case SystemZ::BI__builtin_s390_vlbrg: {
20740 llvm::Type *ResultType = ConvertType(E->getType());
20741 Value *X = EmitScalarExpr(E->getArg(0));
20742 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
20743 return Builder.CreateCall(F, X);
20746 // Vector intrinsics that output the post-instruction CC value.
// INTRINSIC_WITH_CC(NAME) expands to a case label for the builtin
// __builtin_NAME that forwards to the like-named LLVM intrinsic through
// EmitSystemZIntrinsicWithCC. The macro is undefined again after the list.
20748 #define INTRINSIC_WITH_CC(NAME) \
20749 case SystemZ::BI__builtin_##NAME: \
20750 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
20752 INTRINSIC_WITH_CC(s390_vpkshs);
20753 INTRINSIC_WITH_CC(s390_vpksfs);
20754 INTRINSIC_WITH_CC(s390_vpksgs);
20756 INTRINSIC_WITH_CC(s390_vpklshs);
20757 INTRINSIC_WITH_CC(s390_vpklsfs);
20758 INTRINSIC_WITH_CC(s390_vpklsgs);
20760 INTRINSIC_WITH_CC(s390_vceqbs);
20761 INTRINSIC_WITH_CC(s390_vceqhs);
20762 INTRINSIC_WITH_CC(s390_vceqfs);
20763 INTRINSIC_WITH_CC(s390_vceqgs);
20765 INTRINSIC_WITH_CC(s390_vchbs);
20766 INTRINSIC_WITH_CC(s390_vchhs);
20767 INTRINSIC_WITH_CC(s390_vchfs);
20768 INTRINSIC_WITH_CC(s390_vchgs);
20770 INTRINSIC_WITH_CC(s390_vchlbs);
20771 INTRINSIC_WITH_CC(s390_vchlhs);
20772 INTRINSIC_WITH_CC(s390_vchlfs);
20773 INTRINSIC_WITH_CC(s390_vchlgs);
20775 INTRINSIC_WITH_CC(s390_vfaebs);
20776 INTRINSIC_WITH_CC(s390_vfaehs);
20777 INTRINSIC_WITH_CC(s390_vfaefs);
20779 INTRINSIC_WITH_CC(s390_vfaezbs);
20780 INTRINSIC_WITH_CC(s390_vfaezhs);
20781 INTRINSIC_WITH_CC(s390_vfaezfs);
20783 INTRINSIC_WITH_CC(s390_vfeebs);
20784 INTRINSIC_WITH_CC(s390_vfeehs);
20785 INTRINSIC_WITH_CC(s390_vfeefs);
20787 INTRINSIC_WITH_CC(s390_vfeezbs);
20788 INTRINSIC_WITH_CC(s390_vfeezhs);
20789 INTRINSIC_WITH_CC(s390_vfeezfs);
20791 INTRINSIC_WITH_CC(s390_vfenebs);
20792 INTRINSIC_WITH_CC(s390_vfenehs);
20793 INTRINSIC_WITH_CC(s390_vfenefs);
20795 INTRINSIC_WITH_CC(s390_vfenezbs);
20796 INTRINSIC_WITH_CC(s390_vfenezhs);
20797 INTRINSIC_WITH_CC(s390_vfenezfs);
20799 INTRINSIC_WITH_CC(s390_vistrbs);
20800 INTRINSIC_WITH_CC(s390_vistrhs);
20801 INTRINSIC_WITH_CC(s390_vistrfs);
20803 INTRINSIC_WITH_CC(s390_vstrcbs);
20804 INTRINSIC_WITH_CC(s390_vstrchs);
20805 INTRINSIC_WITH_CC(s390_vstrcfs);
20807 INTRINSIC_WITH_CC(s390_vstrczbs);
20808 INTRINSIC_WITH_CC(s390_vstrczhs);
20809 INTRINSIC_WITH_CC(s390_vstrczfs);
20811 INTRINSIC_WITH_CC(s390_vfcesbs);
20812 INTRINSIC_WITH_CC(s390_vfcedbs);
20813 INTRINSIC_WITH_CC(s390_vfchsbs);
20814 INTRINSIC_WITH_CC(s390_vfchdbs);
20815 INTRINSIC_WITH_CC(s390_vfchesbs);
20816 INTRINSIC_WITH_CC(s390_vfchedbs);
20818 INTRINSIC_WITH_CC(s390_vftcisb);
20819 INTRINSIC_WITH_CC(s390_vftcidb);
20821 INTRINSIC_WITH_CC(s390_vstrsb);
20822 INTRINSIC_WITH_CC(s390_vstrsh);
20823 INTRINSIC_WITH_CC(s390_vstrsf);
20825 INTRINSIC_WITH_CC(s390_vstrszb);
20826 INTRINSIC_WITH_CC(s390_vstrszh);
20827 INTRINSIC_WITH_CC(s390_vstrszf);
20829 #undef INTRINSIC_WITH_CC
// Anything not listed above is not handled by this function.
20831 default:
20832 return nullptr;
20836 namespace {
20837 // Helper classes for mapping MMA builtins to particular LLVM intrinsic variant.
// Describes how a single MMA load/store builtin maps onto LLVM intrinsics.
struct NVPTXMmaLdstInfo {
  // Number of elements to load/store.
  unsigned NumResults;

  // Intrinsic ID of the column-layout variant; 0 if that layout is
  // unsupported.
  unsigned IID_col;

  // Intrinsic ID of the row-layout variant; 0 if that layout is unsupported.
  unsigned IID_row;
};
// Helper macros for the table below; both are undefined again after the
// function.
//   MMA_INTR(geom_op_type, layout) names the strided WMMA intrinsic
//     Intrinsic::nvvm_wmma_<geom_op_type>_<layout>_stride.
//   MMA_LDST(n, geom_op_type) builds an NVPTXMmaLdstInfo initializer
//     {n, <col intrinsic>, <row intrinsic>}.
20845 #define MMA_INTR(geom_op_type, layout) \
20846 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
20847 #define MMA_LDST(n, geom_op_type) \
20848 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
// Maps an NVPTX MMA load/store builtin to the number of elements it transfers
// and the intrinsic IDs of its column- and row-layout variants. An entry of 0
// in a layout slot means that layout is not available for the builtin. Any
// BuiltinID not listed here reaches llvm_unreachable.
20850 static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
20851 switch (BuiltinID) {
20852 // FP MMA loads
20853 case NVPTX::BI__hmma_m16n16k16_ld_a:
20854 return MMA_LDST(8, m16n16k16_load_a_f16);
20855 case NVPTX::BI__hmma_m16n16k16_ld_b:
20856 return MMA_LDST(8, m16n16k16_load_b_f16);
20857 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
20858 return MMA_LDST(4, m16n16k16_load_c_f16);
20859 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
20860 return MMA_LDST(8, m16n16k16_load_c_f32);
20861 case NVPTX::BI__hmma_m32n8k16_ld_a:
20862 return MMA_LDST(8, m32n8k16_load_a_f16);
20863 case NVPTX::BI__hmma_m32n8k16_ld_b:
20864 return MMA_LDST(8, m32n8k16_load_b_f16);
20865 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
20866 return MMA_LDST(4, m32n8k16_load_c_f16);
20867 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
20868 return MMA_LDST(8, m32n8k16_load_c_f32);
20869 case NVPTX::BI__hmma_m8n32k16_ld_a:
20870 return MMA_LDST(8, m8n32k16_load_a_f16);
20871 case NVPTX::BI__hmma_m8n32k16_ld_b:
20872 return MMA_LDST(8, m8n32k16_load_b_f16);
20873 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
20874 return MMA_LDST(4, m8n32k16_load_c_f16);
20875 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
20876 return MMA_LDST(8, m8n32k16_load_c_f32);
20878 // Integer MMA loads
20879 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
20880 return MMA_LDST(2, m16n16k16_load_a_s8);
20881 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
20882 return MMA_LDST(2, m16n16k16_load_a_u8);
20883 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
20884 return MMA_LDST(2, m16n16k16_load_b_s8);
20885 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
20886 return MMA_LDST(2, m16n16k16_load_b_u8);
20887 case NVPTX::BI__imma_m16n16k16_ld_c:
20888 return MMA_LDST(8, m16n16k16_load_c_s32);
20889 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
20890 return MMA_LDST(4, m32n8k16_load_a_s8);
20891 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
20892 return MMA_LDST(4, m32n8k16_load_a_u8);
20893 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
20894 return MMA_LDST(1, m32n8k16_load_b_s8);
20895 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
20896 return MMA_LDST(1, m32n8k16_load_b_u8);
20897 case NVPTX::BI__imma_m32n8k16_ld_c:
20898 return MMA_LDST(8, m32n8k16_load_c_s32);
20899 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
20900 return MMA_LDST(1, m8n32k16_load_a_s8);
20901 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
20902 return MMA_LDST(1, m8n32k16_load_a_u8);
20903 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
20904 return MMA_LDST(4, m8n32k16_load_b_s8);
20905 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
20906 return MMA_LDST(4, m8n32k16_load_b_u8);
20907 case NVPTX::BI__imma_m8n32k16_ld_c:
20908 return MMA_LDST(8, m8n32k16_load_c_s32);
20910 // Sub-integer MMA loads.
20911 // Only row/col layout is supported by A/B fragments.
// The entries below are therefore written out by hand: A fragments fill only
// the row slot and B fragments only the col slot, with 0 in the other.
20912 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
20913 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
20914 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
20915 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
20916 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
20917 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
20918 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
20919 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
20920 case NVPTX::BI__imma_m8n8k32_ld_c:
20921 return MMA_LDST(2, m8n8k32_load_c_s32);
20922 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
20923 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
20924 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
20925 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
20926 case NVPTX::BI__bmma_m8n8k128_ld_c:
20927 return MMA_LDST(2, m8n8k128_load_c_s32);
20929 // Double MMA loads
20930 case NVPTX::BI__dmma_m8n8k4_ld_a:
20931 return MMA_LDST(1, m8n8k4_load_a_f64);
20932 case NVPTX::BI__dmma_m8n8k4_ld_b:
20933 return MMA_LDST(1, m8n8k4_load_b_f64);
20934 case NVPTX::BI__dmma_m8n8k4_ld_c:
20935 return MMA_LDST(2, m8n8k4_load_c_f64);
20937 // Alternate float MMA loads
20938 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
20939 return MMA_LDST(4, m16n16k16_load_a_bf16);
20940 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
20941 return MMA_LDST(4, m16n16k16_load_b_bf16);
20942 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
20943 return MMA_LDST(2, m8n32k16_load_a_bf16);
20944 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
20945 return MMA_LDST(8, m8n32k16_load_b_bf16);
20946 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
20947 return MMA_LDST(8, m32n8k16_load_a_bf16);
20948 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
20949 return MMA_LDST(2, m32n8k16_load_b_bf16);
20950 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
20951 return MMA_LDST(4, m16n16k8_load_a_tf32);
20952 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
20953 return MMA_LDST(4, m16n16k8_load_b_tf32);
20954 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
20955 return MMA_LDST(8, m16n16k8_load_c_f32);
20957 // NOTE: We need to follow the inconsistent naming scheme used by NVCC.
20958 // Unlike PTX and LLVM IR, where stores always use fragment D, NVCC builtins
20959 // always use fragment C for both loads and stores.
20960 // FP MMA stores.
20961 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
20962 return MMA_LDST(4, m16n16k16_store_d_f16);
20963 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
20964 return MMA_LDST(8, m16n16k16_store_d_f32);
20965 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
20966 return MMA_LDST(4, m32n8k16_store_d_f16);
20967 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
20968 return MMA_LDST(8, m32n8k16_store_d_f32);
20969 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
20970 return MMA_LDST(4, m8n32k16_store_d_f16);
20971 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
20972 return MMA_LDST(8, m8n32k16_store_d_f32);
20974 // Integer and sub-integer MMA stores.
20975 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
20976 // name, integer loads/stores use LLVM's i32.
20977 case NVPTX::BI__imma_m16n16k16_st_c_i32:
20978 return MMA_LDST(8, m16n16k16_store_d_s32);
20979 case NVPTX::BI__imma_m32n8k16_st_c_i32:
20980 return MMA_LDST(8, m32n8k16_store_d_s32);
20981 case NVPTX::BI__imma_m8n32k16_st_c_i32:
20982 return MMA_LDST(8, m8n32k16_store_d_s32);
20983 case NVPTX::BI__imma_m8n8k32_st_c_i32:
20984 return MMA_LDST(2, m8n8k32_store_d_s32);
20985 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
20986 return MMA_LDST(2, m8n8k128_store_d_s32);
20988 // Double MMA store
20989 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
20990 return MMA_LDST(2, m8n8k4_store_d_f64);
20992 // Alternate float MMA store
20993 case NVPTX::BI__mma_m16n16k8_st_c_f32:
20994 return MMA_LDST(8, m16n16k8_store_d_f32);
// Callers are expected to pass only the builtins listed above.
20996 default:
20997 llvm_unreachable("Unknown MMA builtin");
21000 #undef MMA_LDST
21001 #undef MMA_INTR
// Describes one MMA builtin: the number of elements in each of its A, B, C
// and D fragments, and the LLVM intrinsic to use for every layout /
// saturation combination.
struct NVPTXMmaInfo {
  unsigned NumEltsA;
  unsigned NumEltsB;
  unsigned NumEltsC;
  unsigned NumEltsD;

  // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
  // over 'col' for layout. The index of non-satf variants is expected to match
  // the undocumented layout constants used by CUDA's mma.hpp.
  std::array<unsigned, 8> Variants;

  // Returns the intrinsic that matches Layout and Satf, or 0 when the
  // combination is not available.
  //
  // Layout selects one of the four layout combinations (0..3); Satf selects
  // the second group of four entries in Variants. A Layout outside 0..3 is
  // rejected up front: folding it into the index unchecked would silently
  // alias onto a different variant (for example Layout 5 without Satf would
  // land on a satfinite entry, and Layout -1 with Satf on entry 3).
  unsigned getMMAIntrinsic(int Layout, bool Satf) const {
    constexpr int NumLayouts = 4;
    if (Layout < 0 || Layout >= NumLayouts)
      return 0;
    const unsigned Index =
        static_cast<unsigned>(Layout) + (Satf ? NumLayouts : 0);
    // Defensive: keeps the lookup in bounds should the table size ever change.
    if (Index >= Variants.size())
      return 0;
    return Variants[Index];
  }
};
21023 // Returns the fragment element counts and the table of intrinsic variants
21024 // for the given MMA builtin. Any other BuiltinID reaches llvm_unreachable.
21025 static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
21026 // clang-format off
// Helper macros that spell out the Variants table; all are undefined again at
// the end of the function.
//   MMA_VARIANTS: the four non-satfinite intrinsics, in row_row, row_col,
//     col_row, col_col order. Used alone it initializes only the first four
//     entries of Variants.
//   MMA_SATF_VARIANTS: those four followed by their _satfinite counterparts
//     in the same order.
21027 #define MMA_VARIANTS(geom, type) \
21028 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
21029 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21030 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
21031 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
21032 #define MMA_SATF_VARIANTS(geom, type) \
21033 MMA_VARIANTS(geom, type), \
21034 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
21035 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21036 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
21037 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
21038 // Sub-integer MMA only supports row.col layout.
21039 #define MMA_VARIANTS_I4(geom, type) \
21040 0, \
21041 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21042 0, \
21043 0, \
21044 0, \
21045 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21046 0, \
21048 // b1 MMA does not support .satfinite.
21049 #define MMA_VARIANTS_B1_XOR(geom, type) \
21050 0, \
21051 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
21052 0, \
21053 0, \
21054 0, \
21055 0, \
21056 0, \
21058 #define MMA_VARIANTS_B1_AND(geom, type) \
21059 0, \
21060 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
21061 0, \
21062 0, \
21063 0, \
21064 0, \
21065 0, \
21067 // clang-format on
21068 switch (BuiltinID) {
21069 // FP MMA
21070 // Note that 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
21071 // NumEltsN of return value are ordered as A,B,C,D.
21072 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21073 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
21074 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21075 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
21076 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21077 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
21078 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21079 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
21080 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21081 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
21082 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21083 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
21084 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21085 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
21086 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21087 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
21088 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21089 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
21090 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21091 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
21092 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21093 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
21094 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
21095 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
21097 // Integer MMA
21098 case NVPTX::BI__imma_m16n16k16_mma_s8:
21099 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
21100 case NVPTX::BI__imma_m16n16k16_mma_u8:
21101 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
21102 case NVPTX::BI__imma_m32n8k16_mma_s8:
21103 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
21104 case NVPTX::BI__imma_m32n8k16_mma_u8:
21105 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
21106 case NVPTX::BI__imma_m8n32k16_mma_s8:
21107 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
21108 case NVPTX::BI__imma_m8n32k16_mma_u8:
21109 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
21111 // Sub-integer MMA
21112 case NVPTX::BI__imma_m8n8k32_mma_s4:
21113 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
21114 case NVPTX::BI__imma_m8n8k32_mma_u4:
21115 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
21116 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21117 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
21118 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
21119 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
21121 // Double MMA
// From here on only MMA_VARIANTS is used, so no satfinite entries are
// supplied for these builtins.
21122 case NVPTX::BI__dmma_m8n8k4_mma_f64:
21123 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
21125 // Alternate FP MMA
21126 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21127 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
21128 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21129 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
21130 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21131 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
21132 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
21133 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
21134 default:
21135 llvm_unreachable("Unexpected builtin ID.");
21137 #undef MMA_VARIANTS
21138 #undef MMA_SATF_VARIANTS
21139 #undef MMA_VARIANTS_I4
21140 #undef MMA_VARIANTS_B1_AND
21141 #undef MMA_VARIANTS_B1_XOR
21144 static Value *MakeLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
21145 const CallExpr *E) {
21146 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21147 QualType ArgType = E->getArg(0)->getType();
21148 clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
21149 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
21150 return CGF.Builder.CreateCall(
21151 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
21152 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
// Emits the builtin call E as a plain aligned load rather than an intrinsic
// call.
//
// The first argument of E is emitted as the pointer, cast to address space 1,
// and loaded with the natural alignment of its pointee type. The load is
// tagged with (empty) MD_invariant_load metadata and returned.
21155 static Value *MakeLdg(CodeGenFunction &CGF, const CallExpr *E) {
21156 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21157 QualType ArgType = E->getArg(0)->getType();
21158 clang::CharUnits AlignV = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
21159 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
21161 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
21162 auto *ASC = CGF.Builder.CreateAddrSpaceCast(Ptr, CGF.Builder.getPtrTy(1));
21163 auto *LD = CGF.Builder.CreateAlignedLoad(ElemTy, ASC, AlignV.getAsAlign());
// An empty metadata node is sufficient; only the presence of the
// MD_invariant_load kind on the load is set here.
21164 MDNode *MD = MDNode::get(CGF.Builder.getContext(), {});
21165 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
21167 return LD;
// Emits a call to the intrinsic IntrinsicID for a two-operand builtin call E.
//
// The intrinsic is overloaded on the memory type of the pointee of E's first
// argument and on the type of the emitted pointer. The call operands are the
// emitted first argument (the pointer) and the emitted second argument.
21170 static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
21171 const CallExpr *E) {
21172 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21173 llvm::Type *ElemTy =
21174 CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21175 return CGF.Builder.CreateCall(
21176 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
21177 {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
// Emits the builtin call E as a call to one of two intrinsics, chosen by the
// number of arguments E was written with:
//   - exactly 3 arguments: IntrinsicIDS, called with all three emitted
//     arguments;
//   - otherwise: IntrinsicID, called with the first two emitted arguments.
//
// NOTE(review): SrcSize is not referenced anywhere in this function body; it
// only appears in the signature. Confirm whether callers still need it.
21180 static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
21181 CodeGenFunction &CGF, const CallExpr *E,
21182 int SrcSize) {
21183 return E->getNumArgs() == 3
21184 ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
21185 {CGF.EmitScalarExpr(E->getArg(0)),
21186 CGF.EmitScalarExpr(E->getArg(1)),
21187 CGF.EmitScalarExpr(E->getArg(2))})
21188 : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
21189 {CGF.EmitScalarExpr(E->getArg(0)),
21190 CGF.EmitScalarExpr(E->getArg(1))});
// Emits a builtin call E whose lowering is gated on half type support.
//
// If neither the NativeHalfType language option is set nor the target reports
// that it does not use FP16 conversion intrinsics, an error naming the builtin
// is reported at E's location and nullptr is returned.
//
// Otherwise the call is lowered in one of three ways:
//   - BuiltinID is __nvvm_ldg_h or __nvvm_ldg_h2: delegated to MakeLdg
//     (IntrinsicID is not used on this path);
//   - IntrinsicID is nvvm_ldu_global_f: delegated to MakeLdu;
//   - anything else: a direct call to the (non-overloaded) intrinsic
//     IntrinsicID, with each emitted argument bitcast to the corresponding
//     parameter type when the types differ.
21193 static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
21194 const CallExpr *E, CodeGenFunction &CGF) {
21195 auto &C = CGF.CGM.getContext();
// Proceed only if NativeHalfType is enabled OR the target does not use FP16
// conversion intrinsics; diagnose and bail out otherwise.
21196 if (!(C.getLangOpts().NativeHalfType ||
21197 !C.getTargetInfo().useFP16ConversionIntrinsics())) {
21198 CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
21199 " requires native half type support.");
21200 return nullptr;
// The half-precision ldg builtins are emitted as loads, not intrinsic calls.
21203 if (BuiltinID == NVPTX::BI__nvvm_ldg_h || BuiltinID == NVPTX::BI__nvvm_ldg_h2)
21204 return MakeLdg(CGF, E);
21206 if (IntrinsicID == Intrinsic::nvvm_ldu_global_f)
21207 return MakeLdu(IntrinsicID, CGF, E);
// Generic path: call the intrinsic directly with the builtin's arguments.
21209 SmallVector<Value *, 16> Args;
21210 auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
21211 auto *FTy = F->getFunctionType();
// ICEArguments is filled in by GetBuiltinType as a per-argument bitmask;
// presumably it flags arguments that must be integer constant expressions
// (TODO confirm against ASTContext::GetBuiltinType). The loop below asserts
// that no argument of these builtins is flagged.
21212 unsigned ICEArguments = 0;
21213 ASTContext::GetBuiltinTypeError Error;
21214 C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
21215 assert(Error == ASTContext::GE_None && "Should not codegen an error");
21216 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
21217 assert((ICEArguments & (1 << i)) == 0);
21218 auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
// Reconcile the emitted argument type with the intrinsic's parameter type.
21219 auto *PTy = FTy->getParamType(i);
21220 if (PTy != ArgValue->getType())
21221 ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
21222 Args.push_back(ArgValue);
21225 return CGF.Builder.CreateCall(F, Args);
21227 } // namespace
21229 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
21230 const CallExpr *E) {
21231 switch (BuiltinID) {
21232 case NVPTX::BI__nvvm_atom_add_gen_i:
21233 case NVPTX::BI__nvvm_atom_add_gen_l:
21234 case NVPTX::BI__nvvm_atom_add_gen_ll:
21235 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
21237 case NVPTX::BI__nvvm_atom_sub_gen_i:
21238 case NVPTX::BI__nvvm_atom_sub_gen_l:
21239 case NVPTX::BI__nvvm_atom_sub_gen_ll:
21240 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
21242 case NVPTX::BI__nvvm_atom_and_gen_i:
21243 case NVPTX::BI__nvvm_atom_and_gen_l:
21244 case NVPTX::BI__nvvm_atom_and_gen_ll:
21245 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
21247 case NVPTX::BI__nvvm_atom_or_gen_i:
21248 case NVPTX::BI__nvvm_atom_or_gen_l:
21249 case NVPTX::BI__nvvm_atom_or_gen_ll:
21250 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
21252 case NVPTX::BI__nvvm_atom_xor_gen_i:
21253 case NVPTX::BI__nvvm_atom_xor_gen_l:
21254 case NVPTX::BI__nvvm_atom_xor_gen_ll:
21255 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
21257 case NVPTX::BI__nvvm_atom_xchg_gen_i:
21258 case NVPTX::BI__nvvm_atom_xchg_gen_l:
21259 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
21260 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
21262 case NVPTX::BI__nvvm_atom_max_gen_i:
21263 case NVPTX::BI__nvvm_atom_max_gen_l:
21264 case NVPTX::BI__nvvm_atom_max_gen_ll:
21265 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
21267 case NVPTX::BI__nvvm_atom_max_gen_ui:
21268 case NVPTX::BI__nvvm_atom_max_gen_ul:
21269 case NVPTX::BI__nvvm_atom_max_gen_ull:
21270 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
21272 case NVPTX::BI__nvvm_atom_min_gen_i:
21273 case NVPTX::BI__nvvm_atom_min_gen_l:
21274 case NVPTX::BI__nvvm_atom_min_gen_ll:
21275 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
21277 case NVPTX::BI__nvvm_atom_min_gen_ui:
21278 case NVPTX::BI__nvvm_atom_min_gen_ul:
21279 case NVPTX::BI__nvvm_atom_min_gen_ull:
21280 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
21282 case NVPTX::BI__nvvm_atom_cas_gen_us:
21283 case NVPTX::BI__nvvm_atom_cas_gen_i:
21284 case NVPTX::BI__nvvm_atom_cas_gen_l:
21285 case NVPTX::BI__nvvm_atom_cas_gen_ll:
21286 // __nvvm_atom_cas_gen_* should return the old value rather than the
21287 // success flag.
21288 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
21290 case NVPTX::BI__nvvm_atom_add_gen_f:
21291 case NVPTX::BI__nvvm_atom_add_gen_d: {
21292 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
21293 Value *Val = EmitScalarExpr(E->getArg(1));
21295 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
21296 AtomicOrdering::SequentiallyConsistent);
21299 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
21300 Value *Ptr = EmitScalarExpr(E->getArg(0));
21301 Value *Val = EmitScalarExpr(E->getArg(1));
21302 Function *FnALI32 =
21303 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
21304 return Builder.CreateCall(FnALI32, {Ptr, Val});
21307 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
21308 Value *Ptr = EmitScalarExpr(E->getArg(0));
21309 Value *Val = EmitScalarExpr(E->getArg(1));
21310 Function *FnALD32 =
21311 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
21312 return Builder.CreateCall(FnALD32, {Ptr, Val});
21315 case NVPTX::BI__nvvm_ldg_c:
21316 case NVPTX::BI__nvvm_ldg_sc:
21317 case NVPTX::BI__nvvm_ldg_c2:
21318 case NVPTX::BI__nvvm_ldg_sc2:
21319 case NVPTX::BI__nvvm_ldg_c4:
21320 case NVPTX::BI__nvvm_ldg_sc4:
21321 case NVPTX::BI__nvvm_ldg_s:
21322 case NVPTX::BI__nvvm_ldg_s2:
21323 case NVPTX::BI__nvvm_ldg_s4:
21324 case NVPTX::BI__nvvm_ldg_i:
21325 case NVPTX::BI__nvvm_ldg_i2:
21326 case NVPTX::BI__nvvm_ldg_i4:
21327 case NVPTX::BI__nvvm_ldg_l:
21328 case NVPTX::BI__nvvm_ldg_l2:
21329 case NVPTX::BI__nvvm_ldg_ll:
21330 case NVPTX::BI__nvvm_ldg_ll2:
21331 case NVPTX::BI__nvvm_ldg_uc:
21332 case NVPTX::BI__nvvm_ldg_uc2:
21333 case NVPTX::BI__nvvm_ldg_uc4:
21334 case NVPTX::BI__nvvm_ldg_us:
21335 case NVPTX::BI__nvvm_ldg_us2:
21336 case NVPTX::BI__nvvm_ldg_us4:
21337 case NVPTX::BI__nvvm_ldg_ui:
21338 case NVPTX::BI__nvvm_ldg_ui2:
21339 case NVPTX::BI__nvvm_ldg_ui4:
21340 case NVPTX::BI__nvvm_ldg_ul:
21341 case NVPTX::BI__nvvm_ldg_ul2:
21342 case NVPTX::BI__nvvm_ldg_ull:
21343 case NVPTX::BI__nvvm_ldg_ull2:
21344 case NVPTX::BI__nvvm_ldg_f:
21345 case NVPTX::BI__nvvm_ldg_f2:
21346 case NVPTX::BI__nvvm_ldg_f4:
21347 case NVPTX::BI__nvvm_ldg_d:
21348 case NVPTX::BI__nvvm_ldg_d2:
21349 // PTX Interoperability section 2.2: "For a vector with an even number of
21350 // elements, its alignment is set to number of elements times the alignment
21351 // of its member: n*alignof(t)."
21352 return MakeLdg(*this, E);
21354 case NVPTX::BI__nvvm_ldu_c:
21355 case NVPTX::BI__nvvm_ldu_sc:
21356 case NVPTX::BI__nvvm_ldu_c2:
21357 case NVPTX::BI__nvvm_ldu_sc2:
21358 case NVPTX::BI__nvvm_ldu_c4:
21359 case NVPTX::BI__nvvm_ldu_sc4:
21360 case NVPTX::BI__nvvm_ldu_s:
21361 case NVPTX::BI__nvvm_ldu_s2:
21362 case NVPTX::BI__nvvm_ldu_s4:
21363 case NVPTX::BI__nvvm_ldu_i:
21364 case NVPTX::BI__nvvm_ldu_i2:
21365 case NVPTX::BI__nvvm_ldu_i4:
21366 case NVPTX::BI__nvvm_ldu_l:
21367 case NVPTX::BI__nvvm_ldu_l2:
21368 case NVPTX::BI__nvvm_ldu_ll:
21369 case NVPTX::BI__nvvm_ldu_ll2:
21370 case NVPTX::BI__nvvm_ldu_uc:
21371 case NVPTX::BI__nvvm_ldu_uc2:
21372 case NVPTX::BI__nvvm_ldu_uc4:
21373 case NVPTX::BI__nvvm_ldu_us:
21374 case NVPTX::BI__nvvm_ldu_us2:
21375 case NVPTX::BI__nvvm_ldu_us4:
21376 case NVPTX::BI__nvvm_ldu_ui:
21377 case NVPTX::BI__nvvm_ldu_ui2:
21378 case NVPTX::BI__nvvm_ldu_ui4:
21379 case NVPTX::BI__nvvm_ldu_ul:
21380 case NVPTX::BI__nvvm_ldu_ul2:
21381 case NVPTX::BI__nvvm_ldu_ull:
21382 case NVPTX::BI__nvvm_ldu_ull2:
21383 return MakeLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
21384 case NVPTX::BI__nvvm_ldu_f:
21385 case NVPTX::BI__nvvm_ldu_f2:
21386 case NVPTX::BI__nvvm_ldu_f4:
21387 case NVPTX::BI__nvvm_ldu_d:
21388 case NVPTX::BI__nvvm_ldu_d2:
21389 return MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
21391 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
21392 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
21393 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
21394 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
21395 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
21396 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
21397 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
21398 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
21399 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
21400 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
21401 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
21402 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
21403 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
21404 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
21405 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
21406 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
21407 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
21408 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
21409 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
21410 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
21411 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
21412 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
21413 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
21414 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
21415 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
21416 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
21417 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
21418 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
21419 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
21420 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
21421 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
21422 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
21423 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
21424 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
21425 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
21426 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
21427 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
21428 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
21429 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
21430 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
21431 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
21432 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
21433 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
21434 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
21435 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
21436 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
21437 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
21438 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
21439 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
21440 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
21441 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
21442 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
21443 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
21444 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
21445 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
21446 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
21447 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
21448 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
21449 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
21450 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
21451 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
21452 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
21453 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
21454 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
21455 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
21456 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
21457 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
21458 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
21459 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
21460 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
21461 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
21462 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
21463 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
21464 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
21465 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
21466 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
21467 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
21468 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
21469 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
21470 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
21471 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
21472 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
21473 case NVPTX::BI__nvvm_atom_cta_cas_gen_us:
21474 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
21475 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
21476 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
21477 Value *Ptr = EmitScalarExpr(E->getArg(0));
21478 llvm::Type *ElemTy =
21479 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21480 return Builder.CreateCall(
21481 CGM.getIntrinsic(
21482 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
21483 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21485 case NVPTX::BI__nvvm_atom_sys_cas_gen_us:
21486 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
21487 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
21488 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
21489 Value *Ptr = EmitScalarExpr(E->getArg(0));
21490 llvm::Type *ElemTy =
21491 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21492 return Builder.CreateCall(
21493 CGM.getIntrinsic(
21494 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
21495 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21497 case NVPTX::BI__nvvm_match_all_sync_i32p:
21498 case NVPTX::BI__nvvm_match_all_sync_i64p: {
21499 Value *Mask = EmitScalarExpr(E->getArg(0));
21500 Value *Val = EmitScalarExpr(E->getArg(1));
21501 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
21502 Value *ResultPair = Builder.CreateCall(
21503 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
21504 ? Intrinsic::nvvm_match_all_sync_i32p
21505 : Intrinsic::nvvm_match_all_sync_i64p),
21506 {Mask, Val});
21507 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
21508 PredOutPtr.getElementType());
21509 Builder.CreateStore(Pred, PredOutPtr);
21510 return Builder.CreateExtractValue(ResultPair, 0);
21513 // FP MMA loads
21514 case NVPTX::BI__hmma_m16n16k16_ld_a:
21515 case NVPTX::BI__hmma_m16n16k16_ld_b:
21516 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
21517 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
21518 case NVPTX::BI__hmma_m32n8k16_ld_a:
21519 case NVPTX::BI__hmma_m32n8k16_ld_b:
21520 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
21521 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
21522 case NVPTX::BI__hmma_m8n32k16_ld_a:
21523 case NVPTX::BI__hmma_m8n32k16_ld_b:
21524 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
21525 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
21526 // Integer MMA loads.
21527 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
21528 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
21529 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
21530 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
21531 case NVPTX::BI__imma_m16n16k16_ld_c:
21532 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
21533 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
21534 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
21535 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
21536 case NVPTX::BI__imma_m32n8k16_ld_c:
21537 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
21538 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
21539 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
21540 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
21541 case NVPTX::BI__imma_m8n32k16_ld_c:
21542 // Sub-integer MMA loads.
21543 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
21544 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
21545 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
21546 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
21547 case NVPTX::BI__imma_m8n8k32_ld_c:
21548 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
21549 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
21550 case NVPTX::BI__bmma_m8n8k128_ld_c:
21551 // Double MMA loads.
21552 case NVPTX::BI__dmma_m8n8k4_ld_a:
21553 case NVPTX::BI__dmma_m8n8k4_ld_b:
21554 case NVPTX::BI__dmma_m8n8k4_ld_c:
21555 // Alternate float MMA loads.
21556 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
21557 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
21558 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
21559 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
21560 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
21561 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
21562 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
21563 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
21564 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
21565 Address Dst = EmitPointerWithAlignment(E->getArg(0));
21566 Value *Src = EmitScalarExpr(E->getArg(1));
21567 Value *Ldm = EmitScalarExpr(E->getArg(2));
21568 std::optional<llvm::APSInt> isColMajorArg =
21569 E->getArg(3)->getIntegerConstantExpr(getContext());
21570 if (!isColMajorArg)
21571 return nullptr;
21572 bool isColMajor = isColMajorArg->getSExtValue();
21573 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21574 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21575 if (IID == 0)
21576 return nullptr;
21578 Value *Result =
21579 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
21581 // Save returned values.
21582 assert(II.NumResults);
21583 if (II.NumResults == 1) {
21584 Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),
21585 CharUnits::fromQuantity(4));
21586 } else {
21587 for (unsigned i = 0; i < II.NumResults; ++i) {
21588 Builder.CreateAlignedStore(
21589 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
21590 Dst.getElementType()),
21591 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
21592 llvm::ConstantInt::get(IntTy, i)),
21593 CharUnits::fromQuantity(4));
21596 return Result;
21599 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
21600 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
21601 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
21602 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
21603 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
21604 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
21605 case NVPTX::BI__imma_m16n16k16_st_c_i32:
21606 case NVPTX::BI__imma_m32n8k16_st_c_i32:
21607 case NVPTX::BI__imma_m8n32k16_st_c_i32:
21608 case NVPTX::BI__imma_m8n8k32_st_c_i32:
21609 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
21610 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
21611 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
21612 Value *Dst = EmitScalarExpr(E->getArg(0));
21613 Address Src = EmitPointerWithAlignment(E->getArg(1));
21614 Value *Ldm = EmitScalarExpr(E->getArg(2));
21615 std::optional<llvm::APSInt> isColMajorArg =
21616 E->getArg(3)->getIntegerConstantExpr(getContext());
21617 if (!isColMajorArg)
21618 return nullptr;
21619 bool isColMajor = isColMajorArg->getSExtValue();
21620 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21621 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21622 if (IID == 0)
21623 return nullptr;
21624 Function *Intrinsic =
21625 CGM.getIntrinsic(IID, Dst->getType());
21626 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
21627 SmallVector<Value *, 10> Values = {Dst};
21628 for (unsigned i = 0; i < II.NumResults; ++i) {
21629 Value *V = Builder.CreateAlignedLoad(
21630 Src.getElementType(),
21631 Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),
21632 llvm::ConstantInt::get(IntTy, i)),
21633 CharUnits::fromQuantity(4));
21634 Values.push_back(Builder.CreateBitCast(V, ParamType));
21636 Values.push_back(Ldm);
21637 Value *Result = Builder.CreateCall(Intrinsic, Values);
21638 return Result;
21641 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
21642 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
21643 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21644 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21645 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21646 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21647 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21648 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21649 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21650 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21651 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21652 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21653 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
21654 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21655 case NVPTX::BI__imma_m16n16k16_mma_s8:
21656 case NVPTX::BI__imma_m16n16k16_mma_u8:
21657 case NVPTX::BI__imma_m32n8k16_mma_s8:
21658 case NVPTX::BI__imma_m32n8k16_mma_u8:
21659 case NVPTX::BI__imma_m8n32k16_mma_s8:
21660 case NVPTX::BI__imma_m8n32k16_mma_u8:
21661 case NVPTX::BI__imma_m8n8k32_mma_s4:
21662 case NVPTX::BI__imma_m8n8k32_mma_u4:
21663 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21664 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
21665 case NVPTX::BI__dmma_m8n8k4_mma_f64:
21666 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21667 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21668 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21669 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
21670 Address Dst = EmitPointerWithAlignment(E->getArg(0));
21671 Address SrcA = EmitPointerWithAlignment(E->getArg(1));
21672 Address SrcB = EmitPointerWithAlignment(E->getArg(2));
21673 Address SrcC = EmitPointerWithAlignment(E->getArg(3));
21674 std::optional<llvm::APSInt> LayoutArg =
21675 E->getArg(4)->getIntegerConstantExpr(getContext());
21676 if (!LayoutArg)
21677 return nullptr;
21678 int Layout = LayoutArg->getSExtValue();
21679 if (Layout < 0 || Layout > 3)
21680 return nullptr;
21681 llvm::APSInt SatfArg;
21682 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
21683 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
21684 SatfArg = 0; // .b1 does not have satf argument.
21685 else if (std::optional<llvm::APSInt> OptSatfArg =
21686 E->getArg(5)->getIntegerConstantExpr(getContext()))
21687 SatfArg = *OptSatfArg;
21688 else
21689 return nullptr;
21690 bool Satf = SatfArg.getSExtValue();
21691 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
21692 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
21693 if (IID == 0) // Unsupported combination of Layout/Satf.
21694 return nullptr;
21696 SmallVector<Value *, 24> Values;
21697 Function *Intrinsic = CGM.getIntrinsic(IID);
21698 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
21699 // Load A
21700 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
21701 Value *V = Builder.CreateAlignedLoad(
21702 SrcA.getElementType(),
21703 Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
21704 llvm::ConstantInt::get(IntTy, i)),
21705 CharUnits::fromQuantity(4));
21706 Values.push_back(Builder.CreateBitCast(V, AType));
21708 // Load B
21709 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
21710 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
21711 Value *V = Builder.CreateAlignedLoad(
21712 SrcB.getElementType(),
21713 Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
21714 llvm::ConstantInt::get(IntTy, i)),
21715 CharUnits::fromQuantity(4));
21716 Values.push_back(Builder.CreateBitCast(V, BType));
21718 // Load C
21719 llvm::Type *CType =
21720 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
21721 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
21722 Value *V = Builder.CreateAlignedLoad(
21723 SrcC.getElementType(),
21724 Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
21725 llvm::ConstantInt::get(IntTy, i)),
21726 CharUnits::fromQuantity(4));
21727 Values.push_back(Builder.CreateBitCast(V, CType));
21729 Value *Result = Builder.CreateCall(Intrinsic, Values);
21730 llvm::Type *DType = Dst.getElementType();
21731 for (unsigned i = 0; i < MI.NumEltsD; ++i)
21732 Builder.CreateAlignedStore(
21733 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
21734 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
21735 llvm::ConstantInt::get(IntTy, i)),
21736 CharUnits::fromQuantity(4));
21737 return Result;
21739 // The following builtins require half type support
21740 case NVPTX::BI__nvvm_ex2_approx_f16:
21741 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
21742 case NVPTX::BI__nvvm_ex2_approx_f16x2:
21743 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
21744 case NVPTX::BI__nvvm_ff2f16x2_rn:
21745 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
21746 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
21747 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
21748 case NVPTX::BI__nvvm_ff2f16x2_rz:
21749 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
21750 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
21751 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
21752 case NVPTX::BI__nvvm_fma_rn_f16:
21753 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
21754 case NVPTX::BI__nvvm_fma_rn_f16x2:
21755 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
21756 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
21757 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
21758 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
21759 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
21760 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
21761 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
21762 *this);
21763 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
21764 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
21765 *this);
21766 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
21767 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
21768 *this);
21769 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
21770 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
21771 *this);
21772 case NVPTX::BI__nvvm_fma_rn_relu_f16:
21773 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
21774 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
21775 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
21776 case NVPTX::BI__nvvm_fma_rn_sat_f16:
21777 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
21778 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
21779 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
21780 case NVPTX::BI__nvvm_fmax_f16:
21781 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
21782 case NVPTX::BI__nvvm_fmax_f16x2:
21783 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
21784 case NVPTX::BI__nvvm_fmax_ftz_f16:
21785 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
21786 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
21787 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
21788 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
21789 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
21790 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
21791 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
21792 *this);
21793 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
21794 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
21795 E, *this);
21796 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
21797 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
21798 BuiltinID, E, *this);
21799 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
21800 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
21801 *this);
21802 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
21803 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
21804 E, *this);
21805 case NVPTX::BI__nvvm_fmax_nan_f16:
21806 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
21807 case NVPTX::BI__nvvm_fmax_nan_f16x2:
21808 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
21809 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
21810 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
21811 *this);
21812 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
21813 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
21814 E, *this);
21815 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
21816 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
21817 *this);
21818 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
21819 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
21820 *this);
21821 case NVPTX::BI__nvvm_fmin_f16:
21822 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
21823 case NVPTX::BI__nvvm_fmin_f16x2:
21824 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
21825 case NVPTX::BI__nvvm_fmin_ftz_f16:
21826 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
21827 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
21828 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
21829 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
21830 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
21831 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
21832 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
21833 *this);
21834 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
21835 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
21836 E, *this);
21837 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
21838 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
21839 BuiltinID, E, *this);
21840 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
21841 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
21842 *this);
21843 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
21844 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
21845 E, *this);
21846 case NVPTX::BI__nvvm_fmin_nan_f16:
21847 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
21848 case NVPTX::BI__nvvm_fmin_nan_f16x2:
21849 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
21850 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
21851 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
21852 *this);
21853 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
21854 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
21855 E, *this);
21856 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
21857 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
21858 *this);
21859 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
21860 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
21861 *this);
21862 case NVPTX::BI__nvvm_ldg_h:
21863 case NVPTX::BI__nvvm_ldg_h2:
21864 return MakeHalfType(Intrinsic::not_intrinsic, BuiltinID, E, *this);
21865 case NVPTX::BI__nvvm_ldu_h:
21866 case NVPTX::BI__nvvm_ldu_h2:
21867 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
21868 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
21869 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
21870 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
21872 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
21873 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
21874 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
21876 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
21877 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
21878 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
21879 16);
21880 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
21881 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
21882 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
21883 16);
21884 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
21885 return Builder.CreateCall(
21886 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
21887 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
21888 return Builder.CreateCall(
21889 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
21890 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
21891 return Builder.CreateCall(
21892 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
21893 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
21894 return Builder.CreateCall(
21895 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
21896 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
21897 return Builder.CreateCall(
21898 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
21899 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
21900 return Builder.CreateCall(
21901 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
21902 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
21903 return Builder.CreateCall(
21904 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
21905 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
21906 return Builder.CreateCall(
21907 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
21908 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
21909 return Builder.CreateCall(
21910 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
21911 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
21912 return Builder.CreateCall(
21913 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
21914 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
21915 return Builder.CreateCall(
21916 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
21917 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
21918 return Builder.CreateCall(
21919 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
21920 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
21921 return Builder.CreateCall(
21922 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
21923 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
21924 return Builder.CreateCall(
21925 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
21926 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
21927 return Builder.CreateCall(
21928 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
21929 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
21930 return Builder.CreateCall(
21931 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
21932 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
21933 return Builder.CreateCall(
21934 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
21935 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
21936 return Builder.CreateCall(
21937 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
21938 case NVPTX::BI__nvvm_is_explicit_cluster:
21939 return Builder.CreateCall(
21940 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
21941 case NVPTX::BI__nvvm_isspacep_shared_cluster:
21942 return Builder.CreateCall(
21943 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
21944 EmitScalarExpr(E->getArg(0)));
21945 case NVPTX::BI__nvvm_mapa:
21946 return Builder.CreateCall(
21947 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
21948 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21949 case NVPTX::BI__nvvm_mapa_shared_cluster:
21950 return Builder.CreateCall(
21951 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
21952 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21953 case NVPTX::BI__nvvm_getctarank:
21954 return Builder.CreateCall(
21955 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
21956 EmitScalarExpr(E->getArg(0)));
21957 case NVPTX::BI__nvvm_getctarank_shared_cluster:
21958 return Builder.CreateCall(
21959 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
21960 EmitScalarExpr(E->getArg(0)));
21961 case NVPTX::BI__nvvm_barrier_cluster_arrive:
21962 return Builder.CreateCall(
21963 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
21964 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
21965 return Builder.CreateCall(
21966 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
21967 case NVPTX::BI__nvvm_barrier_cluster_wait:
21968 return Builder.CreateCall(
21969 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
21970 case NVPTX::BI__nvvm_fence_sc_cluster:
21971 return Builder.CreateCall(
21972 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
21973 default:
21974 return nullptr;
21978 namespace {
21979 struct BuiltinAlignArgs {
21980 llvm::Value *Src = nullptr;
21981 llvm::Type *SrcType = nullptr;
21982 llvm::Value *Alignment = nullptr;
21983 llvm::Value *Mask = nullptr;
21984 llvm::IntegerType *IntType = nullptr;
21986 BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
21987 QualType AstType = E->getArg(0)->getType();
21988 if (AstType->isArrayType())
21989 Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);
21990 else
21991 Src = CGF.EmitScalarExpr(E->getArg(0));
21992 SrcType = Src->getType();
21993 if (SrcType->isPointerTy()) {
21994 IntType = IntegerType::get(
21995 CGF.getLLVMContext(),
21996 CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
21997 } else {
21998 assert(SrcType->isIntegerTy());
21999 IntType = cast<llvm::IntegerType>(SrcType);
22001 Alignment = CGF.EmitScalarExpr(E->getArg(1));
22002 Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
22003 auto *One = llvm::ConstantInt::get(IntType, 1);
22004 Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
22007 } // namespace
22009 /// Generate (x & (y-1)) == 0.
22010 RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
22011 BuiltinAlignArgs Args(E, *this);
22012 llvm::Value *SrcAddress = Args.Src;
22013 if (Args.SrcType->isPointerTy())
22014 SrcAddress =
22015 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
22016 return RValue::get(Builder.CreateICmpEQ(
22017 Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
22018 llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
22021 /// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
22022 /// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
22023 /// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
22024 RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
22025 BuiltinAlignArgs Args(E, *this);
22026 llvm::Value *SrcForMask = Args.Src;
22027 if (AlignUp) {
22028 // When aligning up we have to first add the mask to ensure we go over the
22029 // next alignment value and then align down to the next valid multiple.
22030 // By adding the mask, we ensure that align_up on an already aligned
22031 // value will not change the value.
22032 if (Args.Src->getType()->isPointerTy()) {
22033 if (getLangOpts().isSignedOverflowDefined())
22034 SrcForMask =
22035 Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
22036 else
22037 SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
22038 /*SignedIndices=*/true,
22039 /*isSubtraction=*/false,
22040 E->getExprLoc(), "over_boundary");
22041 } else {
22042 SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
22045 // Invert the mask to only clear the lower bits.
22046 llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
22047 llvm::Value *Result = nullptr;
22048 if (Args.Src->getType()->isPointerTy()) {
22049 Result = Builder.CreateIntrinsic(
22050 Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
22051 {SrcForMask, InvertedMask}, nullptr, "aligned_result");
22052 } else {
22053 Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
22055 assert(Result->getType() == Args.SrcType);
22056 return RValue::get(Result);
22059 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
22060 const CallExpr *E) {
22061 switch (BuiltinID) {
22062 case WebAssembly::BI__builtin_wasm_memory_size: {
22063 llvm::Type *ResultType = ConvertType(E->getType());
22064 Value *I = EmitScalarExpr(E->getArg(0));
22065 Function *Callee =
22066 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
22067 return Builder.CreateCall(Callee, I);
22069 case WebAssembly::BI__builtin_wasm_memory_grow: {
22070 llvm::Type *ResultType = ConvertType(E->getType());
22071 Value *Args[] = {EmitScalarExpr(E->getArg(0)),
22072 EmitScalarExpr(E->getArg(1))};
22073 Function *Callee =
22074 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
22075 return Builder.CreateCall(Callee, Args);
22077 case WebAssembly::BI__builtin_wasm_tls_size: {
22078 llvm::Type *ResultType = ConvertType(E->getType());
22079 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
22080 return Builder.CreateCall(Callee);
22082 case WebAssembly::BI__builtin_wasm_tls_align: {
22083 llvm::Type *ResultType = ConvertType(E->getType());
22084 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
22085 return Builder.CreateCall(Callee);
22087 case WebAssembly::BI__builtin_wasm_tls_base: {
22088 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
22089 return Builder.CreateCall(Callee);
22091 case WebAssembly::BI__builtin_wasm_throw: {
22092 Value *Tag = EmitScalarExpr(E->getArg(0));
22093 Value *Obj = EmitScalarExpr(E->getArg(1));
22094 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
22095 return Builder.CreateCall(Callee, {Tag, Obj});
22097 case WebAssembly::BI__builtin_wasm_rethrow: {
22098 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
22099 return Builder.CreateCall(Callee);
22101 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
22102 Value *Addr = EmitScalarExpr(E->getArg(0));
22103 Value *Expected = EmitScalarExpr(E->getArg(1));
22104 Value *Timeout = EmitScalarExpr(E->getArg(2));
22105 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
22106 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22108 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
22109 Value *Addr = EmitScalarExpr(E->getArg(0));
22110 Value *Expected = EmitScalarExpr(E->getArg(1));
22111 Value *Timeout = EmitScalarExpr(E->getArg(2));
22112 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
22113 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22115 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
22116 Value *Addr = EmitScalarExpr(E->getArg(0));
22117 Value *Count = EmitScalarExpr(E->getArg(1));
22118 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
22119 return Builder.CreateCall(Callee, {Addr, Count});
22121 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
22122 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
22123 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
22124 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
22125 Value *Src = EmitScalarExpr(E->getArg(0));
22126 llvm::Type *ResT = ConvertType(E->getType());
22127 Function *Callee =
22128 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
22129 return Builder.CreateCall(Callee, {Src});
22131 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
22132 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
22133 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
22134 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
22135 Value *Src = EmitScalarExpr(E->getArg(0));
22136 llvm::Type *ResT = ConvertType(E->getType());
22137 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
22138 {ResT, Src->getType()});
22139 return Builder.CreateCall(Callee, {Src});
22141 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
22142 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
22143 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
22144 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
22145 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i16x8_f16x8:
22146 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
22147 Value *Src = EmitScalarExpr(E->getArg(0));
22148 llvm::Type *ResT = ConvertType(E->getType());
22149 Function *Callee =
22150 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
22151 return Builder.CreateCall(Callee, {Src});
22153 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
22154 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
22155 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
22156 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
22157 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i16x8_f16x8:
22158 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
22159 Value *Src = EmitScalarExpr(E->getArg(0));
22160 llvm::Type *ResT = ConvertType(E->getType());
22161 Function *Callee =
22162 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
22163 return Builder.CreateCall(Callee, {Src});
22165 case WebAssembly::BI__builtin_wasm_min_f32:
22166 case WebAssembly::BI__builtin_wasm_min_f64:
22167 case WebAssembly::BI__builtin_wasm_min_f16x8:
22168 case WebAssembly::BI__builtin_wasm_min_f32x4:
22169 case WebAssembly::BI__builtin_wasm_min_f64x2: {
22170 Value *LHS = EmitScalarExpr(E->getArg(0));
22171 Value *RHS = EmitScalarExpr(E->getArg(1));
22172 Function *Callee =
22173 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
22174 return Builder.CreateCall(Callee, {LHS, RHS});
22176 case WebAssembly::BI__builtin_wasm_max_f32:
22177 case WebAssembly::BI__builtin_wasm_max_f64:
22178 case WebAssembly::BI__builtin_wasm_max_f16x8:
22179 case WebAssembly::BI__builtin_wasm_max_f32x4:
22180 case WebAssembly::BI__builtin_wasm_max_f64x2: {
22181 Value *LHS = EmitScalarExpr(E->getArg(0));
22182 Value *RHS = EmitScalarExpr(E->getArg(1));
22183 Function *Callee =
22184 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
22185 return Builder.CreateCall(Callee, {LHS, RHS});
22187 case WebAssembly::BI__builtin_wasm_pmin_f16x8:
22188 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
22189 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
22190 Value *LHS = EmitScalarExpr(E->getArg(0));
22191 Value *RHS = EmitScalarExpr(E->getArg(1));
22192 Function *Callee =
22193 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
22194 return Builder.CreateCall(Callee, {LHS, RHS});
22196 case WebAssembly::BI__builtin_wasm_pmax_f16x8:
22197 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
22198 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
22199 Value *LHS = EmitScalarExpr(E->getArg(0));
22200 Value *RHS = EmitScalarExpr(E->getArg(1));
22201 Function *Callee =
22202 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
22203 return Builder.CreateCall(Callee, {LHS, RHS});
22205 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22206 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22207 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22208 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22209 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22210 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22211 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22212 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22213 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22214 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22215 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22216 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
22217 unsigned IntNo;
22218 switch (BuiltinID) {
22219 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22220 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22221 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22222 IntNo = Intrinsic::ceil;
22223 break;
22224 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22225 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22226 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22227 IntNo = Intrinsic::floor;
22228 break;
22229 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22230 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22231 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22232 IntNo = Intrinsic::trunc;
22233 break;
22234 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22235 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22236 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
22237 IntNo = Intrinsic::nearbyint;
22238 break;
22239 default:
22240 llvm_unreachable("unexpected builtin ID");
22242 Value *Value = EmitScalarExpr(E->getArg(0));
22243 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
22244 return Builder.CreateCall(Callee, Value);
22246 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
22247 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
22248 return Builder.CreateCall(Callee);
22250 case WebAssembly::BI__builtin_wasm_ref_null_func: {
22251 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
22252 return Builder.CreateCall(Callee);
22254 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
22255 Value *Src = EmitScalarExpr(E->getArg(0));
22256 Value *Indices = EmitScalarExpr(E->getArg(1));
22257 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
22258 return Builder.CreateCall(Callee, {Src, Indices});
22260 case WebAssembly::BI__builtin_wasm_abs_i8x16:
22261 case WebAssembly::BI__builtin_wasm_abs_i16x8:
22262 case WebAssembly::BI__builtin_wasm_abs_i32x4:
22263 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
22264 Value *Vec = EmitScalarExpr(E->getArg(0));
22265 Value *Neg = Builder.CreateNeg(Vec, "neg");
22266 Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
22267 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
22268 return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
22270 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
22271 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
22272 Value *LHS = EmitScalarExpr(E->getArg(0));
22273 Value *RHS = EmitScalarExpr(E->getArg(1));
22274 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
22275 ConvertType(E->getType()));
22276 return Builder.CreateCall(Callee, {LHS, RHS});
22278 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
22279 Value *LHS = EmitScalarExpr(E->getArg(0));
22280 Value *RHS = EmitScalarExpr(E->getArg(1));
22281 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
22282 return Builder.CreateCall(Callee, {LHS, RHS});
22284 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22285 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22286 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22287 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
22288 Value *Vec = EmitScalarExpr(E->getArg(0));
22289 unsigned IntNo;
22290 switch (BuiltinID) {
22291 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22292 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22293 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
22294 break;
22295 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22296 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
22297 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
22298 break;
22299 default:
22300 llvm_unreachable("unexpected builtin ID");
22303 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
22304 return Builder.CreateCall(Callee, Vec);
22306 case WebAssembly::BI__builtin_wasm_bitselect: {
22307 Value *V1 = EmitScalarExpr(E->getArg(0));
22308 Value *V2 = EmitScalarExpr(E->getArg(1));
22309 Value *C = EmitScalarExpr(E->getArg(2));
22310 Function *Callee =
22311 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
22312 return Builder.CreateCall(Callee, {V1, V2, C});
22314 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
22315 Value *LHS = EmitScalarExpr(E->getArg(0));
22316 Value *RHS = EmitScalarExpr(E->getArg(1));
22317 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
22318 return Builder.CreateCall(Callee, {LHS, RHS});
22320 case WebAssembly::BI__builtin_wasm_any_true_v128:
22321 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22322 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22323 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22324 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
22325 unsigned IntNo;
22326 switch (BuiltinID) {
22327 case WebAssembly::BI__builtin_wasm_any_true_v128:
22328 IntNo = Intrinsic::wasm_anytrue;
22329 break;
22330 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22331 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22332 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22333 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
22334 IntNo = Intrinsic::wasm_alltrue;
22335 break;
22336 default:
22337 llvm_unreachable("unexpected builtin ID");
22339 Value *Vec = EmitScalarExpr(E->getArg(0));
22340 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
22341 return Builder.CreateCall(Callee, {Vec});
22343 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
22344 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
22345 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
22346 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
22347 Value *Vec = EmitScalarExpr(E->getArg(0));
22348 Function *Callee =
22349 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
22350 return Builder.CreateCall(Callee, {Vec});
22352 case WebAssembly::BI__builtin_wasm_abs_f16x8:
22353 case WebAssembly::BI__builtin_wasm_abs_f32x4:
22354 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
22355 Value *Vec = EmitScalarExpr(E->getArg(0));
22356 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
22357 return Builder.CreateCall(Callee, {Vec});
22359 case WebAssembly::BI__builtin_wasm_sqrt_f16x8:
22360 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
22361 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
22362 Value *Vec = EmitScalarExpr(E->getArg(0));
22363 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
22364 return Builder.CreateCall(Callee, {Vec});
22366 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22367 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22368 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22369 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
22370 Value *Low = EmitScalarExpr(E->getArg(0));
22371 Value *High = EmitScalarExpr(E->getArg(1));
22372 unsigned IntNo;
22373 switch (BuiltinID) {
22374 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22375 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22376 IntNo = Intrinsic::wasm_narrow_signed;
22377 break;
22378 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22379 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
22380 IntNo = Intrinsic::wasm_narrow_unsigned;
22381 break;
22382 default:
22383 llvm_unreachable("unexpected builtin ID");
22385 Function *Callee =
22386 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
22387 return Builder.CreateCall(Callee, {Low, High});
22389 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22390 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
22391 Value *Vec = EmitScalarExpr(E->getArg(0));
22392 unsigned IntNo;
22393 switch (BuiltinID) {
22394 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22395 IntNo = Intrinsic::fptosi_sat;
22396 break;
22397 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
22398 IntNo = Intrinsic::fptoui_sat;
22399 break;
22400 default:
22401 llvm_unreachable("unexpected builtin ID");
22403 llvm::Type *SrcT = Vec->getType();
22404 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
22405 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
22406 Value *Trunc = Builder.CreateCall(Callee, Vec);
22407 Value *Splat = Constant::getNullValue(TruncT);
22408 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
22410 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
22411 Value *Ops[18];
22412 size_t OpIdx = 0;
22413 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
22414 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
22415 while (OpIdx < 18) {
22416 std::optional<llvm::APSInt> LaneConst =
22417 E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
22418 assert(LaneConst && "Constant arg isn't actually constant?");
22419 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
22421 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
22422 return Builder.CreateCall(Callee, Ops);
22424 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22425 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22426 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22427 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22428 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22429 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
22430 Value *A = EmitScalarExpr(E->getArg(0));
22431 Value *B = EmitScalarExpr(E->getArg(1));
22432 Value *C = EmitScalarExpr(E->getArg(2));
22433 unsigned IntNo;
22434 switch (BuiltinID) {
22435 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22436 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22437 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22438 IntNo = Intrinsic::wasm_relaxed_madd;
22439 break;
22440 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22441 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22442 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
22443 IntNo = Intrinsic::wasm_relaxed_nmadd;
22444 break;
22445 default:
22446 llvm_unreachable("unexpected builtin ID");
22448 Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
22449 return Builder.CreateCall(Callee, {A, B, C});
22451 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
22452 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
22453 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
22454 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
22455 Value *A = EmitScalarExpr(E->getArg(0));
22456 Value *B = EmitScalarExpr(E->getArg(1));
22457 Value *C = EmitScalarExpr(E->getArg(2));
22458 Function *Callee =
22459 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
22460 return Builder.CreateCall(Callee, {A, B, C});
22462 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
22463 Value *Src = EmitScalarExpr(E->getArg(0));
22464 Value *Indices = EmitScalarExpr(E->getArg(1));
22465 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
22466 return Builder.CreateCall(Callee, {Src, Indices});
22468 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22469 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22470 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22471 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
22472 Value *LHS = EmitScalarExpr(E->getArg(0));
22473 Value *RHS = EmitScalarExpr(E->getArg(1));
22474 unsigned IntNo;
22475 switch (BuiltinID) {
22476 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22477 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22478 IntNo = Intrinsic::wasm_relaxed_min;
22479 break;
22480 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22481 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
22482 IntNo = Intrinsic::wasm_relaxed_max;
22483 break;
22484 default:
22485 llvm_unreachable("unexpected builtin ID");
22487 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
22488 return Builder.CreateCall(Callee, {LHS, RHS});
22490 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22491 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22492 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22493 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
22494 Value *Vec = EmitScalarExpr(E->getArg(0));
22495 unsigned IntNo;
22496 switch (BuiltinID) {
22497 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22498 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
22499 break;
22500 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22501 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
22502 break;
22503 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22504 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
22505 break;
22506 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
22507 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
22508 break;
22509 default:
22510 llvm_unreachable("unexpected builtin ID");
22512 Function *Callee = CGM.getIntrinsic(IntNo);
22513 return Builder.CreateCall(Callee, {Vec});
22515 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
22516 Value *LHS = EmitScalarExpr(E->getArg(0));
22517 Value *RHS = EmitScalarExpr(E->getArg(1));
22518 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
22519 return Builder.CreateCall(Callee, {LHS, RHS});
22521 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
22522 Value *LHS = EmitScalarExpr(E->getArg(0));
22523 Value *RHS = EmitScalarExpr(E->getArg(1));
22524 Function *Callee =
22525 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
22526 return Builder.CreateCall(Callee, {LHS, RHS});
22528 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
22529 Value *LHS = EmitScalarExpr(E->getArg(0));
22530 Value *RHS = EmitScalarExpr(E->getArg(1));
22531 Value *Acc = EmitScalarExpr(E->getArg(2));
22532 Function *Callee =
22533 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
22534 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22536 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
22537 Value *LHS = EmitScalarExpr(E->getArg(0));
22538 Value *RHS = EmitScalarExpr(E->getArg(1));
22539 Value *Acc = EmitScalarExpr(E->getArg(2));
22540 Function *Callee =
22541 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
22542 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22544 case WebAssembly::BI__builtin_wasm_loadf16_f32: {
22545 Value *Addr = EmitScalarExpr(E->getArg(0));
22546 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
22547 return Builder.CreateCall(Callee, {Addr});
22549 case WebAssembly::BI__builtin_wasm_storef16_f32: {
22550 Value *Val = EmitScalarExpr(E->getArg(0));
22551 Value *Addr = EmitScalarExpr(E->getArg(1));
22552 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
22553 return Builder.CreateCall(Callee, {Val, Addr});
22555 case WebAssembly::BI__builtin_wasm_splat_f16x8: {
22556 Value *Val = EmitScalarExpr(E->getArg(0));
22557 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_splat_f16x8);
22558 return Builder.CreateCall(Callee, {Val});
22560 case WebAssembly::BI__builtin_wasm_extract_lane_f16x8: {
22561 Value *Vector = EmitScalarExpr(E->getArg(0));
22562 Value *Index = EmitScalarExpr(E->getArg(1));
22563 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);
22564 return Builder.CreateCall(Callee, {Vector, Index});
22566 case WebAssembly::BI__builtin_wasm_replace_lane_f16x8: {
22567 Value *Vector = EmitScalarExpr(E->getArg(0));
22568 Value *Index = EmitScalarExpr(E->getArg(1));
22569 Value *Val = EmitScalarExpr(E->getArg(2));
22570 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_replace_lane_f16x8);
22571 return Builder.CreateCall(Callee, {Vector, Index, Val});
22573 case WebAssembly::BI__builtin_wasm_table_get: {
22574 assert(E->getArg(0)->getType()->isArrayType());
22575 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22576 Value *Index = EmitScalarExpr(E->getArg(1));
22577 Function *Callee;
22578 if (E->getType().isWebAssemblyExternrefType())
22579 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
22580 else if (E->getType().isWebAssemblyFuncrefType())
22581 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
22582 else
22583 llvm_unreachable(
22584 "Unexpected reference type for __builtin_wasm_table_get");
22585 return Builder.CreateCall(Callee, {Table, Index});
22587 case WebAssembly::BI__builtin_wasm_table_set: {
22588 assert(E->getArg(0)->getType()->isArrayType());
22589 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22590 Value *Index = EmitScalarExpr(E->getArg(1));
22591 Value *Val = EmitScalarExpr(E->getArg(2));
22592 Function *Callee;
22593 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
22594 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
22595 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22596 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
22597 else
22598 llvm_unreachable(
22599 "Unexpected reference type for __builtin_wasm_table_set");
22600 return Builder.CreateCall(Callee, {Table, Index, Val});
22602 case WebAssembly::BI__builtin_wasm_table_size: {
22603 assert(E->getArg(0)->getType()->isArrayType());
22604 Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22605 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
22606 return Builder.CreateCall(Callee, Value);
22608 case WebAssembly::BI__builtin_wasm_table_grow: {
22609 assert(E->getArg(0)->getType()->isArrayType());
22610 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22611 Value *Val = EmitScalarExpr(E->getArg(1));
22612 Value *NElems = EmitScalarExpr(E->getArg(2));
22614 Function *Callee;
22615 if (E->getArg(1)->getType().isWebAssemblyExternrefType())
22616 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
22617 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22618 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
22619 else
22620 llvm_unreachable(
22621 "Unexpected reference type for __builtin_wasm_table_grow");
22623 return Builder.CreateCall(Callee, {Table, Val, NElems});
22625 case WebAssembly::BI__builtin_wasm_table_fill: {
22626 assert(E->getArg(0)->getType()->isArrayType());
22627 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22628 Value *Index = EmitScalarExpr(E->getArg(1));
22629 Value *Val = EmitScalarExpr(E->getArg(2));
22630 Value *NElems = EmitScalarExpr(E->getArg(3));
22632 Function *Callee;
22633 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
22634 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
22635 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22636 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
22637 else
22638 llvm_unreachable(
22639 "Unexpected reference type for __builtin_wasm_table_fill");
22641 return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
22643 case WebAssembly::BI__builtin_wasm_table_copy: {
22644 assert(E->getArg(0)->getType()->isArrayType());
22645 Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22646 Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);
22647 Value *DstIdx = EmitScalarExpr(E->getArg(2));
22648 Value *SrcIdx = EmitScalarExpr(E->getArg(3));
22649 Value *NElems = EmitScalarExpr(E->getArg(4));
22651 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
22653 return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
22655 default:
22656 return nullptr;
22660 static std::pair<Intrinsic::ID, unsigned>
22661 getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
22662 struct Info {
22663 unsigned BuiltinID;
22664 Intrinsic::ID IntrinsicID;
22665 unsigned VecLen;
22667 static Info Infos[] = {
22668 #define CUSTOM_BUILTIN_MAPPING(x,s) \
22669 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
22670 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
22671 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
22672 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
22673 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
22674 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
22675 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
22676 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
22677 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
22678 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
22679 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
22680 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
22681 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
22682 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
22683 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
22684 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
22685 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
22686 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
22687 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
22688 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
22689 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
22690 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
22691 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
22692 // Legacy builtins that take a vector in place of a vector predicate.
22693 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
22694 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
22695 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
22696 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
22697 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
22698 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
22699 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
22700 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
22701 #include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
22702 #undef CUSTOM_BUILTIN_MAPPING
22705 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
22706 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
22707 (void)SortOnce;
22709 const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
22710 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
22711 return {Intrinsic::not_intrinsic, 0};
22713 return {F->IntrinsicID, F->VecLen};
22716 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
22717 const CallExpr *E) {
22718 Intrinsic::ID ID;
22719 unsigned VecLen;
22720 std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
22722 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
22723 // The base pointer is passed by address, so it needs to be loaded.
22724 Address A = EmitPointerWithAlignment(E->getArg(0));
22725 Address BP = Address(A.emitRawPointer(*this), Int8PtrTy, A.getAlignment());
22726 llvm::Value *Base = Builder.CreateLoad(BP);
22727 // The treatment of both loads and stores is the same: the arguments for
22728 // the builtin are the same as the arguments for the intrinsic.
22729 // Load:
22730 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
22731 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
22732 // Store:
22733 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
22734 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
22735 SmallVector<llvm::Value*,5> Ops = { Base };
22736 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
22737 Ops.push_back(EmitScalarExpr(E->getArg(i)));
22739 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
22740 // The load intrinsics generate two results (Value, NewBase), stores
22741 // generate one (NewBase). The new base address needs to be stored.
22742 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
22743 : Result;
22744 llvm::Value *LV = EmitScalarExpr(E->getArg(0));
22745 Address Dest = EmitPointerWithAlignment(E->getArg(0));
22746 llvm::Value *RetVal =
22747 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
22748 if (IsLoad)
22749 RetVal = Builder.CreateExtractValue(Result, 0);
22750 return RetVal;
22753 // Handle the conversion of bit-reverse load intrinsics to bit code.
22754 // The intrinsic call after this function only reads from memory and the
22755 // write to memory is dealt by the store instruction.
22756 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
22757 // The intrinsic generates one result, which is the new value for the base
22758 // pointer. It needs to be returned. The result of the load instruction is
22759 // passed to intrinsic by address, so the value needs to be stored.
22760 llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
22762 // Expressions like &(*pt++) will be incremented per evaluation.
22763 // EmitPointerWithAlignment and EmitScalarExpr evaluates the expression
22764 // per call.
22765 Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
22766 DestAddr = DestAddr.withElementType(Int8Ty);
22767 llvm::Value *DestAddress = DestAddr.emitRawPointer(*this);
22769 // Operands are Base, Dest, Modifier.
22770 // The intrinsic format in LLVM IR is defined as
22771 // { ValueType, i8* } (i8*, i32).
22772 llvm::Value *Result = Builder.CreateCall(
22773 CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
22775 // The value needs to be stored as the variable is passed by reference.
22776 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
22778 // The store needs to be truncated to fit the destination type.
22779 // While i32 and i64 are natively supported on Hexagon, i8 and i16 needs
22780 // to be handled with stores of respective destination type.
22781 DestVal = Builder.CreateTrunc(DestVal, DestTy);
22783 Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
22784 // The updated value of the base pointer is returned.
22785 return Builder.CreateExtractValue(Result, 1);
22788 auto V2Q = [this, VecLen] (llvm::Value *Vec) {
22789 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
22790 : Intrinsic::hexagon_V6_vandvrt;
22791 return Builder.CreateCall(CGM.getIntrinsic(ID),
22792 {Vec, Builder.getInt32(-1)});
22794 auto Q2V = [this, VecLen] (llvm::Value *Pred) {
22795 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
22796 : Intrinsic::hexagon_V6_vandqrt;
22797 return Builder.CreateCall(CGM.getIntrinsic(ID),
22798 {Pred, Builder.getInt32(-1)});
22801 switch (BuiltinID) {
22802 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
22803 // and the corresponding C/C++ builtins use loads/stores to update
22804 // the predicate.
22805 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
22806 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
22807 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
22808 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
22809 // Get the type from the 0-th argument.
22810 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
22811 Address PredAddr =
22812 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
22813 llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
22814 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
22815 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
22817 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
22818 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
22819 PredAddr.getAlignment());
22820 return Builder.CreateExtractValue(Result, 0);
22822 // These are identical to the builtins above, except they don't consume
22823 // input carry, only generate carry-out. Since they still produce two
22824 // outputs, generate the store of the predicate, but no load.
22825 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
22826 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
22827 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
22828 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
22829 // Get the type from the 0-th argument.
22830 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
22831 Address PredAddr =
22832 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
22833 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
22834 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
22836 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
22837 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
22838 PredAddr.getAlignment());
22839 return Builder.CreateExtractValue(Result, 0);
22842 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
22843 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
22844 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
22845 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
22846 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
22847 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
22848 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
22849 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
22850 SmallVector<llvm::Value*,4> Ops;
22851 const Expr *PredOp = E->getArg(0);
22852 // There will be an implicit cast to a boolean vector. Strip it.
22853 if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
22854 if (Cast->getCastKind() == CK_BitCast)
22855 PredOp = Cast->getSubExpr();
22856 Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
22858 for (int i = 1, e = E->getNumArgs(); i != e; ++i)
22859 Ops.push_back(EmitScalarExpr(E->getArg(i)));
22860 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
22863 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
22864 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
22865 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
22866 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
22867 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
22868 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
22869 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
22870 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
22871 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
22872 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
22873 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
22874 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
22875 return MakeCircOp(ID, /*IsLoad=*/true);
22876 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
22877 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
22878 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
22879 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
22880 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
22881 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
22882 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
22883 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
22884 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
22885 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
22886 return MakeCircOp(ID, /*IsLoad=*/false);
22887 case Hexagon::BI__builtin_brev_ldub:
22888 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
22889 case Hexagon::BI__builtin_brev_ldb:
22890 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
22891 case Hexagon::BI__builtin_brev_lduh:
22892 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
22893 case Hexagon::BI__builtin_brev_ldh:
22894 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
22895 case Hexagon::BI__builtin_brev_ldw:
22896 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
22897 case Hexagon::BI__builtin_brev_ldd:
22898 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
22899 } // switch
22901 return nullptr;
22904 Value *CodeGenFunction::EmitRISCVCpuIs(const CallExpr *E) {
22905 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
22906 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
22907 return EmitRISCVCpuIs(CPUStr);
22910 Value *CodeGenFunction::EmitRISCVCpuIs(StringRef CPUStr) {
22911 llvm::Type *Int32Ty = Builder.getInt32Ty();
22912 llvm::Type *Int64Ty = Builder.getInt64Ty();
22913 llvm::StructType *StructTy = llvm::StructType::get(Int32Ty, Int64Ty, Int64Ty);
22914 llvm::Constant *RISCVCPUModel =
22915 CGM.CreateRuntimeVariable(StructTy, "__riscv_cpu_model");
22916 cast<llvm::GlobalValue>(RISCVCPUModel)->setDSOLocal(true);
22918 auto loadRISCVCPUID = [&](unsigned Index) {
22919 Value *Ptr = Builder.CreateStructGEP(StructTy, RISCVCPUModel, Index);
22920 Value *CPUID = Builder.CreateAlignedLoad(StructTy->getTypeAtIndex(Index),
22921 Ptr, llvm::MaybeAlign());
22922 return CPUID;
22925 const llvm::RISCV::CPUModel Model = llvm::RISCV::getCPUModel(CPUStr);
22927 // Compare mvendorid.
22928 Value *VendorID = loadRISCVCPUID(0);
22929 Value *Result =
22930 Builder.CreateICmpEQ(VendorID, Builder.getInt32(Model.MVendorID));
22932 // Compare marchid.
22933 Value *ArchID = loadRISCVCPUID(1);
22934 Result = Builder.CreateAnd(
22935 Result, Builder.CreateICmpEQ(ArchID, Builder.getInt64(Model.MArchID)));
22937 // Compare mimpid.
22938 Value *ImpID = loadRISCVCPUID(2);
22939 Result = Builder.CreateAnd(
22940 Result, Builder.CreateICmpEQ(ImpID, Builder.getInt64(Model.MImpID)));
22942 return Result;
22945 Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
22946 const CallExpr *E,
22947 ReturnValueSlot ReturnValue) {
22949 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
22950 return EmitRISCVCpuSupports(E);
22951 if (BuiltinID == Builtin::BI__builtin_cpu_init)
22952 return EmitRISCVCpuInit();
22953 if (BuiltinID == Builtin::BI__builtin_cpu_is)
22954 return EmitRISCVCpuIs(E);
22956 SmallVector<Value *, 4> Ops;
22957 llvm::Type *ResultType = ConvertType(E->getType());
22959 // Find out if any arguments are required to be integer constant expressions.
22960 unsigned ICEArguments = 0;
22961 ASTContext::GetBuiltinTypeError Error;
22962 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
22963 if (Error == ASTContext::GE_Missing_type) {
22964 // Vector intrinsics don't have a type string.
22965 assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
22966 BuiltinID <= clang::RISCV::LastRVVBuiltin);
22967 ICEArguments = 0;
22968 if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
22969 BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
22970 ICEArguments = 1 << 1;
22971 } else {
22972 assert(Error == ASTContext::GE_None && "Unexpected error");
22975 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
22976 ICEArguments |= (1 << 1);
22977 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
22978 ICEArguments |= (1 << 2);
22980 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
22981 // Handle aggregate argument, namely RVV tuple types in segment load/store
22982 if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
22983 LValue L = EmitAggExprToLValue(E->getArg(i));
22984 llvm::Value *AggValue = Builder.CreateLoad(L.getAddress());
22985 Ops.push_back(AggValue);
22986 continue;
22988 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
22991 Intrinsic::ID ID = Intrinsic::not_intrinsic;
22992 // The 0th bit simulates the `vta` of RVV
22993 // The 1st bit simulates the `vma` of RVV
22994 constexpr unsigned RVV_VTA = 0x1;
22995 constexpr unsigned RVV_VMA = 0x2;
22996 int PolicyAttrs = 0;
22997 bool IsMasked = false;
22998 // This is used by segment load/store to determine it's llvm type.
22999 unsigned SegInstSEW = 8;
23001 // Required for overloaded intrinsics.
23002 llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
23003 switch (BuiltinID) {
23004 default: llvm_unreachable("unexpected builtin ID");
23005 case RISCV::BI__builtin_riscv_orc_b_32:
23006 case RISCV::BI__builtin_riscv_orc_b_64:
23007 case RISCV::BI__builtin_riscv_clmul_32:
23008 case RISCV::BI__builtin_riscv_clmul_64:
23009 case RISCV::BI__builtin_riscv_clmulh_32:
23010 case RISCV::BI__builtin_riscv_clmulh_64:
23011 case RISCV::BI__builtin_riscv_clmulr_32:
23012 case RISCV::BI__builtin_riscv_clmulr_64:
23013 case RISCV::BI__builtin_riscv_xperm4_32:
23014 case RISCV::BI__builtin_riscv_xperm4_64:
23015 case RISCV::BI__builtin_riscv_xperm8_32:
23016 case RISCV::BI__builtin_riscv_xperm8_64:
23017 case RISCV::BI__builtin_riscv_brev8_32:
23018 case RISCV::BI__builtin_riscv_brev8_64:
23019 case RISCV::BI__builtin_riscv_zip_32:
23020 case RISCV::BI__builtin_riscv_unzip_32: {
23021 switch (BuiltinID) {
23022 default: llvm_unreachable("unexpected builtin ID");
23023 // Zbb
23024 case RISCV::BI__builtin_riscv_orc_b_32:
23025 case RISCV::BI__builtin_riscv_orc_b_64:
23026 ID = Intrinsic::riscv_orc_b;
23027 break;
23029 // Zbc
23030 case RISCV::BI__builtin_riscv_clmul_32:
23031 case RISCV::BI__builtin_riscv_clmul_64:
23032 ID = Intrinsic::riscv_clmul;
23033 break;
23034 case RISCV::BI__builtin_riscv_clmulh_32:
23035 case RISCV::BI__builtin_riscv_clmulh_64:
23036 ID = Intrinsic::riscv_clmulh;
23037 break;
23038 case RISCV::BI__builtin_riscv_clmulr_32:
23039 case RISCV::BI__builtin_riscv_clmulr_64:
23040 ID = Intrinsic::riscv_clmulr;
23041 break;
23043 // Zbkx
23044 case RISCV::BI__builtin_riscv_xperm8_32:
23045 case RISCV::BI__builtin_riscv_xperm8_64:
23046 ID = Intrinsic::riscv_xperm8;
23047 break;
23048 case RISCV::BI__builtin_riscv_xperm4_32:
23049 case RISCV::BI__builtin_riscv_xperm4_64:
23050 ID = Intrinsic::riscv_xperm4;
23051 break;
23053 // Zbkb
23054 case RISCV::BI__builtin_riscv_brev8_32:
23055 case RISCV::BI__builtin_riscv_brev8_64:
23056 ID = Intrinsic::riscv_brev8;
23057 break;
23058 case RISCV::BI__builtin_riscv_zip_32:
23059 ID = Intrinsic::riscv_zip;
23060 break;
23061 case RISCV::BI__builtin_riscv_unzip_32:
23062 ID = Intrinsic::riscv_unzip;
23063 break;
23066 IntrinsicTypes = {ResultType};
23067 break;
23070 // Zk builtins
23072 // Zknh
23073 case RISCV::BI__builtin_riscv_sha256sig0:
23074 ID = Intrinsic::riscv_sha256sig0;
23075 break;
23076 case RISCV::BI__builtin_riscv_sha256sig1:
23077 ID = Intrinsic::riscv_sha256sig1;
23078 break;
23079 case RISCV::BI__builtin_riscv_sha256sum0:
23080 ID = Intrinsic::riscv_sha256sum0;
23081 break;
23082 case RISCV::BI__builtin_riscv_sha256sum1:
23083 ID = Intrinsic::riscv_sha256sum1;
23084 break;
23086 // Zksed
23087 case RISCV::BI__builtin_riscv_sm4ks:
23088 ID = Intrinsic::riscv_sm4ks;
23089 break;
23090 case RISCV::BI__builtin_riscv_sm4ed:
23091 ID = Intrinsic::riscv_sm4ed;
23092 break;
23094 // Zksh
23095 case RISCV::BI__builtin_riscv_sm3p0:
23096 ID = Intrinsic::riscv_sm3p0;
23097 break;
23098 case RISCV::BI__builtin_riscv_sm3p1:
23099 ID = Intrinsic::riscv_sm3p1;
23100 break;
23102 case RISCV::BI__builtin_riscv_clz_32:
23103 case RISCV::BI__builtin_riscv_clz_64: {
23104 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
23105 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
23106 if (Result->getType() != ResultType)
23107 Result =
23108 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
23109 return Result;
23111 case RISCV::BI__builtin_riscv_ctz_32:
23112 case RISCV::BI__builtin_riscv_ctz_64: {
23113 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
23114 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
23115 if (Result->getType() != ResultType)
23116 Result =
23117 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
23118 return Result;
23121 // Zihintntl
23122 case RISCV::BI__builtin_riscv_ntl_load: {
23123 llvm::Type *ResTy = ConvertType(E->getType());
23124 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
23125 if (Ops.size() == 2)
23126 DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();
23128 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
23129 getLLVMContext(),
23130 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
23131 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
23132 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
23134 int Width;
23135 if(ResTy->isScalableTy()) {
23136 const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
23137 llvm::Type *ScalarTy = ResTy->getScalarType();
23138 Width = ScalarTy->getPrimitiveSizeInBits() *
23139 SVTy->getElementCount().getKnownMinValue();
23140 } else
23141 Width = ResTy->getPrimitiveSizeInBits();
23142 LoadInst *Load = Builder.CreateLoad(
23143 Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));
23145 Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
23146 Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
23147 RISCVDomainNode);
23149 return Load;
23151 case RISCV::BI__builtin_riscv_ntl_store: {
23152 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
23153 if (Ops.size() == 3)
23154 DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();
23156 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
23157 getLLVMContext(),
23158 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
23159 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
23160 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
23162 StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
23163 Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
23164 Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
23165 RISCVDomainNode);
23167 return Store;
23169 // XCValu
23170 case RISCV::BI__builtin_riscv_cv_alu_addN:
23171 ID = Intrinsic::riscv_cv_alu_addN;
23172 break;
23173 case RISCV::BI__builtin_riscv_cv_alu_addRN:
23174 ID = Intrinsic::riscv_cv_alu_addRN;
23175 break;
23176 case RISCV::BI__builtin_riscv_cv_alu_adduN:
23177 ID = Intrinsic::riscv_cv_alu_adduN;
23178 break;
23179 case RISCV::BI__builtin_riscv_cv_alu_adduRN:
23180 ID = Intrinsic::riscv_cv_alu_adduRN;
23181 break;
23182 case RISCV::BI__builtin_riscv_cv_alu_clip:
23183 ID = Intrinsic::riscv_cv_alu_clip;
23184 break;
23185 case RISCV::BI__builtin_riscv_cv_alu_clipu:
23186 ID = Intrinsic::riscv_cv_alu_clipu;
23187 break;
23188 case RISCV::BI__builtin_riscv_cv_alu_extbs:
23189 return Builder.CreateSExt(Builder.CreateTrunc(Ops[0], Int8Ty), Int32Ty,
23190 "extbs");
23191 case RISCV::BI__builtin_riscv_cv_alu_extbz:
23192 return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int8Ty), Int32Ty,
23193 "extbz");
23194 case RISCV::BI__builtin_riscv_cv_alu_exths:
23195 return Builder.CreateSExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
23196 "exths");
23197 case RISCV::BI__builtin_riscv_cv_alu_exthz:
23198 return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
23199 "exthz");
23200 case RISCV::BI__builtin_riscv_cv_alu_slet:
23201 return Builder.CreateZExt(Builder.CreateICmpSLE(Ops[0], Ops[1]), Int32Ty,
23202 "sle");
23203 case RISCV::BI__builtin_riscv_cv_alu_sletu:
23204 return Builder.CreateZExt(Builder.CreateICmpULE(Ops[0], Ops[1]), Int32Ty,
23205 "sleu");
23206 case RISCV::BI__builtin_riscv_cv_alu_subN:
23207 ID = Intrinsic::riscv_cv_alu_subN;
23208 break;
23209 case RISCV::BI__builtin_riscv_cv_alu_subRN:
23210 ID = Intrinsic::riscv_cv_alu_subRN;
23211 break;
23212 case RISCV::BI__builtin_riscv_cv_alu_subuN:
23213 ID = Intrinsic::riscv_cv_alu_subuN;
23214 break;
23215 case RISCV::BI__builtin_riscv_cv_alu_subuRN:
23216 ID = Intrinsic::riscv_cv_alu_subuRN;
23217 break;
23219 // Vector builtins are handled from here.
23220 #include "clang/Basic/riscv_vector_builtin_cg.inc"
23222 // SiFive Vector builtins are handled from here.
23223 #include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
23226 assert(ID != Intrinsic::not_intrinsic);
23228 llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
23229 return Builder.CreateCall(F, Ops, "");