//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//
14 #include "CGCUDARuntime.h"
16 #include "CGHLSLRuntime.h"
17 #include "CGObjCRuntime.h"
18 #include "CGOpenCLRuntime.h"
19 #include "CGRecordLayout.h"
21 #include "CodeGenFunction.h"
22 #include "CodeGenModule.h"
23 #include "ConstantEmitter.h"
24 #include "PatternInit.h"
25 #include "TargetInfo.h"
26 #include "clang/AST/ASTContext.h"
27 #include "clang/AST/Attr.h"
28 #include "clang/AST/Decl.h"
29 #include "clang/AST/Expr.h"
30 #include "clang/AST/OSLog.h"
31 #include "clang/AST/OperationKinds.h"
32 #include "clang/AST/Type.h"
33 #include "clang/Basic/TargetBuiltins.h"
34 #include "clang/Basic/TargetInfo.h"
35 #include "clang/Basic/TargetOptions.h"
36 #include "clang/CodeGen/CGFunctionInfo.h"
37 #include "clang/Frontend/FrontendDiagnostic.h"
38 #include "llvm/ADT/APFloat.h"
39 #include "llvm/ADT/APInt.h"
40 #include "llvm/ADT/FloatingPointMode.h"
41 #include "llvm/ADT/SmallPtrSet.h"
42 #include "llvm/ADT/StringExtras.h"
43 #include "llvm/Analysis/ValueTracking.h"
44 #include "llvm/IR/DataLayout.h"
45 #include "llvm/IR/InlineAsm.h"
46 #include "llvm/IR/Intrinsics.h"
47 #include "llvm/IR/IntrinsicsAArch64.h"
48 #include "llvm/IR/IntrinsicsAMDGPU.h"
49 #include "llvm/IR/IntrinsicsARM.h"
50 #include "llvm/IR/IntrinsicsBPF.h"
51 #include "llvm/IR/IntrinsicsDirectX.h"
52 #include "llvm/IR/IntrinsicsHexagon.h"
53 #include "llvm/IR/IntrinsicsNVPTX.h"
54 #include "llvm/IR/IntrinsicsPowerPC.h"
55 #include "llvm/IR/IntrinsicsR600.h"
56 #include "llvm/IR/IntrinsicsRISCV.h"
57 #include "llvm/IR/IntrinsicsS390.h"
58 #include "llvm/IR/IntrinsicsWebAssembly.h"
59 #include "llvm/IR/IntrinsicsX86.h"
60 #include "llvm/IR/MDBuilder.h"
61 #include "llvm/IR/MatrixBuilder.h"
62 #include "llvm/IR/MemoryModelRelaxationAnnotations.h"
63 #include "llvm/Support/AMDGPUAddrSpace.h"
64 #include "llvm/Support/ConvertUTF.h"
65 #include "llvm/Support/MathExtras.h"
66 #include "llvm/Support/ScopedPrinter.h"
67 #include "llvm/TargetParser/AArch64TargetParser.h"
68 #include "llvm/TargetParser/RISCVISAInfo.h"
69 #include "llvm/TargetParser/RISCVTargetParser.h"
70 #include "llvm/TargetParser/X86TargetParser.h"
74 using namespace clang
;
75 using namespace CodeGen
;
78 static void initializeAlloca(CodeGenFunction
&CGF
, AllocaInst
*AI
, Value
*Size
,
79 Align AlignmentInBytes
) {
81 switch (CGF
.getLangOpts().getTrivialAutoVarInit()) {
82 case LangOptions::TrivialAutoVarInitKind::Uninitialized
:
83 // Nothing to initialize.
85 case LangOptions::TrivialAutoVarInitKind::Zero
:
86 Byte
= CGF
.Builder
.getInt8(0x00);
88 case LangOptions::TrivialAutoVarInitKind::Pattern
: {
89 llvm::Type
*Int8
= llvm::IntegerType::getInt8Ty(CGF
.CGM
.getLLVMContext());
90 Byte
= llvm::dyn_cast
<llvm::ConstantInt
>(
91 initializationPatternFor(CGF
.CGM
, Int8
));
95 if (CGF
.CGM
.stopAutoInit())
97 auto *I
= CGF
.Builder
.CreateMemSet(AI
, Byte
, Size
, AlignmentInBytes
);
98 I
->addAnnotationMetadata("auto-init");
101 static Value
*handleHlslClip(const CallExpr
*E
, CodeGenFunction
*CGF
) {
102 Value
*Op0
= CGF
->EmitScalarExpr(E
->getArg(0));
104 Constant
*FZeroConst
= ConstantFP::getZero(CGF
->FloatTy
);
108 if (const auto *VecTy
= E
->getArg(0)->getType()->getAs
<clang::VectorType
>()) {
109 FZeroConst
= ConstantVector::getSplat(
110 ElementCount::getFixed(VecTy
->getNumElements()), FZeroConst
);
111 auto *FCompInst
= CGF
->Builder
.CreateFCmpOLT(Op0
, FZeroConst
);
112 CMP
= CGF
->Builder
.CreateIntrinsic(
113 CGF
->Builder
.getInt1Ty(), CGF
->CGM
.getHLSLRuntime().getAnyIntrinsic(),
114 {FCompInst
}, nullptr);
116 CMP
= CGF
->Builder
.CreateFCmpOLT(Op0
, FZeroConst
);
118 if (CGF
->CGM
.getTarget().getTriple().isDXIL())
119 LastInstr
= CGF
->Builder
.CreateIntrinsic(
120 CGF
->VoidTy
, llvm::Intrinsic::dx_discard
, {CMP
}, nullptr);
121 else if (CGF
->CGM
.getTarget().getTriple().isSPIRV()) {
122 BasicBlock
*LT0
= CGF
->createBasicBlock("lt0", CGF
->CurFn
);
123 BasicBlock
*End
= CGF
->createBasicBlock("end", CGF
->CurFn
);
125 CGF
->Builder
.CreateCondBr(CMP
, LT0
, End
);
127 CGF
->Builder
.SetInsertPoint(LT0
);
129 CGF
->Builder
.CreateIntrinsic(CGF
->VoidTy
, llvm::Intrinsic::spv_discard
, {},
132 LastInstr
= CGF
->Builder
.CreateBr(End
);
134 CGF
->Builder
.SetInsertPoint(End
);
136 llvm_unreachable("Backend Codegen not supported.");
142 static Value
*handleHlslSplitdouble(const CallExpr
*E
, CodeGenFunction
*CGF
) {
143 Value
*Op0
= CGF
->EmitScalarExpr(E
->getArg(0));
144 const auto *OutArg1
= dyn_cast
<HLSLOutArgExpr
>(E
->getArg(1));
145 const auto *OutArg2
= dyn_cast
<HLSLOutArgExpr
>(E
->getArg(2));
148 LValue Op1TmpLValue
=
149 CGF
->EmitHLSLOutArgExpr(OutArg1
, Args
, OutArg1
->getType());
150 LValue Op2TmpLValue
=
151 CGF
->EmitHLSLOutArgExpr(OutArg2
, Args
, OutArg2
->getType());
153 if (CGF
->getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee())
154 Args
.reverseWritebacks();
156 Value
*LowBits
= nullptr;
157 Value
*HighBits
= nullptr;
159 if (CGF
->CGM
.getTarget().getTriple().isDXIL()) {
161 llvm::Type
*RetElementTy
= CGF
->Int32Ty
;
162 if (auto *Op0VecTy
= E
->getArg(0)->getType()->getAs
<clang::VectorType
>())
163 RetElementTy
= llvm::VectorType::get(
164 CGF
->Int32Ty
, ElementCount::getFixed(Op0VecTy
->getNumElements()));
165 auto *RetTy
= llvm::StructType::get(RetElementTy
, RetElementTy
);
167 CallInst
*CI
= CGF
->Builder
.CreateIntrinsic(
168 RetTy
, Intrinsic::dx_splitdouble
, {Op0
}, nullptr, "hlsl.splitdouble");
170 LowBits
= CGF
->Builder
.CreateExtractValue(CI
, 0);
171 HighBits
= CGF
->Builder
.CreateExtractValue(CI
, 1);
174 // For Non DXIL targets we generate the instructions.
176 if (!Op0
->getType()->isVectorTy()) {
177 FixedVectorType
*DestTy
= FixedVectorType::get(CGF
->Int32Ty
, 2);
178 Value
*Bitcast
= CGF
->Builder
.CreateBitCast(Op0
, DestTy
);
180 LowBits
= CGF
->Builder
.CreateExtractElement(Bitcast
, (uint64_t)0);
181 HighBits
= CGF
->Builder
.CreateExtractElement(Bitcast
, 1);
184 if (const auto *VecTy
=
185 E
->getArg(0)->getType()->getAs
<clang::VectorType
>())
186 NumElements
= VecTy
->getNumElements();
188 FixedVectorType
*Uint32VecTy
=
189 FixedVectorType::get(CGF
->Int32Ty
, NumElements
* 2);
190 Value
*Uint32Vec
= CGF
->Builder
.CreateBitCast(Op0
, Uint32VecTy
);
191 if (NumElements
== 1) {
192 LowBits
= CGF
->Builder
.CreateExtractElement(Uint32Vec
, (uint64_t)0);
193 HighBits
= CGF
->Builder
.CreateExtractElement(Uint32Vec
, 1);
195 SmallVector
<int> EvenMask
, OddMask
;
196 for (int I
= 0, E
= NumElements
; I
!= E
; ++I
) {
197 EvenMask
.push_back(I
* 2);
198 OddMask
.push_back(I
* 2 + 1);
200 LowBits
= CGF
->Builder
.CreateShuffleVector(Uint32Vec
, EvenMask
);
201 HighBits
= CGF
->Builder
.CreateShuffleVector(Uint32Vec
, OddMask
);
205 CGF
->Builder
.CreateStore(LowBits
, Op1TmpLValue
.getAddress());
207 CGF
->Builder
.CreateStore(HighBits
, Op2TmpLValue
.getAddress());
208 CGF
->EmitWritebacks(Args
);
212 static Value
*handleAsDoubleBuiltin(CodeGenFunction
&CGF
, const CallExpr
*E
) {
213 assert((E
->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
214 E
->getArg(1)->getType()->hasUnsignedIntegerRepresentation()) &&
215 "asdouble operands types mismatch");
216 Value
*OpLowBits
= CGF
.EmitScalarExpr(E
->getArg(0));
217 Value
*OpHighBits
= CGF
.EmitScalarExpr(E
->getArg(1));
219 llvm::Type
*ResultType
= CGF
.DoubleTy
;
221 if (auto *VTy
= E
->getArg(0)->getType()->getAs
<clang::VectorType
>()) {
222 N
= VTy
->getNumElements();
223 ResultType
= llvm::FixedVectorType::get(CGF
.DoubleTy
, N
);
226 if (CGF
.CGM
.getTarget().getTriple().isDXIL())
227 return CGF
.Builder
.CreateIntrinsic(
228 /*ReturnType=*/ResultType
, Intrinsic::dx_asdouble
,
229 ArrayRef
<Value
*>{OpLowBits
, OpHighBits
}, nullptr, "hlsl.asdouble");
231 if (!E
->getArg(0)->getType()->isVectorType()) {
232 OpLowBits
= CGF
.Builder
.CreateVectorSplat(1, OpLowBits
);
233 OpHighBits
= CGF
.Builder
.CreateVectorSplat(1, OpHighBits
);
236 llvm::SmallVector
<int> Mask
;
237 for (int i
= 0; i
< N
; i
++) {
239 Mask
.push_back(i
+ N
);
242 Value
*BitVec
= CGF
.Builder
.CreateShuffleVector(OpLowBits
, OpHighBits
, Mask
);
244 return CGF
.Builder
.CreateBitCast(BitVec
, ResultType
);
247 /// Helper for the read/write/add/inc X18 builtins: read the X18 register and
248 /// return it as an i8 pointer.
249 Value
*readX18AsPtr(CodeGenFunction
&CGF
) {
250 LLVMContext
&Context
= CGF
.CGM
.getLLVMContext();
251 llvm::Metadata
*Ops
[] = {llvm::MDString::get(Context
, "x18")};
252 llvm::MDNode
*RegName
= llvm::MDNode::get(Context
, Ops
);
253 llvm::Value
*Metadata
= llvm::MetadataAsValue::get(Context
, RegName
);
255 CGF
.CGM
.getIntrinsic(llvm::Intrinsic::read_register
, {CGF
.Int64Ty
});
256 llvm::Value
*X18
= CGF
.Builder
.CreateCall(F
, Metadata
);
257 return CGF
.Builder
.CreateIntToPtr(X18
, CGF
.Int8PtrTy
);
260 /// getBuiltinLibFunction - Given a builtin id for a function like
261 /// "__builtin_fabsf", return a Function* for "fabsf".
262 llvm::Constant
*CodeGenModule::getBuiltinLibFunction(const FunctionDecl
*FD
,
263 unsigned BuiltinID
) {
264 assert(Context
.BuiltinInfo
.isLibFunction(BuiltinID
));
266 // Get the name, skip over the __builtin_ prefix (if necessary).
270 // TODO: This list should be expanded or refactored after all GCC-compatible
271 // std libcall builtins are implemented.
272 static SmallDenseMap
<unsigned, StringRef
, 64> F128Builtins
{
273 {Builtin::BI__builtin___fprintf_chk
, "__fprintf_chkieee128"},
274 {Builtin::BI__builtin___printf_chk
, "__printf_chkieee128"},
275 {Builtin::BI__builtin___snprintf_chk
, "__snprintf_chkieee128"},
276 {Builtin::BI__builtin___sprintf_chk
, "__sprintf_chkieee128"},
277 {Builtin::BI__builtin___vfprintf_chk
, "__vfprintf_chkieee128"},
278 {Builtin::BI__builtin___vprintf_chk
, "__vprintf_chkieee128"},
279 {Builtin::BI__builtin___vsnprintf_chk
, "__vsnprintf_chkieee128"},
280 {Builtin::BI__builtin___vsprintf_chk
, "__vsprintf_chkieee128"},
281 {Builtin::BI__builtin_fprintf
, "__fprintfieee128"},
282 {Builtin::BI__builtin_printf
, "__printfieee128"},
283 {Builtin::BI__builtin_snprintf
, "__snprintfieee128"},
284 {Builtin::BI__builtin_sprintf
, "__sprintfieee128"},
285 {Builtin::BI__builtin_vfprintf
, "__vfprintfieee128"},
286 {Builtin::BI__builtin_vprintf
, "__vprintfieee128"},
287 {Builtin::BI__builtin_vsnprintf
, "__vsnprintfieee128"},
288 {Builtin::BI__builtin_vsprintf
, "__vsprintfieee128"},
289 {Builtin::BI__builtin_fscanf
, "__fscanfieee128"},
290 {Builtin::BI__builtin_scanf
, "__scanfieee128"},
291 {Builtin::BI__builtin_sscanf
, "__sscanfieee128"},
292 {Builtin::BI__builtin_vfscanf
, "__vfscanfieee128"},
293 {Builtin::BI__builtin_vscanf
, "__vscanfieee128"},
294 {Builtin::BI__builtin_vsscanf
, "__vsscanfieee128"},
295 {Builtin::BI__builtin_nexttowardf128
, "__nexttowardieee128"},
298 // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
299 // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
300 // if it is 64-bit 'long double' mode.
301 static SmallDenseMap
<unsigned, StringRef
, 4> AIXLongDouble64Builtins
{
302 {Builtin::BI__builtin_frexpl
, "frexp"},
303 {Builtin::BI__builtin_ldexpl
, "ldexp"},
304 {Builtin::BI__builtin_modfl
, "modf"},
307 // If the builtin has been declared explicitly with an assembler label,
308 // use the mangled name. This differs from the plain label on platforms
309 // that prefix labels.
310 if (FD
->hasAttr
<AsmLabelAttr
>())
311 Name
= getMangledName(D
);
313 // TODO: This mutation should also be applied to other targets other than
314 // PPC, after backend supports IEEE 128-bit style libcalls.
315 if (getTriple().isPPC64() &&
316 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
317 F128Builtins
.contains(BuiltinID
))
318 Name
= F128Builtins
[BuiltinID
];
319 else if (getTriple().isOSAIX() &&
320 &getTarget().getLongDoubleFormat() ==
321 &llvm::APFloat::IEEEdouble() &&
322 AIXLongDouble64Builtins
.contains(BuiltinID
))
323 Name
= AIXLongDouble64Builtins
[BuiltinID
];
325 Name
= Context
.BuiltinInfo
.getName(BuiltinID
).substr(10);
328 llvm::FunctionType
*Ty
=
329 cast
<llvm::FunctionType
>(getTypes().ConvertType(FD
->getType()));
331 return GetOrCreateLLVMFunction(Name
, Ty
, D
, /*ForVTable=*/false);
334 /// Emit the conversions required to turn the given value into an
335 /// integer of the given size.
336 static Value
*EmitToInt(CodeGenFunction
&CGF
, llvm::Value
*V
,
337 QualType T
, llvm::IntegerType
*IntType
) {
338 V
= CGF
.EmitToMemory(V
, T
);
340 if (V
->getType()->isPointerTy())
341 return CGF
.Builder
.CreatePtrToInt(V
, IntType
);
343 assert(V
->getType() == IntType
);
347 static Value
*EmitFromInt(CodeGenFunction
&CGF
, llvm::Value
*V
,
348 QualType T
, llvm::Type
*ResultType
) {
349 V
= CGF
.EmitFromMemory(V
, T
);
351 if (ResultType
->isPointerTy())
352 return CGF
.Builder
.CreateIntToPtr(V
, ResultType
);
354 assert(V
->getType() == ResultType
);
358 static Address
CheckAtomicAlignment(CodeGenFunction
&CGF
, const CallExpr
*E
) {
359 ASTContext
&Ctx
= CGF
.getContext();
360 Address Ptr
= CGF
.EmitPointerWithAlignment(E
->getArg(0));
361 unsigned Bytes
= Ptr
.getElementType()->isPointerTy()
362 ? Ctx
.getTypeSizeInChars(Ctx
.VoidPtrTy
).getQuantity()
363 : Ptr
.getElementType()->getScalarSizeInBits() / 8;
364 unsigned Align
= Ptr
.getAlignment().getQuantity();
365 if (Align
% Bytes
!= 0) {
366 DiagnosticsEngine
&Diags
= CGF
.CGM
.getDiags();
367 Diags
.Report(E
->getBeginLoc(), diag::warn_sync_op_misaligned
);
368 // Force address to be at least naturally-aligned.
369 return Ptr
.withAlignment(CharUnits::fromQuantity(Bytes
));
374 /// Utility to insert an atomic instruction based on Intrinsic::ID
375 /// and the expression node.
376 static Value
*MakeBinaryAtomicValue(
377 CodeGenFunction
&CGF
, llvm::AtomicRMWInst::BinOp Kind
, const CallExpr
*E
,
378 AtomicOrdering Ordering
= AtomicOrdering::SequentiallyConsistent
) {
380 QualType T
= E
->getType();
381 assert(E
->getArg(0)->getType()->isPointerType());
382 assert(CGF
.getContext().hasSameUnqualifiedType(T
,
383 E
->getArg(0)->getType()->getPointeeType()));
384 assert(CGF
.getContext().hasSameUnqualifiedType(T
, E
->getArg(1)->getType()));
386 Address DestAddr
= CheckAtomicAlignment(CGF
, E
);
388 llvm::IntegerType
*IntType
= llvm::IntegerType::get(
389 CGF
.getLLVMContext(), CGF
.getContext().getTypeSize(T
));
391 llvm::Value
*Val
= CGF
.EmitScalarExpr(E
->getArg(1));
392 llvm::Type
*ValueType
= Val
->getType();
393 Val
= EmitToInt(CGF
, Val
, T
, IntType
);
395 llvm::Value
*Result
=
396 CGF
.Builder
.CreateAtomicRMW(Kind
, DestAddr
, Val
, Ordering
);
397 return EmitFromInt(CGF
, Result
, T
, ValueType
);
400 static Value
*EmitNontemporalStore(CodeGenFunction
&CGF
, const CallExpr
*E
) {
401 Value
*Val
= CGF
.EmitScalarExpr(E
->getArg(0));
402 Address Addr
= CGF
.EmitPointerWithAlignment(E
->getArg(1));
404 Val
= CGF
.EmitToMemory(Val
, E
->getArg(0)->getType());
405 LValue LV
= CGF
.MakeAddrLValue(Addr
, E
->getArg(0)->getType());
406 LV
.setNontemporal(true);
407 CGF
.EmitStoreOfScalar(Val
, LV
, false);
411 static Value
*EmitNontemporalLoad(CodeGenFunction
&CGF
, const CallExpr
*E
) {
412 Address Addr
= CGF
.EmitPointerWithAlignment(E
->getArg(0));
414 LValue LV
= CGF
.MakeAddrLValue(Addr
, E
->getType());
415 LV
.setNontemporal(true);
416 return CGF
.EmitLoadOfScalar(LV
, E
->getExprLoc());
419 static RValue
EmitBinaryAtomic(CodeGenFunction
&CGF
,
420 llvm::AtomicRMWInst::BinOp Kind
,
422 return RValue::get(MakeBinaryAtomicValue(CGF
, Kind
, E
));
425 /// Utility to insert an atomic instruction based Intrinsic::ID and
426 /// the expression node, where the return value is the result of the
428 static RValue
EmitBinaryAtomicPost(CodeGenFunction
&CGF
,
429 llvm::AtomicRMWInst::BinOp Kind
,
431 Instruction::BinaryOps Op
,
432 bool Invert
= false) {
433 QualType T
= E
->getType();
434 assert(E
->getArg(0)->getType()->isPointerType());
435 assert(CGF
.getContext().hasSameUnqualifiedType(T
,
436 E
->getArg(0)->getType()->getPointeeType()));
437 assert(CGF
.getContext().hasSameUnqualifiedType(T
, E
->getArg(1)->getType()));
439 Address DestAddr
= CheckAtomicAlignment(CGF
, E
);
441 llvm::IntegerType
*IntType
= llvm::IntegerType::get(
442 CGF
.getLLVMContext(), CGF
.getContext().getTypeSize(T
));
444 llvm::Value
*Val
= CGF
.EmitScalarExpr(E
->getArg(1));
445 llvm::Type
*ValueType
= Val
->getType();
446 Val
= EmitToInt(CGF
, Val
, T
, IntType
);
448 llvm::Value
*Result
= CGF
.Builder
.CreateAtomicRMW(
449 Kind
, DestAddr
, Val
, llvm::AtomicOrdering::SequentiallyConsistent
);
450 Result
= CGF
.Builder
.CreateBinOp(Op
, Result
, Val
);
453 CGF
.Builder
.CreateBinOp(llvm::Instruction::Xor
, Result
,
454 llvm::ConstantInt::getAllOnesValue(IntType
));
455 Result
= EmitFromInt(CGF
, Result
, T
, ValueType
);
456 return RValue::get(Result
);
459 /// Utility to insert an atomic cmpxchg instruction.
461 /// @param CGF The current codegen function.
462 /// @param E Builtin call expression to convert to cmpxchg.
463 /// arg0 - address to operate on
464 /// arg1 - value to compare with
466 /// @param ReturnBool Specifies whether to return success flag of
467 /// cmpxchg result or the old value.
469 /// @returns result of cmpxchg, according to ReturnBool
471 /// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics
472 /// invoke the function EmitAtomicCmpXchgForMSIntrin.
473 static Value
*MakeAtomicCmpXchgValue(CodeGenFunction
&CGF
, const CallExpr
*E
,
475 QualType T
= ReturnBool
? E
->getArg(1)->getType() : E
->getType();
476 Address DestAddr
= CheckAtomicAlignment(CGF
, E
);
478 llvm::IntegerType
*IntType
= llvm::IntegerType::get(
479 CGF
.getLLVMContext(), CGF
.getContext().getTypeSize(T
));
481 Value
*Cmp
= CGF
.EmitScalarExpr(E
->getArg(1));
482 llvm::Type
*ValueType
= Cmp
->getType();
483 Cmp
= EmitToInt(CGF
, Cmp
, T
, IntType
);
484 Value
*New
= EmitToInt(CGF
, CGF
.EmitScalarExpr(E
->getArg(2)), T
, IntType
);
486 Value
*Pair
= CGF
.Builder
.CreateAtomicCmpXchg(
487 DestAddr
, Cmp
, New
, llvm::AtomicOrdering::SequentiallyConsistent
,
488 llvm::AtomicOrdering::SequentiallyConsistent
);
490 // Extract boolean success flag and zext it to int.
491 return CGF
.Builder
.CreateZExt(CGF
.Builder
.CreateExtractValue(Pair
, 1),
492 CGF
.ConvertType(E
->getType()));
494 // Extract old value and emit it using the same type as compare value.
495 return EmitFromInt(CGF
, CGF
.Builder
.CreateExtractValue(Pair
, 0), T
,
499 /// This function should be invoked to emit atomic cmpxchg for Microsoft's
500 /// _InterlockedCompareExchange* intrinsics which have the following signature:
501 /// T _InterlockedCompareExchange(T volatile *Destination,
505 /// Whereas the llvm 'cmpxchg' instruction has the following syntax:
506 /// cmpxchg *Destination, Comparand, Exchange.
507 /// So we need to swap Comparand and Exchange when invoking
508 /// CreateAtomicCmpXchg. That is the reason we could not use the above utility
509 /// function MakeAtomicCmpXchgValue since it expects the arguments to be
513 Value
*EmitAtomicCmpXchgForMSIntrin(CodeGenFunction
&CGF
, const CallExpr
*E
,
514 AtomicOrdering SuccessOrdering
= AtomicOrdering::SequentiallyConsistent
) {
515 assert(E
->getArg(0)->getType()->isPointerType());
516 assert(CGF
.getContext().hasSameUnqualifiedType(
517 E
->getType(), E
->getArg(0)->getType()->getPointeeType()));
518 assert(CGF
.getContext().hasSameUnqualifiedType(E
->getType(),
519 E
->getArg(1)->getType()));
520 assert(CGF
.getContext().hasSameUnqualifiedType(E
->getType(),
521 E
->getArg(2)->getType()));
523 Address DestAddr
= CheckAtomicAlignment(CGF
, E
);
525 auto *Exchange
= CGF
.EmitScalarExpr(E
->getArg(1));
526 auto *RTy
= Exchange
->getType();
528 auto *Comparand
= CGF
.EmitScalarExpr(E
->getArg(2));
530 if (RTy
->isPointerTy()) {
531 Exchange
= CGF
.Builder
.CreatePtrToInt(Exchange
, CGF
.IntPtrTy
);
532 Comparand
= CGF
.Builder
.CreatePtrToInt(Comparand
, CGF
.IntPtrTy
);
535 // For Release ordering, the failure ordering should be Monotonic.
536 auto FailureOrdering
= SuccessOrdering
== AtomicOrdering::Release
?
537 AtomicOrdering::Monotonic
:
540 // The atomic instruction is marked volatile for consistency with MSVC. This
541 // blocks the few atomics optimizations that LLVM has. If we want to optimize
542 // _Interlocked* operations in the future, we will have to remove the volatile
544 auto *CmpXchg
= CGF
.Builder
.CreateAtomicCmpXchg(
545 DestAddr
, Comparand
, Exchange
, SuccessOrdering
, FailureOrdering
);
546 CmpXchg
->setVolatile(true);
548 auto *Result
= CGF
.Builder
.CreateExtractValue(CmpXchg
, 0);
549 if (RTy
->isPointerTy()) {
550 Result
= CGF
.Builder
.CreateIntToPtr(Result
, RTy
);
556 // 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
557 // prototyped like this:
559 // unsigned char _InterlockedCompareExchange128...(
560 // __int64 volatile * _Destination,
561 // __int64 _ExchangeHigh,
562 // __int64 _ExchangeLow,
563 // __int64 * _ComparandResult);
565 // Note that Destination is assumed to be at least 16-byte aligned, despite
566 // being typed int64.
568 static Value
*EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction
&CGF
,
570 AtomicOrdering SuccessOrdering
) {
571 assert(E
->getNumArgs() == 4);
572 llvm::Value
*DestPtr
= CGF
.EmitScalarExpr(E
->getArg(0));
573 llvm::Value
*ExchangeHigh
= CGF
.EmitScalarExpr(E
->getArg(1));
574 llvm::Value
*ExchangeLow
= CGF
.EmitScalarExpr(E
->getArg(2));
575 Address ComparandAddr
= CGF
.EmitPointerWithAlignment(E
->getArg(3));
577 assert(DestPtr
->getType()->isPointerTy());
578 assert(!ExchangeHigh
->getType()->isPointerTy());
579 assert(!ExchangeLow
->getType()->isPointerTy());
581 // For Release ordering, the failure ordering should be Monotonic.
582 auto FailureOrdering
= SuccessOrdering
== AtomicOrdering::Release
583 ? AtomicOrdering::Monotonic
586 // Convert to i128 pointers and values. Alignment is also overridden for
587 // destination pointer.
588 llvm::Type
*Int128Ty
= llvm::IntegerType::get(CGF
.getLLVMContext(), 128);
589 Address
DestAddr(DestPtr
, Int128Ty
,
590 CGF
.getContext().toCharUnitsFromBits(128));
591 ComparandAddr
= ComparandAddr
.withElementType(Int128Ty
);
593 // (((i128)hi) << 64) | ((i128)lo)
594 ExchangeHigh
= CGF
.Builder
.CreateZExt(ExchangeHigh
, Int128Ty
);
595 ExchangeLow
= CGF
.Builder
.CreateZExt(ExchangeLow
, Int128Ty
);
597 CGF
.Builder
.CreateShl(ExchangeHigh
, llvm::ConstantInt::get(Int128Ty
, 64));
598 llvm::Value
*Exchange
= CGF
.Builder
.CreateOr(ExchangeHigh
, ExchangeLow
);
600 // Load the comparand for the instruction.
601 llvm::Value
*Comparand
= CGF
.Builder
.CreateLoad(ComparandAddr
);
603 auto *CXI
= CGF
.Builder
.CreateAtomicCmpXchg(DestAddr
, Comparand
, Exchange
,
604 SuccessOrdering
, FailureOrdering
);
606 // The atomic instruction is marked volatile for consistency with MSVC. This
607 // blocks the few atomics optimizations that LLVM has. If we want to optimize
608 // _Interlocked* operations in the future, we will have to remove the volatile
610 CXI
->setVolatile(true);
612 // Store the result as an outparameter.
613 CGF
.Builder
.CreateStore(CGF
.Builder
.CreateExtractValue(CXI
, 0),
616 // Get the success boolean and zero extend it to i8.
617 Value
*Success
= CGF
.Builder
.CreateExtractValue(CXI
, 1);
618 return CGF
.Builder
.CreateZExt(Success
, CGF
.Int8Ty
);
621 static Value
*EmitAtomicIncrementValue(CodeGenFunction
&CGF
, const CallExpr
*E
,
622 AtomicOrdering Ordering
= AtomicOrdering::SequentiallyConsistent
) {
623 assert(E
->getArg(0)->getType()->isPointerType());
625 auto *IntTy
= CGF
.ConvertType(E
->getType());
626 Address DestAddr
= CheckAtomicAlignment(CGF
, E
);
627 auto *Result
= CGF
.Builder
.CreateAtomicRMW(
628 AtomicRMWInst::Add
, DestAddr
, ConstantInt::get(IntTy
, 1), Ordering
);
629 return CGF
.Builder
.CreateAdd(Result
, ConstantInt::get(IntTy
, 1));
632 static Value
*EmitAtomicDecrementValue(
633 CodeGenFunction
&CGF
, const CallExpr
*E
,
634 AtomicOrdering Ordering
= AtomicOrdering::SequentiallyConsistent
) {
635 assert(E
->getArg(0)->getType()->isPointerType());
637 auto *IntTy
= CGF
.ConvertType(E
->getType());
638 Address DestAddr
= CheckAtomicAlignment(CGF
, E
);
639 auto *Result
= CGF
.Builder
.CreateAtomicRMW(
640 AtomicRMWInst::Sub
, DestAddr
, ConstantInt::get(IntTy
, 1), Ordering
);
641 return CGF
.Builder
.CreateSub(Result
, ConstantInt::get(IntTy
, 1));
644 // Build a plain volatile load.
645 static Value
*EmitISOVolatileLoad(CodeGenFunction
&CGF
, const CallExpr
*E
) {
646 Value
*Ptr
= CGF
.EmitScalarExpr(E
->getArg(0));
647 QualType ElTy
= E
->getArg(0)->getType()->getPointeeType();
648 CharUnits LoadSize
= CGF
.getContext().getTypeSizeInChars(ElTy
);
650 llvm::IntegerType::get(CGF
.getLLVMContext(), LoadSize
.getQuantity() * 8);
651 llvm::LoadInst
*Load
= CGF
.Builder
.CreateAlignedLoad(ITy
, Ptr
, LoadSize
);
652 Load
->setVolatile(true);
656 // Build a plain volatile store.
657 static Value
*EmitISOVolatileStore(CodeGenFunction
&CGF
, const CallExpr
*E
) {
658 Value
*Ptr
= CGF
.EmitScalarExpr(E
->getArg(0));
659 Value
*Value
= CGF
.EmitScalarExpr(E
->getArg(1));
660 QualType ElTy
= E
->getArg(0)->getType()->getPointeeType();
661 CharUnits StoreSize
= CGF
.getContext().getTypeSizeInChars(ElTy
);
662 llvm::StoreInst
*Store
=
663 CGF
.Builder
.CreateAlignedStore(Value
, Ptr
, StoreSize
);
664 Store
->setVolatile(true);
668 // Emit a simple mangled intrinsic that has 1 argument and a return type
669 // matching the argument type. Depending on mode, this may be a constrained
670 // floating-point intrinsic.
671 static Value
*emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction
&CGF
,
672 const CallExpr
*E
, unsigned IntrinsicID
,
673 unsigned ConstrainedIntrinsicID
) {
674 llvm::Value
*Src0
= CGF
.EmitScalarExpr(E
->getArg(0));
676 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(CGF
, E
);
677 if (CGF
.Builder
.getIsFPConstrained()) {
678 Function
*F
= CGF
.CGM
.getIntrinsic(ConstrainedIntrinsicID
, Src0
->getType());
679 return CGF
.Builder
.CreateConstrainedFPCall(F
, { Src0
});
681 Function
*F
= CGF
.CGM
.getIntrinsic(IntrinsicID
, Src0
->getType());
682 return CGF
.Builder
.CreateCall(F
, Src0
);
686 // Emit an intrinsic that has 2 operands of the same type as its result.
687 // Depending on mode, this may be a constrained floating-point intrinsic.
688 static Value
*emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction
&CGF
,
689 const CallExpr
*E
, unsigned IntrinsicID
,
690 unsigned ConstrainedIntrinsicID
) {
691 llvm::Value
*Src0
= CGF
.EmitScalarExpr(E
->getArg(0));
692 llvm::Value
*Src1
= CGF
.EmitScalarExpr(E
->getArg(1));
694 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(CGF
, E
);
695 if (CGF
.Builder
.getIsFPConstrained()) {
696 Function
*F
= CGF
.CGM
.getIntrinsic(ConstrainedIntrinsicID
, Src0
->getType());
697 return CGF
.Builder
.CreateConstrainedFPCall(F
, { Src0
, Src1
});
699 Function
*F
= CGF
.CGM
.getIntrinsic(IntrinsicID
, Src0
->getType());
700 return CGF
.Builder
.CreateCall(F
, { Src0
, Src1
});
704 // Has second type mangled argument.
705 static Value
*emitBinaryExpMaybeConstrainedFPBuiltin(
706 CodeGenFunction
&CGF
, const CallExpr
*E
, llvm::Intrinsic::ID IntrinsicID
,
707 llvm::Intrinsic::ID ConstrainedIntrinsicID
) {
708 llvm::Value
*Src0
= CGF
.EmitScalarExpr(E
->getArg(0));
709 llvm::Value
*Src1
= CGF
.EmitScalarExpr(E
->getArg(1));
711 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(CGF
, E
);
712 if (CGF
.Builder
.getIsFPConstrained()) {
713 Function
*F
= CGF
.CGM
.getIntrinsic(ConstrainedIntrinsicID
,
714 {Src0
->getType(), Src1
->getType()});
715 return CGF
.Builder
.CreateConstrainedFPCall(F
, {Src0
, Src1
});
719 CGF
.CGM
.getIntrinsic(IntrinsicID
, {Src0
->getType(), Src1
->getType()});
720 return CGF
.Builder
.CreateCall(F
, {Src0
, Src1
});
723 // Emit an intrinsic that has 3 operands of the same type as its result.
724 // Depending on mode, this may be a constrained floating-point intrinsic.
725 static Value
*emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction
&CGF
,
726 const CallExpr
*E
, unsigned IntrinsicID
,
727 unsigned ConstrainedIntrinsicID
) {
728 llvm::Value
*Src0
= CGF
.EmitScalarExpr(E
->getArg(0));
729 llvm::Value
*Src1
= CGF
.EmitScalarExpr(E
->getArg(1));
730 llvm::Value
*Src2
= CGF
.EmitScalarExpr(E
->getArg(2));
732 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(CGF
, E
);
733 if (CGF
.Builder
.getIsFPConstrained()) {
734 Function
*F
= CGF
.CGM
.getIntrinsic(ConstrainedIntrinsicID
, Src0
->getType());
735 return CGF
.Builder
.CreateConstrainedFPCall(F
, { Src0
, Src1
, Src2
});
737 Function
*F
= CGF
.CGM
.getIntrinsic(IntrinsicID
, Src0
->getType());
738 return CGF
.Builder
.CreateCall(F
, { Src0
, Src1
, Src2
});
742 // Emit an intrinsic where all operands are of the same type as the result.
743 // Depending on mode, this may be a constrained floating-point intrinsic.
744 static Value
*emitCallMaybeConstrainedFPBuiltin(CodeGenFunction
&CGF
,
745 unsigned IntrinsicID
,
746 unsigned ConstrainedIntrinsicID
,
748 ArrayRef
<Value
*> Args
) {
750 if (CGF
.Builder
.getIsFPConstrained())
751 F
= CGF
.CGM
.getIntrinsic(ConstrainedIntrinsicID
, Ty
);
753 F
= CGF
.CGM
.getIntrinsic(IntrinsicID
, Ty
);
755 if (CGF
.Builder
.getIsFPConstrained())
756 return CGF
.Builder
.CreateConstrainedFPCall(F
, Args
);
758 return CGF
.Builder
.CreateCall(F
, Args
);
761 // Emit a simple intrinsic that has N scalar arguments and a return type
762 // matching the argument type. It is assumed that only the first argument is
764 template <unsigned N
>
765 static Value
*emitBuiltinWithOneOverloadedType(CodeGenFunction
&CGF
,
767 unsigned IntrinsicID
,
768 llvm::StringRef Name
= "") {
769 static_assert(N
, "expect non-empty argument");
770 SmallVector
<Value
*, N
> Args
;
771 for (unsigned I
= 0; I
< N
; ++I
)
772 Args
.push_back(CGF
.EmitScalarExpr(E
->getArg(I
)));
773 Function
*F
= CGF
.CGM
.getIntrinsic(IntrinsicID
, Args
[0]->getType());
774 return CGF
.Builder
.CreateCall(F
, Args
, Name
);
777 // Emit an intrinsic that has 4 operands of the same type as its result.
778 static Value
*emitQuaternaryBuiltin(CodeGenFunction
&CGF
, const CallExpr
*E
,
779 unsigned IntrinsicID
) {
780 llvm::Value
*Src0
= CGF
.EmitScalarExpr(E
->getArg(0));
781 llvm::Value
*Src1
= CGF
.EmitScalarExpr(E
->getArg(1));
782 llvm::Value
*Src2
= CGF
.EmitScalarExpr(E
->getArg(2));
783 llvm::Value
*Src3
= CGF
.EmitScalarExpr(E
->getArg(3));
785 Function
*F
= CGF
.CGM
.getIntrinsic(IntrinsicID
, Src0
->getType());
786 return CGF
.Builder
.CreateCall(F
, {Src0
, Src1
, Src2
, Src3
});
789 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
790 static Value
*emitFPIntBuiltin(CodeGenFunction
&CGF
,
792 unsigned IntrinsicID
) {
793 llvm::Value
*Src0
= CGF
.EmitScalarExpr(E
->getArg(0));
794 llvm::Value
*Src1
= CGF
.EmitScalarExpr(E
->getArg(1));
796 Function
*F
= CGF
.CGM
.getIntrinsic(IntrinsicID
, Src0
->getType());
797 return CGF
.Builder
.CreateCall(F
, {Src0
, Src1
});
800 // Emit an intrinsic that has overloaded integer result and fp operand.
802 emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction
&CGF
, const CallExpr
*E
,
803 unsigned IntrinsicID
,
804 unsigned ConstrainedIntrinsicID
) {
805 llvm::Type
*ResultType
= CGF
.ConvertType(E
->getType());
806 llvm::Value
*Src0
= CGF
.EmitScalarExpr(E
->getArg(0));
808 if (CGF
.Builder
.getIsFPConstrained()) {
809 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(CGF
, E
);
810 Function
*F
= CGF
.CGM
.getIntrinsic(ConstrainedIntrinsicID
,
811 {ResultType
, Src0
->getType()});
812 return CGF
.Builder
.CreateConstrainedFPCall(F
, {Src0
});
815 CGF
.CGM
.getIntrinsic(IntrinsicID
, {ResultType
, Src0
->getType()});
816 return CGF
.Builder
.CreateCall(F
, Src0
);
820 static Value
*emitFrexpBuiltin(CodeGenFunction
&CGF
, const CallExpr
*E
,
821 llvm::Intrinsic::ID IntrinsicID
) {
822 llvm::Value
*Src0
= CGF
.EmitScalarExpr(E
->getArg(0));
823 llvm::Value
*Src1
= CGF
.EmitScalarExpr(E
->getArg(1));
825 QualType IntPtrTy
= E
->getArg(1)->getType()->getPointeeType();
826 llvm::Type
*IntTy
= CGF
.ConvertType(IntPtrTy
);
828 CGF
.CGM
.getIntrinsic(IntrinsicID
, {Src0
->getType(), IntTy
});
829 llvm::Value
*Call
= CGF
.Builder
.CreateCall(F
, Src0
);
831 llvm::Value
*Exp
= CGF
.Builder
.CreateExtractValue(Call
, 1);
832 LValue LV
= CGF
.MakeNaturalAlignAddrLValue(Src1
, IntPtrTy
);
833 CGF
.EmitStoreOfScalar(Exp
, LV
);
835 return CGF
.Builder
.CreateExtractValue(Call
, 0);
838 /// EmitFAbs - Emit a call to @llvm.fabs().
839 static Value
*EmitFAbs(CodeGenFunction
&CGF
, Value
*V
) {
840 Function
*F
= CGF
.CGM
.getIntrinsic(Intrinsic::fabs
, V
->getType());
841 llvm::CallInst
*Call
= CGF
.Builder
.CreateCall(F
, V
);
842 Call
->setDoesNotAccessMemory();
846 /// Emit the computation of the sign bit for a floating point value. Returns
847 /// the i1 sign bit value.
848 static Value
*EmitSignBit(CodeGenFunction
&CGF
, Value
*V
) {
849 LLVMContext
&C
= CGF
.CGM
.getLLVMContext();
851 llvm::Type
*Ty
= V
->getType();
852 int Width
= Ty
->getPrimitiveSizeInBits();
853 llvm::Type
*IntTy
= llvm::IntegerType::get(C
, Width
);
854 V
= CGF
.Builder
.CreateBitCast(V
, IntTy
);
855 if (Ty
->isPPC_FP128Ty()) {
856 // We want the sign bit of the higher-order double. The bitcast we just
857 // did works as if the double-double was stored to memory and then
858 // read as an i128. The "store" will put the higher-order double in the
859 // lower address in both little- and big-Endian modes, but the "load"
860 // will treat those bits as a different part of the i128: the low bits in
861 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
862 // we need to shift the high bits down to the low before truncating.
864 if (CGF
.getTarget().isBigEndian()) {
865 Value
*ShiftCst
= llvm::ConstantInt::get(IntTy
, Width
);
866 V
= CGF
.Builder
.CreateLShr(V
, ShiftCst
);
868 // We are truncating value in order to extract the higher-order
869 // double, which we will be using to extract the sign from.
870 IntTy
= llvm::IntegerType::get(C
, Width
);
871 V
= CGF
.Builder
.CreateTrunc(V
, IntTy
);
873 Value
*Zero
= llvm::Constant::getNullValue(IntTy
);
874 return CGF
.Builder
.CreateICmpSLT(V
, Zero
);
877 /// Checks no arguments or results are passed indirectly in the ABI (i.e. via a
878 /// hidden pointer). This is used to check annotating FP libcalls (that could
879 /// set `errno`) with "int" TBAA metadata is safe. If any floating-point
880 /// arguments are passed indirectly, setup for the call could be incorrectly
882 static bool HasNoIndirectArgumentsOrResults(CGFunctionInfo
const &FnInfo
) {
883 auto IsIndirect
= [&](ABIArgInfo
const &info
) {
884 return info
.isIndirect() || info
.isIndirectAliased() || info
.isInAlloca();
886 return !IsIndirect(FnInfo
.getReturnInfo()) &&
887 llvm::none_of(FnInfo
.arguments(),
888 [&](CGFunctionInfoArgInfo
const &ArgInfo
) {
889 return IsIndirect(ArgInfo
.info
);
893 static RValue
emitLibraryCall(CodeGenFunction
&CGF
, const FunctionDecl
*FD
,
894 const CallExpr
*E
, llvm::Constant
*calleeValue
) {
895 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(CGF
, E
);
896 CGCallee callee
= CGCallee::forDirect(calleeValue
, GlobalDecl(FD
));
897 llvm::CallBase
*callOrInvoke
= nullptr;
898 CGFunctionInfo
const *FnInfo
= nullptr;
900 CGF
.EmitCall(E
->getCallee()->getType(), callee
, E
, ReturnValueSlot(),
901 /*Chain=*/nullptr, &callOrInvoke
, &FnInfo
);
903 if (unsigned BuiltinID
= FD
->getBuiltinID()) {
904 // Check whether a FP math builtin function, such as BI__builtin_expf
905 ASTContext
&Context
= CGF
.getContext();
906 bool ConstWithoutErrnoAndExceptions
=
907 Context
.BuiltinInfo
.isConstWithoutErrnoAndExceptions(BuiltinID
);
908 // Restrict to target with errno, for example, MacOS doesn't set errno.
909 // TODO: Support builtin function with complex type returned, eg: cacosh
910 if (ConstWithoutErrnoAndExceptions
&& CGF
.CGM
.getLangOpts().MathErrno
&&
911 !CGF
.Builder
.getIsFPConstrained() && Call
.isScalar() &&
912 HasNoIndirectArgumentsOrResults(*FnInfo
)) {
913 // Emit "int" TBAA metadata on FP math libcalls.
914 clang::QualType IntTy
= Context
.IntTy
;
915 TBAAAccessInfo TBAAInfo
= CGF
.CGM
.getTBAAAccessInfo(IntTy
);
916 CGF
.CGM
.DecorateInstructionWithTBAA(callOrInvoke
, TBAAInfo
);
922 /// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
923 /// depending on IntrinsicID.
925 /// \arg CGF The current codegen function.
926 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
927 /// \arg X The first argument to the llvm.*.with.overflow.*.
928 /// \arg Y The second argument to the llvm.*.with.overflow.*.
929 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
930 /// \returns The result (i.e. sum/product) returned by the intrinsic.
931 static llvm::Value
*EmitOverflowIntrinsic(CodeGenFunction
&CGF
,
932 const llvm::Intrinsic::ID IntrinsicID
,
933 llvm::Value
*X
, llvm::Value
*Y
,
934 llvm::Value
*&Carry
) {
935 // Make sure we have integers of the same width.
936 assert(X
->getType() == Y
->getType() &&
937 "Arguments must be the same type. (Did you forget to make sure both "
938 "arguments have the same integer width?)");
940 Function
*Callee
= CGF
.CGM
.getIntrinsic(IntrinsicID
, X
->getType());
941 llvm::Value
*Tmp
= CGF
.Builder
.CreateCall(Callee
, {X
, Y
});
942 Carry
= CGF
.Builder
.CreateExtractValue(Tmp
, 1);
943 return CGF
.Builder
.CreateExtractValue(Tmp
, 0);
946 static Value
*emitRangedBuiltin(CodeGenFunction
&CGF
, unsigned IntrinsicID
,
948 Function
*F
= CGF
.CGM
.getIntrinsic(IntrinsicID
, {});
949 llvm::CallInst
*Call
= CGF
.Builder
.CreateCall(F
);
950 llvm::ConstantRange
CR(APInt(32, low
), APInt(32, high
));
951 Call
->addRangeRetAttr(CR
);
952 Call
->addRetAttr(llvm::Attribute::AttrKind::NoUndef
);
957 struct WidthAndSignedness
{
963 static WidthAndSignedness
964 getIntegerWidthAndSignedness(const clang::ASTContext
&context
,
965 const clang::QualType Type
) {
966 assert(Type
->isIntegerType() && "Given type is not an integer.");
967 unsigned Width
= context
.getIntWidth(Type
);
968 bool Signed
= Type
->isSignedIntegerType();
969 return {Width
, Signed
};
972 // Given one or more integer types, this function produces an integer type that
973 // encompasses them: any value in one of the given types could be expressed in
974 // the encompassing type.
975 static struct WidthAndSignedness
976 EncompassingIntegerType(ArrayRef
<struct WidthAndSignedness
> Types
) {
977 assert(Types
.size() > 0 && "Empty list of types.");
979 // If any of the given types is signed, we must return a signed type.
981 for (const auto &Type
: Types
) {
982 Signed
|= Type
.Signed
;
985 // The encompassing type must have a width greater than or equal to the width
986 // of the specified types. Additionally, if the encompassing type is signed,
987 // its width must be strictly greater than the width of any unsigned types
990 for (const auto &Type
: Types
) {
991 unsigned MinWidth
= Type
.Width
+ (Signed
&& !Type
.Signed
);
992 if (Width
< MinWidth
) {
997 return {Width
, Signed
};
1000 Value
*CodeGenFunction::EmitVAStartEnd(Value
*ArgValue
, bool IsStart
) {
1001 Intrinsic::ID inst
= IsStart
? Intrinsic::vastart
: Intrinsic::vaend
;
1002 return Builder
.CreateCall(CGM
.getIntrinsic(inst
, {ArgValue
->getType()}),
1006 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
1007 /// __builtin_object_size(p, @p To) is correct
1008 static bool areBOSTypesCompatible(int From
, int To
) {
1009 // Note: Our __builtin_object_size implementation currently treats Type=0 and
1010 // Type=2 identically. Encoding this implementation detail here may make
1011 // improving __builtin_object_size difficult in the future, so it's omitted.
1012 return From
== To
|| (From
== 0 && To
== 1) || (From
== 3 && To
== 2);
1015 static llvm::Value
*
1016 getDefaultBuiltinObjectSizeResult(unsigned Type
, llvm::IntegerType
*ResType
) {
1017 return ConstantInt::get(ResType
, (Type
& 2) ? 0 : -1, /*isSigned=*/true);
1021 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr
*E
, unsigned Type
,
1022 llvm::IntegerType
*ResType
,
1023 llvm::Value
*EmittedE
,
1025 uint64_t ObjectSize
;
1026 if (!E
->tryEvaluateObjectSize(ObjectSize
, getContext(), Type
))
1027 return emitBuiltinObjectSize(E
, Type
, ResType
, EmittedE
, IsDynamic
);
1028 return ConstantInt::get(ResType
, ObjectSize
, /*isSigned=*/true);
1031 const FieldDecl
*CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset(
1032 ASTContext
&Ctx
, const RecordDecl
*RD
, const FieldDecl
*FAMDecl
,
1034 const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel
=
1035 getLangOpts().getStrictFlexArraysLevel();
1036 uint32_t FieldNo
= 0;
1038 if (RD
->isImplicit())
1041 for (const FieldDecl
*FD
: RD
->fields()) {
1042 if ((!FAMDecl
|| FD
== FAMDecl
) &&
1043 Decl::isFlexibleArrayMemberLike(
1044 Ctx
, FD
, FD
->getType(), StrictFlexArraysLevel
,
1045 /*IgnoreTemplateOrMacroSubstitution=*/true)) {
1046 const ASTRecordLayout
&Layout
= Ctx
.getASTRecordLayout(RD
);
1047 Offset
+= Layout
.getFieldOffset(FieldNo
);
1051 QualType Ty
= FD
->getType();
1052 if (Ty
->isRecordType()) {
1053 if (const FieldDecl
*Field
= FindFlexibleArrayMemberFieldAndOffset(
1054 Ctx
, Ty
->getAsRecordDecl(), FAMDecl
, Offset
)) {
1055 const ASTRecordLayout
&Layout
= Ctx
.getASTRecordLayout(RD
);
1056 Offset
+= Layout
.getFieldOffset(FieldNo
);
1068 static unsigned CountCountedByAttrs(const RecordDecl
*RD
) {
1071 for (const FieldDecl
*FD
: RD
->fields()) {
1072 if (FD
->getType()->isCountAttributedType())
1075 QualType Ty
= FD
->getType();
1076 if (Ty
->isRecordType())
1077 Num
+= CountCountedByAttrs(Ty
->getAsRecordDecl());
1084 CodeGenFunction::emitFlexibleArrayMemberSize(const Expr
*E
, unsigned Type
,
1085 llvm::IntegerType
*ResType
) {
1086 // The code generated here calculates the size of a struct with a flexible
1087 // array member that uses the counted_by attribute. There are two instances
1091 // unsigned long flags;
1093 // int array[] __attribute__((counted_by(count)));
1096 // 1) bdos of the flexible array itself:
1098 // __builtin_dynamic_object_size(p->array, 1) ==
1099 // p->count * sizeof(*p->array)
1101 // 2) bdos of a pointer into the flexible array:
1103 // __builtin_dynamic_object_size(&p->array[42], 1) ==
1104 // (p->count - 42) * sizeof(*p->array)
1106 // 2) bdos of the whole struct, including the flexible array:
1108 // __builtin_dynamic_object_size(p, 1) ==
1109 // max(sizeof(struct s),
1110 // offsetof(struct s, array) + p->count * sizeof(*p->array))
1112 ASTContext
&Ctx
= getContext();
1113 const Expr
*Base
= E
->IgnoreParenImpCasts();
1114 const Expr
*Idx
= nullptr;
1116 if (const auto *UO
= dyn_cast
<UnaryOperator
>(Base
);
1117 UO
&& UO
->getOpcode() == UO_AddrOf
) {
1118 Expr
*SubExpr
= UO
->getSubExpr()->IgnoreParenImpCasts();
1119 if (const auto *ASE
= dyn_cast
<ArraySubscriptExpr
>(SubExpr
)) {
1120 Base
= ASE
->getBase()->IgnoreParenImpCasts();
1121 Idx
= ASE
->getIdx()->IgnoreParenImpCasts();
1123 if (const auto *IL
= dyn_cast
<IntegerLiteral
>(Idx
)) {
1124 int64_t Val
= IL
->getValue().getSExtValue();
1126 return getDefaultBuiltinObjectSizeResult(Type
, ResType
);
1129 // The index is 0, so we don't need to take it into account.
1133 // Potential pointer to another element in the struct.
1138 // Get the flexible array member Decl.
1139 const RecordDecl
*OuterRD
= nullptr;
1140 const FieldDecl
*FAMDecl
= nullptr;
1141 if (const auto *ME
= dyn_cast
<MemberExpr
>(Base
)) {
1142 // Check if \p Base is referencing the FAM itself.
1143 const ValueDecl
*VD
= ME
->getMemberDecl();
1144 OuterRD
= VD
->getDeclContext()->getOuterLexicalRecordContext();
1145 FAMDecl
= dyn_cast
<FieldDecl
>(VD
);
1148 } else if (const auto *DRE
= dyn_cast
<DeclRefExpr
>(Base
)) {
1149 // Check if we're pointing to the whole struct.
1150 QualType Ty
= DRE
->getDecl()->getType();
1151 if (Ty
->isPointerType())
1152 Ty
= Ty
->getPointeeType();
1153 OuterRD
= Ty
->getAsRecordDecl();
1155 // If we have a situation like this:
1157 // struct union_of_fams {
1160 // signed char normal_field;
1163 // int arr1[] __counted_by(count1);
1166 // signed char count2;
1167 // int arr2[] __counted_by(count2);
1172 // We don't know which 'count' to use in this scenario:
1174 // size_t get_size(struct union_of_fams *p) {
1175 // return __builtin_dynamic_object_size(p, 1);
1178 // Instead of calculating a wrong number, we give up.
1179 if (OuterRD
&& CountCountedByAttrs(OuterRD
) > 1)
1186 // We call FindFlexibleArrayMemberAndOffset even if FAMDecl is non-null to
1188 uint64_t Offset
= 0;
1190 FindFlexibleArrayMemberFieldAndOffset(Ctx
, OuterRD
, FAMDecl
, Offset
);
1191 Offset
= Ctx
.toCharUnitsFromBits(Offset
).getQuantity();
1193 if (!FAMDecl
|| !FAMDecl
->getType()->isCountAttributedType())
1194 // No flexible array member found or it doesn't have the "counted_by"
1198 const FieldDecl
*CountedByFD
= FAMDecl
->findCountedByField();
1200 // Can't find the field referenced by the "counted_by" attribute.
1203 if (isa
<DeclRefExpr
>(Base
))
1204 // The whole struct is specificed in the __bdos. The calculation of the
1205 // whole size of the structure can be done in two ways:
1207 // 1) sizeof(struct S) + count * sizeof(typeof(fam))
1208 // 2) offsetof(struct S, fam) + count * sizeof(typeof(fam))
1210 // The first will add additional padding after the end of the array,
1211 // allocation while the second method is more precise, but not quite
1212 // expected from programmers. See
1213 // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a
1214 // discussion of the topic.
1216 // GCC isn't (currently) able to calculate __bdos on a pointer to the whole
1217 // structure. Therefore, because of the above issue, we'll choose to match
1218 // what GCC does for consistency's sake.
1221 // Build a load of the counted_by field.
1222 bool IsSigned
= CountedByFD
->getType()->isSignedIntegerType();
1223 Value
*CountedByInst
= EmitLoadOfCountedByField(Base
, FAMDecl
, CountedByFD
);
1225 return getDefaultBuiltinObjectSizeResult(Type
, ResType
);
1227 CountedByInst
= Builder
.CreateIntCast(CountedByInst
, ResType
, IsSigned
);
1229 // Build a load of the index and subtract it from the count.
1230 Value
*IdxInst
= nullptr;
1232 if (Idx
->HasSideEffects(getContext()))
1233 // We can't have side-effects.
1234 return getDefaultBuiltinObjectSizeResult(Type
, ResType
);
1236 bool IdxSigned
= Idx
->getType()->isSignedIntegerType();
1237 IdxInst
= EmitAnyExprToTemp(Idx
).getScalarVal();
1238 IdxInst
= Builder
.CreateIntCast(IdxInst
, ResType
, IdxSigned
);
1240 // We go ahead with the calculation here. If the index turns out to be
1241 // negative, we'll catch it at the end.
1243 Builder
.CreateSub(CountedByInst
, IdxInst
, "", !IsSigned
, IsSigned
);
1246 // Calculate how large the flexible array member is in bytes.
1247 const ArrayType
*ArrayTy
= Ctx
.getAsArrayType(FAMDecl
->getType());
1248 CharUnits Size
= Ctx
.getTypeSizeInChars(ArrayTy
->getElementType());
1249 llvm::Constant
*ElemSize
=
1250 llvm::ConstantInt::get(ResType
, Size
.getQuantity(), IsSigned
);
1252 Builder
.CreateMul(CountedByInst
, ElemSize
, "", !IsSigned
, IsSigned
);
1253 Res
= Builder
.CreateIntCast(Res
, ResType
, IsSigned
);
1255 // A negative \p IdxInst or \p CountedByInst means that the index lands
1256 // outside of the flexible array member. If that's the case, we want to
1258 Value
*Cmp
= Builder
.CreateIsNotNeg(CountedByInst
);
1260 Cmp
= Builder
.CreateAnd(Builder
.CreateIsNotNeg(IdxInst
), Cmp
);
1262 return Builder
.CreateSelect(Cmp
, Res
, ConstantInt::get(ResType
, 0, IsSigned
));
1265 /// Returns a Value corresponding to the size of the given expression.
1266 /// This Value may be either of the following:
1267 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
1269 /// - A call to the @llvm.objectsize intrinsic
1271 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
1272 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
1273 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
1275 CodeGenFunction::emitBuiltinObjectSize(const Expr
*E
, unsigned Type
,
1276 llvm::IntegerType
*ResType
,
1277 llvm::Value
*EmittedE
, bool IsDynamic
) {
1278 // We need to reference an argument if the pointer is a parameter with the
1279 // pass_object_size attribute.
1280 if (auto *D
= dyn_cast
<DeclRefExpr
>(E
->IgnoreParenImpCasts())) {
1281 auto *Param
= dyn_cast
<ParmVarDecl
>(D
->getDecl());
1282 auto *PS
= D
->getDecl()->getAttr
<PassObjectSizeAttr
>();
1283 if (Param
!= nullptr && PS
!= nullptr &&
1284 areBOSTypesCompatible(PS
->getType(), Type
)) {
1285 auto Iter
= SizeArguments
.find(Param
);
1286 assert(Iter
!= SizeArguments
.end());
1288 const ImplicitParamDecl
*D
= Iter
->second
;
1289 auto DIter
= LocalDeclMap
.find(D
);
1290 assert(DIter
!= LocalDeclMap
.end());
1292 return EmitLoadOfScalar(DIter
->second
, /*Volatile=*/false,
1293 getContext().getSizeType(), E
->getBeginLoc());
1298 // Emit special code for a flexible array member with the "counted_by"
1300 if (Value
*V
= emitFlexibleArrayMemberSize(E
, Type
, ResType
))
1304 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1305 // evaluate E for side-effects. In either case, we shouldn't lower to
1306 // @llvm.objectsize.
1307 if (Type
== 3 || (!EmittedE
&& E
->HasSideEffects(getContext())))
1308 return getDefaultBuiltinObjectSizeResult(Type
, ResType
);
1310 Value
*Ptr
= EmittedE
? EmittedE
: EmitScalarExpr(E
);
1311 assert(Ptr
->getType()->isPointerTy() &&
1312 "Non-pointer passed to __builtin_object_size?");
1315 CGM
.getIntrinsic(Intrinsic::objectsize
, {ResType
, Ptr
->getType()});
1317 // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
1318 Value
*Min
= Builder
.getInt1((Type
& 2) != 0);
1319 // For GCC compatibility, __builtin_object_size treat NULL as unknown size.
1320 Value
*NullIsUnknown
= Builder
.getTrue();
1321 Value
*Dynamic
= Builder
.getInt1(IsDynamic
);
1322 return Builder
.CreateCall(F
, {Ptr
, Min
, NullIsUnknown
, Dynamic
});
1326 /// A struct to generically describe a bit test intrinsic.
1328 enum ActionKind
: uint8_t { TestOnly
, Complement
, Reset
, Set
};
1329 enum InterlockingKind
: uint8_t {
1338 InterlockingKind Interlocking
;
1341 static BitTest
decodeBitTestBuiltin(unsigned BuiltinID
);
1346 BitTest
BitTest::decodeBitTestBuiltin(unsigned BuiltinID
) {
1347 switch (BuiltinID
) {
1348 // Main portable variants.
1349 case Builtin::BI_bittest
:
1350 return {TestOnly
, Unlocked
, false};
1351 case Builtin::BI_bittestandcomplement
:
1352 return {Complement
, Unlocked
, false};
1353 case Builtin::BI_bittestandreset
:
1354 return {Reset
, Unlocked
, false};
1355 case Builtin::BI_bittestandset
:
1356 return {Set
, Unlocked
, false};
1357 case Builtin::BI_interlockedbittestandreset
:
1358 return {Reset
, Sequential
, false};
1359 case Builtin::BI_interlockedbittestandset
:
1360 return {Set
, Sequential
, false};
1362 // X86-specific 64-bit variants.
1363 case Builtin::BI_bittest64
:
1364 return {TestOnly
, Unlocked
, true};
1365 case Builtin::BI_bittestandcomplement64
:
1366 return {Complement
, Unlocked
, true};
1367 case Builtin::BI_bittestandreset64
:
1368 return {Reset
, Unlocked
, true};
1369 case Builtin::BI_bittestandset64
:
1370 return {Set
, Unlocked
, true};
1371 case Builtin::BI_interlockedbittestandreset64
:
1372 return {Reset
, Sequential
, true};
1373 case Builtin::BI_interlockedbittestandset64
:
1374 return {Set
, Sequential
, true};
1376 // ARM/AArch64-specific ordering variants.
1377 case Builtin::BI_interlockedbittestandset_acq
:
1378 return {Set
, Acquire
, false};
1379 case Builtin::BI_interlockedbittestandset_rel
:
1380 return {Set
, Release
, false};
1381 case Builtin::BI_interlockedbittestandset_nf
:
1382 return {Set
, NoFence
, false};
1383 case Builtin::BI_interlockedbittestandreset_acq
:
1384 return {Reset
, Acquire
, false};
1385 case Builtin::BI_interlockedbittestandreset_rel
:
1386 return {Reset
, Release
, false};
1387 case Builtin::BI_interlockedbittestandreset_nf
:
1388 return {Reset
, NoFence
, false};
1390 llvm_unreachable("expected only bittest intrinsics");
1393 static char bitActionToX86BTCode(BitTest::ActionKind A
) {
1395 case BitTest::TestOnly
: return '\0';
1396 case BitTest::Complement
: return 'c';
1397 case BitTest::Reset
: return 'r';
1398 case BitTest::Set
: return 's';
1400 llvm_unreachable("invalid action");
1403 static llvm::Value
*EmitX86BitTestIntrinsic(CodeGenFunction
&CGF
,
1405 const CallExpr
*E
, Value
*BitBase
,
1407 char Action
= bitActionToX86BTCode(BT
.Action
);
1408 char SizeSuffix
= BT
.Is64Bit
? 'q' : 'l';
1410 // Build the assembly.
1411 SmallString
<64> Asm
;
1412 raw_svector_ostream
AsmOS(Asm
);
1413 if (BT
.Interlocking
!= BitTest::Unlocked
)
1418 AsmOS
<< SizeSuffix
<< " $2, ($1)";
1420 // Build the constraints. FIXME: We should support immediates when possible.
1421 std::string Constraints
= "={@ccc},r,r,~{cc},~{memory}";
1422 std::string_view MachineClobbers
= CGF
.getTarget().getClobbers();
1423 if (!MachineClobbers
.empty()) {
1425 Constraints
+= MachineClobbers
;
1427 llvm::IntegerType
*IntType
= llvm::IntegerType::get(
1428 CGF
.getLLVMContext(),
1429 CGF
.getContext().getTypeSize(E
->getArg(1)->getType()));
1430 llvm::FunctionType
*FTy
=
1431 llvm::FunctionType::get(CGF
.Int8Ty
, {CGF
.UnqualPtrTy
, IntType
}, false);
1433 llvm::InlineAsm
*IA
=
1434 llvm::InlineAsm::get(FTy
, Asm
, Constraints
, /*hasSideEffects=*/true);
1435 return CGF
.Builder
.CreateCall(IA
, {BitBase
, BitPos
});
1438 static llvm::AtomicOrdering
1439 getBitTestAtomicOrdering(BitTest::InterlockingKind I
) {
1441 case BitTest::Unlocked
: return llvm::AtomicOrdering::NotAtomic
;
1442 case BitTest::Sequential
: return llvm::AtomicOrdering::SequentiallyConsistent
;
1443 case BitTest::Acquire
: return llvm::AtomicOrdering::Acquire
;
1444 case BitTest::Release
: return llvm::AtomicOrdering::Release
;
1445 case BitTest::NoFence
: return llvm::AtomicOrdering::Monotonic
;
1447 llvm_unreachable("invalid interlocking");
1450 /// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1451 /// bits and a bit position and read and optionally modify the bit at that
1452 /// position. The position index can be arbitrarily large, i.e. it can be larger
1453 /// than 31 or 63, so we need an indexed load in the general case.
1454 static llvm::Value
*EmitBitTestIntrinsic(CodeGenFunction
&CGF
,
1456 const CallExpr
*E
) {
1457 Value
*BitBase
= CGF
.EmitScalarExpr(E
->getArg(0));
1458 Value
*BitPos
= CGF
.EmitScalarExpr(E
->getArg(1));
1460 BitTest BT
= BitTest::decodeBitTestBuiltin(BuiltinID
);
1462 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1463 // indexing operation internally. Use them if possible.
1464 if (CGF
.getTarget().getTriple().isX86())
1465 return EmitX86BitTestIntrinsic(CGF
, BT
, E
, BitBase
, BitPos
);
1467 // Otherwise, use generic code to load one byte and test the bit. Use all but
1468 // the bottom three bits as the array index, and the bottom three bits to form
1470 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1471 Value
*ByteIndex
= CGF
.Builder
.CreateAShr(
1472 BitPos
, llvm::ConstantInt::get(BitPos
->getType(), 3), "bittest.byteidx");
1473 Address
ByteAddr(CGF
.Builder
.CreateInBoundsGEP(CGF
.Int8Ty
, BitBase
, ByteIndex
,
1474 "bittest.byteaddr"),
1475 CGF
.Int8Ty
, CharUnits::One());
1477 CGF
.Builder
.CreateAnd(CGF
.Builder
.CreateTrunc(BitPos
, CGF
.Int8Ty
),
1478 llvm::ConstantInt::get(CGF
.Int8Ty
, 0x7));
1480 // The updating instructions will need a mask.
1481 Value
*Mask
= nullptr;
1482 if (BT
.Action
!= BitTest::TestOnly
) {
1483 Mask
= CGF
.Builder
.CreateShl(llvm::ConstantInt::get(CGF
.Int8Ty
, 1), PosLow
,
1487 // Check the action and ordering of the interlocked intrinsics.
1488 llvm::AtomicOrdering Ordering
= getBitTestAtomicOrdering(BT
.Interlocking
);
1490 Value
*OldByte
= nullptr;
1491 if (Ordering
!= llvm::AtomicOrdering::NotAtomic
) {
1492 // Emit a combined atomicrmw load/store operation for the interlocked
1494 llvm::AtomicRMWInst::BinOp RMWOp
= llvm::AtomicRMWInst::Or
;
1495 if (BT
.Action
== BitTest::Reset
) {
1496 Mask
= CGF
.Builder
.CreateNot(Mask
);
1497 RMWOp
= llvm::AtomicRMWInst::And
;
1499 OldByte
= CGF
.Builder
.CreateAtomicRMW(RMWOp
, ByteAddr
, Mask
, Ordering
);
1501 // Emit a plain load for the non-interlocked intrinsics.
1502 OldByte
= CGF
.Builder
.CreateLoad(ByteAddr
, "bittest.byte");
1503 Value
*NewByte
= nullptr;
1504 switch (BT
.Action
) {
1505 case BitTest::TestOnly
:
1506 // Don't store anything.
1508 case BitTest::Complement
:
1509 NewByte
= CGF
.Builder
.CreateXor(OldByte
, Mask
);
1511 case BitTest::Reset
:
1512 NewByte
= CGF
.Builder
.CreateAnd(OldByte
, CGF
.Builder
.CreateNot(Mask
));
1515 NewByte
= CGF
.Builder
.CreateOr(OldByte
, Mask
);
1519 CGF
.Builder
.CreateStore(NewByte
, ByteAddr
);
1522 // However we loaded the old byte, either by plain load or atomicrmw, shift
1523 // the bit into the low position and mask it to 0 or 1.
1524 Value
*ShiftedByte
= CGF
.Builder
.CreateLShr(OldByte
, PosLow
, "bittest.shr");
1525 return CGF
.Builder
.CreateAnd(
1526 ShiftedByte
, llvm::ConstantInt::get(CGF
.Int8Ty
, 1), "bittest.res");
1529 static llvm::Value
*emitPPCLoadReserveIntrinsic(CodeGenFunction
&CGF
,
1531 const CallExpr
*E
) {
1532 Value
*Addr
= CGF
.EmitScalarExpr(E
->getArg(0));
1534 SmallString
<64> Asm
;
1535 raw_svector_ostream
AsmOS(Asm
);
1536 llvm::IntegerType
*RetType
= CGF
.Int32Ty
;
1538 switch (BuiltinID
) {
1539 case clang::PPC::BI__builtin_ppc_ldarx
:
1541 RetType
= CGF
.Int64Ty
;
1543 case clang::PPC::BI__builtin_ppc_lwarx
:
1545 RetType
= CGF
.Int32Ty
;
1547 case clang::PPC::BI__builtin_ppc_lharx
:
1549 RetType
= CGF
.Int16Ty
;
1551 case clang::PPC::BI__builtin_ppc_lbarx
:
1553 RetType
= CGF
.Int8Ty
;
1556 llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1559 AsmOS
<< "$0, ${1:y}";
1561 std::string Constraints
= "=r,*Z,~{memory}";
1562 std::string_view MachineClobbers
= CGF
.getTarget().getClobbers();
1563 if (!MachineClobbers
.empty()) {
1565 Constraints
+= MachineClobbers
;
1568 llvm::Type
*PtrType
= CGF
.UnqualPtrTy
;
1569 llvm::FunctionType
*FTy
= llvm::FunctionType::get(RetType
, {PtrType
}, false);
1571 llvm::InlineAsm
*IA
=
1572 llvm::InlineAsm::get(FTy
, Asm
, Constraints
, /*hasSideEffects=*/true);
1573 llvm::CallInst
*CI
= CGF
.Builder
.CreateCall(IA
, {Addr
});
1575 0, Attribute::get(CGF
.getLLVMContext(), Attribute::ElementType
, RetType
));
1580 enum class MSVCSetJmpKind
{
1587 /// MSVC handles setjmp a bit differently on different platforms. On every
1588 /// architecture except 32-bit x86, the frame address is passed. On x86, extra
1589 /// parameters can be passed as variadic arguments, but we always pass none.
1590 static RValue
EmitMSVCRTSetJmp(CodeGenFunction
&CGF
, MSVCSetJmpKind SJKind
,
1591 const CallExpr
*E
) {
1592 llvm::Value
*Arg1
= nullptr;
1593 llvm::Type
*Arg1Ty
= nullptr;
1595 bool IsVarArg
= false;
1596 if (SJKind
== MSVCSetJmpKind::_setjmp3
) {
1598 Arg1Ty
= CGF
.Int32Ty
;
1599 Arg1
= llvm::ConstantInt::get(CGF
.IntTy
, 0);
1602 Name
= SJKind
== MSVCSetJmpKind::_setjmp
? "_setjmp" : "_setjmpex";
1603 Arg1Ty
= CGF
.Int8PtrTy
;
1604 if (CGF
.getTarget().getTriple().getArch() == llvm::Triple::aarch64
) {
1605 Arg1
= CGF
.Builder
.CreateCall(
1606 CGF
.CGM
.getIntrinsic(Intrinsic::sponentry
, CGF
.AllocaInt8PtrTy
));
1608 Arg1
= CGF
.Builder
.CreateCall(
1609 CGF
.CGM
.getIntrinsic(Intrinsic::frameaddress
, CGF
.AllocaInt8PtrTy
),
1610 llvm::ConstantInt::get(CGF
.Int32Ty
, 0));
1613 // Mark the call site and declaration with ReturnsTwice.
1614 llvm::Type
*ArgTypes
[2] = {CGF
.Int8PtrTy
, Arg1Ty
};
1615 llvm::AttributeList ReturnsTwiceAttr
= llvm::AttributeList::get(
1616 CGF
.getLLVMContext(), llvm::AttributeList::FunctionIndex
,
1617 llvm::Attribute::ReturnsTwice
);
1618 llvm::FunctionCallee SetJmpFn
= CGF
.CGM
.CreateRuntimeFunction(
1619 llvm::FunctionType::get(CGF
.IntTy
, ArgTypes
, IsVarArg
), Name
,
1620 ReturnsTwiceAttr
, /*Local=*/true);
1622 llvm::Value
*Buf
= CGF
.Builder
.CreateBitOrPointerCast(
1623 CGF
.EmitScalarExpr(E
->getArg(0)), CGF
.Int8PtrTy
);
1624 llvm::Value
*Args
[] = {Buf
, Arg1
};
1625 llvm::CallBase
*CB
= CGF
.EmitRuntimeCallOrInvoke(SetJmpFn
, Args
);
1626 CB
->setAttributes(ReturnsTwiceAttr
);
1627 return RValue::get(CB
);
1630 // Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code,
1631 // we handle them here.
1632 enum class CodeGenFunction::MSVCIntrin
{
1636 _InterlockedCompareExchange
,
1637 _InterlockedDecrement
,
1638 _InterlockedExchange
,
1639 _InterlockedExchangeAdd
,
1640 _InterlockedExchangeSub
,
1641 _InterlockedIncrement
,
1644 _InterlockedExchangeAdd_acq
,
1645 _InterlockedExchangeAdd_rel
,
1646 _InterlockedExchangeAdd_nf
,
1647 _InterlockedExchange_acq
,
1648 _InterlockedExchange_rel
,
1649 _InterlockedExchange_nf
,
1650 _InterlockedCompareExchange_acq
,
1651 _InterlockedCompareExchange_rel
,
1652 _InterlockedCompareExchange_nf
,
1653 _InterlockedCompareExchange128
,
1654 _InterlockedCompareExchange128_acq
,
1655 _InterlockedCompareExchange128_rel
,
1656 _InterlockedCompareExchange128_nf
,
1660 _InterlockedXor_acq
,
1661 _InterlockedXor_rel
,
1663 _InterlockedAnd_acq
,
1664 _InterlockedAnd_rel
,
1666 _InterlockedIncrement_acq
,
1667 _InterlockedIncrement_rel
,
1668 _InterlockedIncrement_nf
,
1669 _InterlockedDecrement_acq
,
1670 _InterlockedDecrement_rel
,
1671 _InterlockedDecrement_nf
,
1675 static std::optional
<CodeGenFunction::MSVCIntrin
>
1676 translateArmToMsvcIntrin(unsigned BuiltinID
) {
1677 using MSVCIntrin
= CodeGenFunction::MSVCIntrin
;
1678 switch (BuiltinID
) {
1680 return std::nullopt
;
1681 case clang::ARM::BI_BitScanForward
:
1682 case clang::ARM::BI_BitScanForward64
:
1683 return MSVCIntrin::_BitScanForward
;
1684 case clang::ARM::BI_BitScanReverse
:
1685 case clang::ARM::BI_BitScanReverse64
:
1686 return MSVCIntrin::_BitScanReverse
;
1687 case clang::ARM::BI_InterlockedAnd64
:
1688 return MSVCIntrin::_InterlockedAnd
;
1689 case clang::ARM::BI_InterlockedExchange64
:
1690 return MSVCIntrin::_InterlockedExchange
;
1691 case clang::ARM::BI_InterlockedExchangeAdd64
:
1692 return MSVCIntrin::_InterlockedExchangeAdd
;
1693 case clang::ARM::BI_InterlockedExchangeSub64
:
1694 return MSVCIntrin::_InterlockedExchangeSub
;
1695 case clang::ARM::BI_InterlockedOr64
:
1696 return MSVCIntrin::_InterlockedOr
;
1697 case clang::ARM::BI_InterlockedXor64
:
1698 return MSVCIntrin::_InterlockedXor
;
1699 case clang::ARM::BI_InterlockedDecrement64
:
1700 return MSVCIntrin::_InterlockedDecrement
;
1701 case clang::ARM::BI_InterlockedIncrement64
:
1702 return MSVCIntrin::_InterlockedIncrement
;
1703 case clang::ARM::BI_InterlockedExchangeAdd8_acq
:
1704 case clang::ARM::BI_InterlockedExchangeAdd16_acq
:
1705 case clang::ARM::BI_InterlockedExchangeAdd_acq
:
1706 case clang::ARM::BI_InterlockedExchangeAdd64_acq
:
1707 return MSVCIntrin::_InterlockedExchangeAdd_acq
;
1708 case clang::ARM::BI_InterlockedExchangeAdd8_rel
:
1709 case clang::ARM::BI_InterlockedExchangeAdd16_rel
:
1710 case clang::ARM::BI_InterlockedExchangeAdd_rel
:
1711 case clang::ARM::BI_InterlockedExchangeAdd64_rel
:
1712 return MSVCIntrin::_InterlockedExchangeAdd_rel
;
1713 case clang::ARM::BI_InterlockedExchangeAdd8_nf
:
1714 case clang::ARM::BI_InterlockedExchangeAdd16_nf
:
1715 case clang::ARM::BI_InterlockedExchangeAdd_nf
:
1716 case clang::ARM::BI_InterlockedExchangeAdd64_nf
:
1717 return MSVCIntrin::_InterlockedExchangeAdd_nf
;
1718 case clang::ARM::BI_InterlockedExchange8_acq
:
1719 case clang::ARM::BI_InterlockedExchange16_acq
:
1720 case clang::ARM::BI_InterlockedExchange_acq
:
1721 case clang::ARM::BI_InterlockedExchange64_acq
:
1722 case clang::ARM::BI_InterlockedExchangePointer_acq
:
1723 return MSVCIntrin::_InterlockedExchange_acq
;
1724 case clang::ARM::BI_InterlockedExchange8_rel
:
1725 case clang::ARM::BI_InterlockedExchange16_rel
:
1726 case clang::ARM::BI_InterlockedExchange_rel
:
1727 case clang::ARM::BI_InterlockedExchange64_rel
:
1728 case clang::ARM::BI_InterlockedExchangePointer_rel
:
1729 return MSVCIntrin::_InterlockedExchange_rel
;
1730 case clang::ARM::BI_InterlockedExchange8_nf
:
1731 case clang::ARM::BI_InterlockedExchange16_nf
:
1732 case clang::ARM::BI_InterlockedExchange_nf
:
1733 case clang::ARM::BI_InterlockedExchange64_nf
:
1734 case clang::ARM::BI_InterlockedExchangePointer_nf
:
1735 return MSVCIntrin::_InterlockedExchange_nf
;
1736 case clang::ARM::BI_InterlockedCompareExchange8_acq
:
1737 case clang::ARM::BI_InterlockedCompareExchange16_acq
:
1738 case clang::ARM::BI_InterlockedCompareExchange_acq
:
1739 case clang::ARM::BI_InterlockedCompareExchange64_acq
:
1740 case clang::ARM::BI_InterlockedCompareExchangePointer_acq
:
1741 return MSVCIntrin::_InterlockedCompareExchange_acq
;
1742 case clang::ARM::BI_InterlockedCompareExchange8_rel
:
1743 case clang::ARM::BI_InterlockedCompareExchange16_rel
:
1744 case clang::ARM::BI_InterlockedCompareExchange_rel
:
1745 case clang::ARM::BI_InterlockedCompareExchange64_rel
:
1746 case clang::ARM::BI_InterlockedCompareExchangePointer_rel
:
1747 return MSVCIntrin::_InterlockedCompareExchange_rel
;
1748 case clang::ARM::BI_InterlockedCompareExchange8_nf
:
1749 case clang::ARM::BI_InterlockedCompareExchange16_nf
:
1750 case clang::ARM::BI_InterlockedCompareExchange_nf
:
1751 case clang::ARM::BI_InterlockedCompareExchange64_nf
:
1752 return MSVCIntrin::_InterlockedCompareExchange_nf
;
1753 case clang::ARM::BI_InterlockedOr8_acq
:
1754 case clang::ARM::BI_InterlockedOr16_acq
:
1755 case clang::ARM::BI_InterlockedOr_acq
:
1756 case clang::ARM::BI_InterlockedOr64_acq
:
1757 return MSVCIntrin::_InterlockedOr_acq
;
1758 case clang::ARM::BI_InterlockedOr8_rel
:
1759 case clang::ARM::BI_InterlockedOr16_rel
:
1760 case clang::ARM::BI_InterlockedOr_rel
:
1761 case clang::ARM::BI_InterlockedOr64_rel
:
1762 return MSVCIntrin::_InterlockedOr_rel
;
1763 case clang::ARM::BI_InterlockedOr8_nf
:
1764 case clang::ARM::BI_InterlockedOr16_nf
:
1765 case clang::ARM::BI_InterlockedOr_nf
:
1766 case clang::ARM::BI_InterlockedOr64_nf
:
1767 return MSVCIntrin::_InterlockedOr_nf
;
1768 case clang::ARM::BI_InterlockedXor8_acq
:
1769 case clang::ARM::BI_InterlockedXor16_acq
:
1770 case clang::ARM::BI_InterlockedXor_acq
:
1771 case clang::ARM::BI_InterlockedXor64_acq
:
1772 return MSVCIntrin::_InterlockedXor_acq
;
1773 case clang::ARM::BI_InterlockedXor8_rel
:
1774 case clang::ARM::BI_InterlockedXor16_rel
:
1775 case clang::ARM::BI_InterlockedXor_rel
:
1776 case clang::ARM::BI_InterlockedXor64_rel
:
1777 return MSVCIntrin::_InterlockedXor_rel
;
1778 case clang::ARM::BI_InterlockedXor8_nf
:
1779 case clang::ARM::BI_InterlockedXor16_nf
:
1780 case clang::ARM::BI_InterlockedXor_nf
:
1781 case clang::ARM::BI_InterlockedXor64_nf
:
1782 return MSVCIntrin::_InterlockedXor_nf
;
1783 case clang::ARM::BI_InterlockedAnd8_acq
:
1784 case clang::ARM::BI_InterlockedAnd16_acq
:
1785 case clang::ARM::BI_InterlockedAnd_acq
:
1786 case clang::ARM::BI_InterlockedAnd64_acq
:
1787 return MSVCIntrin::_InterlockedAnd_acq
;
1788 case clang::ARM::BI_InterlockedAnd8_rel
:
1789 case clang::ARM::BI_InterlockedAnd16_rel
:
1790 case clang::ARM::BI_InterlockedAnd_rel
:
1791 case clang::ARM::BI_InterlockedAnd64_rel
:
1792 return MSVCIntrin::_InterlockedAnd_rel
;
1793 case clang::ARM::BI_InterlockedAnd8_nf
:
1794 case clang::ARM::BI_InterlockedAnd16_nf
:
1795 case clang::ARM::BI_InterlockedAnd_nf
:
1796 case clang::ARM::BI_InterlockedAnd64_nf
:
1797 return MSVCIntrin::_InterlockedAnd_nf
;
1798 case clang::ARM::BI_InterlockedIncrement16_acq
:
1799 case clang::ARM::BI_InterlockedIncrement_acq
:
1800 case clang::ARM::BI_InterlockedIncrement64_acq
:
1801 return MSVCIntrin::_InterlockedIncrement_acq
;
1802 case clang::ARM::BI_InterlockedIncrement16_rel
:
1803 case clang::ARM::BI_InterlockedIncrement_rel
:
1804 case clang::ARM::BI_InterlockedIncrement64_rel
:
1805 return MSVCIntrin::_InterlockedIncrement_rel
;
1806 case clang::ARM::BI_InterlockedIncrement16_nf
:
1807 case clang::ARM::BI_InterlockedIncrement_nf
:
1808 case clang::ARM::BI_InterlockedIncrement64_nf
:
1809 return MSVCIntrin::_InterlockedIncrement_nf
;
1810 case clang::ARM::BI_InterlockedDecrement16_acq
:
1811 case clang::ARM::BI_InterlockedDecrement_acq
:
1812 case clang::ARM::BI_InterlockedDecrement64_acq
:
1813 return MSVCIntrin::_InterlockedDecrement_acq
;
1814 case clang::ARM::BI_InterlockedDecrement16_rel
:
1815 case clang::ARM::BI_InterlockedDecrement_rel
:
1816 case clang::ARM::BI_InterlockedDecrement64_rel
:
1817 return MSVCIntrin::_InterlockedDecrement_rel
;
1818 case clang::ARM::BI_InterlockedDecrement16_nf
:
1819 case clang::ARM::BI_InterlockedDecrement_nf
:
1820 case clang::ARM::BI_InterlockedDecrement64_nf
:
1821 return MSVCIntrin::_InterlockedDecrement_nf
;
1823 llvm_unreachable("must return from switch");
1826 static std::optional
<CodeGenFunction::MSVCIntrin
>
1827 translateAarch64ToMsvcIntrin(unsigned BuiltinID
) {
1828 using MSVCIntrin
= CodeGenFunction::MSVCIntrin
;
1829 switch (BuiltinID
) {
1831 return std::nullopt
;
1832 case clang::AArch64::BI_BitScanForward
:
1833 case clang::AArch64::BI_BitScanForward64
:
1834 return MSVCIntrin::_BitScanForward
;
1835 case clang::AArch64::BI_BitScanReverse
:
1836 case clang::AArch64::BI_BitScanReverse64
:
1837 return MSVCIntrin::_BitScanReverse
;
1838 case clang::AArch64::BI_InterlockedAnd64
:
1839 return MSVCIntrin::_InterlockedAnd
;
1840 case clang::AArch64::BI_InterlockedExchange64
:
1841 return MSVCIntrin::_InterlockedExchange
;
1842 case clang::AArch64::BI_InterlockedExchangeAdd64
:
1843 return MSVCIntrin::_InterlockedExchangeAdd
;
1844 case clang::AArch64::BI_InterlockedExchangeSub64
:
1845 return MSVCIntrin::_InterlockedExchangeSub
;
1846 case clang::AArch64::BI_InterlockedOr64
:
1847 return MSVCIntrin::_InterlockedOr
;
1848 case clang::AArch64::BI_InterlockedXor64
:
1849 return MSVCIntrin::_InterlockedXor
;
1850 case clang::AArch64::BI_InterlockedDecrement64
:
1851 return MSVCIntrin::_InterlockedDecrement
;
1852 case clang::AArch64::BI_InterlockedIncrement64
:
1853 return MSVCIntrin::_InterlockedIncrement
;
1854 case clang::AArch64::BI_InterlockedExchangeAdd8_acq
:
1855 case clang::AArch64::BI_InterlockedExchangeAdd16_acq
:
1856 case clang::AArch64::BI_InterlockedExchangeAdd_acq
:
1857 case clang::AArch64::BI_InterlockedExchangeAdd64_acq
:
1858 return MSVCIntrin::_InterlockedExchangeAdd_acq
;
1859 case clang::AArch64::BI_InterlockedExchangeAdd8_rel
:
1860 case clang::AArch64::BI_InterlockedExchangeAdd16_rel
:
1861 case clang::AArch64::BI_InterlockedExchangeAdd_rel
:
1862 case clang::AArch64::BI_InterlockedExchangeAdd64_rel
:
1863 return MSVCIntrin::_InterlockedExchangeAdd_rel
;
1864 case clang::AArch64::BI_InterlockedExchangeAdd8_nf
:
1865 case clang::AArch64::BI_InterlockedExchangeAdd16_nf
:
1866 case clang::AArch64::BI_InterlockedExchangeAdd_nf
:
1867 case clang::AArch64::BI_InterlockedExchangeAdd64_nf
:
1868 return MSVCIntrin::_InterlockedExchangeAdd_nf
;
1869 case clang::AArch64::BI_InterlockedExchange8_acq
:
1870 case clang::AArch64::BI_InterlockedExchange16_acq
:
1871 case clang::AArch64::BI_InterlockedExchange_acq
:
1872 case clang::AArch64::BI_InterlockedExchange64_acq
:
1873 case clang::AArch64::BI_InterlockedExchangePointer_acq
:
1874 return MSVCIntrin::_InterlockedExchange_acq
;
1875 case clang::AArch64::BI_InterlockedExchange8_rel
:
1876 case clang::AArch64::BI_InterlockedExchange16_rel
:
1877 case clang::AArch64::BI_InterlockedExchange_rel
:
1878 case clang::AArch64::BI_InterlockedExchange64_rel
:
1879 case clang::AArch64::BI_InterlockedExchangePointer_rel
:
1880 return MSVCIntrin::_InterlockedExchange_rel
;
1881 case clang::AArch64::BI_InterlockedExchange8_nf
:
1882 case clang::AArch64::BI_InterlockedExchange16_nf
:
1883 case clang::AArch64::BI_InterlockedExchange_nf
:
1884 case clang::AArch64::BI_InterlockedExchange64_nf
:
1885 case clang::AArch64::BI_InterlockedExchangePointer_nf
:
1886 return MSVCIntrin::_InterlockedExchange_nf
;
1887 case clang::AArch64::BI_InterlockedCompareExchange8_acq
:
1888 case clang::AArch64::BI_InterlockedCompareExchange16_acq
:
1889 case clang::AArch64::BI_InterlockedCompareExchange_acq
:
1890 case clang::AArch64::BI_InterlockedCompareExchange64_acq
:
1891 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq
:
1892 return MSVCIntrin::_InterlockedCompareExchange_acq
;
1893 case clang::AArch64::BI_InterlockedCompareExchange8_rel
:
1894 case clang::AArch64::BI_InterlockedCompareExchange16_rel
:
1895 case clang::AArch64::BI_InterlockedCompareExchange_rel
:
1896 case clang::AArch64::BI_InterlockedCompareExchange64_rel
:
1897 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel
:
1898 return MSVCIntrin::_InterlockedCompareExchange_rel
;
1899 case clang::AArch64::BI_InterlockedCompareExchange8_nf
:
1900 case clang::AArch64::BI_InterlockedCompareExchange16_nf
:
1901 case clang::AArch64::BI_InterlockedCompareExchange_nf
:
1902 case clang::AArch64::BI_InterlockedCompareExchange64_nf
:
1903 return MSVCIntrin::_InterlockedCompareExchange_nf
;
1904 case clang::AArch64::BI_InterlockedCompareExchange128
:
1905 return MSVCIntrin::_InterlockedCompareExchange128
;
1906 case clang::AArch64::BI_InterlockedCompareExchange128_acq
:
1907 return MSVCIntrin::_InterlockedCompareExchange128_acq
;
1908 case clang::AArch64::BI_InterlockedCompareExchange128_nf
:
1909 return MSVCIntrin::_InterlockedCompareExchange128_nf
;
1910 case clang::AArch64::BI_InterlockedCompareExchange128_rel
:
1911 return MSVCIntrin::_InterlockedCompareExchange128_rel
;
1912 case clang::AArch64::BI_InterlockedOr8_acq
:
1913 case clang::AArch64::BI_InterlockedOr16_acq
:
1914 case clang::AArch64::BI_InterlockedOr_acq
:
1915 case clang::AArch64::BI_InterlockedOr64_acq
:
1916 return MSVCIntrin::_InterlockedOr_acq
;
1917 case clang::AArch64::BI_InterlockedOr8_rel
:
1918 case clang::AArch64::BI_InterlockedOr16_rel
:
1919 case clang::AArch64::BI_InterlockedOr_rel
:
1920 case clang::AArch64::BI_InterlockedOr64_rel
:
1921 return MSVCIntrin::_InterlockedOr_rel
;
1922 case clang::AArch64::BI_InterlockedOr8_nf
:
1923 case clang::AArch64::BI_InterlockedOr16_nf
:
1924 case clang::AArch64::BI_InterlockedOr_nf
:
1925 case clang::AArch64::BI_InterlockedOr64_nf
:
1926 return MSVCIntrin::_InterlockedOr_nf
;
1927 case clang::AArch64::BI_InterlockedXor8_acq
:
1928 case clang::AArch64::BI_InterlockedXor16_acq
:
1929 case clang::AArch64::BI_InterlockedXor_acq
:
1930 case clang::AArch64::BI_InterlockedXor64_acq
:
1931 return MSVCIntrin::_InterlockedXor_acq
;
1932 case clang::AArch64::BI_InterlockedXor8_rel
:
1933 case clang::AArch64::BI_InterlockedXor16_rel
:
1934 case clang::AArch64::BI_InterlockedXor_rel
:
1935 case clang::AArch64::BI_InterlockedXor64_rel
:
1936 return MSVCIntrin::_InterlockedXor_rel
;
1937 case clang::AArch64::BI_InterlockedXor8_nf
:
1938 case clang::AArch64::BI_InterlockedXor16_nf
:
1939 case clang::AArch64::BI_InterlockedXor_nf
:
1940 case clang::AArch64::BI_InterlockedXor64_nf
:
1941 return MSVCIntrin::_InterlockedXor_nf
;
1942 case clang::AArch64::BI_InterlockedAnd8_acq
:
1943 case clang::AArch64::BI_InterlockedAnd16_acq
:
1944 case clang::AArch64::BI_InterlockedAnd_acq
:
1945 case clang::AArch64::BI_InterlockedAnd64_acq
:
1946 return MSVCIntrin::_InterlockedAnd_acq
;
1947 case clang::AArch64::BI_InterlockedAnd8_rel
:
1948 case clang::AArch64::BI_InterlockedAnd16_rel
:
1949 case clang::AArch64::BI_InterlockedAnd_rel
:
1950 case clang::AArch64::BI_InterlockedAnd64_rel
:
1951 return MSVCIntrin::_InterlockedAnd_rel
;
1952 case clang::AArch64::BI_InterlockedAnd8_nf
:
1953 case clang::AArch64::BI_InterlockedAnd16_nf
:
1954 case clang::AArch64::BI_InterlockedAnd_nf
:
1955 case clang::AArch64::BI_InterlockedAnd64_nf
:
1956 return MSVCIntrin::_InterlockedAnd_nf
;
1957 case clang::AArch64::BI_InterlockedIncrement16_acq
:
1958 case clang::AArch64::BI_InterlockedIncrement_acq
:
1959 case clang::AArch64::BI_InterlockedIncrement64_acq
:
1960 return MSVCIntrin::_InterlockedIncrement_acq
;
1961 case clang::AArch64::BI_InterlockedIncrement16_rel
:
1962 case clang::AArch64::BI_InterlockedIncrement_rel
:
1963 case clang::AArch64::BI_InterlockedIncrement64_rel
:
1964 return MSVCIntrin::_InterlockedIncrement_rel
;
1965 case clang::AArch64::BI_InterlockedIncrement16_nf
:
1966 case clang::AArch64::BI_InterlockedIncrement_nf
:
1967 case clang::AArch64::BI_InterlockedIncrement64_nf
:
1968 return MSVCIntrin::_InterlockedIncrement_nf
;
1969 case clang::AArch64::BI_InterlockedDecrement16_acq
:
1970 case clang::AArch64::BI_InterlockedDecrement_acq
:
1971 case clang::AArch64::BI_InterlockedDecrement64_acq
:
1972 return MSVCIntrin::_InterlockedDecrement_acq
;
1973 case clang::AArch64::BI_InterlockedDecrement16_rel
:
1974 case clang::AArch64::BI_InterlockedDecrement_rel
:
1975 case clang::AArch64::BI_InterlockedDecrement64_rel
:
1976 return MSVCIntrin::_InterlockedDecrement_rel
;
1977 case clang::AArch64::BI_InterlockedDecrement16_nf
:
1978 case clang::AArch64::BI_InterlockedDecrement_nf
:
1979 case clang::AArch64::BI_InterlockedDecrement64_nf
:
1980 return MSVCIntrin::_InterlockedDecrement_nf
;
1982 llvm_unreachable("must return from switch");
1985 static std::optional
<CodeGenFunction::MSVCIntrin
>
1986 translateX86ToMsvcIntrin(unsigned BuiltinID
) {
1987 using MSVCIntrin
= CodeGenFunction::MSVCIntrin
;
1988 switch (BuiltinID
) {
1990 return std::nullopt
;
1991 case clang::X86::BI_BitScanForward
:
1992 case clang::X86::BI_BitScanForward64
:
1993 return MSVCIntrin::_BitScanForward
;
1994 case clang::X86::BI_BitScanReverse
:
1995 case clang::X86::BI_BitScanReverse64
:
1996 return MSVCIntrin::_BitScanReverse
;
1997 case clang::X86::BI_InterlockedAnd64
:
1998 return MSVCIntrin::_InterlockedAnd
;
1999 case clang::X86::BI_InterlockedCompareExchange128
:
2000 return MSVCIntrin::_InterlockedCompareExchange128
;
2001 case clang::X86::BI_InterlockedExchange64
:
2002 return MSVCIntrin::_InterlockedExchange
;
2003 case clang::X86::BI_InterlockedExchangeAdd64
:
2004 return MSVCIntrin::_InterlockedExchangeAdd
;
2005 case clang::X86::BI_InterlockedExchangeSub64
:
2006 return MSVCIntrin::_InterlockedExchangeSub
;
2007 case clang::X86::BI_InterlockedOr64
:
2008 return MSVCIntrin::_InterlockedOr
;
2009 case clang::X86::BI_InterlockedXor64
:
2010 return MSVCIntrin::_InterlockedXor
;
2011 case clang::X86::BI_InterlockedDecrement64
:
2012 return MSVCIntrin::_InterlockedDecrement
;
2013 case clang::X86::BI_InterlockedIncrement64
:
2014 return MSVCIntrin::_InterlockedIncrement
;
2016 llvm_unreachable("must return from switch");
2019 // Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
2020 Value
*CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID
,
2021 const CallExpr
*E
) {
2022 switch (BuiltinID
) {
2023 case MSVCIntrin::_BitScanForward
:
2024 case MSVCIntrin::_BitScanReverse
: {
2025 Address
IndexAddress(EmitPointerWithAlignment(E
->getArg(0)));
2026 Value
*ArgValue
= EmitScalarExpr(E
->getArg(1));
2028 llvm::Type
*ArgType
= ArgValue
->getType();
2029 llvm::Type
*IndexType
= IndexAddress
.getElementType();
2030 llvm::Type
*ResultType
= ConvertType(E
->getType());
2032 Value
*ArgZero
= llvm::Constant::getNullValue(ArgType
);
2033 Value
*ResZero
= llvm::Constant::getNullValue(ResultType
);
2034 Value
*ResOne
= llvm::ConstantInt::get(ResultType
, 1);
2036 BasicBlock
*Begin
= Builder
.GetInsertBlock();
2037 BasicBlock
*End
= createBasicBlock("bitscan_end", this->CurFn
);
2038 Builder
.SetInsertPoint(End
);
2039 PHINode
*Result
= Builder
.CreatePHI(ResultType
, 2, "bitscan_result");
2041 Builder
.SetInsertPoint(Begin
);
2042 Value
*IsZero
= Builder
.CreateICmpEQ(ArgValue
, ArgZero
);
2043 BasicBlock
*NotZero
= createBasicBlock("bitscan_not_zero", this->CurFn
);
2044 Builder
.CreateCondBr(IsZero
, End
, NotZero
);
2045 Result
->addIncoming(ResZero
, Begin
);
2047 Builder
.SetInsertPoint(NotZero
);
2049 if (BuiltinID
== MSVCIntrin::_BitScanForward
) {
2050 Function
*F
= CGM
.getIntrinsic(Intrinsic::cttz
, ArgType
);
2051 Value
*ZeroCount
= Builder
.CreateCall(F
, {ArgValue
, Builder
.getTrue()});
2052 ZeroCount
= Builder
.CreateIntCast(ZeroCount
, IndexType
, false);
2053 Builder
.CreateStore(ZeroCount
, IndexAddress
, false);
2055 unsigned ArgWidth
= cast
<llvm::IntegerType
>(ArgType
)->getBitWidth();
2056 Value
*ArgTypeLastIndex
= llvm::ConstantInt::get(IndexType
, ArgWidth
- 1);
2058 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctlz
, ArgType
);
2059 Value
*ZeroCount
= Builder
.CreateCall(F
, {ArgValue
, Builder
.getTrue()});
2060 ZeroCount
= Builder
.CreateIntCast(ZeroCount
, IndexType
, false);
2061 Value
*Index
= Builder
.CreateNSWSub(ArgTypeLastIndex
, ZeroCount
);
2062 Builder
.CreateStore(Index
, IndexAddress
, false);
2064 Builder
.CreateBr(End
);
2065 Result
->addIncoming(ResOne
, NotZero
);
2067 Builder
.SetInsertPoint(End
);
2070 case MSVCIntrin::_InterlockedAnd
:
2071 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And
, E
);
2072 case MSVCIntrin::_InterlockedExchange
:
2073 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg
, E
);
2074 case MSVCIntrin::_InterlockedExchangeAdd
:
2075 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add
, E
);
2076 case MSVCIntrin::_InterlockedExchangeSub
:
2077 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub
, E
);
2078 case MSVCIntrin::_InterlockedOr
:
2079 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or
, E
);
2080 case MSVCIntrin::_InterlockedXor
:
2081 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor
, E
);
2082 case MSVCIntrin::_InterlockedExchangeAdd_acq
:
2083 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add
, E
,
2084 AtomicOrdering::Acquire
);
2085 case MSVCIntrin::_InterlockedExchangeAdd_rel
:
2086 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add
, E
,
2087 AtomicOrdering::Release
);
2088 case MSVCIntrin::_InterlockedExchangeAdd_nf
:
2089 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add
, E
,
2090 AtomicOrdering::Monotonic
);
2091 case MSVCIntrin::_InterlockedExchange_acq
:
2092 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg
, E
,
2093 AtomicOrdering::Acquire
);
2094 case MSVCIntrin::_InterlockedExchange_rel
:
2095 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg
, E
,
2096 AtomicOrdering::Release
);
2097 case MSVCIntrin::_InterlockedExchange_nf
:
2098 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg
, E
,
2099 AtomicOrdering::Monotonic
);
2100 case MSVCIntrin::_InterlockedCompareExchange
:
2101 return EmitAtomicCmpXchgForMSIntrin(*this, E
);
2102 case MSVCIntrin::_InterlockedCompareExchange_acq
:
2103 return EmitAtomicCmpXchgForMSIntrin(*this, E
, AtomicOrdering::Acquire
);
2104 case MSVCIntrin::_InterlockedCompareExchange_rel
:
2105 return EmitAtomicCmpXchgForMSIntrin(*this, E
, AtomicOrdering::Release
);
2106 case MSVCIntrin::_InterlockedCompareExchange_nf
:
2107 return EmitAtomicCmpXchgForMSIntrin(*this, E
, AtomicOrdering::Monotonic
);
2108 case MSVCIntrin::_InterlockedCompareExchange128
:
2109 return EmitAtomicCmpXchg128ForMSIntrin(
2110 *this, E
, AtomicOrdering::SequentiallyConsistent
);
2111 case MSVCIntrin::_InterlockedCompareExchange128_acq
:
2112 return EmitAtomicCmpXchg128ForMSIntrin(*this, E
, AtomicOrdering::Acquire
);
2113 case MSVCIntrin::_InterlockedCompareExchange128_rel
:
2114 return EmitAtomicCmpXchg128ForMSIntrin(*this, E
, AtomicOrdering::Release
);
2115 case MSVCIntrin::_InterlockedCompareExchange128_nf
:
2116 return EmitAtomicCmpXchg128ForMSIntrin(*this, E
, AtomicOrdering::Monotonic
);
2117 case MSVCIntrin::_InterlockedOr_acq
:
2118 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or
, E
,
2119 AtomicOrdering::Acquire
);
2120 case MSVCIntrin::_InterlockedOr_rel
:
2121 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or
, E
,
2122 AtomicOrdering::Release
);
2123 case MSVCIntrin::_InterlockedOr_nf
:
2124 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or
, E
,
2125 AtomicOrdering::Monotonic
);
2126 case MSVCIntrin::_InterlockedXor_acq
:
2127 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor
, E
,
2128 AtomicOrdering::Acquire
);
2129 case MSVCIntrin::_InterlockedXor_rel
:
2130 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor
, E
,
2131 AtomicOrdering::Release
);
2132 case MSVCIntrin::_InterlockedXor_nf
:
2133 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor
, E
,
2134 AtomicOrdering::Monotonic
);
2135 case MSVCIntrin::_InterlockedAnd_acq
:
2136 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And
, E
,
2137 AtomicOrdering::Acquire
);
2138 case MSVCIntrin::_InterlockedAnd_rel
:
2139 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And
, E
,
2140 AtomicOrdering::Release
);
2141 case MSVCIntrin::_InterlockedAnd_nf
:
2142 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And
, E
,
2143 AtomicOrdering::Monotonic
);
2144 case MSVCIntrin::_InterlockedIncrement_acq
:
2145 return EmitAtomicIncrementValue(*this, E
, AtomicOrdering::Acquire
);
2146 case MSVCIntrin::_InterlockedIncrement_rel
:
2147 return EmitAtomicIncrementValue(*this, E
, AtomicOrdering::Release
);
2148 case MSVCIntrin::_InterlockedIncrement_nf
:
2149 return EmitAtomicIncrementValue(*this, E
, AtomicOrdering::Monotonic
);
2150 case MSVCIntrin::_InterlockedDecrement_acq
:
2151 return EmitAtomicDecrementValue(*this, E
, AtomicOrdering::Acquire
);
2152 case MSVCIntrin::_InterlockedDecrement_rel
:
2153 return EmitAtomicDecrementValue(*this, E
, AtomicOrdering::Release
);
2154 case MSVCIntrin::_InterlockedDecrement_nf
:
2155 return EmitAtomicDecrementValue(*this, E
, AtomicOrdering::Monotonic
);
2157 case MSVCIntrin::_InterlockedDecrement
:
2158 return EmitAtomicDecrementValue(*this, E
);
2159 case MSVCIntrin::_InterlockedIncrement
:
2160 return EmitAtomicIncrementValue(*this, E
);
2162 case MSVCIntrin::__fastfail
: {
2163 // Request immediate process termination from the kernel. The instruction
2164 // sequences to do this are documented on MSDN:
2165 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
2166 llvm::Triple::ArchType ISA
= getTarget().getTriple().getArch();
2167 StringRef Asm
, Constraints
;
2170 ErrorUnsupported(E
, "__fastfail call for this architecture");
2172 case llvm::Triple::x86
:
2173 case llvm::Triple::x86_64
:
2175 Constraints
= "{cx}";
2177 case llvm::Triple::thumb
:
2179 Constraints
= "{r0}";
2181 case llvm::Triple::aarch64
:
2182 Asm
= "brk #0xF003";
2183 Constraints
= "{w0}";
2185 llvm::FunctionType
*FTy
= llvm::FunctionType::get(VoidTy
, {Int32Ty
}, false);
2186 llvm::InlineAsm
*IA
=
2187 llvm::InlineAsm::get(FTy
, Asm
, Constraints
, /*hasSideEffects=*/true);
2188 llvm::AttributeList NoReturnAttr
= llvm::AttributeList::get(
2189 getLLVMContext(), llvm::AttributeList::FunctionIndex
,
2190 llvm::Attribute::NoReturn
);
2191 llvm::CallInst
*CI
= Builder
.CreateCall(IA
, EmitScalarExpr(E
->getArg(0)));
2192 CI
->setAttributes(NoReturnAttr
);
2196 llvm_unreachable("Incorrect MSVC intrinsic!");
2200 // ARC cleanup for __builtin_os_log_format
2201 struct CallObjCArcUse final
: EHScopeStack::Cleanup
{
2202 CallObjCArcUse(llvm::Value
*object
) : object(object
) {}
2203 llvm::Value
*object
;
2205 void Emit(CodeGenFunction
&CGF
, Flags flags
) override
{
2206 CGF
.EmitARCIntrinsicUse(object
);
2211 Value
*CodeGenFunction::EmitCheckedArgForBuiltin(const Expr
*E
,
2212 BuiltinCheckKind Kind
) {
2213 assert((Kind
== BCK_CLZPassedZero
|| Kind
== BCK_CTZPassedZero
) &&
2214 "Unsupported builtin check kind");
2216 Value
*ArgValue
= EmitScalarExpr(E
);
2217 if (!SanOpts
.has(SanitizerKind::Builtin
))
2220 SanitizerScope
SanScope(this);
2221 Value
*Cond
= Builder
.CreateICmpNE(
2222 ArgValue
, llvm::Constant::getNullValue(ArgValue
->getType()));
2223 EmitCheck(std::make_pair(Cond
, SanitizerKind::Builtin
),
2224 SanitizerHandler::InvalidBuiltin
,
2225 {EmitCheckSourceLocation(E
->getExprLoc()),
2226 llvm::ConstantInt::get(Builder
.getInt8Ty(), Kind
)},
2231 Value
*CodeGenFunction::EmitCheckedArgForAssume(const Expr
*E
) {
2232 Value
*ArgValue
= EvaluateExprAsBool(E
);
2233 if (!SanOpts
.has(SanitizerKind::Builtin
))
2236 SanitizerScope
SanScope(this);
2238 std::make_pair(ArgValue
, SanitizerKind::Builtin
),
2239 SanitizerHandler::InvalidBuiltin
,
2240 {EmitCheckSourceLocation(E
->getExprLoc()),
2241 llvm::ConstantInt::get(Builder
.getInt8Ty(), BCK_AssumePassedFalse
)},
2246 static Value
*EmitAbs(CodeGenFunction
&CGF
, Value
*ArgValue
, bool HasNSW
) {
2247 return CGF
.Builder
.CreateBinaryIntrinsic(
2248 Intrinsic::abs
, ArgValue
,
2249 ConstantInt::get(CGF
.Builder
.getInt1Ty(), HasNSW
));
2252 static Value
*EmitOverflowCheckedAbs(CodeGenFunction
&CGF
, const CallExpr
*E
,
2253 bool SanitizeOverflow
) {
2254 Value
*ArgValue
= CGF
.EmitScalarExpr(E
->getArg(0));
2256 // Try to eliminate overflow check.
2257 if (const auto *VCI
= dyn_cast
<llvm::ConstantInt
>(ArgValue
)) {
2258 if (!VCI
->isMinSignedValue())
2259 return EmitAbs(CGF
, ArgValue
, true);
2262 CodeGenFunction::SanitizerScope
SanScope(&CGF
);
2264 Constant
*Zero
= Constant::getNullValue(ArgValue
->getType());
2265 Value
*ResultAndOverflow
= CGF
.Builder
.CreateBinaryIntrinsic(
2266 Intrinsic::ssub_with_overflow
, Zero
, ArgValue
);
2267 Value
*Result
= CGF
.Builder
.CreateExtractValue(ResultAndOverflow
, 0);
2268 Value
*NotOverflow
= CGF
.Builder
.CreateNot(
2269 CGF
.Builder
.CreateExtractValue(ResultAndOverflow
, 1));
2271 // TODO: support -ftrapv-handler.
2272 if (SanitizeOverflow
) {
2273 CGF
.EmitCheck({{NotOverflow
, SanitizerKind::SignedIntegerOverflow
}},
2274 SanitizerHandler::NegateOverflow
,
2275 {CGF
.EmitCheckSourceLocation(E
->getArg(0)->getExprLoc()),
2276 CGF
.EmitCheckTypeDescriptor(E
->getType())},
2279 CGF
.EmitTrapCheck(NotOverflow
, SanitizerHandler::SubOverflow
);
2281 Value
*CmpResult
= CGF
.Builder
.CreateICmpSLT(ArgValue
, Zero
, "abscond");
2282 return CGF
.Builder
.CreateSelect(CmpResult
, Result
, ArgValue
, "abs");
2285 /// Get the argument type for arguments to os_log_helper.
2286 static CanQualType
getOSLogArgType(ASTContext
&C
, int Size
) {
2287 QualType UnsignedTy
= C
.getIntTypeForBitwidth(Size
* 8, /*Signed=*/false);
2288 return C
.getCanonicalType(UnsignedTy
);
2291 llvm::Function
*CodeGenFunction::generateBuiltinOSLogHelperFunction(
2292 const analyze_os_log::OSLogBufferLayout
&Layout
,
2293 CharUnits BufferAlignment
) {
2294 ASTContext
&Ctx
= getContext();
2296 llvm::SmallString
<64> Name
;
2298 raw_svector_ostream
OS(Name
);
2299 OS
<< "__os_log_helper";
2300 OS
<< "_" << BufferAlignment
.getQuantity();
2301 OS
<< "_" << int(Layout
.getSummaryByte());
2302 OS
<< "_" << int(Layout
.getNumArgsByte());
2303 for (const auto &Item
: Layout
.Items
)
2304 OS
<< "_" << int(Item
.getSizeByte()) << "_"
2305 << int(Item
.getDescriptorByte());
2308 if (llvm::Function
*F
= CGM
.getModule().getFunction(Name
))
2311 llvm::SmallVector
<QualType
, 4> ArgTys
;
2312 FunctionArgList Args
;
2313 Args
.push_back(ImplicitParamDecl::Create(
2314 Ctx
, nullptr, SourceLocation(), &Ctx
.Idents
.get("buffer"), Ctx
.VoidPtrTy
,
2315 ImplicitParamKind::Other
));
2316 ArgTys
.emplace_back(Ctx
.VoidPtrTy
);
2318 for (unsigned int I
= 0, E
= Layout
.Items
.size(); I
< E
; ++I
) {
2319 char Size
= Layout
.Items
[I
].getSizeByte();
2323 QualType ArgTy
= getOSLogArgType(Ctx
, Size
);
2324 Args
.push_back(ImplicitParamDecl::Create(
2325 Ctx
, nullptr, SourceLocation(),
2326 &Ctx
.Idents
.get(std::string("arg") + llvm::to_string(I
)), ArgTy
,
2327 ImplicitParamKind::Other
));
2328 ArgTys
.emplace_back(ArgTy
);
2331 QualType ReturnTy
= Ctx
.VoidTy
;
2333 // The helper function has linkonce_odr linkage to enable the linker to merge
2334 // identical functions. To ensure the merging always happens, 'noinline' is
2335 // attached to the function when compiling with -Oz.
2336 const CGFunctionInfo
&FI
=
2337 CGM
.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy
, Args
);
2338 llvm::FunctionType
*FuncTy
= CGM
.getTypes().GetFunctionType(FI
);
2339 llvm::Function
*Fn
= llvm::Function::Create(
2340 FuncTy
, llvm::GlobalValue::LinkOnceODRLinkage
, Name
, &CGM
.getModule());
2341 Fn
->setVisibility(llvm::GlobalValue::HiddenVisibility
);
2342 CGM
.SetLLVMFunctionAttributes(GlobalDecl(), FI
, Fn
, /*IsThunk=*/false);
2343 CGM
.SetLLVMFunctionAttributesForDefinition(nullptr, Fn
);
2344 Fn
->setDoesNotThrow();
2346 // Attach 'noinline' at -Oz.
2347 if (CGM
.getCodeGenOpts().OptimizeSize
== 2)
2348 Fn
->addFnAttr(llvm::Attribute::NoInline
);
2350 auto NL
= ApplyDebugLocation::CreateEmpty(*this);
2351 StartFunction(GlobalDecl(), ReturnTy
, Fn
, FI
, Args
);
2353 // Create a scope with an artificial location for the body of this function.
2354 auto AL
= ApplyDebugLocation::CreateArtificial(*this);
2357 Address BufAddr
= makeNaturalAddressForPointer(
2358 Builder
.CreateLoad(GetAddrOfLocalVar(Args
[0]), "buf"), Ctx
.VoidTy
,
2360 Builder
.CreateStore(Builder
.getInt8(Layout
.getSummaryByte()),
2361 Builder
.CreateConstByteGEP(BufAddr
, Offset
++, "summary"));
2362 Builder
.CreateStore(Builder
.getInt8(Layout
.getNumArgsByte()),
2363 Builder
.CreateConstByteGEP(BufAddr
, Offset
++, "numArgs"));
2366 for (const auto &Item
: Layout
.Items
) {
2367 Builder
.CreateStore(
2368 Builder
.getInt8(Item
.getDescriptorByte()),
2369 Builder
.CreateConstByteGEP(BufAddr
, Offset
++, "argDescriptor"));
2370 Builder
.CreateStore(
2371 Builder
.getInt8(Item
.getSizeByte()),
2372 Builder
.CreateConstByteGEP(BufAddr
, Offset
++, "argSize"));
2374 CharUnits Size
= Item
.size();
2375 if (!Size
.getQuantity())
2378 Address Arg
= GetAddrOfLocalVar(Args
[I
]);
2379 Address Addr
= Builder
.CreateConstByteGEP(BufAddr
, Offset
, "argData");
2380 Addr
= Addr
.withElementType(Arg
.getElementType());
2381 Builder
.CreateStore(Builder
.CreateLoad(Arg
), Addr
);
2391 RValue
CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr
&E
) {
2392 assert(E
.getNumArgs() >= 2 &&
2393 "__builtin_os_log_format takes at least 2 arguments");
2394 ASTContext
&Ctx
= getContext();
2395 analyze_os_log::OSLogBufferLayout Layout
;
2396 analyze_os_log::computeOSLogBufferLayout(Ctx
, &E
, Layout
);
2397 Address BufAddr
= EmitPointerWithAlignment(E
.getArg(0));
2398 llvm::SmallVector
<llvm::Value
*, 4> RetainableOperands
;
2400 // Ignore argument 1, the format string. It is not currently used.
2402 Args
.add(RValue::get(BufAddr
.emitRawPointer(*this)), Ctx
.VoidPtrTy
);
2404 for (const auto &Item
: Layout
.Items
) {
2405 int Size
= Item
.getSizeByte();
2409 llvm::Value
*ArgVal
;
2411 if (Item
.getKind() == analyze_os_log::OSLogBufferItem::MaskKind
) {
2413 for (unsigned I
= 0, E
= Item
.getMaskType().size(); I
< E
; ++I
)
2414 Val
|= ((uint64_t)Item
.getMaskType()[I
]) << I
* 8;
2415 ArgVal
= llvm::Constant::getIntegerValue(Int64Ty
, llvm::APInt(64, Val
));
2416 } else if (const Expr
*TheExpr
= Item
.getExpr()) {
2417 ArgVal
= EmitScalarExpr(TheExpr
, /*Ignore*/ false);
2419 // If a temporary object that requires destruction after the full
2420 // expression is passed, push a lifetime-extended cleanup to extend its
2421 // lifetime to the end of the enclosing block scope.
2422 auto LifetimeExtendObject
= [&](const Expr
*E
) {
2423 E
= E
->IgnoreParenCasts();
2424 // Extend lifetimes of objects returned by function calls and message
2427 // FIXME: We should do this in other cases in which temporaries are
2428 // created including arguments of non-ARC types (e.g., C++
2430 if (isa
<CallExpr
>(E
) || isa
<ObjCMessageExpr
>(E
))
2435 if (TheExpr
->getType()->isObjCRetainableType() &&
2436 getLangOpts().ObjCAutoRefCount
&& LifetimeExtendObject(TheExpr
)) {
2437 assert(getEvaluationKind(TheExpr
->getType()) == TEK_Scalar
&&
2438 "Only scalar can be a ObjC retainable type");
2439 if (!isa
<Constant
>(ArgVal
)) {
2440 CleanupKind Cleanup
= getARCCleanupKind();
2441 QualType Ty
= TheExpr
->getType();
2442 RawAddress Alloca
= RawAddress::invalid();
2443 RawAddress Addr
= CreateMemTemp(Ty
, "os.log.arg", &Alloca
);
2444 ArgVal
= EmitARCRetain(Ty
, ArgVal
);
2445 Builder
.CreateStore(ArgVal
, Addr
);
2446 pushLifetimeExtendedDestroy(Cleanup
, Alloca
, Ty
,
2447 CodeGenFunction::destroyARCStrongPrecise
,
2448 Cleanup
& EHCleanup
);
2450 // Push a clang.arc.use call to ensure ARC optimizer knows that the
2451 // argument has to be alive.
2452 if (CGM
.getCodeGenOpts().OptimizationLevel
!= 0)
2453 pushCleanupAfterFullExpr
<CallObjCArcUse
>(Cleanup
, ArgVal
);
2457 ArgVal
= Builder
.getInt32(Item
.getConstValue().getQuantity());
2460 unsigned ArgValSize
=
2461 CGM
.getDataLayout().getTypeSizeInBits(ArgVal
->getType());
2462 llvm::IntegerType
*IntTy
= llvm::Type::getIntNTy(getLLVMContext(),
2464 ArgVal
= Builder
.CreateBitOrPointerCast(ArgVal
, IntTy
);
2465 CanQualType ArgTy
= getOSLogArgType(Ctx
, Size
);
2466 // If ArgVal has type x86_fp80, zero-extend ArgVal.
2467 ArgVal
= Builder
.CreateZExtOrBitCast(ArgVal
, ConvertType(ArgTy
));
2468 Args
.add(RValue::get(ArgVal
), ArgTy
);
2471 const CGFunctionInfo
&FI
=
2472 CGM
.getTypes().arrangeBuiltinFunctionCall(Ctx
.VoidTy
, Args
);
2473 llvm::Function
*F
= CodeGenFunction(CGM
).generateBuiltinOSLogHelperFunction(
2474 Layout
, BufAddr
.getAlignment());
2475 EmitCall(FI
, CGCallee::forDirect(F
), ReturnValueSlot(), Args
);
2476 return RValue::get(BufAddr
, *this);
2479 static bool isSpecialUnsignedMultiplySignedResult(
2480 unsigned BuiltinID
, WidthAndSignedness Op1Info
, WidthAndSignedness Op2Info
,
2481 WidthAndSignedness ResultInfo
) {
2482 return BuiltinID
== Builtin::BI__builtin_mul_overflow
&&
2483 Op1Info
.Width
== Op2Info
.Width
&& Op2Info
.Width
== ResultInfo
.Width
&&
2484 !Op1Info
.Signed
&& !Op2Info
.Signed
&& ResultInfo
.Signed
;
2487 static RValue
EmitCheckedUnsignedMultiplySignedResult(
2488 CodeGenFunction
&CGF
, const clang::Expr
*Op1
, WidthAndSignedness Op1Info
,
2489 const clang::Expr
*Op2
, WidthAndSignedness Op2Info
,
2490 const clang::Expr
*ResultArg
, QualType ResultQTy
,
2491 WidthAndSignedness ResultInfo
) {
2492 assert(isSpecialUnsignedMultiplySignedResult(
2493 Builtin::BI__builtin_mul_overflow
, Op1Info
, Op2Info
, ResultInfo
) &&
2494 "Cannot specialize this multiply");
2496 llvm::Value
*V1
= CGF
.EmitScalarExpr(Op1
);
2497 llvm::Value
*V2
= CGF
.EmitScalarExpr(Op2
);
2499 llvm::Value
*HasOverflow
;
2500 llvm::Value
*Result
= EmitOverflowIntrinsic(
2501 CGF
, llvm::Intrinsic::umul_with_overflow
, V1
, V2
, HasOverflow
);
2503 // The intrinsic call will detect overflow when the value is > UINT_MAX,
2504 // however, since the original builtin had a signed result, we need to report
2505 // an overflow when the result is greater than INT_MAX.
2506 auto IntMax
= llvm::APInt::getSignedMaxValue(ResultInfo
.Width
);
2507 llvm::Value
*IntMaxValue
= llvm::ConstantInt::get(Result
->getType(), IntMax
);
2509 llvm::Value
*IntMaxOverflow
= CGF
.Builder
.CreateICmpUGT(Result
, IntMaxValue
);
2510 HasOverflow
= CGF
.Builder
.CreateOr(HasOverflow
, IntMaxOverflow
);
2513 ResultArg
->getType()->getPointeeType().isVolatileQualified();
2514 Address ResultPtr
= CGF
.EmitPointerWithAlignment(ResultArg
);
2515 CGF
.Builder
.CreateStore(CGF
.EmitToMemory(Result
, ResultQTy
), ResultPtr
,
2517 return RValue::get(HasOverflow
);
2520 /// Determine if a binop is a checked mixed-sign multiply we can specialize.
2521 static bool isSpecialMixedSignMultiply(unsigned BuiltinID
,
2522 WidthAndSignedness Op1Info
,
2523 WidthAndSignedness Op2Info
,
2524 WidthAndSignedness ResultInfo
) {
2525 return BuiltinID
== Builtin::BI__builtin_mul_overflow
&&
2526 std::max(Op1Info
.Width
, Op2Info
.Width
) >= ResultInfo
.Width
&&
2527 Op1Info
.Signed
!= Op2Info
.Signed
;
2530 /// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2531 /// the generic checked-binop irgen.
2533 EmitCheckedMixedSignMultiply(CodeGenFunction
&CGF
, const clang::Expr
*Op1
,
2534 WidthAndSignedness Op1Info
, const clang::Expr
*Op2
,
2535 WidthAndSignedness Op2Info
,
2536 const clang::Expr
*ResultArg
, QualType ResultQTy
,
2537 WidthAndSignedness ResultInfo
) {
2538 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow
, Op1Info
,
2539 Op2Info
, ResultInfo
) &&
2540 "Not a mixed-sign multipliction we can specialize");
2542 // Emit the signed and unsigned operands.
2543 const clang::Expr
*SignedOp
= Op1Info
.Signed
? Op1
: Op2
;
2544 const clang::Expr
*UnsignedOp
= Op1Info
.Signed
? Op2
: Op1
;
2545 llvm::Value
*Signed
= CGF
.EmitScalarExpr(SignedOp
);
2546 llvm::Value
*Unsigned
= CGF
.EmitScalarExpr(UnsignedOp
);
2547 unsigned SignedOpWidth
= Op1Info
.Signed
? Op1Info
.Width
: Op2Info
.Width
;
2548 unsigned UnsignedOpWidth
= Op1Info
.Signed
? Op2Info
.Width
: Op1Info
.Width
;
2550 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2551 if (SignedOpWidth
< UnsignedOpWidth
)
2552 Signed
= CGF
.Builder
.CreateSExt(Signed
, Unsigned
->getType(), "op.sext");
2553 if (UnsignedOpWidth
< SignedOpWidth
)
2554 Unsigned
= CGF
.Builder
.CreateZExt(Unsigned
, Signed
->getType(), "op.zext");
2556 llvm::Type
*OpTy
= Signed
->getType();
2557 llvm::Value
*Zero
= llvm::Constant::getNullValue(OpTy
);
2558 Address ResultPtr
= CGF
.EmitPointerWithAlignment(ResultArg
);
2559 llvm::Type
*ResTy
= ResultPtr
.getElementType();
2560 unsigned OpWidth
= std::max(Op1Info
.Width
, Op2Info
.Width
);
2562 // Take the absolute value of the signed operand.
2563 llvm::Value
*IsNegative
= CGF
.Builder
.CreateICmpSLT(Signed
, Zero
);
2564 llvm::Value
*AbsOfNegative
= CGF
.Builder
.CreateSub(Zero
, Signed
);
2565 llvm::Value
*AbsSigned
=
2566 CGF
.Builder
.CreateSelect(IsNegative
, AbsOfNegative
, Signed
);
2568 // Perform a checked unsigned multiplication.
2569 llvm::Value
*UnsignedOverflow
;
2570 llvm::Value
*UnsignedResult
=
2571 EmitOverflowIntrinsic(CGF
, llvm::Intrinsic::umul_with_overflow
, AbsSigned
,
2572 Unsigned
, UnsignedOverflow
);
2574 llvm::Value
*Overflow
, *Result
;
2575 if (ResultInfo
.Signed
) {
2576 // Signed overflow occurs if the result is greater than INT_MAX or lesser
2577 // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative).
2579 llvm::APInt::getSignedMaxValue(ResultInfo
.Width
).zext(OpWidth
);
2580 llvm::Value
*MaxResult
=
2581 CGF
.Builder
.CreateAdd(llvm::ConstantInt::get(OpTy
, IntMax
),
2582 CGF
.Builder
.CreateZExt(IsNegative
, OpTy
));
2583 llvm::Value
*SignedOverflow
=
2584 CGF
.Builder
.CreateICmpUGT(UnsignedResult
, MaxResult
);
2585 Overflow
= CGF
.Builder
.CreateOr(UnsignedOverflow
, SignedOverflow
);
2587 // Prepare the signed result (possibly by negating it).
2588 llvm::Value
*NegativeResult
= CGF
.Builder
.CreateNeg(UnsignedResult
);
2589 llvm::Value
*SignedResult
=
2590 CGF
.Builder
.CreateSelect(IsNegative
, NegativeResult
, UnsignedResult
);
2591 Result
= CGF
.Builder
.CreateTrunc(SignedResult
, ResTy
);
2593 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2594 llvm::Value
*Underflow
= CGF
.Builder
.CreateAnd(
2595 IsNegative
, CGF
.Builder
.CreateIsNotNull(UnsignedResult
));
2596 Overflow
= CGF
.Builder
.CreateOr(UnsignedOverflow
, Underflow
);
2597 if (ResultInfo
.Width
< OpWidth
) {
2599 llvm::APInt::getMaxValue(ResultInfo
.Width
).zext(OpWidth
);
2600 llvm::Value
*TruncOverflow
= CGF
.Builder
.CreateICmpUGT(
2601 UnsignedResult
, llvm::ConstantInt::get(OpTy
, IntMax
));
2602 Overflow
= CGF
.Builder
.CreateOr(Overflow
, TruncOverflow
);
2605 // Negate the product if it would be negative in infinite precision.
2606 Result
= CGF
.Builder
.CreateSelect(
2607 IsNegative
, CGF
.Builder
.CreateNeg(UnsignedResult
), UnsignedResult
);
2609 Result
= CGF
.Builder
.CreateTrunc(Result
, ResTy
);
2611 assert(Overflow
&& Result
&& "Missing overflow or result");
2614 ResultArg
->getType()->getPointeeType().isVolatileQualified();
2615 CGF
.Builder
.CreateStore(CGF
.EmitToMemory(Result
, ResultQTy
), ResultPtr
,
2617 return RValue::get(Overflow
);
2621 TypeRequiresBuiltinLaunderImp(const ASTContext
&Ctx
, QualType Ty
,
2622 llvm::SmallPtrSetImpl
<const Decl
*> &Seen
) {
2623 if (const auto *Arr
= Ctx
.getAsArrayType(Ty
))
2624 Ty
= Ctx
.getBaseElementType(Arr
);
2626 const auto *Record
= Ty
->getAsCXXRecordDecl();
2630 // We've already checked this type, or are in the process of checking it.
2631 if (!Seen
.insert(Record
).second
)
2634 assert(Record
->hasDefinition() &&
2635 "Incomplete types should already be diagnosed");
2637 if (Record
->isDynamicClass())
2640 for (FieldDecl
*F
: Record
->fields()) {
2641 if (TypeRequiresBuiltinLaunderImp(Ctx
, F
->getType(), Seen
))
2647 /// Determine if the specified type requires laundering by checking if it is a
2648 /// dynamic class type or contains a subobject which is a dynamic class type.
2649 static bool TypeRequiresBuiltinLaunder(CodeGenModule
&CGM
, QualType Ty
) {
2650 if (!CGM
.getCodeGenOpts().StrictVTablePointers
)
2652 llvm::SmallPtrSet
<const Decl
*, 16> Seen
;
2653 return TypeRequiresBuiltinLaunderImp(CGM
.getContext(), Ty
, Seen
);
2656 RValue
CodeGenFunction::emitRotate(const CallExpr
*E
, bool IsRotateRight
) {
2657 llvm::Value
*Src
= EmitScalarExpr(E
->getArg(0));
2658 llvm::Value
*ShiftAmt
= EmitScalarExpr(E
->getArg(1));
2660 // The builtin's shift arg may have a different type than the source arg and
2661 // result, but the LLVM intrinsic uses the same type for all values.
2662 llvm::Type
*Ty
= Src
->getType();
2663 ShiftAmt
= Builder
.CreateIntCast(ShiftAmt
, Ty
, false);
2665 // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
2666 unsigned IID
= IsRotateRight
? Intrinsic::fshr
: Intrinsic::fshl
;
2667 Function
*F
= CGM
.getIntrinsic(IID
, Ty
);
2668 return RValue::get(Builder
.CreateCall(F
, { Src
, Src
, ShiftAmt
}));
2671 // Map math builtins for long-double to f128 version.
// Each MUTATE_LDBL(name) entry below expands to a case that translates the
// long-double builtin ID (BI__builtin_<name>l) into the matching f128 builtin
// ID (BI__builtin_<name>f128).
// NOTE(review): only part of the MUTATE_LDBL list and none of the switch's
// tail are visible in this excerpt; presumably IDs without an entry are
// returned unchanged -- confirm against the full file.
2672 static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID
) {
2673 switch (BuiltinID
) {
2674 #define MUTATE_LDBL(func) \
2675 case Builtin::BI__builtin_##func##l: \
2676 return Builtin::BI__builtin_##func##f128;
2697 MUTATE_LDBL(nearbyint
)
2701 MUTATE_LDBL(llround
)
2727 MUTATE_LDBL(huge_val
)
2728 MUTATE_LDBL(copysign
)
2729 MUTATE_LDBL(nextafter
)
2730 MUTATE_LDBL(nexttoward
)
2731 MUTATE_LDBL(remainder
)
2733 MUTATE_LDBL(scalbln
)
// When the builder is in constrained-FP mode and its default exception
// behavior is anything other than fp::ebIgnore, consult the target hook
// testFPKind(V, BuiltinID, Builder, CGM).
// NOTE(review): the remainder of the parameter list and the statements that
// consume the hook's result are not visible in this excerpt; presumably a
// non-null hook result is returned and nullptr otherwise -- confirm against
// the full file.
2743 static Value
*tryUseTestFPKind(CodeGenFunction
&CGF
, unsigned BuiltinID
,
2745 if (CGF
.Builder
.getIsFPConstrained() &&
2746 CGF
.Builder
.getDefaultConstrainedExcept() != fp::ebIgnore
) {
2748 CGF
.getTargetHooks().testFPKind(V
, BuiltinID
, CGF
.Builder
, CGF
.CGM
))
2754 static RValue
EmitHipStdParUnsupportedBuiltin(CodeGenFunction
*CGF
,
2755 const FunctionDecl
*FD
) {
2756 auto Name
= FD
->getNameAsString() + "__hipstdpar_unsupported";
2757 auto FnTy
= CGF
->CGM
.getTypes().GetFunctionType(FD
);
2758 auto UBF
= CGF
->CGM
.getModule().getOrInsertFunction(Name
, FnTy
);
2760 SmallVector
<Value
*, 16> Args
;
2761 for (auto &&FormalTy
: FnTy
->params())
2762 Args
.push_back(llvm::PoisonValue::get(FormalTy
));
2764 return RValue::get(CGF
->Builder
.CreateCall(UBF
, Args
));
2767 RValue
CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD
, unsigned BuiltinID
,
2769 ReturnValueSlot ReturnValue
) {
2770 assert(!getContext().BuiltinInfo
.isImmediate(BuiltinID
) &&
2771 "Should not codegen for consteval builtins");
2773 const FunctionDecl
*FD
= GD
.getDecl()->getAsFunction();
2774 // See if we can constant fold this builtin. If so, don't emit it at all.
2775 // TODO: Extend this handling to all builtin calls that we can constant-fold.
2776 Expr::EvalResult Result
;
2777 if (E
->isPRValue() && E
->EvaluateAsRValue(Result
, CGM
.getContext()) &&
2778 !Result
.hasSideEffects()) {
2779 if (Result
.Val
.isInt())
2780 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2781 Result
.Val
.getInt()));
2782 if (Result
.Val
.isFloat())
2783 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2784 Result
.Val
.getFloat()));
2787 // If current long-double semantics is IEEE 128-bit, replace math builtins
2788 // of long-double with f128 equivalent.
2789 // TODO: This mutation should also be applied to other targets other than PPC,
2790 // after backend supports IEEE 128-bit style libcalls.
2791 if (getTarget().getTriple().isPPC64() &&
2792 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2793 BuiltinID
= mutateLongDoubleBuiltin(BuiltinID
);
2795 // If the builtin has been declared explicitly with an assembler label,
2796 // disable the specialized emitting below. Ideally we should communicate the
2797 // rename in IR, or at least avoid generating the intrinsic calls that are
2798 // likely to get lowered to the renamed library functions.
2799 const unsigned BuiltinIDIfNoAsmLabel
=
2800 FD
->hasAttr
<AsmLabelAttr
>() ? 0 : BuiltinID
;
2802 std::optional
<bool> ErrnoOverriden
;
2803 // ErrnoOverriden is true if math-errno is overridden via the
2804 // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2805 // which implies math-errno.
2806 if (E
->hasStoredFPFeatures()) {
2807 FPOptionsOverride OP
= E
->getFPFeatures();
2808 if (OP
.hasMathErrnoOverride())
2809 ErrnoOverriden
= OP
.getMathErrnoOverride();
2811 // True if '__attribute__((optnone))' is used. This attribute overrides
2812 // fast-math which implies math-errno.
2813 bool OptNone
= CurFuncDecl
&& CurFuncDecl
->hasAttr
<OptimizeNoneAttr
>();
2815 // True if we are compiling at -O2 and errno has been disabled
2816 // using the '#pragma float_control(precise, off)', and
2817 // attribute opt-none hasn't been seen.
2818 bool ErrnoOverridenToFalseWithOpt
=
2819 ErrnoOverriden
.has_value() && !ErrnoOverriden
.value() && !OptNone
&&
2820 CGM
.getCodeGenOpts().OptimizationLevel
!= 0;
2822 // There are LLVM math intrinsics/instructions corresponding to math library
2823 // functions except the LLVM op will never set errno while the math library
2824 // might. Also, math builtins have the same semantics as their math library
2825 // twins. Thus, we can transform math library and builtin calls to their
2826 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2827 // In case FP exceptions are enabled, the experimental versions of the
2828 // intrinsics model those.
2830 getContext().BuiltinInfo
.isConst(BuiltinID
);
2832 // There's a special case with the fma builtins where they are always const
2833 // if the target environment is GNU or the target OS is Windows and we're
2834 // targeting the MSVCRT.dll environment.
2835 // FIXME: This list can become outdated. Need to find a way to get it some
2837 switch (BuiltinID
) {
2838 case Builtin::BI__builtin_fma
:
2839 case Builtin::BI__builtin_fmaf
:
2840 case Builtin::BI__builtin_fmal
:
2841 case Builtin::BI__builtin_fmaf16
:
2842 case Builtin::BIfma
:
2843 case Builtin::BIfmaf
:
2844 case Builtin::BIfmal
: {
2845 auto &Trip
= CGM
.getTriple();
2846 if (Trip
.isGNUEnvironment() || Trip
.isOSMSVCRT())
2854 bool ConstWithoutErrnoAndExceptions
=
2855 getContext().BuiltinInfo
.isConstWithoutErrnoAndExceptions(BuiltinID
);
2856 bool ConstWithoutExceptions
=
2857 getContext().BuiltinInfo
.isConstWithoutExceptions(BuiltinID
);
2859 // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2861 // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2862 // or attributes that affect math-errno should prevent or allow math
2863 // intrinsics to be generated. Intrinsics are generated:
2864 // 1- In fast math mode, unless math-errno is overridden
2865 // via '#pragma float_control(precise, on)', or via an
2866 // '__attribute__((optnone))'.
2867 // 2- If math-errno was enabled on command line but overridden
2868 // to false via '#pragma float_control(precise, off)' and
2869 // '__attribute__((optnone))' hasn't been used.
2870 // 3- If we are compiling with optimization and errno has been disabled
2871 // via '#pragma float_control(precise, off)', and
2872 // '__attribute__((optnone))' hasn't been used.
2874 bool ConstWithoutErrnoOrExceptions
=
2875 ConstWithoutErrnoAndExceptions
|| ConstWithoutExceptions
;
2876 bool GenerateIntrinsics
=
2877 (ConstAlways
&& !OptNone
) ||
2878 (!getLangOpts().MathErrno
&&
2879 !(ErrnoOverriden
.has_value() && ErrnoOverriden
.value()) && !OptNone
);
2880 if (!GenerateIntrinsics
) {
2881 GenerateIntrinsics
=
2882 ConstWithoutErrnoOrExceptions
&& !ConstWithoutErrnoAndExceptions
;
2883 if (!GenerateIntrinsics
)
2884 GenerateIntrinsics
=
2885 ConstWithoutErrnoOrExceptions
&&
2886 (!getLangOpts().MathErrno
&&
2887 !(ErrnoOverriden
.has_value() && ErrnoOverriden
.value()) && !OptNone
);
2888 if (!GenerateIntrinsics
)
2889 GenerateIntrinsics
=
2890 ConstWithoutErrnoOrExceptions
&& ErrnoOverridenToFalseWithOpt
;
2892 if (GenerateIntrinsics
) {
2893 switch (BuiltinIDIfNoAsmLabel
) {
2894 case Builtin::BIacos
:
2895 case Builtin::BIacosf
:
2896 case Builtin::BIacosl
:
2897 case Builtin::BI__builtin_acos
:
2898 case Builtin::BI__builtin_acosf
:
2899 case Builtin::BI__builtin_acosf16
:
2900 case Builtin::BI__builtin_acosl
:
2901 case Builtin::BI__builtin_acosf128
:
2902 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2903 *this, E
, Intrinsic::acos
, Intrinsic::experimental_constrained_acos
));
2905 case Builtin::BIasin
:
2906 case Builtin::BIasinf
:
2907 case Builtin::BIasinl
:
2908 case Builtin::BI__builtin_asin
:
2909 case Builtin::BI__builtin_asinf
:
2910 case Builtin::BI__builtin_asinf16
:
2911 case Builtin::BI__builtin_asinl
:
2912 case Builtin::BI__builtin_asinf128
:
2913 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2914 *this, E
, Intrinsic::asin
, Intrinsic::experimental_constrained_asin
));
2916 case Builtin::BIatan
:
2917 case Builtin::BIatanf
:
2918 case Builtin::BIatanl
:
2919 case Builtin::BI__builtin_atan
:
2920 case Builtin::BI__builtin_atanf
:
2921 case Builtin::BI__builtin_atanf16
:
2922 case Builtin::BI__builtin_atanl
:
2923 case Builtin::BI__builtin_atanf128
:
2924 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2925 *this, E
, Intrinsic::atan
, Intrinsic::experimental_constrained_atan
));
2927 case Builtin::BIatan2
:
2928 case Builtin::BIatan2f
:
2929 case Builtin::BIatan2l
:
2930 case Builtin::BI__builtin_atan2
:
2931 case Builtin::BI__builtin_atan2f
:
2932 case Builtin::BI__builtin_atan2f16
:
2933 case Builtin::BI__builtin_atan2l
:
2934 case Builtin::BI__builtin_atan2f128
:
2935 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(
2936 *this, E
, Intrinsic::atan2
,
2937 Intrinsic::experimental_constrained_atan2
));
2939 case Builtin::BIceil
:
2940 case Builtin::BIceilf
:
2941 case Builtin::BIceill
:
2942 case Builtin::BI__builtin_ceil
:
2943 case Builtin::BI__builtin_ceilf
:
2944 case Builtin::BI__builtin_ceilf16
:
2945 case Builtin::BI__builtin_ceill
:
2946 case Builtin::BI__builtin_ceilf128
:
2947 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E
,
2949 Intrinsic::experimental_constrained_ceil
));
2951 case Builtin::BIcopysign
:
2952 case Builtin::BIcopysignf
:
2953 case Builtin::BIcopysignl
:
2954 case Builtin::BI__builtin_copysign
:
2955 case Builtin::BI__builtin_copysignf
:
2956 case Builtin::BI__builtin_copysignf16
:
2957 case Builtin::BI__builtin_copysignl
:
2958 case Builtin::BI__builtin_copysignf128
:
2960 emitBuiltinWithOneOverloadedType
<2>(*this, E
, Intrinsic::copysign
));
2962 case Builtin::BIcos
:
2963 case Builtin::BIcosf
:
2964 case Builtin::BIcosl
:
2965 case Builtin::BI__builtin_cos
:
2966 case Builtin::BI__builtin_cosf
:
2967 case Builtin::BI__builtin_cosf16
:
2968 case Builtin::BI__builtin_cosl
:
2969 case Builtin::BI__builtin_cosf128
:
2970 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E
,
2972 Intrinsic::experimental_constrained_cos
));
2974 case Builtin::BIcosh
:
2975 case Builtin::BIcoshf
:
2976 case Builtin::BIcoshl
:
2977 case Builtin::BI__builtin_cosh
:
2978 case Builtin::BI__builtin_coshf
:
2979 case Builtin::BI__builtin_coshf16
:
2980 case Builtin::BI__builtin_coshl
:
2981 case Builtin::BI__builtin_coshf128
:
2982 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2983 *this, E
, Intrinsic::cosh
, Intrinsic::experimental_constrained_cosh
));
2985 case Builtin::BIexp
:
2986 case Builtin::BIexpf
:
2987 case Builtin::BIexpl
:
2988 case Builtin::BI__builtin_exp
:
2989 case Builtin::BI__builtin_expf
:
2990 case Builtin::BI__builtin_expf16
:
2991 case Builtin::BI__builtin_expl
:
2992 case Builtin::BI__builtin_expf128
:
2993 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E
,
2995 Intrinsic::experimental_constrained_exp
));
2997 case Builtin::BIexp2
:
2998 case Builtin::BIexp2f
:
2999 case Builtin::BIexp2l
:
3000 case Builtin::BI__builtin_exp2
:
3001 case Builtin::BI__builtin_exp2f
:
3002 case Builtin::BI__builtin_exp2f16
:
3003 case Builtin::BI__builtin_exp2l
:
3004 case Builtin::BI__builtin_exp2f128
:
3005 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E
,
3007 Intrinsic::experimental_constrained_exp2
));
3008 case Builtin::BI__builtin_exp10
:
3009 case Builtin::BI__builtin_exp10f
:
3010 case Builtin::BI__builtin_exp10f16
:
3011 case Builtin::BI__builtin_exp10l
:
3012 case Builtin::BI__builtin_exp10f128
: {
3013 // TODO: strictfp support
3014 if (Builder
.getIsFPConstrained())
3017 emitBuiltinWithOneOverloadedType
<1>(*this, E
, Intrinsic::exp10
));
3019 case Builtin::BIfabs
:
3020 case Builtin::BIfabsf
:
3021 case Builtin::BIfabsl
:
3022 case Builtin::BI__builtin_fabs
:
3023 case Builtin::BI__builtin_fabsf
:
3024 case Builtin::BI__builtin_fabsf16
:
3025 case Builtin::BI__builtin_fabsl
:
3026 case Builtin::BI__builtin_fabsf128
:
3028 emitBuiltinWithOneOverloadedType
<1>(*this, E
, Intrinsic::fabs
));
3030 case Builtin::BIfloor
:
3031 case Builtin::BIfloorf
:
3032 case Builtin::BIfloorl
:
3033 case Builtin::BI__builtin_floor
:
3034 case Builtin::BI__builtin_floorf
:
3035 case Builtin::BI__builtin_floorf16
:
3036 case Builtin::BI__builtin_floorl
:
3037 case Builtin::BI__builtin_floorf128
:
3038 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E
,
3040 Intrinsic::experimental_constrained_floor
));
3042 case Builtin::BIfma
:
3043 case Builtin::BIfmaf
:
3044 case Builtin::BIfmal
:
3045 case Builtin::BI__builtin_fma
:
3046 case Builtin::BI__builtin_fmaf
:
3047 case Builtin::BI__builtin_fmaf16
:
3048 case Builtin::BI__builtin_fmal
:
3049 case Builtin::BI__builtin_fmaf128
:
3050 return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E
,
3052 Intrinsic::experimental_constrained_fma
));
3054 case Builtin::BIfmax
:
3055 case Builtin::BIfmaxf
:
3056 case Builtin::BIfmaxl
:
3057 case Builtin::BI__builtin_fmax
:
3058 case Builtin::BI__builtin_fmaxf
:
3059 case Builtin::BI__builtin_fmaxf16
:
3060 case Builtin::BI__builtin_fmaxl
:
3061 case Builtin::BI__builtin_fmaxf128
:
3062 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E
,
3064 Intrinsic::experimental_constrained_maxnum
));
3066 case Builtin::BIfmin
:
3067 case Builtin::BIfminf
:
3068 case Builtin::BIfminl
:
3069 case Builtin::BI__builtin_fmin
:
3070 case Builtin::BI__builtin_fminf
:
3071 case Builtin::BI__builtin_fminf16
:
3072 case Builtin::BI__builtin_fminl
:
3073 case Builtin::BI__builtin_fminf128
:
3074 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E
,
3076 Intrinsic::experimental_constrained_minnum
));
3078 case Builtin::BIfmaximum_num
:
3079 case Builtin::BIfmaximum_numf
:
3080 case Builtin::BIfmaximum_numl
:
3081 case Builtin::BI__builtin_fmaximum_num
:
3082 case Builtin::BI__builtin_fmaximum_numf
:
3083 case Builtin::BI__builtin_fmaximum_numf16
:
3084 case Builtin::BI__builtin_fmaximum_numl
:
3085 case Builtin::BI__builtin_fmaximum_numf128
:
3087 emitBuiltinWithOneOverloadedType
<2>(*this, E
, Intrinsic::maximumnum
));
3089 case Builtin::BIfminimum_num
:
3090 case Builtin::BIfminimum_numf
:
3091 case Builtin::BIfminimum_numl
:
3092 case Builtin::BI__builtin_fminimum_num
:
3093 case Builtin::BI__builtin_fminimum_numf
:
3094 case Builtin::BI__builtin_fminimum_numf16
:
3095 case Builtin::BI__builtin_fminimum_numl
:
3096 case Builtin::BI__builtin_fminimum_numf128
:
3098 emitBuiltinWithOneOverloadedType
<2>(*this, E
, Intrinsic::minimumnum
));
3100 // fmod() is a special-case. It maps to the frem instruction rather than an
3102 case Builtin::BIfmod
:
3103 case Builtin::BIfmodf
:
3104 case Builtin::BIfmodl
:
3105 case Builtin::BI__builtin_fmod
:
3106 case Builtin::BI__builtin_fmodf
:
3107 case Builtin::BI__builtin_fmodf16
:
3108 case Builtin::BI__builtin_fmodl
:
3109 case Builtin::BI__builtin_fmodf128
:
3110 case Builtin::BI__builtin_elementwise_fmod
: {
3111 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
3112 Value
*Arg1
= EmitScalarExpr(E
->getArg(0));
3113 Value
*Arg2
= EmitScalarExpr(E
->getArg(1));
3114 return RValue::get(Builder
.CreateFRem(Arg1
, Arg2
, "fmod"));
3117 case Builtin::BIlog
:
3118 case Builtin::BIlogf
:
3119 case Builtin::BIlogl
:
3120 case Builtin::BI__builtin_log
:
3121 case Builtin::BI__builtin_logf
:
3122 case Builtin::BI__builtin_logf16
:
3123 case Builtin::BI__builtin_logl
:
3124 case Builtin::BI__builtin_logf128
:
3125 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E
,
3127 Intrinsic::experimental_constrained_log
));
3129 case Builtin::BIlog10
:
3130 case Builtin::BIlog10f
:
3131 case Builtin::BIlog10l
:
3132 case Builtin::BI__builtin_log10
:
3133 case Builtin::BI__builtin_log10f
:
3134 case Builtin::BI__builtin_log10f16
:
3135 case Builtin::BI__builtin_log10l
:
3136 case Builtin::BI__builtin_log10f128
:
3137 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E
,
3139 Intrinsic::experimental_constrained_log10
));
3141 case Builtin::BIlog2
:
3142 case Builtin::BIlog2f
:
3143 case Builtin::BIlog2l
:
3144 case Builtin::BI__builtin_log2
:
3145 case Builtin::BI__builtin_log2f
:
3146 case Builtin::BI__builtin_log2f16
:
3147 case Builtin::BI__builtin_log2l
:
3148 case Builtin::BI__builtin_log2f128
:
3149 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E
,
3151 Intrinsic::experimental_constrained_log2
));
3153 case Builtin::BInearbyint
:
3154 case Builtin::BInearbyintf
:
3155 case Builtin::BInearbyintl
:
3156 case Builtin::BI__builtin_nearbyint
:
3157 case Builtin::BI__builtin_nearbyintf
:
3158 case Builtin::BI__builtin_nearbyintl
:
3159 case Builtin::BI__builtin_nearbyintf128
:
3160 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E
,
3161 Intrinsic::nearbyint
,
3162 Intrinsic::experimental_constrained_nearbyint
));
3164 case Builtin::BIpow
:
3165 case Builtin::BIpowf
:
3166 case Builtin::BIpowl
:
3167 case Builtin::BI__builtin_pow
:
3168 case Builtin::BI__builtin_powf
:
3169 case Builtin::BI__builtin_powf16
:
3170 case Builtin::BI__builtin_powl
:
3171 case Builtin::BI__builtin_powf128
:
3172 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E
,
3174 Intrinsic::experimental_constrained_pow
));
3176 case Builtin::BIrint
:
3177 case Builtin::BIrintf
:
3178 case Builtin::BIrintl
:
3179 case Builtin::BI__builtin_rint
:
3180 case Builtin::BI__builtin_rintf
:
3181 case Builtin::BI__builtin_rintf16
:
3182 case Builtin::BI__builtin_rintl
:
3183 case Builtin::BI__builtin_rintf128
:
3184 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E
,
3186 Intrinsic::experimental_constrained_rint
));
3188 case Builtin::BIround
:
3189 case Builtin::BIroundf
:
3190 case Builtin::BIroundl
:
3191 case Builtin::BI__builtin_round
:
3192 case Builtin::BI__builtin_roundf
:
3193 case Builtin::BI__builtin_roundf16
:
3194 case Builtin::BI__builtin_roundl
:
3195 case Builtin::BI__builtin_roundf128
:
3196 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E
,
3198 Intrinsic::experimental_constrained_round
));
3200 case Builtin::BIroundeven
:
3201 case Builtin::BIroundevenf
:
3202 case Builtin::BIroundevenl
:
3203 case Builtin::BI__builtin_roundeven
:
3204 case Builtin::BI__builtin_roundevenf
:
3205 case Builtin::BI__builtin_roundevenf16
:
3206 case Builtin::BI__builtin_roundevenl
:
3207 case Builtin::BI__builtin_roundevenf128
:
3208 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E
,
3209 Intrinsic::roundeven
,
3210 Intrinsic::experimental_constrained_roundeven
));
3212 case Builtin::BIsin
:
3213 case Builtin::BIsinf
:
3214 case Builtin::BIsinl
:
3215 case Builtin::BI__builtin_sin
:
3216 case Builtin::BI__builtin_sinf
:
3217 case Builtin::BI__builtin_sinf16
:
3218 case Builtin::BI__builtin_sinl
:
3219 case Builtin::BI__builtin_sinf128
:
3220 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E
,
3222 Intrinsic::experimental_constrained_sin
));
3224 case Builtin::BIsinh
:
3225 case Builtin::BIsinhf
:
3226 case Builtin::BIsinhl
:
3227 case Builtin::BI__builtin_sinh
:
3228 case Builtin::BI__builtin_sinhf
:
3229 case Builtin::BI__builtin_sinhf16
:
3230 case Builtin::BI__builtin_sinhl
:
3231 case Builtin::BI__builtin_sinhf128
:
3232 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3233 *this, E
, Intrinsic::sinh
, Intrinsic::experimental_constrained_sinh
));
3235 case Builtin::BIsqrt
:
3236 case Builtin::BIsqrtf
:
3237 case Builtin::BIsqrtl
:
3238 case Builtin::BI__builtin_sqrt
:
3239 case Builtin::BI__builtin_sqrtf
:
3240 case Builtin::BI__builtin_sqrtf16
:
3241 case Builtin::BI__builtin_sqrtl
:
3242 case Builtin::BI__builtin_sqrtf128
:
3243 case Builtin::BI__builtin_elementwise_sqrt
: {
3244 llvm::Value
*Call
= emitUnaryMaybeConstrainedFPBuiltin(
3245 *this, E
, Intrinsic::sqrt
, Intrinsic::experimental_constrained_sqrt
);
3246 SetSqrtFPAccuracy(Call
);
3247 return RValue::get(Call
);
3250 case Builtin::BItan
:
3251 case Builtin::BItanf
:
3252 case Builtin::BItanl
:
3253 case Builtin::BI__builtin_tan
:
3254 case Builtin::BI__builtin_tanf
:
3255 case Builtin::BI__builtin_tanf16
:
3256 case Builtin::BI__builtin_tanl
:
3257 case Builtin::BI__builtin_tanf128
:
3258 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3259 *this, E
, Intrinsic::tan
, Intrinsic::experimental_constrained_tan
));
3261 case Builtin::BItanh
:
3262 case Builtin::BItanhf
:
3263 case Builtin::BItanhl
:
3264 case Builtin::BI__builtin_tanh
:
3265 case Builtin::BI__builtin_tanhf
:
3266 case Builtin::BI__builtin_tanhf16
:
3267 case Builtin::BI__builtin_tanhl
:
3268 case Builtin::BI__builtin_tanhf128
:
3269 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3270 *this, E
, Intrinsic::tanh
, Intrinsic::experimental_constrained_tanh
));
3272 case Builtin::BItrunc
:
3273 case Builtin::BItruncf
:
3274 case Builtin::BItruncl
:
3275 case Builtin::BI__builtin_trunc
:
3276 case Builtin::BI__builtin_truncf
:
3277 case Builtin::BI__builtin_truncf16
:
3278 case Builtin::BI__builtin_truncl
:
3279 case Builtin::BI__builtin_truncf128
:
3280 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E
,
3282 Intrinsic::experimental_constrained_trunc
));
3284 case Builtin::BIlround
:
3285 case Builtin::BIlroundf
:
3286 case Builtin::BIlroundl
:
3287 case Builtin::BI__builtin_lround
:
3288 case Builtin::BI__builtin_lroundf
:
3289 case Builtin::BI__builtin_lroundl
:
3290 case Builtin::BI__builtin_lroundf128
:
3291 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3292 *this, E
, Intrinsic::lround
,
3293 Intrinsic::experimental_constrained_lround
));
3295 case Builtin::BIllround
:
3296 case Builtin::BIllroundf
:
3297 case Builtin::BIllroundl
:
3298 case Builtin::BI__builtin_llround
:
3299 case Builtin::BI__builtin_llroundf
:
3300 case Builtin::BI__builtin_llroundl
:
3301 case Builtin::BI__builtin_llroundf128
:
3302 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3303 *this, E
, Intrinsic::llround
,
3304 Intrinsic::experimental_constrained_llround
));
3306 case Builtin::BIlrint
:
3307 case Builtin::BIlrintf
:
3308 case Builtin::BIlrintl
:
3309 case Builtin::BI__builtin_lrint
:
3310 case Builtin::BI__builtin_lrintf
:
3311 case Builtin::BI__builtin_lrintl
:
3312 case Builtin::BI__builtin_lrintf128
:
3313 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3314 *this, E
, Intrinsic::lrint
,
3315 Intrinsic::experimental_constrained_lrint
));
3317 case Builtin::BIllrint
:
3318 case Builtin::BIllrintf
:
3319 case Builtin::BIllrintl
:
3320 case Builtin::BI__builtin_llrint
:
3321 case Builtin::BI__builtin_llrintf
:
3322 case Builtin::BI__builtin_llrintl
:
3323 case Builtin::BI__builtin_llrintf128
:
3324 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3325 *this, E
, Intrinsic::llrint
,
3326 Intrinsic::experimental_constrained_llrint
));
3327 case Builtin::BI__builtin_ldexp
:
3328 case Builtin::BI__builtin_ldexpf
:
3329 case Builtin::BI__builtin_ldexpl
:
3330 case Builtin::BI__builtin_ldexpf16
:
3331 case Builtin::BI__builtin_ldexpf128
: {
3332 return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
3333 *this, E
, Intrinsic::ldexp
,
3334 Intrinsic::experimental_constrained_ldexp
));
3341 // Check NonnullAttribute/NullabilityArg and Alignment.
3342 auto EmitArgCheck
= [&](TypeCheckKind Kind
, Address A
, const Expr
*Arg
,
3344 Value
*Val
= A
.emitRawPointer(*this);
3345 EmitNonNullArgCheck(RValue::get(Val
), Arg
->getType(), Arg
->getExprLoc(), FD
,
3348 if (SanOpts
.has(SanitizerKind::Alignment
)) {
3349 SanitizerSet SkippedChecks
;
3350 SkippedChecks
.set(SanitizerKind::All
);
3351 SkippedChecks
.clear(SanitizerKind::Alignment
);
3352 SourceLocation Loc
= Arg
->getExprLoc();
3353 // Strip an implicit cast.
3354 if (auto *CE
= dyn_cast
<ImplicitCastExpr
>(Arg
))
3355 if (CE
->getCastKind() == CK_BitCast
)
3356 Arg
= CE
->getSubExpr();
3357 EmitTypeCheck(Kind
, Loc
, Val
, Arg
->getType(), A
.getAlignment(),
3362 switch (BuiltinIDIfNoAsmLabel
) {
3364 case Builtin::BI__builtin___CFStringMakeConstantString
:
3365 case Builtin::BI__builtin___NSStringMakeConstantString
:
3366 return RValue::get(ConstantEmitter(*this).emitAbstract(E
, E
->getType()));
3367 case Builtin::BI__builtin_stdarg_start
:
3368 case Builtin::BI__builtin_va_start
:
3369 case Builtin::BI__va_start
:
3370 case Builtin::BI__builtin_va_end
:
3371 EmitVAStartEnd(BuiltinID
== Builtin::BI__va_start
3372 ? EmitScalarExpr(E
->getArg(0))
3373 : EmitVAListRef(E
->getArg(0)).emitRawPointer(*this),
3374 BuiltinID
!= Builtin::BI__builtin_va_end
);
3375 return RValue::get(nullptr);
3376 case Builtin::BI__builtin_va_copy
: {
3377 Value
*DstPtr
= EmitVAListRef(E
->getArg(0)).emitRawPointer(*this);
3378 Value
*SrcPtr
= EmitVAListRef(E
->getArg(1)).emitRawPointer(*this);
3379 Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::vacopy
, {DstPtr
->getType()}),
3381 return RValue::get(nullptr);
3383 case Builtin::BIabs
:
3384 case Builtin::BIlabs
:
3385 case Builtin::BIllabs
:
3386 case Builtin::BI__builtin_abs
:
3387 case Builtin::BI__builtin_labs
:
3388 case Builtin::BI__builtin_llabs
: {
3389 bool SanitizeOverflow
= SanOpts
.has(SanitizerKind::SignedIntegerOverflow
);
3392 switch (getLangOpts().getSignedOverflowBehavior()) {
3393 case LangOptions::SOB_Defined
:
3394 Result
= EmitAbs(*this, EmitScalarExpr(E
->getArg(0)), false);
3396 case LangOptions::SOB_Undefined
:
3397 if (!SanitizeOverflow
) {
3398 Result
= EmitAbs(*this, EmitScalarExpr(E
->getArg(0)), true);
3402 case LangOptions::SOB_Trapping
:
3403 // TODO: Somehow handle the corner case when the address of abs is taken.
3404 Result
= EmitOverflowCheckedAbs(*this, E
, SanitizeOverflow
);
3407 return RValue::get(Result
);
3409 case Builtin::BI__builtin_complex
: {
3410 Value
*Real
= EmitScalarExpr(E
->getArg(0));
3411 Value
*Imag
= EmitScalarExpr(E
->getArg(1));
3412 return RValue::getComplex({Real
, Imag
});
3414 case Builtin::BI__builtin_conj
:
3415 case Builtin::BI__builtin_conjf
:
3416 case Builtin::BI__builtin_conjl
:
3417 case Builtin::BIconj
:
3418 case Builtin::BIconjf
:
3419 case Builtin::BIconjl
: {
3420 ComplexPairTy ComplexVal
= EmitComplexExpr(E
->getArg(0));
3421 Value
*Real
= ComplexVal
.first
;
3422 Value
*Imag
= ComplexVal
.second
;
3423 Imag
= Builder
.CreateFNeg(Imag
, "neg");
3424 return RValue::getComplex(std::make_pair(Real
, Imag
));
3426 case Builtin::BI__builtin_creal
:
3427 case Builtin::BI__builtin_crealf
:
3428 case Builtin::BI__builtin_creall
:
3429 case Builtin::BIcreal
:
3430 case Builtin::BIcrealf
:
3431 case Builtin::BIcreall
: {
3432 ComplexPairTy ComplexVal
= EmitComplexExpr(E
->getArg(0));
3433 return RValue::get(ComplexVal
.first
);
3436 case Builtin::BI__builtin_preserve_access_index
: {
3437 // Only enable the preserved access index region when debuginfo
3438 // is available as debuginfo is needed to preserve user-level
3440 if (!getDebugInfo()) {
3441 CGM
.Error(E
->getExprLoc(), "using builtin_preserve_access_index() without -g");
3442 return RValue::get(EmitScalarExpr(E
->getArg(0)));
3445 // Nested builtin_preserve_access_index() not supported
3446 if (IsInPreservedAIRegion
) {
3447 CGM
.Error(E
->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3448 return RValue::get(EmitScalarExpr(E
->getArg(0)));
3451 IsInPreservedAIRegion
= true;
3452 Value
*Res
= EmitScalarExpr(E
->getArg(0));
3453 IsInPreservedAIRegion
= false;
3454 return RValue::get(Res
);
3457 case Builtin::BI__builtin_cimag
:
3458 case Builtin::BI__builtin_cimagf
:
3459 case Builtin::BI__builtin_cimagl
:
3460 case Builtin::BIcimag
:
3461 case Builtin::BIcimagf
:
3462 case Builtin::BIcimagl
: {
3463 ComplexPairTy ComplexVal
= EmitComplexExpr(E
->getArg(0));
3464 return RValue::get(ComplexVal
.second
);
3467 case Builtin::BI__builtin_clrsb
:
3468 case Builtin::BI__builtin_clrsbl
:
3469 case Builtin::BI__builtin_clrsbll
: {
3470 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1 or
3471 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
3473 llvm::Type
*ArgType
= ArgValue
->getType();
3474 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctlz
, ArgType
);
3476 llvm::Type
*ResultType
= ConvertType(E
->getType());
3477 Value
*Zero
= llvm::Constant::getNullValue(ArgType
);
3478 Value
*IsNeg
= Builder
.CreateICmpSLT(ArgValue
, Zero
, "isneg");
3479 Value
*Inverse
= Builder
.CreateNot(ArgValue
, "not");
3480 Value
*Tmp
= Builder
.CreateSelect(IsNeg
, Inverse
, ArgValue
);
3481 Value
*Ctlz
= Builder
.CreateCall(F
, {Tmp
, Builder
.getFalse()});
3482 Value
*Result
= Builder
.CreateSub(Ctlz
, llvm::ConstantInt::get(ArgType
, 1));
3483 Result
= Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/true,
3485 return RValue::get(Result
);
3487 case Builtin::BI__builtin_ctzs
:
3488 case Builtin::BI__builtin_ctz
:
3489 case Builtin::BI__builtin_ctzl
:
3490 case Builtin::BI__builtin_ctzll
:
3491 case Builtin::BI__builtin_ctzg
: {
3492 bool HasFallback
= BuiltinIDIfNoAsmLabel
== Builtin::BI__builtin_ctzg
&&
3493 E
->getNumArgs() > 1;
3496 HasFallback
? EmitScalarExpr(E
->getArg(0))
3497 : EmitCheckedArgForBuiltin(E
->getArg(0), BCK_CTZPassedZero
);
3499 llvm::Type
*ArgType
= ArgValue
->getType();
3500 Function
*F
= CGM
.getIntrinsic(Intrinsic::cttz
, ArgType
);
3502 llvm::Type
*ResultType
= ConvertType(E
->getType());
3504 Builder
.getInt1(HasFallback
|| getTarget().isCLZForZeroUndef());
3505 Value
*Result
= Builder
.CreateCall(F
, {ArgValue
, ZeroUndef
});
3506 if (Result
->getType() != ResultType
)
3508 Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/ false, "cast");
3510 return RValue::get(Result
);
3512 Value
*Zero
= Constant::getNullValue(ArgType
);
3513 Value
*IsZero
= Builder
.CreateICmpEQ(ArgValue
, Zero
, "iszero");
3514 Value
*FallbackValue
= EmitScalarExpr(E
->getArg(1));
3515 Value
*ResultOrFallback
=
3516 Builder
.CreateSelect(IsZero
, FallbackValue
, Result
, "ctzg");
3517 return RValue::get(ResultOrFallback
);
3519 case Builtin::BI__builtin_clzs
:
3520 case Builtin::BI__builtin_clz
:
3521 case Builtin::BI__builtin_clzl
:
3522 case Builtin::BI__builtin_clzll
:
3523 case Builtin::BI__builtin_clzg
: {
3524 bool HasFallback
= BuiltinIDIfNoAsmLabel
== Builtin::BI__builtin_clzg
&&
3525 E
->getNumArgs() > 1;
3528 HasFallback
? EmitScalarExpr(E
->getArg(0))
3529 : EmitCheckedArgForBuiltin(E
->getArg(0), BCK_CLZPassedZero
);
3531 llvm::Type
*ArgType
= ArgValue
->getType();
3532 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctlz
, ArgType
);
3534 llvm::Type
*ResultType
= ConvertType(E
->getType());
3536 Builder
.getInt1(HasFallback
|| getTarget().isCLZForZeroUndef());
3537 Value
*Result
= Builder
.CreateCall(F
, {ArgValue
, ZeroUndef
});
3538 if (Result
->getType() != ResultType
)
3540 Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/ false, "cast");
3542 return RValue::get(Result
);
3544 Value
*Zero
= Constant::getNullValue(ArgType
);
3545 Value
*IsZero
= Builder
.CreateICmpEQ(ArgValue
, Zero
, "iszero");
3546 Value
*FallbackValue
= EmitScalarExpr(E
->getArg(1));
3547 Value
*ResultOrFallback
=
3548 Builder
.CreateSelect(IsZero
, FallbackValue
, Result
, "clzg");
3549 return RValue::get(ResultOrFallback
);
3551 case Builtin::BI__builtin_ffs
:
3552 case Builtin::BI__builtin_ffsl
:
3553 case Builtin::BI__builtin_ffsll
: {
3554 // ffs(x) -> x ? cttz(x) + 1 : 0
3555 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
3557 llvm::Type
*ArgType
= ArgValue
->getType();
3558 Function
*F
= CGM
.getIntrinsic(Intrinsic::cttz
, ArgType
);
3560 llvm::Type
*ResultType
= ConvertType(E
->getType());
3562 Builder
.CreateAdd(Builder
.CreateCall(F
, {ArgValue
, Builder
.getTrue()}),
3563 llvm::ConstantInt::get(ArgType
, 1));
3564 Value
*Zero
= llvm::Constant::getNullValue(ArgType
);
3565 Value
*IsZero
= Builder
.CreateICmpEQ(ArgValue
, Zero
, "iszero");
3566 Value
*Result
= Builder
.CreateSelect(IsZero
, Zero
, Tmp
, "ffs");
3567 if (Result
->getType() != ResultType
)
3568 Result
= Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/true,
3570 return RValue::get(Result
);
3572 case Builtin::BI__builtin_parity
:
3573 case Builtin::BI__builtin_parityl
:
3574 case Builtin::BI__builtin_parityll
: {
3575 // parity(x) -> ctpop(x) & 1
3576 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
3578 llvm::Type
*ArgType
= ArgValue
->getType();
3579 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctpop
, ArgType
);
3581 llvm::Type
*ResultType
= ConvertType(E
->getType());
3582 Value
*Tmp
= Builder
.CreateCall(F
, ArgValue
);
3583 Value
*Result
= Builder
.CreateAnd(Tmp
, llvm::ConstantInt::get(ArgType
, 1));
3584 if (Result
->getType() != ResultType
)
3585 Result
= Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/true,
3587 return RValue::get(Result
);
3589 case Builtin::BI__lzcnt16
:
3590 case Builtin::BI__lzcnt
:
3591 case Builtin::BI__lzcnt64
: {
3592 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
3594 llvm::Type
*ArgType
= ArgValue
->getType();
3595 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctlz
, ArgType
);
3597 llvm::Type
*ResultType
= ConvertType(E
->getType());
3598 Value
*Result
= Builder
.CreateCall(F
, {ArgValue
, Builder
.getFalse()});
3599 if (Result
->getType() != ResultType
)
3600 Result
= Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/true,
3602 return RValue::get(Result
);
3604 case Builtin::BI__popcnt16
:
3605 case Builtin::BI__popcnt
:
3606 case Builtin::BI__popcnt64
:
3607 case Builtin::BI__builtin_popcount
:
3608 case Builtin::BI__builtin_popcountl
:
3609 case Builtin::BI__builtin_popcountll
:
3610 case Builtin::BI__builtin_popcountg
: {
3611 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
3613 llvm::Type
*ArgType
= ArgValue
->getType();
3614 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctpop
, ArgType
);
3616 llvm::Type
*ResultType
= ConvertType(E
->getType());
3617 Value
*Result
= Builder
.CreateCall(F
, ArgValue
);
3618 if (Result
->getType() != ResultType
)
3620 Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/ false, "cast");
3621 return RValue::get(Result
);
3623 case Builtin::BI__builtin_unpredictable
: {
3624 // Always return the argument of __builtin_unpredictable. LLVM does not
3625 // handle this builtin. Metadata for this builtin should be added directly
3626 // to instructions such as branches or switches that use it.
3627 return RValue::get(EmitScalarExpr(E
->getArg(0)));
3629 case Builtin::BI__builtin_expect
: {
3630 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
3631 llvm::Type
*ArgType
= ArgValue
->getType();
3633 Value
*ExpectedValue
= EmitScalarExpr(E
->getArg(1));
3634 // Don't generate llvm.expect on -O0 as the backend won't use it for
3636 // Note, we still IRGen ExpectedValue because it could have side-effects.
3637 if (CGM
.getCodeGenOpts().OptimizationLevel
== 0)
3638 return RValue::get(ArgValue
);
3640 Function
*FnExpect
= CGM
.getIntrinsic(Intrinsic::expect
, ArgType
);
3642 Builder
.CreateCall(FnExpect
, {ArgValue
, ExpectedValue
}, "expval");
3643 return RValue::get(Result
);
3645 case Builtin::BI__builtin_expect_with_probability
: {
3646 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
3647 llvm::Type
*ArgType
= ArgValue
->getType();
3649 Value
*ExpectedValue
= EmitScalarExpr(E
->getArg(1));
3650 llvm::APFloat
Probability(0.0);
3651 const Expr
*ProbArg
= E
->getArg(2);
3652 bool EvalSucceed
= ProbArg
->EvaluateAsFloat(Probability
, CGM
.getContext());
3653 assert(EvalSucceed
&& "probability should be able to evaluate as float");
3655 bool LoseInfo
= false;
3656 Probability
.convert(llvm::APFloat::IEEEdouble(),
3657 llvm::RoundingMode::Dynamic
, &LoseInfo
);
3658 llvm::Type
*Ty
= ConvertType(ProbArg
->getType());
3659 Constant
*Confidence
= ConstantFP::get(Ty
, Probability
);
3660 // Don't generate llvm.expect.with.probability on -O0 as the backend
3661 // won't use it for anything.
3662 // Note, we still IRGen ExpectedValue because it could have side-effects.
3663 if (CGM
.getCodeGenOpts().OptimizationLevel
== 0)
3664 return RValue::get(ArgValue
);
3666 Function
*FnExpect
=
3667 CGM
.getIntrinsic(Intrinsic::expect_with_probability
, ArgType
);
3668 Value
*Result
= Builder
.CreateCall(
3669 FnExpect
, {ArgValue
, ExpectedValue
, Confidence
}, "expval");
3670 return RValue::get(Result
);
3672 case Builtin::BI__builtin_assume_aligned
: {
3673 const Expr
*Ptr
= E
->getArg(0);
3674 Value
*PtrValue
= EmitScalarExpr(Ptr
);
3675 Value
*OffsetValue
=
3676 (E
->getNumArgs() > 2) ? EmitScalarExpr(E
->getArg(2)) : nullptr;
3678 Value
*AlignmentValue
= EmitScalarExpr(E
->getArg(1));
3679 ConstantInt
*AlignmentCI
= cast
<ConstantInt
>(AlignmentValue
);
3680 if (AlignmentCI
->getValue().ugt(llvm::Value::MaximumAlignment
))
3681 AlignmentCI
= ConstantInt::get(AlignmentCI
->getIntegerType(),
3682 llvm::Value::MaximumAlignment
);
3684 emitAlignmentAssumption(PtrValue
, Ptr
,
3685 /*The expr loc is sufficient.*/ SourceLocation(),
3686 AlignmentCI
, OffsetValue
);
3687 return RValue::get(PtrValue
);
3689 case Builtin::BI__assume
:
3690 case Builtin::BI__builtin_assume
: {
3691 if (E
->getArg(0)->HasSideEffects(getContext()))
3692 return RValue::get(nullptr);
3694 Value
*ArgValue
= EmitCheckedArgForAssume(E
->getArg(0));
3695 Function
*FnAssume
= CGM
.getIntrinsic(Intrinsic::assume
);
3696 Builder
.CreateCall(FnAssume
, ArgValue
);
3697 return RValue::get(nullptr);
3699 case Builtin::BI__builtin_assume_separate_storage
: {
3700 const Expr
*Arg0
= E
->getArg(0);
3701 const Expr
*Arg1
= E
->getArg(1);
3703 Value
*Value0
= EmitScalarExpr(Arg0
);
3704 Value
*Value1
= EmitScalarExpr(Arg1
);
3706 Value
*Values
[] = {Value0
, Value1
};
3707 OperandBundleDefT
<Value
*> OBD("separate_storage", Values
);
3708 Builder
.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD
});
3709 return RValue::get(nullptr);
3711 case Builtin::BI__builtin_allow_runtime_check
: {
3713 cast
<StringLiteral
>(E
->getArg(0)->IgnoreParenCasts())->getString();
3714 LLVMContext
&Ctx
= CGM
.getLLVMContext();
3715 llvm::Value
*Allow
= Builder
.CreateCall(
3716 CGM
.getIntrinsic(llvm::Intrinsic::allow_runtime_check
),
3717 llvm::MetadataAsValue::get(Ctx
, llvm::MDString::get(Ctx
, Kind
)));
3718 return RValue::get(Allow
);
3720 case Builtin::BI__arithmetic_fence
: {
3721 // Create the builtin call if FastMath is selected, and the target
3722 // supports the builtin, otherwise just return the argument.
3723 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
3724 llvm::FastMathFlags FMF
= Builder
.getFastMathFlags();
3725 bool isArithmeticFenceEnabled
=
3726 FMF
.allowReassoc() &&
3727 getContext().getTargetInfo().checkArithmeticFenceSupported();
3728 QualType ArgType
= E
->getArg(0)->getType();
3729 if (ArgType
->isComplexType()) {
3730 if (isArithmeticFenceEnabled
) {
3731 QualType ElementType
= ArgType
->castAs
<ComplexType
>()->getElementType();
3732 ComplexPairTy ComplexVal
= EmitComplexExpr(E
->getArg(0));
3733 Value
*Real
= Builder
.CreateArithmeticFence(ComplexVal
.first
,
3734 ConvertType(ElementType
));
3735 Value
*Imag
= Builder
.CreateArithmeticFence(ComplexVal
.second
,
3736 ConvertType(ElementType
));
3737 return RValue::getComplex(std::make_pair(Real
, Imag
));
3739 ComplexPairTy ComplexVal
= EmitComplexExpr(E
->getArg(0));
3740 Value
*Real
= ComplexVal
.first
;
3741 Value
*Imag
= ComplexVal
.second
;
3742 return RValue::getComplex(std::make_pair(Real
, Imag
));
3744 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
3745 if (isArithmeticFenceEnabled
)
3747 Builder
.CreateArithmeticFence(ArgValue
, ConvertType(ArgType
)));
3748 return RValue::get(ArgValue
);
3750 case Builtin::BI__builtin_bswap16
:
3751 case Builtin::BI__builtin_bswap32
:
3752 case Builtin::BI__builtin_bswap64
:
3753 case Builtin::BI_byteswap_ushort
:
3754 case Builtin::BI_byteswap_ulong
:
3755 case Builtin::BI_byteswap_uint64
: {
3757 emitBuiltinWithOneOverloadedType
<1>(*this, E
, Intrinsic::bswap
));
3759 case Builtin::BI__builtin_bitreverse8
:
3760 case Builtin::BI__builtin_bitreverse16
:
3761 case Builtin::BI__builtin_bitreverse32
:
3762 case Builtin::BI__builtin_bitreverse64
: {
3764 emitBuiltinWithOneOverloadedType
<1>(*this, E
, Intrinsic::bitreverse
));
3766 case Builtin::BI__builtin_rotateleft8
:
3767 case Builtin::BI__builtin_rotateleft16
:
3768 case Builtin::BI__builtin_rotateleft32
:
3769 case Builtin::BI__builtin_rotateleft64
:
3770 case Builtin::BI_rotl8
: // Microsoft variants of rotate left
3771 case Builtin::BI_rotl16
:
3772 case Builtin::BI_rotl
:
3773 case Builtin::BI_lrotl
:
3774 case Builtin::BI_rotl64
:
3775 return emitRotate(E
, false);
3777 case Builtin::BI__builtin_rotateright8
:
3778 case Builtin::BI__builtin_rotateright16
:
3779 case Builtin::BI__builtin_rotateright32
:
3780 case Builtin::BI__builtin_rotateright64
:
3781 case Builtin::BI_rotr8
: // Microsoft variants of rotate right
3782 case Builtin::BI_rotr16
:
3783 case Builtin::BI_rotr
:
3784 case Builtin::BI_lrotr
:
3785 case Builtin::BI_rotr64
:
3786 return emitRotate(E
, true);
3788 case Builtin::BI__builtin_constant_p
: {
3789 llvm::Type
*ResultType
= ConvertType(E
->getType());
3791 const Expr
*Arg
= E
->getArg(0);
3792 QualType ArgType
= Arg
->getType();
3793 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3794 // and likely a mistake.
3795 if (!ArgType
->isIntegralOrEnumerationType() && !ArgType
->isFloatingType() &&
3796 !ArgType
->isObjCObjectPointerType() && !ArgType
->isBlockPointerType())
3797 // Per the GCC documentation, only numeric constants are recognized after
3799 return RValue::get(ConstantInt::get(ResultType
, 0));
3801 if (Arg
->HasSideEffects(getContext()))
3802 // The argument is unevaluated, so be conservative if it might have
3804 return RValue::get(ConstantInt::get(ResultType
, 0));
3806 Value
*ArgValue
= EmitScalarExpr(Arg
);
3807 if (ArgType
->isObjCObjectPointerType()) {
3808 // Convert Objective-C objects to id because we cannot distinguish between
3809 // LLVM types for Obj-C classes as they are opaque.
3810 ArgType
= CGM
.getContext().getObjCIdType();
3811 ArgValue
= Builder
.CreateBitCast(ArgValue
, ConvertType(ArgType
));
3814 CGM
.getIntrinsic(Intrinsic::is_constant
, ConvertType(ArgType
));
3815 Value
*Result
= Builder
.CreateCall(F
, ArgValue
);
3816 if (Result
->getType() != ResultType
)
3817 Result
= Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/false);
3818 return RValue::get(Result
);
3820 case Builtin::BI__builtin_dynamic_object_size
:
3821 case Builtin::BI__builtin_object_size
: {
3823 E
->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3824 auto *ResType
= cast
<llvm::IntegerType
>(ConvertType(E
->getType()));
3826 // We pass this builtin onto the optimizer so that it can figure out the
3827 // object size in more complex cases.
3828 bool IsDynamic
= BuiltinID
== Builtin::BI__builtin_dynamic_object_size
;
3829 return RValue::get(emitBuiltinObjectSize(E
->getArg(0), Type
, ResType
,
3830 /*EmittedE=*/nullptr, IsDynamic
));
3832 case Builtin::BI__builtin_counted_by_ref
: {
3833 // Default to returning '(void *) 0'.
3834 llvm::Value
*Result
= llvm::ConstantPointerNull::get(
3835 llvm::PointerType::getUnqual(getLLVMContext()));
3837 const Expr
*Arg
= E
->getArg(0)->IgnoreParenImpCasts();
3839 if (auto *UO
= dyn_cast
<UnaryOperator
>(Arg
);
3840 UO
&& UO
->getOpcode() == UO_AddrOf
) {
3841 Arg
= UO
->getSubExpr()->IgnoreParenImpCasts();
3843 if (auto *ASE
= dyn_cast
<ArraySubscriptExpr
>(Arg
))
3844 Arg
= ASE
->getBase()->IgnoreParenImpCasts();
3847 if (const MemberExpr
*ME
= dyn_cast_if_present
<MemberExpr
>(Arg
)) {
3849 ME
->getMemberDecl()->getType()->getAs
<CountAttributedType
>();
3850 CATy
&& CATy
->getKind() == CountAttributedType::CountedBy
) {
3851 const auto *FAMDecl
= cast
<FieldDecl
>(ME
->getMemberDecl());
3852 if (const FieldDecl
*CountFD
= FAMDecl
->findCountedByField())
3853 Result
= GetCountedByFieldExprGEP(Arg
, FAMDecl
, CountFD
);
3855 llvm::report_fatal_error("Cannot find the counted_by 'count' field");
3859 return RValue::get(Result
);
3861 case Builtin::BI__builtin_prefetch
: {
3862 Value
*Locality
, *RW
, *Address
= EmitScalarExpr(E
->getArg(0));
3863 // FIXME: Technically these constants should be of type 'int', yes?
3864 RW
= (E
->getNumArgs() > 1) ? EmitScalarExpr(E
->getArg(1)) :
3865 llvm::ConstantInt::get(Int32Ty
, 0);
3866 Locality
= (E
->getNumArgs() > 2) ? EmitScalarExpr(E
->getArg(2)) :
3867 llvm::ConstantInt::get(Int32Ty
, 3);
3868 Value
*Data
= llvm::ConstantInt::get(Int32Ty
, 1);
3869 Function
*F
= CGM
.getIntrinsic(Intrinsic::prefetch
, Address
->getType());
3870 Builder
.CreateCall(F
, {Address
, RW
, Locality
, Data
});
3871 return RValue::get(nullptr);
3873 case Builtin::BI__builtin_readcyclecounter
: {
3874 Function
*F
= CGM
.getIntrinsic(Intrinsic::readcyclecounter
);
3875 return RValue::get(Builder
.CreateCall(F
));
3877 case Builtin::BI__builtin_readsteadycounter
: {
3878 Function
*F
= CGM
.getIntrinsic(Intrinsic::readsteadycounter
);
3879 return RValue::get(Builder
.CreateCall(F
));
3881 case Builtin::BI__builtin___clear_cache
: {
3882 Value
*Begin
= EmitScalarExpr(E
->getArg(0));
3883 Value
*End
= EmitScalarExpr(E
->getArg(1));
3884 Function
*F
= CGM
.getIntrinsic(Intrinsic::clear_cache
);
3885 return RValue::get(Builder
.CreateCall(F
, {Begin
, End
}));
3887 case Builtin::BI__builtin_trap
:
3888 EmitTrapCall(Intrinsic::trap
);
3889 return RValue::get(nullptr);
3890 case Builtin::BI__builtin_verbose_trap
: {
3891 llvm::DILocation
*TrapLocation
= Builder
.getCurrentDebugLocation();
3892 if (getDebugInfo()) {
3893 TrapLocation
= getDebugInfo()->CreateTrapFailureMessageFor(
3894 TrapLocation
, *E
->getArg(0)->tryEvaluateString(getContext()),
3895 *E
->getArg(1)->tryEvaluateString(getContext()));
3897 ApplyDebugLocation
ApplyTrapDI(*this, TrapLocation
);
3898 // Currently no attempt is made to prevent traps from being merged.
3899 EmitTrapCall(Intrinsic::trap
);
3900 return RValue::get(nullptr);
3902 case Builtin::BI__debugbreak
:
3903 EmitTrapCall(Intrinsic::debugtrap
);
3904 return RValue::get(nullptr);
3905 case Builtin::BI__builtin_unreachable
: {
3906 EmitUnreachable(E
->getExprLoc());
3908 // We do need to preserve an insertion point.
3909 EmitBlock(createBasicBlock("unreachable.cont"));
3911 return RValue::get(nullptr);
3914 case Builtin::BI__builtin_powi
:
3915 case Builtin::BI__builtin_powif
:
3916 case Builtin::BI__builtin_powil
: {
3917 llvm::Value
*Src0
= EmitScalarExpr(E
->getArg(0));
3918 llvm::Value
*Src1
= EmitScalarExpr(E
->getArg(1));
3920 if (Builder
.getIsFPConstrained()) {
3921 // FIXME: llvm.powi has 2 mangling types,
3922 // llvm.experimental.constrained.powi has one.
3923 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
3924 Function
*F
= CGM
.getIntrinsic(Intrinsic::experimental_constrained_powi
,
3926 return RValue::get(Builder
.CreateConstrainedFPCall(F
, { Src0
, Src1
}));
3929 Function
*F
= CGM
.getIntrinsic(Intrinsic::powi
,
3930 { Src0
->getType(), Src1
->getType() });
3931 return RValue::get(Builder
.CreateCall(F
, { Src0
, Src1
}));
3933 case Builtin::BI__builtin_frexpl
: {
3934 // Linux PPC will not be adding additional PPCDoubleDouble support.
3935 // WIP to switch default to IEEE long double. Will emit libcall for
3936 // frexpl instead of legalizing this type in the BE.
3937 if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3941 case Builtin::BI__builtin_frexp
:
3942 case Builtin::BI__builtin_frexpf
:
3943 case Builtin::BI__builtin_frexpf128
:
3944 case Builtin::BI__builtin_frexpf16
:
3945 return RValue::get(emitFrexpBuiltin(*this, E
, Intrinsic::frexp
));
3946 case Builtin::BI__builtin_isgreater
:
3947 case Builtin::BI__builtin_isgreaterequal
:
3948 case Builtin::BI__builtin_isless
:
3949 case Builtin::BI__builtin_islessequal
:
3950 case Builtin::BI__builtin_islessgreater
:
3951 case Builtin::BI__builtin_isunordered
: {
3952 // Ordered comparisons: we know the arguments to these are matching scalar
3953 // floating point values.
3954 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
3955 Value
*LHS
= EmitScalarExpr(E
->getArg(0));
3956 Value
*RHS
= EmitScalarExpr(E
->getArg(1));
3958 switch (BuiltinID
) {
3959 default: llvm_unreachable("Unknown ordered comparison");
3960 case Builtin::BI__builtin_isgreater
:
3961 LHS
= Builder
.CreateFCmpOGT(LHS
, RHS
, "cmp");
3963 case Builtin::BI__builtin_isgreaterequal
:
3964 LHS
= Builder
.CreateFCmpOGE(LHS
, RHS
, "cmp");
3966 case Builtin::BI__builtin_isless
:
3967 LHS
= Builder
.CreateFCmpOLT(LHS
, RHS
, "cmp");
3969 case Builtin::BI__builtin_islessequal
:
3970 LHS
= Builder
.CreateFCmpOLE(LHS
, RHS
, "cmp");
3972 case Builtin::BI__builtin_islessgreater
:
3973 LHS
= Builder
.CreateFCmpONE(LHS
, RHS
, "cmp");
3975 case Builtin::BI__builtin_isunordered
:
3976 LHS
= Builder
.CreateFCmpUNO(LHS
, RHS
, "cmp");
3979 // ZExt bool to int type.
3980 return RValue::get(Builder
.CreateZExt(LHS
, ConvertType(E
->getType())));
3983 case Builtin::BI__builtin_isnan
: {
3984 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
3985 Value
*V
= EmitScalarExpr(E
->getArg(0));
3986 if (Value
*Result
= tryUseTestFPKind(*this, BuiltinID
, V
))
3987 return RValue::get(Result
);
3989 Builder
.CreateZExt(Builder
.createIsFPClass(V
, FPClassTest::fcNan
),
3990 ConvertType(E
->getType())));
3993 case Builtin::BI__builtin_issignaling
: {
3994 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
3995 Value
*V
= EmitScalarExpr(E
->getArg(0));
3997 Builder
.CreateZExt(Builder
.createIsFPClass(V
, FPClassTest::fcSNan
),
3998 ConvertType(E
->getType())));
4001 case Builtin::BI__builtin_isinf
: {
4002 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
4003 Value
*V
= EmitScalarExpr(E
->getArg(0));
4004 if (Value
*Result
= tryUseTestFPKind(*this, BuiltinID
, V
))
4005 return RValue::get(Result
);
4007 Builder
.CreateZExt(Builder
.createIsFPClass(V
, FPClassTest::fcInf
),
4008 ConvertType(E
->getType())));
4011 case Builtin::BIfinite
:
4012 case Builtin::BI__finite
:
4013 case Builtin::BIfinitef
:
4014 case Builtin::BI__finitef
:
4015 case Builtin::BIfinitel
:
4016 case Builtin::BI__finitel
:
4017 case Builtin::BI__builtin_isfinite
: {
4018 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
4019 Value
*V
= EmitScalarExpr(E
->getArg(0));
4020 if (Value
*Result
= tryUseTestFPKind(*this, BuiltinID
, V
))
4021 return RValue::get(Result
);
4023 Builder
.CreateZExt(Builder
.createIsFPClass(V
, FPClassTest::fcFinite
),
4024 ConvertType(E
->getType())));
4027 case Builtin::BI__builtin_isnormal
: {
4028 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
4029 Value
*V
= EmitScalarExpr(E
->getArg(0));
4031 Builder
.CreateZExt(Builder
.createIsFPClass(V
, FPClassTest::fcNormal
),
4032 ConvertType(E
->getType())));
4035 case Builtin::BI__builtin_issubnormal
: {
4036 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
4037 Value
*V
= EmitScalarExpr(E
->getArg(0));
4039 Builder
.CreateZExt(Builder
.createIsFPClass(V
, FPClassTest::fcSubnormal
),
4040 ConvertType(E
->getType())));
4043 case Builtin::BI__builtin_iszero
: {
4044 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
4045 Value
*V
= EmitScalarExpr(E
->getArg(0));
4047 Builder
.CreateZExt(Builder
.createIsFPClass(V
, FPClassTest::fcZero
),
4048 ConvertType(E
->getType())));
4051 case Builtin::BI__builtin_isfpclass
: {
4052 Expr::EvalResult Result
;
4053 if (!E
->getArg(1)->EvaluateAsInt(Result
, CGM
.getContext()))
4055 uint64_t Test
= Result
.Val
.getInt().getLimitedValue();
4056 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
4057 Value
*V
= EmitScalarExpr(E
->getArg(0));
4058 return RValue::get(Builder
.CreateZExt(Builder
.createIsFPClass(V
, Test
),
4059 ConvertType(E
->getType())));
4062 case Builtin::BI__builtin_nondeterministic_value
: {
4063 llvm::Type
*Ty
= ConvertType(E
->getArg(0)->getType());
4065 Value
*Result
= PoisonValue::get(Ty
);
4066 Result
= Builder
.CreateFreeze(Result
);
4068 return RValue::get(Result
);
4071 case Builtin::BI__builtin_elementwise_abs
: {
4073 QualType QT
= E
->getArg(0)->getType();
4075 if (auto *VecTy
= QT
->getAs
<VectorType
>())
4076 QT
= VecTy
->getElementType();
4077 if (QT
->isIntegerType())
4078 Result
= Builder
.CreateBinaryIntrinsic(
4079 llvm::Intrinsic::abs
, EmitScalarExpr(E
->getArg(0)),
4080 Builder
.getFalse(), nullptr, "elt.abs");
4082 Result
= emitBuiltinWithOneOverloadedType
<1>(
4083 *this, E
, llvm::Intrinsic::fabs
, "elt.abs");
4085 return RValue::get(Result
);
4087 case Builtin::BI__builtin_elementwise_acos
:
4088 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4089 *this, E
, llvm::Intrinsic::acos
, "elt.acos"));
4090 case Builtin::BI__builtin_elementwise_asin
:
4091 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4092 *this, E
, llvm::Intrinsic::asin
, "elt.asin"));
4093 case Builtin::BI__builtin_elementwise_atan
:
4094 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4095 *this, E
, llvm::Intrinsic::atan
, "elt.atan"));
4096 case Builtin::BI__builtin_elementwise_atan2
:
4097 return RValue::get(emitBuiltinWithOneOverloadedType
<2>(
4098 *this, E
, llvm::Intrinsic::atan2
, "elt.atan2"));
4099 case Builtin::BI__builtin_elementwise_ceil
:
4100 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4101 *this, E
, llvm::Intrinsic::ceil
, "elt.ceil"));
4102 case Builtin::BI__builtin_elementwise_exp
:
4103 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4104 *this, E
, llvm::Intrinsic::exp
, "elt.exp"));
4105 case Builtin::BI__builtin_elementwise_exp2
:
4106 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4107 *this, E
, llvm::Intrinsic::exp2
, "elt.exp2"));
4108 case Builtin::BI__builtin_elementwise_log
:
4109 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4110 *this, E
, llvm::Intrinsic::log
, "elt.log"));
4111 case Builtin::BI__builtin_elementwise_log2
:
4112 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4113 *this, E
, llvm::Intrinsic::log2
, "elt.log2"));
4114 case Builtin::BI__builtin_elementwise_log10
:
4115 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4116 *this, E
, llvm::Intrinsic::log10
, "elt.log10"));
4117 case Builtin::BI__builtin_elementwise_pow
: {
4119 emitBuiltinWithOneOverloadedType
<2>(*this, E
, llvm::Intrinsic::pow
));
4121 case Builtin::BI__builtin_elementwise_bitreverse
:
4122 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4123 *this, E
, llvm::Intrinsic::bitreverse
, "elt.bitreverse"));
4124 case Builtin::BI__builtin_elementwise_cos
:
4125 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4126 *this, E
, llvm::Intrinsic::cos
, "elt.cos"));
4127 case Builtin::BI__builtin_elementwise_cosh
:
4128 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4129 *this, E
, llvm::Intrinsic::cosh
, "elt.cosh"));
4130 case Builtin::BI__builtin_elementwise_floor
:
4131 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4132 *this, E
, llvm::Intrinsic::floor
, "elt.floor"));
4133 case Builtin::BI__builtin_elementwise_popcount
:
4134 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4135 *this, E
, llvm::Intrinsic::ctpop
, "elt.ctpop"));
4136 case Builtin::BI__builtin_elementwise_roundeven
:
4137 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4138 *this, E
, llvm::Intrinsic::roundeven
, "elt.roundeven"));
4139 case Builtin::BI__builtin_elementwise_round
:
4140 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4141 *this, E
, llvm::Intrinsic::round
, "elt.round"));
4142 case Builtin::BI__builtin_elementwise_rint
:
4143 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4144 *this, E
, llvm::Intrinsic::rint
, "elt.rint"));
4145 case Builtin::BI__builtin_elementwise_nearbyint
:
4146 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4147 *this, E
, llvm::Intrinsic::nearbyint
, "elt.nearbyint"));
4148 case Builtin::BI__builtin_elementwise_sin
:
4149 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4150 *this, E
, llvm::Intrinsic::sin
, "elt.sin"));
4151 case Builtin::BI__builtin_elementwise_sinh
:
4152 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4153 *this, E
, llvm::Intrinsic::sinh
, "elt.sinh"));
4154 case Builtin::BI__builtin_elementwise_tan
:
4155 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4156 *this, E
, llvm::Intrinsic::tan
, "elt.tan"));
4157 case Builtin::BI__builtin_elementwise_tanh
:
4158 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4159 *this, E
, llvm::Intrinsic::tanh
, "elt.tanh"));
4160 case Builtin::BI__builtin_elementwise_trunc
:
4161 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4162 *this, E
, llvm::Intrinsic::trunc
, "elt.trunc"));
4163 case Builtin::BI__builtin_elementwise_canonicalize
:
4164 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4165 *this, E
, llvm::Intrinsic::canonicalize
, "elt.canonicalize"));
4166 case Builtin::BI__builtin_elementwise_copysign
:
4167 return RValue::get(emitBuiltinWithOneOverloadedType
<2>(
4168 *this, E
, llvm::Intrinsic::copysign
));
4169 case Builtin::BI__builtin_elementwise_fma
:
4171 emitBuiltinWithOneOverloadedType
<3>(*this, E
, llvm::Intrinsic::fma
));
4172 case Builtin::BI__builtin_elementwise_add_sat
:
4173 case Builtin::BI__builtin_elementwise_sub_sat
: {
4174 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
4175 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
4177 assert(Op0
->getType()->isIntOrIntVectorTy() && "integer type expected");
4178 QualType Ty
= E
->getArg(0)->getType();
4179 if (auto *VecTy
= Ty
->getAs
<VectorType
>())
4180 Ty
= VecTy
->getElementType();
4181 bool IsSigned
= Ty
->isSignedIntegerType();
4183 if (BuiltinIDIfNoAsmLabel
== Builtin::BI__builtin_elementwise_add_sat
)
4184 Opc
= IsSigned
? llvm::Intrinsic::sadd_sat
: llvm::Intrinsic::uadd_sat
;
4186 Opc
= IsSigned
? llvm::Intrinsic::ssub_sat
: llvm::Intrinsic::usub_sat
;
4187 Result
= Builder
.CreateBinaryIntrinsic(Opc
, Op0
, Op1
, nullptr, "elt.sat");
4188 return RValue::get(Result
);
4191 case Builtin::BI__builtin_elementwise_max
: {
4192 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
4193 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
4195 if (Op0
->getType()->isIntOrIntVectorTy()) {
4196 QualType Ty
= E
->getArg(0)->getType();
4197 if (auto *VecTy
= Ty
->getAs
<VectorType
>())
4198 Ty
= VecTy
->getElementType();
4199 Result
= Builder
.CreateBinaryIntrinsic(Ty
->isSignedIntegerType()
4200 ? llvm::Intrinsic::smax
4201 : llvm::Intrinsic::umax
,
4202 Op0
, Op1
, nullptr, "elt.max");
4204 Result
= Builder
.CreateMaxNum(Op0
, Op1
, "elt.max");
4205 return RValue::get(Result
);
4207 case Builtin::BI__builtin_elementwise_min
: {
4208 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
4209 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
4211 if (Op0
->getType()->isIntOrIntVectorTy()) {
4212 QualType Ty
= E
->getArg(0)->getType();
4213 if (auto *VecTy
= Ty
->getAs
<VectorType
>())
4214 Ty
= VecTy
->getElementType();
4215 Result
= Builder
.CreateBinaryIntrinsic(Ty
->isSignedIntegerType()
4216 ? llvm::Intrinsic::smin
4217 : llvm::Intrinsic::umin
,
4218 Op0
, Op1
, nullptr, "elt.min");
4220 Result
= Builder
.CreateMinNum(Op0
, Op1
, "elt.min");
4221 return RValue::get(Result
);
4224 case Builtin::BI__builtin_elementwise_maximum
: {
4225 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
4226 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
4227 Value
*Result
= Builder
.CreateBinaryIntrinsic(llvm::Intrinsic::maximum
, Op0
,
4228 Op1
, nullptr, "elt.maximum");
4229 return RValue::get(Result
);
4232 case Builtin::BI__builtin_elementwise_minimum
: {
4233 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
4234 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
4235 Value
*Result
= Builder
.CreateBinaryIntrinsic(llvm::Intrinsic::minimum
, Op0
,
4236 Op1
, nullptr, "elt.minimum");
4237 return RValue::get(Result
);
4240 case Builtin::BI__builtin_reduce_max
: {
4241 auto GetIntrinsicID
= [this](QualType QT
) {
4242 if (auto *VecTy
= QT
->getAs
<VectorType
>())
4243 QT
= VecTy
->getElementType();
4244 else if (QT
->isSizelessVectorType())
4245 QT
= QT
->getSizelessVectorEltType(CGM
.getContext());
4247 if (QT
->isSignedIntegerType())
4248 return llvm::Intrinsic::vector_reduce_smax
;
4249 if (QT
->isUnsignedIntegerType())
4250 return llvm::Intrinsic::vector_reduce_umax
;
4251 assert(QT
->isFloatingType() && "must have a float here");
4252 return llvm::Intrinsic::vector_reduce_fmax
;
4254 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4255 *this, E
, GetIntrinsicID(E
->getArg(0)->getType()), "rdx.min"));
4258 case Builtin::BI__builtin_reduce_min
: {
4259 auto GetIntrinsicID
= [this](QualType QT
) {
4260 if (auto *VecTy
= QT
->getAs
<VectorType
>())
4261 QT
= VecTy
->getElementType();
4262 else if (QT
->isSizelessVectorType())
4263 QT
= QT
->getSizelessVectorEltType(CGM
.getContext());
4265 if (QT
->isSignedIntegerType())
4266 return llvm::Intrinsic::vector_reduce_smin
;
4267 if (QT
->isUnsignedIntegerType())
4268 return llvm::Intrinsic::vector_reduce_umin
;
4269 assert(QT
->isFloatingType() && "must have a float here");
4270 return llvm::Intrinsic::vector_reduce_fmin
;
4273 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4274 *this, E
, GetIntrinsicID(E
->getArg(0)->getType()), "rdx.min"));
4277 case Builtin::BI__builtin_reduce_add
:
4278 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4279 *this, E
, llvm::Intrinsic::vector_reduce_add
, "rdx.add"));
4280 case Builtin::BI__builtin_reduce_mul
:
4281 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4282 *this, E
, llvm::Intrinsic::vector_reduce_mul
, "rdx.mul"));
4283 case Builtin::BI__builtin_reduce_xor
:
4284 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4285 *this, E
, llvm::Intrinsic::vector_reduce_xor
, "rdx.xor"));
4286 case Builtin::BI__builtin_reduce_or
:
4287 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4288 *this, E
, llvm::Intrinsic::vector_reduce_or
, "rdx.or"));
4289 case Builtin::BI__builtin_reduce_and
:
4290 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4291 *this, E
, llvm::Intrinsic::vector_reduce_and
, "rdx.and"));
4292 case Builtin::BI__builtin_reduce_maximum
:
4293 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4294 *this, E
, llvm::Intrinsic::vector_reduce_fmaximum
, "rdx.maximum"));
4295 case Builtin::BI__builtin_reduce_minimum
:
4296 return RValue::get(emitBuiltinWithOneOverloadedType
<1>(
4297 *this, E
, llvm::Intrinsic::vector_reduce_fminimum
, "rdx.minimum"));
4299 case Builtin::BI__builtin_matrix_transpose
: {
4300 auto *MatrixTy
= E
->getArg(0)->getType()->castAs
<ConstantMatrixType
>();
4301 Value
*MatValue
= EmitScalarExpr(E
->getArg(0));
4302 MatrixBuilder
MB(Builder
);
4303 Value
*Result
= MB
.CreateMatrixTranspose(MatValue
, MatrixTy
->getNumRows(),
4304 MatrixTy
->getNumColumns());
4305 return RValue::get(Result
);
4308 case Builtin::BI__builtin_matrix_column_major_load
: {
4309 MatrixBuilder
MB(Builder
);
4310 // Emit everything that isn't dependent on the first parameter type
4311 Value
*Stride
= EmitScalarExpr(E
->getArg(3));
4312 const auto *ResultTy
= E
->getType()->getAs
<ConstantMatrixType
>();
4313 auto *PtrTy
= E
->getArg(0)->getType()->getAs
<PointerType
>();
4314 assert(PtrTy
&& "arg0 must be of pointer type");
4315 bool IsVolatile
= PtrTy
->getPointeeType().isVolatileQualified();
4317 Address Src
= EmitPointerWithAlignment(E
->getArg(0));
4318 EmitNonNullArgCheck(RValue::get(Src
.emitRawPointer(*this)),
4319 E
->getArg(0)->getType(), E
->getArg(0)->getExprLoc(), FD
,
4321 Value
*Result
= MB
.CreateColumnMajorLoad(
4322 Src
.getElementType(), Src
.emitRawPointer(*this),
4323 Align(Src
.getAlignment().getQuantity()), Stride
, IsVolatile
,
4324 ResultTy
->getNumRows(), ResultTy
->getNumColumns(), "matrix");
4325 return RValue::get(Result
);
4328 case Builtin::BI__builtin_matrix_column_major_store
: {
4329 MatrixBuilder
MB(Builder
);
4330 Value
*Matrix
= EmitScalarExpr(E
->getArg(0));
4331 Address Dst
= EmitPointerWithAlignment(E
->getArg(1));
4332 Value
*Stride
= EmitScalarExpr(E
->getArg(2));
4334 const auto *MatrixTy
= E
->getArg(0)->getType()->getAs
<ConstantMatrixType
>();
4335 auto *PtrTy
= E
->getArg(1)->getType()->getAs
<PointerType
>();
4336 assert(PtrTy
&& "arg1 must be of pointer type");
4337 bool IsVolatile
= PtrTy
->getPointeeType().isVolatileQualified();
4339 EmitNonNullArgCheck(RValue::get(Dst
.emitRawPointer(*this)),
4340 E
->getArg(1)->getType(), E
->getArg(1)->getExprLoc(), FD
,
4342 Value
*Result
= MB
.CreateColumnMajorStore(
4343 Matrix
, Dst
.emitRawPointer(*this),
4344 Align(Dst
.getAlignment().getQuantity()), Stride
, IsVolatile
,
4345 MatrixTy
->getNumRows(), MatrixTy
->getNumColumns());
4346 return RValue::get(Result
);
4349 case Builtin::BI__builtin_isinf_sign
: {
4350 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
4351 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
4352 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4353 Value
*Arg
= EmitScalarExpr(E
->getArg(0));
4354 Value
*AbsArg
= EmitFAbs(*this, Arg
);
4355 Value
*IsInf
= Builder
.CreateFCmpOEQ(
4356 AbsArg
, ConstantFP::getInfinity(Arg
->getType()), "isinf");
4357 Value
*IsNeg
= EmitSignBit(*this, Arg
);
4359 llvm::Type
*IntTy
= ConvertType(E
->getType());
4360 Value
*Zero
= Constant::getNullValue(IntTy
);
4361 Value
*One
= ConstantInt::get(IntTy
, 1);
4362 Value
*NegativeOne
= ConstantInt::get(IntTy
, -1);
4363 Value
*SignResult
= Builder
.CreateSelect(IsNeg
, NegativeOne
, One
);
4364 Value
*Result
= Builder
.CreateSelect(IsInf
, SignResult
, Zero
);
4365 return RValue::get(Result
);
4368 case Builtin::BI__builtin_flt_rounds
: {
4369 Function
*F
= CGM
.getIntrinsic(Intrinsic::get_rounding
);
4371 llvm::Type
*ResultType
= ConvertType(E
->getType());
4372 Value
*Result
= Builder
.CreateCall(F
);
4373 if (Result
->getType() != ResultType
)
4374 Result
= Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/true,
4376 return RValue::get(Result
);
4379 case Builtin::BI__builtin_set_flt_rounds
: {
4380 Function
*F
= CGM
.getIntrinsic(Intrinsic::set_rounding
);
4382 Value
*V
= EmitScalarExpr(E
->getArg(0));
4383 Builder
.CreateCall(F
, V
);
4384 return RValue::get(nullptr);
4387 case Builtin::BI__builtin_fpclassify
: {
4388 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
4389 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4390 Value
*V
= EmitScalarExpr(E
->getArg(5));
4391 llvm::Type
*Ty
= ConvertType(E
->getArg(5)->getType());
4394 BasicBlock
*Begin
= Builder
.GetInsertBlock();
4395 BasicBlock
*End
= createBasicBlock("fpclassify_end", this->CurFn
);
4396 Builder
.SetInsertPoint(End
);
4398 Builder
.CreatePHI(ConvertType(E
->getArg(0)->getType()), 4,
4399 "fpclassify_result");
4401 // if (V==0) return FP_ZERO
4402 Builder
.SetInsertPoint(Begin
);
4403 Value
*IsZero
= Builder
.CreateFCmpOEQ(V
, Constant::getNullValue(Ty
),
4405 Value
*ZeroLiteral
= EmitScalarExpr(E
->getArg(4));
4406 BasicBlock
*NotZero
= createBasicBlock("fpclassify_not_zero", this->CurFn
);
4407 Builder
.CreateCondBr(IsZero
, End
, NotZero
);
4408 Result
->addIncoming(ZeroLiteral
, Begin
);
4410 // if (V != V) return FP_NAN
4411 Builder
.SetInsertPoint(NotZero
);
4412 Value
*IsNan
= Builder
.CreateFCmpUNO(V
, V
, "cmp");
4413 Value
*NanLiteral
= EmitScalarExpr(E
->getArg(0));
4414 BasicBlock
*NotNan
= createBasicBlock("fpclassify_not_nan", this->CurFn
);
4415 Builder
.CreateCondBr(IsNan
, End
, NotNan
);
4416 Result
->addIncoming(NanLiteral
, NotZero
);
4418 // if (fabs(V) == infinity) return FP_INFINITY
4419 Builder
.SetInsertPoint(NotNan
);
4420 Value
*VAbs
= EmitFAbs(*this, V
);
4422 Builder
.CreateFCmpOEQ(VAbs
, ConstantFP::getInfinity(V
->getType()),
4424 Value
*InfLiteral
= EmitScalarExpr(E
->getArg(1));
4425 BasicBlock
*NotInf
= createBasicBlock("fpclassify_not_inf", this->CurFn
);
4426 Builder
.CreateCondBr(IsInf
, End
, NotInf
);
4427 Result
->addIncoming(InfLiteral
, NotNan
);
4429 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
4430 Builder
.SetInsertPoint(NotInf
);
4431 APFloat Smallest
= APFloat::getSmallestNormalized(
4432 getContext().getFloatTypeSemantics(E
->getArg(5)->getType()));
4434 Builder
.CreateFCmpUGE(VAbs
, ConstantFP::get(V
->getContext(), Smallest
),
4436 Value
*NormalResult
=
4437 Builder
.CreateSelect(IsNormal
, EmitScalarExpr(E
->getArg(2)),
4438 EmitScalarExpr(E
->getArg(3)));
4439 Builder
.CreateBr(End
);
4440 Result
->addIncoming(NormalResult
, NotInf
);
4443 Builder
.SetInsertPoint(End
);
4444 return RValue::get(Result
);
4447 // An alloca will always return a pointer to the alloca (stack) address
4448 // space. This address space need not be the same as the AST / Language
4449 // default (e.g. in C / C++ auto vars are in the generic address space). At
4450 // the AST level this is handled within CreateTempAlloca et al., but for the
4451 // builtin / dynamic alloca we have to handle it here. We use an explicit cast
4452 // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
4453 case Builtin::BIalloca
:
4454 case Builtin::BI_alloca
:
4455 case Builtin::BI__builtin_alloca_uninitialized
:
4456 case Builtin::BI__builtin_alloca
: {
4457 Value
*Size
= EmitScalarExpr(E
->getArg(0));
4458 const TargetInfo
&TI
= getContext().getTargetInfo();
4459 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
4460 const Align SuitableAlignmentInBytes
=
4462 .toCharUnitsFromBits(TI
.getSuitableAlign())
4464 AllocaInst
*AI
= Builder
.CreateAlloca(Builder
.getInt8Ty(), Size
);
4465 AI
->setAlignment(SuitableAlignmentInBytes
);
4466 if (BuiltinID
!= Builtin::BI__builtin_alloca_uninitialized
)
4467 initializeAlloca(*this, AI
, Size
, SuitableAlignmentInBytes
);
4468 LangAS AAS
= getASTAllocaAddressSpace();
4469 LangAS EAS
= E
->getType()->getPointeeType().getAddressSpace();
4471 llvm::Type
*Ty
= CGM
.getTypes().ConvertType(E
->getType());
4472 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI
, AAS
,
4475 return RValue::get(AI
);
4478 case Builtin::BI__builtin_alloca_with_align_uninitialized
:
4479 case Builtin::BI__builtin_alloca_with_align
: {
4480 Value
*Size
= EmitScalarExpr(E
->getArg(0));
4481 Value
*AlignmentInBitsValue
= EmitScalarExpr(E
->getArg(1));
4482 auto *AlignmentInBitsCI
= cast
<ConstantInt
>(AlignmentInBitsValue
);
4483 unsigned AlignmentInBits
= AlignmentInBitsCI
->getZExtValue();
4484 const Align AlignmentInBytes
=
4485 CGM
.getContext().toCharUnitsFromBits(AlignmentInBits
).getAsAlign();
4486 AllocaInst
*AI
= Builder
.CreateAlloca(Builder
.getInt8Ty(), Size
);
4487 AI
->setAlignment(AlignmentInBytes
);
4488 if (BuiltinID
!= Builtin::BI__builtin_alloca_with_align_uninitialized
)
4489 initializeAlloca(*this, AI
, Size
, AlignmentInBytes
);
4490 LangAS AAS
= getASTAllocaAddressSpace();
4491 LangAS EAS
= E
->getType()->getPointeeType().getAddressSpace();
4493 llvm::Type
*Ty
= CGM
.getTypes().ConvertType(E
->getType());
4494 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI
, AAS
,
4497 return RValue::get(AI
);
4500 case Builtin::BIbzero
:
4501 case Builtin::BI__builtin_bzero
: {
4502 Address Dest
= EmitPointerWithAlignment(E
->getArg(0));
4503 Value
*SizeVal
= EmitScalarExpr(E
->getArg(1));
4504 EmitNonNullArgCheck(Dest
, E
->getArg(0)->getType(),
4505 E
->getArg(0)->getExprLoc(), FD
, 0);
4506 Builder
.CreateMemSet(Dest
, Builder
.getInt8(0), SizeVal
, false);
4507 return RValue::get(nullptr);
4510 case Builtin::BIbcopy
:
4511 case Builtin::BI__builtin_bcopy
: {
4512 Address Src
= EmitPointerWithAlignment(E
->getArg(0));
4513 Address Dest
= EmitPointerWithAlignment(E
->getArg(1));
4514 Value
*SizeVal
= EmitScalarExpr(E
->getArg(2));
4515 EmitNonNullArgCheck(RValue::get(Src
.emitRawPointer(*this)),
4516 E
->getArg(0)->getType(), E
->getArg(0)->getExprLoc(), FD
,
4518 EmitNonNullArgCheck(RValue::get(Dest
.emitRawPointer(*this)),
4519 E
->getArg(1)->getType(), E
->getArg(1)->getExprLoc(), FD
,
4521 Builder
.CreateMemMove(Dest
, Src
, SizeVal
, false);
4522 return RValue::get(nullptr);
4525 case Builtin::BImemcpy
:
4526 case Builtin::BI__builtin_memcpy
:
4527 case Builtin::BImempcpy
:
4528 case Builtin::BI__builtin_mempcpy
: {
4529 Address Dest
= EmitPointerWithAlignment(E
->getArg(0));
4530 Address Src
= EmitPointerWithAlignment(E
->getArg(1));
4531 Value
*SizeVal
= EmitScalarExpr(E
->getArg(2));
4532 EmitArgCheck(TCK_Store
, Dest
, E
->getArg(0), 0);
4533 EmitArgCheck(TCK_Load
, Src
, E
->getArg(1), 1);
4534 Builder
.CreateMemCpy(Dest
, Src
, SizeVal
, false);
4535 if (BuiltinID
== Builtin::BImempcpy
||
4536 BuiltinID
== Builtin::BI__builtin_mempcpy
)
4537 return RValue::get(Builder
.CreateInBoundsGEP(
4538 Dest
.getElementType(), Dest
.emitRawPointer(*this), SizeVal
));
4540 return RValue::get(Dest
, *this);
4543 case Builtin::BI__builtin_memcpy_inline
: {
4544 Address Dest
= EmitPointerWithAlignment(E
->getArg(0));
4545 Address Src
= EmitPointerWithAlignment(E
->getArg(1));
4547 E
->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4548 EmitArgCheck(TCK_Store
, Dest
, E
->getArg(0), 0);
4549 EmitArgCheck(TCK_Load
, Src
, E
->getArg(1), 1);
4550 Builder
.CreateMemCpyInline(Dest
, Src
, Size
);
4551 return RValue::get(nullptr);
4554 case Builtin::BI__builtin_char_memchr
:
4555 BuiltinID
= Builtin::BI__builtin_memchr
;
4558 case Builtin::BI__builtin___memcpy_chk
: {
4559 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
4560 Expr::EvalResult SizeResult
, DstSizeResult
;
4561 if (!E
->getArg(2)->EvaluateAsInt(SizeResult
, CGM
.getContext()) ||
4562 !E
->getArg(3)->EvaluateAsInt(DstSizeResult
, CGM
.getContext()))
4564 llvm::APSInt Size
= SizeResult
.Val
.getInt();
4565 llvm::APSInt DstSize
= DstSizeResult
.Val
.getInt();
4566 if (Size
.ugt(DstSize
))
4568 Address Dest
= EmitPointerWithAlignment(E
->getArg(0));
4569 Address Src
= EmitPointerWithAlignment(E
->getArg(1));
4570 Value
*SizeVal
= llvm::ConstantInt::get(Builder
.getContext(), Size
);
4571 Builder
.CreateMemCpy(Dest
, Src
, SizeVal
, false);
4572 return RValue::get(Dest
, *this);
4575 case Builtin::BI__builtin_objc_memmove_collectable
: {
4576 Address DestAddr
= EmitPointerWithAlignment(E
->getArg(0));
4577 Address SrcAddr
= EmitPointerWithAlignment(E
->getArg(1));
4578 Value
*SizeVal
= EmitScalarExpr(E
->getArg(2));
4579 CGM
.getObjCRuntime().EmitGCMemmoveCollectable(*this,
4580 DestAddr
, SrcAddr
, SizeVal
);
4581 return RValue::get(DestAddr
, *this);
4584 case Builtin::BI__builtin___memmove_chk
: {
4585 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4586 Expr::EvalResult SizeResult
, DstSizeResult
;
4587 if (!E
->getArg(2)->EvaluateAsInt(SizeResult
, CGM
.getContext()) ||
4588 !E
->getArg(3)->EvaluateAsInt(DstSizeResult
, CGM
.getContext()))
4590 llvm::APSInt Size
= SizeResult
.Val
.getInt();
4591 llvm::APSInt DstSize
= DstSizeResult
.Val
.getInt();
4592 if (Size
.ugt(DstSize
))
4594 Address Dest
= EmitPointerWithAlignment(E
->getArg(0));
4595 Address Src
= EmitPointerWithAlignment(E
->getArg(1));
4596 Value
*SizeVal
= llvm::ConstantInt::get(Builder
.getContext(), Size
);
4597 Builder
.CreateMemMove(Dest
, Src
, SizeVal
, false);
4598 return RValue::get(Dest
, *this);
4601 case Builtin::BImemmove
:
4602 case Builtin::BI__builtin_memmove
: {
4603 Address Dest
= EmitPointerWithAlignment(E
->getArg(0));
4604 Address Src
= EmitPointerWithAlignment(E
->getArg(1));
4605 Value
*SizeVal
= EmitScalarExpr(E
->getArg(2));
4606 EmitArgCheck(TCK_Store
, Dest
, E
->getArg(0), 0);
4607 EmitArgCheck(TCK_Load
, Src
, E
->getArg(1), 1);
4608 Builder
.CreateMemMove(Dest
, Src
, SizeVal
, false);
4609 return RValue::get(Dest
, *this);
4611 case Builtin::BImemset
:
4612 case Builtin::BI__builtin_memset
: {
4613 Address Dest
= EmitPointerWithAlignment(E
->getArg(0));
4614 Value
*ByteVal
= Builder
.CreateTrunc(EmitScalarExpr(E
->getArg(1)),
4615 Builder
.getInt8Ty());
4616 Value
*SizeVal
= EmitScalarExpr(E
->getArg(2));
4617 EmitNonNullArgCheck(Dest
, E
->getArg(0)->getType(),
4618 E
->getArg(0)->getExprLoc(), FD
, 0);
4619 Builder
.CreateMemSet(Dest
, ByteVal
, SizeVal
, false);
4620 return RValue::get(Dest
, *this);
4622 case Builtin::BI__builtin_memset_inline
: {
4623 Address Dest
= EmitPointerWithAlignment(E
->getArg(0));
4625 Builder
.CreateTrunc(EmitScalarExpr(E
->getArg(1)), Builder
.getInt8Ty());
4627 E
->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4628 EmitNonNullArgCheck(RValue::get(Dest
.emitRawPointer(*this)),
4629 E
->getArg(0)->getType(), E
->getArg(0)->getExprLoc(), FD
,
4631 Builder
.CreateMemSetInline(Dest
, ByteVal
, Size
);
4632 return RValue::get(nullptr);
4634 case Builtin::BI__builtin___memset_chk
: {
4635 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4636 Expr::EvalResult SizeResult
, DstSizeResult
;
4637 if (!E
->getArg(2)->EvaluateAsInt(SizeResult
, CGM
.getContext()) ||
4638 !E
->getArg(3)->EvaluateAsInt(DstSizeResult
, CGM
.getContext()))
4640 llvm::APSInt Size
= SizeResult
.Val
.getInt();
4641 llvm::APSInt DstSize
= DstSizeResult
.Val
.getInt();
4642 if (Size
.ugt(DstSize
))
4644 Address Dest
= EmitPointerWithAlignment(E
->getArg(0));
4645 Value
*ByteVal
= Builder
.CreateTrunc(EmitScalarExpr(E
->getArg(1)),
4646 Builder
.getInt8Ty());
4647 Value
*SizeVal
= llvm::ConstantInt::get(Builder
.getContext(), Size
);
4648 Builder
.CreateMemSet(Dest
, ByteVal
, SizeVal
, false);
4649 return RValue::get(Dest
, *this);
4651 case Builtin::BI__builtin_wmemchr
: {
4652 // The MSVC runtime library does not provide a definition of wmemchr, so we
4653 // need an inline implementation.
4654 if (!getTarget().getTriple().isOSMSVCRT())
4657 llvm::Type
*WCharTy
= ConvertType(getContext().WCharTy
);
4658 Value
*Str
= EmitScalarExpr(E
->getArg(0));
4659 Value
*Chr
= EmitScalarExpr(E
->getArg(1));
4660 Value
*Size
= EmitScalarExpr(E
->getArg(2));
4662 BasicBlock
*Entry
= Builder
.GetInsertBlock();
4663 BasicBlock
*CmpEq
= createBasicBlock("wmemchr.eq");
4664 BasicBlock
*Next
= createBasicBlock("wmemchr.next");
4665 BasicBlock
*Exit
= createBasicBlock("wmemchr.exit");
4666 Value
*SizeEq0
= Builder
.CreateICmpEQ(Size
, ConstantInt::get(SizeTy
, 0));
4667 Builder
.CreateCondBr(SizeEq0
, Exit
, CmpEq
);
4670 PHINode
*StrPhi
= Builder
.CreatePHI(Str
->getType(), 2);
4671 StrPhi
->addIncoming(Str
, Entry
);
4672 PHINode
*SizePhi
= Builder
.CreatePHI(SizeTy
, 2);
4673 SizePhi
->addIncoming(Size
, Entry
);
4674 CharUnits WCharAlign
=
4675 getContext().getTypeAlignInChars(getContext().WCharTy
);
4676 Value
*StrCh
= Builder
.CreateAlignedLoad(WCharTy
, StrPhi
, WCharAlign
);
4677 Value
*FoundChr
= Builder
.CreateConstInBoundsGEP1_32(WCharTy
, StrPhi
, 0);
4678 Value
*StrEqChr
= Builder
.CreateICmpEQ(StrCh
, Chr
);
4679 Builder
.CreateCondBr(StrEqChr
, Exit
, Next
);
4682 Value
*NextStr
= Builder
.CreateConstInBoundsGEP1_32(WCharTy
, StrPhi
, 1);
4683 Value
*NextSize
= Builder
.CreateSub(SizePhi
, ConstantInt::get(SizeTy
, 1));
4684 Value
*NextSizeEq0
=
4685 Builder
.CreateICmpEQ(NextSize
, ConstantInt::get(SizeTy
, 0));
4686 Builder
.CreateCondBr(NextSizeEq0
, Exit
, CmpEq
);
4687 StrPhi
->addIncoming(NextStr
, Next
);
4688 SizePhi
->addIncoming(NextSize
, Next
);
4691 PHINode
*Ret
= Builder
.CreatePHI(Str
->getType(), 3);
4692 Ret
->addIncoming(llvm::Constant::getNullValue(Str
->getType()), Entry
);
4693 Ret
->addIncoming(llvm::Constant::getNullValue(Str
->getType()), Next
);
4694 Ret
->addIncoming(FoundChr
, CmpEq
);
4695 return RValue::get(Ret
);
4697 case Builtin::BI__builtin_wmemcmp
: {
4698 // The MSVC runtime library does not provide a definition of wmemcmp, so we
4699 // need an inline implementation.
4700 if (!getTarget().getTriple().isOSMSVCRT())
4703 llvm::Type
*WCharTy
= ConvertType(getContext().WCharTy
);
4705 Value
*Dst
= EmitScalarExpr(E
->getArg(0));
4706 Value
*Src
= EmitScalarExpr(E
->getArg(1));
4707 Value
*Size
= EmitScalarExpr(E
->getArg(2));
4709 BasicBlock
*Entry
= Builder
.GetInsertBlock();
4710 BasicBlock
*CmpGT
= createBasicBlock("wmemcmp.gt");
4711 BasicBlock
*CmpLT
= createBasicBlock("wmemcmp.lt");
4712 BasicBlock
*Next
= createBasicBlock("wmemcmp.next");
4713 BasicBlock
*Exit
= createBasicBlock("wmemcmp.exit");
4714 Value
*SizeEq0
= Builder
.CreateICmpEQ(Size
, ConstantInt::get(SizeTy
, 0));
4715 Builder
.CreateCondBr(SizeEq0
, Exit
, CmpGT
);
4718 PHINode
*DstPhi
= Builder
.CreatePHI(Dst
->getType(), 2);
4719 DstPhi
->addIncoming(Dst
, Entry
);
4720 PHINode
*SrcPhi
= Builder
.CreatePHI(Src
->getType(), 2);
4721 SrcPhi
->addIncoming(Src
, Entry
);
4722 PHINode
*SizePhi
= Builder
.CreatePHI(SizeTy
, 2);
4723 SizePhi
->addIncoming(Size
, Entry
);
4724 CharUnits WCharAlign
=
4725 getContext().getTypeAlignInChars(getContext().WCharTy
);
4726 Value
*DstCh
= Builder
.CreateAlignedLoad(WCharTy
, DstPhi
, WCharAlign
);
4727 Value
*SrcCh
= Builder
.CreateAlignedLoad(WCharTy
, SrcPhi
, WCharAlign
);
4728 Value
*DstGtSrc
= Builder
.CreateICmpUGT(DstCh
, SrcCh
);
4729 Builder
.CreateCondBr(DstGtSrc
, Exit
, CmpLT
);
4732 Value
*DstLtSrc
= Builder
.CreateICmpULT(DstCh
, SrcCh
);
4733 Builder
.CreateCondBr(DstLtSrc
, Exit
, Next
);
4736 Value
*NextDst
= Builder
.CreateConstInBoundsGEP1_32(WCharTy
, DstPhi
, 1);
4737 Value
*NextSrc
= Builder
.CreateConstInBoundsGEP1_32(WCharTy
, SrcPhi
, 1);
4738 Value
*NextSize
= Builder
.CreateSub(SizePhi
, ConstantInt::get(SizeTy
, 1));
4739 Value
*NextSizeEq0
=
4740 Builder
.CreateICmpEQ(NextSize
, ConstantInt::get(SizeTy
, 0));
4741 Builder
.CreateCondBr(NextSizeEq0
, Exit
, CmpGT
);
4742 DstPhi
->addIncoming(NextDst
, Next
);
4743 SrcPhi
->addIncoming(NextSrc
, Next
);
4744 SizePhi
->addIncoming(NextSize
, Next
);
4747 PHINode
*Ret
= Builder
.CreatePHI(IntTy
, 4);
4748 Ret
->addIncoming(ConstantInt::get(IntTy
, 0), Entry
);
4749 Ret
->addIncoming(ConstantInt::get(IntTy
, 1), CmpGT
);
4750 Ret
->addIncoming(ConstantInt::get(IntTy
, -1), CmpLT
);
4751 Ret
->addIncoming(ConstantInt::get(IntTy
, 0), Next
);
4752 return RValue::get(Ret
);
4754 case Builtin::BI__builtin_dwarf_cfa
: {
4755 // The offset in bytes from the first argument to the CFA.
4757 // Why on earth is this in the frontend? Is there any reason at
4758 // all that the backend can't reasonably determine this while
4759 // lowering llvm.eh.dwarf.cfa()?
4761 // TODO: If there's a satisfactory reason, add a target hook for
4762 // this instead of hard-coding 0, which is correct for most targets.
4765 Function
*F
= CGM
.getIntrinsic(Intrinsic::eh_dwarf_cfa
);
4766 return RValue::get(Builder
.CreateCall(F
,
4767 llvm::ConstantInt::get(Int32Ty
, Offset
)));
4769 case Builtin::BI__builtin_return_address
: {
4770 Value
*Depth
= ConstantEmitter(*this).emitAbstract(E
->getArg(0),
4771 getContext().UnsignedIntTy
);
4772 Function
*F
= CGM
.getIntrinsic(Intrinsic::returnaddress
);
4773 return RValue::get(Builder
.CreateCall(F
, Depth
));
4775 case Builtin::BI_ReturnAddress
: {
4776 Function
*F
= CGM
.getIntrinsic(Intrinsic::returnaddress
);
4777 return RValue::get(Builder
.CreateCall(F
, Builder
.getInt32(0)));
4779 case Builtin::BI__builtin_frame_address
: {
4780 Value
*Depth
= ConstantEmitter(*this).emitAbstract(E
->getArg(0),
4781 getContext().UnsignedIntTy
);
4782 Function
*F
= CGM
.getIntrinsic(Intrinsic::frameaddress
, AllocaInt8PtrTy
);
4783 return RValue::get(Builder
.CreateCall(F
, Depth
));
4785 case Builtin::BI__builtin_extract_return_addr
: {
4786 Value
*Address
= EmitScalarExpr(E
->getArg(0));
4787 Value
*Result
= getTargetHooks().decodeReturnAddress(*this, Address
);
4788 return RValue::get(Result
);
4790 case Builtin::BI__builtin_frob_return_addr
: {
4791 Value
*Address
= EmitScalarExpr(E
->getArg(0));
4792 Value
*Result
= getTargetHooks().encodeReturnAddress(*this, Address
);
4793 return RValue::get(Result
);
4795 case Builtin::BI__builtin_dwarf_sp_column
: {
4796 llvm::IntegerType
*Ty
4797 = cast
<llvm::IntegerType
>(ConvertType(E
->getType()));
4798 int Column
= getTargetHooks().getDwarfEHStackPointer(CGM
);
4800 CGM
.ErrorUnsupported(E
, "__builtin_dwarf_sp_column");
4801 return RValue::get(llvm::UndefValue::get(Ty
));
4803 return RValue::get(llvm::ConstantInt::get(Ty
, Column
, true));
4805 case Builtin::BI__builtin_init_dwarf_reg_size_table
: {
4806 Value
*Address
= EmitScalarExpr(E
->getArg(0));
4807 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address
))
4808 CGM
.ErrorUnsupported(E
, "__builtin_init_dwarf_reg_size_table");
4809 return RValue::get(llvm::UndefValue::get(ConvertType(E
->getType())));
4811 case Builtin::BI__builtin_eh_return
: {
4812 Value
*Int
= EmitScalarExpr(E
->getArg(0));
4813 Value
*Ptr
= EmitScalarExpr(E
->getArg(1));
4815 llvm::IntegerType
*IntTy
= cast
<llvm::IntegerType
>(Int
->getType());
4816 assert((IntTy
->getBitWidth() == 32 || IntTy
->getBitWidth() == 64) &&
4817 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4819 CGM
.getIntrinsic(IntTy
->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4820 : Intrinsic::eh_return_i64
);
4821 Builder
.CreateCall(F
, {Int
, Ptr
});
4822 Builder
.CreateUnreachable();
4824 // We do need to preserve an insertion point.
4825 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4827 return RValue::get(nullptr);
4829 case Builtin::BI__builtin_unwind_init
: {
4830 Function
*F
= CGM
.getIntrinsic(Intrinsic::eh_unwind_init
);
4831 Builder
.CreateCall(F
);
4832 return RValue::get(nullptr);
4834 case Builtin::BI__builtin_extend_pointer
: {
4835 // Extends a pointer to the size of an _Unwind_Word, which is
4836 // uint64_t on all platforms. Generally this gets poked into a
4837 // register and eventually used as an address, so if the
4838 // addressing registers are wider than pointers and the platform
4839 // doesn't implicitly ignore high-order bits when doing
4840 // addressing, we need to make sure we zext / sext based on
4841 // the platform's expectations.
4843 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
4845 // Cast the pointer to intptr_t.
4846 Value
*Ptr
= EmitScalarExpr(E
->getArg(0));
4847 Value
*Result
= Builder
.CreatePtrToInt(Ptr
, IntPtrTy
, "extend.cast");
4849 // If that's 64 bits, we're done.
4850 if (IntPtrTy
->getBitWidth() == 64)
4851 return RValue::get(Result
);
4853 // Otherwise, ask the codegen data what to do.
4854 if (getTargetHooks().extendPointerWithSExt())
4855 return RValue::get(Builder
.CreateSExt(Result
, Int64Ty
, "extend.sext"));
4857 return RValue::get(Builder
.CreateZExt(Result
, Int64Ty
, "extend.zext"));
4859 case Builtin::BI__builtin_setjmp
: {
4860 // Buffer is a void**.
4861 Address Buf
= EmitPointerWithAlignment(E
->getArg(0));
4863 if (getTarget().getTriple().getArch() == llvm::Triple::systemz
) {
4864 // On this target, the back end fills in the context buffer completely.
4865 // It doesn't really matter if the frontend stores to the buffer before
4866 // calling setjmp, the back-end is going to overwrite them anyway.
4867 Function
*F
= CGM
.getIntrinsic(Intrinsic::eh_sjlj_setjmp
);
4868 return RValue::get(Builder
.CreateCall(F
, Buf
.emitRawPointer(*this)));
4871 // Store the frame pointer to the setjmp buffer.
4872 Value
*FrameAddr
= Builder
.CreateCall(
4873 CGM
.getIntrinsic(Intrinsic::frameaddress
, AllocaInt8PtrTy
),
4874 ConstantInt::get(Int32Ty
, 0));
4875 Builder
.CreateStore(FrameAddr
, Buf
);
4877 // Store the stack pointer to the setjmp buffer.
4878 Value
*StackAddr
= Builder
.CreateStackSave();
4879 assert(Buf
.emitRawPointer(*this)->getType() == StackAddr
->getType());
4881 Address StackSaveSlot
= Builder
.CreateConstInBoundsGEP(Buf
, 2);
4882 Builder
.CreateStore(StackAddr
, StackSaveSlot
);
4884 // Call LLVM's EH setjmp, which is lightweight.
4885 Function
*F
= CGM
.getIntrinsic(Intrinsic::eh_sjlj_setjmp
);
4886 return RValue::get(Builder
.CreateCall(F
, Buf
.emitRawPointer(*this)));
4888 case Builtin::BI__builtin_longjmp
: {
4889 Value
*Buf
= EmitScalarExpr(E
->getArg(0));
4891 // Call LLVM's EH longjmp, which is lightweight.
4892 Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::eh_sjlj_longjmp
), Buf
);
4894 // longjmp doesn't return; mark this as unreachable.
4895 Builder
.CreateUnreachable();
4897 // We do need to preserve an insertion point.
4898 EmitBlock(createBasicBlock("longjmp.cont"));
4900 return RValue::get(nullptr);
4902 case Builtin::BI__builtin_launder
: {
4903 const Expr
*Arg
= E
->getArg(0);
4904 QualType ArgTy
= Arg
->getType()->getPointeeType();
4905 Value
*Ptr
= EmitScalarExpr(Arg
);
4906 if (TypeRequiresBuiltinLaunder(CGM
, ArgTy
))
4907 Ptr
= Builder
.CreateLaunderInvariantGroup(Ptr
);
4909 return RValue::get(Ptr
);
4911 case Builtin::BI__sync_fetch_and_add
:
4912 case Builtin::BI__sync_fetch_and_sub
:
4913 case Builtin::BI__sync_fetch_and_or
:
4914 case Builtin::BI__sync_fetch_and_and
:
4915 case Builtin::BI__sync_fetch_and_xor
:
4916 case Builtin::BI__sync_fetch_and_nand
:
4917 case Builtin::BI__sync_add_and_fetch
:
4918 case Builtin::BI__sync_sub_and_fetch
:
4919 case Builtin::BI__sync_and_and_fetch
:
4920 case Builtin::BI__sync_or_and_fetch
:
4921 case Builtin::BI__sync_xor_and_fetch
:
4922 case Builtin::BI__sync_nand_and_fetch
:
4923 case Builtin::BI__sync_val_compare_and_swap
:
4924 case Builtin::BI__sync_bool_compare_and_swap
:
4925 case Builtin::BI__sync_lock_test_and_set
:
4926 case Builtin::BI__sync_lock_release
:
4927 case Builtin::BI__sync_swap
:
4928 llvm_unreachable("Shouldn't make it through sema");
4929 case Builtin::BI__sync_fetch_and_add_1
:
4930 case Builtin::BI__sync_fetch_and_add_2
:
4931 case Builtin::BI__sync_fetch_and_add_4
:
4932 case Builtin::BI__sync_fetch_and_add_8
:
4933 case Builtin::BI__sync_fetch_and_add_16
:
4934 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add
, E
);
4935 case Builtin::BI__sync_fetch_and_sub_1
:
4936 case Builtin::BI__sync_fetch_and_sub_2
:
4937 case Builtin::BI__sync_fetch_and_sub_4
:
4938 case Builtin::BI__sync_fetch_and_sub_8
:
4939 case Builtin::BI__sync_fetch_and_sub_16
:
4940 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub
, E
);
4941 case Builtin::BI__sync_fetch_and_or_1
:
4942 case Builtin::BI__sync_fetch_and_or_2
:
4943 case Builtin::BI__sync_fetch_and_or_4
:
4944 case Builtin::BI__sync_fetch_and_or_8
:
4945 case Builtin::BI__sync_fetch_and_or_16
:
4946 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or
, E
);
4947 case Builtin::BI__sync_fetch_and_and_1
:
4948 case Builtin::BI__sync_fetch_and_and_2
:
4949 case Builtin::BI__sync_fetch_and_and_4
:
4950 case Builtin::BI__sync_fetch_and_and_8
:
4951 case Builtin::BI__sync_fetch_and_and_16
:
4952 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And
, E
);
4953 case Builtin::BI__sync_fetch_and_xor_1
:
4954 case Builtin::BI__sync_fetch_and_xor_2
:
4955 case Builtin::BI__sync_fetch_and_xor_4
:
4956 case Builtin::BI__sync_fetch_and_xor_8
:
4957 case Builtin::BI__sync_fetch_and_xor_16
:
4958 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor
, E
);
4959 case Builtin::BI__sync_fetch_and_nand_1
:
4960 case Builtin::BI__sync_fetch_and_nand_2
:
4961 case Builtin::BI__sync_fetch_and_nand_4
:
4962 case Builtin::BI__sync_fetch_and_nand_8
:
4963 case Builtin::BI__sync_fetch_and_nand_16
:
4964 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand
, E
);
4966 // Clang extensions: not overloaded yet.
4967 case Builtin::BI__sync_fetch_and_min
:
4968 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min
, E
);
4969 case Builtin::BI__sync_fetch_and_max
:
4970 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max
, E
);
4971 case Builtin::BI__sync_fetch_and_umin
:
4972 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin
, E
);
4973 case Builtin::BI__sync_fetch_and_umax
:
4974 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax
, E
);
4976 case Builtin::BI__sync_add_and_fetch_1
:
4977 case Builtin::BI__sync_add_and_fetch_2
:
4978 case Builtin::BI__sync_add_and_fetch_4
:
4979 case Builtin::BI__sync_add_and_fetch_8
:
4980 case Builtin::BI__sync_add_and_fetch_16
:
4981 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add
, E
,
4982 llvm::Instruction::Add
);
4983 case Builtin::BI__sync_sub_and_fetch_1
:
4984 case Builtin::BI__sync_sub_and_fetch_2
:
4985 case Builtin::BI__sync_sub_and_fetch_4
:
4986 case Builtin::BI__sync_sub_and_fetch_8
:
4987 case Builtin::BI__sync_sub_and_fetch_16
:
4988 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub
, E
,
4989 llvm::Instruction::Sub
);
4990 case Builtin::BI__sync_and_and_fetch_1
:
4991 case Builtin::BI__sync_and_and_fetch_2
:
4992 case Builtin::BI__sync_and_and_fetch_4
:
4993 case Builtin::BI__sync_and_and_fetch_8
:
4994 case Builtin::BI__sync_and_and_fetch_16
:
4995 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And
, E
,
4996 llvm::Instruction::And
);
4997 case Builtin::BI__sync_or_and_fetch_1
:
4998 case Builtin::BI__sync_or_and_fetch_2
:
4999 case Builtin::BI__sync_or_and_fetch_4
:
5000 case Builtin::BI__sync_or_and_fetch_8
:
5001 case Builtin::BI__sync_or_and_fetch_16
:
5002 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or
, E
,
5003 llvm::Instruction::Or
);
5004 case Builtin::BI__sync_xor_and_fetch_1
:
5005 case Builtin::BI__sync_xor_and_fetch_2
:
5006 case Builtin::BI__sync_xor_and_fetch_4
:
5007 case Builtin::BI__sync_xor_and_fetch_8
:
5008 case Builtin::BI__sync_xor_and_fetch_16
:
5009 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor
, E
,
5010 llvm::Instruction::Xor
);
5011 case Builtin::BI__sync_nand_and_fetch_1
:
5012 case Builtin::BI__sync_nand_and_fetch_2
:
5013 case Builtin::BI__sync_nand_and_fetch_4
:
5014 case Builtin::BI__sync_nand_and_fetch_8
:
5015 case Builtin::BI__sync_nand_and_fetch_16
:
5016 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand
, E
,
5017 llvm::Instruction::And
, true);
5019 case Builtin::BI__sync_val_compare_and_swap_1
:
5020 case Builtin::BI__sync_val_compare_and_swap_2
:
5021 case Builtin::BI__sync_val_compare_and_swap_4
:
5022 case Builtin::BI__sync_val_compare_and_swap_8
:
5023 case Builtin::BI__sync_val_compare_and_swap_16
:
5024 return RValue::get(MakeAtomicCmpXchgValue(*this, E
, false));
5026 case Builtin::BI__sync_bool_compare_and_swap_1
:
5027 case Builtin::BI__sync_bool_compare_and_swap_2
:
5028 case Builtin::BI__sync_bool_compare_and_swap_4
:
5029 case Builtin::BI__sync_bool_compare_and_swap_8
:
5030 case Builtin::BI__sync_bool_compare_and_swap_16
:
5031 return RValue::get(MakeAtomicCmpXchgValue(*this, E
, true));
5033 case Builtin::BI__sync_swap_1
:
5034 case Builtin::BI__sync_swap_2
:
5035 case Builtin::BI__sync_swap_4
:
5036 case Builtin::BI__sync_swap_8
:
5037 case Builtin::BI__sync_swap_16
:
5038 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg
, E
);
5040 case Builtin::BI__sync_lock_test_and_set_1
:
5041 case Builtin::BI__sync_lock_test_and_set_2
:
5042 case Builtin::BI__sync_lock_test_and_set_4
:
5043 case Builtin::BI__sync_lock_test_and_set_8
:
5044 case Builtin::BI__sync_lock_test_and_set_16
:
5045 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg
, E
);
5047 case Builtin::BI__sync_lock_release_1
:
5048 case Builtin::BI__sync_lock_release_2
:
5049 case Builtin::BI__sync_lock_release_4
:
5050 case Builtin::BI__sync_lock_release_8
:
5051 case Builtin::BI__sync_lock_release_16
: {
5052 Address Ptr
= CheckAtomicAlignment(*this, E
);
5053 QualType ElTy
= E
->getArg(0)->getType()->getPointeeType();
5055 llvm::Type
*ITy
= llvm::IntegerType::get(getLLVMContext(),
5056 getContext().getTypeSize(ElTy
));
5057 llvm::StoreInst
*Store
=
5058 Builder
.CreateStore(llvm::Constant::getNullValue(ITy
), Ptr
);
5059 Store
->setAtomic(llvm::AtomicOrdering::Release
);
5060 return RValue::get(nullptr);
5063 case Builtin::BI__sync_synchronize
: {
5064 // We assume this is supposed to correspond to a C++0x-style
5065 // sequentially-consistent fence (i.e. this is only usable for
5066 // synchronization, not device I/O or anything like that). This intrinsic
5067 // is really badly designed in the sense that in theory, there isn't
5068 // any way to safely use it... but in practice, it mostly works
5069 // to use it with non-atomic loads and stores to get acquire/release
5071 Builder
.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent
);
5072 return RValue::get(nullptr);
5075 case Builtin::BI__builtin_nontemporal_load
:
5076 return RValue::get(EmitNontemporalLoad(*this, E
));
5077 case Builtin::BI__builtin_nontemporal_store
:
5078 return RValue::get(EmitNontemporalStore(*this, E
));
5079 case Builtin::BI__c11_atomic_is_lock_free
:
5080 case Builtin::BI__atomic_is_lock_free
: {
5081 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
5082 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
5083 // _Atomic(T) is always properly-aligned.
5084 const char *LibCallName
= "__atomic_is_lock_free";
5086 Args
.add(RValue::get(EmitScalarExpr(E
->getArg(0))),
5087 getContext().getSizeType());
5088 if (BuiltinID
== Builtin::BI__atomic_is_lock_free
)
5089 Args
.add(RValue::get(EmitScalarExpr(E
->getArg(1))),
5090 getContext().VoidPtrTy
);
5092 Args
.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy
)),
5093 getContext().VoidPtrTy
);
5094 const CGFunctionInfo
&FuncInfo
=
5095 CGM
.getTypes().arrangeBuiltinFunctionCall(E
->getType(), Args
);
5096 llvm::FunctionType
*FTy
= CGM
.getTypes().GetFunctionType(FuncInfo
);
5097 llvm::FunctionCallee Func
= CGM
.CreateRuntimeFunction(FTy
, LibCallName
);
5098 return EmitCall(FuncInfo
, CGCallee::forDirect(Func
),
5099 ReturnValueSlot(), Args
);
5102 case Builtin::BI__atomic_test_and_set
: {
5103 // Look at the argument type to determine whether this is a volatile
5104 // operation. The parameter type is always volatile.
5105 QualType PtrTy
= E
->getArg(0)->IgnoreImpCasts()->getType();
5107 PtrTy
->castAs
<PointerType
>()->getPointeeType().isVolatileQualified();
5110 EmitPointerWithAlignment(E
->getArg(0)).withElementType(Int8Ty
);
5112 Value
*NewVal
= Builder
.getInt8(1);
5113 Value
*Order
= EmitScalarExpr(E
->getArg(1));
5114 if (isa
<llvm::ConstantInt
>(Order
)) {
5115 int ord
= cast
<llvm::ConstantInt
>(Order
)->getZExtValue();
5116 AtomicRMWInst
*Result
= nullptr;
5118 case 0: // memory_order_relaxed
5119 default: // invalid order
5120 Result
= Builder
.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg
, Ptr
, NewVal
,
5121 llvm::AtomicOrdering::Monotonic
);
5123 case 1: // memory_order_consume
5124 case 2: // memory_order_acquire
5125 Result
= Builder
.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg
, Ptr
, NewVal
,
5126 llvm::AtomicOrdering::Acquire
);
5128 case 3: // memory_order_release
5129 Result
= Builder
.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg
, Ptr
, NewVal
,
5130 llvm::AtomicOrdering::Release
);
5132 case 4: // memory_order_acq_rel
5134 Result
= Builder
.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg
, Ptr
, NewVal
,
5135 llvm::AtomicOrdering::AcquireRelease
);
5137 case 5: // memory_order_seq_cst
5138 Result
= Builder
.CreateAtomicRMW(
5139 llvm::AtomicRMWInst::Xchg
, Ptr
, NewVal
,
5140 llvm::AtomicOrdering::SequentiallyConsistent
);
5143 Result
->setVolatile(Volatile
);
5144 return RValue::get(Builder
.CreateIsNotNull(Result
, "tobool"));
5147 llvm::BasicBlock
*ContBB
= createBasicBlock("atomic.continue", CurFn
);
5149 llvm::BasicBlock
*BBs
[5] = {
5150 createBasicBlock("monotonic", CurFn
),
5151 createBasicBlock("acquire", CurFn
),
5152 createBasicBlock("release", CurFn
),
5153 createBasicBlock("acqrel", CurFn
),
5154 createBasicBlock("seqcst", CurFn
)
5156 llvm::AtomicOrdering Orders
[5] = {
5157 llvm::AtomicOrdering::Monotonic
, llvm::AtomicOrdering::Acquire
,
5158 llvm::AtomicOrdering::Release
, llvm::AtomicOrdering::AcquireRelease
,
5159 llvm::AtomicOrdering::SequentiallyConsistent
};
5161 Order
= Builder
.CreateIntCast(Order
, Builder
.getInt32Ty(), false);
5162 llvm::SwitchInst
*SI
= Builder
.CreateSwitch(Order
, BBs
[0]);
5164 Builder
.SetInsertPoint(ContBB
);
5165 PHINode
*Result
= Builder
.CreatePHI(Int8Ty
, 5, "was_set");
5167 for (unsigned i
= 0; i
< 5; ++i
) {
5168 Builder
.SetInsertPoint(BBs
[i
]);
5169 AtomicRMWInst
*RMW
= Builder
.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg
,
5170 Ptr
, NewVal
, Orders
[i
]);
5171 RMW
->setVolatile(Volatile
);
5172 Result
->addIncoming(RMW
, BBs
[i
]);
5173 Builder
.CreateBr(ContBB
);
5176 SI
->addCase(Builder
.getInt32(0), BBs
[0]);
5177 SI
->addCase(Builder
.getInt32(1), BBs
[1]);
5178 SI
->addCase(Builder
.getInt32(2), BBs
[1]);
5179 SI
->addCase(Builder
.getInt32(3), BBs
[2]);
5180 SI
->addCase(Builder
.getInt32(4), BBs
[3]);
5181 SI
->addCase(Builder
.getInt32(5), BBs
[4]);
5183 Builder
.SetInsertPoint(ContBB
);
5184 return RValue::get(Builder
.CreateIsNotNull(Result
, "tobool"));
5187 case Builtin::BI__atomic_clear
: {
5188 QualType PtrTy
= E
->getArg(0)->IgnoreImpCasts()->getType();
5190 PtrTy
->castAs
<PointerType
>()->getPointeeType().isVolatileQualified();
5192 Address Ptr
= EmitPointerWithAlignment(E
->getArg(0));
5193 Ptr
= Ptr
.withElementType(Int8Ty
);
5194 Value
*NewVal
= Builder
.getInt8(0);
5195 Value
*Order
= EmitScalarExpr(E
->getArg(1));
5196 if (isa
<llvm::ConstantInt
>(Order
)) {
5197 int ord
= cast
<llvm::ConstantInt
>(Order
)->getZExtValue();
5198 StoreInst
*Store
= Builder
.CreateStore(NewVal
, Ptr
, Volatile
);
5200 case 0: // memory_order_relaxed
5201 default: // invalid order
5202 Store
->setOrdering(llvm::AtomicOrdering::Monotonic
);
5204 case 3: // memory_order_release
5205 Store
->setOrdering(llvm::AtomicOrdering::Release
);
5207 case 5: // memory_order_seq_cst
5208 Store
->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent
);
5211 return RValue::get(nullptr);
5214 llvm::BasicBlock
*ContBB
= createBasicBlock("atomic.continue", CurFn
);
5216 llvm::BasicBlock
*BBs
[3] = {
5217 createBasicBlock("monotonic", CurFn
),
5218 createBasicBlock("release", CurFn
),
5219 createBasicBlock("seqcst", CurFn
)
5221 llvm::AtomicOrdering Orders
[3] = {
5222 llvm::AtomicOrdering::Monotonic
, llvm::AtomicOrdering::Release
,
5223 llvm::AtomicOrdering::SequentiallyConsistent
};
5225 Order
= Builder
.CreateIntCast(Order
, Builder
.getInt32Ty(), false);
5226 llvm::SwitchInst
*SI
= Builder
.CreateSwitch(Order
, BBs
[0]);
5228 for (unsigned i
= 0; i
< 3; ++i
) {
5229 Builder
.SetInsertPoint(BBs
[i
]);
5230 StoreInst
*Store
= Builder
.CreateStore(NewVal
, Ptr
, Volatile
);
5231 Store
->setOrdering(Orders
[i
]);
5232 Builder
.CreateBr(ContBB
);
5235 SI
->addCase(Builder
.getInt32(0), BBs
[0]);
5236 SI
->addCase(Builder
.getInt32(3), BBs
[1]);
5237 SI
->addCase(Builder
.getInt32(5), BBs
[2]);
5239 Builder
.SetInsertPoint(ContBB
);
5240 return RValue::get(nullptr);
5243 case Builtin::BI__atomic_thread_fence
:
5244 case Builtin::BI__atomic_signal_fence
:
5245 case Builtin::BI__c11_atomic_thread_fence
:
5246 case Builtin::BI__c11_atomic_signal_fence
: {
5247 llvm::SyncScope::ID SSID
;
5248 if (BuiltinID
== Builtin::BI__atomic_signal_fence
||
5249 BuiltinID
== Builtin::BI__c11_atomic_signal_fence
)
5250 SSID
= llvm::SyncScope::SingleThread
;
5252 SSID
= llvm::SyncScope::System
;
5253 Value
*Order
= EmitScalarExpr(E
->getArg(0));
5254 if (isa
<llvm::ConstantInt
>(Order
)) {
5255 int ord
= cast
<llvm::ConstantInt
>(Order
)->getZExtValue();
5257 case 0: // memory_order_relaxed
5258 default: // invalid order
5260 case 1: // memory_order_consume
5261 case 2: // memory_order_acquire
5262 Builder
.CreateFence(llvm::AtomicOrdering::Acquire
, SSID
);
5264 case 3: // memory_order_release
5265 Builder
.CreateFence(llvm::AtomicOrdering::Release
, SSID
);
5267 case 4: // memory_order_acq_rel
5268 Builder
.CreateFence(llvm::AtomicOrdering::AcquireRelease
, SSID
);
5270 case 5: // memory_order_seq_cst
5271 Builder
.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent
, SSID
);
5274 return RValue::get(nullptr);
5277 llvm::BasicBlock
*AcquireBB
, *ReleaseBB
, *AcqRelBB
, *SeqCstBB
;
5278 AcquireBB
= createBasicBlock("acquire", CurFn
);
5279 ReleaseBB
= createBasicBlock("release", CurFn
);
5280 AcqRelBB
= createBasicBlock("acqrel", CurFn
);
5281 SeqCstBB
= createBasicBlock("seqcst", CurFn
);
5282 llvm::BasicBlock
*ContBB
= createBasicBlock("atomic.continue", CurFn
);
5284 Order
= Builder
.CreateIntCast(Order
, Builder
.getInt32Ty(), false);
5285 llvm::SwitchInst
*SI
= Builder
.CreateSwitch(Order
, ContBB
);
5287 Builder
.SetInsertPoint(AcquireBB
);
5288 Builder
.CreateFence(llvm::AtomicOrdering::Acquire
, SSID
);
5289 Builder
.CreateBr(ContBB
);
5290 SI
->addCase(Builder
.getInt32(1), AcquireBB
);
5291 SI
->addCase(Builder
.getInt32(2), AcquireBB
);
5293 Builder
.SetInsertPoint(ReleaseBB
);
5294 Builder
.CreateFence(llvm::AtomicOrdering::Release
, SSID
);
5295 Builder
.CreateBr(ContBB
);
5296 SI
->addCase(Builder
.getInt32(3), ReleaseBB
);
5298 Builder
.SetInsertPoint(AcqRelBB
);
5299 Builder
.CreateFence(llvm::AtomicOrdering::AcquireRelease
, SSID
);
5300 Builder
.CreateBr(ContBB
);
5301 SI
->addCase(Builder
.getInt32(4), AcqRelBB
);
5303 Builder
.SetInsertPoint(SeqCstBB
);
5304 Builder
.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent
, SSID
);
5305 Builder
.CreateBr(ContBB
);
5306 SI
->addCase(Builder
.getInt32(5), SeqCstBB
);
5308 Builder
.SetInsertPoint(ContBB
);
5309 return RValue::get(nullptr);
5311 case Builtin::BI__scoped_atomic_thread_fence
: {
5312 auto ScopeModel
= AtomicScopeModel::create(AtomicScopeModelKind::Generic
);
5314 Value
*Order
= EmitScalarExpr(E
->getArg(0));
5315 Value
*Scope
= EmitScalarExpr(E
->getArg(1));
5316 auto Ord
= dyn_cast
<llvm::ConstantInt
>(Order
);
5317 auto Scp
= dyn_cast
<llvm::ConstantInt
>(Scope
);
5319 SyncScope SS
= ScopeModel
->isValid(Scp
->getZExtValue())
5320 ? ScopeModel
->map(Scp
->getZExtValue())
5321 : ScopeModel
->map(ScopeModel
->getFallBackValue());
5322 switch (Ord
->getZExtValue()) {
5323 case 0: // memory_order_relaxed
5324 default: // invalid order
5326 case 1: // memory_order_consume
5327 case 2: // memory_order_acquire
5328 Builder
.CreateFence(
5329 llvm::AtomicOrdering::Acquire
,
5330 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS
,
5331 llvm::AtomicOrdering::Acquire
,
5334 case 3: // memory_order_release
5335 Builder
.CreateFence(
5336 llvm::AtomicOrdering::Release
,
5337 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS
,
5338 llvm::AtomicOrdering::Release
,
5341 case 4: // memory_order_acq_rel
5342 Builder
.CreateFence(llvm::AtomicOrdering::AcquireRelease
,
5343 getTargetHooks().getLLVMSyncScopeID(
5345 llvm::AtomicOrdering::AcquireRelease
,
5348 case 5: // memory_order_seq_cst
5349 Builder
.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent
,
5350 getTargetHooks().getLLVMSyncScopeID(
5352 llvm::AtomicOrdering::SequentiallyConsistent
,
5356 return RValue::get(nullptr);
5359 llvm::BasicBlock
*ContBB
= createBasicBlock("atomic.scope.continue", CurFn
);
5361 llvm::SmallVector
<std::pair
<llvm::BasicBlock
*, llvm::AtomicOrdering
>>
5364 switch (Ord
->getZExtValue()) {
5365 case 0: // memory_order_relaxed
5366 default: // invalid order
5367 ContBB
->eraseFromParent();
5368 return RValue::get(nullptr);
5369 case 1: // memory_order_consume
5370 case 2: // memory_order_acquire
5371 OrderBBs
.emplace_back(Builder
.GetInsertBlock(),
5372 llvm::AtomicOrdering::Acquire
);
5374 case 3: // memory_order_release
5375 OrderBBs
.emplace_back(Builder
.GetInsertBlock(),
5376 llvm::AtomicOrdering::Release
);
5378 case 4: // memory_order_acq_rel
5379 OrderBBs
.emplace_back(Builder
.GetInsertBlock(),
5380 llvm::AtomicOrdering::AcquireRelease
);
5382 case 5: // memory_order_seq_cst
5383 OrderBBs
.emplace_back(Builder
.GetInsertBlock(),
5384 llvm::AtomicOrdering::SequentiallyConsistent
);
5388 llvm::BasicBlock
*AcquireBB
= createBasicBlock("acquire", CurFn
);
5389 llvm::BasicBlock
*ReleaseBB
= createBasicBlock("release", CurFn
);
5390 llvm::BasicBlock
*AcqRelBB
= createBasicBlock("acqrel", CurFn
);
5391 llvm::BasicBlock
*SeqCstBB
= createBasicBlock("seqcst", CurFn
);
5393 Order
= Builder
.CreateIntCast(Order
, Builder
.getInt32Ty(), false);
5394 llvm::SwitchInst
*SI
= Builder
.CreateSwitch(Order
, ContBB
);
5395 SI
->addCase(Builder
.getInt32(1), AcquireBB
);
5396 SI
->addCase(Builder
.getInt32(2), AcquireBB
);
5397 SI
->addCase(Builder
.getInt32(3), ReleaseBB
);
5398 SI
->addCase(Builder
.getInt32(4), AcqRelBB
);
5399 SI
->addCase(Builder
.getInt32(5), SeqCstBB
);
5401 OrderBBs
.emplace_back(AcquireBB
, llvm::AtomicOrdering::Acquire
);
5402 OrderBBs
.emplace_back(ReleaseBB
, llvm::AtomicOrdering::Release
);
5403 OrderBBs
.emplace_back(AcqRelBB
, llvm::AtomicOrdering::AcquireRelease
);
5404 OrderBBs
.emplace_back(SeqCstBB
,
5405 llvm::AtomicOrdering::SequentiallyConsistent
);
5408 for (auto &[OrderBB
, Ordering
] : OrderBBs
) {
5409 Builder
.SetInsertPoint(OrderBB
);
5411 SyncScope SS
= ScopeModel
->isValid(Scp
->getZExtValue())
5412 ? ScopeModel
->map(Scp
->getZExtValue())
5413 : ScopeModel
->map(ScopeModel
->getFallBackValue());
5414 Builder
.CreateFence(Ordering
,
5415 getTargetHooks().getLLVMSyncScopeID(
5416 getLangOpts(), SS
, Ordering
, getLLVMContext()));
5417 Builder
.CreateBr(ContBB
);
5419 llvm::DenseMap
<unsigned, llvm::BasicBlock
*> BBs
;
5420 for (unsigned Scp
: ScopeModel
->getRuntimeValues())
5421 BBs
[Scp
] = createBasicBlock(getAsString(ScopeModel
->map(Scp
)), CurFn
);
5423 auto *SC
= Builder
.CreateIntCast(Scope
, Builder
.getInt32Ty(), false);
5424 llvm::SwitchInst
*SI
= Builder
.CreateSwitch(SC
, ContBB
);
5425 for (unsigned Scp
: ScopeModel
->getRuntimeValues()) {
5427 SI
->addCase(Builder
.getInt32(Scp
), B
);
5429 Builder
.SetInsertPoint(B
);
5430 Builder
.CreateFence(Ordering
, getTargetHooks().getLLVMSyncScopeID(
5431 getLangOpts(), ScopeModel
->map(Scp
),
5432 Ordering
, getLLVMContext()));
5433 Builder
.CreateBr(ContBB
);
5438 Builder
.SetInsertPoint(ContBB
);
5439 return RValue::get(nullptr);
5442 case Builtin::BI__builtin_signbit
:
5443 case Builtin::BI__builtin_signbitf
:
5444 case Builtin::BI__builtin_signbitl
: {
5446 Builder
.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E
->getArg(0))),
5447 ConvertType(E
->getType())));
5449 case Builtin::BI__warn_memset_zero_len
:
5450 return RValue::getIgnored();
5451 case Builtin::BI__annotation
: {
5452 // Re-encode each wide string to UTF8 and make an MDString.
5453 SmallVector
<Metadata
*, 1> Strings
;
5454 for (const Expr
*Arg
: E
->arguments()) {
5455 const auto *Str
= cast
<StringLiteral
>(Arg
->IgnoreParenCasts());
5456 assert(Str
->getCharByteWidth() == 2);
5457 StringRef WideBytes
= Str
->getBytes();
5458 std::string StrUtf8
;
5459 if (!convertUTF16ToUTF8String(
5460 ArrayRef(WideBytes
.data(), WideBytes
.size()), StrUtf8
)) {
5461 CGM
.ErrorUnsupported(E
, "non-UTF16 __annotation argument");
5464 Strings
.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8
));
5467 // Build an MDTuple of MDStrings and emit the intrinsic call.
5469 CGM
.getIntrinsic(llvm::Intrinsic::codeview_annotation
, {});
5470 MDTuple
*StrTuple
= MDTuple::get(getLLVMContext(), Strings
);
5471 Builder
.CreateCall(F
, MetadataAsValue::get(getLLVMContext(), StrTuple
));
5472 return RValue::getIgnored();
5474 case Builtin::BI__builtin_annotation
: {
5475 llvm::Value
*AnnVal
= EmitScalarExpr(E
->getArg(0));
5477 CGM
.getIntrinsic(llvm::Intrinsic::annotation
,
5478 {AnnVal
->getType(), CGM
.ConstGlobalsPtrTy
});
5480 // Get the annotation string, go through casts. Sema requires this to be a
5481 // non-wide string literal, potentially casted, so the cast<> is safe.
5482 const Expr
*AnnotationStrExpr
= E
->getArg(1)->IgnoreParenCasts();
5483 StringRef Str
= cast
<StringLiteral
>(AnnotationStrExpr
)->getString();
5485 EmitAnnotationCall(F
, AnnVal
, Str
, E
->getExprLoc(), nullptr));
5487 case Builtin::BI__builtin_addcb
:
5488 case Builtin::BI__builtin_addcs
:
5489 case Builtin::BI__builtin_addc
:
5490 case Builtin::BI__builtin_addcl
:
5491 case Builtin::BI__builtin_addcll
:
5492 case Builtin::BI__builtin_subcb
:
5493 case Builtin::BI__builtin_subcs
:
5494 case Builtin::BI__builtin_subc
:
5495 case Builtin::BI__builtin_subcl
:
5496 case Builtin::BI__builtin_subcll
: {
5498 // We translate all of these builtins from expressions of the form:
5499 // int x = ..., y = ..., carryin = ..., carryout, result;
5500 // result = __builtin_addc(x, y, carryin, &carryout);
5502 // to LLVM IR of the form:
5504 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
5505 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
5506 // %carry1 = extractvalue {i32, i1} %tmp1, 1
5507 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
5509 // %result = extractvalue {i32, i1} %tmp2, 0
5510 // %carry2 = extractvalue {i32, i1} %tmp2, 1
5511 // %tmp3 = or i1 %carry1, %carry2
5512 // %tmp4 = zext i1 %tmp3 to i32
5513 // store i32 %tmp4, i32* %carryout
5515 // Scalarize our inputs.
5516 llvm::Value
*X
= EmitScalarExpr(E
->getArg(0));
5517 llvm::Value
*Y
= EmitScalarExpr(E
->getArg(1));
5518 llvm::Value
*Carryin
= EmitScalarExpr(E
->getArg(2));
5519 Address CarryOutPtr
= EmitPointerWithAlignment(E
->getArg(3));
5521 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
5522 llvm::Intrinsic::ID IntrinsicId
;
5523 switch (BuiltinID
) {
5524 default: llvm_unreachable("Unknown multiprecision builtin id.");
5525 case Builtin::BI__builtin_addcb
:
5526 case Builtin::BI__builtin_addcs
:
5527 case Builtin::BI__builtin_addc
:
5528 case Builtin::BI__builtin_addcl
:
5529 case Builtin::BI__builtin_addcll
:
5530 IntrinsicId
= llvm::Intrinsic::uadd_with_overflow
;
5532 case Builtin::BI__builtin_subcb
:
5533 case Builtin::BI__builtin_subcs
:
5534 case Builtin::BI__builtin_subc
:
5535 case Builtin::BI__builtin_subcl
:
5536 case Builtin::BI__builtin_subcll
:
5537 IntrinsicId
= llvm::Intrinsic::usub_with_overflow
;
5541 // Construct our resulting LLVM IR expression.
5542 llvm::Value
*Carry1
;
5543 llvm::Value
*Sum1
= EmitOverflowIntrinsic(*this, IntrinsicId
,
5545 llvm::Value
*Carry2
;
5546 llvm::Value
*Sum2
= EmitOverflowIntrinsic(*this, IntrinsicId
,
5547 Sum1
, Carryin
, Carry2
);
5548 llvm::Value
*CarryOut
= Builder
.CreateZExt(Builder
.CreateOr(Carry1
, Carry2
),
5550 Builder
.CreateStore(CarryOut
, CarryOutPtr
);
5551 return RValue::get(Sum2
);
5554 case Builtin::BI__builtin_add_overflow
:
5555 case Builtin::BI__builtin_sub_overflow
:
5556 case Builtin::BI__builtin_mul_overflow
: {
5557 const clang::Expr
*LeftArg
= E
->getArg(0);
5558 const clang::Expr
*RightArg
= E
->getArg(1);
5559 const clang::Expr
*ResultArg
= E
->getArg(2);
5561 clang::QualType ResultQTy
=
5562 ResultArg
->getType()->castAs
<PointerType
>()->getPointeeType();
5564 WidthAndSignedness LeftInfo
=
5565 getIntegerWidthAndSignedness(CGM
.getContext(), LeftArg
->getType());
5566 WidthAndSignedness RightInfo
=
5567 getIntegerWidthAndSignedness(CGM
.getContext(), RightArg
->getType());
5568 WidthAndSignedness ResultInfo
=
5569 getIntegerWidthAndSignedness(CGM
.getContext(), ResultQTy
);
5571 // Handle mixed-sign multiplication as a special case, because adding
5572 // runtime or backend support for our generic irgen would be too expensive.
5573 if (isSpecialMixedSignMultiply(BuiltinID
, LeftInfo
, RightInfo
, ResultInfo
))
5574 return EmitCheckedMixedSignMultiply(*this, LeftArg
, LeftInfo
, RightArg
,
5575 RightInfo
, ResultArg
, ResultQTy
,
5578 if (isSpecialUnsignedMultiplySignedResult(BuiltinID
, LeftInfo
, RightInfo
,
5580 return EmitCheckedUnsignedMultiplySignedResult(
5581 *this, LeftArg
, LeftInfo
, RightArg
, RightInfo
, ResultArg
, ResultQTy
,
5584 WidthAndSignedness EncompassingInfo
=
5585 EncompassingIntegerType({LeftInfo
, RightInfo
, ResultInfo
});
5587 llvm::Type
*EncompassingLLVMTy
=
5588 llvm::IntegerType::get(CGM
.getLLVMContext(), EncompassingInfo
.Width
);
5590 llvm::Type
*ResultLLVMTy
= CGM
.getTypes().ConvertType(ResultQTy
);
5592 llvm::Intrinsic::ID IntrinsicId
;
5593 switch (BuiltinID
) {
5595 llvm_unreachable("Unknown overflow builtin id.");
5596 case Builtin::BI__builtin_add_overflow
:
5597 IntrinsicId
= EncompassingInfo
.Signed
5598 ? llvm::Intrinsic::sadd_with_overflow
5599 : llvm::Intrinsic::uadd_with_overflow
;
5601 case Builtin::BI__builtin_sub_overflow
:
5602 IntrinsicId
= EncompassingInfo
.Signed
5603 ? llvm::Intrinsic::ssub_with_overflow
5604 : llvm::Intrinsic::usub_with_overflow
;
5606 case Builtin::BI__builtin_mul_overflow
:
5607 IntrinsicId
= EncompassingInfo
.Signed
5608 ? llvm::Intrinsic::smul_with_overflow
5609 : llvm::Intrinsic::umul_with_overflow
;
5613 llvm::Value
*Left
= EmitScalarExpr(LeftArg
);
5614 llvm::Value
*Right
= EmitScalarExpr(RightArg
);
5615 Address ResultPtr
= EmitPointerWithAlignment(ResultArg
);
5617 // Extend each operand to the encompassing type.
5618 Left
= Builder
.CreateIntCast(Left
, EncompassingLLVMTy
, LeftInfo
.Signed
);
5619 Right
= Builder
.CreateIntCast(Right
, EncompassingLLVMTy
, RightInfo
.Signed
);
5621 // Perform the operation on the extended values.
5622 llvm::Value
*Overflow
, *Result
;
5623 Result
= EmitOverflowIntrinsic(*this, IntrinsicId
, Left
, Right
, Overflow
);
5625 if (EncompassingInfo
.Width
> ResultInfo
.Width
) {
5626 // The encompassing type is wider than the result type, so we need to
5628 llvm::Value
*ResultTrunc
= Builder
.CreateTrunc(Result
, ResultLLVMTy
);
5630 // To see if the truncation caused an overflow, we will extend
5631 // the result and then compare it to the original result.
5632 llvm::Value
*ResultTruncExt
= Builder
.CreateIntCast(
5633 ResultTrunc
, EncompassingLLVMTy
, ResultInfo
.Signed
);
5634 llvm::Value
*TruncationOverflow
=
5635 Builder
.CreateICmpNE(Result
, ResultTruncExt
);
5637 Overflow
= Builder
.CreateOr(Overflow
, TruncationOverflow
);
5638 Result
= ResultTrunc
;
5641 // Finally, store the result using the pointer.
5643 ResultArg
->getType()->getPointeeType().isVolatileQualified();
5644 Builder
.CreateStore(EmitToMemory(Result
, ResultQTy
), ResultPtr
, isVolatile
);
5646 return RValue::get(Overflow
);
5649 case Builtin::BI__builtin_uadd_overflow
:
5650 case Builtin::BI__builtin_uaddl_overflow
:
5651 case Builtin::BI__builtin_uaddll_overflow
:
5652 case Builtin::BI__builtin_usub_overflow
:
5653 case Builtin::BI__builtin_usubl_overflow
:
5654 case Builtin::BI__builtin_usubll_overflow
:
5655 case Builtin::BI__builtin_umul_overflow
:
5656 case Builtin::BI__builtin_umull_overflow
:
5657 case Builtin::BI__builtin_umulll_overflow
:
5658 case Builtin::BI__builtin_sadd_overflow
:
5659 case Builtin::BI__builtin_saddl_overflow
:
5660 case Builtin::BI__builtin_saddll_overflow
:
5661 case Builtin::BI__builtin_ssub_overflow
:
5662 case Builtin::BI__builtin_ssubl_overflow
:
5663 case Builtin::BI__builtin_ssubll_overflow
:
5664 case Builtin::BI__builtin_smul_overflow
:
5665 case Builtin::BI__builtin_smull_overflow
:
5666 case Builtin::BI__builtin_smulll_overflow
: {
5668 // We translate all of these builtins directly to the relevant llvm IR node.
5670 // Scalarize our inputs.
5671 llvm::Value
*X
= EmitScalarExpr(E
->getArg(0));
5672 llvm::Value
*Y
= EmitScalarExpr(E
->getArg(1));
5673 Address SumOutPtr
= EmitPointerWithAlignment(E
->getArg(2));
5675 // Decide which of the overflow intrinsics we are lowering to:
5676 llvm::Intrinsic::ID IntrinsicId
;
5677 switch (BuiltinID
) {
5678 default: llvm_unreachable("Unknown overflow builtin id.");
5679 case Builtin::BI__builtin_uadd_overflow
:
5680 case Builtin::BI__builtin_uaddl_overflow
:
5681 case Builtin::BI__builtin_uaddll_overflow
:
5682 IntrinsicId
= llvm::Intrinsic::uadd_with_overflow
;
5684 case Builtin::BI__builtin_usub_overflow
:
5685 case Builtin::BI__builtin_usubl_overflow
:
5686 case Builtin::BI__builtin_usubll_overflow
:
5687 IntrinsicId
= llvm::Intrinsic::usub_with_overflow
;
5689 case Builtin::BI__builtin_umul_overflow
:
5690 case Builtin::BI__builtin_umull_overflow
:
5691 case Builtin::BI__builtin_umulll_overflow
:
5692 IntrinsicId
= llvm::Intrinsic::umul_with_overflow
;
5694 case Builtin::BI__builtin_sadd_overflow
:
5695 case Builtin::BI__builtin_saddl_overflow
:
5696 case Builtin::BI__builtin_saddll_overflow
:
5697 IntrinsicId
= llvm::Intrinsic::sadd_with_overflow
;
5699 case Builtin::BI__builtin_ssub_overflow
:
5700 case Builtin::BI__builtin_ssubl_overflow
:
5701 case Builtin::BI__builtin_ssubll_overflow
:
5702 IntrinsicId
= llvm::Intrinsic::ssub_with_overflow
;
5704 case Builtin::BI__builtin_smul_overflow
:
5705 case Builtin::BI__builtin_smull_overflow
:
5706 case Builtin::BI__builtin_smulll_overflow
:
5707 IntrinsicId
= llvm::Intrinsic::smul_with_overflow
;
5713 llvm::Value
*Sum
= EmitOverflowIntrinsic(*this, IntrinsicId
, X
, Y
, Carry
);
5714 Builder
.CreateStore(Sum
, SumOutPtr
);
5716 return RValue::get(Carry
);
5718 case Builtin::BIaddressof
:
5719 case Builtin::BI__addressof
:
5720 case Builtin::BI__builtin_addressof
:
5721 return RValue::get(EmitLValue(E
->getArg(0)).getPointer(*this));
5722 case Builtin::BI__builtin_function_start
:
5723 return RValue::get(CGM
.GetFunctionStart(
5724 E
->getArg(0)->getAsBuiltinConstantDeclRef(CGM
.getContext())));
5725 case Builtin::BI__builtin_operator_new
:
5726 return EmitBuiltinNewDeleteCall(
5727 E
->getCallee()->getType()->castAs
<FunctionProtoType
>(), E
, false);
5728 case Builtin::BI__builtin_operator_delete
:
5729 EmitBuiltinNewDeleteCall(
5730 E
->getCallee()->getType()->castAs
<FunctionProtoType
>(), E
, true);
5731 return RValue::get(nullptr);
5733 case Builtin::BI__builtin_is_aligned
:
5734 return EmitBuiltinIsAligned(E
);
5735 case Builtin::BI__builtin_align_up
:
5736 return EmitBuiltinAlignTo(E
, true);
5737 case Builtin::BI__builtin_align_down
:
5738 return EmitBuiltinAlignTo(E
, false);
5740 case Builtin::BI__noop
:
5741 // __noop always evaluates to an integer literal zero.
5742 return RValue::get(ConstantInt::get(IntTy
, 0));
5743 case Builtin::BI__builtin_call_with_static_chain
: {
5744 const CallExpr
*Call
= cast
<CallExpr
>(E
->getArg(0));
5745 const Expr
*Chain
= E
->getArg(1);
5746 return EmitCall(Call
->getCallee()->getType(),
5747 EmitCallee(Call
->getCallee()), Call
, ReturnValue
,
5748 EmitScalarExpr(Chain
));
5750 case Builtin::BI_InterlockedExchange8
:
5751 case Builtin::BI_InterlockedExchange16
:
5752 case Builtin::BI_InterlockedExchange
:
5753 case Builtin::BI_InterlockedExchangePointer
:
5755 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange
, E
));
5756 case Builtin::BI_InterlockedCompareExchangePointer
:
5758 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange
, E
));
5759 case Builtin::BI_InterlockedCompareExchangePointer_nf
:
5761 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf
, E
));
5762 case Builtin::BI_InterlockedCompareExchange8
:
5763 case Builtin::BI_InterlockedCompareExchange16
:
5764 case Builtin::BI_InterlockedCompareExchange
:
5765 case Builtin::BI_InterlockedCompareExchange64
:
5766 return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E
));
5767 case Builtin::BI_InterlockedIncrement16
:
5768 case Builtin::BI_InterlockedIncrement
:
5770 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement
, E
));
5771 case Builtin::BI_InterlockedDecrement16
:
5772 case Builtin::BI_InterlockedDecrement
:
5774 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement
, E
));
5775 case Builtin::BI_InterlockedAnd8
:
5776 case Builtin::BI_InterlockedAnd16
:
5777 case Builtin::BI_InterlockedAnd
:
5778 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd
, E
));
5779 case Builtin::BI_InterlockedExchangeAdd8
:
5780 case Builtin::BI_InterlockedExchangeAdd16
:
5781 case Builtin::BI_InterlockedExchangeAdd
:
5783 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd
, E
));
5784 case Builtin::BI_InterlockedExchangeSub8
:
5785 case Builtin::BI_InterlockedExchangeSub16
:
5786 case Builtin::BI_InterlockedExchangeSub
:
5788 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub
, E
));
5789 case Builtin::BI_InterlockedOr8
:
5790 case Builtin::BI_InterlockedOr16
:
5791 case Builtin::BI_InterlockedOr
:
5792 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr
, E
));
5793 case Builtin::BI_InterlockedXor8
:
5794 case Builtin::BI_InterlockedXor16
:
5795 case Builtin::BI_InterlockedXor
:
5796 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor
, E
));
5798 case Builtin::BI_bittest64
:
5799 case Builtin::BI_bittest
:
5800 case Builtin::BI_bittestandcomplement64
:
5801 case Builtin::BI_bittestandcomplement
:
5802 case Builtin::BI_bittestandreset64
:
5803 case Builtin::BI_bittestandreset
:
5804 case Builtin::BI_bittestandset64
:
5805 case Builtin::BI_bittestandset
:
5806 case Builtin::BI_interlockedbittestandreset
:
5807 case Builtin::BI_interlockedbittestandreset64
:
5808 case Builtin::BI_interlockedbittestandset64
:
5809 case Builtin::BI_interlockedbittestandset
:
5810 case Builtin::BI_interlockedbittestandset_acq
:
5811 case Builtin::BI_interlockedbittestandset_rel
:
5812 case Builtin::BI_interlockedbittestandset_nf
:
5813 case Builtin::BI_interlockedbittestandreset_acq
:
5814 case Builtin::BI_interlockedbittestandreset_rel
:
5815 case Builtin::BI_interlockedbittestandreset_nf
:
5816 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID
, E
));
5818 // These builtins exist to emit regular volatile loads and stores not
5819 // affected by the -fms-volatile setting.
5820 case Builtin::BI__iso_volatile_load8
:
5821 case Builtin::BI__iso_volatile_load16
:
5822 case Builtin::BI__iso_volatile_load32
:
5823 case Builtin::BI__iso_volatile_load64
:
5824 return RValue::get(EmitISOVolatileLoad(*this, E
));
5825 case Builtin::BI__iso_volatile_store8
:
5826 case Builtin::BI__iso_volatile_store16
:
5827 case Builtin::BI__iso_volatile_store32
:
5828 case Builtin::BI__iso_volatile_store64
:
5829 return RValue::get(EmitISOVolatileStore(*this, E
));
5831 case Builtin::BI__builtin_ptrauth_sign_constant
:
5832 return RValue::get(ConstantEmitter(*this).emitAbstract(E
, E
->getType()));
5834 case Builtin::BI__builtin_ptrauth_auth
:
5835 case Builtin::BI__builtin_ptrauth_auth_and_resign
:
5836 case Builtin::BI__builtin_ptrauth_blend_discriminator
:
5837 case Builtin::BI__builtin_ptrauth_sign_generic_data
:
5838 case Builtin::BI__builtin_ptrauth_sign_unauthenticated
:
5839 case Builtin::BI__builtin_ptrauth_strip
: {
5840 // Emit the arguments.
5841 SmallVector
<llvm::Value
*, 5> Args
;
5842 for (auto argExpr
: E
->arguments())
5843 Args
.push_back(EmitScalarExpr(argExpr
));
5845 // Cast the value to intptr_t, saving its original type.
5846 llvm::Type
*OrigValueType
= Args
[0]->getType();
5847 if (OrigValueType
->isPointerTy())
5848 Args
[0] = Builder
.CreatePtrToInt(Args
[0], IntPtrTy
);
5850 switch (BuiltinID
) {
5851 case Builtin::BI__builtin_ptrauth_auth_and_resign
:
5852 if (Args
[4]->getType()->isPointerTy())
5853 Args
[4] = Builder
.CreatePtrToInt(Args
[4], IntPtrTy
);
5856 case Builtin::BI__builtin_ptrauth_auth
:
5857 case Builtin::BI__builtin_ptrauth_sign_unauthenticated
:
5858 if (Args
[2]->getType()->isPointerTy())
5859 Args
[2] = Builder
.CreatePtrToInt(Args
[2], IntPtrTy
);
5862 case Builtin::BI__builtin_ptrauth_sign_generic_data
:
5863 if (Args
[1]->getType()->isPointerTy())
5864 Args
[1] = Builder
.CreatePtrToInt(Args
[1], IntPtrTy
);
5867 case Builtin::BI__builtin_ptrauth_blend_discriminator
:
5868 case Builtin::BI__builtin_ptrauth_strip
:
5872 // Call the intrinsic.
5873 auto IntrinsicID
= [&]() -> unsigned {
5874 switch (BuiltinID
) {
5875 case Builtin::BI__builtin_ptrauth_auth
:
5876 return llvm::Intrinsic::ptrauth_auth
;
5877 case Builtin::BI__builtin_ptrauth_auth_and_resign
:
5878 return llvm::Intrinsic::ptrauth_resign
;
5879 case Builtin::BI__builtin_ptrauth_blend_discriminator
:
5880 return llvm::Intrinsic::ptrauth_blend
;
5881 case Builtin::BI__builtin_ptrauth_sign_generic_data
:
5882 return llvm::Intrinsic::ptrauth_sign_generic
;
5883 case Builtin::BI__builtin_ptrauth_sign_unauthenticated
:
5884 return llvm::Intrinsic::ptrauth_sign
;
5885 case Builtin::BI__builtin_ptrauth_strip
:
5886 return llvm::Intrinsic::ptrauth_strip
;
5888 llvm_unreachable("bad ptrauth intrinsic");
5890 auto Intrinsic
= CGM
.getIntrinsic(IntrinsicID
);
5891 llvm::Value
*Result
= EmitRuntimeCall(Intrinsic
, Args
);
5893 if (BuiltinID
!= Builtin::BI__builtin_ptrauth_sign_generic_data
&&
5894 BuiltinID
!= Builtin::BI__builtin_ptrauth_blend_discriminator
&&
5895 OrigValueType
->isPointerTy()) {
5896 Result
= Builder
.CreateIntToPtr(Result
, OrigValueType
);
5898 return RValue::get(Result
);
5901 case Builtin::BI__exception_code
:
5902 case Builtin::BI_exception_code
:
5903 return RValue::get(EmitSEHExceptionCode());
5904 case Builtin::BI__exception_info
:
5905 case Builtin::BI_exception_info
:
5906 return RValue::get(EmitSEHExceptionInfo());
5907 case Builtin::BI__abnormal_termination
:
5908 case Builtin::BI_abnormal_termination
:
5909 return RValue::get(EmitSEHAbnormalTermination());
5910 case Builtin::BI_setjmpex
:
5911 if (getTarget().getTriple().isOSMSVCRT() && E
->getNumArgs() == 1 &&
5912 E
->getArg(0)->getType()->isPointerType())
5913 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex
, E
);
5915 case Builtin::BI_setjmp
:
5916 if (getTarget().getTriple().isOSMSVCRT() && E
->getNumArgs() == 1 &&
5917 E
->getArg(0)->getType()->isPointerType()) {
5918 if (getTarget().getTriple().getArch() == llvm::Triple::x86
)
5919 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3
, E
);
5920 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64
)
5921 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex
, E
);
5922 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp
, E
);
5926 // C++ std:: builtins.
5927 case Builtin::BImove
:
5928 case Builtin::BImove_if_noexcept
:
5929 case Builtin::BIforward
:
5930 case Builtin::BIforward_like
:
5931 case Builtin::BIas_const
:
5932 return RValue::get(EmitLValue(E
->getArg(0)).getPointer(*this));
5933 case Builtin::BI__GetExceptionInfo
: {
5934 if (llvm::GlobalVariable
*GV
=
5935 CGM
.getCXXABI().getThrowInfo(FD
->getParamDecl(0)->getType()))
5936 return RValue::get(GV
);
5940 case Builtin::BI__fastfail
:
5941 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail
, E
));
5943 case Builtin::BI__builtin_coro_id
:
5944 return EmitCoroutineIntrinsic(E
, Intrinsic::coro_id
);
5945 case Builtin::BI__builtin_coro_promise
:
5946 return EmitCoroutineIntrinsic(E
, Intrinsic::coro_promise
);
5947 case Builtin::BI__builtin_coro_resume
:
5948 EmitCoroutineIntrinsic(E
, Intrinsic::coro_resume
);
5949 return RValue::get(nullptr);
5950 case Builtin::BI__builtin_coro_frame
:
5951 return EmitCoroutineIntrinsic(E
, Intrinsic::coro_frame
);
5952 case Builtin::BI__builtin_coro_noop
:
5953 return EmitCoroutineIntrinsic(E
, Intrinsic::coro_noop
);
5954 case Builtin::BI__builtin_coro_free
:
5955 return EmitCoroutineIntrinsic(E
, Intrinsic::coro_free
);
5956 case Builtin::BI__builtin_coro_destroy
:
5957 EmitCoroutineIntrinsic(E
, Intrinsic::coro_destroy
);
5958 return RValue::get(nullptr);
5959 case Builtin::BI__builtin_coro_done
:
5960 return EmitCoroutineIntrinsic(E
, Intrinsic::coro_done
);
5961 case Builtin::BI__builtin_coro_alloc
:
5962 return EmitCoroutineIntrinsic(E
, Intrinsic::coro_alloc
);
5963 case Builtin::BI__builtin_coro_begin
:
5964 return EmitCoroutineIntrinsic(E
, Intrinsic::coro_begin
);
5965 case Builtin::BI__builtin_coro_end
:
5966 return EmitCoroutineIntrinsic(E
, Intrinsic::coro_end
);
5967 case Builtin::BI__builtin_coro_suspend
:
5968 return EmitCoroutineIntrinsic(E
, Intrinsic::coro_suspend
);
5969 case Builtin::BI__builtin_coro_size
:
5970 return EmitCoroutineIntrinsic(E
, Intrinsic::coro_size
);
5971 case Builtin::BI__builtin_coro_align
:
5972 return EmitCoroutineIntrinsic(E
, Intrinsic::coro_align
);
5974 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
5975 case Builtin::BIread_pipe
:
5976 case Builtin::BIwrite_pipe
: {
5977 Value
*Arg0
= EmitScalarExpr(E
->getArg(0)),
5978 *Arg1
= EmitScalarExpr(E
->getArg(1));
5979 CGOpenCLRuntime
OpenCLRT(CGM
);
5980 Value
*PacketSize
= OpenCLRT
.getPipeElemSize(E
->getArg(0));
5981 Value
*PacketAlign
= OpenCLRT
.getPipeElemAlign(E
->getArg(0));
5983 // Type of the generic packet parameter.
5984 unsigned GenericAS
=
5985 getContext().getTargetAddressSpace(LangAS::opencl_generic
);
5986 llvm::Type
*I8PTy
= llvm::PointerType::get(getLLVMContext(), GenericAS
);
5988 // Testing which overloaded version we should generate the call for.
5989 if (2U == E
->getNumArgs()) {
5990 const char *Name
= (BuiltinID
== Builtin::BIread_pipe
) ? "__read_pipe_2"
5992 // Creating a generic function type to be able to call with any builtin or
5993 // user defined type.
5994 llvm::Type
*ArgTys
[] = {Arg0
->getType(), I8PTy
, Int32Ty
, Int32Ty
};
5995 llvm::FunctionType
*FTy
= llvm::FunctionType::get(
5996 Int32Ty
, llvm::ArrayRef
<llvm::Type
*>(ArgTys
), false);
5997 Value
*ACast
= Builder
.CreateAddrSpaceCast(Arg1
, I8PTy
);
5999 EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
),
6000 {Arg0
, ACast
, PacketSize
, PacketAlign
}));
6002 assert(4 == E
->getNumArgs() &&
6003 "Illegal number of parameters to pipe function");
6004 const char *Name
= (BuiltinID
== Builtin::BIread_pipe
) ? "__read_pipe_4"
6007 llvm::Type
*ArgTys
[] = {Arg0
->getType(), Arg1
->getType(), Int32Ty
, I8PTy
,
6009 Value
*Arg2
= EmitScalarExpr(E
->getArg(2)),
6010 *Arg3
= EmitScalarExpr(E
->getArg(3));
6011 llvm::FunctionType
*FTy
= llvm::FunctionType::get(
6012 Int32Ty
, llvm::ArrayRef
<llvm::Type
*>(ArgTys
), false);
6013 Value
*ACast
= Builder
.CreateAddrSpaceCast(Arg3
, I8PTy
);
6014 // We know the third argument is an integer type, but we may need to cast
6016 if (Arg2
->getType() != Int32Ty
)
6017 Arg2
= Builder
.CreateZExtOrTrunc(Arg2
, Int32Ty
);
6019 EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
),
6020 {Arg0
, Arg1
, Arg2
, ACast
, PacketSize
, PacketAlign
}));
6023 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
6025 case Builtin::BIreserve_read_pipe
:
6026 case Builtin::BIreserve_write_pipe
:
6027 case Builtin::BIwork_group_reserve_read_pipe
:
6028 case Builtin::BIwork_group_reserve_write_pipe
:
6029 case Builtin::BIsub_group_reserve_read_pipe
:
6030 case Builtin::BIsub_group_reserve_write_pipe
: {
6031 // Composing the mangled name for the function.
6033 if (BuiltinID
== Builtin::BIreserve_read_pipe
)
6034 Name
= "__reserve_read_pipe";
6035 else if (BuiltinID
== Builtin::BIreserve_write_pipe
)
6036 Name
= "__reserve_write_pipe";
6037 else if (BuiltinID
== Builtin::BIwork_group_reserve_read_pipe
)
6038 Name
= "__work_group_reserve_read_pipe";
6039 else if (BuiltinID
== Builtin::BIwork_group_reserve_write_pipe
)
6040 Name
= "__work_group_reserve_write_pipe";
6041 else if (BuiltinID
== Builtin::BIsub_group_reserve_read_pipe
)
6042 Name
= "__sub_group_reserve_read_pipe";
6044 Name
= "__sub_group_reserve_write_pipe";
6046 Value
*Arg0
= EmitScalarExpr(E
->getArg(0)),
6047 *Arg1
= EmitScalarExpr(E
->getArg(1));
6048 llvm::Type
*ReservedIDTy
= ConvertType(getContext().OCLReserveIDTy
);
6049 CGOpenCLRuntime
OpenCLRT(CGM
);
6050 Value
*PacketSize
= OpenCLRT
.getPipeElemSize(E
->getArg(0));
6051 Value
*PacketAlign
= OpenCLRT
.getPipeElemAlign(E
->getArg(0));
6053 // Building the generic function prototype.
6054 llvm::Type
*ArgTys
[] = {Arg0
->getType(), Int32Ty
, Int32Ty
, Int32Ty
};
6055 llvm::FunctionType
*FTy
= llvm::FunctionType::get(
6056 ReservedIDTy
, llvm::ArrayRef
<llvm::Type
*>(ArgTys
), false);
6057 // We know the second argument is an integer type, but we may need to cast
6059 if (Arg1
->getType() != Int32Ty
)
6060 Arg1
= Builder
.CreateZExtOrTrunc(Arg1
, Int32Ty
);
6061 return RValue::get(EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
),
6062 {Arg0
, Arg1
, PacketSize
, PacketAlign
}));
6064 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
6066 case Builtin::BIcommit_read_pipe
:
6067 case Builtin::BIcommit_write_pipe
:
6068 case Builtin::BIwork_group_commit_read_pipe
:
6069 case Builtin::BIwork_group_commit_write_pipe
:
6070 case Builtin::BIsub_group_commit_read_pipe
:
6071 case Builtin::BIsub_group_commit_write_pipe
: {
6073 if (BuiltinID
== Builtin::BIcommit_read_pipe
)
6074 Name
= "__commit_read_pipe";
6075 else if (BuiltinID
== Builtin::BIcommit_write_pipe
)
6076 Name
= "__commit_write_pipe";
6077 else if (BuiltinID
== Builtin::BIwork_group_commit_read_pipe
)
6078 Name
= "__work_group_commit_read_pipe";
6079 else if (BuiltinID
== Builtin::BIwork_group_commit_write_pipe
)
6080 Name
= "__work_group_commit_write_pipe";
6081 else if (BuiltinID
== Builtin::BIsub_group_commit_read_pipe
)
6082 Name
= "__sub_group_commit_read_pipe";
6084 Name
= "__sub_group_commit_write_pipe";
6086 Value
*Arg0
= EmitScalarExpr(E
->getArg(0)),
6087 *Arg1
= EmitScalarExpr(E
->getArg(1));
6088 CGOpenCLRuntime
OpenCLRT(CGM
);
6089 Value
*PacketSize
= OpenCLRT
.getPipeElemSize(E
->getArg(0));
6090 Value
*PacketAlign
= OpenCLRT
.getPipeElemAlign(E
->getArg(0));
6092 // Building the generic function prototype.
6093 llvm::Type
*ArgTys
[] = {Arg0
->getType(), Arg1
->getType(), Int32Ty
, Int32Ty
};
6094 llvm::FunctionType
*FTy
=
6095 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
6096 llvm::ArrayRef
<llvm::Type
*>(ArgTys
), false);
6098 return RValue::get(EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
),
6099 {Arg0
, Arg1
, PacketSize
, PacketAlign
}));
6101 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
6102 case Builtin::BIget_pipe_num_packets
:
6103 case Builtin::BIget_pipe_max_packets
: {
6104 const char *BaseName
;
6105 const auto *PipeTy
= E
->getArg(0)->getType()->castAs
<PipeType
>();
6106 if (BuiltinID
== Builtin::BIget_pipe_num_packets
)
6107 BaseName
= "__get_pipe_num_packets";
6109 BaseName
= "__get_pipe_max_packets";
6110 std::string Name
= std::string(BaseName
) +
6111 std::string(PipeTy
->isReadOnly() ? "_ro" : "_wo");
6113 // Building the generic function prototype.
6114 Value
*Arg0
= EmitScalarExpr(E
->getArg(0));
6115 CGOpenCLRuntime
OpenCLRT(CGM
);
6116 Value
*PacketSize
= OpenCLRT
.getPipeElemSize(E
->getArg(0));
6117 Value
*PacketAlign
= OpenCLRT
.getPipeElemAlign(E
->getArg(0));
6118 llvm::Type
*ArgTys
[] = {Arg0
->getType(), Int32Ty
, Int32Ty
};
6119 llvm::FunctionType
*FTy
= llvm::FunctionType::get(
6120 Int32Ty
, llvm::ArrayRef
<llvm::Type
*>(ArgTys
), false);
6122 return RValue::get(EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
),
6123 {Arg0
, PacketSize
, PacketAlign
}));
6126 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
6127 case Builtin::BIto_global
:
6128 case Builtin::BIto_local
:
6129 case Builtin::BIto_private
: {
6130 auto Arg0
= EmitScalarExpr(E
->getArg(0));
6131 auto NewArgT
= llvm::PointerType::get(
6133 CGM
.getContext().getTargetAddressSpace(LangAS::opencl_generic
));
6134 auto NewRetT
= llvm::PointerType::get(
6136 CGM
.getContext().getTargetAddressSpace(
6137 E
->getType()->getPointeeType().getAddressSpace()));
6138 auto FTy
= llvm::FunctionType::get(NewRetT
, {NewArgT
}, false);
6139 llvm::Value
*NewArg
;
6140 if (Arg0
->getType()->getPointerAddressSpace() !=
6141 NewArgT
->getPointerAddressSpace())
6142 NewArg
= Builder
.CreateAddrSpaceCast(Arg0
, NewArgT
);
6144 NewArg
= Builder
.CreateBitOrPointerCast(Arg0
, NewArgT
);
6145 auto NewName
= std::string("__") + E
->getDirectCallee()->getName().str();
6147 EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, NewName
), {NewArg
});
6148 return RValue::get(Builder
.CreateBitOrPointerCast(NewCall
,
6149 ConvertType(E
->getType())));
6152 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
6153 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
6154 // The code below expands the builtin call to a call to one of the following
6155 // functions that an OpenCL runtime library will have to provide:
6156 // __enqueue_kernel_basic
6157 // __enqueue_kernel_varargs
6158 // __enqueue_kernel_basic_events
6159 // __enqueue_kernel_events_varargs
6160 case Builtin::BIenqueue_kernel
: {
6161 StringRef Name
; // Generated function call name
6162 unsigned NumArgs
= E
->getNumArgs();
6164 llvm::Type
*QueueTy
= ConvertType(getContext().OCLQueueTy
);
6165 llvm::Type
*GenericVoidPtrTy
= Builder
.getPtrTy(
6166 getContext().getTargetAddressSpace(LangAS::opencl_generic
));
6168 llvm::Value
*Queue
= EmitScalarExpr(E
->getArg(0));
6169 llvm::Value
*Flags
= EmitScalarExpr(E
->getArg(1));
6170 LValue NDRangeL
= EmitAggExprToLValue(E
->getArg(2));
6171 llvm::Value
*Range
= NDRangeL
.getAddress().emitRawPointer(*this);
6172 llvm::Type
*RangeTy
= NDRangeL
.getAddress().getType();
6175 // The most basic form of the call with parameters:
6176 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
6177 Name
= "__enqueue_kernel_basic";
6178 llvm::Type
*ArgTys
[] = {QueueTy
, Int32Ty
, RangeTy
, GenericVoidPtrTy
,
6180 llvm::FunctionType
*FTy
= llvm::FunctionType::get(
6181 Int32Ty
, llvm::ArrayRef
<llvm::Type
*>(ArgTys
), false);
6184 CGM
.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E
->getArg(3));
6185 llvm::Value
*Kernel
=
6186 Builder
.CreatePointerCast(Info
.KernelHandle
, GenericVoidPtrTy
);
6187 llvm::Value
*Block
=
6188 Builder
.CreatePointerCast(Info
.BlockArg
, GenericVoidPtrTy
);
6190 auto RTCall
= EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
),
6191 {Queue
, Flags
, Range
, Kernel
, Block
});
6192 return RValue::get(RTCall
);
6194 assert(NumArgs
>= 5 && "Invalid enqueue_kernel signature");
6196 // Create a temporary array to hold the sizes of local pointer arguments
6197 // for the block. \p First is the position of the first size argument.
6198 auto CreateArrayForSizeVar
= [=](unsigned First
)
6199 -> std::tuple
<llvm::Value
*, llvm::Value
*, llvm::Value
*> {
6200 llvm::APInt
ArraySize(32, NumArgs
- First
);
6201 QualType SizeArrayTy
= getContext().getConstantArrayType(
6202 getContext().getSizeType(), ArraySize
, nullptr,
6203 ArraySizeModifier::Normal
,
6204 /*IndexTypeQuals=*/0);
6205 auto Tmp
= CreateMemTemp(SizeArrayTy
, "block_sizes");
6206 llvm::Value
*TmpPtr
= Tmp
.getPointer();
6207 // The EmitLifetime* pair expect a naked Alloca as their last argument,
6208 // however for cases where the default AS is not the Alloca AS, Tmp is
6209 // actually the Alloca ascasted to the default AS, hence the
6210 // stripPointerCasts()
6211 llvm::Value
*Alloca
= TmpPtr
->stripPointerCasts();
6212 llvm::Value
*TmpSize
= EmitLifetimeStart(
6213 CGM
.getDataLayout().getTypeAllocSize(Tmp
.getElementType()), Alloca
);
6214 llvm::Value
*ElemPtr
;
6215 // Each of the following arguments specifies the size of the corresponding
6216 // argument passed to the enqueued block.
6217 auto *Zero
= llvm::ConstantInt::get(IntTy
, 0);
6218 for (unsigned I
= First
; I
< NumArgs
; ++I
) {
6219 auto *Index
= llvm::ConstantInt::get(IntTy
, I
- First
);
6220 auto *GEP
= Builder
.CreateGEP(Tmp
.getElementType(), TmpPtr
,
6225 Builder
.CreateZExtOrTrunc(EmitScalarExpr(E
->getArg(I
)), SizeTy
);
6226 Builder
.CreateAlignedStore(
6227 V
, GEP
, CGM
.getDataLayout().getPrefTypeAlign(SizeTy
));
6229 // Return the Alloca itself rather than a potential ascast as this is only
6230 // used by the paired EmitLifetimeEnd.
6231 return std::tie(ElemPtr
, TmpSize
, Alloca
);
6234 // Could have events and/or varargs.
6235 if (E
->getArg(3)->getType()->isBlockPointerType()) {
6236 // No events passed, but has variadic arguments.
6237 Name
= "__enqueue_kernel_varargs";
6239 CGM
.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E
->getArg(3));
6240 llvm::Value
*Kernel
=
6241 Builder
.CreatePointerCast(Info
.KernelHandle
, GenericVoidPtrTy
);
6242 auto *Block
= Builder
.CreatePointerCast(Info
.BlockArg
, GenericVoidPtrTy
);
6243 llvm::Value
*ElemPtr
, *TmpSize
, *TmpPtr
;
6244 std::tie(ElemPtr
, TmpSize
, TmpPtr
) = CreateArrayForSizeVar(4);
6246 // Create a vector of the arguments, as well as a constant value to
6247 // express to the runtime the number of variadic arguments.
6248 llvm::Value
*const Args
[] = {Queue
, Flags
,
6250 Block
, ConstantInt::get(IntTy
, NumArgs
- 4),
6252 llvm::Type
*const ArgTys
[] = {
6253 QueueTy
, IntTy
, RangeTy
, GenericVoidPtrTy
,
6254 GenericVoidPtrTy
, IntTy
, ElemPtr
->getType()};
6256 llvm::FunctionType
*FTy
= llvm::FunctionType::get(Int32Ty
, ArgTys
, false);
6257 auto Call
= RValue::get(
6258 EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
), Args
));
6260 EmitLifetimeEnd(TmpSize
, TmpPtr
);
6263 // Any calls now have event arguments passed.
6265 llvm::PointerType
*PtrTy
= llvm::PointerType::get(
6266 CGM
.getLLVMContext(),
6267 CGM
.getContext().getTargetAddressSpace(LangAS::opencl_generic
));
6269 llvm::Value
*NumEvents
=
6270 Builder
.CreateZExtOrTrunc(EmitScalarExpr(E
->getArg(3)), Int32Ty
);
6272 // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments
6273 // to be a null pointer constant (including `0` literal), we can take it
6274 // into account and emit null pointer directly.
6275 llvm::Value
*EventWaitList
= nullptr;
6276 if (E
->getArg(4)->isNullPointerConstant(
6277 getContext(), Expr::NPC_ValueDependentIsNotNull
)) {
6278 EventWaitList
= llvm::ConstantPointerNull::get(PtrTy
);
6281 E
->getArg(4)->getType()->isArrayType()
6282 ? EmitArrayToPointerDecay(E
->getArg(4)).emitRawPointer(*this)
6283 : EmitScalarExpr(E
->getArg(4));
6284 // Convert to generic address space.
6285 EventWaitList
= Builder
.CreatePointerCast(EventWaitList
, PtrTy
);
6287 llvm::Value
*EventRet
= nullptr;
6288 if (E
->getArg(5)->isNullPointerConstant(
6289 getContext(), Expr::NPC_ValueDependentIsNotNull
)) {
6290 EventRet
= llvm::ConstantPointerNull::get(PtrTy
);
6293 Builder
.CreatePointerCast(EmitScalarExpr(E
->getArg(5)), PtrTy
);
6297 CGM
.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E
->getArg(6));
6298 llvm::Value
*Kernel
=
6299 Builder
.CreatePointerCast(Info
.KernelHandle
, GenericVoidPtrTy
);
6300 llvm::Value
*Block
=
6301 Builder
.CreatePointerCast(Info
.BlockArg
, GenericVoidPtrTy
);
6303 std::vector
<llvm::Type
*> ArgTys
= {
6304 QueueTy
, Int32Ty
, RangeTy
, Int32Ty
,
6305 PtrTy
, PtrTy
, GenericVoidPtrTy
, GenericVoidPtrTy
};
6307 std::vector
<llvm::Value
*> Args
= {Queue
, Flags
, Range
,
6308 NumEvents
, EventWaitList
, EventRet
,
6312 // Has events but no variadics.
6313 Name
= "__enqueue_kernel_basic_events";
6314 llvm::FunctionType
*FTy
= llvm::FunctionType::get(
6315 Int32Ty
, llvm::ArrayRef
<llvm::Type
*>(ArgTys
), false);
6317 EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
),
6318 llvm::ArrayRef
<llvm::Value
*>(Args
)));
6320 // Has event info and variadics
6321 // Pass the number of variadics to the runtime function too.
6322 Args
.push_back(ConstantInt::get(Int32Ty
, NumArgs
- 7));
6323 ArgTys
.push_back(Int32Ty
);
6324 Name
= "__enqueue_kernel_events_varargs";
6326 llvm::Value
*ElemPtr
, *TmpSize
, *TmpPtr
;
6327 std::tie(ElemPtr
, TmpSize
, TmpPtr
) = CreateArrayForSizeVar(7);
6328 Args
.push_back(ElemPtr
);
6329 ArgTys
.push_back(ElemPtr
->getType());
6331 llvm::FunctionType
*FTy
= llvm::FunctionType::get(
6332 Int32Ty
, llvm::ArrayRef
<llvm::Type
*>(ArgTys
), false);
6334 RValue::get(EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
),
6335 llvm::ArrayRef
<llvm::Value
*>(Args
)));
6337 EmitLifetimeEnd(TmpSize
, TmpPtr
);
6340 llvm_unreachable("Unexpected enqueue_kernel signature");
6342 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
6344 case Builtin::BIget_kernel_work_group_size
: {
6345 llvm::Type
*GenericVoidPtrTy
= Builder
.getPtrTy(
6346 getContext().getTargetAddressSpace(LangAS::opencl_generic
));
6348 CGM
.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E
->getArg(0));
6350 Builder
.CreatePointerCast(Info
.KernelHandle
, GenericVoidPtrTy
);
6351 Value
*Arg
= Builder
.CreatePointerCast(Info
.BlockArg
, GenericVoidPtrTy
);
6352 return RValue::get(EmitRuntimeCall(
6353 CGM
.CreateRuntimeFunction(
6354 llvm::FunctionType::get(IntTy
, {GenericVoidPtrTy
, GenericVoidPtrTy
},
6356 "__get_kernel_work_group_size_impl"),
6359 case Builtin::BIget_kernel_preferred_work_group_size_multiple
: {
6360 llvm::Type
*GenericVoidPtrTy
= Builder
.getPtrTy(
6361 getContext().getTargetAddressSpace(LangAS::opencl_generic
));
6363 CGM
.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E
->getArg(0));
6365 Builder
.CreatePointerCast(Info
.KernelHandle
, GenericVoidPtrTy
);
6366 Value
*Arg
= Builder
.CreatePointerCast(Info
.BlockArg
, GenericVoidPtrTy
);
6367 return RValue::get(EmitRuntimeCall(
6368 CGM
.CreateRuntimeFunction(
6369 llvm::FunctionType::get(IntTy
, {GenericVoidPtrTy
, GenericVoidPtrTy
},
6371 "__get_kernel_preferred_work_group_size_multiple_impl"),
6374 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange
:
6375 case Builtin::BIget_kernel_sub_group_count_for_ndrange
: {
6376 llvm::Type
*GenericVoidPtrTy
= Builder
.getPtrTy(
6377 getContext().getTargetAddressSpace(LangAS::opencl_generic
));
6378 LValue NDRangeL
= EmitAggExprToLValue(E
->getArg(0));
6379 llvm::Value
*NDRange
= NDRangeL
.getAddress().emitRawPointer(*this);
6381 CGM
.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E
->getArg(1));
6383 Builder
.CreatePointerCast(Info
.KernelHandle
, GenericVoidPtrTy
);
6384 Value
*Block
= Builder
.CreatePointerCast(Info
.BlockArg
, GenericVoidPtrTy
);
6386 BuiltinID
== Builtin::BIget_kernel_max_sub_group_size_for_ndrange
6387 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
6388 : "__get_kernel_sub_group_count_for_ndrange_impl";
6389 return RValue::get(EmitRuntimeCall(
6390 CGM
.CreateRuntimeFunction(
6391 llvm::FunctionType::get(
6392 IntTy
, {NDRange
->getType(), GenericVoidPtrTy
, GenericVoidPtrTy
},
6395 {NDRange
, Kernel
, Block
}));
6397 case Builtin::BI__builtin_store_half
:
6398 case Builtin::BI__builtin_store_halff
: {
6399 Value
*Val
= EmitScalarExpr(E
->getArg(0));
6400 Address Address
= EmitPointerWithAlignment(E
->getArg(1));
6401 Value
*HalfVal
= Builder
.CreateFPTrunc(Val
, Builder
.getHalfTy());
6402 Builder
.CreateStore(HalfVal
, Address
);
6403 return RValue::get(nullptr);
6405 case Builtin::BI__builtin_load_half
: {
6406 Address Address
= EmitPointerWithAlignment(E
->getArg(0));
6407 Value
*HalfVal
= Builder
.CreateLoad(Address
);
6408 return RValue::get(Builder
.CreateFPExt(HalfVal
, Builder
.getDoubleTy()));
6410 case Builtin::BI__builtin_load_halff
: {
6411 Address Address
= EmitPointerWithAlignment(E
->getArg(0));
6412 Value
*HalfVal
= Builder
.CreateLoad(Address
);
6413 return RValue::get(Builder
.CreateFPExt(HalfVal
, Builder
.getFloatTy()));
6415 case Builtin::BI__builtin_printf
:
6416 case Builtin::BIprintf
:
6417 if (getTarget().getTriple().isNVPTX() ||
6418 getTarget().getTriple().isAMDGCN() ||
6419 (getTarget().getTriple().isSPIRV() &&
6420 getTarget().getTriple().getVendor() == Triple::VendorType::AMD
)) {
6421 if (getTarget().getTriple().isNVPTX())
6422 return EmitNVPTXDevicePrintfCallExpr(E
);
6423 if ((getTarget().getTriple().isAMDGCN() ||
6424 getTarget().getTriple().isSPIRV()) &&
6426 return EmitAMDGPUDevicePrintfCallExpr(E
);
6430 case Builtin::BI__builtin_canonicalize
:
6431 case Builtin::BI__builtin_canonicalizef
:
6432 case Builtin::BI__builtin_canonicalizef16
:
6433 case Builtin::BI__builtin_canonicalizel
:
6435 emitBuiltinWithOneOverloadedType
<1>(*this, E
, Intrinsic::canonicalize
));
6437 case Builtin::BI__builtin_thread_pointer
: {
6438 if (!getContext().getTargetInfo().isTLSSupported())
6439 CGM
.ErrorUnsupported(E
, "__builtin_thread_pointer");
6440 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
6443 case Builtin::BI__builtin_os_log_format
:
6444 return emitBuiltinOSLogFormat(*E
);
6446 case Builtin::BI__xray_customevent
: {
6447 if (!ShouldXRayInstrumentFunction())
6448 return RValue::getIgnored();
6450 if (!CGM
.getCodeGenOpts().XRayInstrumentationBundle
.has(
6451 XRayInstrKind::Custom
))
6452 return RValue::getIgnored();
6454 if (const auto *XRayAttr
= CurFuncDecl
->getAttr
<XRayInstrumentAttr
>())
6455 if (XRayAttr
->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
6456 return RValue::getIgnored();
6458 Function
*F
= CGM
.getIntrinsic(Intrinsic::xray_customevent
);
6459 auto FTy
= F
->getFunctionType();
6460 auto Arg0
= E
->getArg(0);
6461 auto Arg0Val
= EmitScalarExpr(Arg0
);
6462 auto Arg0Ty
= Arg0
->getType();
6463 auto PTy0
= FTy
->getParamType(0);
6464 if (PTy0
!= Arg0Val
->getType()) {
6465 if (Arg0Ty
->isArrayType())
6466 Arg0Val
= EmitArrayToPointerDecay(Arg0
).emitRawPointer(*this);
6468 Arg0Val
= Builder
.CreatePointerCast(Arg0Val
, PTy0
);
6470 auto Arg1
= EmitScalarExpr(E
->getArg(1));
6471 auto PTy1
= FTy
->getParamType(1);
6472 if (PTy1
!= Arg1
->getType())
6473 Arg1
= Builder
.CreateTruncOrBitCast(Arg1
, PTy1
);
6474 return RValue::get(Builder
.CreateCall(F
, {Arg0Val
, Arg1
}));
6477 case Builtin::BI__xray_typedevent
: {
6478 // TODO: There should be a way to always emit events even if the current
6479 // function is not instrumented. Losing events in a stream can cripple
6481 if (!ShouldXRayInstrumentFunction())
6482 return RValue::getIgnored();
6484 if (!CGM
.getCodeGenOpts().XRayInstrumentationBundle
.has(
6485 XRayInstrKind::Typed
))
6486 return RValue::getIgnored();
6488 if (const auto *XRayAttr
= CurFuncDecl
->getAttr
<XRayInstrumentAttr
>())
6489 if (XRayAttr
->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
6490 return RValue::getIgnored();
6492 Function
*F
= CGM
.getIntrinsic(Intrinsic::xray_typedevent
);
6493 auto FTy
= F
->getFunctionType();
6494 auto Arg0
= EmitScalarExpr(E
->getArg(0));
6495 auto PTy0
= FTy
->getParamType(0);
6496 if (PTy0
!= Arg0
->getType())
6497 Arg0
= Builder
.CreateTruncOrBitCast(Arg0
, PTy0
);
6498 auto Arg1
= E
->getArg(1);
6499 auto Arg1Val
= EmitScalarExpr(Arg1
);
6500 auto Arg1Ty
= Arg1
->getType();
6501 auto PTy1
= FTy
->getParamType(1);
6502 if (PTy1
!= Arg1Val
->getType()) {
6503 if (Arg1Ty
->isArrayType())
6504 Arg1Val
= EmitArrayToPointerDecay(Arg1
).emitRawPointer(*this);
6506 Arg1Val
= Builder
.CreatePointerCast(Arg1Val
, PTy1
);
6508 auto Arg2
= EmitScalarExpr(E
->getArg(2));
6509 auto PTy2
= FTy
->getParamType(2);
6510 if (PTy2
!= Arg2
->getType())
6511 Arg2
= Builder
.CreateTruncOrBitCast(Arg2
, PTy2
);
6512 return RValue::get(Builder
.CreateCall(F
, {Arg0
, Arg1Val
, Arg2
}));
6515 case Builtin::BI__builtin_ms_va_start
:
6516 case Builtin::BI__builtin_ms_va_end
:
6518 EmitVAStartEnd(EmitMSVAListRef(E
->getArg(0)).emitRawPointer(*this),
6519 BuiltinID
== Builtin::BI__builtin_ms_va_start
));
6521 case Builtin::BI__builtin_ms_va_copy
: {
6522 // Lower this manually. We can't reliably determine whether or not any
6523 // given va_copy() is for a Win64 va_list from the calling convention
6524 // alone, because it's legal to do this from a System V ABI function.
6525 // With opaque pointer types, we won't have enough information in LLVM
6526 // IR to determine this from the argument types, either. Best to do it
6527 // now, while we have enough information.
6528 Address DestAddr
= EmitMSVAListRef(E
->getArg(0));
6529 Address SrcAddr
= EmitMSVAListRef(E
->getArg(1));
6531 DestAddr
= DestAddr
.withElementType(Int8PtrTy
);
6532 SrcAddr
= SrcAddr
.withElementType(Int8PtrTy
);
6534 Value
*ArgPtr
= Builder
.CreateLoad(SrcAddr
, "ap.val");
6535 return RValue::get(Builder
.CreateStore(ArgPtr
, DestAddr
));
6538 case Builtin::BI__builtin_get_device_side_mangled_name
: {
6539 auto Name
= CGM
.getCUDARuntime().getDeviceSideName(
6540 cast
<DeclRefExpr
>(E
->getArg(0)->IgnoreImpCasts())->getDecl());
6541 auto Str
= CGM
.GetAddrOfConstantCString(Name
, "");
6542 return RValue::get(Str
.getPointer());
6546 // If this is an alias for a lib function (e.g. __builtin_sin), emit
6547 // the call using the normal call path, but using the unmangled
6548 // version of the function name.
6549 if (getContext().BuiltinInfo
.isLibFunction(BuiltinID
))
6550 return emitLibraryCall(*this, FD
, E
,
6551 CGM
.getBuiltinLibFunction(FD
, BuiltinID
));
6553 // If this is a predefined lib function (e.g. malloc), emit the call
6554 // using exactly the normal call path.
6555 if (getContext().BuiltinInfo
.isPredefinedLibFunction(BuiltinID
))
6556 return emitLibraryCall(*this, FD
, E
, CGM
.getRawFunctionPointer(FD
));
6558 // Check that a call to a target specific builtin has the correct target
6560 // This is down here to avoid non-target specific builtins, however, if
6561 // generic builtins start to require generic target features then we
6562 // can move this up to the beginning of the function.
6563 checkTargetFeatures(E
, FD
);
6565 if (unsigned VectorWidth
= getContext().BuiltinInfo
.getRequiredVectorWidth(BuiltinID
))
6566 LargestVectorWidth
= std::max(LargestVectorWidth
, VectorWidth
);
6568 // See if we have a target specific intrinsic.
6569 StringRef Name
= getContext().BuiltinInfo
.getName(BuiltinID
);
6570 Intrinsic::ID IntrinsicID
= Intrinsic::not_intrinsic
;
6572 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
6573 if (!Prefix
.empty()) {
6574 IntrinsicID
= Intrinsic::getIntrinsicForClangBuiltin(Prefix
.data(), Name
);
6575 if (IntrinsicID
== Intrinsic::not_intrinsic
&& Prefix
== "spv" &&
6576 getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA
)
6577 IntrinsicID
= Intrinsic::getIntrinsicForClangBuiltin("amdgcn", Name
);
6578 // NOTE we don't need to perform a compatibility flag check here since the
6579 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
6580 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
6581 if (IntrinsicID
== Intrinsic::not_intrinsic
)
6582 IntrinsicID
= Intrinsic::getIntrinsicForMSBuiltin(Prefix
.data(), Name
);
6585 if (IntrinsicID
!= Intrinsic::not_intrinsic
) {
6586 SmallVector
<Value
*, 16> Args
;
6588 // Find out if any arguments are required to be integer constant
6590 unsigned ICEArguments
= 0;
6591 ASTContext::GetBuiltinTypeError Error
;
6592 getContext().GetBuiltinType(BuiltinID
, Error
, &ICEArguments
);
6593 assert(Error
== ASTContext::GE_None
&& "Should not codegen an error");
6595 Function
*F
= CGM
.getIntrinsic(IntrinsicID
);
6596 llvm::FunctionType
*FTy
= F
->getFunctionType();
6598 for (unsigned i
= 0, e
= E
->getNumArgs(); i
!= e
; ++i
) {
6599 Value
*ArgValue
= EmitScalarOrConstFoldImmArg(ICEArguments
, i
, E
);
6600 // If the intrinsic arg type is different from the builtin arg type
6601 // we need to do a bit cast.
6602 llvm::Type
*PTy
= FTy
->getParamType(i
);
6603 if (PTy
!= ArgValue
->getType()) {
6604 // XXX - vector of pointers?
6605 if (auto *PtrTy
= dyn_cast
<llvm::PointerType
>(PTy
)) {
6606 if (PtrTy
->getAddressSpace() !=
6607 ArgValue
->getType()->getPointerAddressSpace()) {
6608 ArgValue
= Builder
.CreateAddrSpaceCast(
6609 ArgValue
, llvm::PointerType::get(getLLVMContext(),
6610 PtrTy
->getAddressSpace()));
6614 // Cast vector type (e.g., v256i32) to x86_amx, this only happen
6615 // in amx intrinsics.
6616 if (PTy
->isX86_AMXTy())
6617 ArgValue
= Builder
.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile
,
6618 {ArgValue
->getType()}, {ArgValue
});
6620 ArgValue
= Builder
.CreateBitCast(ArgValue
, PTy
);
6623 Args
.push_back(ArgValue
);
6626 Value
*V
= Builder
.CreateCall(F
, Args
);
6627 QualType BuiltinRetType
= E
->getType();
6629 llvm::Type
*RetTy
= VoidTy
;
6630 if (!BuiltinRetType
->isVoidType())
6631 RetTy
= ConvertType(BuiltinRetType
);
6633 if (RetTy
!= V
->getType()) {
6634 // XXX - vector of pointers?
6635 if (auto *PtrTy
= dyn_cast
<llvm::PointerType
>(RetTy
)) {
6636 if (PtrTy
->getAddressSpace() != V
->getType()->getPointerAddressSpace()) {
6637 V
= Builder
.CreateAddrSpaceCast(
6638 V
, llvm::PointerType::get(getLLVMContext(),
6639 PtrTy
->getAddressSpace()));
6643 // Cast x86_amx to vector type (e.g., v256i32), this only happen
6644 // in amx intrinsics.
6645 if (V
->getType()->isX86_AMXTy())
6646 V
= Builder
.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector
, {RetTy
},
6649 V
= Builder
.CreateBitCast(V
, RetTy
);
6652 if (RetTy
->isVoidTy())
6653 return RValue::get(nullptr);
6655 return RValue::get(V
);
6658 // Some target-specific builtins can have aggregate return values, e.g.
6659 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6660 // ReturnValue to be non-null, so that the target-specific emission code can
6661 // always just emit into it.
6662 TypeEvaluationKind EvalKind
= getEvaluationKind(E
->getType());
6663 if (EvalKind
== TEK_Aggregate
&& ReturnValue
.isNull()) {
6664 Address DestPtr
= CreateMemTemp(E
->getType(), "agg.tmp");
6665 ReturnValue
= ReturnValueSlot(DestPtr
, false);
6668 // Now see if we can emit a target-specific builtin.
6669 if (Value
*V
= EmitTargetBuiltinExpr(BuiltinID
, E
, ReturnValue
)) {
6672 if (V
->getType()->isVoidTy())
6673 return RValue::get(nullptr);
6674 return RValue::get(V
);
6676 return RValue::getAggregate(ReturnValue
.getAddress(),
6677 ReturnValue
.isVolatile());
6679 llvm_unreachable("No current target builtin returns complex");
6681 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6684 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6685 if (Value
*V
= EmitHLSLBuiltinExpr(BuiltinID
, E
, ReturnValue
)) {
6688 if (V
->getType()->isVoidTy())
6689 return RValue::get(nullptr);
6690 return RValue::get(V
);
6692 return RValue::getAggregate(ReturnValue
.getAddress(),
6693 ReturnValue
.isVolatile());
6695 llvm_unreachable("No current hlsl builtin returns complex");
6697 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6700 if (getLangOpts().HIPStdPar
&& getLangOpts().CUDAIsDevice
)
6701 return EmitHipStdParUnsupportedBuiltin(this, FD
);
6703 ErrorUnsupported(E
, "builtin function");
6705 // Unknown builtin, for now just dump it out and return undef.
6706 return GetUndefRValue(E
->getType());
6709 static Value
*EmitTargetArchBuiltinExpr(CodeGenFunction
*CGF
,
6710 unsigned BuiltinID
, const CallExpr
*E
,
6711 ReturnValueSlot ReturnValue
,
6712 llvm::Triple::ArchType Arch
) {
6713 // When compiling in HipStdPar mode we have to be conservative in rejecting
6714 // target specific features in the FE, and defer the possible error to the
6715 // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
6716 // referenced by an accelerator executable function, we emit an error.
6717 // Returning nullptr here leads to the builtin being handled in
6718 // EmitStdParUnsupportedBuiltin.
6719 if (CGF
->getLangOpts().HIPStdPar
&& CGF
->getLangOpts().CUDAIsDevice
&&
6720 Arch
!= CGF
->getTarget().getTriple().getArch())
6724 case llvm::Triple::arm
:
6725 case llvm::Triple::armeb
:
6726 case llvm::Triple::thumb
:
6727 case llvm::Triple::thumbeb
:
6728 return CGF
->EmitARMBuiltinExpr(BuiltinID
, E
, ReturnValue
, Arch
);
6729 case llvm::Triple::aarch64
:
6730 case llvm::Triple::aarch64_32
:
6731 case llvm::Triple::aarch64_be
:
6732 return CGF
->EmitAArch64BuiltinExpr(BuiltinID
, E
, Arch
);
6733 case llvm::Triple::bpfeb
:
6734 case llvm::Triple::bpfel
:
6735 return CGF
->EmitBPFBuiltinExpr(BuiltinID
, E
);
6736 case llvm::Triple::x86
:
6737 case llvm::Triple::x86_64
:
6738 return CGF
->EmitX86BuiltinExpr(BuiltinID
, E
);
6739 case llvm::Triple::ppc
:
6740 case llvm::Triple::ppcle
:
6741 case llvm::Triple::ppc64
:
6742 case llvm::Triple::ppc64le
:
6743 return CGF
->EmitPPCBuiltinExpr(BuiltinID
, E
);
6744 case llvm::Triple::r600
:
6745 case llvm::Triple::amdgcn
:
6746 return CGF
->EmitAMDGPUBuiltinExpr(BuiltinID
, E
);
6747 case llvm::Triple::systemz
:
6748 return CGF
->EmitSystemZBuiltinExpr(BuiltinID
, E
);
6749 case llvm::Triple::nvptx
:
6750 case llvm::Triple::nvptx64
:
6751 return CGF
->EmitNVPTXBuiltinExpr(BuiltinID
, E
);
6752 case llvm::Triple::wasm32
:
6753 case llvm::Triple::wasm64
:
6754 return CGF
->EmitWebAssemblyBuiltinExpr(BuiltinID
, E
);
6755 case llvm::Triple::hexagon
:
6756 return CGF
->EmitHexagonBuiltinExpr(BuiltinID
, E
);
6757 case llvm::Triple::riscv32
:
6758 case llvm::Triple::riscv64
:
6759 return CGF
->EmitRISCVBuiltinExpr(BuiltinID
, E
, ReturnValue
);
6760 case llvm::Triple::spirv64
:
6761 if (CGF
->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA
)
6763 return CGF
->EmitAMDGPUBuiltinExpr(BuiltinID
, E
);
6769 Value
*CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID
,
6771 ReturnValueSlot ReturnValue
) {
6772 if (getContext().BuiltinInfo
.isAuxBuiltinID(BuiltinID
)) {
6773 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6774 return EmitTargetArchBuiltinExpr(
6775 this, getContext().BuiltinInfo
.getAuxBuiltinID(BuiltinID
), E
,
6776 ReturnValue
, getContext().getAuxTargetInfo()->getTriple().getArch());
6779 return EmitTargetArchBuiltinExpr(this, BuiltinID
, E
, ReturnValue
,
6780 getTarget().getTriple().getArch());
6783 static llvm::FixedVectorType
*GetNeonType(CodeGenFunction
*CGF
,
6784 NeonTypeFlags TypeFlags
,
6785 bool HasLegalHalfType
= true,
6787 bool AllowBFloatArgsAndRet
= true) {
6788 int IsQuad
= TypeFlags
.isQuad();
6789 switch (TypeFlags
.getEltType()) {
6790 case NeonTypeFlags::Int8
:
6791 case NeonTypeFlags::Poly8
:
6792 return llvm::FixedVectorType::get(CGF
->Int8Ty
, V1Ty
? 1 : (8 << IsQuad
));
6793 case NeonTypeFlags::Int16
:
6794 case NeonTypeFlags::Poly16
:
6795 return llvm::FixedVectorType::get(CGF
->Int16Ty
, V1Ty
? 1 : (4 << IsQuad
));
6796 case NeonTypeFlags::BFloat16
:
6797 if (AllowBFloatArgsAndRet
)
6798 return llvm::FixedVectorType::get(CGF
->BFloatTy
, V1Ty
? 1 : (4 << IsQuad
));
6800 return llvm::FixedVectorType::get(CGF
->Int16Ty
, V1Ty
? 1 : (4 << IsQuad
));
6801 case NeonTypeFlags::Float16
:
6802 if (HasLegalHalfType
)
6803 return llvm::FixedVectorType::get(CGF
->HalfTy
, V1Ty
? 1 : (4 << IsQuad
));
6805 return llvm::FixedVectorType::get(CGF
->Int16Ty
, V1Ty
? 1 : (4 << IsQuad
));
6806 case NeonTypeFlags::Int32
:
6807 return llvm::FixedVectorType::get(CGF
->Int32Ty
, V1Ty
? 1 : (2 << IsQuad
));
6808 case NeonTypeFlags::Int64
:
6809 case NeonTypeFlags::Poly64
:
6810 return llvm::FixedVectorType::get(CGF
->Int64Ty
, V1Ty
? 1 : (1 << IsQuad
));
6811 case NeonTypeFlags::Poly128
:
6812 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
6813 // There is a lot of i128 and f128 API missing.
6814 // so we use v16i8 to represent poly128 and get pattern matched.
6815 return llvm::FixedVectorType::get(CGF
->Int8Ty
, 16);
6816 case NeonTypeFlags::Float32
:
6817 return llvm::FixedVectorType::get(CGF
->FloatTy
, V1Ty
? 1 : (2 << IsQuad
));
6818 case NeonTypeFlags::Float64
:
6819 return llvm::FixedVectorType::get(CGF
->DoubleTy
, V1Ty
? 1 : (1 << IsQuad
));
6821 llvm_unreachable("Unknown vector element type!");
6824 static llvm::VectorType
*GetFloatNeonType(CodeGenFunction
*CGF
,
6825 NeonTypeFlags IntTypeFlags
) {
6826 int IsQuad
= IntTypeFlags
.isQuad();
6827 switch (IntTypeFlags
.getEltType()) {
6828 case NeonTypeFlags::Int16
:
6829 return llvm::FixedVectorType::get(CGF
->HalfTy
, (4 << IsQuad
));
6830 case NeonTypeFlags::Int32
:
6831 return llvm::FixedVectorType::get(CGF
->FloatTy
, (2 << IsQuad
));
6832 case NeonTypeFlags::Int64
:
6833 return llvm::FixedVectorType::get(CGF
->DoubleTy
, (1 << IsQuad
));
6835 llvm_unreachable("Type can't be converted to floating-point!");
6839 Value
*CodeGenFunction::EmitNeonSplat(Value
*V
, Constant
*C
,
6840 const ElementCount
&Count
) {
6841 Value
*SV
= llvm::ConstantVector::getSplat(Count
, C
);
6842 return Builder
.CreateShuffleVector(V
, V
, SV
, "lane");
6845 Value
*CodeGenFunction::EmitNeonSplat(Value
*V
, Constant
*C
) {
6846 ElementCount EC
= cast
<llvm::VectorType
>(V
->getType())->getElementCount();
6847 return EmitNeonSplat(V
, C
, EC
);
6850 Value
*CodeGenFunction::EmitNeonCall(Function
*F
, SmallVectorImpl
<Value
*> &Ops
,
6852 unsigned shift
, bool rightshift
) {
6854 for (Function::const_arg_iterator ai
= F
->arg_begin(), ae
= F
->arg_end();
6855 ai
!= ae
; ++ai
, ++j
) {
6856 if (F
->isConstrainedFPIntrinsic())
6857 if (ai
->getType()->isMetadataTy())
6859 if (shift
> 0 && shift
== j
)
6860 Ops
[j
] = EmitNeonShiftVector(Ops
[j
], ai
->getType(), rightshift
);
6862 Ops
[j
] = Builder
.CreateBitCast(Ops
[j
], ai
->getType(), name
);
6865 if (F
->isConstrainedFPIntrinsic())
6866 return Builder
.CreateConstrainedFPCall(F
, Ops
, name
);
6868 return Builder
.CreateCall(F
, Ops
, name
);
6871 Value
*CodeGenFunction::EmitNeonShiftVector(Value
*V
, llvm::Type
*Ty
,
6873 int SV
= cast
<ConstantInt
>(V
)->getSExtValue();
6874 return ConstantInt::get(Ty
, neg
? -SV
: SV
);
6877 // Right-shift a vector by a constant.
6878 Value
*CodeGenFunction::EmitNeonRShiftImm(Value
*Vec
, Value
*Shift
,
6879 llvm::Type
*Ty
, bool usgn
,
6881 llvm::VectorType
*VTy
= cast
<llvm::VectorType
>(Ty
);
6883 int ShiftAmt
= cast
<ConstantInt
>(Shift
)->getSExtValue();
6884 int EltSize
= VTy
->getScalarSizeInBits();
6886 Vec
= Builder
.CreateBitCast(Vec
, Ty
);
6888 // lshr/ashr are undefined when the shift amount is equal to the vector
6890 if (ShiftAmt
== EltSize
) {
6892 // Right-shifting an unsigned value by its size yields 0.
6893 return llvm::ConstantAggregateZero::get(VTy
);
6895 // Right-shifting a signed value by its size is equivalent
6896 // to a shift of size-1.
6898 Shift
= ConstantInt::get(VTy
->getElementType(), ShiftAmt
);
6902 Shift
= EmitNeonShiftVector(Shift
, Ty
, false);
6904 return Builder
.CreateLShr(Vec
, Shift
, name
);
6906 return Builder
.CreateAShr(Vec
, Shift
, name
);
// Flag values that the intrinsic tables below pass as the TypeModifier
// argument of NEONMAP1/NEONMAP2 (e.g. `Add1ArgType | UnsignedAlts`).
// NOTE(review): the `enum` header that opens this list is not present in this
// part of the file — confirm against the complete source.

// Single-bit flags, bits 0 through 8.
6910 AddRetType
= (1 << 0),
6911 Add1ArgType
= (1 << 1),
6912 Add2ArgTypes
= (1 << 2),
6914 VectorizeRetType
= (1 << 3),
6915 VectorizeArgTypes
= (1 << 4),
6917 InventFloatType
= (1 << 5),
6918 UnsignedAlts
= (1 << 6),
6920 Use64BitVectors
= (1 << 7),
6921 Use128BitVectors
= (1 << 8),
// Named combinations of the single-bit flags above.
6923 Vectorize1ArgType
= Add1ArgType
| VectorizeArgTypes
,
6924 VectorRet
= AddRetType
| VectorizeRetType
,
6925 VectorRetGetArgs01
=
6926 AddRetType
| Add2ArgTypes
| VectorizeRetType
| VectorizeArgTypes
,
// NOTE(review): the combination below has no visible enumerator name and the
// list has no visible closing `};` — both must be recovered from the complete
// source before this compiles.
6928 AddRetType
| VectorizeRetType
| Add1ArgType
| InventFloatType
namespace {
/// One row of a NEON builtin table. Rows are aggregate-initialized by the
/// NEONMAP0/NEONMAP1/NEONMAP2 macros, so the field order below must match the
/// order of the values those macros produce.
struct ARMVectorIntrinsicInfo {
  const char *NameHint;      // Stringized builtin base name.
  unsigned BuiltinID;        // NEON::BI__builtin_neon_<name>; ordering key.
  unsigned LLVMIntrinsic;    // Primary intrinsic, or 0 when there is none.
  unsigned AltLLVMIntrinsic; // Alternate intrinsic, or 0 when there is none.
  uint64_t TypeModifier;     // Flag set passed as the macro's last argument.

  /// Orders a row against a bare builtin ID.
  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }
  /// Orders two rows by builtin ID.
  bool operator<(const ARMVectorIntrinsicInfo &TE) const {
    return BuiltinID < TE.BuiltinID;
  }
};
} // end anonymous namespace
// Row builders for the NEON intrinsic tables. Each expands to a braced
// initializer of five values: stringized name, NEON builtin ID, primary
// intrinsic, alternate intrinsic, type-modifier flags.

// Row with no intrinsic and no flags.
#define NEONMAP0(NameBase) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }

// Row with a single intrinsic.
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
      Intrinsic::LLVMIntrinsic, 0, TypeModifier }

// Row with a primary and an alternate intrinsic.
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
      Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
      TypeModifier }
6960 static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap
[] = {
6961 NEONMAP1(__a32_vcvt_bf16_f32
, arm_neon_vcvtfp2bf
, 0),
6962 NEONMAP0(splat_lane_v
),
6963 NEONMAP0(splat_laneq_v
),
6964 NEONMAP0(splatq_lane_v
),
6965 NEONMAP0(splatq_laneq_v
),
6966 NEONMAP2(vabd_v
, arm_neon_vabdu
, arm_neon_vabds
, Add1ArgType
| UnsignedAlts
),
6967 NEONMAP2(vabdq_v
, arm_neon_vabdu
, arm_neon_vabds
, Add1ArgType
| UnsignedAlts
),
6968 NEONMAP1(vabs_v
, arm_neon_vabs
, 0),
6969 NEONMAP1(vabsq_v
, arm_neon_vabs
, 0),
6973 NEONMAP1(vaesdq_u8
, arm_neon_aesd
, 0),
6974 NEONMAP1(vaeseq_u8
, arm_neon_aese
, 0),
6975 NEONMAP1(vaesimcq_u8
, arm_neon_aesimc
, 0),
6976 NEONMAP1(vaesmcq_u8
, arm_neon_aesmc
, 0),
6977 NEONMAP1(vbfdot_f32
, arm_neon_bfdot
, 0),
6978 NEONMAP1(vbfdotq_f32
, arm_neon_bfdot
, 0),
6979 NEONMAP1(vbfmlalbq_f32
, arm_neon_bfmlalb
, 0),
6980 NEONMAP1(vbfmlaltq_f32
, arm_neon_bfmlalt
, 0),
6981 NEONMAP1(vbfmmlaq_f32
, arm_neon_bfmmla
, 0),
6982 NEONMAP1(vbsl_v
, arm_neon_vbsl
, AddRetType
),
6983 NEONMAP1(vbslq_v
, arm_neon_vbsl
, AddRetType
),
6984 NEONMAP1(vcadd_rot270_f16
, arm_neon_vcadd_rot270
, Add1ArgType
),
6985 NEONMAP1(vcadd_rot270_f32
, arm_neon_vcadd_rot270
, Add1ArgType
),
6986 NEONMAP1(vcadd_rot90_f16
, arm_neon_vcadd_rot90
, Add1ArgType
),
6987 NEONMAP1(vcadd_rot90_f32
, arm_neon_vcadd_rot90
, Add1ArgType
),
6988 NEONMAP1(vcaddq_rot270_f16
, arm_neon_vcadd_rot270
, Add1ArgType
),
6989 NEONMAP1(vcaddq_rot270_f32
, arm_neon_vcadd_rot270
, Add1ArgType
),
6990 NEONMAP1(vcaddq_rot270_f64
, arm_neon_vcadd_rot270
, Add1ArgType
),
6991 NEONMAP1(vcaddq_rot90_f16
, arm_neon_vcadd_rot90
, Add1ArgType
),
6992 NEONMAP1(vcaddq_rot90_f32
, arm_neon_vcadd_rot90
, Add1ArgType
),
6993 NEONMAP1(vcaddq_rot90_f64
, arm_neon_vcadd_rot90
, Add1ArgType
),
6994 NEONMAP1(vcage_v
, arm_neon_vacge
, 0),
6995 NEONMAP1(vcageq_v
, arm_neon_vacge
, 0),
6996 NEONMAP1(vcagt_v
, arm_neon_vacgt
, 0),
6997 NEONMAP1(vcagtq_v
, arm_neon_vacgt
, 0),
6998 NEONMAP1(vcale_v
, arm_neon_vacge
, 0),
6999 NEONMAP1(vcaleq_v
, arm_neon_vacge
, 0),
7000 NEONMAP1(vcalt_v
, arm_neon_vacgt
, 0),
7001 NEONMAP1(vcaltq_v
, arm_neon_vacgt
, 0),
7010 NEONMAP1(vcls_v
, arm_neon_vcls
, Add1ArgType
),
7011 NEONMAP1(vclsq_v
, arm_neon_vcls
, Add1ArgType
),
7014 NEONMAP1(vclz_v
, ctlz
, Add1ArgType
),
7015 NEONMAP1(vclzq_v
, ctlz
, Add1ArgType
),
7016 NEONMAP1(vcnt_v
, ctpop
, Add1ArgType
),
7017 NEONMAP1(vcntq_v
, ctpop
, Add1ArgType
),
7018 NEONMAP1(vcvt_f16_f32
, arm_neon_vcvtfp2hf
, 0),
7019 NEONMAP0(vcvt_f16_s16
),
7020 NEONMAP0(vcvt_f16_u16
),
7021 NEONMAP1(vcvt_f32_f16
, arm_neon_vcvthf2fp
, 0),
7022 NEONMAP0(vcvt_f32_v
),
7023 NEONMAP1(vcvt_n_f16_s16
, arm_neon_vcvtfxs2fp
, 0),
7024 NEONMAP1(vcvt_n_f16_u16
, arm_neon_vcvtfxu2fp
, 0),
7025 NEONMAP2(vcvt_n_f32_v
, arm_neon_vcvtfxu2fp
, arm_neon_vcvtfxs2fp
, 0),
7026 NEONMAP1(vcvt_n_s16_f16
, arm_neon_vcvtfp2fxs
, 0),
7027 NEONMAP1(vcvt_n_s32_v
, arm_neon_vcvtfp2fxs
, 0),
7028 NEONMAP1(vcvt_n_s64_v
, arm_neon_vcvtfp2fxs
, 0),
7029 NEONMAP1(vcvt_n_u16_f16
, arm_neon_vcvtfp2fxu
, 0),
7030 NEONMAP1(vcvt_n_u32_v
, arm_neon_vcvtfp2fxu
, 0),
7031 NEONMAP1(vcvt_n_u64_v
, arm_neon_vcvtfp2fxu
, 0),
7032 NEONMAP0(vcvt_s16_f16
),
7033 NEONMAP0(vcvt_s32_v
),
7034 NEONMAP0(vcvt_s64_v
),
7035 NEONMAP0(vcvt_u16_f16
),
7036 NEONMAP0(vcvt_u32_v
),
7037 NEONMAP0(vcvt_u64_v
),
7038 NEONMAP1(vcvta_s16_f16
, arm_neon_vcvtas
, 0),
7039 NEONMAP1(vcvta_s32_v
, arm_neon_vcvtas
, 0),
7040 NEONMAP1(vcvta_s64_v
, arm_neon_vcvtas
, 0),
7041 NEONMAP1(vcvta_u16_f16
, arm_neon_vcvtau
, 0),
7042 NEONMAP1(vcvta_u32_v
, arm_neon_vcvtau
, 0),
7043 NEONMAP1(vcvta_u64_v
, arm_neon_vcvtau
, 0),
7044 NEONMAP1(vcvtaq_s16_f16
, arm_neon_vcvtas
, 0),
7045 NEONMAP1(vcvtaq_s32_v
, arm_neon_vcvtas
, 0),
7046 NEONMAP1(vcvtaq_s64_v
, arm_neon_vcvtas
, 0),
7047 NEONMAP1(vcvtaq_u16_f16
, arm_neon_vcvtau
, 0),
7048 NEONMAP1(vcvtaq_u32_v
, arm_neon_vcvtau
, 0),
7049 NEONMAP1(vcvtaq_u64_v
, arm_neon_vcvtau
, 0),
7050 NEONMAP1(vcvth_bf16_f32
, arm_neon_vcvtbfp2bf
, 0),
7051 NEONMAP1(vcvtm_s16_f16
, arm_neon_vcvtms
, 0),
7052 NEONMAP1(vcvtm_s32_v
, arm_neon_vcvtms
, 0),
7053 NEONMAP1(vcvtm_s64_v
, arm_neon_vcvtms
, 0),
7054 NEONMAP1(vcvtm_u16_f16
, arm_neon_vcvtmu
, 0),
7055 NEONMAP1(vcvtm_u32_v
, arm_neon_vcvtmu
, 0),
7056 NEONMAP1(vcvtm_u64_v
, arm_neon_vcvtmu
, 0),
7057 NEONMAP1(vcvtmq_s16_f16
, arm_neon_vcvtms
, 0),
7058 NEONMAP1(vcvtmq_s32_v
, arm_neon_vcvtms
, 0),
7059 NEONMAP1(vcvtmq_s64_v
, arm_neon_vcvtms
, 0),
7060 NEONMAP1(vcvtmq_u16_f16
, arm_neon_vcvtmu
, 0),
7061 NEONMAP1(vcvtmq_u32_v
, arm_neon_vcvtmu
, 0),
7062 NEONMAP1(vcvtmq_u64_v
, arm_neon_vcvtmu
, 0),
7063 NEONMAP1(vcvtn_s16_f16
, arm_neon_vcvtns
, 0),
7064 NEONMAP1(vcvtn_s32_v
, arm_neon_vcvtns
, 0),
7065 NEONMAP1(vcvtn_s64_v
, arm_neon_vcvtns
, 0),
7066 NEONMAP1(vcvtn_u16_f16
, arm_neon_vcvtnu
, 0),
7067 NEONMAP1(vcvtn_u32_v
, arm_neon_vcvtnu
, 0),
7068 NEONMAP1(vcvtn_u64_v
, arm_neon_vcvtnu
, 0),
7069 NEONMAP1(vcvtnq_s16_f16
, arm_neon_vcvtns
, 0),
7070 NEONMAP1(vcvtnq_s32_v
, arm_neon_vcvtns
, 0),
7071 NEONMAP1(vcvtnq_s64_v
, arm_neon_vcvtns
, 0),
7072 NEONMAP1(vcvtnq_u16_f16
, arm_neon_vcvtnu
, 0),
7073 NEONMAP1(vcvtnq_u32_v
, arm_neon_vcvtnu
, 0),
7074 NEONMAP1(vcvtnq_u64_v
, arm_neon_vcvtnu
, 0),
7075 NEONMAP1(vcvtp_s16_f16
, arm_neon_vcvtps
, 0),
7076 NEONMAP1(vcvtp_s32_v
, arm_neon_vcvtps
, 0),
7077 NEONMAP1(vcvtp_s64_v
, arm_neon_vcvtps
, 0),
7078 NEONMAP1(vcvtp_u16_f16
, arm_neon_vcvtpu
, 0),
7079 NEONMAP1(vcvtp_u32_v
, arm_neon_vcvtpu
, 0),
7080 NEONMAP1(vcvtp_u64_v
, arm_neon_vcvtpu
, 0),
7081 NEONMAP1(vcvtpq_s16_f16
, arm_neon_vcvtps
, 0),
7082 NEONMAP1(vcvtpq_s32_v
, arm_neon_vcvtps
, 0),
7083 NEONMAP1(vcvtpq_s64_v
, arm_neon_vcvtps
, 0),
7084 NEONMAP1(vcvtpq_u16_f16
, arm_neon_vcvtpu
, 0),
7085 NEONMAP1(vcvtpq_u32_v
, arm_neon_vcvtpu
, 0),
7086 NEONMAP1(vcvtpq_u64_v
, arm_neon_vcvtpu
, 0),
7087 NEONMAP0(vcvtq_f16_s16
),
7088 NEONMAP0(vcvtq_f16_u16
),
7089 NEONMAP0(vcvtq_f32_v
),
7090 NEONMAP1(vcvtq_n_f16_s16
, arm_neon_vcvtfxs2fp
, 0),
7091 NEONMAP1(vcvtq_n_f16_u16
, arm_neon_vcvtfxu2fp
, 0),
7092 NEONMAP2(vcvtq_n_f32_v
, arm_neon_vcvtfxu2fp
, arm_neon_vcvtfxs2fp
, 0),
7093 NEONMAP1(vcvtq_n_s16_f16
, arm_neon_vcvtfp2fxs
, 0),
7094 NEONMAP1(vcvtq_n_s32_v
, arm_neon_vcvtfp2fxs
, 0),
7095 NEONMAP1(vcvtq_n_s64_v
, arm_neon_vcvtfp2fxs
, 0),
7096 NEONMAP1(vcvtq_n_u16_f16
, arm_neon_vcvtfp2fxu
, 0),
7097 NEONMAP1(vcvtq_n_u32_v
, arm_neon_vcvtfp2fxu
, 0),
7098 NEONMAP1(vcvtq_n_u64_v
, arm_neon_vcvtfp2fxu
, 0),
7099 NEONMAP0(vcvtq_s16_f16
),
7100 NEONMAP0(vcvtq_s32_v
),
7101 NEONMAP0(vcvtq_s64_v
),
7102 NEONMAP0(vcvtq_u16_f16
),
7103 NEONMAP0(vcvtq_u32_v
),
7104 NEONMAP0(vcvtq_u64_v
),
7105 NEONMAP1(vdot_s32
, arm_neon_sdot
, 0),
7106 NEONMAP1(vdot_u32
, arm_neon_udot
, 0),
7107 NEONMAP1(vdotq_s32
, arm_neon_sdot
, 0),
7108 NEONMAP1(vdotq_u32
, arm_neon_udot
, 0),
7113 NEONMAP2(vhadd_v
, arm_neon_vhaddu
, arm_neon_vhadds
, Add1ArgType
| UnsignedAlts
),
7114 NEONMAP2(vhaddq_v
, arm_neon_vhaddu
, arm_neon_vhadds
, Add1ArgType
| UnsignedAlts
),
7115 NEONMAP2(vhsub_v
, arm_neon_vhsubu
, arm_neon_vhsubs
, Add1ArgType
| UnsignedAlts
),
7116 NEONMAP2(vhsubq_v
, arm_neon_vhsubu
, arm_neon_vhsubs
, Add1ArgType
| UnsignedAlts
),
7117 NEONMAP0(vld1_dup_v
),
7118 NEONMAP1(vld1_v
, arm_neon_vld1
, 0),
7119 NEONMAP1(vld1_x2_v
, arm_neon_vld1x2
, 0),
7120 NEONMAP1(vld1_x3_v
, arm_neon_vld1x3
, 0),
7121 NEONMAP1(vld1_x4_v
, arm_neon_vld1x4
, 0),
7122 NEONMAP0(vld1q_dup_v
),
7123 NEONMAP1(vld1q_v
, arm_neon_vld1
, 0),
7124 NEONMAP1(vld1q_x2_v
, arm_neon_vld1x2
, 0),
7125 NEONMAP1(vld1q_x3_v
, arm_neon_vld1x3
, 0),
7126 NEONMAP1(vld1q_x4_v
, arm_neon_vld1x4
, 0),
7127 NEONMAP1(vld2_dup_v
, arm_neon_vld2dup
, 0),
7128 NEONMAP1(vld2_lane_v
, arm_neon_vld2lane
, 0),
7129 NEONMAP1(vld2_v
, arm_neon_vld2
, 0),
7130 NEONMAP1(vld2q_dup_v
, arm_neon_vld2dup
, 0),
7131 NEONMAP1(vld2q_lane_v
, arm_neon_vld2lane
, 0),
7132 NEONMAP1(vld2q_v
, arm_neon_vld2
, 0),
7133 NEONMAP1(vld3_dup_v
, arm_neon_vld3dup
, 0),
7134 NEONMAP1(vld3_lane_v
, arm_neon_vld3lane
, 0),
7135 NEONMAP1(vld3_v
, arm_neon_vld3
, 0),
7136 NEONMAP1(vld3q_dup_v
, arm_neon_vld3dup
, 0),
7137 NEONMAP1(vld3q_lane_v
, arm_neon_vld3lane
, 0),
7138 NEONMAP1(vld3q_v
, arm_neon_vld3
, 0),
7139 NEONMAP1(vld4_dup_v
, arm_neon_vld4dup
, 0),
7140 NEONMAP1(vld4_lane_v
, arm_neon_vld4lane
, 0),
7141 NEONMAP1(vld4_v
, arm_neon_vld4
, 0),
7142 NEONMAP1(vld4q_dup_v
, arm_neon_vld4dup
, 0),
7143 NEONMAP1(vld4q_lane_v
, arm_neon_vld4lane
, 0),
7144 NEONMAP1(vld4q_v
, arm_neon_vld4
, 0),
7145 NEONMAP2(vmax_v
, arm_neon_vmaxu
, arm_neon_vmaxs
, Add1ArgType
| UnsignedAlts
),
7146 NEONMAP1(vmaxnm_v
, arm_neon_vmaxnm
, Add1ArgType
),
7147 NEONMAP1(vmaxnmq_v
, arm_neon_vmaxnm
, Add1ArgType
),
7148 NEONMAP2(vmaxq_v
, arm_neon_vmaxu
, arm_neon_vmaxs
, Add1ArgType
| UnsignedAlts
),
7149 NEONMAP2(vmin_v
, arm_neon_vminu
, arm_neon_vmins
, Add1ArgType
| UnsignedAlts
),
7150 NEONMAP1(vminnm_v
, arm_neon_vminnm
, Add1ArgType
),
7151 NEONMAP1(vminnmq_v
, arm_neon_vminnm
, Add1ArgType
),
7152 NEONMAP2(vminq_v
, arm_neon_vminu
, arm_neon_vmins
, Add1ArgType
| UnsignedAlts
),
7153 NEONMAP1(vmmlaq_s32
, arm_neon_smmla
, 0),
7154 NEONMAP1(vmmlaq_u32
, arm_neon_ummla
, 0),
7157 NEONMAP1(vmul_v
, arm_neon_vmulp
, Add1ArgType
),
7159 NEONMAP1(vmulq_v
, arm_neon_vmulp
, Add1ArgType
),
7160 NEONMAP2(vpadal_v
, arm_neon_vpadalu
, arm_neon_vpadals
, UnsignedAlts
),
7161 NEONMAP2(vpadalq_v
, arm_neon_vpadalu
, arm_neon_vpadals
, UnsignedAlts
),
7162 NEONMAP1(vpadd_v
, arm_neon_vpadd
, Add1ArgType
),
7163 NEONMAP2(vpaddl_v
, arm_neon_vpaddlu
, arm_neon_vpaddls
, UnsignedAlts
),
7164 NEONMAP2(vpaddlq_v
, arm_neon_vpaddlu
, arm_neon_vpaddls
, UnsignedAlts
),
7165 NEONMAP1(vpaddq_v
, arm_neon_vpadd
, Add1ArgType
),
7166 NEONMAP2(vpmax_v
, arm_neon_vpmaxu
, arm_neon_vpmaxs
, Add1ArgType
| UnsignedAlts
),
7167 NEONMAP2(vpmin_v
, arm_neon_vpminu
, arm_neon_vpmins
, Add1ArgType
| UnsignedAlts
),
7168 NEONMAP1(vqabs_v
, arm_neon_vqabs
, Add1ArgType
),
7169 NEONMAP1(vqabsq_v
, arm_neon_vqabs
, Add1ArgType
),
7170 NEONMAP2(vqadd_v
, uadd_sat
, sadd_sat
, Add1ArgType
| UnsignedAlts
),
7171 NEONMAP2(vqaddq_v
, uadd_sat
, sadd_sat
, Add1ArgType
| UnsignedAlts
),
7172 NEONMAP2(vqdmlal_v
, arm_neon_vqdmull
, sadd_sat
, 0),
7173 NEONMAP2(vqdmlsl_v
, arm_neon_vqdmull
, ssub_sat
, 0),
7174 NEONMAP1(vqdmulh_v
, arm_neon_vqdmulh
, Add1ArgType
),
7175 NEONMAP1(vqdmulhq_v
, arm_neon_vqdmulh
, Add1ArgType
),
7176 NEONMAP1(vqdmull_v
, arm_neon_vqdmull
, Add1ArgType
),
7177 NEONMAP2(vqmovn_v
, arm_neon_vqmovnu
, arm_neon_vqmovns
, Add1ArgType
| UnsignedAlts
),
7178 NEONMAP1(vqmovun_v
, arm_neon_vqmovnsu
, Add1ArgType
),
7179 NEONMAP1(vqneg_v
, arm_neon_vqneg
, Add1ArgType
),
7180 NEONMAP1(vqnegq_v
, arm_neon_vqneg
, Add1ArgType
),
7181 NEONMAP1(vqrdmlah_s16
, arm_neon_vqrdmlah
, Add1ArgType
),
7182 NEONMAP1(vqrdmlah_s32
, arm_neon_vqrdmlah
, Add1ArgType
),
7183 NEONMAP1(vqrdmlahq_s16
, arm_neon_vqrdmlah
, Add1ArgType
),
7184 NEONMAP1(vqrdmlahq_s32
, arm_neon_vqrdmlah
, Add1ArgType
),
7185 NEONMAP1(vqrdmlsh_s16
, arm_neon_vqrdmlsh
, Add1ArgType
),
7186 NEONMAP1(vqrdmlsh_s32
, arm_neon_vqrdmlsh
, Add1ArgType
),
7187 NEONMAP1(vqrdmlshq_s16
, arm_neon_vqrdmlsh
, Add1ArgType
),
7188 NEONMAP1(vqrdmlshq_s32
, arm_neon_vqrdmlsh
, Add1ArgType
),
7189 NEONMAP1(vqrdmulh_v
, arm_neon_vqrdmulh
, Add1ArgType
),
7190 NEONMAP1(vqrdmulhq_v
, arm_neon_vqrdmulh
, Add1ArgType
),
7191 NEONMAP2(vqrshl_v
, arm_neon_vqrshiftu
, arm_neon_vqrshifts
, Add1ArgType
| UnsignedAlts
),
7192 NEONMAP2(vqrshlq_v
, arm_neon_vqrshiftu
, arm_neon_vqrshifts
, Add1ArgType
| UnsignedAlts
),
7193 NEONMAP2(vqshl_n_v
, arm_neon_vqshiftu
, arm_neon_vqshifts
, UnsignedAlts
),
7194 NEONMAP2(vqshl_v
, arm_neon_vqshiftu
, arm_neon_vqshifts
, Add1ArgType
| UnsignedAlts
),
7195 NEONMAP2(vqshlq_n_v
, arm_neon_vqshiftu
, arm_neon_vqshifts
, UnsignedAlts
),
7196 NEONMAP2(vqshlq_v
, arm_neon_vqshiftu
, arm_neon_vqshifts
, Add1ArgType
| UnsignedAlts
),
7197 NEONMAP1(vqshlu_n_v
, arm_neon_vqshiftsu
, 0),
7198 NEONMAP1(vqshluq_n_v
, arm_neon_vqshiftsu
, 0),
7199 NEONMAP2(vqsub_v
, usub_sat
, ssub_sat
, Add1ArgType
| UnsignedAlts
),
7200 NEONMAP2(vqsubq_v
, usub_sat
, ssub_sat
, Add1ArgType
| UnsignedAlts
),
7201 NEONMAP1(vraddhn_v
, arm_neon_vraddhn
, Add1ArgType
),
7202 NEONMAP2(vrecpe_v
, arm_neon_vrecpe
, arm_neon_vrecpe
, 0),
7203 NEONMAP2(vrecpeq_v
, arm_neon_vrecpe
, arm_neon_vrecpe
, 0),
7204 NEONMAP1(vrecps_v
, arm_neon_vrecps
, Add1ArgType
),
7205 NEONMAP1(vrecpsq_v
, arm_neon_vrecps
, Add1ArgType
),
7206 NEONMAP2(vrhadd_v
, arm_neon_vrhaddu
, arm_neon_vrhadds
, Add1ArgType
| UnsignedAlts
),
7207 NEONMAP2(vrhaddq_v
, arm_neon_vrhaddu
, arm_neon_vrhadds
, Add1ArgType
| UnsignedAlts
),
7208 NEONMAP1(vrnd_v
, arm_neon_vrintz
, Add1ArgType
),
7209 NEONMAP1(vrnda_v
, arm_neon_vrinta
, Add1ArgType
),
7210 NEONMAP1(vrndaq_v
, arm_neon_vrinta
, Add1ArgType
),
7213 NEONMAP1(vrndm_v
, arm_neon_vrintm
, Add1ArgType
),
7214 NEONMAP1(vrndmq_v
, arm_neon_vrintm
, Add1ArgType
),
7215 NEONMAP1(vrndn_v
, arm_neon_vrintn
, Add1ArgType
),
7216 NEONMAP1(vrndnq_v
, arm_neon_vrintn
, Add1ArgType
),
7217 NEONMAP1(vrndp_v
, arm_neon_vrintp
, Add1ArgType
),
7218 NEONMAP1(vrndpq_v
, arm_neon_vrintp
, Add1ArgType
),
7219 NEONMAP1(vrndq_v
, arm_neon_vrintz
, Add1ArgType
),
7220 NEONMAP1(vrndx_v
, arm_neon_vrintx
, Add1ArgType
),
7221 NEONMAP1(vrndxq_v
, arm_neon_vrintx
, Add1ArgType
),
7222 NEONMAP2(vrshl_v
, arm_neon_vrshiftu
, arm_neon_vrshifts
, Add1ArgType
| UnsignedAlts
),
7223 NEONMAP2(vrshlq_v
, arm_neon_vrshiftu
, arm_neon_vrshifts
, Add1ArgType
| UnsignedAlts
),
7224 NEONMAP2(vrshr_n_v
, arm_neon_vrshiftu
, arm_neon_vrshifts
, UnsignedAlts
),
7225 NEONMAP2(vrshrq_n_v
, arm_neon_vrshiftu
, arm_neon_vrshifts
, UnsignedAlts
),
7226 NEONMAP2(vrsqrte_v
, arm_neon_vrsqrte
, arm_neon_vrsqrte
, 0),
7227 NEONMAP2(vrsqrteq_v
, arm_neon_vrsqrte
, arm_neon_vrsqrte
, 0),
7228 NEONMAP1(vrsqrts_v
, arm_neon_vrsqrts
, Add1ArgType
),
7229 NEONMAP1(vrsqrtsq_v
, arm_neon_vrsqrts
, Add1ArgType
),
7230 NEONMAP1(vrsubhn_v
, arm_neon_vrsubhn
, Add1ArgType
),
7231 NEONMAP1(vsha1su0q_u32
, arm_neon_sha1su0
, 0),
7232 NEONMAP1(vsha1su1q_u32
, arm_neon_sha1su1
, 0),
7233 NEONMAP1(vsha256h2q_u32
, arm_neon_sha256h2
, 0),
7234 NEONMAP1(vsha256hq_u32
, arm_neon_sha256h
, 0),
7235 NEONMAP1(vsha256su0q_u32
, arm_neon_sha256su0
, 0),
7236 NEONMAP1(vsha256su1q_u32
, arm_neon_sha256su1
, 0),
7238 NEONMAP2(vshl_v
, arm_neon_vshiftu
, arm_neon_vshifts
, Add1ArgType
| UnsignedAlts
),
7239 NEONMAP0(vshll_n_v
),
7240 NEONMAP0(vshlq_n_v
),
7241 NEONMAP2(vshlq_v
, arm_neon_vshiftu
, arm_neon_vshifts
, Add1ArgType
| UnsignedAlts
),
7243 NEONMAP0(vshrn_n_v
),
7244 NEONMAP0(vshrq_n_v
),
7245 NEONMAP1(vst1_v
, arm_neon_vst1
, 0),
7246 NEONMAP1(vst1_x2_v
, arm_neon_vst1x2
, 0),
7247 NEONMAP1(vst1_x3_v
, arm_neon_vst1x3
, 0),
7248 NEONMAP1(vst1_x4_v
, arm_neon_vst1x4
, 0),
7249 NEONMAP1(vst1q_v
, arm_neon_vst1
, 0),
7250 NEONMAP1(vst1q_x2_v
, arm_neon_vst1x2
, 0),
7251 NEONMAP1(vst1q_x3_v
, arm_neon_vst1x3
, 0),
7252 NEONMAP1(vst1q_x4_v
, arm_neon_vst1x4
, 0),
7253 NEONMAP1(vst2_lane_v
, arm_neon_vst2lane
, 0),
7254 NEONMAP1(vst2_v
, arm_neon_vst2
, 0),
7255 NEONMAP1(vst2q_lane_v
, arm_neon_vst2lane
, 0),
7256 NEONMAP1(vst2q_v
, arm_neon_vst2
, 0),
7257 NEONMAP1(vst3_lane_v
, arm_neon_vst3lane
, 0),
7258 NEONMAP1(vst3_v
, arm_neon_vst3
, 0),
7259 NEONMAP1(vst3q_lane_v
, arm_neon_vst3lane
, 0),
7260 NEONMAP1(vst3q_v
, arm_neon_vst3
, 0),
7261 NEONMAP1(vst4_lane_v
, arm_neon_vst4lane
, 0),
7262 NEONMAP1(vst4_v
, arm_neon_vst4
, 0),
7263 NEONMAP1(vst4q_lane_v
, arm_neon_vst4lane
, 0),
7264 NEONMAP1(vst4q_v
, arm_neon_vst4
, 0),
7270 NEONMAP1(vusdot_s32
, arm_neon_usdot
, 0),
7271 NEONMAP1(vusdotq_s32
, arm_neon_usdot
, 0),
7272 NEONMAP1(vusmmlaq_s32
, arm_neon_usmmla
, 0),
7279 static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap
[] = {
7280 NEONMAP1(__a64_vcvtq_low_bf16_f32
, aarch64_neon_bfcvtn
, 0),
7281 NEONMAP0(splat_lane_v
),
7282 NEONMAP0(splat_laneq_v
),
7283 NEONMAP0(splatq_lane_v
),
7284 NEONMAP0(splatq_laneq_v
),
7285 NEONMAP1(vabs_v
, aarch64_neon_abs
, 0),
7286 NEONMAP1(vabsq_v
, aarch64_neon_abs
, 0),
7289 NEONMAP0(vaddq_p128
),
7291 NEONMAP1(vaesdq_u8
, aarch64_crypto_aesd
, 0),
7292 NEONMAP1(vaeseq_u8
, aarch64_crypto_aese
, 0),
7293 NEONMAP1(vaesimcq_u8
, aarch64_crypto_aesimc
, 0),
7294 NEONMAP1(vaesmcq_u8
, aarch64_crypto_aesmc
, 0),
7295 NEONMAP2(vbcaxq_s16
, aarch64_crypto_bcaxu
, aarch64_crypto_bcaxs
, Add1ArgType
| UnsignedAlts
),
7296 NEONMAP2(vbcaxq_s32
, aarch64_crypto_bcaxu
, aarch64_crypto_bcaxs
, Add1ArgType
| UnsignedAlts
),
7297 NEONMAP2(vbcaxq_s64
, aarch64_crypto_bcaxu
, aarch64_crypto_bcaxs
, Add1ArgType
| UnsignedAlts
),
7298 NEONMAP2(vbcaxq_s8
, aarch64_crypto_bcaxu
, aarch64_crypto_bcaxs
, Add1ArgType
| UnsignedAlts
),
7299 NEONMAP2(vbcaxq_u16
, aarch64_crypto_bcaxu
, aarch64_crypto_bcaxs
, Add1ArgType
| UnsignedAlts
),
7300 NEONMAP2(vbcaxq_u32
, aarch64_crypto_bcaxu
, aarch64_crypto_bcaxs
, Add1ArgType
| UnsignedAlts
),
7301 NEONMAP2(vbcaxq_u64
, aarch64_crypto_bcaxu
, aarch64_crypto_bcaxs
, Add1ArgType
| UnsignedAlts
),
7302 NEONMAP2(vbcaxq_u8
, aarch64_crypto_bcaxu
, aarch64_crypto_bcaxs
, Add1ArgType
| UnsignedAlts
),
7303 NEONMAP1(vbfdot_f32
, aarch64_neon_bfdot
, 0),
7304 NEONMAP1(vbfdotq_f32
, aarch64_neon_bfdot
, 0),
7305 NEONMAP1(vbfmlalbq_f32
, aarch64_neon_bfmlalb
, 0),
7306 NEONMAP1(vbfmlaltq_f32
, aarch64_neon_bfmlalt
, 0),
7307 NEONMAP1(vbfmmlaq_f32
, aarch64_neon_bfmmla
, 0),
7308 NEONMAP1(vcadd_rot270_f16
, aarch64_neon_vcadd_rot270
, Add1ArgType
),
7309 NEONMAP1(vcadd_rot270_f32
, aarch64_neon_vcadd_rot270
, Add1ArgType
),
7310 NEONMAP1(vcadd_rot90_f16
, aarch64_neon_vcadd_rot90
, Add1ArgType
),
7311 NEONMAP1(vcadd_rot90_f32
, aarch64_neon_vcadd_rot90
, Add1ArgType
),
7312 NEONMAP1(vcaddq_rot270_f16
, aarch64_neon_vcadd_rot270
, Add1ArgType
),
7313 NEONMAP1(vcaddq_rot270_f32
, aarch64_neon_vcadd_rot270
, Add1ArgType
),
7314 NEONMAP1(vcaddq_rot270_f64
, aarch64_neon_vcadd_rot270
, Add1ArgType
),
7315 NEONMAP1(vcaddq_rot90_f16
, aarch64_neon_vcadd_rot90
, Add1ArgType
),
7316 NEONMAP1(vcaddq_rot90_f32
, aarch64_neon_vcadd_rot90
, Add1ArgType
),
7317 NEONMAP1(vcaddq_rot90_f64
, aarch64_neon_vcadd_rot90
, Add1ArgType
),
7318 NEONMAP1(vcage_v
, aarch64_neon_facge
, 0),
7319 NEONMAP1(vcageq_v
, aarch64_neon_facge
, 0),
7320 NEONMAP1(vcagt_v
, aarch64_neon_facgt
, 0),
7321 NEONMAP1(vcagtq_v
, aarch64_neon_facgt
, 0),
7322 NEONMAP1(vcale_v
, aarch64_neon_facge
, 0),
7323 NEONMAP1(vcaleq_v
, aarch64_neon_facge
, 0),
7324 NEONMAP1(vcalt_v
, aarch64_neon_facgt
, 0),
7325 NEONMAP1(vcaltq_v
, aarch64_neon_facgt
, 0),
7334 NEONMAP1(vcls_v
, aarch64_neon_cls
, Add1ArgType
),
7335 NEONMAP1(vclsq_v
, aarch64_neon_cls
, Add1ArgType
),
7338 NEONMAP1(vclz_v
, ctlz
, Add1ArgType
),
7339 NEONMAP1(vclzq_v
, ctlz
, Add1ArgType
),
7340 NEONMAP1(vcmla_f16
, aarch64_neon_vcmla_rot0
, Add1ArgType
),
7341 NEONMAP1(vcmla_f32
, aarch64_neon_vcmla_rot0
, Add1ArgType
),
7342 NEONMAP1(vcmla_rot180_f16
, aarch64_neon_vcmla_rot180
, Add1ArgType
),
7343 NEONMAP1(vcmla_rot180_f32
, aarch64_neon_vcmla_rot180
, Add1ArgType
),
7344 NEONMAP1(vcmla_rot270_f16
, aarch64_neon_vcmla_rot270
, Add1ArgType
),
7345 NEONMAP1(vcmla_rot270_f32
, aarch64_neon_vcmla_rot270
, Add1ArgType
),
7346 NEONMAP1(vcmla_rot90_f16
, aarch64_neon_vcmla_rot90
, Add1ArgType
),
7347 NEONMAP1(vcmla_rot90_f32
, aarch64_neon_vcmla_rot90
, Add1ArgType
),
7348 NEONMAP1(vcmlaq_f16
, aarch64_neon_vcmla_rot0
, Add1ArgType
),
7349 NEONMAP1(vcmlaq_f32
, aarch64_neon_vcmla_rot0
, Add1ArgType
),
7350 NEONMAP1(vcmlaq_f64
, aarch64_neon_vcmla_rot0
, Add1ArgType
),
7351 NEONMAP1(vcmlaq_rot180_f16
, aarch64_neon_vcmla_rot180
, Add1ArgType
),
7352 NEONMAP1(vcmlaq_rot180_f32
, aarch64_neon_vcmla_rot180
, Add1ArgType
),
7353 NEONMAP1(vcmlaq_rot180_f64
, aarch64_neon_vcmla_rot180
, Add1ArgType
),
7354 NEONMAP1(vcmlaq_rot270_f16
, aarch64_neon_vcmla_rot270
, Add1ArgType
),
7355 NEONMAP1(vcmlaq_rot270_f32
, aarch64_neon_vcmla_rot270
, Add1ArgType
),
7356 NEONMAP1(vcmlaq_rot270_f64
, aarch64_neon_vcmla_rot270
, Add1ArgType
),
7357 NEONMAP1(vcmlaq_rot90_f16
, aarch64_neon_vcmla_rot90
, Add1ArgType
),
7358 NEONMAP1(vcmlaq_rot90_f32
, aarch64_neon_vcmla_rot90
, Add1ArgType
),
7359 NEONMAP1(vcmlaq_rot90_f64
, aarch64_neon_vcmla_rot90
, Add1ArgType
),
7360 NEONMAP1(vcnt_v
, ctpop
, Add1ArgType
),
7361 NEONMAP1(vcntq_v
, ctpop
, Add1ArgType
),
7362 NEONMAP1(vcvt_f16_f32
, aarch64_neon_vcvtfp2hf
, 0),
7363 NEONMAP0(vcvt_f16_s16
),
7364 NEONMAP0(vcvt_f16_u16
),
7365 NEONMAP1(vcvt_f32_f16
, aarch64_neon_vcvthf2fp
, 0),
7366 NEONMAP0(vcvt_f32_v
),
7367 NEONMAP1(vcvt_n_f16_s16
, aarch64_neon_vcvtfxs2fp
, 0),
7368 NEONMAP1(vcvt_n_f16_u16
, aarch64_neon_vcvtfxu2fp
, 0),
7369 NEONMAP2(vcvt_n_f32_v
, aarch64_neon_vcvtfxu2fp
, aarch64_neon_vcvtfxs2fp
, 0),
7370 NEONMAP2(vcvt_n_f64_v
, aarch64_neon_vcvtfxu2fp
, aarch64_neon_vcvtfxs2fp
, 0),
7371 NEONMAP1(vcvt_n_s16_f16
, aarch64_neon_vcvtfp2fxs
, 0),
7372 NEONMAP1(vcvt_n_s32_v
, aarch64_neon_vcvtfp2fxs
, 0),
7373 NEONMAP1(vcvt_n_s64_v
, aarch64_neon_vcvtfp2fxs
, 0),
7374 NEONMAP1(vcvt_n_u16_f16
, aarch64_neon_vcvtfp2fxu
, 0),
7375 NEONMAP1(vcvt_n_u32_v
, aarch64_neon_vcvtfp2fxu
, 0),
7376 NEONMAP1(vcvt_n_u64_v
, aarch64_neon_vcvtfp2fxu
, 0),
7377 NEONMAP0(vcvtq_f16_s16
),
7378 NEONMAP0(vcvtq_f16_u16
),
7379 NEONMAP0(vcvtq_f32_v
),
7380 NEONMAP1(vcvtq_high_bf16_f32
, aarch64_neon_bfcvtn2
, 0),
7381 NEONMAP1(vcvtq_n_f16_s16
, aarch64_neon_vcvtfxs2fp
, 0),
7382 NEONMAP1(vcvtq_n_f16_u16
, aarch64_neon_vcvtfxu2fp
, 0),
7383 NEONMAP2(vcvtq_n_f32_v
, aarch64_neon_vcvtfxu2fp
, aarch64_neon_vcvtfxs2fp
, 0),
7384 NEONMAP2(vcvtq_n_f64_v
, aarch64_neon_vcvtfxu2fp
, aarch64_neon_vcvtfxs2fp
, 0),
7385 NEONMAP1(vcvtq_n_s16_f16
, aarch64_neon_vcvtfp2fxs
, 0),
7386 NEONMAP1(vcvtq_n_s32_v
, aarch64_neon_vcvtfp2fxs
, 0),
7387 NEONMAP1(vcvtq_n_s64_v
, aarch64_neon_vcvtfp2fxs
, 0),
7388 NEONMAP1(vcvtq_n_u16_f16
, aarch64_neon_vcvtfp2fxu
, 0),
7389 NEONMAP1(vcvtq_n_u32_v
, aarch64_neon_vcvtfp2fxu
, 0),
7390 NEONMAP1(vcvtq_n_u64_v
, aarch64_neon_vcvtfp2fxu
, 0),
7391 NEONMAP1(vcvtx_f32_v
, aarch64_neon_fcvtxn
, AddRetType
| Add1ArgType
),
7392 NEONMAP1(vdot_s32
, aarch64_neon_sdot
, 0),
7393 NEONMAP1(vdot_u32
, aarch64_neon_udot
, 0),
7394 NEONMAP1(vdotq_s32
, aarch64_neon_sdot
, 0),
7395 NEONMAP1(vdotq_u32
, aarch64_neon_udot
, 0),
7396 NEONMAP2(veor3q_s16
, aarch64_crypto_eor3u
, aarch64_crypto_eor3s
, Add1ArgType
| UnsignedAlts
),
7397 NEONMAP2(veor3q_s32
, aarch64_crypto_eor3u
, aarch64_crypto_eor3s
, Add1ArgType
| UnsignedAlts
),
7398 NEONMAP2(veor3q_s64
, aarch64_crypto_eor3u
, aarch64_crypto_eor3s
, Add1ArgType
| UnsignedAlts
),
7399 NEONMAP2(veor3q_s8
, aarch64_crypto_eor3u
, aarch64_crypto_eor3s
, Add1ArgType
| UnsignedAlts
),
7400 NEONMAP2(veor3q_u16
, aarch64_crypto_eor3u
, aarch64_crypto_eor3s
, Add1ArgType
| UnsignedAlts
),
7401 NEONMAP2(veor3q_u32
, aarch64_crypto_eor3u
, aarch64_crypto_eor3s
, Add1ArgType
| UnsignedAlts
),
7402 NEONMAP2(veor3q_u64
, aarch64_crypto_eor3u
, aarch64_crypto_eor3s
, Add1ArgType
| UnsignedAlts
),
7403 NEONMAP2(veor3q_u8
, aarch64_crypto_eor3u
, aarch64_crypto_eor3s
, Add1ArgType
| UnsignedAlts
),
7408 NEONMAP1(vfmlal_high_f16
, aarch64_neon_fmlal2
, 0),
7409 NEONMAP1(vfmlal_low_f16
, aarch64_neon_fmlal
, 0),
7410 NEONMAP1(vfmlalq_high_f16
, aarch64_neon_fmlal2
, 0),
7411 NEONMAP1(vfmlalq_low_f16
, aarch64_neon_fmlal
, 0),
7412 NEONMAP1(vfmlsl_high_f16
, aarch64_neon_fmlsl2
, 0),
7413 NEONMAP1(vfmlsl_low_f16
, aarch64_neon_fmlsl
, 0),
7414 NEONMAP1(vfmlslq_high_f16
, aarch64_neon_fmlsl2
, 0),
7415 NEONMAP1(vfmlslq_low_f16
, aarch64_neon_fmlsl
, 0),
7416 NEONMAP2(vhadd_v
, aarch64_neon_uhadd
, aarch64_neon_shadd
, Add1ArgType
| UnsignedAlts
),
7417 NEONMAP2(vhaddq_v
, aarch64_neon_uhadd
, aarch64_neon_shadd
, Add1ArgType
| UnsignedAlts
),
7418 NEONMAP2(vhsub_v
, aarch64_neon_uhsub
, aarch64_neon_shsub
, Add1ArgType
| UnsignedAlts
),
7419 NEONMAP2(vhsubq_v
, aarch64_neon_uhsub
, aarch64_neon_shsub
, Add1ArgType
| UnsignedAlts
),
7420 NEONMAP1(vld1_x2_v
, aarch64_neon_ld1x2
, 0),
7421 NEONMAP1(vld1_x3_v
, aarch64_neon_ld1x3
, 0),
7422 NEONMAP1(vld1_x4_v
, aarch64_neon_ld1x4
, 0),
7423 NEONMAP1(vld1q_x2_v
, aarch64_neon_ld1x2
, 0),
7424 NEONMAP1(vld1q_x3_v
, aarch64_neon_ld1x3
, 0),
7425 NEONMAP1(vld1q_x4_v
, aarch64_neon_ld1x4
, 0),
7426 NEONMAP1(vmmlaq_s32
, aarch64_neon_smmla
, 0),
7427 NEONMAP1(vmmlaq_u32
, aarch64_neon_ummla
, 0),
7430 NEONMAP1(vmul_v
, aarch64_neon_pmul
, Add1ArgType
),
7431 NEONMAP1(vmulq_v
, aarch64_neon_pmul
, Add1ArgType
),
7432 NEONMAP1(vpadd_v
, aarch64_neon_addp
, Add1ArgType
),
7433 NEONMAP2(vpaddl_v
, aarch64_neon_uaddlp
, aarch64_neon_saddlp
, UnsignedAlts
),
7434 NEONMAP2(vpaddlq_v
, aarch64_neon_uaddlp
, aarch64_neon_saddlp
, UnsignedAlts
),
7435 NEONMAP1(vpaddq_v
, aarch64_neon_addp
, Add1ArgType
),
7436 NEONMAP1(vqabs_v
, aarch64_neon_sqabs
, Add1ArgType
),
7437 NEONMAP1(vqabsq_v
, aarch64_neon_sqabs
, Add1ArgType
),
7438 NEONMAP2(vqadd_v
, aarch64_neon_uqadd
, aarch64_neon_sqadd
, Add1ArgType
| UnsignedAlts
),
7439 NEONMAP2(vqaddq_v
, aarch64_neon_uqadd
, aarch64_neon_sqadd
, Add1ArgType
| UnsignedAlts
),
7440 NEONMAP2(vqdmlal_v
, aarch64_neon_sqdmull
, aarch64_neon_sqadd
, 0),
7441 NEONMAP2(vqdmlsl_v
, aarch64_neon_sqdmull
, aarch64_neon_sqsub
, 0),
7442 NEONMAP1(vqdmulh_lane_v
, aarch64_neon_sqdmulh_lane
, 0),
7443 NEONMAP1(vqdmulh_laneq_v
, aarch64_neon_sqdmulh_laneq
, 0),
7444 NEONMAP1(vqdmulh_v
, aarch64_neon_sqdmulh
, Add1ArgType
),
7445 NEONMAP1(vqdmulhq_lane_v
, aarch64_neon_sqdmulh_lane
, 0),
7446 NEONMAP1(vqdmulhq_laneq_v
, aarch64_neon_sqdmulh_laneq
, 0),
7447 NEONMAP1(vqdmulhq_v
, aarch64_neon_sqdmulh
, Add1ArgType
),
7448 NEONMAP1(vqdmull_v
, aarch64_neon_sqdmull
, Add1ArgType
),
7449 NEONMAP2(vqmovn_v
, aarch64_neon_uqxtn
, aarch64_neon_sqxtn
, Add1ArgType
| UnsignedAlts
),
7450 NEONMAP1(vqmovun_v
, aarch64_neon_sqxtun
, Add1ArgType
),
7451 NEONMAP1(vqneg_v
, aarch64_neon_sqneg
, Add1ArgType
),
7452 NEONMAP1(vqnegq_v
, aarch64_neon_sqneg
, Add1ArgType
),
7453 NEONMAP1(vqrdmlah_s16
, aarch64_neon_sqrdmlah
, Add1ArgType
),
7454 NEONMAP1(vqrdmlah_s32
, aarch64_neon_sqrdmlah
, Add1ArgType
),
7455 NEONMAP1(vqrdmlahq_s16
, aarch64_neon_sqrdmlah
, Add1ArgType
),
7456 NEONMAP1(vqrdmlahq_s32
, aarch64_neon_sqrdmlah
, Add1ArgType
),
7457 NEONMAP1(vqrdmlsh_s16
, aarch64_neon_sqrdmlsh
, Add1ArgType
),
7458 NEONMAP1(vqrdmlsh_s32
, aarch64_neon_sqrdmlsh
, Add1ArgType
),
7459 NEONMAP1(vqrdmlshq_s16
, aarch64_neon_sqrdmlsh
, Add1ArgType
),
7460 NEONMAP1(vqrdmlshq_s32
, aarch64_neon_sqrdmlsh
, Add1ArgType
),
7461 NEONMAP1(vqrdmulh_lane_v
, aarch64_neon_sqrdmulh_lane
, 0),
7462 NEONMAP1(vqrdmulh_laneq_v
, aarch64_neon_sqrdmulh_laneq
, 0),
7463 NEONMAP1(vqrdmulh_v
, aarch64_neon_sqrdmulh
, Add1ArgType
),
7464 NEONMAP1(vqrdmulhq_lane_v
, aarch64_neon_sqrdmulh_lane
, 0),
7465 NEONMAP1(vqrdmulhq_laneq_v
, aarch64_neon_sqrdmulh_laneq
, 0),
7466 NEONMAP1(vqrdmulhq_v
, aarch64_neon_sqrdmulh
, Add1ArgType
),
7467 NEONMAP2(vqrshl_v
, aarch64_neon_uqrshl
, aarch64_neon_sqrshl
, Add1ArgType
| UnsignedAlts
),
7468 NEONMAP2(vqrshlq_v
, aarch64_neon_uqrshl
, aarch64_neon_sqrshl
, Add1ArgType
| UnsignedAlts
),
7469 NEONMAP2(vqshl_n_v
, aarch64_neon_uqshl
, aarch64_neon_sqshl
, UnsignedAlts
),
7470 NEONMAP2(vqshl_v
, aarch64_neon_uqshl
, aarch64_neon_sqshl
, Add1ArgType
| UnsignedAlts
),
7471 NEONMAP2(vqshlq_n_v
, aarch64_neon_uqshl
, aarch64_neon_sqshl
,UnsignedAlts
),
7472 NEONMAP2(vqshlq_v
, aarch64_neon_uqshl
, aarch64_neon_sqshl
, Add1ArgType
| UnsignedAlts
),
7473 NEONMAP1(vqshlu_n_v
, aarch64_neon_sqshlu
, 0),
7474 NEONMAP1(vqshluq_n_v
, aarch64_neon_sqshlu
, 0),
7475 NEONMAP2(vqsub_v
, aarch64_neon_uqsub
, aarch64_neon_sqsub
, Add1ArgType
| UnsignedAlts
),
7476 NEONMAP2(vqsubq_v
, aarch64_neon_uqsub
, aarch64_neon_sqsub
, Add1ArgType
| UnsignedAlts
),
7477 NEONMAP1(vraddhn_v
, aarch64_neon_raddhn
, Add1ArgType
),
7478 NEONMAP1(vrax1q_u64
, aarch64_crypto_rax1
, 0),
7479 NEONMAP2(vrecpe_v
, aarch64_neon_frecpe
, aarch64_neon_urecpe
, 0),
7480 NEONMAP2(vrecpeq_v
, aarch64_neon_frecpe
, aarch64_neon_urecpe
, 0),
7481 NEONMAP1(vrecps_v
, aarch64_neon_frecps
, Add1ArgType
),
7482 NEONMAP1(vrecpsq_v
, aarch64_neon_frecps
, Add1ArgType
),
7483 NEONMAP2(vrhadd_v
, aarch64_neon_urhadd
, aarch64_neon_srhadd
, Add1ArgType
| UnsignedAlts
),
7484 NEONMAP2(vrhaddq_v
, aarch64_neon_urhadd
, aarch64_neon_srhadd
, Add1ArgType
| UnsignedAlts
),
7485 NEONMAP1(vrnd32x_f32
, aarch64_neon_frint32x
, Add1ArgType
),
7486 NEONMAP1(vrnd32x_f64
, aarch64_neon_frint32x
, Add1ArgType
),
7487 NEONMAP1(vrnd32xq_f32
, aarch64_neon_frint32x
, Add1ArgType
),
7488 NEONMAP1(vrnd32xq_f64
, aarch64_neon_frint32x
, Add1ArgType
),
7489 NEONMAP1(vrnd32z_f32
, aarch64_neon_frint32z
, Add1ArgType
),
7490 NEONMAP1(vrnd32z_f64
, aarch64_neon_frint32z
, Add1ArgType
),
7491 NEONMAP1(vrnd32zq_f32
, aarch64_neon_frint32z
, Add1ArgType
),
7492 NEONMAP1(vrnd32zq_f64
, aarch64_neon_frint32z
, Add1ArgType
),
7493 NEONMAP1(vrnd64x_f32
, aarch64_neon_frint64x
, Add1ArgType
),
7494 NEONMAP1(vrnd64x_f64
, aarch64_neon_frint64x
, Add1ArgType
),
7495 NEONMAP1(vrnd64xq_f32
, aarch64_neon_frint64x
, Add1ArgType
),
7496 NEONMAP1(vrnd64xq_f64
, aarch64_neon_frint64x
, Add1ArgType
),
7497 NEONMAP1(vrnd64z_f32
, aarch64_neon_frint64z
, Add1ArgType
),
7498 NEONMAP1(vrnd64z_f64
, aarch64_neon_frint64z
, Add1ArgType
),
7499 NEONMAP1(vrnd64zq_f32
, aarch64_neon_frint64z
, Add1ArgType
),
7500 NEONMAP1(vrnd64zq_f64
, aarch64_neon_frint64z
, Add1ArgType
),
7503 NEONMAP2(vrshl_v
, aarch64_neon_urshl
, aarch64_neon_srshl
, Add1ArgType
| UnsignedAlts
),
7504 NEONMAP2(vrshlq_v
, aarch64_neon_urshl
, aarch64_neon_srshl
, Add1ArgType
| UnsignedAlts
),
7505 NEONMAP2(vrshr_n_v
, aarch64_neon_urshl
, aarch64_neon_srshl
, UnsignedAlts
),
7506 NEONMAP2(vrshrq_n_v
, aarch64_neon_urshl
, aarch64_neon_srshl
, UnsignedAlts
),
7507 NEONMAP2(vrsqrte_v
, aarch64_neon_frsqrte
, aarch64_neon_ursqrte
, 0),
7508 NEONMAP2(vrsqrteq_v
, aarch64_neon_frsqrte
, aarch64_neon_ursqrte
, 0),
7509 NEONMAP1(vrsqrts_v
, aarch64_neon_frsqrts
, Add1ArgType
),
7510 NEONMAP1(vrsqrtsq_v
, aarch64_neon_frsqrts
, Add1ArgType
),
7511 NEONMAP1(vrsubhn_v
, aarch64_neon_rsubhn
, Add1ArgType
),
7512 NEONMAP1(vsha1su0q_u32
, aarch64_crypto_sha1su0
, 0),
7513 NEONMAP1(vsha1su1q_u32
, aarch64_crypto_sha1su1
, 0),
7514 NEONMAP1(vsha256h2q_u32
, aarch64_crypto_sha256h2
, 0),
7515 NEONMAP1(vsha256hq_u32
, aarch64_crypto_sha256h
, 0),
7516 NEONMAP1(vsha256su0q_u32
, aarch64_crypto_sha256su0
, 0),
7517 NEONMAP1(vsha256su1q_u32
, aarch64_crypto_sha256su1
, 0),
7518 NEONMAP1(vsha512h2q_u64
, aarch64_crypto_sha512h2
, 0),
7519 NEONMAP1(vsha512hq_u64
, aarch64_crypto_sha512h
, 0),
7520 NEONMAP1(vsha512su0q_u64
, aarch64_crypto_sha512su0
, 0),
7521 NEONMAP1(vsha512su1q_u64
, aarch64_crypto_sha512su1
, 0),
7523 NEONMAP2(vshl_v
, aarch64_neon_ushl
, aarch64_neon_sshl
, Add1ArgType
| UnsignedAlts
),
7524 NEONMAP0(vshll_n_v
),
7525 NEONMAP0(vshlq_n_v
),
7526 NEONMAP2(vshlq_v
, aarch64_neon_ushl
, aarch64_neon_sshl
, Add1ArgType
| UnsignedAlts
),
7528 NEONMAP0(vshrn_n_v
),
7529 NEONMAP0(vshrq_n_v
),
7530 NEONMAP1(vsm3partw1q_u32
, aarch64_crypto_sm3partw1
, 0),
7531 NEONMAP1(vsm3partw2q_u32
, aarch64_crypto_sm3partw2
, 0),
7532 NEONMAP1(vsm3ss1q_u32
, aarch64_crypto_sm3ss1
, 0),
7533 NEONMAP1(vsm3tt1aq_u32
, aarch64_crypto_sm3tt1a
, 0),
7534 NEONMAP1(vsm3tt1bq_u32
, aarch64_crypto_sm3tt1b
, 0),
7535 NEONMAP1(vsm3tt2aq_u32
, aarch64_crypto_sm3tt2a
, 0),
7536 NEONMAP1(vsm3tt2bq_u32
, aarch64_crypto_sm3tt2b
, 0),
7537 NEONMAP1(vsm4ekeyq_u32
, aarch64_crypto_sm4ekey
, 0),
7538 NEONMAP1(vsm4eq_u32
, aarch64_crypto_sm4e
, 0),
7539 NEONMAP1(vst1_x2_v
, aarch64_neon_st1x2
, 0),
7540 NEONMAP1(vst1_x3_v
, aarch64_neon_st1x3
, 0),
7541 NEONMAP1(vst1_x4_v
, aarch64_neon_st1x4
, 0),
7542 NEONMAP1(vst1q_x2_v
, aarch64_neon_st1x2
, 0),
7543 NEONMAP1(vst1q_x3_v
, aarch64_neon_st1x3
, 0),
7544 NEONMAP1(vst1q_x4_v
, aarch64_neon_st1x4
, 0),
7548 NEONMAP1(vusdot_s32
, aarch64_neon_usdot
, 0),
7549 NEONMAP1(vusdotq_s32
, aarch64_neon_usdot
, 0),
7550 NEONMAP1(vusmmlaq_s32
, aarch64_neon_usmmla
, 0),
7551 NEONMAP1(vxarq_u64
, aarch64_crypto_xar
, 0),
7554 static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap
[] = {
7555 NEONMAP1(vabdd_f64
, aarch64_sisd_fabd
, Add1ArgType
),
7556 NEONMAP1(vabds_f32
, aarch64_sisd_fabd
, Add1ArgType
),
7557 NEONMAP1(vabsd_s64
, aarch64_neon_abs
, Add1ArgType
),
7558 NEONMAP1(vaddlv_s32
, aarch64_neon_saddlv
, AddRetType
| Add1ArgType
),
7559 NEONMAP1(vaddlv_u32
, aarch64_neon_uaddlv
, AddRetType
| Add1ArgType
),
7560 NEONMAP1(vaddlvq_s32
, aarch64_neon_saddlv
, AddRetType
| Add1ArgType
),
7561 NEONMAP1(vaddlvq_u32
, aarch64_neon_uaddlv
, AddRetType
| Add1ArgType
),
7562 NEONMAP1(vaddv_f32
, aarch64_neon_faddv
, AddRetType
| Add1ArgType
),
7563 NEONMAP1(vaddv_s32
, aarch64_neon_saddv
, AddRetType
| Add1ArgType
),
7564 NEONMAP1(vaddv_u32
, aarch64_neon_uaddv
, AddRetType
| Add1ArgType
),
7565 NEONMAP1(vaddvq_f32
, aarch64_neon_faddv
, AddRetType
| Add1ArgType
),
7566 NEONMAP1(vaddvq_f64
, aarch64_neon_faddv
, AddRetType
| Add1ArgType
),
7567 NEONMAP1(vaddvq_s32
, aarch64_neon_saddv
, AddRetType
| Add1ArgType
),
7568 NEONMAP1(vaddvq_s64
, aarch64_neon_saddv
, AddRetType
| Add1ArgType
),
7569 NEONMAP1(vaddvq_u32
, aarch64_neon_uaddv
, AddRetType
| Add1ArgType
),
7570 NEONMAP1(vaddvq_u64
, aarch64_neon_uaddv
, AddRetType
| Add1ArgType
),
7571 NEONMAP1(vcaged_f64
, aarch64_neon_facge
, AddRetType
| Add1ArgType
),
7572 NEONMAP1(vcages_f32
, aarch64_neon_facge
, AddRetType
| Add1ArgType
),
7573 NEONMAP1(vcagtd_f64
, aarch64_neon_facgt
, AddRetType
| Add1ArgType
),
7574 NEONMAP1(vcagts_f32
, aarch64_neon_facgt
, AddRetType
| Add1ArgType
),
7575 NEONMAP1(vcaled_f64
, aarch64_neon_facge
, AddRetType
| Add1ArgType
),
7576 NEONMAP1(vcales_f32
, aarch64_neon_facge
, AddRetType
| Add1ArgType
),
7577 NEONMAP1(vcaltd_f64
, aarch64_neon_facgt
, AddRetType
| Add1ArgType
),
7578 NEONMAP1(vcalts_f32
, aarch64_neon_facgt
, AddRetType
| Add1ArgType
),
7579 NEONMAP1(vcvtad_s64_f64
, aarch64_neon_fcvtas
, AddRetType
| Add1ArgType
),
7580 NEONMAP1(vcvtad_u64_f64
, aarch64_neon_fcvtau
, AddRetType
| Add1ArgType
),
7581 NEONMAP1(vcvtas_s32_f32
, aarch64_neon_fcvtas
, AddRetType
| Add1ArgType
),
7582 NEONMAP1(vcvtas_u32_f32
, aarch64_neon_fcvtau
, AddRetType
| Add1ArgType
),
7583 NEONMAP1(vcvtd_n_f64_s64
, aarch64_neon_vcvtfxs2fp
, AddRetType
| Add1ArgType
),
7584 NEONMAP1(vcvtd_n_f64_u64
, aarch64_neon_vcvtfxu2fp
, AddRetType
| Add1ArgType
),
7585 NEONMAP1(vcvtd_n_s64_f64
, aarch64_neon_vcvtfp2fxs
, AddRetType
| Add1ArgType
),
7586 NEONMAP1(vcvtd_n_u64_f64
, aarch64_neon_vcvtfp2fxu
, AddRetType
| Add1ArgType
),
7587 NEONMAP1(vcvtd_s64_f64
, aarch64_neon_fcvtzs
, AddRetType
| Add1ArgType
),
7588 NEONMAP1(vcvtd_u64_f64
, aarch64_neon_fcvtzu
, AddRetType
| Add1ArgType
),
7589 NEONMAP1(vcvth_bf16_f32
, aarch64_neon_bfcvt
, 0),
7590 NEONMAP1(vcvtmd_s64_f64
, aarch64_neon_fcvtms
, AddRetType
| Add1ArgType
),
7591 NEONMAP1(vcvtmd_u64_f64
, aarch64_neon_fcvtmu
, AddRetType
| Add1ArgType
),
7592 NEONMAP1(vcvtms_s32_f32
, aarch64_neon_fcvtms
, AddRetType
| Add1ArgType
),
7593 NEONMAP1(vcvtms_u32_f32
, aarch64_neon_fcvtmu
, AddRetType
| Add1ArgType
),
7594 NEONMAP1(vcvtnd_s64_f64
, aarch64_neon_fcvtns
, AddRetType
| Add1ArgType
),
7595 NEONMAP1(vcvtnd_u64_f64
, aarch64_neon_fcvtnu
, AddRetType
| Add1ArgType
),
7596 NEONMAP1(vcvtns_s32_f32
, aarch64_neon_fcvtns
, AddRetType
| Add1ArgType
),
7597 NEONMAP1(vcvtns_u32_f32
, aarch64_neon_fcvtnu
, AddRetType
| Add1ArgType
),
7598 NEONMAP1(vcvtpd_s64_f64
, aarch64_neon_fcvtps
, AddRetType
| Add1ArgType
),
7599 NEONMAP1(vcvtpd_u64_f64
, aarch64_neon_fcvtpu
, AddRetType
| Add1ArgType
),
7600 NEONMAP1(vcvtps_s32_f32
, aarch64_neon_fcvtps
, AddRetType
| Add1ArgType
),
7601 NEONMAP1(vcvtps_u32_f32
, aarch64_neon_fcvtpu
, AddRetType
| Add1ArgType
),
7602 NEONMAP1(vcvts_n_f32_s32
, aarch64_neon_vcvtfxs2fp
, AddRetType
| Add1ArgType
),
7603 NEONMAP1(vcvts_n_f32_u32
, aarch64_neon_vcvtfxu2fp
, AddRetType
| Add1ArgType
),
7604 NEONMAP1(vcvts_n_s32_f32
, aarch64_neon_vcvtfp2fxs
, AddRetType
| Add1ArgType
),
7605 NEONMAP1(vcvts_n_u32_f32
, aarch64_neon_vcvtfp2fxu
, AddRetType
| Add1ArgType
),
7606 NEONMAP1(vcvts_s32_f32
, aarch64_neon_fcvtzs
, AddRetType
| Add1ArgType
),
7607 NEONMAP1(vcvts_u32_f32
, aarch64_neon_fcvtzu
, AddRetType
| Add1ArgType
),
7608 NEONMAP1(vcvtxd_f32_f64
, aarch64_sisd_fcvtxn
, 0),
7609 NEONMAP1(vmaxnmv_f32
, aarch64_neon_fmaxnmv
, AddRetType
| Add1ArgType
),
7610 NEONMAP1(vmaxnmvq_f32
, aarch64_neon_fmaxnmv
, AddRetType
| Add1ArgType
),
7611 NEONMAP1(vmaxnmvq_f64
, aarch64_neon_fmaxnmv
, AddRetType
| Add1ArgType
),
7612 NEONMAP1(vmaxv_f32
, aarch64_neon_fmaxv
, AddRetType
| Add1ArgType
),
7613 NEONMAP1(vmaxv_s32
, aarch64_neon_smaxv
, AddRetType
| Add1ArgType
),
7614 NEONMAP1(vmaxv_u32
, aarch64_neon_umaxv
, AddRetType
| Add1ArgType
),
7615 NEONMAP1(vmaxvq_f32
, aarch64_neon_fmaxv
, AddRetType
| Add1ArgType
),
7616 NEONMAP1(vmaxvq_f64
, aarch64_neon_fmaxv
, AddRetType
| Add1ArgType
),
7617 NEONMAP1(vmaxvq_s32
, aarch64_neon_smaxv
, AddRetType
| Add1ArgType
),
7618 NEONMAP1(vmaxvq_u32
, aarch64_neon_umaxv
, AddRetType
| Add1ArgType
),
7619 NEONMAP1(vminnmv_f32
, aarch64_neon_fminnmv
, AddRetType
| Add1ArgType
),
7620 NEONMAP1(vminnmvq_f32
, aarch64_neon_fminnmv
, AddRetType
| Add1ArgType
),
7621 NEONMAP1(vminnmvq_f64
, aarch64_neon_fminnmv
, AddRetType
| Add1ArgType
),
7622 NEONMAP1(vminv_f32
, aarch64_neon_fminv
, AddRetType
| Add1ArgType
),
7623 NEONMAP1(vminv_s32
, aarch64_neon_sminv
, AddRetType
| Add1ArgType
),
7624 NEONMAP1(vminv_u32
, aarch64_neon_uminv
, AddRetType
| Add1ArgType
),
7625 NEONMAP1(vminvq_f32
, aarch64_neon_fminv
, AddRetType
| Add1ArgType
),
7626 NEONMAP1(vminvq_f64
, aarch64_neon_fminv
, AddRetType
| Add1ArgType
),
7627 NEONMAP1(vminvq_s32
, aarch64_neon_sminv
, AddRetType
| Add1ArgType
),
7628 NEONMAP1(vminvq_u32
, aarch64_neon_uminv
, AddRetType
| Add1ArgType
),
7629 NEONMAP1(vmull_p64
, aarch64_neon_pmull64
, 0),
7630 NEONMAP1(vmulxd_f64
, aarch64_neon_fmulx
, Add1ArgType
),
7631 NEONMAP1(vmulxs_f32
, aarch64_neon_fmulx
, Add1ArgType
),
7632 NEONMAP1(vpaddd_s64
, aarch64_neon_uaddv
, AddRetType
| Add1ArgType
),
7633 NEONMAP1(vpaddd_u64
, aarch64_neon_uaddv
, AddRetType
| Add1ArgType
),
7634 NEONMAP1(vpmaxnmqd_f64
, aarch64_neon_fmaxnmv
, AddRetType
| Add1ArgType
),
7635 NEONMAP1(vpmaxnms_f32
, aarch64_neon_fmaxnmv
, AddRetType
| Add1ArgType
),
7636 NEONMAP1(vpmaxqd_f64
, aarch64_neon_fmaxv
, AddRetType
| Add1ArgType
),
7637 NEONMAP1(vpmaxs_f32
, aarch64_neon_fmaxv
, AddRetType
| Add1ArgType
),
7638 NEONMAP1(vpminnmqd_f64
, aarch64_neon_fminnmv
, AddRetType
| Add1ArgType
),
7639 NEONMAP1(vpminnms_f32
, aarch64_neon_fminnmv
, AddRetType
| Add1ArgType
),
7640 NEONMAP1(vpminqd_f64
, aarch64_neon_fminv
, AddRetType
| Add1ArgType
),
7641 NEONMAP1(vpmins_f32
, aarch64_neon_fminv
, AddRetType
| Add1ArgType
),
7642 NEONMAP1(vqabsb_s8
, aarch64_neon_sqabs
, Vectorize1ArgType
| Use64BitVectors
),
7643 NEONMAP1(vqabsd_s64
, aarch64_neon_sqabs
, Add1ArgType
),
7644 NEONMAP1(vqabsh_s16
, aarch64_neon_sqabs
, Vectorize1ArgType
| Use64BitVectors
),
7645 NEONMAP1(vqabss_s32
, aarch64_neon_sqabs
, Add1ArgType
),
7646 NEONMAP1(vqaddb_s8
, aarch64_neon_sqadd
, Vectorize1ArgType
| Use64BitVectors
),
7647 NEONMAP1(vqaddb_u8
, aarch64_neon_uqadd
, Vectorize1ArgType
| Use64BitVectors
),
7648 NEONMAP1(vqaddd_s64
, aarch64_neon_sqadd
, Add1ArgType
),
7649 NEONMAP1(vqaddd_u64
, aarch64_neon_uqadd
, Add1ArgType
),
7650 NEONMAP1(vqaddh_s16
, aarch64_neon_sqadd
, Vectorize1ArgType
| Use64BitVectors
),
7651 NEONMAP1(vqaddh_u16
, aarch64_neon_uqadd
, Vectorize1ArgType
| Use64BitVectors
),
7652 NEONMAP1(vqadds_s32
, aarch64_neon_sqadd
, Add1ArgType
),
7653 NEONMAP1(vqadds_u32
, aarch64_neon_uqadd
, Add1ArgType
),
7654 NEONMAP1(vqdmulhh_s16
, aarch64_neon_sqdmulh
, Vectorize1ArgType
| Use64BitVectors
),
7655 NEONMAP1(vqdmulhs_s32
, aarch64_neon_sqdmulh
, Add1ArgType
),
7656 NEONMAP1(vqdmullh_s16
, aarch64_neon_sqdmull
, VectorRet
| Use128BitVectors
),
7657 NEONMAP1(vqdmulls_s32
, aarch64_neon_sqdmulls_scalar
, 0),
7658 NEONMAP1(vqmovnd_s64
, aarch64_neon_scalar_sqxtn
, AddRetType
| Add1ArgType
),
7659 NEONMAP1(vqmovnd_u64
, aarch64_neon_scalar_uqxtn
, AddRetType
| Add1ArgType
),
7660 NEONMAP1(vqmovnh_s16
, aarch64_neon_sqxtn
, VectorRet
| Use64BitVectors
),
7661 NEONMAP1(vqmovnh_u16
, aarch64_neon_uqxtn
, VectorRet
| Use64BitVectors
),
7662 NEONMAP1(vqmovns_s32
, aarch64_neon_sqxtn
, VectorRet
| Use64BitVectors
),
7663 NEONMAP1(vqmovns_u32
, aarch64_neon_uqxtn
, VectorRet
| Use64BitVectors
),
7664 NEONMAP1(vqmovund_s64
, aarch64_neon_scalar_sqxtun
, AddRetType
| Add1ArgType
),
7665 NEONMAP1(vqmovunh_s16
, aarch64_neon_sqxtun
, VectorRet
| Use64BitVectors
),
7666 NEONMAP1(vqmovuns_s32
, aarch64_neon_sqxtun
, VectorRet
| Use64BitVectors
),
7667 NEONMAP1(vqnegb_s8
, aarch64_neon_sqneg
, Vectorize1ArgType
| Use64BitVectors
),
7668 NEONMAP1(vqnegd_s64
, aarch64_neon_sqneg
, Add1ArgType
),
7669 NEONMAP1(vqnegh_s16
, aarch64_neon_sqneg
, Vectorize1ArgType
| Use64BitVectors
),
7670 NEONMAP1(vqnegs_s32
, aarch64_neon_sqneg
, Add1ArgType
),
7671 NEONMAP1(vqrdmlahh_s16
, aarch64_neon_sqrdmlah
, Vectorize1ArgType
| Use64BitVectors
),
7672 NEONMAP1(vqrdmlahs_s32
, aarch64_neon_sqrdmlah
, Add1ArgType
),
7673 NEONMAP1(vqrdmlshh_s16
, aarch64_neon_sqrdmlsh
, Vectorize1ArgType
| Use64BitVectors
),
7674 NEONMAP1(vqrdmlshs_s32
, aarch64_neon_sqrdmlsh
, Add1ArgType
),
7675 NEONMAP1(vqrdmulhh_s16
, aarch64_neon_sqrdmulh
, Vectorize1ArgType
| Use64BitVectors
),
7676 NEONMAP1(vqrdmulhs_s32
, aarch64_neon_sqrdmulh
, Add1ArgType
),
7677 NEONMAP1(vqrshlb_s8
, aarch64_neon_sqrshl
, Vectorize1ArgType
| Use64BitVectors
),
7678 NEONMAP1(vqrshlb_u8
, aarch64_neon_uqrshl
, Vectorize1ArgType
| Use64BitVectors
),
7679 NEONMAP1(vqrshld_s64
, aarch64_neon_sqrshl
, Add1ArgType
),
7680 NEONMAP1(vqrshld_u64
, aarch64_neon_uqrshl
, Add1ArgType
),
7681 NEONMAP1(vqrshlh_s16
, aarch64_neon_sqrshl
, Vectorize1ArgType
| Use64BitVectors
),
7682 NEONMAP1(vqrshlh_u16
, aarch64_neon_uqrshl
, Vectorize1ArgType
| Use64BitVectors
),
7683 NEONMAP1(vqrshls_s32
, aarch64_neon_sqrshl
, Add1ArgType
),
7684 NEONMAP1(vqrshls_u32
, aarch64_neon_uqrshl
, Add1ArgType
),
7685 NEONMAP1(vqrshrnd_n_s64
, aarch64_neon_sqrshrn
, AddRetType
),
7686 NEONMAP1(vqrshrnd_n_u64
, aarch64_neon_uqrshrn
, AddRetType
),
7687 NEONMAP1(vqrshrnh_n_s16
, aarch64_neon_sqrshrn
, VectorRet
| Use64BitVectors
),
7688 NEONMAP1(vqrshrnh_n_u16
, aarch64_neon_uqrshrn
, VectorRet
| Use64BitVectors
),
7689 NEONMAP1(vqrshrns_n_s32
, aarch64_neon_sqrshrn
, VectorRet
| Use64BitVectors
),
7690 NEONMAP1(vqrshrns_n_u32
, aarch64_neon_uqrshrn
, VectorRet
| Use64BitVectors
),
7691 NEONMAP1(vqrshrund_n_s64
, aarch64_neon_sqrshrun
, AddRetType
),
7692 NEONMAP1(vqrshrunh_n_s16
, aarch64_neon_sqrshrun
, VectorRet
| Use64BitVectors
),
7693 NEONMAP1(vqrshruns_n_s32
, aarch64_neon_sqrshrun
, VectorRet
| Use64BitVectors
),
7694 NEONMAP1(vqshlb_n_s8
, aarch64_neon_sqshl
, Vectorize1ArgType
| Use64BitVectors
),
7695 NEONMAP1(vqshlb_n_u8
, aarch64_neon_uqshl
, Vectorize1ArgType
| Use64BitVectors
),
7696 NEONMAP1(vqshlb_s8
, aarch64_neon_sqshl
, Vectorize1ArgType
| Use64BitVectors
),
7697 NEONMAP1(vqshlb_u8
, aarch64_neon_uqshl
, Vectorize1ArgType
| Use64BitVectors
),
7698 NEONMAP1(vqshld_s64
, aarch64_neon_sqshl
, Add1ArgType
),
7699 NEONMAP1(vqshld_u64
, aarch64_neon_uqshl
, Add1ArgType
),
7700 NEONMAP1(vqshlh_n_s16
, aarch64_neon_sqshl
, Vectorize1ArgType
| Use64BitVectors
),
7701 NEONMAP1(vqshlh_n_u16
, aarch64_neon_uqshl
, Vectorize1ArgType
| Use64BitVectors
),
7702 NEONMAP1(vqshlh_s16
, aarch64_neon_sqshl
, Vectorize1ArgType
| Use64BitVectors
),
7703 NEONMAP1(vqshlh_u16
, aarch64_neon_uqshl
, Vectorize1ArgType
| Use64BitVectors
),
7704 NEONMAP1(vqshls_n_s32
, aarch64_neon_sqshl
, Add1ArgType
),
7705 NEONMAP1(vqshls_n_u32
, aarch64_neon_uqshl
, Add1ArgType
),
7706 NEONMAP1(vqshls_s32
, aarch64_neon_sqshl
, Add1ArgType
),
7707 NEONMAP1(vqshls_u32
, aarch64_neon_uqshl
, Add1ArgType
),
7708 NEONMAP1(vqshlub_n_s8
, aarch64_neon_sqshlu
, Vectorize1ArgType
| Use64BitVectors
),
7709 NEONMAP1(vqshluh_n_s16
, aarch64_neon_sqshlu
, Vectorize1ArgType
| Use64BitVectors
),
7710 NEONMAP1(vqshlus_n_s32
, aarch64_neon_sqshlu
, Add1ArgType
),
7711 NEONMAP1(vqshrnd_n_s64
, aarch64_neon_sqshrn
, AddRetType
),
7712 NEONMAP1(vqshrnd_n_u64
, aarch64_neon_uqshrn
, AddRetType
),
7713 NEONMAP1(vqshrnh_n_s16
, aarch64_neon_sqshrn
, VectorRet
| Use64BitVectors
),
7714 NEONMAP1(vqshrnh_n_u16
, aarch64_neon_uqshrn
, VectorRet
| Use64BitVectors
),
7715 NEONMAP1(vqshrns_n_s32
, aarch64_neon_sqshrn
, VectorRet
| Use64BitVectors
),
7716 NEONMAP1(vqshrns_n_u32
, aarch64_neon_uqshrn
, VectorRet
| Use64BitVectors
),
7717 NEONMAP1(vqshrund_n_s64
, aarch64_neon_sqshrun
, AddRetType
),
7718 NEONMAP1(vqshrunh_n_s16
, aarch64_neon_sqshrun
, VectorRet
| Use64BitVectors
),
7719 NEONMAP1(vqshruns_n_s32
, aarch64_neon_sqshrun
, VectorRet
| Use64BitVectors
),
7720 NEONMAP1(vqsubb_s8
, aarch64_neon_sqsub
, Vectorize1ArgType
| Use64BitVectors
),
7721 NEONMAP1(vqsubb_u8
, aarch64_neon_uqsub
, Vectorize1ArgType
| Use64BitVectors
),
7722 NEONMAP1(vqsubd_s64
, aarch64_neon_sqsub
, Add1ArgType
),
7723 NEONMAP1(vqsubd_u64
, aarch64_neon_uqsub
, Add1ArgType
),
7724 NEONMAP1(vqsubh_s16
, aarch64_neon_sqsub
, Vectorize1ArgType
| Use64BitVectors
),
7725 NEONMAP1(vqsubh_u16
, aarch64_neon_uqsub
, Vectorize1ArgType
| Use64BitVectors
),
7726 NEONMAP1(vqsubs_s32
, aarch64_neon_sqsub
, Add1ArgType
),
7727 NEONMAP1(vqsubs_u32
, aarch64_neon_uqsub
, Add1ArgType
),
7728 NEONMAP1(vrecped_f64
, aarch64_neon_frecpe
, Add1ArgType
),
7729 NEONMAP1(vrecpes_f32
, aarch64_neon_frecpe
, Add1ArgType
),
7730 NEONMAP1(vrecpxd_f64
, aarch64_neon_frecpx
, Add1ArgType
),
7731 NEONMAP1(vrecpxs_f32
, aarch64_neon_frecpx
, Add1ArgType
),
7732 NEONMAP1(vrshld_s64
, aarch64_neon_srshl
, Add1ArgType
),
7733 NEONMAP1(vrshld_u64
, aarch64_neon_urshl
, Add1ArgType
),
7734 NEONMAP1(vrsqrted_f64
, aarch64_neon_frsqrte
, Add1ArgType
),
7735 NEONMAP1(vrsqrtes_f32
, aarch64_neon_frsqrte
, Add1ArgType
),
7736 NEONMAP1(vrsqrtsd_f64
, aarch64_neon_frsqrts
, Add1ArgType
),
7737 NEONMAP1(vrsqrtss_f32
, aarch64_neon_frsqrts
, Add1ArgType
),
7738 NEONMAP1(vsha1cq_u32
, aarch64_crypto_sha1c
, 0),
7739 NEONMAP1(vsha1h_u32
, aarch64_crypto_sha1h
, 0),
7740 NEONMAP1(vsha1mq_u32
, aarch64_crypto_sha1m
, 0),
7741 NEONMAP1(vsha1pq_u32
, aarch64_crypto_sha1p
, 0),
7742 NEONMAP1(vshld_s64
, aarch64_neon_sshl
, Add1ArgType
),
7743 NEONMAP1(vshld_u64
, aarch64_neon_ushl
, Add1ArgType
),
7744 NEONMAP1(vslid_n_s64
, aarch64_neon_vsli
, Vectorize1ArgType
),
7745 NEONMAP1(vslid_n_u64
, aarch64_neon_vsli
, Vectorize1ArgType
),
7746 NEONMAP1(vsqaddb_u8
, aarch64_neon_usqadd
, Vectorize1ArgType
| Use64BitVectors
),
7747 NEONMAP1(vsqaddd_u64
, aarch64_neon_usqadd
, Add1ArgType
),
7748 NEONMAP1(vsqaddh_u16
, aarch64_neon_usqadd
, Vectorize1ArgType
| Use64BitVectors
),
7749 NEONMAP1(vsqadds_u32
, aarch64_neon_usqadd
, Add1ArgType
),
7750 NEONMAP1(vsrid_n_s64
, aarch64_neon_vsri
, Vectorize1ArgType
),
7751 NEONMAP1(vsrid_n_u64
, aarch64_neon_vsri
, Vectorize1ArgType
),
7752 NEONMAP1(vuqaddb_s8
, aarch64_neon_suqadd
, Vectorize1ArgType
| Use64BitVectors
),
7753 NEONMAP1(vuqaddd_s64
, aarch64_neon_suqadd
, Add1ArgType
),
7754 NEONMAP1(vuqaddh_s16
, aarch64_neon_suqadd
, Vectorize1ArgType
| Use64BitVectors
),
7755 NEONMAP1(vuqadds_s32
, aarch64_neon_suqadd
, Add1ArgType
),
7756 // FP16 scalar intrinsics go here.
7757 NEONMAP1(vabdh_f16
, aarch64_sisd_fabd
, Add1ArgType
),
7758 NEONMAP1(vcvtah_s32_f16
, aarch64_neon_fcvtas
, AddRetType
| Add1ArgType
),
7759 NEONMAP1(vcvtah_s64_f16
, aarch64_neon_fcvtas
, AddRetType
| Add1ArgType
),
7760 NEONMAP1(vcvtah_u32_f16
, aarch64_neon_fcvtau
, AddRetType
| Add1ArgType
),
7761 NEONMAP1(vcvtah_u64_f16
, aarch64_neon_fcvtau
, AddRetType
| Add1ArgType
),
7762 NEONMAP1(vcvth_n_f16_s32
, aarch64_neon_vcvtfxs2fp
, AddRetType
| Add1ArgType
),
7763 NEONMAP1(vcvth_n_f16_s64
, aarch64_neon_vcvtfxs2fp
, AddRetType
| Add1ArgType
),
7764 NEONMAP1(vcvth_n_f16_u32
, aarch64_neon_vcvtfxu2fp
, AddRetType
| Add1ArgType
),
7765 NEONMAP1(vcvth_n_f16_u64
, aarch64_neon_vcvtfxu2fp
, AddRetType
| Add1ArgType
),
7766 NEONMAP1(vcvth_n_s32_f16
, aarch64_neon_vcvtfp2fxs
, AddRetType
| Add1ArgType
),
7767 NEONMAP1(vcvth_n_s64_f16
, aarch64_neon_vcvtfp2fxs
, AddRetType
| Add1ArgType
),
7768 NEONMAP1(vcvth_n_u32_f16
, aarch64_neon_vcvtfp2fxu
, AddRetType
| Add1ArgType
),
7769 NEONMAP1(vcvth_n_u64_f16
, aarch64_neon_vcvtfp2fxu
, AddRetType
| Add1ArgType
),
7770 NEONMAP1(vcvth_s32_f16
, aarch64_neon_fcvtzs
, AddRetType
| Add1ArgType
),
7771 NEONMAP1(vcvth_s64_f16
, aarch64_neon_fcvtzs
, AddRetType
| Add1ArgType
),
7772 NEONMAP1(vcvth_u32_f16
, aarch64_neon_fcvtzu
, AddRetType
| Add1ArgType
),
7773 NEONMAP1(vcvth_u64_f16
, aarch64_neon_fcvtzu
, AddRetType
| Add1ArgType
),
7774 NEONMAP1(vcvtmh_s32_f16
, aarch64_neon_fcvtms
, AddRetType
| Add1ArgType
),
7775 NEONMAP1(vcvtmh_s64_f16
, aarch64_neon_fcvtms
, AddRetType
| Add1ArgType
),
7776 NEONMAP1(vcvtmh_u32_f16
, aarch64_neon_fcvtmu
, AddRetType
| Add1ArgType
),
7777 NEONMAP1(vcvtmh_u64_f16
, aarch64_neon_fcvtmu
, AddRetType
| Add1ArgType
),
7778 NEONMAP1(vcvtnh_s32_f16
, aarch64_neon_fcvtns
, AddRetType
| Add1ArgType
),
7779 NEONMAP1(vcvtnh_s64_f16
, aarch64_neon_fcvtns
, AddRetType
| Add1ArgType
),
7780 NEONMAP1(vcvtnh_u32_f16
, aarch64_neon_fcvtnu
, AddRetType
| Add1ArgType
),
7781 NEONMAP1(vcvtnh_u64_f16
, aarch64_neon_fcvtnu
, AddRetType
| Add1ArgType
),
7782 NEONMAP1(vcvtph_s32_f16
, aarch64_neon_fcvtps
, AddRetType
| Add1ArgType
),
7783 NEONMAP1(vcvtph_s64_f16
, aarch64_neon_fcvtps
, AddRetType
| Add1ArgType
),
7784 NEONMAP1(vcvtph_u32_f16
, aarch64_neon_fcvtpu
, AddRetType
| Add1ArgType
),
7785 NEONMAP1(vcvtph_u64_f16
, aarch64_neon_fcvtpu
, AddRetType
| Add1ArgType
),
7786 NEONMAP1(vmulxh_f16
, aarch64_neon_fmulx
, Add1ArgType
),
7787 NEONMAP1(vrecpeh_f16
, aarch64_neon_frecpe
, Add1ArgType
),
7788 NEONMAP1(vrecpxh_f16
, aarch64_neon_frecpx
, Add1ArgType
),
7789 NEONMAP1(vrsqrteh_f16
, aarch64_neon_frsqrte
, Add1ArgType
),
7790 NEONMAP1(vrsqrtsh_f16
, aarch64_neon_frsqrts
, Add1ArgType
),
7793 // Some intrinsics are equivalent for codegen.
// Each entry is a (builtin ID, equivalent builtin ID) pair: the first ID is a
// type-specific variant, the second is the ID it is paired with for code
// generation. The entries below pair the _f16 and _bf16 variants with the
// corresponding generic _v builtin, and the u64/f64/p64 variants of
// vldap1(q)_lane / vstl1(q)_lane with the s64 variant.
// NOTE(review): the code that consults this table lies outside this excerpt,
// so how the mapping is applied cannot be confirmed from here.
7794 static const std::pair
<unsigned, unsigned> NEONEquivalentIntrinsicMap
[] = {
7795 { NEON::BI__builtin_neon_splat_lane_bf16
, NEON::BI__builtin_neon_splat_lane_v
, },
7796 { NEON::BI__builtin_neon_splat_laneq_bf16
, NEON::BI__builtin_neon_splat_laneq_v
, },
7797 { NEON::BI__builtin_neon_splatq_lane_bf16
, NEON::BI__builtin_neon_splatq_lane_v
, },
7798 { NEON::BI__builtin_neon_splatq_laneq_bf16
, NEON::BI__builtin_neon_splatq_laneq_v
, },
7799 { NEON::BI__builtin_neon_vabd_f16
, NEON::BI__builtin_neon_vabd_v
, },
7800 { NEON::BI__builtin_neon_vabdq_f16
, NEON::BI__builtin_neon_vabdq_v
, },
7801 { NEON::BI__builtin_neon_vabs_f16
, NEON::BI__builtin_neon_vabs_v
, },
7802 { NEON::BI__builtin_neon_vabsq_f16
, NEON::BI__builtin_neon_vabsq_v
, },
7803 { NEON::BI__builtin_neon_vcage_f16
, NEON::BI__builtin_neon_vcage_v
, },
7804 { NEON::BI__builtin_neon_vcageq_f16
, NEON::BI__builtin_neon_vcageq_v
, },
7805 { NEON::BI__builtin_neon_vcagt_f16
, NEON::BI__builtin_neon_vcagt_v
, },
7806 { NEON::BI__builtin_neon_vcagtq_f16
, NEON::BI__builtin_neon_vcagtq_v
, },
7807 { NEON::BI__builtin_neon_vcale_f16
, NEON::BI__builtin_neon_vcale_v
, },
7808 { NEON::BI__builtin_neon_vcaleq_f16
, NEON::BI__builtin_neon_vcaleq_v
, },
7809 { NEON::BI__builtin_neon_vcalt_f16
, NEON::BI__builtin_neon_vcalt_v
, },
7810 { NEON::BI__builtin_neon_vcaltq_f16
, NEON::BI__builtin_neon_vcaltq_v
, },
7811 { NEON::BI__builtin_neon_vceqz_f16
, NEON::BI__builtin_neon_vceqz_v
, },
7812 { NEON::BI__builtin_neon_vceqzq_f16
, NEON::BI__builtin_neon_vceqzq_v
, },
7813 { NEON::BI__builtin_neon_vcgez_f16
, NEON::BI__builtin_neon_vcgez_v
, },
7814 { NEON::BI__builtin_neon_vcgezq_f16
, NEON::BI__builtin_neon_vcgezq_v
, },
7815 { NEON::BI__builtin_neon_vcgtz_f16
, NEON::BI__builtin_neon_vcgtz_v
, },
7816 { NEON::BI__builtin_neon_vcgtzq_f16
, NEON::BI__builtin_neon_vcgtzq_v
, },
7817 { NEON::BI__builtin_neon_vclez_f16
, NEON::BI__builtin_neon_vclez_v
, },
7818 { NEON::BI__builtin_neon_vclezq_f16
, NEON::BI__builtin_neon_vclezq_v
, },
7819 { NEON::BI__builtin_neon_vcltz_f16
, NEON::BI__builtin_neon_vcltz_v
, },
7820 { NEON::BI__builtin_neon_vcltzq_f16
, NEON::BI__builtin_neon_vcltzq_v
, },
7821 { NEON::BI__builtin_neon_vfma_f16
, NEON::BI__builtin_neon_vfma_v
, },
7822 { NEON::BI__builtin_neon_vfma_lane_f16
, NEON::BI__builtin_neon_vfma_lane_v
, },
7823 { NEON::BI__builtin_neon_vfma_laneq_f16
, NEON::BI__builtin_neon_vfma_laneq_v
, },
7824 { NEON::BI__builtin_neon_vfmaq_f16
, NEON::BI__builtin_neon_vfmaq_v
, },
7825 { NEON::BI__builtin_neon_vfmaq_lane_f16
, NEON::BI__builtin_neon_vfmaq_lane_v
, },
7826 { NEON::BI__builtin_neon_vfmaq_laneq_f16
, NEON::BI__builtin_neon_vfmaq_laneq_v
, },
7827 { NEON::BI__builtin_neon_vld1_bf16_x2
, NEON::BI__builtin_neon_vld1_x2_v
},
7828 { NEON::BI__builtin_neon_vld1_bf16_x3
, NEON::BI__builtin_neon_vld1_x3_v
},
7829 { NEON::BI__builtin_neon_vld1_bf16_x4
, NEON::BI__builtin_neon_vld1_x4_v
},
7830 { NEON::BI__builtin_neon_vld1_bf16
, NEON::BI__builtin_neon_vld1_v
},
7831 { NEON::BI__builtin_neon_vld1_dup_bf16
, NEON::BI__builtin_neon_vld1_dup_v
},
7832 { NEON::BI__builtin_neon_vld1_lane_bf16
, NEON::BI__builtin_neon_vld1_lane_v
},
7833 { NEON::BI__builtin_neon_vld1q_bf16_x2
, NEON::BI__builtin_neon_vld1q_x2_v
},
7834 { NEON::BI__builtin_neon_vld1q_bf16_x3
, NEON::BI__builtin_neon_vld1q_x3_v
},
7835 { NEON::BI__builtin_neon_vld1q_bf16_x4
, NEON::BI__builtin_neon_vld1q_x4_v
},
7836 { NEON::BI__builtin_neon_vld1q_bf16
, NEON::BI__builtin_neon_vld1q_v
},
7837 { NEON::BI__builtin_neon_vld1q_dup_bf16
, NEON::BI__builtin_neon_vld1q_dup_v
},
7838 { NEON::BI__builtin_neon_vld1q_lane_bf16
, NEON::BI__builtin_neon_vld1q_lane_v
},
7839 { NEON::BI__builtin_neon_vld2_bf16
, NEON::BI__builtin_neon_vld2_v
},
7840 { NEON::BI__builtin_neon_vld2_dup_bf16
, NEON::BI__builtin_neon_vld2_dup_v
},
7841 { NEON::BI__builtin_neon_vld2_lane_bf16
, NEON::BI__builtin_neon_vld2_lane_v
},
7842 { NEON::BI__builtin_neon_vld2q_bf16
, NEON::BI__builtin_neon_vld2q_v
},
7843 { NEON::BI__builtin_neon_vld2q_dup_bf16
, NEON::BI__builtin_neon_vld2q_dup_v
},
7844 { NEON::BI__builtin_neon_vld2q_lane_bf16
, NEON::BI__builtin_neon_vld2q_lane_v
},
7845 { NEON::BI__builtin_neon_vld3_bf16
, NEON::BI__builtin_neon_vld3_v
},
7846 { NEON::BI__builtin_neon_vld3_dup_bf16
, NEON::BI__builtin_neon_vld3_dup_v
},
7847 { NEON::BI__builtin_neon_vld3_lane_bf16
, NEON::BI__builtin_neon_vld3_lane_v
},
7848 { NEON::BI__builtin_neon_vld3q_bf16
, NEON::BI__builtin_neon_vld3q_v
},
7849 { NEON::BI__builtin_neon_vld3q_dup_bf16
, NEON::BI__builtin_neon_vld3q_dup_v
},
7850 { NEON::BI__builtin_neon_vld3q_lane_bf16
, NEON::BI__builtin_neon_vld3q_lane_v
},
7851 { NEON::BI__builtin_neon_vld4_bf16
, NEON::BI__builtin_neon_vld4_v
},
7852 { NEON::BI__builtin_neon_vld4_dup_bf16
, NEON::BI__builtin_neon_vld4_dup_v
},
7853 { NEON::BI__builtin_neon_vld4_lane_bf16
, NEON::BI__builtin_neon_vld4_lane_v
},
7854 { NEON::BI__builtin_neon_vld4q_bf16
, NEON::BI__builtin_neon_vld4q_v
},
7855 { NEON::BI__builtin_neon_vld4q_dup_bf16
, NEON::BI__builtin_neon_vld4q_dup_v
},
7856 { NEON::BI__builtin_neon_vld4q_lane_bf16
, NEON::BI__builtin_neon_vld4q_lane_v
},
7857 { NEON::BI__builtin_neon_vmax_f16
, NEON::BI__builtin_neon_vmax_v
, },
7858 { NEON::BI__builtin_neon_vmaxnm_f16
, NEON::BI__builtin_neon_vmaxnm_v
, },
7859 { NEON::BI__builtin_neon_vmaxnmq_f16
, NEON::BI__builtin_neon_vmaxnmq_v
, },
7860 { NEON::BI__builtin_neon_vmaxq_f16
, NEON::BI__builtin_neon_vmaxq_v
, },
7861 { NEON::BI__builtin_neon_vmin_f16
, NEON::BI__builtin_neon_vmin_v
, },
7862 { NEON::BI__builtin_neon_vminnm_f16
, NEON::BI__builtin_neon_vminnm_v
, },
7863 { NEON::BI__builtin_neon_vminnmq_f16
, NEON::BI__builtin_neon_vminnmq_v
, },
7864 { NEON::BI__builtin_neon_vminq_f16
, NEON::BI__builtin_neon_vminq_v
, },
7865 { NEON::BI__builtin_neon_vmulx_f16
, NEON::BI__builtin_neon_vmulx_v
, },
7866 { NEON::BI__builtin_neon_vmulxq_f16
, NEON::BI__builtin_neon_vmulxq_v
, },
7867 { NEON::BI__builtin_neon_vpadd_f16
, NEON::BI__builtin_neon_vpadd_v
, },
7868 { NEON::BI__builtin_neon_vpaddq_f16
, NEON::BI__builtin_neon_vpaddq_v
, },
7869 { NEON::BI__builtin_neon_vpmax_f16
, NEON::BI__builtin_neon_vpmax_v
, },
7870 { NEON::BI__builtin_neon_vpmaxnm_f16
, NEON::BI__builtin_neon_vpmaxnm_v
, },
7871 { NEON::BI__builtin_neon_vpmaxnmq_f16
, NEON::BI__builtin_neon_vpmaxnmq_v
, },
7872 { NEON::BI__builtin_neon_vpmaxq_f16
, NEON::BI__builtin_neon_vpmaxq_v
, },
7873 { NEON::BI__builtin_neon_vpmin_f16
, NEON::BI__builtin_neon_vpmin_v
, },
7874 { NEON::BI__builtin_neon_vpminnm_f16
, NEON::BI__builtin_neon_vpminnm_v
, },
7875 { NEON::BI__builtin_neon_vpminnmq_f16
, NEON::BI__builtin_neon_vpminnmq_v
, },
7876 { NEON::BI__builtin_neon_vpminq_f16
, NEON::BI__builtin_neon_vpminq_v
, },
7877 { NEON::BI__builtin_neon_vrecpe_f16
, NEON::BI__builtin_neon_vrecpe_v
, },
7878 { NEON::BI__builtin_neon_vrecpeq_f16
, NEON::BI__builtin_neon_vrecpeq_v
, },
7879 { NEON::BI__builtin_neon_vrecps_f16
, NEON::BI__builtin_neon_vrecps_v
, },
7880 { NEON::BI__builtin_neon_vrecpsq_f16
, NEON::BI__builtin_neon_vrecpsq_v
, },
7881 { NEON::BI__builtin_neon_vrnd_f16
, NEON::BI__builtin_neon_vrnd_v
, },
7882 { NEON::BI__builtin_neon_vrnda_f16
, NEON::BI__builtin_neon_vrnda_v
, },
7883 { NEON::BI__builtin_neon_vrndaq_f16
, NEON::BI__builtin_neon_vrndaq_v
, },
7884 { NEON::BI__builtin_neon_vrndi_f16
, NEON::BI__builtin_neon_vrndi_v
, },
7885 { NEON::BI__builtin_neon_vrndiq_f16
, NEON::BI__builtin_neon_vrndiq_v
, },
7886 { NEON::BI__builtin_neon_vrndm_f16
, NEON::BI__builtin_neon_vrndm_v
, },
7887 { NEON::BI__builtin_neon_vrndmq_f16
, NEON::BI__builtin_neon_vrndmq_v
, },
7888 { NEON::BI__builtin_neon_vrndn_f16
, NEON::BI__builtin_neon_vrndn_v
, },
7889 { NEON::BI__builtin_neon_vrndnq_f16
, NEON::BI__builtin_neon_vrndnq_v
, },
7890 { NEON::BI__builtin_neon_vrndp_f16
, NEON::BI__builtin_neon_vrndp_v
, },
7891 { NEON::BI__builtin_neon_vrndpq_f16
, NEON::BI__builtin_neon_vrndpq_v
, },
7892 { NEON::BI__builtin_neon_vrndq_f16
, NEON::BI__builtin_neon_vrndq_v
, },
7893 { NEON::BI__builtin_neon_vrndx_f16
, NEON::BI__builtin_neon_vrndx_v
, },
7894 { NEON::BI__builtin_neon_vrndxq_f16
, NEON::BI__builtin_neon_vrndxq_v
, },
7895 { NEON::BI__builtin_neon_vrsqrte_f16
, NEON::BI__builtin_neon_vrsqrte_v
, },
7896 { NEON::BI__builtin_neon_vrsqrteq_f16
, NEON::BI__builtin_neon_vrsqrteq_v
, },
7897 { NEON::BI__builtin_neon_vrsqrts_f16
, NEON::BI__builtin_neon_vrsqrts_v
, },
7898 { NEON::BI__builtin_neon_vrsqrtsq_f16
, NEON::BI__builtin_neon_vrsqrtsq_v
, },
7899 { NEON::BI__builtin_neon_vsqrt_f16
, NEON::BI__builtin_neon_vsqrt_v
, },
7900 { NEON::BI__builtin_neon_vsqrtq_f16
, NEON::BI__builtin_neon_vsqrtq_v
, },
7901 { NEON::BI__builtin_neon_vst1_bf16_x2
, NEON::BI__builtin_neon_vst1_x2_v
},
7902 { NEON::BI__builtin_neon_vst1_bf16_x3
, NEON::BI__builtin_neon_vst1_x3_v
},
7903 { NEON::BI__builtin_neon_vst1_bf16_x4
, NEON::BI__builtin_neon_vst1_x4_v
},
7904 { NEON::BI__builtin_neon_vst1_bf16
, NEON::BI__builtin_neon_vst1_v
},
7905 { NEON::BI__builtin_neon_vst1_lane_bf16
, NEON::BI__builtin_neon_vst1_lane_v
},
7906 { NEON::BI__builtin_neon_vst1q_bf16_x2
, NEON::BI__builtin_neon_vst1q_x2_v
},
7907 { NEON::BI__builtin_neon_vst1q_bf16_x3
, NEON::BI__builtin_neon_vst1q_x3_v
},
7908 { NEON::BI__builtin_neon_vst1q_bf16_x4
, NEON::BI__builtin_neon_vst1q_x4_v
},
7909 { NEON::BI__builtin_neon_vst1q_bf16
, NEON::BI__builtin_neon_vst1q_v
},
7910 { NEON::BI__builtin_neon_vst1q_lane_bf16
, NEON::BI__builtin_neon_vst1q_lane_v
},
7911 { NEON::BI__builtin_neon_vst2_bf16
, NEON::BI__builtin_neon_vst2_v
},
7912 { NEON::BI__builtin_neon_vst2_lane_bf16
, NEON::BI__builtin_neon_vst2_lane_v
},
7913 { NEON::BI__builtin_neon_vst2q_bf16
, NEON::BI__builtin_neon_vst2q_v
},
7914 { NEON::BI__builtin_neon_vst2q_lane_bf16
, NEON::BI__builtin_neon_vst2q_lane_v
},
7915 { NEON::BI__builtin_neon_vst3_bf16
, NEON::BI__builtin_neon_vst3_v
},
7916 { NEON::BI__builtin_neon_vst3_lane_bf16
, NEON::BI__builtin_neon_vst3_lane_v
},
7917 { NEON::BI__builtin_neon_vst3q_bf16
, NEON::BI__builtin_neon_vst3q_v
},
7918 { NEON::BI__builtin_neon_vst3q_lane_bf16
, NEON::BI__builtin_neon_vst3q_lane_v
},
7919 { NEON::BI__builtin_neon_vst4_bf16
, NEON::BI__builtin_neon_vst4_v
},
7920 { NEON::BI__builtin_neon_vst4_lane_bf16
, NEON::BI__builtin_neon_vst4_lane_v
},
7921 { NEON::BI__builtin_neon_vst4q_bf16
, NEON::BI__builtin_neon_vst4q_v
},
7922 { NEON::BI__builtin_neon_vst4q_lane_bf16
, NEON::BI__builtin_neon_vst4q_lane_v
},
7923 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7924 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7925 // arbitrary one to be handled as the canonical variation.
7926 { NEON::BI__builtin_neon_vldap1_lane_u64
, NEON::BI__builtin_neon_vldap1_lane_s64
},
7927 { NEON::BI__builtin_neon_vldap1_lane_f64
, NEON::BI__builtin_neon_vldap1_lane_s64
},
7928 { NEON::BI__builtin_neon_vldap1_lane_p64
, NEON::BI__builtin_neon_vldap1_lane_s64
},
7929 { NEON::BI__builtin_neon_vldap1q_lane_u64
, NEON::BI__builtin_neon_vldap1q_lane_s64
},
7930 { NEON::BI__builtin_neon_vldap1q_lane_f64
, NEON::BI__builtin_neon_vldap1q_lane_s64
},
7931 { NEON::BI__builtin_neon_vldap1q_lane_p64
, NEON::BI__builtin_neon_vldap1q_lane_s64
},
7932 { NEON::BI__builtin_neon_vstl1_lane_u64
, NEON::BI__builtin_neon_vstl1_lane_s64
},
7933 { NEON::BI__builtin_neon_vstl1_lane_f64
, NEON::BI__builtin_neon_vstl1_lane_s64
},
7934 { NEON::BI__builtin_neon_vstl1_lane_p64
, NEON::BI__builtin_neon_vstl1_lane_s64
},
7935 { NEON::BI__builtin_neon_vstl1q_lane_u64
, NEON::BI__builtin_neon_vstl1q_lane_s64
},
7936 { NEON::BI__builtin_neon_vstl1q_lane_f64
, NEON::BI__builtin_neon_vstl1q_lane_s64
},
7937 { NEON::BI__builtin_neon_vstl1q_lane_p64
, NEON::BI__builtin_neon_vstl1q_lane_s64
},
// Entry-building macros and the table of SVE builtins.
// SVEMAP1 produces an entry that stringizes NameBase, names the builtin
// SVE::BI__builtin_sve_<NameBase> and refers to Intrinsic::<LLVMIntrinsic>.
// SVEMAP2 produces an entry with the same name and builtin ID but with both
// numeric fields after the builtin ID set to 0, followed by TypeModifier.
// NOTE(review): parts of the SVEMAP1 definition are absent from this excerpt
// (the embedded numbering skips 7945 and 7947-7949), so its full expansion
// cannot be confirmed from here.
7944 #define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7946 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7950 #define SVEMAP2(NameBase, TypeModifier) \
7951 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
// The table body comes from the two included files below, which are included
// while GET_SVE_LLVM_INTRINSIC_MAP is defined; the macro is undefined again
// right after the includes.
7952 static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap
[] = {
7953 #define GET_SVE_LLVM_INTRINSIC_MAP
7954 #include "clang/Basic/arm_sve_builtin_cg.inc"
7955 #include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7956 #undef GET_SVE_LLVM_INTRINSIC_MAP
// Entry-building macros and the table of SME builtins.
// SMEMAP1 produces an entry that stringizes NameBase, names the builtin
// SME::BI__builtin_sme_<NameBase> and refers to Intrinsic::<LLVMIntrinsic>.
// SMEMAP2 produces an entry with the same name and builtin ID but with both
// numeric fields after the builtin ID set to 0, followed by TypeModifier.
// NOTE(review): parts of the SMEMAP1 definition are absent from this excerpt
// (the embedded numbering skips 7963 and 7965-7967), so its full expansion
// cannot be confirmed from here.
7962 #define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7964 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7968 #define SMEMAP2(NameBase, TypeModifier) \
7969 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
// The table body comes from the included file below, which is included while
// GET_SME_LLVM_INTRINSIC_MAP is defined; the macro is undefined again right
// after the include.
7970 static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap
[] = {
7971 #define GET_SME_LLVM_INTRINSIC_MAP
7972 #include "clang/Basic/arm_sme_builtin_cg.inc"
7973 #undef GET_SME_LLVM_INTRINSIC_MAP
// File-local flags, all initially false, one per intrinsic table family
// (NEON SIMD, AArch64 SIMD, AArch64 SISD, AArch64 SVE, AArch64 SME).
// findARMVectorIntrinsicInMap takes such a flag by reference and sets it to
// true once it has asserted that the table it was given is sorted.
// NOTE(review): which flag is passed with which table is decided at call
// sites outside this excerpt -- confirm there.
7979 static bool NEONSIMDIntrinsicsProvenSorted
= false;
7981 static bool AArch64SIMDIntrinsicsProvenSorted
= false;
7982 static bool AArch64SISDIntrinsicsProvenSorted
= false;
7983 static bool AArch64SVEIntrinsicsProvenSorted
= false;
7984 static bool AArch64SMEIntrinsicsProvenSorted
= false;
// Search IntrinsicMap for the entry whose BuiltinID field equals BuiltinID.
//
// IntrinsicMap    - table to search. It is searched with llvm::lower_bound,
//                   so it has to be sorted already.
// BuiltinID       - builtin ID to look for.
// MapProvenSorted - flag owned by the caller. While it is false the table
//                   ordering is asserted and the flag is then set to true, so
//                   the check is not repeated on later calls with that flag.
//
// NOTE(review): several original lines of this function are absent from this
// excerpt (the embedded numbering jumps 7988->7991, 7993->7997 and stops at
// 8000), so neither return statement is visible. Presumably a match returns
// Builtin and a miss returns nullptr -- confirm against the complete file.
7986 static const ARMVectorIntrinsicInfo
*
7987 findARMVectorIntrinsicInMap(ArrayRef
<ARMVectorIntrinsicInfo
> IntrinsicMap
,
7988 unsigned BuiltinID
, bool &MapProvenSorted
) {
// One-time sanity check of the table ordering.
// NOTE(review): the lines just before this (7989-7990) are missing here; this
// may be wrapped in a debug-only guard -- confirm.
7991 if (!MapProvenSorted
) {
7992 assert(llvm::is_sorted(IntrinsicMap
));
7993 MapProvenSorted
= true;
// Find the first entry that does not order before BuiltinID.
// NOTE(review): this relies on a comparison between ARMVectorIntrinsicInfo
// and unsigned that is defined outside this excerpt.
7997 const ARMVectorIntrinsicInfo
*Builtin
=
7998 llvm::lower_bound(IntrinsicMap
, BuiltinID
);
// lower_bound may land on end() or on a different ID, so require an exact
// match before treating the entry as found.
8000 if (Builtin
!= IntrinsicMap
.end() && Builtin
->BuiltinID
== BuiltinID
)
// Build the list of overload types selected by the Modifier flag bits and
// return the declaration obtained from CGM.getIntrinsic(IntrinsicID, Tys).
//
// IntrinsicID - LLVM intrinsic to look up.
// ArgType     - type used for the argument slots; it is replaced by a fixed
//               vector of itself when VectorizeArgTypes is set.
// E           - the call expression; only its return type is read here, and
//               only when AddRetType is set.
//
// NOTE(review): this excerpt is missing several original lines of the
// function (embedded numbers 8007, 8010, 8012, 8014-8016, 8023-8027, ...).
// The Modifier parameter and the VectorSize local are used below but their
// declarations are not visible; presumably VectorSize is 64 or 128 under the
// two Use*BitVectors flags -- confirm against the complete file.
8006 Function
*CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID
,
8008 llvm::Type
*ArgType
,
8009 const CallExpr
*E
) {
// Vector width selection; the statements controlled by these two conditions
// are among the missing lines.
8011 if (Modifier
& Use64BitVectors
)
8013 else if (Modifier
& Use128BitVectors
)
// Overload types, collected in order: return type, argument type(s), and
// optionally FloatTy.
8017 SmallVector
<llvm::Type
*, 3> Tys
;
// Return type: the converted call return type, widened to a fixed vector when
// VectorizeRetType is set. The element count is VectorSize divided by the
// scalar bit width, or 1 when VectorSize is 0.
// NOTE(review): the line that appends Ty to Tys is not visible here.
8018 if (Modifier
& AddRetType
) {
8019 llvm::Type
*Ty
= ConvertType(E
->getCallReturnType(getContext()));
8020 if (Modifier
& VectorizeRetType
)
8021 Ty
= llvm::FixedVectorType::get(
8022 Ty
, VectorSize
? VectorSize
/ Ty
->getPrimitiveSizeInBits() : 1);
// Argument type: optionally widened to a fixed vector using the same
// element-count rule as the return type.
8028 if (Modifier
& VectorizeArgTypes
) {
8029 int Elts
= VectorSize
? VectorSize
/ ArgType
->getPrimitiveSizeInBits() : 1;
8030 ArgType
= llvm::FixedVectorType::get(ArgType
, Elts
);
// ArgType is appended once for Add1ArgType or Add2ArgTypes...
8033 if (Modifier
& (Add1ArgType
| Add2ArgTypes
))
8034 Tys
.push_back(ArgType
);
// ...and a second time when Add2ArgTypes is set.
8036 if (Modifier
& Add2ArgTypes
)
8037 Tys
.push_back(ArgType
);
// InventFloatType appends FloatTy as an extra overload type.
8039 if (Modifier
& InventFloatType
)
8040 Tys
.push_back(FloatTy
);
8042 return CGM
.getIntrinsic(IntrinsicID
, Tys
);
// Emit the call for the builtin described by SISDInfo and return its value
// converted to the type of the call expression E.
//
// CGF      - code generation context (type conversion, IR builder, intrinsic
//            lookup and call emission all go through it).
// SISDInfo - table entry supplying the builtin ID, the LLVM intrinsic ID, the
//            type-modifier flags and the name hint.
// Ops      - already-emitted operands; modified in place (swapped for some
//            comparisons, converted for vector-typed parameters).
// E        - the call expression; its first argument and its result type are
//            read.
//
// NOTE(review): this excerpt is missing several original lines of the
// function (embedded numbers 8070-8072, 8079-8080, 8087-8088, 8094, ...), so
// the declaration of j, the statement taken when operand and parameter sizes
// match, and the assignment target of the insert-element result are not
// visible -- confirm against the complete file.
8045 static Value
*EmitCommonNeonSISDBuiltinExpr(
8046 CodeGenFunction
&CGF
, const ARMVectorIntrinsicInfo
&SISDInfo
,
8047 SmallVectorImpl
<Value
*> &Ops
, const CallExpr
*E
) {
// Unpack the table entry.
8048 unsigned BuiltinID
= SISDInfo
.BuiltinID
;
8049 unsigned int Int
= SISDInfo
.LLVMIntrinsic
;
8050 unsigned Modifier
= SISDInfo
.TypeModifier
;
8051 const char *s
= SISDInfo
.NameHint
;
8053 switch (BuiltinID
) {
8054 case NEON::BI__builtin_neon_vcled_s64
:
8055 case NEON::BI__builtin_neon_vcled_u64
:
8056 case NEON::BI__builtin_neon_vcles_f32
:
8057 case NEON::BI__builtin_neon_vcled_f64
:
8058 case NEON::BI__builtin_neon_vcltd_s64
:
8059 case NEON::BI__builtin_neon_vcltd_u64
:
8060 case NEON::BI__builtin_neon_vclts_f32
:
8061 case NEON::BI__builtin_neon_vcltd_f64
:
8062 case NEON::BI__builtin_neon_vcales_f32
:
8063 case NEON::BI__builtin_neon_vcaled_f64
:
8064 case NEON::BI__builtin_neon_vcalts_f32
:
8065 case NEON::BI__builtin_neon_vcaltd_f64
:
8066 // Only one direction of comparisons actually exist, cmle is actually a cmge
8067 // with swapped operands. The table gives us the right intrinsic but we
8068 // still need to do the swap.
8069 std::swap(Ops
[0], Ops
[1]);
8073 assert(Int
&& "Generic code assumes a valid intrinsic");
8075 // Determine the type(s) of this overloaded AArch64 intrinsic.
8076 const Expr
*Arg
= E
->getArg(0);
8077 llvm::Type
*ArgTy
= CGF
.ConvertType(Arg
->getType());
8078 Function
*F
= CGF
.LookupNeonLLVMIntrinsic(Int
, Modifier
, ArgTy
, E
);
// Constant 0 of type SizeTy, used as the element index for the
// insert-element and extract-element operations below.
8081 ConstantInt
*C0
= ConstantInt::get(CGF
.SizeTy
, 0);
// Walk the intrinsic's formal parameters in step with Ops (index j). This
// ArgTy shadows the outer one and is the formal parameter's type.
8082 for (Function::const_arg_iterator ai
= F
->arg_begin(), ae
= F
->arg_end();
8083 ai
!= ae
; ++ai
, ++j
) {
8084 llvm::Type
*ArgTy
= ai
->getType();
// Operands whose bit width already equals the parameter's are tested for
// here; what happens to them is on a line missing from this excerpt.
8085 if (Ops
[j
]->getType()->getPrimitiveSizeInBits() ==
8086 ArgTy
->getPrimitiveSizeInBits())
// Remaining case: the parameter is a vector and the operand is not.
8089 assert(ArgTy
->isVectorTy() && !Ops
[j
]->getType()->isVectorTy());
8090 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
8091 // it before inserting.
8092 Ops
[j
] = CGF
.Builder
.CreateTruncOrBitCast(
8093 Ops
[j
], cast
<llvm::VectorType
>(ArgTy
)->getElementType());
// Place the operand at index 0 of an otherwise-poison vector of the
// parameter type.
8095 CGF
.Builder
.CreateInsertElement(PoisonValue::get(ArgTy
), Ops
[j
], C0
);
8098 Value
*Result
= CGF
.EmitNeonCall(F
, Ops
, s
);
8099 llvm::Type
*ResultType
= CGF
.ConvertType(E
->getType());
// If the intrinsic's result is wider than the expression's type, take
// element 0; otherwise bitcast the result to the expression's type.
8100 if (ResultType
->getPrimitiveSizeInBits().getFixedValue() <
8101 Result
->getType()->getPrimitiveSizeInBits().getFixedValue())
8102 return CGF
.Builder
.CreateExtractElement(Result
, C0
);
8104 return CGF
.Builder
.CreateBitCast(Result
, ResultType
, s
);
8107 Value
*CodeGenFunction::EmitCommonNeonBuiltinExpr(
8108 unsigned BuiltinID
, unsigned LLVMIntrinsic
, unsigned AltLLVMIntrinsic
,
8109 const char *NameHint
, unsigned Modifier
, const CallExpr
*E
,
8110 SmallVectorImpl
<llvm::Value
*> &Ops
, Address PtrOp0
, Address PtrOp1
,
8111 llvm::Triple::ArchType Arch
) {
8112 // Get the last argument, which specifies the vector type.
8113 const Expr
*Arg
= E
->getArg(E
->getNumArgs() - 1);
8114 std::optional
<llvm::APSInt
> NeonTypeConst
=
8115 Arg
->getIntegerConstantExpr(getContext());
8119 // Determine the type of this overloaded NEON intrinsic.
8120 NeonTypeFlags
Type(NeonTypeConst
->getZExtValue());
8121 bool Usgn
= Type
.isUnsigned();
8122 bool Quad
= Type
.isQuad();
8123 const bool HasLegalHalfType
= getTarget().hasLegalHalfType();
8124 const bool AllowBFloatArgsAndRet
=
8125 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
8127 llvm::FixedVectorType
*VTy
=
8128 GetNeonType(this, Type
, HasLegalHalfType
, false, AllowBFloatArgsAndRet
);
8129 llvm::Type
*Ty
= VTy
;
8133 auto getAlignmentValue32
= [&](Address addr
) -> Value
* {
8134 return Builder
.getInt32(addr
.getAlignment().getQuantity());
8137 unsigned Int
= LLVMIntrinsic
;
8138 if ((Modifier
& UnsignedAlts
) && !Usgn
)
8139 Int
= AltLLVMIntrinsic
;
8141 switch (BuiltinID
) {
8143 case NEON::BI__builtin_neon_splat_lane_v
:
8144 case NEON::BI__builtin_neon_splat_laneq_v
:
8145 case NEON::BI__builtin_neon_splatq_lane_v
:
8146 case NEON::BI__builtin_neon_splatq_laneq_v
: {
8147 auto NumElements
= VTy
->getElementCount();
8148 if (BuiltinID
== NEON::BI__builtin_neon_splatq_lane_v
)
8149 NumElements
= NumElements
* 2;
8150 if (BuiltinID
== NEON::BI__builtin_neon_splat_laneq_v
)
8151 NumElements
= NumElements
.divideCoefficientBy(2);
8153 Ops
[0] = Builder
.CreateBitCast(Ops
[0], VTy
);
8154 return EmitNeonSplat(Ops
[0], cast
<ConstantInt
>(Ops
[1]), NumElements
);
8156 case NEON::BI__builtin_neon_vpadd_v
:
8157 case NEON::BI__builtin_neon_vpaddq_v
:
8158 // We don't allow fp/int overloading of intrinsics.
8159 if (VTy
->getElementType()->isFloatingPointTy() &&
8160 Int
== Intrinsic::aarch64_neon_addp
)
8161 Int
= Intrinsic::aarch64_neon_faddp
;
8163 case NEON::BI__builtin_neon_vabs_v
:
8164 case NEON::BI__builtin_neon_vabsq_v
:
8165 if (VTy
->getElementType()->isFloatingPointTy())
8166 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::fabs
, Ty
), Ops
, "vabs");
8167 return EmitNeonCall(CGM
.getIntrinsic(LLVMIntrinsic
, Ty
), Ops
, "vabs");
8168 case NEON::BI__builtin_neon_vadd_v
:
8169 case NEON::BI__builtin_neon_vaddq_v
: {
8170 llvm::Type
*VTy
= llvm::FixedVectorType::get(Int8Ty
, Quad
? 16 : 8);
8171 Ops
[0] = Builder
.CreateBitCast(Ops
[0], VTy
);
8172 Ops
[1] = Builder
.CreateBitCast(Ops
[1], VTy
);
8173 Ops
[0] = Builder
.CreateXor(Ops
[0], Ops
[1]);
8174 return Builder
.CreateBitCast(Ops
[0], Ty
);
8176 case NEON::BI__builtin_neon_vaddhn_v
: {
8177 llvm::FixedVectorType
*SrcTy
=
8178 llvm::FixedVectorType::getExtendedElementVectorType(VTy
);
8180 // %sum = add <4 x i32> %lhs, %rhs
8181 Ops
[0] = Builder
.CreateBitCast(Ops
[0], SrcTy
);
8182 Ops
[1] = Builder
.CreateBitCast(Ops
[1], SrcTy
);
8183 Ops
[0] = Builder
.CreateAdd(Ops
[0], Ops
[1], "vaddhn");
8185 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
8186 Constant
*ShiftAmt
=
8187 ConstantInt::get(SrcTy
, SrcTy
->getScalarSizeInBits() / 2);
8188 Ops
[0] = Builder
.CreateLShr(Ops
[0], ShiftAmt
, "vaddhn");
8190 // %res = trunc <4 x i32> %high to <4 x i16>
8191 return Builder
.CreateTrunc(Ops
[0], VTy
, "vaddhn");
8193 case NEON::BI__builtin_neon_vcale_v
:
8194 case NEON::BI__builtin_neon_vcaleq_v
:
8195 case NEON::BI__builtin_neon_vcalt_v
:
8196 case NEON::BI__builtin_neon_vcaltq_v
:
8197 std::swap(Ops
[0], Ops
[1]);
8199 case NEON::BI__builtin_neon_vcage_v
:
8200 case NEON::BI__builtin_neon_vcageq_v
:
8201 case NEON::BI__builtin_neon_vcagt_v
:
8202 case NEON::BI__builtin_neon_vcagtq_v
: {
8204 switch (VTy
->getScalarSizeInBits()) {
8205 default: llvm_unreachable("unexpected type");
8216 auto *VecFlt
= llvm::FixedVectorType::get(Ty
, VTy
->getNumElements());
8217 llvm::Type
*Tys
[] = { VTy
, VecFlt
};
8218 Function
*F
= CGM
.getIntrinsic(LLVMIntrinsic
, Tys
);
8219 return EmitNeonCall(F
, Ops
, NameHint
);
8221 case NEON::BI__builtin_neon_vceqz_v
:
8222 case NEON::BI__builtin_neon_vceqzq_v
:
8223 return EmitAArch64CompareBuiltinExpr(Ops
[0], Ty
, ICmpInst::FCMP_OEQ
,
8224 ICmpInst::ICMP_EQ
, "vceqz");
8225 case NEON::BI__builtin_neon_vcgez_v
:
8226 case NEON::BI__builtin_neon_vcgezq_v
:
8227 return EmitAArch64CompareBuiltinExpr(Ops
[0], Ty
, ICmpInst::FCMP_OGE
,
8228 ICmpInst::ICMP_SGE
, "vcgez");
8229 case NEON::BI__builtin_neon_vclez_v
:
8230 case NEON::BI__builtin_neon_vclezq_v
:
8231 return EmitAArch64CompareBuiltinExpr(Ops
[0], Ty
, ICmpInst::FCMP_OLE
,
8232 ICmpInst::ICMP_SLE
, "vclez");
8233 case NEON::BI__builtin_neon_vcgtz_v
:
8234 case NEON::BI__builtin_neon_vcgtzq_v
:
8235 return EmitAArch64CompareBuiltinExpr(Ops
[0], Ty
, ICmpInst::FCMP_OGT
,
8236 ICmpInst::ICMP_SGT
, "vcgtz");
8237 case NEON::BI__builtin_neon_vcltz_v
:
8238 case NEON::BI__builtin_neon_vcltzq_v
:
8239 return EmitAArch64CompareBuiltinExpr(Ops
[0], Ty
, ICmpInst::FCMP_OLT
,
8240 ICmpInst::ICMP_SLT
, "vcltz");
8241 case NEON::BI__builtin_neon_vclz_v
:
8242 case NEON::BI__builtin_neon_vclzq_v
:
8243 // We generate a target-independent intrinsic, which needs a second argument
8244 // for whether or not clz of zero is undefined; on ARM it isn't.
8245 Ops
.push_back(Builder
.getInt1(getTarget().isCLZForZeroUndef()));
8247 case NEON::BI__builtin_neon_vcvt_f32_v
:
8248 case NEON::BI__builtin_neon_vcvtq_f32_v
:
8249 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
8250 Ty
= GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32
, false, Quad
),
8252 return Usgn
? Builder
.CreateUIToFP(Ops
[0], Ty
, "vcvt")
8253 : Builder
.CreateSIToFP(Ops
[0], Ty
, "vcvt");
8254 case NEON::BI__builtin_neon_vcvt_f16_s16
:
8255 case NEON::BI__builtin_neon_vcvt_f16_u16
:
8256 case NEON::BI__builtin_neon_vcvtq_f16_s16
:
8257 case NEON::BI__builtin_neon_vcvtq_f16_u16
:
8258 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
8259 Ty
= GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16
, false, Quad
),
8261 return Usgn
? Builder
.CreateUIToFP(Ops
[0], Ty
, "vcvt")
8262 : Builder
.CreateSIToFP(Ops
[0], Ty
, "vcvt");
8263 case NEON::BI__builtin_neon_vcvt_n_f16_s16
:
8264 case NEON::BI__builtin_neon_vcvt_n_f16_u16
:
8265 case NEON::BI__builtin_neon_vcvtq_n_f16_s16
:
8266 case NEON::BI__builtin_neon_vcvtq_n_f16_u16
: {
8267 llvm::Type
*Tys
[2] = { GetFloatNeonType(this, Type
), Ty
};
8268 Function
*F
= CGM
.getIntrinsic(Int
, Tys
);
8269 return EmitNeonCall(F
, Ops
, "vcvt_n");
8271 case NEON::BI__builtin_neon_vcvt_n_f32_v
:
8272 case NEON::BI__builtin_neon_vcvt_n_f64_v
:
8273 case NEON::BI__builtin_neon_vcvtq_n_f32_v
:
8274 case NEON::BI__builtin_neon_vcvtq_n_f64_v
: {
8275 llvm::Type
*Tys
[2] = { GetFloatNeonType(this, Type
), Ty
};
8276 Int
= Usgn
? LLVMIntrinsic
: AltLLVMIntrinsic
;
8277 Function
*F
= CGM
.getIntrinsic(Int
, Tys
);
8278 return EmitNeonCall(F
, Ops
, "vcvt_n");
8280 case NEON::BI__builtin_neon_vcvt_n_s16_f16
:
8281 case NEON::BI__builtin_neon_vcvt_n_s32_v
:
8282 case NEON::BI__builtin_neon_vcvt_n_u16_f16
:
8283 case NEON::BI__builtin_neon_vcvt_n_u32_v
:
8284 case NEON::BI__builtin_neon_vcvt_n_s64_v
:
8285 case NEON::BI__builtin_neon_vcvt_n_u64_v
:
8286 case NEON::BI__builtin_neon_vcvtq_n_s16_f16
:
8287 case NEON::BI__builtin_neon_vcvtq_n_s32_v
:
8288 case NEON::BI__builtin_neon_vcvtq_n_u16_f16
:
8289 case NEON::BI__builtin_neon_vcvtq_n_u32_v
:
8290 case NEON::BI__builtin_neon_vcvtq_n_s64_v
:
8291 case NEON::BI__builtin_neon_vcvtq_n_u64_v
: {
8292 llvm::Type
*Tys
[2] = { Ty
, GetFloatNeonType(this, Type
) };
8293 Function
*F
= CGM
.getIntrinsic(LLVMIntrinsic
, Tys
);
8294 return EmitNeonCall(F
, Ops
, "vcvt_n");
8296 case NEON::BI__builtin_neon_vcvt_s32_v
:
8297 case NEON::BI__builtin_neon_vcvt_u32_v
:
8298 case NEON::BI__builtin_neon_vcvt_s64_v
:
8299 case NEON::BI__builtin_neon_vcvt_u64_v
:
8300 case NEON::BI__builtin_neon_vcvt_s16_f16
:
8301 case NEON::BI__builtin_neon_vcvt_u16_f16
:
8302 case NEON::BI__builtin_neon_vcvtq_s32_v
:
8303 case NEON::BI__builtin_neon_vcvtq_u32_v
:
8304 case NEON::BI__builtin_neon_vcvtq_s64_v
:
8305 case NEON::BI__builtin_neon_vcvtq_u64_v
:
8306 case NEON::BI__builtin_neon_vcvtq_s16_f16
:
8307 case NEON::BI__builtin_neon_vcvtq_u16_f16
: {
8308 Ops
[0] = Builder
.CreateBitCast(Ops
[0], GetFloatNeonType(this, Type
));
8309 return Usgn
? Builder
.CreateFPToUI(Ops
[0], Ty
, "vcvt")
8310 : Builder
.CreateFPToSI(Ops
[0], Ty
, "vcvt");
8312 case NEON::BI__builtin_neon_vcvta_s16_f16
:
8313 case NEON::BI__builtin_neon_vcvta_s32_v
:
8314 case NEON::BI__builtin_neon_vcvta_s64_v
:
8315 case NEON::BI__builtin_neon_vcvta_u16_f16
:
8316 case NEON::BI__builtin_neon_vcvta_u32_v
:
8317 case NEON::BI__builtin_neon_vcvta_u64_v
:
8318 case NEON::BI__builtin_neon_vcvtaq_s16_f16
:
8319 case NEON::BI__builtin_neon_vcvtaq_s32_v
:
8320 case NEON::BI__builtin_neon_vcvtaq_s64_v
:
8321 case NEON::BI__builtin_neon_vcvtaq_u16_f16
:
8322 case NEON::BI__builtin_neon_vcvtaq_u32_v
:
8323 case NEON::BI__builtin_neon_vcvtaq_u64_v
:
8324 case NEON::BI__builtin_neon_vcvtn_s16_f16
:
8325 case NEON::BI__builtin_neon_vcvtn_s32_v
:
8326 case NEON::BI__builtin_neon_vcvtn_s64_v
:
8327 case NEON::BI__builtin_neon_vcvtn_u16_f16
:
8328 case NEON::BI__builtin_neon_vcvtn_u32_v
:
8329 case NEON::BI__builtin_neon_vcvtn_u64_v
:
8330 case NEON::BI__builtin_neon_vcvtnq_s16_f16
:
8331 case NEON::BI__builtin_neon_vcvtnq_s32_v
:
8332 case NEON::BI__builtin_neon_vcvtnq_s64_v
:
8333 case NEON::BI__builtin_neon_vcvtnq_u16_f16
:
8334 case NEON::BI__builtin_neon_vcvtnq_u32_v
:
8335 case NEON::BI__builtin_neon_vcvtnq_u64_v
:
8336 case NEON::BI__builtin_neon_vcvtp_s16_f16
:
8337 case NEON::BI__builtin_neon_vcvtp_s32_v
:
8338 case NEON::BI__builtin_neon_vcvtp_s64_v
:
8339 case NEON::BI__builtin_neon_vcvtp_u16_f16
:
8340 case NEON::BI__builtin_neon_vcvtp_u32_v
:
8341 case NEON::BI__builtin_neon_vcvtp_u64_v
:
8342 case NEON::BI__builtin_neon_vcvtpq_s16_f16
:
8343 case NEON::BI__builtin_neon_vcvtpq_s32_v
:
8344 case NEON::BI__builtin_neon_vcvtpq_s64_v
:
8345 case NEON::BI__builtin_neon_vcvtpq_u16_f16
:
8346 case NEON::BI__builtin_neon_vcvtpq_u32_v
:
8347 case NEON::BI__builtin_neon_vcvtpq_u64_v
:
8348 case NEON::BI__builtin_neon_vcvtm_s16_f16
:
8349 case NEON::BI__builtin_neon_vcvtm_s32_v
:
8350 case NEON::BI__builtin_neon_vcvtm_s64_v
:
8351 case NEON::BI__builtin_neon_vcvtm_u16_f16
:
8352 case NEON::BI__builtin_neon_vcvtm_u32_v
:
8353 case NEON::BI__builtin_neon_vcvtm_u64_v
:
8354 case NEON::BI__builtin_neon_vcvtmq_s16_f16
:
8355 case NEON::BI__builtin_neon_vcvtmq_s32_v
:
8356 case NEON::BI__builtin_neon_vcvtmq_s64_v
:
8357 case NEON::BI__builtin_neon_vcvtmq_u16_f16
:
8358 case NEON::BI__builtin_neon_vcvtmq_u32_v
:
8359 case NEON::BI__builtin_neon_vcvtmq_u64_v
: {
8360 llvm::Type
*Tys
[2] = { Ty
, GetFloatNeonType(this, Type
) };
8361 return EmitNeonCall(CGM
.getIntrinsic(LLVMIntrinsic
, Tys
), Ops
, NameHint
);
8363 case NEON::BI__builtin_neon_vcvtx_f32_v
: {
8364 llvm::Type
*Tys
[2] = { VTy
->getTruncatedElementVectorType(VTy
), Ty
};
8365 return EmitNeonCall(CGM
.getIntrinsic(LLVMIntrinsic
, Tys
), Ops
, NameHint
);
8368 case NEON::BI__builtin_neon_vext_v
:
8369 case NEON::BI__builtin_neon_vextq_v
: {
8370 int CV
= cast
<ConstantInt
>(Ops
[2])->getSExtValue();
8371 SmallVector
<int, 16> Indices
;
8372 for (unsigned i
= 0, e
= VTy
->getNumElements(); i
!= e
; ++i
)
8373 Indices
.push_back(i
+CV
);
8375 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
8376 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
8377 return Builder
.CreateShuffleVector(Ops
[0], Ops
[1], Indices
, "vext");
8379 case NEON::BI__builtin_neon_vfma_v
:
8380 case NEON::BI__builtin_neon_vfmaq_v
: {
8381 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
8382 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
8383 Ops
[2] = Builder
.CreateBitCast(Ops
[2], Ty
);
8385 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
8386 return emitCallMaybeConstrainedFPBuiltin(
8387 *this, Intrinsic::fma
, Intrinsic::experimental_constrained_fma
, Ty
,
8388 {Ops
[1], Ops
[2], Ops
[0]});
8390 case NEON::BI__builtin_neon_vld1_v
:
8391 case NEON::BI__builtin_neon_vld1q_v
: {
8392 llvm::Type
*Tys
[] = {Ty
, Int8PtrTy
};
8393 Ops
.push_back(getAlignmentValue32(PtrOp0
));
8394 return EmitNeonCall(CGM
.getIntrinsic(LLVMIntrinsic
, Tys
), Ops
, "vld1");
8396 case NEON::BI__builtin_neon_vld1_x2_v
:
8397 case NEON::BI__builtin_neon_vld1q_x2_v
:
8398 case NEON::BI__builtin_neon_vld1_x3_v
:
8399 case NEON::BI__builtin_neon_vld1q_x3_v
:
8400 case NEON::BI__builtin_neon_vld1_x4_v
:
8401 case NEON::BI__builtin_neon_vld1q_x4_v
: {
8402 llvm::Type
*Tys
[2] = {VTy
, UnqualPtrTy
};
8403 Function
*F
= CGM
.getIntrinsic(LLVMIntrinsic
, Tys
);
8404 Ops
[1] = Builder
.CreateCall(F
, Ops
[1], "vld1xN");
8405 return Builder
.CreateDefaultAlignedStore(Ops
[1], Ops
[0]);
8407 case NEON::BI__builtin_neon_vld2_v
:
8408 case NEON::BI__builtin_neon_vld2q_v
:
8409 case NEON::BI__builtin_neon_vld3_v
:
8410 case NEON::BI__builtin_neon_vld3q_v
:
8411 case NEON::BI__builtin_neon_vld4_v
:
8412 case NEON::BI__builtin_neon_vld4q_v
:
8413 case NEON::BI__builtin_neon_vld2_dup_v
:
8414 case NEON::BI__builtin_neon_vld2q_dup_v
:
8415 case NEON::BI__builtin_neon_vld3_dup_v
:
8416 case NEON::BI__builtin_neon_vld3q_dup_v
:
8417 case NEON::BI__builtin_neon_vld4_dup_v
:
8418 case NEON::BI__builtin_neon_vld4q_dup_v
: {
8419 llvm::Type
*Tys
[] = {Ty
, Int8PtrTy
};
8420 Function
*F
= CGM
.getIntrinsic(LLVMIntrinsic
, Tys
);
8421 Value
*Align
= getAlignmentValue32(PtrOp1
);
8422 Ops
[1] = Builder
.CreateCall(F
, {Ops
[1], Align
}, NameHint
);
8423 return Builder
.CreateDefaultAlignedStore(Ops
[1], Ops
[0]);
8425 case NEON::BI__builtin_neon_vld1_dup_v
:
8426 case NEON::BI__builtin_neon_vld1q_dup_v
: {
8427 Value
*V
= PoisonValue::get(Ty
);
8428 PtrOp0
= PtrOp0
.withElementType(VTy
->getElementType());
8429 LoadInst
*Ld
= Builder
.CreateLoad(PtrOp0
);
8430 llvm::Constant
*CI
= ConstantInt::get(SizeTy
, 0);
8431 Ops
[0] = Builder
.CreateInsertElement(V
, Ld
, CI
);
8432 return EmitNeonSplat(Ops
[0], CI
);
8434 case NEON::BI__builtin_neon_vld2_lane_v
:
8435 case NEON::BI__builtin_neon_vld2q_lane_v
:
8436 case NEON::BI__builtin_neon_vld3_lane_v
:
8437 case NEON::BI__builtin_neon_vld3q_lane_v
:
8438 case NEON::BI__builtin_neon_vld4_lane_v
:
8439 case NEON::BI__builtin_neon_vld4q_lane_v
: {
8440 llvm::Type
*Tys
[] = {Ty
, Int8PtrTy
};
8441 Function
*F
= CGM
.getIntrinsic(LLVMIntrinsic
, Tys
);
8442 for (unsigned I
= 2; I
< Ops
.size() - 1; ++I
)
8443 Ops
[I
] = Builder
.CreateBitCast(Ops
[I
], Ty
);
8444 Ops
.push_back(getAlignmentValue32(PtrOp1
));
8445 Ops
[1] = Builder
.CreateCall(F
, ArrayRef(Ops
).slice(1), NameHint
);
8446 return Builder
.CreateDefaultAlignedStore(Ops
[1], Ops
[0]);
8448 case NEON::BI__builtin_neon_vmovl_v
: {
8449 llvm::FixedVectorType
*DTy
=
8450 llvm::FixedVectorType::getTruncatedElementVectorType(VTy
);
8451 Ops
[0] = Builder
.CreateBitCast(Ops
[0], DTy
);
8453 return Builder
.CreateZExt(Ops
[0], Ty
, "vmovl");
8454 return Builder
.CreateSExt(Ops
[0], Ty
, "vmovl");
8456 case NEON::BI__builtin_neon_vmovn_v
: {
8457 llvm::FixedVectorType
*QTy
=
8458 llvm::FixedVectorType::getExtendedElementVectorType(VTy
);
8459 Ops
[0] = Builder
.CreateBitCast(Ops
[0], QTy
);
8460 return Builder
.CreateTrunc(Ops
[0], Ty
, "vmovn");
8462 case NEON::BI__builtin_neon_vmull_v
:
8463 // FIXME: the integer vmull operations could be emitted in terms of pure
8464 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
8465 // hoisting the exts outside loops. Until global ISel comes along that can
8466 // see through such movement this leads to bad CodeGen. So we need an
8467 // intrinsic for now.
8468 Int
= Usgn
? Intrinsic::arm_neon_vmullu
: Intrinsic::arm_neon_vmulls
;
8469 Int
= Type
.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp
: Int
;
8470 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vmull");
8471 case NEON::BI__builtin_neon_vpadal_v
:
8472 case NEON::BI__builtin_neon_vpadalq_v
: {
8473 // The source operand type has twice as many elements of half the size.
8474 unsigned EltBits
= VTy
->getElementType()->getPrimitiveSizeInBits();
8476 llvm::IntegerType::get(getLLVMContext(), EltBits
/ 2);
8478 llvm::FixedVectorType::get(EltTy
, VTy
->getNumElements() * 2);
8479 llvm::Type
*Tys
[2] = { Ty
, NarrowTy
};
8480 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, NameHint
);
8482 case NEON::BI__builtin_neon_vpaddl_v
:
8483 case NEON::BI__builtin_neon_vpaddlq_v
: {
8484 // The source operand type has twice as many elements of half the size.
8485 unsigned EltBits
= VTy
->getElementType()->getPrimitiveSizeInBits();
8486 llvm::Type
*EltTy
= llvm::IntegerType::get(getLLVMContext(), EltBits
/ 2);
8488 llvm::FixedVectorType::get(EltTy
, VTy
->getNumElements() * 2);
8489 llvm::Type
*Tys
[2] = { Ty
, NarrowTy
};
8490 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vpaddl");
8492 case NEON::BI__builtin_neon_vqdmlal_v
:
8493 case NEON::BI__builtin_neon_vqdmlsl_v
: {
8494 SmallVector
<Value
*, 2> MulOps(Ops
.begin() + 1, Ops
.end());
8496 EmitNeonCall(CGM
.getIntrinsic(LLVMIntrinsic
, Ty
), MulOps
, "vqdmlal");
8498 return EmitNeonCall(CGM
.getIntrinsic(AltLLVMIntrinsic
, Ty
), Ops
, NameHint
);
8500 case NEON::BI__builtin_neon_vqdmulhq_lane_v
:
8501 case NEON::BI__builtin_neon_vqdmulh_lane_v
:
8502 case NEON::BI__builtin_neon_vqrdmulhq_lane_v
:
8503 case NEON::BI__builtin_neon_vqrdmulh_lane_v
: {
8504 auto *RTy
= cast
<llvm::FixedVectorType
>(Ty
);
8505 if (BuiltinID
== NEON::BI__builtin_neon_vqdmulhq_lane_v
||
8506 BuiltinID
== NEON::BI__builtin_neon_vqrdmulhq_lane_v
)
8507 RTy
= llvm::FixedVectorType::get(RTy
->getElementType(),
8508 RTy
->getNumElements() * 2);
8509 llvm::Type
*Tys
[2] = {
8510 RTy
, GetNeonType(this, NeonTypeFlags(Type
.getEltType(), false,
8511 /*isQuad*/ false))};
8512 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, NameHint
);
8514 case NEON::BI__builtin_neon_vqdmulhq_laneq_v
:
8515 case NEON::BI__builtin_neon_vqdmulh_laneq_v
:
8516 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v
:
8517 case NEON::BI__builtin_neon_vqrdmulh_laneq_v
: {
8518 llvm::Type
*Tys
[2] = {
8519 Ty
, GetNeonType(this, NeonTypeFlags(Type
.getEltType(), false,
8521 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, NameHint
);
8523 case NEON::BI__builtin_neon_vqshl_n_v
:
8524 case NEON::BI__builtin_neon_vqshlq_n_v
:
8525 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vqshl_n",
8527 case NEON::BI__builtin_neon_vqshlu_n_v
:
8528 case NEON::BI__builtin_neon_vqshluq_n_v
:
8529 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vqshlu_n",
8531 case NEON::BI__builtin_neon_vrecpe_v
:
8532 case NEON::BI__builtin_neon_vrecpeq_v
:
8533 case NEON::BI__builtin_neon_vrsqrte_v
:
8534 case NEON::BI__builtin_neon_vrsqrteq_v
:
8535 Int
= Ty
->isFPOrFPVectorTy() ? LLVMIntrinsic
: AltLLVMIntrinsic
;
8536 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, NameHint
);
8537 case NEON::BI__builtin_neon_vrndi_v
:
8538 case NEON::BI__builtin_neon_vrndiq_v
:
8539 Int
= Builder
.getIsFPConstrained()
8540 ? Intrinsic::experimental_constrained_nearbyint
8541 : Intrinsic::nearbyint
;
8542 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, NameHint
);
8543 case NEON::BI__builtin_neon_vrshr_n_v
:
8544 case NEON::BI__builtin_neon_vrshrq_n_v
:
8545 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vrshr_n",
8547 case NEON::BI__builtin_neon_vsha512hq_u64
:
8548 case NEON::BI__builtin_neon_vsha512h2q_u64
:
8549 case NEON::BI__builtin_neon_vsha512su0q_u64
:
8550 case NEON::BI__builtin_neon_vsha512su1q_u64
: {
8551 Function
*F
= CGM
.getIntrinsic(Int
);
8552 return EmitNeonCall(F
, Ops
, "");
8554 case NEON::BI__builtin_neon_vshl_n_v
:
8555 case NEON::BI__builtin_neon_vshlq_n_v
:
8556 Ops
[1] = EmitNeonShiftVector(Ops
[1], Ty
, false);
8557 return Builder
.CreateShl(Builder
.CreateBitCast(Ops
[0],Ty
), Ops
[1],
8559 case NEON::BI__builtin_neon_vshll_n_v
: {
8560 llvm::FixedVectorType
*SrcTy
=
8561 llvm::FixedVectorType::getTruncatedElementVectorType(VTy
);
8562 Ops
[0] = Builder
.CreateBitCast(Ops
[0], SrcTy
);
8564 Ops
[0] = Builder
.CreateZExt(Ops
[0], VTy
);
8566 Ops
[0] = Builder
.CreateSExt(Ops
[0], VTy
);
8567 Ops
[1] = EmitNeonShiftVector(Ops
[1], VTy
, false);
8568 return Builder
.CreateShl(Ops
[0], Ops
[1], "vshll_n");
8570 case NEON::BI__builtin_neon_vshrn_n_v
: {
8571 llvm::FixedVectorType
*SrcTy
=
8572 llvm::FixedVectorType::getExtendedElementVectorType(VTy
);
8573 Ops
[0] = Builder
.CreateBitCast(Ops
[0], SrcTy
);
8574 Ops
[1] = EmitNeonShiftVector(Ops
[1], SrcTy
, false);
8576 Ops
[0] = Builder
.CreateLShr(Ops
[0], Ops
[1]);
8578 Ops
[0] = Builder
.CreateAShr(Ops
[0], Ops
[1]);
8579 return Builder
.CreateTrunc(Ops
[0], Ty
, "vshrn_n");
8581 case NEON::BI__builtin_neon_vshr_n_v
:
8582 case NEON::BI__builtin_neon_vshrq_n_v
:
8583 return EmitNeonRShiftImm(Ops
[0], Ops
[1], Ty
, Usgn
, "vshr_n");
8584 case NEON::BI__builtin_neon_vst1_v
:
8585 case NEON::BI__builtin_neon_vst1q_v
:
8586 case NEON::BI__builtin_neon_vst2_v
:
8587 case NEON::BI__builtin_neon_vst2q_v
:
8588 case NEON::BI__builtin_neon_vst3_v
:
8589 case NEON::BI__builtin_neon_vst3q_v
:
8590 case NEON::BI__builtin_neon_vst4_v
:
8591 case NEON::BI__builtin_neon_vst4q_v
:
8592 case NEON::BI__builtin_neon_vst2_lane_v
:
8593 case NEON::BI__builtin_neon_vst2q_lane_v
:
8594 case NEON::BI__builtin_neon_vst3_lane_v
:
8595 case NEON::BI__builtin_neon_vst3q_lane_v
:
8596 case NEON::BI__builtin_neon_vst4_lane_v
:
8597 case NEON::BI__builtin_neon_vst4q_lane_v
: {
8598 llvm::Type
*Tys
[] = {Int8PtrTy
, Ty
};
8599 Ops
.push_back(getAlignmentValue32(PtrOp0
));
8600 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "");
8602 case NEON::BI__builtin_neon_vsm3partw1q_u32
:
8603 case NEON::BI__builtin_neon_vsm3partw2q_u32
:
8604 case NEON::BI__builtin_neon_vsm3ss1q_u32
:
8605 case NEON::BI__builtin_neon_vsm4ekeyq_u32
:
8606 case NEON::BI__builtin_neon_vsm4eq_u32
: {
8607 Function
*F
= CGM
.getIntrinsic(Int
);
8608 return EmitNeonCall(F
, Ops
, "");
8610 case NEON::BI__builtin_neon_vsm3tt1aq_u32
:
8611 case NEON::BI__builtin_neon_vsm3tt1bq_u32
:
8612 case NEON::BI__builtin_neon_vsm3tt2aq_u32
:
8613 case NEON::BI__builtin_neon_vsm3tt2bq_u32
: {
8614 Function
*F
= CGM
.getIntrinsic(Int
);
8615 Ops
[3] = Builder
.CreateZExt(Ops
[3], Int64Ty
);
8616 return EmitNeonCall(F
, Ops
, "");
8618 case NEON::BI__builtin_neon_vst1_x2_v
:
8619 case NEON::BI__builtin_neon_vst1q_x2_v
:
8620 case NEON::BI__builtin_neon_vst1_x3_v
:
8621 case NEON::BI__builtin_neon_vst1q_x3_v
:
8622 case NEON::BI__builtin_neon_vst1_x4_v
:
8623 case NEON::BI__builtin_neon_vst1q_x4_v
: {
8624 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
8625 // in AArch64 it comes last. We may want to stick to one or another.
8626 if (Arch
== llvm::Triple::aarch64
|| Arch
== llvm::Triple::aarch64_be
||
8627 Arch
== llvm::Triple::aarch64_32
) {
8628 llvm::Type
*Tys
[2] = {VTy
, UnqualPtrTy
};
8629 std::rotate(Ops
.begin(), Ops
.begin() + 1, Ops
.end());
8630 return EmitNeonCall(CGM
.getIntrinsic(LLVMIntrinsic
, Tys
), Ops
, "");
8632 llvm::Type
*Tys
[2] = {UnqualPtrTy
, VTy
};
8633 return EmitNeonCall(CGM
.getIntrinsic(LLVMIntrinsic
, Tys
), Ops
, "");
8635 case NEON::BI__builtin_neon_vsubhn_v
: {
8636 llvm::FixedVectorType
*SrcTy
=
8637 llvm::FixedVectorType::getExtendedElementVectorType(VTy
);
8639 // %sum = add <4 x i32> %lhs, %rhs
8640 Ops
[0] = Builder
.CreateBitCast(Ops
[0], SrcTy
);
8641 Ops
[1] = Builder
.CreateBitCast(Ops
[1], SrcTy
);
8642 Ops
[0] = Builder
.CreateSub(Ops
[0], Ops
[1], "vsubhn");
8644 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
8645 Constant
*ShiftAmt
=
8646 ConstantInt::get(SrcTy
, SrcTy
->getScalarSizeInBits() / 2);
8647 Ops
[0] = Builder
.CreateLShr(Ops
[0], ShiftAmt
, "vsubhn");
8649 // %res = trunc <4 x i32> %high to <4 x i16>
8650 return Builder
.CreateTrunc(Ops
[0], VTy
, "vsubhn");
8652 case NEON::BI__builtin_neon_vtrn_v
:
8653 case NEON::BI__builtin_neon_vtrnq_v
: {
8654 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
8655 Ops
[2] = Builder
.CreateBitCast(Ops
[2], Ty
);
8656 Value
*SV
= nullptr;
8658 for (unsigned vi
= 0; vi
!= 2; ++vi
) {
8659 SmallVector
<int, 16> Indices
;
8660 for (unsigned i
= 0, e
= VTy
->getNumElements(); i
!= e
; i
+= 2) {
8661 Indices
.push_back(i
+vi
);
8662 Indices
.push_back(i
+e
+vi
);
8664 Value
*Addr
= Builder
.CreateConstInBoundsGEP1_32(Ty
, Ops
[0], vi
);
8665 SV
= Builder
.CreateShuffleVector(Ops
[1], Ops
[2], Indices
, "vtrn");
8666 SV
= Builder
.CreateDefaultAlignedStore(SV
, Addr
);
8670 case NEON::BI__builtin_neon_vtst_v
:
8671 case NEON::BI__builtin_neon_vtstq_v
: {
8672 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
8673 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
8674 Ops
[0] = Builder
.CreateAnd(Ops
[0], Ops
[1]);
8675 Ops
[0] = Builder
.CreateICmp(ICmpInst::ICMP_NE
, Ops
[0],
8676 ConstantAggregateZero::get(Ty
));
8677 return Builder
.CreateSExt(Ops
[0], Ty
, "vtst");
8679 case NEON::BI__builtin_neon_vuzp_v
:
8680 case NEON::BI__builtin_neon_vuzpq_v
: {
8681 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
8682 Ops
[2] = Builder
.CreateBitCast(Ops
[2], Ty
);
8683 Value
*SV
= nullptr;
8685 for (unsigned vi
= 0; vi
!= 2; ++vi
) {
8686 SmallVector
<int, 16> Indices
;
8687 for (unsigned i
= 0, e
= VTy
->getNumElements(); i
!= e
; ++i
)
8688 Indices
.push_back(2*i
+vi
);
8690 Value
*Addr
= Builder
.CreateConstInBoundsGEP1_32(Ty
, Ops
[0], vi
);
8691 SV
= Builder
.CreateShuffleVector(Ops
[1], Ops
[2], Indices
, "vuzp");
8692 SV
= Builder
.CreateDefaultAlignedStore(SV
, Addr
);
8696 case NEON::BI__builtin_neon_vxarq_u64
: {
8697 Function
*F
= CGM
.getIntrinsic(Int
);
8698 Ops
[2] = Builder
.CreateZExt(Ops
[2], Int64Ty
);
8699 return EmitNeonCall(F
, Ops
, "");
8701 case NEON::BI__builtin_neon_vzip_v
:
8702 case NEON::BI__builtin_neon_vzipq_v
: {
8703 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
8704 Ops
[2] = Builder
.CreateBitCast(Ops
[2], Ty
);
8705 Value
*SV
= nullptr;
8707 for (unsigned vi
= 0; vi
!= 2; ++vi
) {
8708 SmallVector
<int, 16> Indices
;
8709 for (unsigned i
= 0, e
= VTy
->getNumElements(); i
!= e
; i
+= 2) {
8710 Indices
.push_back((i
+ vi
*e
) >> 1);
8711 Indices
.push_back(((i
+ vi
*e
) >> 1)+e
);
8713 Value
*Addr
= Builder
.CreateConstInBoundsGEP1_32(Ty
, Ops
[0], vi
);
8714 SV
= Builder
.CreateShuffleVector(Ops
[1], Ops
[2], Indices
, "vzip");
8715 SV
= Builder
.CreateDefaultAlignedStore(SV
, Addr
);
8719 case NEON::BI__builtin_neon_vdot_s32
:
8720 case NEON::BI__builtin_neon_vdot_u32
:
8721 case NEON::BI__builtin_neon_vdotq_s32
:
8722 case NEON::BI__builtin_neon_vdotq_u32
: {
8724 llvm::FixedVectorType::get(Int8Ty
, Ty
->getPrimitiveSizeInBits() / 8);
8725 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
8726 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vdot");
8728 case NEON::BI__builtin_neon_vfmlal_low_f16
:
8729 case NEON::BI__builtin_neon_vfmlalq_low_f16
: {
8731 llvm::FixedVectorType::get(HalfTy
, Ty
->getPrimitiveSizeInBits() / 16);
8732 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
8733 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vfmlal_low");
8735 case NEON::BI__builtin_neon_vfmlsl_low_f16
:
8736 case NEON::BI__builtin_neon_vfmlslq_low_f16
: {
8738 llvm::FixedVectorType::get(HalfTy
, Ty
->getPrimitiveSizeInBits() / 16);
8739 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
8740 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vfmlsl_low");
8742 case NEON::BI__builtin_neon_vfmlal_high_f16
:
8743 case NEON::BI__builtin_neon_vfmlalq_high_f16
: {
8745 llvm::FixedVectorType::get(HalfTy
, Ty
->getPrimitiveSizeInBits() / 16);
8746 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
8747 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vfmlal_high");
8749 case NEON::BI__builtin_neon_vfmlsl_high_f16
:
8750 case NEON::BI__builtin_neon_vfmlslq_high_f16
: {
8752 llvm::FixedVectorType::get(HalfTy
, Ty
->getPrimitiveSizeInBits() / 16);
8753 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
8754 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vfmlsl_high");
8756 case NEON::BI__builtin_neon_vmmlaq_s32
:
8757 case NEON::BI__builtin_neon_vmmlaq_u32
: {
8759 llvm::FixedVectorType::get(Int8Ty
, Ty
->getPrimitiveSizeInBits() / 8);
8760 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
8761 return EmitNeonCall(CGM
.getIntrinsic(LLVMIntrinsic
, Tys
), Ops
, "vmmla");
8763 case NEON::BI__builtin_neon_vusmmlaq_s32
: {
8765 llvm::FixedVectorType::get(Int8Ty
, Ty
->getPrimitiveSizeInBits() / 8);
8766 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
8767 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vusmmla");
8769 case NEON::BI__builtin_neon_vusdot_s32
:
8770 case NEON::BI__builtin_neon_vusdotq_s32
: {
8772 llvm::FixedVectorType::get(Int8Ty
, Ty
->getPrimitiveSizeInBits() / 8);
8773 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
8774 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vusdot");
8776 case NEON::BI__builtin_neon_vbfdot_f32
:
8777 case NEON::BI__builtin_neon_vbfdotq_f32
: {
8778 llvm::Type
*InputTy
=
8779 llvm::FixedVectorType::get(BFloatTy
, Ty
->getPrimitiveSizeInBits() / 16);
8780 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
8781 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vbfdot");
8783 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32
: {
8784 llvm::Type
*Tys
[1] = { Ty
};
8785 Function
*F
= CGM
.getIntrinsic(Int
, Tys
);
8786 return EmitNeonCall(F
, Ops
, "vcvtfp2bf");
8791 assert(Int
&& "Expected valid intrinsic number");
8793 // Determine the type(s) of this overloaded AArch64 intrinsic.
8794 Function
*F
= LookupNeonLLVMIntrinsic(Int
, Modifier
, Ty
, E
);
8796 Value
*Result
= EmitNeonCall(F
, Ops
, NameHint
);
8797 llvm::Type
*ResultType
= ConvertType(E
->getType());
8798 // AArch64 intrinsic one-element vector type cast to
8799 // scalar type expected by the builtin
8800 return Builder
.CreateBitCast(Result
, ResultType
, NameHint
);
8803 Value
*CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8804 Value
*Op
, llvm::Type
*Ty
, const CmpInst::Predicate Fp
,
8805 const CmpInst::Predicate Ip
, const Twine
&Name
) {
8806 llvm::Type
*OTy
= Op
->getType();
8808 // FIXME: this is utterly horrific. We should not be looking at previous
8809 // codegen context to find out what needs doing. Unfortunately TableGen
8810 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8812 if (BitCastInst
*BI
= dyn_cast
<BitCastInst
>(Op
))
8813 OTy
= BI
->getOperand(0)->getType();
8815 Op
= Builder
.CreateBitCast(Op
, OTy
);
8816 if (OTy
->getScalarType()->isFloatingPointTy()) {
8817 if (Fp
== CmpInst::FCMP_OEQ
)
8818 Op
= Builder
.CreateFCmp(Fp
, Op
, Constant::getNullValue(OTy
));
8820 Op
= Builder
.CreateFCmpS(Fp
, Op
, Constant::getNullValue(OTy
));
8822 Op
= Builder
.CreateICmp(Ip
, Op
, Constant::getNullValue(OTy
));
8824 return Builder
.CreateSExt(Op
, Ty
, Name
);
8827 static Value
*packTBLDVectorList(CodeGenFunction
&CGF
, ArrayRef
<Value
*> Ops
,
8828 Value
*ExtOp
, Value
*IndexOp
,
8829 llvm::Type
*ResTy
, unsigned IntID
,
8831 SmallVector
<Value
*, 2> TblOps
;
8833 TblOps
.push_back(ExtOp
);
8835 // Build a vector containing sequential number like (0, 1, 2, ..., 15)
8836 SmallVector
<int, 16> Indices
;
8837 auto *TblTy
= cast
<llvm::FixedVectorType
>(Ops
[0]->getType());
8838 for (unsigned i
= 0, e
= TblTy
->getNumElements(); i
!= e
; ++i
) {
8839 Indices
.push_back(2*i
);
8840 Indices
.push_back(2*i
+1);
8843 int PairPos
= 0, End
= Ops
.size() - 1;
8844 while (PairPos
< End
) {
8845 TblOps
.push_back(CGF
.Builder
.CreateShuffleVector(Ops
[PairPos
],
8846 Ops
[PairPos
+1], Indices
,
8851 // If there's an odd number of 64-bit lookup table, fill the high 64-bit
8852 // of the 128-bit lookup table with zero.
8853 if (PairPos
== End
) {
8854 Value
*ZeroTbl
= ConstantAggregateZero::get(TblTy
);
8855 TblOps
.push_back(CGF
.Builder
.CreateShuffleVector(Ops
[PairPos
],
8856 ZeroTbl
, Indices
, Name
));
8860 TblOps
.push_back(IndexOp
);
8861 TblF
= CGF
.CGM
.getIntrinsic(IntID
, ResTy
);
8863 return CGF
.EmitNeonCall(TblF
, TblOps
, Name
);
8866 Value
*CodeGenFunction::GetValueForARMHint(unsigned BuiltinID
) {
8868 switch (BuiltinID
) {
8871 case clang::ARM::BI__builtin_arm_nop
:
8874 case clang::ARM::BI__builtin_arm_yield
:
8875 case clang::ARM::BI__yield
:
8878 case clang::ARM::BI__builtin_arm_wfe
:
8879 case clang::ARM::BI__wfe
:
8882 case clang::ARM::BI__builtin_arm_wfi
:
8883 case clang::ARM::BI__wfi
:
8886 case clang::ARM::BI__builtin_arm_sev
:
8887 case clang::ARM::BI__sev
:
8890 case clang::ARM::BI__builtin_arm_sevl
:
8891 case clang::ARM::BI__sevl
:
8896 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::arm_hint
),
8897 llvm::ConstantInt::get(Int32Ty
, Value
));
/// How a special-register builtin accesses its register.
enum SpecialRegisterAccessKind {
  NormalRead,   ///< Read lowered to the read_register intrinsic.
  VolatileRead, ///< Read lowered to the read_volatile_register intrinsic.
  Write,        ///< Write lowered to the write_register intrinsic.
};
8906 // Generates the IR for __builtin_read_exec_*.
8907 // Lowers the builtin to amdgcn_ballot intrinsic.
8908 static Value
*EmitAMDGCNBallotForExec(CodeGenFunction
&CGF
, const CallExpr
*E
,
8909 llvm::Type
*RegisterType
,
8910 llvm::Type
*ValueType
, bool isExecHi
) {
8911 CodeGen::CGBuilderTy
&Builder
= CGF
.Builder
;
8912 CodeGen::CodeGenModule
&CGM
= CGF
.CGM
;
8914 Function
*F
= CGM
.getIntrinsic(Intrinsic::amdgcn_ballot
, {RegisterType
});
8915 llvm::Value
*Call
= Builder
.CreateCall(F
, {Builder
.getInt1(true)});
8918 Value
*Rt2
= Builder
.CreateLShr(Call
, 32);
8919 Rt2
= Builder
.CreateTrunc(Rt2
, CGF
.Int32Ty
);
8926 // Generates the IR for the read/write special register builtin,
8927 // ValueType is the type of the value that is to be written or read,
8928 // RegisterType is the type of the register being written to or read from.
8929 static Value
*EmitSpecialRegisterBuiltin(CodeGenFunction
&CGF
,
8931 llvm::Type
*RegisterType
,
8932 llvm::Type
*ValueType
,
8933 SpecialRegisterAccessKind AccessKind
,
8934 StringRef SysReg
= "") {
8935 // write and register intrinsics only support 32, 64 and 128 bit operations.
8936 assert((RegisterType
->isIntegerTy(32) || RegisterType
->isIntegerTy(64) ||
8937 RegisterType
->isIntegerTy(128)) &&
8938 "Unsupported size for register.");
8940 CodeGen::CGBuilderTy
&Builder
= CGF
.Builder
;
8941 CodeGen::CodeGenModule
&CGM
= CGF
.CGM
;
8942 LLVMContext
&Context
= CGM
.getLLVMContext();
8944 if (SysReg
.empty()) {
8945 const Expr
*SysRegStrExpr
= E
->getArg(0)->IgnoreParenCasts();
8946 SysReg
= cast
<clang::StringLiteral
>(SysRegStrExpr
)->getString();
8949 llvm::Metadata
*Ops
[] = { llvm::MDString::get(Context
, SysReg
) };
8950 llvm::MDNode
*RegName
= llvm::MDNode::get(Context
, Ops
);
8951 llvm::Value
*Metadata
= llvm::MetadataAsValue::get(Context
, RegName
);
8953 llvm::Type
*Types
[] = { RegisterType
};
8955 bool MixedTypes
= RegisterType
->isIntegerTy(64) && ValueType
->isIntegerTy(32);
8956 assert(!(RegisterType
->isIntegerTy(32) && ValueType
->isIntegerTy(64))
8957 && "Can't fit 64-bit value in 32-bit register");
8959 if (AccessKind
!= Write
) {
8960 assert(AccessKind
== NormalRead
|| AccessKind
== VolatileRead
);
8961 llvm::Function
*F
= CGM
.getIntrinsic(
8962 AccessKind
== VolatileRead
? llvm::Intrinsic::read_volatile_register
8963 : llvm::Intrinsic::read_register
,
8965 llvm::Value
*Call
= Builder
.CreateCall(F
, Metadata
);
8968 // Read into 64 bit register and then truncate result to 32 bit.
8969 return Builder
.CreateTrunc(Call
, ValueType
);
8971 if (ValueType
->isPointerTy())
8972 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
8973 return Builder
.CreateIntToPtr(Call
, ValueType
);
8978 llvm::Function
*F
= CGM
.getIntrinsic(llvm::Intrinsic::write_register
, Types
);
8979 llvm::Value
*ArgValue
= CGF
.EmitScalarExpr(E
->getArg(1));
8981 // Extend 32 bit write value to 64 bit to pass to write.
8982 ArgValue
= Builder
.CreateZExt(ArgValue
, RegisterType
);
8983 return Builder
.CreateCall(F
, { Metadata
, ArgValue
});
8986 if (ValueType
->isPointerTy()) {
8987 // Have VoidPtrTy ArgValue but want to return an i32/i64.
8988 ArgValue
= Builder
.CreatePtrToInt(ArgValue
, RegisterType
);
8989 return Builder
.CreateCall(F
, { Metadata
, ArgValue
});
8992 return Builder
.CreateCall(F
, { Metadata
, ArgValue
});
8995 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
8996 /// argument that specifies the vector type.
8997 static bool HasExtraNeonArgument(unsigned BuiltinID
) {
8998 switch (BuiltinID
) {
9000 case NEON::BI__builtin_neon_vget_lane_i8
:
9001 case NEON::BI__builtin_neon_vget_lane_i16
:
9002 case NEON::BI__builtin_neon_vget_lane_bf16
:
9003 case NEON::BI__builtin_neon_vget_lane_i32
:
9004 case NEON::BI__builtin_neon_vget_lane_i64
:
9005 case NEON::BI__builtin_neon_vget_lane_f32
:
9006 case NEON::BI__builtin_neon_vgetq_lane_i8
:
9007 case NEON::BI__builtin_neon_vgetq_lane_i16
:
9008 case NEON::BI__builtin_neon_vgetq_lane_bf16
:
9009 case NEON::BI__builtin_neon_vgetq_lane_i32
:
9010 case NEON::BI__builtin_neon_vgetq_lane_i64
:
9011 case NEON::BI__builtin_neon_vgetq_lane_f32
:
9012 case NEON::BI__builtin_neon_vduph_lane_bf16
:
9013 case NEON::BI__builtin_neon_vduph_laneq_bf16
:
9014 case NEON::BI__builtin_neon_vset_lane_i8
:
9015 case NEON::BI__builtin_neon_vset_lane_i16
:
9016 case NEON::BI__builtin_neon_vset_lane_bf16
:
9017 case NEON::BI__builtin_neon_vset_lane_i32
:
9018 case NEON::BI__builtin_neon_vset_lane_i64
:
9019 case NEON::BI__builtin_neon_vset_lane_f32
:
9020 case NEON::BI__builtin_neon_vsetq_lane_i8
:
9021 case NEON::BI__builtin_neon_vsetq_lane_i16
:
9022 case NEON::BI__builtin_neon_vsetq_lane_bf16
:
9023 case NEON::BI__builtin_neon_vsetq_lane_i32
:
9024 case NEON::BI__builtin_neon_vsetq_lane_i64
:
9025 case NEON::BI__builtin_neon_vsetq_lane_f32
:
9026 case NEON::BI__builtin_neon_vsha1h_u32
:
9027 case NEON::BI__builtin_neon_vsha1cq_u32
:
9028 case NEON::BI__builtin_neon_vsha1pq_u32
:
9029 case NEON::BI__builtin_neon_vsha1mq_u32
:
9030 case NEON::BI__builtin_neon_vcvth_bf16_f32
:
9031 case clang::ARM::BI_MoveToCoprocessor
:
9032 case clang::ARM::BI_MoveToCoprocessor2
:
9038 Value
*CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID
,
9040 ReturnValueSlot ReturnValue
,
9041 llvm::Triple::ArchType Arch
) {
9042 if (auto Hint
= GetValueForARMHint(BuiltinID
))
9045 if (BuiltinID
== clang::ARM::BI__emit
) {
9046 bool IsThumb
= getTarget().getTriple().getArch() == llvm::Triple::thumb
;
9047 llvm::FunctionType
*FTy
=
9048 llvm::FunctionType::get(VoidTy
, /*Variadic=*/false);
9050 Expr::EvalResult Result
;
9051 if (!E
->getArg(0)->EvaluateAsInt(Result
, CGM
.getContext()))
9052 llvm_unreachable("Sema will ensure that the parameter is constant");
9054 llvm::APSInt Value
= Result
.Val
.getInt();
9055 uint64_t ZExtValue
= Value
.zextOrTrunc(IsThumb
? 16 : 32).getZExtValue();
9057 llvm::InlineAsm
*Emit
=
9058 IsThumb
? InlineAsm::get(FTy
, ".inst.n 0x" + utohexstr(ZExtValue
), "",
9059 /*hasSideEffects=*/true)
9060 : InlineAsm::get(FTy
, ".inst 0x" + utohexstr(ZExtValue
), "",
9061 /*hasSideEffects=*/true);
9063 return Builder
.CreateCall(Emit
);
9066 if (BuiltinID
== clang::ARM::BI__builtin_arm_dbg
) {
9067 Value
*Option
= EmitScalarExpr(E
->getArg(0));
9068 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::arm_dbg
), Option
);
9071 if (BuiltinID
== clang::ARM::BI__builtin_arm_prefetch
) {
9072 Value
*Address
= EmitScalarExpr(E
->getArg(0));
9073 Value
*RW
= EmitScalarExpr(E
->getArg(1));
9074 Value
*IsData
= EmitScalarExpr(E
->getArg(2));
9076 // Locality is not supported on ARM target
9077 Value
*Locality
= llvm::ConstantInt::get(Int32Ty
, 3);
9079 Function
*F
= CGM
.getIntrinsic(Intrinsic::prefetch
, Address
->getType());
9080 return Builder
.CreateCall(F
, {Address
, RW
, Locality
, IsData
});
9083 if (BuiltinID
== clang::ARM::BI__builtin_arm_rbit
) {
9084 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
9085 return Builder
.CreateCall(
9086 CGM
.getIntrinsic(Intrinsic::bitreverse
, Arg
->getType()), Arg
, "rbit");
9089 if (BuiltinID
== clang::ARM::BI__builtin_arm_clz
||
9090 BuiltinID
== clang::ARM::BI__builtin_arm_clz64
) {
9091 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
9092 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctlz
, Arg
->getType());
9093 Value
*Res
= Builder
.CreateCall(F
, {Arg
, Builder
.getInt1(false)});
9094 if (BuiltinID
== clang::ARM::BI__builtin_arm_clz64
)
9095 Res
= Builder
.CreateTrunc(Res
, Builder
.getInt32Ty());
9100 if (BuiltinID
== clang::ARM::BI__builtin_arm_cls
) {
9101 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
9102 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::arm_cls
), Arg
, "cls");
9104 if (BuiltinID
== clang::ARM::BI__builtin_arm_cls64
) {
9105 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
9106 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::arm_cls64
), Arg
,
9110 if (BuiltinID
== clang::ARM::BI__clear_cache
) {
9111 assert(E
->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
9112 const FunctionDecl
*FD
= E
->getDirectCallee();
9114 for (unsigned i
= 0; i
< 2; i
++)
9115 Ops
[i
] = EmitScalarExpr(E
->getArg(i
));
9116 llvm::Type
*Ty
= CGM
.getTypes().ConvertType(FD
->getType());
9117 llvm::FunctionType
*FTy
= cast
<llvm::FunctionType
>(Ty
);
9118 StringRef Name
= FD
->getName();
9119 return EmitNounwindRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
), Ops
);
9122 if (BuiltinID
== clang::ARM::BI__builtin_arm_mcrr
||
9123 BuiltinID
== clang::ARM::BI__builtin_arm_mcrr2
) {
9126 switch (BuiltinID
) {
9127 default: llvm_unreachable("unexpected builtin");
9128 case clang::ARM::BI__builtin_arm_mcrr
:
9129 F
= CGM
.getIntrinsic(Intrinsic::arm_mcrr
);
9131 case clang::ARM::BI__builtin_arm_mcrr2
:
9132 F
= CGM
.getIntrinsic(Intrinsic::arm_mcrr2
);
9136 // MCRR{2} instruction has 5 operands but
9137 // the intrinsic has 4 because Rt and Rt2
9138 // are represented as a single unsigned 64
9139 // bit integer in the intrinsic definition
9140 // but internally it's represented as 2 32
9143 Value
*Coproc
= EmitScalarExpr(E
->getArg(0));
9144 Value
*Opc1
= EmitScalarExpr(E
->getArg(1));
9145 Value
*RtAndRt2
= EmitScalarExpr(E
->getArg(2));
9146 Value
*CRm
= EmitScalarExpr(E
->getArg(3));
9148 Value
*C1
= llvm::ConstantInt::get(Int64Ty
, 32);
9149 Value
*Rt
= Builder
.CreateTruncOrBitCast(RtAndRt2
, Int32Ty
);
9150 Value
*Rt2
= Builder
.CreateLShr(RtAndRt2
, C1
);
9151 Rt2
= Builder
.CreateTruncOrBitCast(Rt2
, Int32Ty
);
9153 return Builder
.CreateCall(F
, {Coproc
, Opc1
, Rt
, Rt2
, CRm
});
9156 if (BuiltinID
== clang::ARM::BI__builtin_arm_mrrc
||
9157 BuiltinID
== clang::ARM::BI__builtin_arm_mrrc2
) {
9160 switch (BuiltinID
) {
9161 default: llvm_unreachable("unexpected builtin");
9162 case clang::ARM::BI__builtin_arm_mrrc
:
9163 F
= CGM
.getIntrinsic(Intrinsic::arm_mrrc
);
9165 case clang::ARM::BI__builtin_arm_mrrc2
:
9166 F
= CGM
.getIntrinsic(Intrinsic::arm_mrrc2
);
9170 Value
*Coproc
= EmitScalarExpr(E
->getArg(0));
9171 Value
*Opc1
= EmitScalarExpr(E
->getArg(1));
9172 Value
*CRm
= EmitScalarExpr(E
->getArg(2));
9173 Value
*RtAndRt2
= Builder
.CreateCall(F
, {Coproc
, Opc1
, CRm
});
9175 // Returns an unsigned 64 bit integer, represented
9176 // as two 32 bit integers.
9178 Value
*Rt
= Builder
.CreateExtractValue(RtAndRt2
, 1);
9179 Value
*Rt1
= Builder
.CreateExtractValue(RtAndRt2
, 0);
9180 Rt
= Builder
.CreateZExt(Rt
, Int64Ty
);
9181 Rt1
= Builder
.CreateZExt(Rt1
, Int64Ty
);
9183 Value
*ShiftCast
= llvm::ConstantInt::get(Int64Ty
, 32);
9184 RtAndRt2
= Builder
.CreateShl(Rt
, ShiftCast
, "shl", true);
9185 RtAndRt2
= Builder
.CreateOr(RtAndRt2
, Rt1
);
9187 return Builder
.CreateBitCast(RtAndRt2
, ConvertType(E
->getType()));
9190 if (BuiltinID
== clang::ARM::BI__builtin_arm_ldrexd
||
9191 ((BuiltinID
== clang::ARM::BI__builtin_arm_ldrex
||
9192 BuiltinID
== clang::ARM::BI__builtin_arm_ldaex
) &&
9193 getContext().getTypeSize(E
->getType()) == 64) ||
9194 BuiltinID
== clang::ARM::BI__ldrexd
) {
9197 switch (BuiltinID
) {
9198 default: llvm_unreachable("unexpected builtin");
9199 case clang::ARM::BI__builtin_arm_ldaex
:
9200 F
= CGM
.getIntrinsic(Intrinsic::arm_ldaexd
);
9202 case clang::ARM::BI__builtin_arm_ldrexd
:
9203 case clang::ARM::BI__builtin_arm_ldrex
:
9204 case clang::ARM::BI__ldrexd
:
9205 F
= CGM
.getIntrinsic(Intrinsic::arm_ldrexd
);
9209 Value
*LdPtr
= EmitScalarExpr(E
->getArg(0));
9210 Value
*Val
= Builder
.CreateCall(F
, LdPtr
, "ldrexd");
9212 Value
*Val0
= Builder
.CreateExtractValue(Val
, 1);
9213 Value
*Val1
= Builder
.CreateExtractValue(Val
, 0);
9214 Val0
= Builder
.CreateZExt(Val0
, Int64Ty
);
9215 Val1
= Builder
.CreateZExt(Val1
, Int64Ty
);
9217 Value
*ShiftCst
= llvm::ConstantInt::get(Int64Ty
, 32);
9218 Val
= Builder
.CreateShl(Val0
, ShiftCst
, "shl", true /* nuw */);
9219 Val
= Builder
.CreateOr(Val
, Val1
);
9220 return Builder
.CreateBitCast(Val
, ConvertType(E
->getType()));
9223 if (BuiltinID
== clang::ARM::BI__builtin_arm_ldrex
||
9224 BuiltinID
== clang::ARM::BI__builtin_arm_ldaex
) {
9225 Value
*LoadAddr
= EmitScalarExpr(E
->getArg(0));
9227 QualType Ty
= E
->getType();
9228 llvm::Type
*RealResTy
= ConvertType(Ty
);
9230 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty
));
9232 Function
*F
= CGM
.getIntrinsic(
9233 BuiltinID
== clang::ARM::BI__builtin_arm_ldaex
? Intrinsic::arm_ldaex
9234 : Intrinsic::arm_ldrex
,
9236 CallInst
*Val
= Builder
.CreateCall(F
, LoadAddr
, "ldrex");
9238 0, Attribute::get(getLLVMContext(), Attribute::ElementType
, IntTy
));
9240 if (RealResTy
->isPointerTy())
9241 return Builder
.CreateIntToPtr(Val
, RealResTy
);
9243 llvm::Type
*IntResTy
= llvm::IntegerType::get(
9244 getLLVMContext(), CGM
.getDataLayout().getTypeSizeInBits(RealResTy
));
9245 return Builder
.CreateBitCast(Builder
.CreateTruncOrBitCast(Val
, IntResTy
),
9250 if (BuiltinID
== clang::ARM::BI__builtin_arm_strexd
||
9251 ((BuiltinID
== clang::ARM::BI__builtin_arm_stlex
||
9252 BuiltinID
== clang::ARM::BI__builtin_arm_strex
) &&
9253 getContext().getTypeSize(E
->getArg(0)->getType()) == 64)) {
9254 Function
*F
= CGM
.getIntrinsic(
9255 BuiltinID
== clang::ARM::BI__builtin_arm_stlex
? Intrinsic::arm_stlexd
9256 : Intrinsic::arm_strexd
);
9257 llvm::Type
*STy
= llvm::StructType::get(Int32Ty
, Int32Ty
);
9259 Address Tmp
= CreateMemTemp(E
->getArg(0)->getType());
9260 Value
*Val
= EmitScalarExpr(E
->getArg(0));
9261 Builder
.CreateStore(Val
, Tmp
);
9263 Address LdPtr
= Tmp
.withElementType(STy
);
9264 Val
= Builder
.CreateLoad(LdPtr
);
9266 Value
*Arg0
= Builder
.CreateExtractValue(Val
, 0);
9267 Value
*Arg1
= Builder
.CreateExtractValue(Val
, 1);
9268 Value
*StPtr
= EmitScalarExpr(E
->getArg(1));
9269 return Builder
.CreateCall(F
, {Arg0
, Arg1
, StPtr
}, "strexd");
9272 if (BuiltinID
== clang::ARM::BI__builtin_arm_strex
||
9273 BuiltinID
== clang::ARM::BI__builtin_arm_stlex
) {
9274 Value
*StoreVal
= EmitScalarExpr(E
->getArg(0));
9275 Value
*StoreAddr
= EmitScalarExpr(E
->getArg(1));
9277 QualType Ty
= E
->getArg(0)->getType();
9278 llvm::Type
*StoreTy
=
9279 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty
));
9281 if (StoreVal
->getType()->isPointerTy())
9282 StoreVal
= Builder
.CreatePtrToInt(StoreVal
, Int32Ty
);
9284 llvm::Type
*IntTy
= llvm::IntegerType::get(
9286 CGM
.getDataLayout().getTypeSizeInBits(StoreVal
->getType()));
9287 StoreVal
= Builder
.CreateBitCast(StoreVal
, IntTy
);
9288 StoreVal
= Builder
.CreateZExtOrBitCast(StoreVal
, Int32Ty
);
9291 Function
*F
= CGM
.getIntrinsic(
9292 BuiltinID
== clang::ARM::BI__builtin_arm_stlex
? Intrinsic::arm_stlex
9293 : Intrinsic::arm_strex
,
9294 StoreAddr
->getType());
9296 CallInst
*CI
= Builder
.CreateCall(F
, {StoreVal
, StoreAddr
}, "strex");
9298 1, Attribute::get(getLLVMContext(), Attribute::ElementType
, StoreTy
));
9302 if (BuiltinID
== clang::ARM::BI__builtin_arm_clrex
) {
9303 Function
*F
= CGM
.getIntrinsic(Intrinsic::arm_clrex
);
9304 return Builder
.CreateCall(F
);
9308 Intrinsic::ID CRCIntrinsicID
= Intrinsic::not_intrinsic
;
9309 switch (BuiltinID
) {
9310 case clang::ARM::BI__builtin_arm_crc32b
:
9311 CRCIntrinsicID
= Intrinsic::arm_crc32b
; break;
9312 case clang::ARM::BI__builtin_arm_crc32cb
:
9313 CRCIntrinsicID
= Intrinsic::arm_crc32cb
; break;
9314 case clang::ARM::BI__builtin_arm_crc32h
:
9315 CRCIntrinsicID
= Intrinsic::arm_crc32h
; break;
9316 case clang::ARM::BI__builtin_arm_crc32ch
:
9317 CRCIntrinsicID
= Intrinsic::arm_crc32ch
; break;
9318 case clang::ARM::BI__builtin_arm_crc32w
:
9319 case clang::ARM::BI__builtin_arm_crc32d
:
9320 CRCIntrinsicID
= Intrinsic::arm_crc32w
; break;
9321 case clang::ARM::BI__builtin_arm_crc32cw
:
9322 case clang::ARM::BI__builtin_arm_crc32cd
:
9323 CRCIntrinsicID
= Intrinsic::arm_crc32cw
; break;
9326 if (CRCIntrinsicID
!= Intrinsic::not_intrinsic
) {
9327 Value
*Arg0
= EmitScalarExpr(E
->getArg(0));
9328 Value
*Arg1
= EmitScalarExpr(E
->getArg(1));
9330 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
9331 // intrinsics, hence we need different codegen for these cases.
9332 if (BuiltinID
== clang::ARM::BI__builtin_arm_crc32d
||
9333 BuiltinID
== clang::ARM::BI__builtin_arm_crc32cd
) {
9334 Value
*C1
= llvm::ConstantInt::get(Int64Ty
, 32);
9335 Value
*Arg1a
= Builder
.CreateTruncOrBitCast(Arg1
, Int32Ty
);
9336 Value
*Arg1b
= Builder
.CreateLShr(Arg1
, C1
);
9337 Arg1b
= Builder
.CreateTruncOrBitCast(Arg1b
, Int32Ty
);
9339 Function
*F
= CGM
.getIntrinsic(CRCIntrinsicID
);
9340 Value
*Res
= Builder
.CreateCall(F
, {Arg0
, Arg1a
});
9341 return Builder
.CreateCall(F
, {Res
, Arg1b
});
9343 Arg1
= Builder
.CreateZExtOrBitCast(Arg1
, Int32Ty
);
9345 Function
*F
= CGM
.getIntrinsic(CRCIntrinsicID
);
9346 return Builder
.CreateCall(F
, {Arg0
, Arg1
});
9350 if (BuiltinID
== clang::ARM::BI__builtin_arm_rsr
||
9351 BuiltinID
== clang::ARM::BI__builtin_arm_rsr64
||
9352 BuiltinID
== clang::ARM::BI__builtin_arm_rsrp
||
9353 BuiltinID
== clang::ARM::BI__builtin_arm_wsr
||
9354 BuiltinID
== clang::ARM::BI__builtin_arm_wsr64
||
9355 BuiltinID
== clang::ARM::BI__builtin_arm_wsrp
) {
9357 SpecialRegisterAccessKind AccessKind
= Write
;
9358 if (BuiltinID
== clang::ARM::BI__builtin_arm_rsr
||
9359 BuiltinID
== clang::ARM::BI__builtin_arm_rsr64
||
9360 BuiltinID
== clang::ARM::BI__builtin_arm_rsrp
)
9361 AccessKind
= VolatileRead
;
9363 bool IsPointerBuiltin
= BuiltinID
== clang::ARM::BI__builtin_arm_rsrp
||
9364 BuiltinID
== clang::ARM::BI__builtin_arm_wsrp
;
9366 bool Is64Bit
= BuiltinID
== clang::ARM::BI__builtin_arm_rsr64
||
9367 BuiltinID
== clang::ARM::BI__builtin_arm_wsr64
;
9369 llvm::Type
*ValueType
;
9370 llvm::Type
*RegisterType
;
9371 if (IsPointerBuiltin
) {
9372 ValueType
= VoidPtrTy
;
9373 RegisterType
= Int32Ty
;
9374 } else if (Is64Bit
) {
9375 ValueType
= RegisterType
= Int64Ty
;
9377 ValueType
= RegisterType
= Int32Ty
;
9380 return EmitSpecialRegisterBuiltin(*this, E
, RegisterType
, ValueType
,
9384 if (BuiltinID
== ARM::BI__builtin_sponentry
) {
9385 llvm::Function
*F
= CGM
.getIntrinsic(Intrinsic::sponentry
, AllocaInt8PtrTy
);
9386 return Builder
.CreateCall(F
);
9389 // Handle MSVC intrinsics before argument evaluation to prevent double
9391 if (std::optional
<MSVCIntrin
> MsvcIntId
= translateArmToMsvcIntrin(BuiltinID
))
9392 return EmitMSVCBuiltinExpr(*MsvcIntId
, E
);
9394 // Deal with MVE builtins
9395 if (Value
*Result
= EmitARMMVEBuiltinExpr(BuiltinID
, E
, ReturnValue
, Arch
))
9397 // Handle CDE builtins
9398 if (Value
*Result
= EmitARMCDEBuiltinExpr(BuiltinID
, E
, ReturnValue
, Arch
))
9401 // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
9402 auto It
= llvm::find_if(NEONEquivalentIntrinsicMap
, [BuiltinID
](auto &P
) {
9403 return P
.first
== BuiltinID
;
9405 if (It
!= end(NEONEquivalentIntrinsicMap
))
9406 BuiltinID
= It
->second
;
9408 // Find out if any arguments are required to be integer constant
9410 unsigned ICEArguments
= 0;
9411 ASTContext::GetBuiltinTypeError Error
;
9412 getContext().GetBuiltinType(BuiltinID
, Error
, &ICEArguments
);
9413 assert(Error
== ASTContext::GE_None
&& "Should not codegen an error");
9415 auto getAlignmentValue32
= [&](Address addr
) -> Value
* {
9416 return Builder
.getInt32(addr
.getAlignment().getQuantity());
9419 Address PtrOp0
= Address::invalid();
9420 Address PtrOp1
= Address::invalid();
9421 SmallVector
<Value
*, 4> Ops
;
9422 bool HasExtraArg
= HasExtraNeonArgument(BuiltinID
);
9423 unsigned NumArgs
= E
->getNumArgs() - (HasExtraArg
? 1 : 0);
9424 for (unsigned i
= 0, e
= NumArgs
; i
!= e
; i
++) {
9426 switch (BuiltinID
) {
9427 case NEON::BI__builtin_neon_vld1_v
:
9428 case NEON::BI__builtin_neon_vld1q_v
:
9429 case NEON::BI__builtin_neon_vld1q_lane_v
:
9430 case NEON::BI__builtin_neon_vld1_lane_v
:
9431 case NEON::BI__builtin_neon_vld1_dup_v
:
9432 case NEON::BI__builtin_neon_vld1q_dup_v
:
9433 case NEON::BI__builtin_neon_vst1_v
:
9434 case NEON::BI__builtin_neon_vst1q_v
:
9435 case NEON::BI__builtin_neon_vst1q_lane_v
:
9436 case NEON::BI__builtin_neon_vst1_lane_v
:
9437 case NEON::BI__builtin_neon_vst2_v
:
9438 case NEON::BI__builtin_neon_vst2q_v
:
9439 case NEON::BI__builtin_neon_vst2_lane_v
:
9440 case NEON::BI__builtin_neon_vst2q_lane_v
:
9441 case NEON::BI__builtin_neon_vst3_v
:
9442 case NEON::BI__builtin_neon_vst3q_v
:
9443 case NEON::BI__builtin_neon_vst3_lane_v
:
9444 case NEON::BI__builtin_neon_vst3q_lane_v
:
9445 case NEON::BI__builtin_neon_vst4_v
:
9446 case NEON::BI__builtin_neon_vst4q_v
:
9447 case NEON::BI__builtin_neon_vst4_lane_v
:
9448 case NEON::BI__builtin_neon_vst4q_lane_v
:
9449 // Get the alignment for the argument in addition to the value;
9450 // we'll use it later.
9451 PtrOp0
= EmitPointerWithAlignment(E
->getArg(0));
9452 Ops
.push_back(PtrOp0
.emitRawPointer(*this));
9457 switch (BuiltinID
) {
9458 case NEON::BI__builtin_neon_vld2_v
:
9459 case NEON::BI__builtin_neon_vld2q_v
:
9460 case NEON::BI__builtin_neon_vld3_v
:
9461 case NEON::BI__builtin_neon_vld3q_v
:
9462 case NEON::BI__builtin_neon_vld4_v
:
9463 case NEON::BI__builtin_neon_vld4q_v
:
9464 case NEON::BI__builtin_neon_vld2_lane_v
:
9465 case NEON::BI__builtin_neon_vld2q_lane_v
:
9466 case NEON::BI__builtin_neon_vld3_lane_v
:
9467 case NEON::BI__builtin_neon_vld3q_lane_v
:
9468 case NEON::BI__builtin_neon_vld4_lane_v
:
9469 case NEON::BI__builtin_neon_vld4q_lane_v
:
9470 case NEON::BI__builtin_neon_vld2_dup_v
:
9471 case NEON::BI__builtin_neon_vld2q_dup_v
:
9472 case NEON::BI__builtin_neon_vld3_dup_v
:
9473 case NEON::BI__builtin_neon_vld3q_dup_v
:
9474 case NEON::BI__builtin_neon_vld4_dup_v
:
9475 case NEON::BI__builtin_neon_vld4q_dup_v
:
9476 // Get the alignment for the argument in addition to the value;
9477 // we'll use it later.
9478 PtrOp1
= EmitPointerWithAlignment(E
->getArg(1));
9479 Ops
.push_back(PtrOp1
.emitRawPointer(*this));
9484 Ops
.push_back(EmitScalarOrConstFoldImmArg(ICEArguments
, i
, E
));
9487 switch (BuiltinID
) {
9490 case NEON::BI__builtin_neon_vget_lane_i8
:
9491 case NEON::BI__builtin_neon_vget_lane_i16
:
9492 case NEON::BI__builtin_neon_vget_lane_i32
:
9493 case NEON::BI__builtin_neon_vget_lane_i64
:
9494 case NEON::BI__builtin_neon_vget_lane_bf16
:
9495 case NEON::BI__builtin_neon_vget_lane_f32
:
9496 case NEON::BI__builtin_neon_vgetq_lane_i8
:
9497 case NEON::BI__builtin_neon_vgetq_lane_i16
:
9498 case NEON::BI__builtin_neon_vgetq_lane_i32
:
9499 case NEON::BI__builtin_neon_vgetq_lane_i64
:
9500 case NEON::BI__builtin_neon_vgetq_lane_bf16
:
9501 case NEON::BI__builtin_neon_vgetq_lane_f32
:
9502 case NEON::BI__builtin_neon_vduph_lane_bf16
:
9503 case NEON::BI__builtin_neon_vduph_laneq_bf16
:
9504 return Builder
.CreateExtractElement(Ops
[0], Ops
[1], "vget_lane");
9506 case NEON::BI__builtin_neon_vrndns_f32
: {
9507 Value
*Arg
= EmitScalarExpr(E
->getArg(0));
9508 llvm::Type
*Tys
[] = {Arg
->getType()};
9509 Function
*F
= CGM
.getIntrinsic(Intrinsic::arm_neon_vrintn
, Tys
);
9510 return Builder
.CreateCall(F
, {Arg
}, "vrndn"); }
9512 case NEON::BI__builtin_neon_vset_lane_i8
:
9513 case NEON::BI__builtin_neon_vset_lane_i16
:
9514 case NEON::BI__builtin_neon_vset_lane_i32
:
9515 case NEON::BI__builtin_neon_vset_lane_i64
:
9516 case NEON::BI__builtin_neon_vset_lane_bf16
:
9517 case NEON::BI__builtin_neon_vset_lane_f32
:
9518 case NEON::BI__builtin_neon_vsetq_lane_i8
:
9519 case NEON::BI__builtin_neon_vsetq_lane_i16
:
9520 case NEON::BI__builtin_neon_vsetq_lane_i32
:
9521 case NEON::BI__builtin_neon_vsetq_lane_i64
:
9522 case NEON::BI__builtin_neon_vsetq_lane_bf16
:
9523 case NEON::BI__builtin_neon_vsetq_lane_f32
:
9524 return Builder
.CreateInsertElement(Ops
[1], Ops
[0], Ops
[2], "vset_lane");
9526 case NEON::BI__builtin_neon_vsha1h_u32
:
9527 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_sha1h
), Ops
,
9529 case NEON::BI__builtin_neon_vsha1cq_u32
:
9530 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_sha1c
), Ops
,
9532 case NEON::BI__builtin_neon_vsha1pq_u32
:
9533 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_sha1p
), Ops
,
9535 case NEON::BI__builtin_neon_vsha1mq_u32
:
9536 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_sha1m
), Ops
,
9539 case NEON::BI__builtin_neon_vcvth_bf16_f32
: {
9540 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf
), Ops
,
9544 // The ARM _MoveToCoprocessor builtins put the input register value as
9545 // the first argument, but the LLVM intrinsic expects it as the third one.
9546 case clang::ARM::BI_MoveToCoprocessor
:
9547 case clang::ARM::BI_MoveToCoprocessor2
: {
9548 Function
*F
= CGM
.getIntrinsic(BuiltinID
== clang::ARM::BI_MoveToCoprocessor
9549 ? Intrinsic::arm_mcr
9550 : Intrinsic::arm_mcr2
);
9551 return Builder
.CreateCall(F
, {Ops
[1], Ops
[2], Ops
[0],
9552 Ops
[3], Ops
[4], Ops
[5]});
9556 // Get the last argument, which specifies the vector type.
9557 assert(HasExtraArg
);
9558 const Expr
*Arg
= E
->getArg(E
->getNumArgs()-1);
9559 std::optional
<llvm::APSInt
> Result
=
9560 Arg
->getIntegerConstantExpr(getContext());
9564 if (BuiltinID
== clang::ARM::BI__builtin_arm_vcvtr_f
||
9565 BuiltinID
== clang::ARM::BI__builtin_arm_vcvtr_d
) {
9566 // Determine the overloaded type of this builtin.
9568 if (BuiltinID
== clang::ARM::BI__builtin_arm_vcvtr_f
)
9573 // Determine whether this is an unsigned conversion or not.
9574 bool usgn
= Result
->getZExtValue() == 1;
9575 unsigned Int
= usgn
? Intrinsic::arm_vcvtru
: Intrinsic::arm_vcvtr
;
9577 // Call the appropriate intrinsic.
9578 Function
*F
= CGM
.getIntrinsic(Int
, Ty
);
9579 return Builder
.CreateCall(F
, Ops
, "vcvtr");
9582 // Determine the type of this overloaded NEON intrinsic.
9583 NeonTypeFlags Type
= Result
->getZExtValue();
9584 bool usgn
= Type
.isUnsigned();
9585 bool rightShift
= false;
9587 llvm::FixedVectorType
*VTy
=
9588 GetNeonType(this, Type
, getTarget().hasLegalHalfType(), false,
9589 getTarget().hasBFloat16Type());
9590 llvm::Type
*Ty
= VTy
;
9594 // Many NEON builtins have identical semantics and uses in ARM and
9595 // AArch64. Emit these in a single function.
9596 auto IntrinsicMap
= ArrayRef(ARMSIMDIntrinsicMap
);
9597 const ARMVectorIntrinsicInfo
*Builtin
= findARMVectorIntrinsicInMap(
9598 IntrinsicMap
, BuiltinID
, NEONSIMDIntrinsicsProvenSorted
);
9600 return EmitCommonNeonBuiltinExpr(
9601 Builtin
->BuiltinID
, Builtin
->LLVMIntrinsic
, Builtin
->AltLLVMIntrinsic
,
9602 Builtin
->NameHint
, Builtin
->TypeModifier
, E
, Ops
, PtrOp0
, PtrOp1
, Arch
);
9605 switch (BuiltinID
) {
9606 default: return nullptr;
9607 case NEON::BI__builtin_neon_vld1q_lane_v
:
9608 // Handle 64-bit integer elements as a special case. Use shuffles of
9609 // one-element vectors to avoid poor code for i64 in the backend.
9610 if (VTy
->getElementType()->isIntegerTy(64)) {
9611 // Extract the other lane.
9612 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
9613 int Lane
= cast
<ConstantInt
>(Ops
[2])->getZExtValue();
9614 Value
*SV
= llvm::ConstantVector::get(ConstantInt::get(Int32Ty
, 1-Lane
));
9615 Ops
[1] = Builder
.CreateShuffleVector(Ops
[1], Ops
[1], SV
);
9616 // Load the value as a one-element vector.
9617 Ty
= llvm::FixedVectorType::get(VTy
->getElementType(), 1);
9618 llvm::Type
*Tys
[] = {Ty
, Int8PtrTy
};
9619 Function
*F
= CGM
.getIntrinsic(Intrinsic::arm_neon_vld1
, Tys
);
9620 Value
*Align
= getAlignmentValue32(PtrOp0
);
9621 Value
*Ld
= Builder
.CreateCall(F
, {Ops
[0], Align
});
9623 int Indices
[] = {1 - Lane
, Lane
};
9624 return Builder
.CreateShuffleVector(Ops
[1], Ld
, Indices
, "vld1q_lane");
9627 case NEON::BI__builtin_neon_vld1_lane_v
: {
9628 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
9629 PtrOp0
= PtrOp0
.withElementType(VTy
->getElementType());
9630 Value
*Ld
= Builder
.CreateLoad(PtrOp0
);
9631 return Builder
.CreateInsertElement(Ops
[1], Ld
, Ops
[2], "vld1_lane");
9633 case NEON::BI__builtin_neon_vqrshrn_n_v
:
9635 usgn
? Intrinsic::arm_neon_vqrshiftnu
: Intrinsic::arm_neon_vqrshiftns
;
9636 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vqrshrn_n",
9638 case NEON::BI__builtin_neon_vqrshrun_n_v
:
9639 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu
, Ty
),
9640 Ops
, "vqrshrun_n", 1, true);
9641 case NEON::BI__builtin_neon_vqshrn_n_v
:
9642 Int
= usgn
? Intrinsic::arm_neon_vqshiftnu
: Intrinsic::arm_neon_vqshiftns
;
9643 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vqshrn_n",
9645 case NEON::BI__builtin_neon_vqshrun_n_v
:
9646 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu
, Ty
),
9647 Ops
, "vqshrun_n", 1, true);
9648 case NEON::BI__builtin_neon_vrecpe_v
:
9649 case NEON::BI__builtin_neon_vrecpeq_v
:
9650 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_vrecpe
, Ty
),
9652 case NEON::BI__builtin_neon_vrshrn_n_v
:
9653 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_vrshiftn
, Ty
),
9654 Ops
, "vrshrn_n", 1, true);
9655 case NEON::BI__builtin_neon_vrsra_n_v
:
9656 case NEON::BI__builtin_neon_vrsraq_n_v
:
9657 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
9658 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
9659 Ops
[2] = EmitNeonShiftVector(Ops
[2], Ty
, true);
9660 Int
= usgn
? Intrinsic::arm_neon_vrshiftu
: Intrinsic::arm_neon_vrshifts
;
9661 Ops
[1] = Builder
.CreateCall(CGM
.getIntrinsic(Int
, Ty
), {Ops
[1], Ops
[2]});
9662 return Builder
.CreateAdd(Ops
[0], Ops
[1], "vrsra_n");
9663 case NEON::BI__builtin_neon_vsri_n_v
:
9664 case NEON::BI__builtin_neon_vsriq_n_v
:
9667 case NEON::BI__builtin_neon_vsli_n_v
:
9668 case NEON::BI__builtin_neon_vsliq_n_v
:
9669 Ops
[2] = EmitNeonShiftVector(Ops
[2], Ty
, rightShift
);
9670 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_vshiftins
, Ty
),
9672 case NEON::BI__builtin_neon_vsra_n_v
:
9673 case NEON::BI__builtin_neon_vsraq_n_v
:
9674 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
9675 Ops
[1] = EmitNeonRShiftImm(Ops
[1], Ops
[2], Ty
, usgn
, "vsra_n");
9676 return Builder
.CreateAdd(Ops
[0], Ops
[1]);
9677 case NEON::BI__builtin_neon_vst1q_lane_v
:
9678 // Handle 64-bit integer elements as a special case. Use a shuffle to get
9679 // a one-element vector and avoid poor code for i64 in the backend.
9680 if (VTy
->getElementType()->isIntegerTy(64)) {
9681 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
9682 Value
*SV
= llvm::ConstantVector::get(cast
<llvm::Constant
>(Ops
[2]));
9683 Ops
[1] = Builder
.CreateShuffleVector(Ops
[1], Ops
[1], SV
);
9684 Ops
[2] = getAlignmentValue32(PtrOp0
);
9685 llvm::Type
*Tys
[] = {Int8PtrTy
, Ops
[1]->getType()};
9686 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::arm_neon_vst1
,
9690 case NEON::BI__builtin_neon_vst1_lane_v
: {
9691 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
9692 Ops
[1] = Builder
.CreateExtractElement(Ops
[1], Ops
[2]);
9693 return Builder
.CreateStore(Ops
[1],
9694 PtrOp0
.withElementType(Ops
[1]->getType()));
9696 case NEON::BI__builtin_neon_vtbl1_v
:
9697 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_vtbl1
),
9699 case NEON::BI__builtin_neon_vtbl2_v
:
9700 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_vtbl2
),
9702 case NEON::BI__builtin_neon_vtbl3_v
:
9703 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_vtbl3
),
9705 case NEON::BI__builtin_neon_vtbl4_v
:
9706 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_vtbl4
),
9708 case NEON::BI__builtin_neon_vtbx1_v
:
9709 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_vtbx1
),
9711 case NEON::BI__builtin_neon_vtbx2_v
:
9712 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_vtbx2
),
9714 case NEON::BI__builtin_neon_vtbx3_v
:
9715 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_vtbx3
),
9717 case NEON::BI__builtin_neon_vtbx4_v
:
9718 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::arm_neon_vtbx4
),
9723 template<typename Integer
>
9724 static Integer
GetIntegerConstantValue(const Expr
*E
, ASTContext
&Context
) {
9725 return E
->getIntegerConstantExpr(Context
)->getExtValue();
9728 static llvm::Value
*SignOrZeroExtend(CGBuilderTy
&Builder
, llvm::Value
*V
,
9729 llvm::Type
*T
, bool Unsigned
) {
9730 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9731 // which finds it convenient to specify signed/unsigned as a boolean flag.
9732 return Unsigned
? Builder
.CreateZExt(V
, T
) : Builder
.CreateSExt(V
, T
);
9735 static llvm::Value
*MVEImmediateShr(CGBuilderTy
&Builder
, llvm::Value
*V
,
9736 uint32_t Shift
, bool Unsigned
) {
9737 // MVE helper function for integer shift right. This must handle signed vs
9738 // unsigned, and also deal specially with the case where the shift count is
9739 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9740 // undefined behavior, but in MVE it's legal, so we must convert it to code
9741 // that is not undefined in IR.
9742 unsigned LaneBits
= cast
<llvm::VectorType
>(V
->getType())
9744 ->getPrimitiveSizeInBits();
9745 if (Shift
== LaneBits
) {
9746 // An unsigned shift of the full lane size always generates zero, so we can
9747 // simply emit a zero vector. A signed shift of the full lane size does the
9748 // same thing as shifting by one bit fewer.
9750 return llvm::Constant::getNullValue(V
->getType());
9754 return Unsigned
? Builder
.CreateLShr(V
, Shift
) : Builder
.CreateAShr(V
, Shift
);
9757 static llvm::Value
*ARMMVEVectorSplat(CGBuilderTy
&Builder
, llvm::Value
*V
) {
9758 // MVE-specific helper function for a vector splat, which infers the element
9759 // count of the output vector by knowing that MVE vectors are all 128 bits
9761 unsigned Elements
= 128 / V
->getType()->getPrimitiveSizeInBits();
9762 return Builder
.CreateVectorSplat(Elements
, V
);
9765 static llvm::Value
*ARMMVEVectorReinterpret(CGBuilderTy
&Builder
,
9766 CodeGenFunction
*CGF
,
9768 llvm::Type
*DestType
) {
9769 // Convert one MVE vector type into another by reinterpreting its in-register
9772 // Little-endian, this is identical to a bitcast (which reinterprets the
9773 // memory format). But big-endian, they're not necessarily the same, because
9774 // the register and memory formats map to each other differently depending on
9777 // We generate a bitcast whenever we can (if we're little-endian, or if the
9778 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9779 // that performs the different kind of reinterpretation.
9780 if (CGF
->getTarget().isBigEndian() &&
9781 V
->getType()->getScalarSizeInBits() != DestType
->getScalarSizeInBits()) {
9782 return Builder
.CreateCall(
9783 CGF
->CGM
.getIntrinsic(Intrinsic::arm_mve_vreinterpretq
,
9784 {DestType
, V
->getType()}),
9787 return Builder
.CreateBitCast(V
, DestType
);
9791 static llvm::Value
*VectorUnzip(CGBuilderTy
&Builder
, llvm::Value
*V
, bool Odd
) {
9792 // Make a shufflevector that extracts every other element of a vector (evens
9793 // or odds, as desired).
9794 SmallVector
<int, 16> Indices
;
9795 unsigned InputElements
=
9796 cast
<llvm::FixedVectorType
>(V
->getType())->getNumElements();
9797 for (unsigned i
= 0; i
< InputElements
; i
+= 2)
9798 Indices
.push_back(i
+ Odd
);
9799 return Builder
.CreateShuffleVector(V
, Indices
);
9802 static llvm::Value
*VectorZip(CGBuilderTy
&Builder
, llvm::Value
*V0
,
9804 // Make a shufflevector that interleaves two vectors element by element.
9805 assert(V0
->getType() == V1
->getType() && "Can't zip different vector types");
9806 SmallVector
<int, 16> Indices
;
9807 unsigned InputElements
=
9808 cast
<llvm::FixedVectorType
>(V0
->getType())->getNumElements();
9809 for (unsigned i
= 0; i
< InputElements
; i
++) {
9810 Indices
.push_back(i
);
9811 Indices
.push_back(i
+ InputElements
);
9813 return Builder
.CreateShuffleVector(V0
, V1
, Indices
);
9816 template<unsigned HighBit
, unsigned OtherBits
>
9817 static llvm::Value
*ARMMVEConstantSplat(CGBuilderTy
&Builder
, llvm::Type
*VT
) {
9818 // MVE-specific helper function to make a vector splat of a constant such as
9819 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
9820 llvm::Type
*T
= cast
<llvm::VectorType
>(VT
)->getElementType();
9821 unsigned LaneBits
= T
->getPrimitiveSizeInBits();
9822 uint32_t Value
= HighBit
<< (LaneBits
- 1);
9824 Value
|= (1UL << (LaneBits
- 1)) - 1;
9825 llvm::Value
*Lane
= llvm::ConstantInt::get(T
, Value
);
9826 return ARMMVEVectorSplat(Builder
, Lane
);
9829 static llvm::Value
*ARMMVEVectorElementReverse(CGBuilderTy
&Builder
,
9831 unsigned ReverseWidth
) {
9832 // MVE-specific helper function which reverses the elements of a
9833 // vector within every (ReverseWidth)-bit collection of lanes.
9834 SmallVector
<int, 16> Indices
;
9835 unsigned LaneSize
= V
->getType()->getScalarSizeInBits();
9836 unsigned Elements
= 128 / LaneSize
;
9837 unsigned Mask
= ReverseWidth
/ LaneSize
- 1;
9838 for (unsigned i
= 0; i
< Elements
; i
++)
9839 Indices
.push_back(i
^ Mask
);
9840 return Builder
.CreateShuffleVector(V
, Indices
);
// Emit LLVM IR for a call to an ARM MVE builtin.
//
// The cases of the first switch come from the included
// arm_mve_builtin_cg.inc. Per the comments below, builtins that break out of
// that switch need the handwritten code in the second switch, which is
// selected by CustomCodeGenType and parameterised by IRIntr and NumVectors
// (presumably assigned by the included cases; confirm against the .inc file).
//
// NOTE(review): this view does not show every original line of the function
// (the numbering embedded in the text has gaps), so the comments added here
// describe only the statements that are visible.
9843 Value
*CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID
,
9845 ReturnValueSlot ReturnValue
,
9846 llvm::Triple::ArchType Arch
) {
9847 enum class CustomCodeGen
{ VLD24
, VST24
} CustomCodeGenType
;
9848 Intrinsic::ID IRIntr
;
9849 unsigned NumVectors
;
9851 // Code autogenerated by Tablegen will handle all the simple builtins.
9852 switch (BuiltinID
) {
9853 #include "clang/Basic/arm_mve_builtin_cg.inc"
9855 // If we didn't match an MVE builtin id at all, go back to the
9856 // main EmitARMBuiltinExpr.
9861 // Anything that breaks from that switch is an MVE builtin that
9862 // needs handwritten code to generate.
9864 switch (CustomCodeGenType
) {
// Load path: call IRIntr on the address argument and repack its results into
// the builtin's return type.
9866 case CustomCodeGen::VLD24
: {
9867 llvm::SmallVector
<Value
*, 4> Ops
;
9868 llvm::SmallVector
<llvm::Type
*, 4> Tys
;
// The return type is expected to be a struct wrapping a single array of
// NumVectors vectors; the asserts check each layer before it is unwrapped.
9870 auto MvecCType
= E
->getType();
9871 auto MvecLType
= ConvertType(MvecCType
);
9872 assert(MvecLType
->isStructTy() &&
9873 "Return type for vld[24]q should be a struct");
9874 assert(MvecLType
->getStructNumElements() == 1 &&
9875 "Return-type struct for vld[24]q should have one element");
9876 auto MvecLTypeInner
= MvecLType
->getStructElementType(0);
9877 assert(MvecLTypeInner
->isArrayTy() &&
9878 "Return-type struct for vld[24]q should contain an array");
9879 assert(MvecLTypeInner
->getArrayNumElements() == NumVectors
&&
9880 "Array member of return-type struct vld[24]q has wrong length");
9881 auto VecLType
= MvecLTypeInner
->getArrayElementType();
// The intrinsic is overloaded on the vector type and on the address type.
9883 Tys
.push_back(VecLType
);
9885 auto Addr
= E
->getArg(0);
9886 Ops
.push_back(EmitScalarExpr(Addr
));
9887 Tys
.push_back(ConvertType(Addr
->getType()));
9889 Function
*F
= CGM
.getIntrinsic(IRIntr
, ArrayRef(Tys
));
9890 Value
*LoadResult
= Builder
.CreateCall(F
, Ops
);
// Starting from poison, copy element i of the intrinsic's result into slot
// {0, i} of the struct-of-array return value.
9891 Value
*MvecOut
= PoisonValue::get(MvecLType
);
9892 for (unsigned i
= 0; i
< NumVectors
; ++i
) {
9893 Value
*Vec
= Builder
.CreateExtractValue(LoadResult
, i
);
9894 MvecOut
= Builder
.CreateInsertValue(MvecOut
, Vec
, {0, i
});
// NOTE(review): original lines 9898-9899 are not visible here, so the
// statement guarded by this null check cannot be read from this view; the
// visible store below writes MvecOut to the ReturnValue address.
9897 if (ReturnValue
.isNull())
9900 return Builder
.CreateStore(MvecOut
, ReturnValue
.getAddress());
// Store path: pass the address and the NumVectors vectors of the aggregate
// argument to IRIntr.
9903 case CustomCodeGen::VST24
: {
9904 llvm::SmallVector
<Value
*, 4> Ops
;
9905 llvm::SmallVector
<llvm::Type
*, 4> Tys
;
9907 auto Addr
= E
->getArg(0);
9908 Ops
.push_back(EmitScalarExpr(Addr
));
9909 Tys
.push_back(ConvertType(Addr
->getType()));
// Argument 1 is expected to have the same struct-of-one-array layout that the
// load path checks for its return type.
9911 auto MvecCType
= E
->getArg(1)->getType();
9912 auto MvecLType
= ConvertType(MvecCType
);
9913 assert(MvecLType
->isStructTy() && "Data type for vst2q should be a struct");
9914 assert(MvecLType
->getStructNumElements() == 1 &&
9915 "Data-type struct for vst2q should have one element");
9916 auto MvecLTypeInner
= MvecLType
->getStructElementType(0);
9917 assert(MvecLTypeInner
->isArrayTy() &&
9918 "Data-type struct for vst2q should contain an array");
// NOTE(review): the message below names the vld[24]q return type although this
// is the store path; it looks copied from the load case.
9919 assert(MvecLTypeInner
->getArrayNumElements() == NumVectors
&&
9920 "Array member of return-type struct vld[24]q has wrong length");
9921 auto VecLType
= MvecLTypeInner
->getArrayElementType();
9923 Tys
.push_back(VecLType
);
// Evaluate the aggregate argument into a temporary and load it back as a
// value so that its member vectors can be extracted.
9925 AggValueSlot MvecSlot
= CreateAggTemp(MvecCType
);
9926 EmitAggExpr(E
->getArg(1), MvecSlot
);
9927 auto Mvec
= Builder
.CreateLoad(MvecSlot
.getAddress());
9928 for (unsigned i
= 0; i
< NumVectors
; i
++)
9929 Ops
.push_back(Builder
.CreateExtractValue(Mvec
, {0, i
}));
9931 Function
*F
= CGM
.getIntrinsic(IRIntr
, ArrayRef(Tys
));
// One intrinsic call is emitted per value of i, with the constant i appended
// as an extra i32 operand.
// NOTE(review): the rest of this loop (original lines 9936-9940) is not
// visible in this view.
9932 Value
*ToReturn
= nullptr;
9933 for (unsigned i
= 0; i
< NumVectors
; i
++) {
9934 Ops
.push_back(llvm::ConstantInt::get(Int32Ty
, i
));
9935 ToReturn
= Builder
.CreateCall(F
, Ops
);
9941 llvm_unreachable("unknown custom codegen type.");
// Emit LLVM IR for a call to an ARM CDE builtin by switching on BuiltinID over
// the cases supplied by the included arm_cde_builtin_cg.inc.
// NOTE(review): presumably Tablegen-generated like the MVE include; confirm.
// Some original lines of this function (9945, 9949-9950, 9952-9953) are not
// visible in this view.
9944 Value
*CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID
,
9946 ReturnValueSlot ReturnValue
,
9947 llvm::Triple::ArchType Arch
) {
9948 switch (BuiltinID
) {
9951 #include "clang/Basic/arm_cde_builtin_cg.inc"
9955 static Value
*EmitAArch64TblBuiltinExpr(CodeGenFunction
&CGF
, unsigned BuiltinID
,
9957 SmallVectorImpl
<Value
*> &Ops
,
9958 llvm::Triple::ArchType Arch
) {
9959 unsigned int Int
= 0;
9960 const char *s
= nullptr;
9962 switch (BuiltinID
) {
9965 case NEON::BI__builtin_neon_vtbl1_v
:
9966 case NEON::BI__builtin_neon_vqtbl1_v
:
9967 case NEON::BI__builtin_neon_vqtbl1q_v
:
9968 case NEON::BI__builtin_neon_vtbl2_v
:
9969 case NEON::BI__builtin_neon_vqtbl2_v
:
9970 case NEON::BI__builtin_neon_vqtbl2q_v
:
9971 case NEON::BI__builtin_neon_vtbl3_v
:
9972 case NEON::BI__builtin_neon_vqtbl3_v
:
9973 case NEON::BI__builtin_neon_vqtbl3q_v
:
9974 case NEON::BI__builtin_neon_vtbl4_v
:
9975 case NEON::BI__builtin_neon_vqtbl4_v
:
9976 case NEON::BI__builtin_neon_vqtbl4q_v
:
9978 case NEON::BI__builtin_neon_vtbx1_v
:
9979 case NEON::BI__builtin_neon_vqtbx1_v
:
9980 case NEON::BI__builtin_neon_vqtbx1q_v
:
9981 case NEON::BI__builtin_neon_vtbx2_v
:
9982 case NEON::BI__builtin_neon_vqtbx2_v
:
9983 case NEON::BI__builtin_neon_vqtbx2q_v
:
9984 case NEON::BI__builtin_neon_vtbx3_v
:
9985 case NEON::BI__builtin_neon_vqtbx3_v
:
9986 case NEON::BI__builtin_neon_vqtbx3q_v
:
9987 case NEON::BI__builtin_neon_vtbx4_v
:
9988 case NEON::BI__builtin_neon_vqtbx4_v
:
9989 case NEON::BI__builtin_neon_vqtbx4q_v
:
9993 assert(E
->getNumArgs() >= 3);
9995 // Get the last argument, which specifies the vector type.
9996 const Expr
*Arg
= E
->getArg(E
->getNumArgs() - 1);
9997 std::optional
<llvm::APSInt
> Result
=
9998 Arg
->getIntegerConstantExpr(CGF
.getContext());
10002 // Determine the type of this overloaded NEON intrinsic.
10003 NeonTypeFlags Type
= Result
->getZExtValue();
10004 llvm::FixedVectorType
*Ty
= GetNeonType(&CGF
, Type
);
10008 CodeGen::CGBuilderTy
&Builder
= CGF
.Builder
;
10010 // AArch64 scalar builtins are not overloaded, they do not have an extra
10011 // argument that specifies the vector type, need to handle each case.
10012 switch (BuiltinID
) {
10013 case NEON::BI__builtin_neon_vtbl1_v
: {
10014 return packTBLDVectorList(CGF
, ArrayRef(Ops
).slice(0, 1), nullptr, Ops
[1],
10015 Ty
, Intrinsic::aarch64_neon_tbl1
, "vtbl1");
10017 case NEON::BI__builtin_neon_vtbl2_v
: {
10018 return packTBLDVectorList(CGF
, ArrayRef(Ops
).slice(0, 2), nullptr, Ops
[2],
10019 Ty
, Intrinsic::aarch64_neon_tbl1
, "vtbl1");
10021 case NEON::BI__builtin_neon_vtbl3_v
: {
10022 return packTBLDVectorList(CGF
, ArrayRef(Ops
).slice(0, 3), nullptr, Ops
[3],
10023 Ty
, Intrinsic::aarch64_neon_tbl2
, "vtbl2");
10025 case NEON::BI__builtin_neon_vtbl4_v
: {
10026 return packTBLDVectorList(CGF
, ArrayRef(Ops
).slice(0, 4), nullptr, Ops
[4],
10027 Ty
, Intrinsic::aarch64_neon_tbl2
, "vtbl2");
10029 case NEON::BI__builtin_neon_vtbx1_v
: {
10031 packTBLDVectorList(CGF
, ArrayRef(Ops
).slice(1, 1), nullptr, Ops
[2], Ty
,
10032 Intrinsic::aarch64_neon_tbl1
, "vtbl1");
10034 llvm::Constant
*EightV
= ConstantInt::get(Ty
, 8);
10035 Value
*CmpRes
= Builder
.CreateICmp(ICmpInst::ICMP_UGE
, Ops
[2], EightV
);
10036 CmpRes
= Builder
.CreateSExt(CmpRes
, Ty
);
10038 Value
*EltsFromInput
= Builder
.CreateAnd(CmpRes
, Ops
[0]);
10039 Value
*EltsFromTbl
= Builder
.CreateAnd(Builder
.CreateNot(CmpRes
), TblRes
);
10040 return Builder
.CreateOr(EltsFromInput
, EltsFromTbl
, "vtbx");
10042 case NEON::BI__builtin_neon_vtbx2_v
: {
10043 return packTBLDVectorList(CGF
, ArrayRef(Ops
).slice(1, 2), Ops
[0], Ops
[3],
10044 Ty
, Intrinsic::aarch64_neon_tbx1
, "vtbx1");
10046 case NEON::BI__builtin_neon_vtbx3_v
: {
10048 packTBLDVectorList(CGF
, ArrayRef(Ops
).slice(1, 3), nullptr, Ops
[4], Ty
,
10049 Intrinsic::aarch64_neon_tbl2
, "vtbl2");
10051 llvm::Constant
*TwentyFourV
= ConstantInt::get(Ty
, 24);
10052 Value
*CmpRes
= Builder
.CreateICmp(ICmpInst::ICMP_UGE
, Ops
[4],
10054 CmpRes
= Builder
.CreateSExt(CmpRes
, Ty
);
10056 Value
*EltsFromInput
= Builder
.CreateAnd(CmpRes
, Ops
[0]);
10057 Value
*EltsFromTbl
= Builder
.CreateAnd(Builder
.CreateNot(CmpRes
), TblRes
);
10058 return Builder
.CreateOr(EltsFromInput
, EltsFromTbl
, "vtbx");
10060 case NEON::BI__builtin_neon_vtbx4_v
: {
10061 return packTBLDVectorList(CGF
, ArrayRef(Ops
).slice(1, 4), Ops
[0], Ops
[5],
10062 Ty
, Intrinsic::aarch64_neon_tbx2
, "vtbx2");
10064 case NEON::BI__builtin_neon_vqtbl1_v
:
10065 case NEON::BI__builtin_neon_vqtbl1q_v
:
10066 Int
= Intrinsic::aarch64_neon_tbl1
; s
= "vtbl1"; break;
10067 case NEON::BI__builtin_neon_vqtbl2_v
:
10068 case NEON::BI__builtin_neon_vqtbl2q_v
: {
10069 Int
= Intrinsic::aarch64_neon_tbl2
; s
= "vtbl2"; break;
10070 case NEON::BI__builtin_neon_vqtbl3_v
:
10071 case NEON::BI__builtin_neon_vqtbl3q_v
:
10072 Int
= Intrinsic::aarch64_neon_tbl3
; s
= "vtbl3"; break;
10073 case NEON::BI__builtin_neon_vqtbl4_v
:
10074 case NEON::BI__builtin_neon_vqtbl4q_v
:
10075 Int
= Intrinsic::aarch64_neon_tbl4
; s
= "vtbl4"; break;
10076 case NEON::BI__builtin_neon_vqtbx1_v
:
10077 case NEON::BI__builtin_neon_vqtbx1q_v
:
10078 Int
= Intrinsic::aarch64_neon_tbx1
; s
= "vtbx1"; break;
10079 case NEON::BI__builtin_neon_vqtbx2_v
:
10080 case NEON::BI__builtin_neon_vqtbx2q_v
:
10081 Int
= Intrinsic::aarch64_neon_tbx2
; s
= "vtbx2"; break;
10082 case NEON::BI__builtin_neon_vqtbx3_v
:
10083 case NEON::BI__builtin_neon_vqtbx3q_v
:
10084 Int
= Intrinsic::aarch64_neon_tbx3
; s
= "vtbx3"; break;
10085 case NEON::BI__builtin_neon_vqtbx4_v
:
10086 case NEON::BI__builtin_neon_vqtbx4q_v
:
10087 Int
= Intrinsic::aarch64_neon_tbx4
; s
= "vtbx4"; break;
10094 Function
*F
= CGF
.CGM
.getIntrinsic(Int
, Ty
);
10095 return CGF
.EmitNeonCall(F
, Ops
, s
);
10098 Value
*CodeGenFunction::vectorWrapScalar16(Value
*Op
) {
10099 auto *VTy
= llvm::FixedVectorType::get(Int16Ty
, 4);
10100 Op
= Builder
.CreateBitCast(Op
, Int16Ty
);
10101 Value
*V
= PoisonValue::get(VTy
);
10102 llvm::Constant
*CI
= ConstantInt::get(SizeTy
, 0);
10103 Op
= Builder
.CreateInsertElement(V
, Op
, CI
);
10107 /// SVEBuiltinMemEltTy - Returns the memory element type for this memory
10108 /// access builtin. Only required if it can't be inferred from the base pointer
10110 llvm::Type
*CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags
&TypeFlags
) {
10111 switch (TypeFlags
.getMemEltType()) {
10112 case SVETypeFlags::MemEltTyDefault
:
10113 return getEltType(TypeFlags
);
10114 case SVETypeFlags::MemEltTyInt8
:
10115 return Builder
.getInt8Ty();
10116 case SVETypeFlags::MemEltTyInt16
:
10117 return Builder
.getInt16Ty();
10118 case SVETypeFlags::MemEltTyInt32
:
10119 return Builder
.getInt32Ty();
10120 case SVETypeFlags::MemEltTyInt64
:
10121 return Builder
.getInt64Ty();
10123 llvm_unreachable("Unknown MemEltType");
10126 llvm::Type
*CodeGenFunction::getEltType(const SVETypeFlags
&TypeFlags
) {
10127 switch (TypeFlags
.getEltType()) {
10129 llvm_unreachable("Invalid SVETypeFlag!");
10131 case SVETypeFlags::EltTyInt8
:
10132 return Builder
.getInt8Ty();
10133 case SVETypeFlags::EltTyInt16
:
10134 return Builder
.getInt16Ty();
10135 case SVETypeFlags::EltTyInt32
:
10136 return Builder
.getInt32Ty();
10137 case SVETypeFlags::EltTyInt64
:
10138 return Builder
.getInt64Ty();
10139 case SVETypeFlags::EltTyInt128
:
10140 return Builder
.getInt128Ty();
10142 case SVETypeFlags::EltTyFloat16
:
10143 return Builder
.getHalfTy();
10144 case SVETypeFlags::EltTyFloat32
:
10145 return Builder
.getFloatTy();
10146 case SVETypeFlags::EltTyFloat64
:
10147 return Builder
.getDoubleTy();
10149 case SVETypeFlags::EltTyBFloat16
:
10150 return Builder
.getBFloatTy();
10152 case SVETypeFlags::EltTyBool8
:
10153 case SVETypeFlags::EltTyBool16
:
10154 case SVETypeFlags::EltTyBool32
:
10155 case SVETypeFlags::EltTyBool64
:
10156 return Builder
.getInt1Ty();
10160 // Return the llvm predicate vector type corresponding to the specified element
10162 llvm::ScalableVectorType
*
10163 CodeGenFunction::getSVEPredType(const SVETypeFlags
&TypeFlags
) {
10164 switch (TypeFlags
.getEltType()) {
10165 default: llvm_unreachable("Unhandled SVETypeFlag!");
10167 case SVETypeFlags::EltTyInt8
:
10168 return llvm::ScalableVectorType::get(Builder
.getInt1Ty(), 16);
10169 case SVETypeFlags::EltTyInt16
:
10170 return llvm::ScalableVectorType::get(Builder
.getInt1Ty(), 8);
10171 case SVETypeFlags::EltTyInt32
:
10172 return llvm::ScalableVectorType::get(Builder
.getInt1Ty(), 4);
10173 case SVETypeFlags::EltTyInt64
:
10174 return llvm::ScalableVectorType::get(Builder
.getInt1Ty(), 2);
10176 case SVETypeFlags::EltTyBFloat16
:
10177 return llvm::ScalableVectorType::get(Builder
.getInt1Ty(), 8);
10178 case SVETypeFlags::EltTyFloat16
:
10179 return llvm::ScalableVectorType::get(Builder
.getInt1Ty(), 8);
10180 case SVETypeFlags::EltTyFloat32
:
10181 return llvm::ScalableVectorType::get(Builder
.getInt1Ty(), 4);
10182 case SVETypeFlags::EltTyFloat64
:
10183 return llvm::ScalableVectorType::get(Builder
.getInt1Ty(), 2);
10185 case SVETypeFlags::EltTyBool8
:
10186 return llvm::ScalableVectorType::get(Builder
.getInt1Ty(), 16);
10187 case SVETypeFlags::EltTyBool16
:
10188 return llvm::ScalableVectorType::get(Builder
.getInt1Ty(), 8);
10189 case SVETypeFlags::EltTyBool32
:
10190 return llvm::ScalableVectorType::get(Builder
.getInt1Ty(), 4);
10191 case SVETypeFlags::EltTyBool64
:
10192 return llvm::ScalableVectorType::get(Builder
.getInt1Ty(), 2);
10196 // Return the llvm vector type corresponding to the specified element TypeFlags.
10197 llvm::ScalableVectorType
*
10198 CodeGenFunction::getSVEType(const SVETypeFlags
&TypeFlags
) {
10199 switch (TypeFlags
.getEltType()) {
10201 llvm_unreachable("Invalid SVETypeFlag!");
10203 case SVETypeFlags::EltTyInt8
:
10204 return llvm::ScalableVectorType::get(Builder
.getInt8Ty(), 16);
10205 case SVETypeFlags::EltTyInt16
:
10206 return llvm::ScalableVectorType::get(Builder
.getInt16Ty(), 8);
10207 case SVETypeFlags::EltTyInt32
:
10208 return llvm::ScalableVectorType::get(Builder
.getInt32Ty(), 4);
10209 case SVETypeFlags::EltTyInt64
:
10210 return llvm::ScalableVectorType::get(Builder
.getInt64Ty(), 2);
10212 case SVETypeFlags::EltTyMFloat8
:
10213 return llvm::ScalableVectorType::get(Builder
.getInt8Ty(), 16);
10214 case SVETypeFlags::EltTyFloat16
:
10215 return llvm::ScalableVectorType::get(Builder
.getHalfTy(), 8);
10216 case SVETypeFlags::EltTyBFloat16
:
10217 return llvm::ScalableVectorType::get(Builder
.getBFloatTy(), 8);
10218 case SVETypeFlags::EltTyFloat32
:
10219 return llvm::ScalableVectorType::get(Builder
.getFloatTy(), 4);
10220 case SVETypeFlags::EltTyFloat64
:
10221 return llvm::ScalableVectorType::get(Builder
.getDoubleTy(), 2);
10223 case SVETypeFlags::EltTyBool8
:
10224 return llvm::ScalableVectorType::get(Builder
.getInt1Ty(), 16);
10225 case SVETypeFlags::EltTyBool16
:
10226 return llvm::ScalableVectorType::get(Builder
.getInt1Ty(), 8);
10227 case SVETypeFlags::EltTyBool32
:
10228 return llvm::ScalableVectorType::get(Builder
.getInt1Ty(), 4);
10229 case SVETypeFlags::EltTyBool64
:
10230 return llvm::ScalableVectorType::get(Builder
.getInt1Ty(), 2);
10235 CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags
&TypeFlags
) {
10237 CGM
.getIntrinsic(Intrinsic::aarch64_sve_ptrue
, getSVEPredType(TypeFlags
));
10238 return Builder
.CreateCall(Ptrue
, {Builder
.getInt32(/*SV_ALL*/ 31)});
10241 constexpr unsigned SVEBitsPerBlock
= 128;
10243 static llvm::ScalableVectorType
*getSVEVectorForElementType(llvm::Type
*EltTy
) {
10244 unsigned NumElts
= SVEBitsPerBlock
/ EltTy
->getScalarSizeInBits();
10245 return llvm::ScalableVectorType::get(EltTy
, NumElts
);
10248 // Reinterpret the input predicate so that it can be used to correctly isolate
10249 // the elements of the specified datatype.
10250 Value
*CodeGenFunction::EmitSVEPredicateCast(Value
*Pred
,
10251 llvm::ScalableVectorType
*VTy
) {
10253 if (isa
<TargetExtType
>(Pred
->getType()) &&
10254 cast
<TargetExtType
>(Pred
->getType())->getName() == "aarch64.svcount")
10257 auto *RTy
= llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy
);
10258 if (Pred
->getType() == RTy
)
10262 llvm::Type
*IntrinsicTy
;
10263 switch (VTy
->getMinNumElements()) {
10265 llvm_unreachable("unsupported element count!");
10270 IntID
= Intrinsic::aarch64_sve_convert_from_svbool
;
10274 IntID
= Intrinsic::aarch64_sve_convert_to_svbool
;
10275 IntrinsicTy
= Pred
->getType();
10279 Function
*F
= CGM
.getIntrinsic(IntID
, IntrinsicTy
);
10280 Value
*C
= Builder
.CreateCall(F
, Pred
);
10281 assert(C
->getType() == RTy
&& "Unexpected return type!");
10285 Value
*CodeGenFunction::EmitSVEPredicateTupleCast(Value
*PredTuple
,
10286 llvm::StructType
*Ty
) {
10287 if (PredTuple
->getType() == Ty
)
10290 Value
*Ret
= llvm::PoisonValue::get(Ty
);
10291 for (unsigned I
= 0; I
< Ty
->getNumElements(); ++I
) {
10292 Value
*Pred
= Builder
.CreateExtractValue(PredTuple
, I
);
10293 Pred
= EmitSVEPredicateCast(
10294 Pred
, cast
<llvm::ScalableVectorType
>(Ty
->getTypeAtIndex(I
)));
10295 Ret
= Builder
.CreateInsertValue(Ret
, Pred
, I
);
10301 Value
*CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags
&TypeFlags
,
10302 SmallVectorImpl
<Value
*> &Ops
,
10304 auto *ResultTy
= getSVEType(TypeFlags
);
10305 auto *OverloadedTy
=
10306 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags
), ResultTy
);
10308 Function
*F
= nullptr;
10309 if (Ops
[1]->getType()->isVectorTy())
10310 // This is the "vector base, scalar offset" case. In order to uniquely
10311 // map this built-in to an LLVM IR intrinsic, we need both the return type
10312 // and the type of the vector base.
10313 F
= CGM
.getIntrinsic(IntID
, {OverloadedTy
, Ops
[1]->getType()});
10315 // This is the "scalar base, vector offset case". The type of the offset
10316 // is encoded in the name of the intrinsic. We only need to specify the
10317 // return type in order to uniquely map this built-in to an LLVM IR
10319 F
= CGM
.getIntrinsic(IntID
, OverloadedTy
);
10321 // At the ACLE level there's only one predicate type, svbool_t, which is
10322 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10323 // actual type being loaded. For example, when loading doubles (i64) the
10324 // predicate should be <n x 2 x i1> instead. At the IR level the type of
10325 // the predicate and the data being loaded must match. Cast to the type
10326 // expected by the intrinsic. The intrinsic itself should be defined in
10327 // a way than enforces relations between parameter types.
10328 Ops
[0] = EmitSVEPredicateCast(
10329 Ops
[0], cast
<llvm::ScalableVectorType
>(F
->getArg(0)->getType()));
10331 // Pass 0 when the offset is missing. This can only be applied when using
10332 // the "vector base" addressing mode for which ACLE allows no offset. The
10333 // corresponding LLVM IR always requires an offset.
10334 if (Ops
.size() == 2) {
10335 assert(Ops
[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10336 Ops
.push_back(ConstantInt::get(Int64Ty
, 0));
10339 // For "vector base, scalar index" scale the index so that it becomes a
10341 if (!TypeFlags
.isByteIndexed() && Ops
[1]->getType()->isVectorTy()) {
10342 unsigned BytesPerElt
=
10343 OverloadedTy
->getElementType()->getScalarSizeInBits() / 8;
10344 Ops
[2] = Builder
.CreateShl(Ops
[2], Log2_32(BytesPerElt
));
10347 Value
*Call
= Builder
.CreateCall(F
, Ops
);
10349 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
10350 // other cases it's folded into a nop.
10351 return TypeFlags
.isZExtReturn() ? Builder
.CreateZExt(Call
, ResultTy
)
10352 : Builder
.CreateSExt(Call
, ResultTy
);
10355 Value
*CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags
&TypeFlags
,
10356 SmallVectorImpl
<Value
*> &Ops
,
10358 auto *SrcDataTy
= getSVEType(TypeFlags
);
10359 auto *OverloadedTy
=
10360 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags
), SrcDataTy
);
10362 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
10363 // it's the first argument. Move it accordingly.
10364 Ops
.insert(Ops
.begin(), Ops
.pop_back_val());
10366 Function
*F
= nullptr;
10367 if (Ops
[2]->getType()->isVectorTy())
10368 // This is the "vector base, scalar offset" case. In order to uniquely
10369 // map this built-in to an LLVM IR intrinsic, we need both the return type
10370 // and the type of the vector base.
10371 F
= CGM
.getIntrinsic(IntID
, {OverloadedTy
, Ops
[2]->getType()});
10373 // This is the "scalar base, vector offset case". The type of the offset
10374 // is encoded in the name of the intrinsic. We only need to specify the
10375 // return type in order to uniquely map this built-in to an LLVM IR
10377 F
= CGM
.getIntrinsic(IntID
, OverloadedTy
);
10379 // Pass 0 when the offset is missing. This can only be applied when using
10380 // the "vector base" addressing mode for which ACLE allows no offset. The
10381 // corresponding LLVM IR always requires an offset.
10382 if (Ops
.size() == 3) {
10383 assert(Ops
[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10384 Ops
.push_back(ConstantInt::get(Int64Ty
, 0));
10387 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
10388 // folded into a nop.
10389 Ops
[0] = Builder
.CreateTrunc(Ops
[0], OverloadedTy
);
10391 // At the ACLE level there's only one predicate type, svbool_t, which is
10392 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10393 // actual type being stored. For example, when storing doubles (i64) the
10394 // predicated should be <n x 2 x i1> instead. At the IR level the type of
10395 // the predicate and the data being stored must match. Cast to the type
10396 // expected by the intrinsic. The intrinsic itself should be defined in
10397 // a way that enforces relations between parameter types.
10398 Ops
[1] = EmitSVEPredicateCast(
10399 Ops
[1], cast
<llvm::ScalableVectorType
>(F
->getArg(1)->getType()));
10401 // For "vector base, scalar index" scale the index so that it becomes a
10403 if (!TypeFlags
.isByteIndexed() && Ops
[2]->getType()->isVectorTy()) {
10404 unsigned BytesPerElt
=
10405 OverloadedTy
->getElementType()->getScalarSizeInBits() / 8;
10406 Ops
[3] = Builder
.CreateShl(Ops
[3], Log2_32(BytesPerElt
));
10409 return Builder
.CreateCall(F
, Ops
);
10412 Value
*CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags
&TypeFlags
,
10413 SmallVectorImpl
<Value
*> &Ops
,
10415 // The gather prefetches are overloaded on the vector input - this can either
10416 // be the vector of base addresses or vector of offsets.
10417 auto *OverloadedTy
= dyn_cast
<llvm::ScalableVectorType
>(Ops
[1]->getType());
10419 OverloadedTy
= cast
<llvm::ScalableVectorType
>(Ops
[2]->getType());
10421 // Cast the predicate from svbool_t to the right number of elements.
10422 Ops
[0] = EmitSVEPredicateCast(Ops
[0], OverloadedTy
);
10424 // vector + imm addressing modes
10425 if (Ops
[1]->getType()->isVectorTy()) {
10426 if (Ops
.size() == 3) {
10427 // Pass 0 for 'vector+imm' when the index is omitted.
10428 Ops
.push_back(ConstantInt::get(Int64Ty
, 0));
10430 // The sv_prfop is the last operand in the builtin and IR intrinsic.
10431 std::swap(Ops
[2], Ops
[3]);
10433 // Index needs to be passed as scaled offset.
10434 llvm::Type
*MemEltTy
= SVEBuiltinMemEltTy(TypeFlags
);
10435 unsigned BytesPerElt
= MemEltTy
->getPrimitiveSizeInBits() / 8;
10436 if (BytesPerElt
> 1)
10437 Ops
[2] = Builder
.CreateShl(Ops
[2], Log2_32(BytesPerElt
));
10441 Function
*F
= CGM
.getIntrinsic(IntID
, OverloadedTy
);
10442 return Builder
.CreateCall(F
, Ops
);
10445 Value
*CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags
&TypeFlags
,
10446 SmallVectorImpl
<Value
*> &Ops
,
10448 llvm::ScalableVectorType
*VTy
= getSVEType(TypeFlags
);
10449 Value
*Predicate
= EmitSVEPredicateCast(Ops
[0], VTy
);
10450 Value
*BasePtr
= Ops
[1];
10452 // Does the load have an offset?
10453 if (Ops
.size() > 2)
10454 BasePtr
= Builder
.CreateGEP(VTy
, BasePtr
, Ops
[2]);
10456 Function
*F
= CGM
.getIntrinsic(IntID
, {VTy
});
10457 return Builder
.CreateCall(F
, {Predicate
, BasePtr
});
10460 Value
*CodeGenFunction::EmitSVEStructStore(const SVETypeFlags
&TypeFlags
,
10461 SmallVectorImpl
<Value
*> &Ops
,
10463 llvm::ScalableVectorType
*VTy
= getSVEType(TypeFlags
);
10467 case Intrinsic::aarch64_sve_st2
:
10468 case Intrinsic::aarch64_sve_st1_pn_x2
:
10469 case Intrinsic::aarch64_sve_stnt1_pn_x2
:
10470 case Intrinsic::aarch64_sve_st2q
:
10473 case Intrinsic::aarch64_sve_st3
:
10474 case Intrinsic::aarch64_sve_st3q
:
10477 case Intrinsic::aarch64_sve_st4
:
10478 case Intrinsic::aarch64_sve_st1_pn_x4
:
10479 case Intrinsic::aarch64_sve_stnt1_pn_x4
:
10480 case Intrinsic::aarch64_sve_st4q
:
10484 llvm_unreachable("unknown intrinsic!");
10487 Value
*Predicate
= EmitSVEPredicateCast(Ops
[0], VTy
);
10488 Value
*BasePtr
= Ops
[1];
10490 // Does the store have an offset?
10491 if (Ops
.size() > (2 + N
))
10492 BasePtr
= Builder
.CreateGEP(VTy
, BasePtr
, Ops
[2]);
10494 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
10495 // need to break up the tuple vector.
10496 SmallVector
<llvm::Value
*, 5> Operands
;
10497 for (unsigned I
= Ops
.size() - N
; I
< Ops
.size(); ++I
)
10498 Operands
.push_back(Ops
[I
]);
10499 Operands
.append({Predicate
, BasePtr
});
10500 Function
*F
= CGM
.getIntrinsic(IntID
, { VTy
});
10502 return Builder
.CreateCall(F
, Operands
);
10505 // SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
10506 // svpmullt_pair intrinsics, with the exception that their results are bitcast
10507 // to a wider type.
10508 Value
*CodeGenFunction::EmitSVEPMull(const SVETypeFlags
&TypeFlags
,
10509 SmallVectorImpl
<Value
*> &Ops
,
10510 unsigned BuiltinID
) {
10511 // Splat scalar operand to vector (intrinsics with _n infix)
10512 if (TypeFlags
.hasSplatOperand()) {
10513 unsigned OpNo
= TypeFlags
.getSplatOperand();
10514 Ops
[OpNo
] = EmitSVEDupX(Ops
[OpNo
]);
10517 // The pair-wise function has a narrower overloaded type.
10518 Function
*F
= CGM
.getIntrinsic(BuiltinID
, Ops
[0]->getType());
10519 Value
*Call
= Builder
.CreateCall(F
, {Ops
[0], Ops
[1]});
10521 // Now bitcast to the wider result type.
10522 llvm::ScalableVectorType
*Ty
= getSVEType(TypeFlags
);
10523 return EmitSVEReinterpret(Call
, Ty
);
10526 Value
*CodeGenFunction::EmitSVEMovl(const SVETypeFlags
&TypeFlags
,
10527 ArrayRef
<Value
*> Ops
, unsigned BuiltinID
) {
10528 llvm::Type
*OverloadedTy
= getSVEType(TypeFlags
);
10529 Function
*F
= CGM
.getIntrinsic(BuiltinID
, OverloadedTy
);
10530 return Builder
.CreateCall(F
, {Ops
[0], Builder
.getInt32(0)});
10533 Value
*CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags
&TypeFlags
,
10534 SmallVectorImpl
<Value
*> &Ops
,
10535 unsigned BuiltinID
) {
10536 auto *MemEltTy
= SVEBuiltinMemEltTy(TypeFlags
);
10537 auto *VectorTy
= getSVEVectorForElementType(MemEltTy
);
10538 auto *MemoryTy
= llvm::ScalableVectorType::get(MemEltTy
, VectorTy
);
10540 Value
*Predicate
= EmitSVEPredicateCast(Ops
[0], MemoryTy
);
10541 Value
*BasePtr
= Ops
[1];
10543 // Implement the index operand if not omitted.
10544 if (Ops
.size() > 3)
10545 BasePtr
= Builder
.CreateGEP(MemoryTy
, BasePtr
, Ops
[2]);
10547 Value
*PrfOp
= Ops
.back();
10549 Function
*F
= CGM
.getIntrinsic(BuiltinID
, Predicate
->getType());
10550 return Builder
.CreateCall(F
, {Predicate
, BasePtr
, PrfOp
});
10553 Value
*CodeGenFunction::EmitSVEMaskedLoad(const CallExpr
*E
,
10554 llvm::Type
*ReturnTy
,
10555 SmallVectorImpl
<Value
*> &Ops
,
10556 unsigned IntrinsicID
,
10557 bool IsZExtReturn
) {
10558 QualType LangPTy
= E
->getArg(1)->getType();
10559 llvm::Type
*MemEltTy
= CGM
.getTypes().ConvertType(
10560 LangPTy
->castAs
<PointerType
>()->getPointeeType());
10562 // The vector type that is returned may be different from the
10563 // eventual type loaded from memory.
10564 auto VectorTy
= cast
<llvm::ScalableVectorType
>(ReturnTy
);
10565 llvm::ScalableVectorType
*MemoryTy
= nullptr;
10566 llvm::ScalableVectorType
*PredTy
= nullptr;
10567 bool IsQuadLoad
= false;
10568 switch (IntrinsicID
) {
10569 case Intrinsic::aarch64_sve_ld1uwq
:
10570 case Intrinsic::aarch64_sve_ld1udq
:
10571 MemoryTy
= llvm::ScalableVectorType::get(MemEltTy
, 1);
10572 PredTy
= llvm::ScalableVectorType::get(
10573 llvm::Type::getInt1Ty(getLLVMContext()), 1);
10577 MemoryTy
= llvm::ScalableVectorType::get(MemEltTy
, VectorTy
);
10582 Value
*Predicate
= EmitSVEPredicateCast(Ops
[0], PredTy
);
10583 Value
*BasePtr
= Ops
[1];
10585 // Does the load have an offset?
10586 if (Ops
.size() > 2)
10587 BasePtr
= Builder
.CreateGEP(MemoryTy
, BasePtr
, Ops
[2]);
10589 Function
*F
= CGM
.getIntrinsic(IntrinsicID
, IsQuadLoad
? VectorTy
: MemoryTy
);
10591 cast
<llvm::Instruction
>(Builder
.CreateCall(F
, {Predicate
, BasePtr
}));
10592 auto TBAAInfo
= CGM
.getTBAAAccessInfo(LangPTy
->getPointeeType());
10593 CGM
.DecorateInstructionWithTBAA(Load
, TBAAInfo
);
10598 return IsZExtReturn
? Builder
.CreateZExt(Load
, VectorTy
)
10599 : Builder
.CreateSExt(Load
, VectorTy
);
10602 Value
*CodeGenFunction::EmitSVEMaskedStore(const CallExpr
*E
,
10603 SmallVectorImpl
<Value
*> &Ops
,
10604 unsigned IntrinsicID
) {
10605 QualType LangPTy
= E
->getArg(1)->getType();
10606 llvm::Type
*MemEltTy
= CGM
.getTypes().ConvertType(
10607 LangPTy
->castAs
<PointerType
>()->getPointeeType());
10609 // The vector type that is stored may be different from the
10610 // eventual type stored to memory.
10611 auto VectorTy
= cast
<llvm::ScalableVectorType
>(Ops
.back()->getType());
10612 auto MemoryTy
= llvm::ScalableVectorType::get(MemEltTy
, VectorTy
);
10614 auto PredTy
= MemoryTy
;
10615 auto AddrMemoryTy
= MemoryTy
;
10616 bool IsQuadStore
= false;
10618 switch (IntrinsicID
) {
10619 case Intrinsic::aarch64_sve_st1wq
:
10620 case Intrinsic::aarch64_sve_st1dq
:
10621 AddrMemoryTy
= llvm::ScalableVectorType::get(MemEltTy
, 1);
10623 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
10624 IsQuadStore
= true;
10629 Value
*Predicate
= EmitSVEPredicateCast(Ops
[0], PredTy
);
10630 Value
*BasePtr
= Ops
[1];
10632 // Does the store have an offset?
10633 if (Ops
.size() == 4)
10634 BasePtr
= Builder
.CreateGEP(AddrMemoryTy
, BasePtr
, Ops
[2]);
10636 // Last value is always the data
10638 IsQuadStore
? Ops
.back() : Builder
.CreateTrunc(Ops
.back(), MemoryTy
);
10641 CGM
.getIntrinsic(IntrinsicID
, IsQuadStore
? VectorTy
: MemoryTy
);
10643 cast
<llvm::Instruction
>(Builder
.CreateCall(F
, {Val
, Predicate
, BasePtr
}));
10644 auto TBAAInfo
= CGM
.getTBAAAccessInfo(LangPTy
->getPointeeType());
10645 CGM
.DecorateInstructionWithTBAA(Store
, TBAAInfo
);
10649 Value
*CodeGenFunction::EmitSMELd1St1(const SVETypeFlags
&TypeFlags
,
10650 SmallVectorImpl
<Value
*> &Ops
,
10652 Ops
[2] = EmitSVEPredicateCast(
10653 Ops
[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags
)));
10655 SmallVector
<Value
*> NewOps
;
10656 NewOps
.push_back(Ops
[2]);
10658 llvm::Value
*BasePtr
= Ops
[3];
10659 llvm::Value
*RealSlice
= Ops
[1];
10660 // If the intrinsic contains the vnum parameter, multiply it with the vector
10662 if (Ops
.size() == 5) {
10663 Function
*StreamingVectorLength
=
10664 CGM
.getIntrinsic(Intrinsic::aarch64_sme_cntsb
);
10665 llvm::Value
*StreamingVectorLengthCall
=
10666 Builder
.CreateCall(StreamingVectorLength
);
10667 llvm::Value
*Mulvl
=
10668 Builder
.CreateMul(StreamingVectorLengthCall
, Ops
[4], "mulvl");
10669 // The type of the ptr parameter is void *, so use Int8Ty here.
10670 BasePtr
= Builder
.CreateGEP(Int8Ty
, Ops
[3], Mulvl
);
10671 RealSlice
= Builder
.CreateZExt(RealSlice
, Int64Ty
);
10672 RealSlice
= Builder
.CreateAdd(RealSlice
, Ops
[4]);
10673 RealSlice
= Builder
.CreateTrunc(RealSlice
, Int32Ty
);
10675 NewOps
.push_back(BasePtr
);
10676 NewOps
.push_back(Ops
[0]);
10677 NewOps
.push_back(RealSlice
);
10678 Function
*F
= CGM
.getIntrinsic(IntID
);
10679 return Builder
.CreateCall(F
, NewOps
);
10682 Value
*CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags
&TypeFlags
,
10683 SmallVectorImpl
<Value
*> &Ops
,
10685 auto *VecTy
= getSVEType(TypeFlags
);
10686 Function
*F
= CGM
.getIntrinsic(IntID
, VecTy
);
10687 if (TypeFlags
.isReadZA())
10688 Ops
[1] = EmitSVEPredicateCast(Ops
[1], VecTy
);
10689 else if (TypeFlags
.isWriteZA())
10690 Ops
[2] = EmitSVEPredicateCast(Ops
[2], VecTy
);
10691 return Builder
.CreateCall(F
, Ops
);
10694 Value
*CodeGenFunction::EmitSMEZero(const SVETypeFlags
&TypeFlags
,
10695 SmallVectorImpl
<Value
*> &Ops
,
10697 // svzero_za() intrinsic zeros the entire za tile and has no paramters.
10698 if (Ops
.size() == 0)
10699 Ops
.push_back(llvm::ConstantInt::get(Int32Ty
, 255));
10700 Function
*F
= CGM
.getIntrinsic(IntID
, {});
10701 return Builder
.CreateCall(F
, Ops
);
10704 Value
*CodeGenFunction::EmitSMELdrStr(const SVETypeFlags
&TypeFlags
,
10705 SmallVectorImpl
<Value
*> &Ops
,
10707 if (Ops
.size() == 2)
10708 Ops
.push_back(Builder
.getInt32(0));
10710 Ops
[2] = Builder
.CreateIntCast(Ops
[2], Int32Ty
, true);
10711 Function
*F
= CGM
.getIntrinsic(IntID
, {});
10712 return Builder
.CreateCall(F
, Ops
);
10715 // Limit the usage of scalable llvm IR generated by the ACLE by using the
10716 // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
10717 Value
*CodeGenFunction::EmitSVEDupX(Value
*Scalar
, llvm::Type
*Ty
) {
10718 return Builder
.CreateVectorSplat(
10719 cast
<llvm::VectorType
>(Ty
)->getElementCount(), Scalar
);
10722 Value
*CodeGenFunction::EmitSVEDupX(Value
* Scalar
) {
10723 return EmitSVEDupX(Scalar
, getSVEVectorForElementType(Scalar
->getType()));
10726 Value
*CodeGenFunction::EmitSVEReinterpret(Value
*Val
, llvm::Type
*Ty
) {
10727 // FIXME: For big endian this needs an additional REV, or needs a separate
10728 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10729 // instruction is defined as 'bitwise' equivalent from memory point of
10730 // view (when storing/reloading), whereas the svreinterpret builtin
10731 // implements bitwise equivalent cast from register point of view.
10732 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10734 if (auto *StructTy
= dyn_cast
<StructType
>(Ty
)) {
10735 Value
*Tuple
= llvm::PoisonValue::get(Ty
);
10737 for (unsigned I
= 0; I
< StructTy
->getNumElements(); ++I
) {
10738 Value
*In
= Builder
.CreateExtractValue(Val
, I
);
10739 Value
*Out
= Builder
.CreateBitCast(In
, StructTy
->getTypeAtIndex(I
));
10740 Tuple
= Builder
.CreateInsertValue(Tuple
, Out
, I
);
10746 return Builder
.CreateBitCast(Val
, Ty
);
10749 static void InsertExplicitZeroOperand(CGBuilderTy
&Builder
, llvm::Type
*Ty
,
10750 SmallVectorImpl
<Value
*> &Ops
) {
10751 auto *SplatZero
= Constant::getNullValue(Ty
);
10752 Ops
.insert(Ops
.begin(), SplatZero
);
10755 static void InsertExplicitUndefOperand(CGBuilderTy
&Builder
, llvm::Type
*Ty
,
10756 SmallVectorImpl
<Value
*> &Ops
) {
10757 auto *SplatUndef
= UndefValue::get(Ty
);
10758 Ops
.insert(Ops
.begin(), SplatUndef
);
10761 SmallVector
<llvm::Type
*, 2>
10762 CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags
&TypeFlags
,
10763 llvm::Type
*ResultType
,
10764 ArrayRef
<Value
*> Ops
) {
10765 if (TypeFlags
.isOverloadNone())
10768 llvm::Type
*DefaultType
= getSVEType(TypeFlags
);
10770 if (TypeFlags
.isOverloadWhileOrMultiVecCvt())
10771 return {DefaultType
, Ops
[1]->getType()};
10773 if (TypeFlags
.isOverloadWhileRW())
10774 return {getSVEPredType(TypeFlags
), Ops
[0]->getType()};
10776 if (TypeFlags
.isOverloadCvt())
10777 return {Ops
[0]->getType(), Ops
.back()->getType()};
10779 if (TypeFlags
.isReductionQV() && !ResultType
->isScalableTy() &&
10780 ResultType
->isVectorTy())
10781 return {ResultType
, Ops
[1]->getType()};
10783 assert(TypeFlags
.isOverloadDefault() && "Unexpected value for overloads");
10784 return {DefaultType
};
10787 Value
*CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags
&TypeFlags
,
10788 ArrayRef
<Value
*> Ops
) {
10789 assert((TypeFlags
.isTupleSet() || TypeFlags
.isTupleGet()) &&
10790 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
10791 unsigned Idx
= cast
<ConstantInt
>(Ops
[1])->getZExtValue();
10793 if (TypeFlags
.isTupleSet())
10794 return Builder
.CreateInsertValue(Ops
[0], Ops
[2], Idx
);
10795 return Builder
.CreateExtractValue(Ops
[0], Idx
);
10798 Value
*CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags
&TypeFlags
,
10800 ArrayRef
<Value
*> Ops
) {
10801 assert(TypeFlags
.isTupleCreate() && "Expects TypleFlag isTupleCreate");
10803 Value
*Tuple
= llvm::PoisonValue::get(Ty
);
10804 for (unsigned Idx
= 0; Idx
< Ops
.size(); Idx
++)
10805 Tuple
= Builder
.CreateInsertValue(Tuple
, Ops
[Idx
], Idx
);
10810 void CodeGenFunction::GetAArch64SVEProcessedOperands(
10811 unsigned BuiltinID
, const CallExpr
*E
, SmallVectorImpl
<Value
*> &Ops
,
10812 SVETypeFlags TypeFlags
) {
10813 // Find out if any arguments are required to be integer constant expressions.
10814 unsigned ICEArguments
= 0;
10815 ASTContext::GetBuiltinTypeError Error
;
10816 getContext().GetBuiltinType(BuiltinID
, Error
, &ICEArguments
);
10817 assert(Error
== ASTContext::GE_None
&& "Should not codegen an error");
10819 // Tuple set/get only requires one insert/extract vector, which is
10820 // created by EmitSVETupleSetOrGet.
10821 bool IsTupleGetOrSet
= TypeFlags
.isTupleSet() || TypeFlags
.isTupleGet();
10823 for (unsigned i
= 0, e
= E
->getNumArgs(); i
!= e
; i
++) {
10824 bool IsICE
= ICEArguments
& (1 << i
);
10825 Value
*Arg
= EmitScalarExpr(E
->getArg(i
));
10828 // If this is required to be a constant, constant fold it so that we know
10829 // that the generated intrinsic gets a ConstantInt.
10830 std::optional
<llvm::APSInt
> Result
=
10831 E
->getArg(i
)->getIntegerConstantExpr(getContext());
10832 assert(Result
&& "Expected argument to be a constant");
10834 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
10835 // truncate because the immediate has been range checked and no valid
10836 // immediate requires more than a handful of bits.
10837 *Result
= Result
->extOrTrunc(32);
10838 Ops
.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result
));
10842 if (isa
<StructType
>(Arg
->getType()) && !IsTupleGetOrSet
) {
10843 for (unsigned I
= 0; I
< Arg
->getType()->getStructNumElements(); ++I
)
10844 Ops
.push_back(Builder
.CreateExtractValue(Arg
, I
));
10849 Ops
.push_back(Arg
);
10853 Value
*CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID
,
10854 const CallExpr
*E
) {
10855 llvm::Type
*Ty
= ConvertType(E
->getType());
10856 if (BuiltinID
>= SVE::BI__builtin_sve_reinterpret_s8_s8
&&
10857 BuiltinID
<= SVE::BI__builtin_sve_reinterpret_f64_f64_x4
) {
10858 Value
*Val
= EmitScalarExpr(E
->getArg(0));
10859 return EmitSVEReinterpret(Val
, Ty
);
10862 auto *Builtin
= findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap
, BuiltinID
,
10863 AArch64SVEIntrinsicsProvenSorted
);
10865 llvm::SmallVector
<Value
*, 4> Ops
;
10866 SVETypeFlags
TypeFlags(Builtin
->TypeModifier
);
10867 GetAArch64SVEProcessedOperands(BuiltinID
, E
, Ops
, TypeFlags
);
10869 if (TypeFlags
.isLoad())
10870 return EmitSVEMaskedLoad(E
, Ty
, Ops
, Builtin
->LLVMIntrinsic
,
10871 TypeFlags
.isZExtReturn());
10872 else if (TypeFlags
.isStore())
10873 return EmitSVEMaskedStore(E
, Ops
, Builtin
->LLVMIntrinsic
);
10874 else if (TypeFlags
.isGatherLoad())
10875 return EmitSVEGatherLoad(TypeFlags
, Ops
, Builtin
->LLVMIntrinsic
);
10876 else if (TypeFlags
.isScatterStore())
10877 return EmitSVEScatterStore(TypeFlags
, Ops
, Builtin
->LLVMIntrinsic
);
10878 else if (TypeFlags
.isPrefetch())
10879 return EmitSVEPrefetchLoad(TypeFlags
, Ops
, Builtin
->LLVMIntrinsic
);
10880 else if (TypeFlags
.isGatherPrefetch())
10881 return EmitSVEGatherPrefetch(TypeFlags
, Ops
, Builtin
->LLVMIntrinsic
);
10882 else if (TypeFlags
.isStructLoad())
10883 return EmitSVEStructLoad(TypeFlags
, Ops
, Builtin
->LLVMIntrinsic
);
10884 else if (TypeFlags
.isStructStore())
10885 return EmitSVEStructStore(TypeFlags
, Ops
, Builtin
->LLVMIntrinsic
);
10886 else if (TypeFlags
.isTupleSet() || TypeFlags
.isTupleGet())
10887 return EmitSVETupleSetOrGet(TypeFlags
, Ops
);
10888 else if (TypeFlags
.isTupleCreate())
10889 return EmitSVETupleCreate(TypeFlags
, Ty
, Ops
);
10890 else if (TypeFlags
.isUndef())
10891 return UndefValue::get(Ty
);
10892 else if (Builtin
->LLVMIntrinsic
!= 0) {
10893 // Emit set FPMR for intrinsics that require it
10894 if (TypeFlags
.setsFPMR())
10895 Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::aarch64_set_fpmr
),
10896 Ops
.pop_back_val());
10897 if (TypeFlags
.getMergeType() == SVETypeFlags::MergeZeroExp
)
10898 InsertExplicitZeroOperand(Builder
, Ty
, Ops
);
10900 if (TypeFlags
.getMergeType() == SVETypeFlags::MergeAnyExp
)
10901 InsertExplicitUndefOperand(Builder
, Ty
, Ops
);
10903 // Some ACLE builtins leave out the argument to specify the predicate
10904 // pattern, which is expected to be expanded to an SV_ALL pattern.
10905 if (TypeFlags
.isAppendSVALL())
10906 Ops
.push_back(Builder
.getInt32(/*SV_ALL*/ 31));
10907 if (TypeFlags
.isInsertOp1SVALL())
10908 Ops
.insert(&Ops
[1], Builder
.getInt32(/*SV_ALL*/ 31));
10910 // Predicates must match the main datatype.
10911 for (unsigned i
= 0, e
= Ops
.size(); i
!= e
; ++i
)
10912 if (auto PredTy
= dyn_cast
<llvm::VectorType
>(Ops
[i
]->getType()))
10913 if (PredTy
->getElementType()->isIntegerTy(1))
10914 Ops
[i
] = EmitSVEPredicateCast(Ops
[i
], getSVEType(TypeFlags
));
10916 // Splat scalar operand to vector (intrinsics with _n infix)
10917 if (TypeFlags
.hasSplatOperand()) {
10918 unsigned OpNo
= TypeFlags
.getSplatOperand();
10919 Ops
[OpNo
] = EmitSVEDupX(Ops
[OpNo
]);
10922 if (TypeFlags
.isReverseCompare())
10923 std::swap(Ops
[1], Ops
[2]);
10924 else if (TypeFlags
.isReverseUSDOT())
10925 std::swap(Ops
[1], Ops
[2]);
10926 else if (TypeFlags
.isReverseMergeAnyBinOp() &&
10927 TypeFlags
.getMergeType() == SVETypeFlags::MergeAny
)
10928 std::swap(Ops
[1], Ops
[2]);
10929 else if (TypeFlags
.isReverseMergeAnyAccOp() &&
10930 TypeFlags
.getMergeType() == SVETypeFlags::MergeAny
)
10931 std::swap(Ops
[1], Ops
[3]);
10933 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10934 if (TypeFlags
.getMergeType() == SVETypeFlags::MergeZero
) {
10935 llvm::Type
*OpndTy
= Ops
[1]->getType();
10936 auto *SplatZero
= Constant::getNullValue(OpndTy
);
10937 Ops
[1] = Builder
.CreateSelect(Ops
[0], Ops
[1], SplatZero
);
10940 Function
*F
= CGM
.getIntrinsic(Builtin
->LLVMIntrinsic
,
10941 getSVEOverloadTypes(TypeFlags
, Ty
, Ops
));
10942 Value
*Call
= Builder
.CreateCall(F
, Ops
);
10944 if (Call
->getType() == Ty
)
10947 // Predicate results must be converted to svbool_t.
10948 if (auto PredTy
= dyn_cast
<llvm::ScalableVectorType
>(Ty
))
10949 return EmitSVEPredicateCast(Call
, PredTy
);
10950 if (auto PredTupleTy
= dyn_cast
<llvm::StructType
>(Ty
))
10951 return EmitSVEPredicateTupleCast(Call
, PredTupleTy
);
10953 llvm_unreachable("unsupported element count!");
10956 switch (BuiltinID
) {
10960 case SVE::BI__builtin_sve_svreinterpret_b
: {
10962 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10963 Function
*CastFromSVCountF
=
10964 CGM
.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool
, SVCountTy
);
10965 return Builder
.CreateCall(CastFromSVCountF
, Ops
[0]);
10967 case SVE::BI__builtin_sve_svreinterpret_c
: {
10969 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10970 Function
*CastToSVCountF
=
10971 CGM
.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool
, SVCountTy
);
10972 return Builder
.CreateCall(CastToSVCountF
, Ops
[0]);
10975 case SVE::BI__builtin_sve_svpsel_lane_b8
:
10976 case SVE::BI__builtin_sve_svpsel_lane_b16
:
10977 case SVE::BI__builtin_sve_svpsel_lane_b32
:
10978 case SVE::BI__builtin_sve_svpsel_lane_b64
:
10979 case SVE::BI__builtin_sve_svpsel_lane_c8
:
10980 case SVE::BI__builtin_sve_svpsel_lane_c16
:
10981 case SVE::BI__builtin_sve_svpsel_lane_c32
:
10982 case SVE::BI__builtin_sve_svpsel_lane_c64
: {
10983 bool IsSVCount
= isa
<TargetExtType
>(Ops
[0]->getType());
10984 assert(((!IsSVCount
|| cast
<TargetExtType
>(Ops
[0]->getType())->getName() ==
10985 "aarch64.svcount")) &&
10986 "Unexpected TargetExtType");
10988 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10989 Function
*CastFromSVCountF
=
10990 CGM
.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool
, SVCountTy
);
10991 Function
*CastToSVCountF
=
10992 CGM
.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool
, SVCountTy
);
10994 auto OverloadedTy
= getSVEType(SVETypeFlags(Builtin
->TypeModifier
));
10995 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_sve_psel
, OverloadedTy
);
10996 llvm::Value
*Ops0
=
10997 IsSVCount
? Builder
.CreateCall(CastFromSVCountF
, Ops
[0]) : Ops
[0];
10998 llvm::Value
*Ops1
= EmitSVEPredicateCast(Ops
[1], OverloadedTy
);
10999 llvm::Value
*PSel
= Builder
.CreateCall(F
, {Ops0
, Ops1
, Ops
[2]});
11000 return IsSVCount
? Builder
.CreateCall(CastToSVCountF
, PSel
) : PSel
;
11002 case SVE::BI__builtin_sve_svmov_b_z
: {
11003 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
11004 SVETypeFlags
TypeFlags(Builtin
->TypeModifier
);
11005 llvm::Type
* OverloadedTy
= getSVEType(TypeFlags
);
11006 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_sve_and_z
, OverloadedTy
);
11007 return Builder
.CreateCall(F
, {Ops
[0], Ops
[1], Ops
[1]});
11010 case SVE::BI__builtin_sve_svnot_b_z
: {
11011 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
11012 SVETypeFlags
TypeFlags(Builtin
->TypeModifier
);
11013 llvm::Type
* OverloadedTy
= getSVEType(TypeFlags
);
11014 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_sve_eor_z
, OverloadedTy
);
11015 return Builder
.CreateCall(F
, {Ops
[0], Ops
[1], Ops
[0]});
11018 case SVE::BI__builtin_sve_svmovlb_u16
:
11019 case SVE::BI__builtin_sve_svmovlb_u32
:
11020 case SVE::BI__builtin_sve_svmovlb_u64
:
11021 return EmitSVEMovl(TypeFlags
, Ops
, Intrinsic::aarch64_sve_ushllb
);
11023 case SVE::BI__builtin_sve_svmovlb_s16
:
11024 case SVE::BI__builtin_sve_svmovlb_s32
:
11025 case SVE::BI__builtin_sve_svmovlb_s64
:
11026 return EmitSVEMovl(TypeFlags
, Ops
, Intrinsic::aarch64_sve_sshllb
);
11028 case SVE::BI__builtin_sve_svmovlt_u16
:
11029 case SVE::BI__builtin_sve_svmovlt_u32
:
11030 case SVE::BI__builtin_sve_svmovlt_u64
:
11031 return EmitSVEMovl(TypeFlags
, Ops
, Intrinsic::aarch64_sve_ushllt
);
11033 case SVE::BI__builtin_sve_svmovlt_s16
:
11034 case SVE::BI__builtin_sve_svmovlt_s32
:
11035 case SVE::BI__builtin_sve_svmovlt_s64
:
11036 return EmitSVEMovl(TypeFlags
, Ops
, Intrinsic::aarch64_sve_sshllt
);
11038 case SVE::BI__builtin_sve_svpmullt_u16
:
11039 case SVE::BI__builtin_sve_svpmullt_u64
:
11040 case SVE::BI__builtin_sve_svpmullt_n_u16
:
11041 case SVE::BI__builtin_sve_svpmullt_n_u64
:
11042 return EmitSVEPMull(TypeFlags
, Ops
, Intrinsic::aarch64_sve_pmullt_pair
);
11044 case SVE::BI__builtin_sve_svpmullb_u16
:
11045 case SVE::BI__builtin_sve_svpmullb_u64
:
11046 case SVE::BI__builtin_sve_svpmullb_n_u16
:
11047 case SVE::BI__builtin_sve_svpmullb_n_u64
:
11048 return EmitSVEPMull(TypeFlags
, Ops
, Intrinsic::aarch64_sve_pmullb_pair
);
11050 case SVE::BI__builtin_sve_svdup_n_b8
:
11051 case SVE::BI__builtin_sve_svdup_n_b16
:
11052 case SVE::BI__builtin_sve_svdup_n_b32
:
11053 case SVE::BI__builtin_sve_svdup_n_b64
: {
11055 Builder
.CreateICmpNE(Ops
[0], Constant::getNullValue(Ops
[0]->getType()));
11056 llvm::ScalableVectorType
*OverloadedTy
= getSVEType(TypeFlags
);
11057 Value
*Dup
= EmitSVEDupX(CmpNE
, OverloadedTy
);
11058 return EmitSVEPredicateCast(Dup
, cast
<llvm::ScalableVectorType
>(Ty
));
11061 case SVE::BI__builtin_sve_svdupq_n_b8
:
11062 case SVE::BI__builtin_sve_svdupq_n_b16
:
11063 case SVE::BI__builtin_sve_svdupq_n_b32
:
11064 case SVE::BI__builtin_sve_svdupq_n_b64
:
11065 case SVE::BI__builtin_sve_svdupq_n_u8
:
11066 case SVE::BI__builtin_sve_svdupq_n_s8
:
11067 case SVE::BI__builtin_sve_svdupq_n_u64
:
11068 case SVE::BI__builtin_sve_svdupq_n_f64
:
11069 case SVE::BI__builtin_sve_svdupq_n_s64
:
11070 case SVE::BI__builtin_sve_svdupq_n_u16
:
11071 case SVE::BI__builtin_sve_svdupq_n_f16
:
11072 case SVE::BI__builtin_sve_svdupq_n_bf16
:
11073 case SVE::BI__builtin_sve_svdupq_n_s16
:
11074 case SVE::BI__builtin_sve_svdupq_n_u32
:
11075 case SVE::BI__builtin_sve_svdupq_n_f32
:
11076 case SVE::BI__builtin_sve_svdupq_n_s32
: {
11077 // These builtins are implemented by storing each element to an array and using
11078 // ld1rq to materialize a vector.
11079 unsigned NumOpnds
= Ops
.size();
11082 cast
<llvm::VectorType
>(Ty
)->getElementType()->isIntegerTy(1);
11084 // For svdupq_n_b* the element type of is an integer of type 128/numelts,
11085 // so that the compare can use the width that is natural for the expected
11086 // number of predicate lanes.
11087 llvm::Type
*EltTy
= Ops
[0]->getType();
11089 EltTy
= IntegerType::get(getLLVMContext(), SVEBitsPerBlock
/ NumOpnds
);
11091 SmallVector
<llvm::Value
*, 16> VecOps
;
11092 for (unsigned I
= 0; I
< NumOpnds
; ++I
)
11093 VecOps
.push_back(Builder
.CreateZExt(Ops
[I
], EltTy
));
11094 Value
*Vec
= BuildVector(VecOps
);
11096 llvm::Type
*OverloadedTy
= getSVEVectorForElementType(EltTy
);
11097 Value
*InsertSubVec
= Builder
.CreateInsertVector(
11098 OverloadedTy
, PoisonValue::get(OverloadedTy
), Vec
, Builder
.getInt64(0));
11101 CGM
.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane
, OverloadedTy
);
11103 Builder
.CreateCall(F
, {InsertSubVec
, Builder
.getInt64(0)});
11108 SVETypeFlags
TypeFlags(Builtin
->TypeModifier
);
11109 Value
*Pred
= EmitSVEAllTruePred(TypeFlags
);
11111 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
11112 F
= CGM
.getIntrinsic(NumOpnds
== 2 ? Intrinsic::aarch64_sve_cmpne
11113 : Intrinsic::aarch64_sve_cmpne_wide
,
11115 Value
*Call
= Builder
.CreateCall(
11116 F
, {Pred
, DupQLane
, EmitSVEDupX(Builder
.getInt64(0))});
11117 return EmitSVEPredicateCast(Call
, cast
<llvm::ScalableVectorType
>(Ty
));
11120 case SVE::BI__builtin_sve_svpfalse_b
:
11121 return ConstantInt::getFalse(Ty
);
11123 case SVE::BI__builtin_sve_svpfalse_c
: {
11124 auto SVBoolTy
= ScalableVectorType::get(Builder
.getInt1Ty(), 16);
11125 Function
*CastToSVCountF
=
11126 CGM
.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool
, Ty
);
11127 return Builder
.CreateCall(CastToSVCountF
, ConstantInt::getFalse(SVBoolTy
));
11130 case SVE::BI__builtin_sve_svlen_bf16
:
11131 case SVE::BI__builtin_sve_svlen_f16
:
11132 case SVE::BI__builtin_sve_svlen_f32
:
11133 case SVE::BI__builtin_sve_svlen_f64
:
11134 case SVE::BI__builtin_sve_svlen_s8
:
11135 case SVE::BI__builtin_sve_svlen_s16
:
11136 case SVE::BI__builtin_sve_svlen_s32
:
11137 case SVE::BI__builtin_sve_svlen_s64
:
11138 case SVE::BI__builtin_sve_svlen_u8
:
11139 case SVE::BI__builtin_sve_svlen_u16
:
11140 case SVE::BI__builtin_sve_svlen_u32
:
11141 case SVE::BI__builtin_sve_svlen_u64
: {
11142 SVETypeFlags
TF(Builtin
->TypeModifier
);
11143 auto VTy
= cast
<llvm::VectorType
>(getSVEType(TF
));
11145 llvm::ConstantInt::get(Ty
, VTy
->getElementCount().getKnownMinValue());
11147 Function
*F
= CGM
.getIntrinsic(Intrinsic::vscale
, Ty
);
11148 return Builder
.CreateMul(NumEls
, Builder
.CreateCall(F
));
11151 case SVE::BI__builtin_sve_svtbl2_u8
:
11152 case SVE::BI__builtin_sve_svtbl2_s8
:
11153 case SVE::BI__builtin_sve_svtbl2_u16
:
11154 case SVE::BI__builtin_sve_svtbl2_s16
:
11155 case SVE::BI__builtin_sve_svtbl2_u32
:
11156 case SVE::BI__builtin_sve_svtbl2_s32
:
11157 case SVE::BI__builtin_sve_svtbl2_u64
:
11158 case SVE::BI__builtin_sve_svtbl2_s64
:
11159 case SVE::BI__builtin_sve_svtbl2_f16
:
11160 case SVE::BI__builtin_sve_svtbl2_bf16
:
11161 case SVE::BI__builtin_sve_svtbl2_f32
:
11162 case SVE::BI__builtin_sve_svtbl2_f64
: {
11163 SVETypeFlags
TF(Builtin
->TypeModifier
);
11164 auto VTy
= cast
<llvm::ScalableVectorType
>(getSVEType(TF
));
11165 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_sve_tbl2
, VTy
);
11166 return Builder
.CreateCall(F
, Ops
);
11169 case SVE::BI__builtin_sve_svset_neonq_s8
:
11170 case SVE::BI__builtin_sve_svset_neonq_s16
:
11171 case SVE::BI__builtin_sve_svset_neonq_s32
:
11172 case SVE::BI__builtin_sve_svset_neonq_s64
:
11173 case SVE::BI__builtin_sve_svset_neonq_u8
:
11174 case SVE::BI__builtin_sve_svset_neonq_u16
:
11175 case SVE::BI__builtin_sve_svset_neonq_u32
:
11176 case SVE::BI__builtin_sve_svset_neonq_u64
:
11177 case SVE::BI__builtin_sve_svset_neonq_f16
:
11178 case SVE::BI__builtin_sve_svset_neonq_f32
:
11179 case SVE::BI__builtin_sve_svset_neonq_f64
:
11180 case SVE::BI__builtin_sve_svset_neonq_bf16
: {
11181 return Builder
.CreateInsertVector(Ty
, Ops
[0], Ops
[1], Builder
.getInt64(0));
11184 case SVE::BI__builtin_sve_svget_neonq_s8
:
11185 case SVE::BI__builtin_sve_svget_neonq_s16
:
11186 case SVE::BI__builtin_sve_svget_neonq_s32
:
11187 case SVE::BI__builtin_sve_svget_neonq_s64
:
11188 case SVE::BI__builtin_sve_svget_neonq_u8
:
11189 case SVE::BI__builtin_sve_svget_neonq_u16
:
11190 case SVE::BI__builtin_sve_svget_neonq_u32
:
11191 case SVE::BI__builtin_sve_svget_neonq_u64
:
11192 case SVE::BI__builtin_sve_svget_neonq_f16
:
11193 case SVE::BI__builtin_sve_svget_neonq_f32
:
11194 case SVE::BI__builtin_sve_svget_neonq_f64
:
11195 case SVE::BI__builtin_sve_svget_neonq_bf16
: {
11196 return Builder
.CreateExtractVector(Ty
, Ops
[0], Builder
.getInt64(0));
11199 case SVE::BI__builtin_sve_svdup_neonq_s8
:
11200 case SVE::BI__builtin_sve_svdup_neonq_s16
:
11201 case SVE::BI__builtin_sve_svdup_neonq_s32
:
11202 case SVE::BI__builtin_sve_svdup_neonq_s64
:
11203 case SVE::BI__builtin_sve_svdup_neonq_u8
:
11204 case SVE::BI__builtin_sve_svdup_neonq_u16
:
11205 case SVE::BI__builtin_sve_svdup_neonq_u32
:
11206 case SVE::BI__builtin_sve_svdup_neonq_u64
:
11207 case SVE::BI__builtin_sve_svdup_neonq_f16
:
11208 case SVE::BI__builtin_sve_svdup_neonq_f32
:
11209 case SVE::BI__builtin_sve_svdup_neonq_f64
:
11210 case SVE::BI__builtin_sve_svdup_neonq_bf16
: {
11211 Value
*Insert
= Builder
.CreateInsertVector(Ty
, PoisonValue::get(Ty
), Ops
[0],
11212 Builder
.getInt64(0));
11213 return Builder
.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane
, {Ty
},
11214 {Insert
, Builder
.getInt64(0)});
11218 /// Should not happen
11222 static void swapCommutativeSMEOperands(unsigned BuiltinID
,
11223 SmallVectorImpl
<Value
*> &Ops
) {
11225 switch (BuiltinID
) {
11228 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1
:
11231 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2
:
11232 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2
:
11235 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4
:
11236 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4
:
11242 for (unsigned I
= 0; I
< MultiVec
; ++I
)
11243 std::swap(Ops
[I
+ 1], Ops
[I
+ 1 + MultiVec
]);
11246 Value
*CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID
,
11247 const CallExpr
*E
) {
11248 auto *Builtin
= findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap
, BuiltinID
,
11249 AArch64SMEIntrinsicsProvenSorted
);
11251 llvm::SmallVector
<Value
*, 4> Ops
;
11252 SVETypeFlags
TypeFlags(Builtin
->TypeModifier
);
11253 GetAArch64SVEProcessedOperands(BuiltinID
, E
, Ops
, TypeFlags
);
11255 if (TypeFlags
.isLoad() || TypeFlags
.isStore())
11256 return EmitSMELd1St1(TypeFlags
, Ops
, Builtin
->LLVMIntrinsic
);
11257 else if (TypeFlags
.isReadZA() || TypeFlags
.isWriteZA())
11258 return EmitSMEReadWrite(TypeFlags
, Ops
, Builtin
->LLVMIntrinsic
);
11259 else if (BuiltinID
== SME::BI__builtin_sme_svzero_mask_za
||
11260 BuiltinID
== SME::BI__builtin_sme_svzero_za
)
11261 return EmitSMEZero(TypeFlags
, Ops
, Builtin
->LLVMIntrinsic
);
11262 else if (BuiltinID
== SME::BI__builtin_sme_svldr_vnum_za
||
11263 BuiltinID
== SME::BI__builtin_sme_svstr_vnum_za
||
11264 BuiltinID
== SME::BI__builtin_sme_svldr_za
||
11265 BuiltinID
== SME::BI__builtin_sme_svstr_za
)
11266 return EmitSMELdrStr(TypeFlags
, Ops
, Builtin
->LLVMIntrinsic
);
11268 // Emit set FPMR for intrinsics that require it
11269 if (TypeFlags
.setsFPMR())
11270 Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::aarch64_set_fpmr
),
11271 Ops
.pop_back_val());
11272 // Handle builtins which require their multi-vector operands to be swapped
11273 swapCommutativeSMEOperands(BuiltinID
, Ops
);
11275 // Should not happen!
11276 if (Builtin
->LLVMIntrinsic
== 0)
11279 // Predicates must match the main datatype.
11280 for (unsigned i
= 0, e
= Ops
.size(); i
!= e
; ++i
)
11281 if (auto PredTy
= dyn_cast
<llvm::VectorType
>(Ops
[i
]->getType()))
11282 if (PredTy
->getElementType()->isIntegerTy(1))
11283 Ops
[i
] = EmitSVEPredicateCast(Ops
[i
], getSVEType(TypeFlags
));
11286 TypeFlags
.isOverloadNone()
11287 ? CGM
.getIntrinsic(Builtin
->LLVMIntrinsic
)
11288 : CGM
.getIntrinsic(Builtin
->LLVMIntrinsic
, {getSVEType(TypeFlags
)});
11290 return Builder
.CreateCall(F
, Ops
);
11293 Value
*CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID
,
11295 llvm::Triple::ArchType Arch
) {
11296 if (BuiltinID
>= clang::AArch64::FirstSVEBuiltin
&&
11297 BuiltinID
<= clang::AArch64::LastSVEBuiltin
)
11298 return EmitAArch64SVEBuiltinExpr(BuiltinID
, E
);
11300 if (BuiltinID
>= clang::AArch64::FirstSMEBuiltin
&&
11301 BuiltinID
<= clang::AArch64::LastSMEBuiltin
)
11302 return EmitAArch64SMEBuiltinExpr(BuiltinID
, E
);
11304 if (BuiltinID
== Builtin::BI__builtin_cpu_supports
)
11305 return EmitAArch64CpuSupports(E
);
11307 unsigned HintID
= static_cast<unsigned>(-1);
11308 switch (BuiltinID
) {
11310 case clang::AArch64::BI__builtin_arm_nop
:
11313 case clang::AArch64::BI__builtin_arm_yield
:
11314 case clang::AArch64::BI__yield
:
11317 case clang::AArch64::BI__builtin_arm_wfe
:
11318 case clang::AArch64::BI__wfe
:
11321 case clang::AArch64::BI__builtin_arm_wfi
:
11322 case clang::AArch64::BI__wfi
:
11325 case clang::AArch64::BI__builtin_arm_sev
:
11326 case clang::AArch64::BI__sev
:
11329 case clang::AArch64::BI__builtin_arm_sevl
:
11330 case clang::AArch64::BI__sevl
:
11335 if (HintID
!= static_cast<unsigned>(-1)) {
11336 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_hint
);
11337 return Builder
.CreateCall(F
, llvm::ConstantInt::get(Int32Ty
, HintID
));
11340 if (BuiltinID
== clang::AArch64::BI__builtin_arm_trap
) {
11341 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_break
);
11342 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
11343 return Builder
.CreateCall(F
, Builder
.CreateZExt(Arg
, CGM
.Int32Ty
));
11346 if (BuiltinID
== clang::AArch64::BI__builtin_arm_get_sme_state
) {
11347 // Create call to __arm_sme_state and store the results to the two pointers.
11348 CallInst
*CI
= EmitRuntimeCall(CGM
.CreateRuntimeFunction(
11349 llvm::FunctionType::get(StructType::get(CGM
.Int64Ty
, CGM
.Int64Ty
), {},
11351 "__arm_sme_state"));
11352 auto Attrs
= AttributeList().addFnAttribute(getLLVMContext(),
11353 "aarch64_pstate_sm_compatible");
11354 CI
->setAttributes(Attrs
);
11355 CI
->setCallingConv(
11356 llvm::CallingConv::
11357 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2
);
11358 Builder
.CreateStore(Builder
.CreateExtractValue(CI
, 0),
11359 EmitPointerWithAlignment(E
->getArg(0)));
11360 return Builder
.CreateStore(Builder
.CreateExtractValue(CI
, 1),
11361 EmitPointerWithAlignment(E
->getArg(1)));
11364 if (BuiltinID
== clang::AArch64::BI__builtin_arm_rbit
) {
11365 assert((getContext().getTypeSize(E
->getType()) == 32) &&
11366 "rbit of unusual size!");
11367 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
11368 return Builder
.CreateCall(
11369 CGM
.getIntrinsic(Intrinsic::bitreverse
, Arg
->getType()), Arg
, "rbit");
11371 if (BuiltinID
== clang::AArch64::BI__builtin_arm_rbit64
) {
11372 assert((getContext().getTypeSize(E
->getType()) == 64) &&
11373 "rbit of unusual size!");
11374 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
11375 return Builder
.CreateCall(
11376 CGM
.getIntrinsic(Intrinsic::bitreverse
, Arg
->getType()), Arg
, "rbit");
11379 if (BuiltinID
== clang::AArch64::BI__builtin_arm_clz
||
11380 BuiltinID
== clang::AArch64::BI__builtin_arm_clz64
) {
11381 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
11382 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctlz
, Arg
->getType());
11383 Value
*Res
= Builder
.CreateCall(F
, {Arg
, Builder
.getInt1(false)});
11384 if (BuiltinID
== clang::AArch64::BI__builtin_arm_clz64
)
11385 Res
= Builder
.CreateTrunc(Res
, Builder
.getInt32Ty());
11389 if (BuiltinID
== clang::AArch64::BI__builtin_arm_cls
) {
11390 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
11391 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::aarch64_cls
), Arg
,
11394 if (BuiltinID
== clang::AArch64::BI__builtin_arm_cls64
) {
11395 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
11396 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::aarch64_cls64
), Arg
,
11400 if (BuiltinID
== clang::AArch64::BI__builtin_arm_rint32zf
||
11401 BuiltinID
== clang::AArch64::BI__builtin_arm_rint32z
) {
11402 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
11403 llvm::Type
*Ty
= Arg
->getType();
11404 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::aarch64_frint32z
, Ty
),
11408 if (BuiltinID
== clang::AArch64::BI__builtin_arm_rint64zf
||
11409 BuiltinID
== clang::AArch64::BI__builtin_arm_rint64z
) {
11410 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
11411 llvm::Type
*Ty
= Arg
->getType();
11412 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::aarch64_frint64z
, Ty
),
11416 if (BuiltinID
== clang::AArch64::BI__builtin_arm_rint32xf
||
11417 BuiltinID
== clang::AArch64::BI__builtin_arm_rint32x
) {
11418 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
11419 llvm::Type
*Ty
= Arg
->getType();
11420 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::aarch64_frint32x
, Ty
),
11424 if (BuiltinID
== clang::AArch64::BI__builtin_arm_rint64xf
||
11425 BuiltinID
== clang::AArch64::BI__builtin_arm_rint64x
) {
11426 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
11427 llvm::Type
*Ty
= Arg
->getType();
11428 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::aarch64_frint64x
, Ty
),
11432 if (BuiltinID
== clang::AArch64::BI__builtin_arm_jcvt
) {
11433 assert((getContext().getTypeSize(E
->getType()) == 32) &&
11434 "__jcvt of unusual size!");
11435 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
11436 return Builder
.CreateCall(
11437 CGM
.getIntrinsic(Intrinsic::aarch64_fjcvtzs
), Arg
);
11440 if (BuiltinID
== clang::AArch64::BI__builtin_arm_ld64b
||
11441 BuiltinID
== clang::AArch64::BI__builtin_arm_st64b
||
11442 BuiltinID
== clang::AArch64::BI__builtin_arm_st64bv
||
11443 BuiltinID
== clang::AArch64::BI__builtin_arm_st64bv0
) {
11444 llvm::Value
*MemAddr
= EmitScalarExpr(E
->getArg(0));
11445 llvm::Value
*ValPtr
= EmitScalarExpr(E
->getArg(1));
11447 if (BuiltinID
== clang::AArch64::BI__builtin_arm_ld64b
) {
11448 // Load from the address via an LLVM intrinsic, receiving a
11449 // tuple of 8 i64 words, and store each one to ValPtr.
11450 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_ld64b
);
11451 llvm::Value
*Val
= Builder
.CreateCall(F
, MemAddr
);
11452 llvm::Value
*ToRet
;
11453 for (size_t i
= 0; i
< 8; i
++) {
11454 llvm::Value
*ValOffsetPtr
=
11455 Builder
.CreateGEP(Int64Ty
, ValPtr
, Builder
.getInt32(i
));
11457 Address(ValOffsetPtr
, Int64Ty
, CharUnits::fromQuantity(8));
11458 ToRet
= Builder
.CreateStore(Builder
.CreateExtractValue(Val
, i
), Addr
);
11462 // Load 8 i64 words from ValPtr, and store them to the address
11463 // via an LLVM intrinsic.
11464 SmallVector
<llvm::Value
*, 9> Args
;
11465 Args
.push_back(MemAddr
);
11466 for (size_t i
= 0; i
< 8; i
++) {
11467 llvm::Value
*ValOffsetPtr
=
11468 Builder
.CreateGEP(Int64Ty
, ValPtr
, Builder
.getInt32(i
));
11470 Address(ValOffsetPtr
, Int64Ty
, CharUnits::fromQuantity(8));
11471 Args
.push_back(Builder
.CreateLoad(Addr
));
11474 auto Intr
= (BuiltinID
== clang::AArch64::BI__builtin_arm_st64b
11475 ? Intrinsic::aarch64_st64b
11476 : BuiltinID
== clang::AArch64::BI__builtin_arm_st64bv
11477 ? Intrinsic::aarch64_st64bv
11478 : Intrinsic::aarch64_st64bv0
);
11479 Function
*F
= CGM
.getIntrinsic(Intr
);
11480 return Builder
.CreateCall(F
, Args
);
11484 if (BuiltinID
== clang::AArch64::BI__builtin_arm_rndr
||
11485 BuiltinID
== clang::AArch64::BI__builtin_arm_rndrrs
) {
11487 auto Intr
= (BuiltinID
== clang::AArch64::BI__builtin_arm_rndr
11488 ? Intrinsic::aarch64_rndr
11489 : Intrinsic::aarch64_rndrrs
);
11490 Function
*F
= CGM
.getIntrinsic(Intr
);
11491 llvm::Value
*Val
= Builder
.CreateCall(F
);
11492 Value
*RandomValue
= Builder
.CreateExtractValue(Val
, 0);
11493 Value
*Status
= Builder
.CreateExtractValue(Val
, 1);
11495 Address MemAddress
= EmitPointerWithAlignment(E
->getArg(0));
11496 Builder
.CreateStore(RandomValue
, MemAddress
);
11497 Status
= Builder
.CreateZExt(Status
, Int32Ty
);
11501 if (BuiltinID
== clang::AArch64::BI__clear_cache
) {
11502 assert(E
->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
11503 const FunctionDecl
*FD
= E
->getDirectCallee();
11505 for (unsigned i
= 0; i
< 2; i
++)
11506 Ops
[i
] = EmitScalarExpr(E
->getArg(i
));
11507 llvm::Type
*Ty
= CGM
.getTypes().ConvertType(FD
->getType());
11508 llvm::FunctionType
*FTy
= cast
<llvm::FunctionType
>(Ty
);
11509 StringRef Name
= FD
->getName();
11510 return EmitNounwindRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
), Ops
);
11513 if ((BuiltinID
== clang::AArch64::BI__builtin_arm_ldrex
||
11514 BuiltinID
== clang::AArch64::BI__builtin_arm_ldaex
) &&
11515 getContext().getTypeSize(E
->getType()) == 128) {
11517 CGM
.getIntrinsic(BuiltinID
== clang::AArch64::BI__builtin_arm_ldaex
11518 ? Intrinsic::aarch64_ldaxp
11519 : Intrinsic::aarch64_ldxp
);
11521 Value
*LdPtr
= EmitScalarExpr(E
->getArg(0));
11522 Value
*Val
= Builder
.CreateCall(F
, LdPtr
, "ldxp");
11524 Value
*Val0
= Builder
.CreateExtractValue(Val
, 1);
11525 Value
*Val1
= Builder
.CreateExtractValue(Val
, 0);
11526 llvm::Type
*Int128Ty
= llvm::IntegerType::get(getLLVMContext(), 128);
11527 Val0
= Builder
.CreateZExt(Val0
, Int128Ty
);
11528 Val1
= Builder
.CreateZExt(Val1
, Int128Ty
);
11530 Value
*ShiftCst
= llvm::ConstantInt::get(Int128Ty
, 64);
11531 Val
= Builder
.CreateShl(Val0
, ShiftCst
, "shl", true /* nuw */);
11532 Val
= Builder
.CreateOr(Val
, Val1
);
11533 return Builder
.CreateBitCast(Val
, ConvertType(E
->getType()));
11534 } else if (BuiltinID
== clang::AArch64::BI__builtin_arm_ldrex
||
11535 BuiltinID
== clang::AArch64::BI__builtin_arm_ldaex
) {
11536 Value
*LoadAddr
= EmitScalarExpr(E
->getArg(0));
11538 QualType Ty
= E
->getType();
11539 llvm::Type
*RealResTy
= ConvertType(Ty
);
11540 llvm::Type
*IntTy
=
11541 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty
));
11544 CGM
.getIntrinsic(BuiltinID
== clang::AArch64::BI__builtin_arm_ldaex
11545 ? Intrinsic::aarch64_ldaxr
11546 : Intrinsic::aarch64_ldxr
,
11548 CallInst
*Val
= Builder
.CreateCall(F
, LoadAddr
, "ldxr");
11550 0, Attribute::get(getLLVMContext(), Attribute::ElementType
, IntTy
));
11552 if (RealResTy
->isPointerTy())
11553 return Builder
.CreateIntToPtr(Val
, RealResTy
);
11555 llvm::Type
*IntResTy
= llvm::IntegerType::get(
11556 getLLVMContext(), CGM
.getDataLayout().getTypeSizeInBits(RealResTy
));
11557 return Builder
.CreateBitCast(Builder
.CreateTruncOrBitCast(Val
, IntResTy
),
11561 if ((BuiltinID
== clang::AArch64::BI__builtin_arm_strex
||
11562 BuiltinID
== clang::AArch64::BI__builtin_arm_stlex
) &&
11563 getContext().getTypeSize(E
->getArg(0)->getType()) == 128) {
11565 CGM
.getIntrinsic(BuiltinID
== clang::AArch64::BI__builtin_arm_stlex
11566 ? Intrinsic::aarch64_stlxp
11567 : Intrinsic::aarch64_stxp
);
11568 llvm::Type
*STy
= llvm::StructType::get(Int64Ty
, Int64Ty
);
11570 Address Tmp
= CreateMemTemp(E
->getArg(0)->getType());
11571 EmitAnyExprToMem(E
->getArg(0), Tmp
, Qualifiers(), /*init*/ true);
11573 Tmp
= Tmp
.withElementType(STy
);
11574 llvm::Value
*Val
= Builder
.CreateLoad(Tmp
);
11576 Value
*Arg0
= Builder
.CreateExtractValue(Val
, 0);
11577 Value
*Arg1
= Builder
.CreateExtractValue(Val
, 1);
11578 Value
*StPtr
= EmitScalarExpr(E
->getArg(1));
11579 return Builder
.CreateCall(F
, {Arg0
, Arg1
, StPtr
}, "stxp");
11582 if (BuiltinID
== clang::AArch64::BI__builtin_arm_strex
||
11583 BuiltinID
== clang::AArch64::BI__builtin_arm_stlex
) {
11584 Value
*StoreVal
= EmitScalarExpr(E
->getArg(0));
11585 Value
*StoreAddr
= EmitScalarExpr(E
->getArg(1));
11587 QualType Ty
= E
->getArg(0)->getType();
11588 llvm::Type
*StoreTy
=
11589 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty
));
11591 if (StoreVal
->getType()->isPointerTy())
11592 StoreVal
= Builder
.CreatePtrToInt(StoreVal
, Int64Ty
);
11594 llvm::Type
*IntTy
= llvm::IntegerType::get(
11596 CGM
.getDataLayout().getTypeSizeInBits(StoreVal
->getType()));
11597 StoreVal
= Builder
.CreateBitCast(StoreVal
, IntTy
);
11598 StoreVal
= Builder
.CreateZExtOrBitCast(StoreVal
, Int64Ty
);
11602 CGM
.getIntrinsic(BuiltinID
== clang::AArch64::BI__builtin_arm_stlex
11603 ? Intrinsic::aarch64_stlxr
11604 : Intrinsic::aarch64_stxr
,
11605 StoreAddr
->getType());
11606 CallInst
*CI
= Builder
.CreateCall(F
, {StoreVal
, StoreAddr
}, "stxr");
11608 1, Attribute::get(getLLVMContext(), Attribute::ElementType
, StoreTy
));
11612 if (BuiltinID
== clang::AArch64::BI__getReg
) {
11613 Expr::EvalResult Result
;
11614 if (!E
->getArg(0)->EvaluateAsInt(Result
, CGM
.getContext()))
11615 llvm_unreachable("Sema will ensure that the parameter is constant");
11617 llvm::APSInt Value
= Result
.Val
.getInt();
11618 LLVMContext
&Context
= CGM
.getLLVMContext();
11619 std::string Reg
= Value
== 31 ? "sp" : "x" + toString(Value
, 10);
11621 llvm::Metadata
*Ops
[] = {llvm::MDString::get(Context
, Reg
)};
11622 llvm::MDNode
*RegName
= llvm::MDNode::get(Context
, Ops
);
11623 llvm::Value
*Metadata
= llvm::MetadataAsValue::get(Context
, RegName
);
11625 llvm::Function
*F
=
11626 CGM
.getIntrinsic(llvm::Intrinsic::read_register
, {Int64Ty
});
11627 return Builder
.CreateCall(F
, Metadata
);
11630 if (BuiltinID
== clang::AArch64::BI__break
) {
11631 Expr::EvalResult Result
;
11632 if (!E
->getArg(0)->EvaluateAsInt(Result
, CGM
.getContext()))
11633 llvm_unreachable("Sema will ensure that the parameter is constant");
11635 llvm::Function
*F
= CGM
.getIntrinsic(llvm::Intrinsic::aarch64_break
);
11636 return Builder
.CreateCall(F
, {EmitScalarExpr(E
->getArg(0))});
11639 if (BuiltinID
== clang::AArch64::BI__builtin_arm_clrex
) {
11640 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_clrex
);
11641 return Builder
.CreateCall(F
);
11644 if (BuiltinID
== clang::AArch64::BI_ReadWriteBarrier
)
11645 return Builder
.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent
,
11646 llvm::SyncScope::SingleThread
);
11649 Intrinsic::ID CRCIntrinsicID
= Intrinsic::not_intrinsic
;
11650 switch (BuiltinID
) {
11651 case clang::AArch64::BI__builtin_arm_crc32b
:
11652 CRCIntrinsicID
= Intrinsic::aarch64_crc32b
; break;
11653 case clang::AArch64::BI__builtin_arm_crc32cb
:
11654 CRCIntrinsicID
= Intrinsic::aarch64_crc32cb
; break;
11655 case clang::AArch64::BI__builtin_arm_crc32h
:
11656 CRCIntrinsicID
= Intrinsic::aarch64_crc32h
; break;
11657 case clang::AArch64::BI__builtin_arm_crc32ch
:
11658 CRCIntrinsicID
= Intrinsic::aarch64_crc32ch
; break;
11659 case clang::AArch64::BI__builtin_arm_crc32w
:
11660 CRCIntrinsicID
= Intrinsic::aarch64_crc32w
; break;
11661 case clang::AArch64::BI__builtin_arm_crc32cw
:
11662 CRCIntrinsicID
= Intrinsic::aarch64_crc32cw
; break;
11663 case clang::AArch64::BI__builtin_arm_crc32d
:
11664 CRCIntrinsicID
= Intrinsic::aarch64_crc32x
; break;
11665 case clang::AArch64::BI__builtin_arm_crc32cd
:
11666 CRCIntrinsicID
= Intrinsic::aarch64_crc32cx
; break;
11669 if (CRCIntrinsicID
!= Intrinsic::not_intrinsic
) {
11670 Value
*Arg0
= EmitScalarExpr(E
->getArg(0));
11671 Value
*Arg1
= EmitScalarExpr(E
->getArg(1));
11672 Function
*F
= CGM
.getIntrinsic(CRCIntrinsicID
);
11674 llvm::Type
*DataTy
= F
->getFunctionType()->getParamType(1);
11675 Arg1
= Builder
.CreateZExtOrBitCast(Arg1
, DataTy
);
11677 return Builder
.CreateCall(F
, {Arg0
, Arg1
});
11680 // Memory Operations (MOPS)
11681 if (BuiltinID
== AArch64::BI__builtin_arm_mops_memset_tag
) {
11682 Value
*Dst
= EmitScalarExpr(E
->getArg(0));
11683 Value
*Val
= EmitScalarExpr(E
->getArg(1));
11684 Value
*Size
= EmitScalarExpr(E
->getArg(2));
11685 Val
= Builder
.CreateTrunc(Val
, Int8Ty
);
11686 Size
= Builder
.CreateIntCast(Size
, Int64Ty
, false);
11687 return Builder
.CreateCall(
11688 CGM
.getIntrinsic(Intrinsic::aarch64_mops_memset_tag
), {Dst
, Val
, Size
});
11691 // Memory Tagging Extensions (MTE) Intrinsics
11692 Intrinsic::ID MTEIntrinsicID
= Intrinsic::not_intrinsic
;
11693 switch (BuiltinID
) {
11694 case clang::AArch64::BI__builtin_arm_irg
:
11695 MTEIntrinsicID
= Intrinsic::aarch64_irg
; break;
11696 case clang::AArch64::BI__builtin_arm_addg
:
11697 MTEIntrinsicID
= Intrinsic::aarch64_addg
; break;
11698 case clang::AArch64::BI__builtin_arm_gmi
:
11699 MTEIntrinsicID
= Intrinsic::aarch64_gmi
; break;
11700 case clang::AArch64::BI__builtin_arm_ldg
:
11701 MTEIntrinsicID
= Intrinsic::aarch64_ldg
; break;
11702 case clang::AArch64::BI__builtin_arm_stg
:
11703 MTEIntrinsicID
= Intrinsic::aarch64_stg
; break;
11704 case clang::AArch64::BI__builtin_arm_subp
:
11705 MTEIntrinsicID
= Intrinsic::aarch64_subp
; break;
11708 if (MTEIntrinsicID
!= Intrinsic::not_intrinsic
) {
11709 if (MTEIntrinsicID
== Intrinsic::aarch64_irg
) {
11710 Value
*Pointer
= EmitScalarExpr(E
->getArg(0));
11711 Value
*Mask
= EmitScalarExpr(E
->getArg(1));
11713 Mask
= Builder
.CreateZExt(Mask
, Int64Ty
);
11714 return Builder
.CreateCall(CGM
.getIntrinsic(MTEIntrinsicID
),
11717 if (MTEIntrinsicID
== Intrinsic::aarch64_addg
) {
11718 Value
*Pointer
= EmitScalarExpr(E
->getArg(0));
11719 Value
*TagOffset
= EmitScalarExpr(E
->getArg(1));
11721 TagOffset
= Builder
.CreateZExt(TagOffset
, Int64Ty
);
11722 return Builder
.CreateCall(CGM
.getIntrinsic(MTEIntrinsicID
),
11723 {Pointer
, TagOffset
});
11725 if (MTEIntrinsicID
== Intrinsic::aarch64_gmi
) {
11726 Value
*Pointer
= EmitScalarExpr(E
->getArg(0));
11727 Value
*ExcludedMask
= EmitScalarExpr(E
->getArg(1));
11729 ExcludedMask
= Builder
.CreateZExt(ExcludedMask
, Int64Ty
);
11730 return Builder
.CreateCall(
11731 CGM
.getIntrinsic(MTEIntrinsicID
), {Pointer
, ExcludedMask
});
11733 // Although it is possible to supply a different return
11734 // address (first arg) to this intrinsic, for now we set
11735 // return address same as input address.
11736 if (MTEIntrinsicID
== Intrinsic::aarch64_ldg
) {
11737 Value
*TagAddress
= EmitScalarExpr(E
->getArg(0));
11738 return Builder
.CreateCall(CGM
.getIntrinsic(MTEIntrinsicID
),
11739 {TagAddress
, TagAddress
});
11741 // Although it is possible to supply a different tag (to set)
11742 // to this intrinsic (as first arg), for now we supply
11743 // the tag that is in input address arg (common use case).
11744 if (MTEIntrinsicID
== Intrinsic::aarch64_stg
) {
11745 Value
*TagAddress
= EmitScalarExpr(E
->getArg(0));
11746 return Builder
.CreateCall(CGM
.getIntrinsic(MTEIntrinsicID
),
11747 {TagAddress
, TagAddress
});
11749 if (MTEIntrinsicID
== Intrinsic::aarch64_subp
) {
11750 Value
*PointerA
= EmitScalarExpr(E
->getArg(0));
11751 Value
*PointerB
= EmitScalarExpr(E
->getArg(1));
11752 return Builder
.CreateCall(
11753 CGM
.getIntrinsic(MTEIntrinsicID
), {PointerA
, PointerB
});
11757 if (BuiltinID
== clang::AArch64::BI__builtin_arm_rsr
||
11758 BuiltinID
== clang::AArch64::BI__builtin_arm_rsr64
||
11759 BuiltinID
== clang::AArch64::BI__builtin_arm_rsr128
||
11760 BuiltinID
== clang::AArch64::BI__builtin_arm_rsrp
||
11761 BuiltinID
== clang::AArch64::BI__builtin_arm_wsr
||
11762 BuiltinID
== clang::AArch64::BI__builtin_arm_wsr64
||
11763 BuiltinID
== clang::AArch64::BI__builtin_arm_wsr128
||
11764 BuiltinID
== clang::AArch64::BI__builtin_arm_wsrp
) {
11766 SpecialRegisterAccessKind AccessKind
= Write
;
11767 if (BuiltinID
== clang::AArch64::BI__builtin_arm_rsr
||
11768 BuiltinID
== clang::AArch64::BI__builtin_arm_rsr64
||
11769 BuiltinID
== clang::AArch64::BI__builtin_arm_rsr128
||
11770 BuiltinID
== clang::AArch64::BI__builtin_arm_rsrp
)
11771 AccessKind
= VolatileRead
;
11773 bool IsPointerBuiltin
= BuiltinID
== clang::AArch64::BI__builtin_arm_rsrp
||
11774 BuiltinID
== clang::AArch64::BI__builtin_arm_wsrp
;
11776 bool Is32Bit
= BuiltinID
== clang::AArch64::BI__builtin_arm_rsr
||
11777 BuiltinID
== clang::AArch64::BI__builtin_arm_wsr
;
11779 bool Is128Bit
= BuiltinID
== clang::AArch64::BI__builtin_arm_rsr128
||
11780 BuiltinID
== clang::AArch64::BI__builtin_arm_wsr128
;
11782 llvm::Type
*ValueType
;
11783 llvm::Type
*RegisterType
= Int64Ty
;
11785 ValueType
= Int32Ty
;
11786 } else if (Is128Bit
) {
11787 llvm::Type
*Int128Ty
=
11788 llvm::IntegerType::getInt128Ty(CGM
.getLLVMContext());
11789 ValueType
= Int128Ty
;
11790 RegisterType
= Int128Ty
;
11791 } else if (IsPointerBuiltin
) {
11792 ValueType
= VoidPtrTy
;
11794 ValueType
= Int64Ty
;
11797 return EmitSpecialRegisterBuiltin(*this, E
, RegisterType
, ValueType
,
11801 if (BuiltinID
== clang::AArch64::BI_ReadStatusReg
||
11802 BuiltinID
== clang::AArch64::BI_WriteStatusReg
) {
11803 LLVMContext
&Context
= CGM
.getLLVMContext();
11806 E
->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11808 std::string SysRegStr
;
11809 llvm::raw_string_ostream(SysRegStr
) <<
11810 ((1 << 1) | ((SysReg
>> 14) & 1)) << ":" <<
11811 ((SysReg
>> 11) & 7) << ":" <<
11812 ((SysReg
>> 7) & 15) << ":" <<
11813 ((SysReg
>> 3) & 15) << ":" <<
11816 llvm::Metadata
*Ops
[] = { llvm::MDString::get(Context
, SysRegStr
) };
11817 llvm::MDNode
*RegName
= llvm::MDNode::get(Context
, Ops
);
11818 llvm::Value
*Metadata
= llvm::MetadataAsValue::get(Context
, RegName
);
11820 llvm::Type
*RegisterType
= Int64Ty
;
11821 llvm::Type
*Types
[] = { RegisterType
};
11823 if (BuiltinID
== clang::AArch64::BI_ReadStatusReg
) {
11824 llvm::Function
*F
= CGM
.getIntrinsic(llvm::Intrinsic::read_register
, Types
);
11826 return Builder
.CreateCall(F
, Metadata
);
11829 llvm::Function
*F
= CGM
.getIntrinsic(llvm::Intrinsic::write_register
, Types
);
11830 llvm::Value
*ArgValue
= EmitScalarExpr(E
->getArg(1));
11832 return Builder
.CreateCall(F
, { Metadata
, ArgValue
});
11835 if (BuiltinID
== clang::AArch64::BI_AddressOfReturnAddress
) {
11836 llvm::Function
*F
=
11837 CGM
.getIntrinsic(Intrinsic::addressofreturnaddress
, AllocaInt8PtrTy
);
11838 return Builder
.CreateCall(F
);
11841 if (BuiltinID
== clang::AArch64::BI__builtin_sponentry
) {
11842 llvm::Function
*F
= CGM
.getIntrinsic(Intrinsic::sponentry
, AllocaInt8PtrTy
);
11843 return Builder
.CreateCall(F
);
11846 if (BuiltinID
== clang::AArch64::BI__mulh
||
11847 BuiltinID
== clang::AArch64::BI__umulh
) {
11848 llvm::Type
*ResType
= ConvertType(E
->getType());
11849 llvm::Type
*Int128Ty
= llvm::IntegerType::get(getLLVMContext(), 128);
11851 bool IsSigned
= BuiltinID
== clang::AArch64::BI__mulh
;
11853 Builder
.CreateIntCast(EmitScalarExpr(E
->getArg(0)), Int128Ty
, IsSigned
);
11855 Builder
.CreateIntCast(EmitScalarExpr(E
->getArg(1)), Int128Ty
, IsSigned
);
11857 Value
*MulResult
, *HigherBits
;
11859 MulResult
= Builder
.CreateNSWMul(LHS
, RHS
);
11860 HigherBits
= Builder
.CreateAShr(MulResult
, 64);
11862 MulResult
= Builder
.CreateNUWMul(LHS
, RHS
);
11863 HigherBits
= Builder
.CreateLShr(MulResult
, 64);
11865 HigherBits
= Builder
.CreateIntCast(HigherBits
, ResType
, IsSigned
);
11870 if (BuiltinID
== AArch64::BI__writex18byte
||
11871 BuiltinID
== AArch64::BI__writex18word
||
11872 BuiltinID
== AArch64::BI__writex18dword
||
11873 BuiltinID
== AArch64::BI__writex18qword
) {
11874 // Process the args first
11875 Value
*OffsetArg
= EmitScalarExpr(E
->getArg(0));
11876 Value
*DataArg
= EmitScalarExpr(E
->getArg(1));
11879 llvm::Value
*X18
= readX18AsPtr(*this);
11881 // Store val at x18 + offset
11882 Value
*Offset
= Builder
.CreateZExt(OffsetArg
, Int64Ty
);
11883 Value
*Ptr
= Builder
.CreateGEP(Int8Ty
, X18
, Offset
);
11885 Builder
.CreateAlignedStore(DataArg
, Ptr
, CharUnits::One());
11889 if (BuiltinID
== AArch64::BI__readx18byte
||
11890 BuiltinID
== AArch64::BI__readx18word
||
11891 BuiltinID
== AArch64::BI__readx18dword
||
11892 BuiltinID
== AArch64::BI__readx18qword
) {
11893 // Process the args first
11894 Value
*OffsetArg
= EmitScalarExpr(E
->getArg(0));
11897 llvm::Value
*X18
= readX18AsPtr(*this);
11899 // Load x18 + offset
11900 Value
*Offset
= Builder
.CreateZExt(OffsetArg
, Int64Ty
);
11901 Value
*Ptr
= Builder
.CreateGEP(Int8Ty
, X18
, Offset
);
11902 llvm::Type
*IntTy
= ConvertType(E
->getType());
11903 LoadInst
*Load
= Builder
.CreateAlignedLoad(IntTy
, Ptr
, CharUnits::One());
11907 if (BuiltinID
== AArch64::BI__addx18byte
||
11908 BuiltinID
== AArch64::BI__addx18word
||
11909 BuiltinID
== AArch64::BI__addx18dword
||
11910 BuiltinID
== AArch64::BI__addx18qword
||
11911 BuiltinID
== AArch64::BI__incx18byte
||
11912 BuiltinID
== AArch64::BI__incx18word
||
11913 BuiltinID
== AArch64::BI__incx18dword
||
11914 BuiltinID
== AArch64::BI__incx18qword
) {
11917 switch (BuiltinID
) {
11918 case AArch64::BI__incx18byte
:
11920 isIncrement
= true;
11922 case AArch64::BI__incx18word
:
11924 isIncrement
= true;
11926 case AArch64::BI__incx18dword
:
11928 isIncrement
= true;
11930 case AArch64::BI__incx18qword
:
11932 isIncrement
= true;
11935 IntTy
= ConvertType(E
->getArg(1)->getType());
11936 isIncrement
= false;
11939 // Process the args first
11940 Value
*OffsetArg
= EmitScalarExpr(E
->getArg(0));
11942 isIncrement
? ConstantInt::get(IntTy
, 1) : EmitScalarExpr(E
->getArg(1));
11945 llvm::Value
*X18
= readX18AsPtr(*this);
11947 // Load x18 + offset
11948 Value
*Offset
= Builder
.CreateZExt(OffsetArg
, Int64Ty
);
11949 Value
*Ptr
= Builder
.CreateGEP(Int8Ty
, X18
, Offset
);
11950 LoadInst
*Load
= Builder
.CreateAlignedLoad(IntTy
, Ptr
, CharUnits::One());
11953 Value
*AddResult
= Builder
.CreateAdd(Load
, ValToAdd
);
11955 // Store val at x18 + offset
11957 Builder
.CreateAlignedStore(AddResult
, Ptr
, CharUnits::One());
11961 if (BuiltinID
== AArch64::BI_CopyDoubleFromInt64
||
11962 BuiltinID
== AArch64::BI_CopyFloatFromInt32
||
11963 BuiltinID
== AArch64::BI_CopyInt32FromFloat
||
11964 BuiltinID
== AArch64::BI_CopyInt64FromDouble
) {
11965 Value
*Arg
= EmitScalarExpr(E
->getArg(0));
11966 llvm::Type
*RetTy
= ConvertType(E
->getType());
11967 return Builder
.CreateBitCast(Arg
, RetTy
);
11970 if (BuiltinID
== AArch64::BI_CountLeadingOnes
||
11971 BuiltinID
== AArch64::BI_CountLeadingOnes64
||
11972 BuiltinID
== AArch64::BI_CountLeadingZeros
||
11973 BuiltinID
== AArch64::BI_CountLeadingZeros64
) {
11974 Value
*Arg
= EmitScalarExpr(E
->getArg(0));
11975 llvm::Type
*ArgType
= Arg
->getType();
11977 if (BuiltinID
== AArch64::BI_CountLeadingOnes
||
11978 BuiltinID
== AArch64::BI_CountLeadingOnes64
)
11979 Arg
= Builder
.CreateXor(Arg
, Constant::getAllOnesValue(ArgType
));
11981 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctlz
, ArgType
);
11982 Value
*Result
= Builder
.CreateCall(F
, {Arg
, Builder
.getInt1(false)});
11984 if (BuiltinID
== AArch64::BI_CountLeadingOnes64
||
11985 BuiltinID
== AArch64::BI_CountLeadingZeros64
)
11986 Result
= Builder
.CreateTrunc(Result
, Builder
.getInt32Ty());
11990 if (BuiltinID
== AArch64::BI_CountLeadingSigns
||
11991 BuiltinID
== AArch64::BI_CountLeadingSigns64
) {
11992 Value
*Arg
= EmitScalarExpr(E
->getArg(0));
11994 Function
*F
= (BuiltinID
== AArch64::BI_CountLeadingSigns
)
11995 ? CGM
.getIntrinsic(Intrinsic::aarch64_cls
)
11996 : CGM
.getIntrinsic(Intrinsic::aarch64_cls64
);
11998 Value
*Result
= Builder
.CreateCall(F
, Arg
, "cls");
11999 if (BuiltinID
== AArch64::BI_CountLeadingSigns64
)
12000 Result
= Builder
.CreateTrunc(Result
, Builder
.getInt32Ty());
12004 if (BuiltinID
== AArch64::BI_CountOneBits
||
12005 BuiltinID
== AArch64::BI_CountOneBits64
) {
12006 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
12007 llvm::Type
*ArgType
= ArgValue
->getType();
12008 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctpop
, ArgType
);
12010 Value
*Result
= Builder
.CreateCall(F
, ArgValue
);
12011 if (BuiltinID
== AArch64::BI_CountOneBits64
)
12012 Result
= Builder
.CreateTrunc(Result
, Builder
.getInt32Ty());
12016 if (BuiltinID
== AArch64::BI__prefetch
) {
12017 Value
*Address
= EmitScalarExpr(E
->getArg(0));
12018 Value
*RW
= llvm::ConstantInt::get(Int32Ty
, 0);
12019 Value
*Locality
= ConstantInt::get(Int32Ty
, 3);
12020 Value
*Data
= llvm::ConstantInt::get(Int32Ty
, 1);
12021 Function
*F
= CGM
.getIntrinsic(Intrinsic::prefetch
, Address
->getType());
12022 return Builder
.CreateCall(F
, {Address
, RW
, Locality
, Data
});
12025 if (BuiltinID
== AArch64::BI__hlt
) {
12026 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_hlt
);
12027 Builder
.CreateCall(F
, {EmitScalarExpr(E
->getArg(0))});
12029 // Return 0 for convenience, even though MSVC returns some other undefined value.
12031 return ConstantInt::get(Builder
.getInt32Ty(), 0);
12034 // Handle MSVC intrinsics before argument evaluation to prevent double evaluation.
12036 if (std::optional
<MSVCIntrin
> MsvcIntId
=
12037 translateAarch64ToMsvcIntrin(BuiltinID
))
12038 return EmitMSVCBuiltinExpr(*MsvcIntId
, E
);
12040 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
12041 auto It
= llvm::find_if(NEONEquivalentIntrinsicMap
, [BuiltinID
](auto &P
) {
12042 return P
.first
== BuiltinID
;
12044 if (It
!= end(NEONEquivalentIntrinsicMap
))
12045 BuiltinID
= It
->second
;
12047 // Find out if any arguments are required to be integer constant expressions.
12049 unsigned ICEArguments
= 0;
12050 ASTContext::GetBuiltinTypeError Error
;
12051 getContext().GetBuiltinType(BuiltinID
, Error
, &ICEArguments
);
12052 assert(Error
== ASTContext::GE_None
&& "Should not codegen an error");
12054 llvm::SmallVector
<Value
*, 4> Ops
;
12055 Address PtrOp0
= Address::invalid();
12056 for (unsigned i
= 0, e
= E
->getNumArgs() - 1; i
!= e
; i
++) {
12058 switch (BuiltinID
) {
12059 case NEON::BI__builtin_neon_vld1_v
:
12060 case NEON::BI__builtin_neon_vld1q_v
:
12061 case NEON::BI__builtin_neon_vld1_dup_v
:
12062 case NEON::BI__builtin_neon_vld1q_dup_v
:
12063 case NEON::BI__builtin_neon_vld1_lane_v
:
12064 case NEON::BI__builtin_neon_vld1q_lane_v
:
12065 case NEON::BI__builtin_neon_vst1_v
:
12066 case NEON::BI__builtin_neon_vst1q_v
:
12067 case NEON::BI__builtin_neon_vst1_lane_v
:
12068 case NEON::BI__builtin_neon_vst1q_lane_v
:
12069 case NEON::BI__builtin_neon_vldap1_lane_s64
:
12070 case NEON::BI__builtin_neon_vldap1q_lane_s64
:
12071 case NEON::BI__builtin_neon_vstl1_lane_s64
:
12072 case NEON::BI__builtin_neon_vstl1q_lane_s64
:
12073 // Get the alignment for the argument in addition to the value;
12074 // we'll use it later.
12075 PtrOp0
= EmitPointerWithAlignment(E
->getArg(0));
12076 Ops
.push_back(PtrOp0
.emitRawPointer(*this));
12080 Ops
.push_back(EmitScalarOrConstFoldImmArg(ICEArguments
, i
, E
));
12083 auto SISDMap
= ArrayRef(AArch64SISDIntrinsicMap
);
12084 const ARMVectorIntrinsicInfo
*Builtin
= findARMVectorIntrinsicInMap(
12085 SISDMap
, BuiltinID
, AArch64SISDIntrinsicsProvenSorted
);
12088 Ops
.push_back(EmitScalarExpr(E
->getArg(E
->getNumArgs() - 1)));
12089 Value
*Result
= EmitCommonNeonSISDBuiltinExpr(*this, *Builtin
, Ops
, E
);
12090 assert(Result
&& "SISD intrinsic should have been handled");
12094 const Expr
*Arg
= E
->getArg(E
->getNumArgs()-1);
12095 NeonTypeFlags
Type(0);
12096 if (std::optional
<llvm::APSInt
> Result
=
12097 Arg
->getIntegerConstantExpr(getContext()))
12098 // Determine the type of this overloaded NEON intrinsic.
12099 Type
= NeonTypeFlags(Result
->getZExtValue());
12101 bool usgn
= Type
.isUnsigned();
12102 bool quad
= Type
.isQuad();
12104 // Handle non-overloaded intrinsics first.
12105 switch (BuiltinID
) {
12107 case NEON::BI__builtin_neon_vabsh_f16
:
12108 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
12109 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::fabs
, HalfTy
), Ops
, "vabs");
12110 case NEON::BI__builtin_neon_vaddq_p128
: {
12111 llvm::Type
*Ty
= GetNeonType(this, NeonTypeFlags::Poly128
);
12112 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12113 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
12114 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
12115 Ops
[0] = Builder
.CreateXor(Ops
[0], Ops
[1]);
12116 llvm::Type
*Int128Ty
= llvm::Type::getIntNTy(getLLVMContext(), 128);
12117 return Builder
.CreateBitCast(Ops
[0], Int128Ty
);
12119 case NEON::BI__builtin_neon_vldrq_p128
: {
12120 llvm::Type
*Int128Ty
= llvm::Type::getIntNTy(getLLVMContext(), 128);
12121 Value
*Ptr
= EmitScalarExpr(E
->getArg(0));
12122 return Builder
.CreateAlignedLoad(Int128Ty
, Ptr
,
12123 CharUnits::fromQuantity(16));
12125 case NEON::BI__builtin_neon_vstrq_p128
: {
12126 Value
*Ptr
= Ops
[0];
12127 return Builder
.CreateDefaultAlignedStore(EmitScalarExpr(E
->getArg(1)), Ptr
);
12129 case NEON::BI__builtin_neon_vcvts_f32_u32
:
12130 case NEON::BI__builtin_neon_vcvtd_f64_u64
:
12133 case NEON::BI__builtin_neon_vcvts_f32_s32
:
12134 case NEON::BI__builtin_neon_vcvtd_f64_s64
: {
12135 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
12136 bool Is64
= Ops
[0]->getType()->getPrimitiveSizeInBits() == 64;
12137 llvm::Type
*InTy
= Is64
? Int64Ty
: Int32Ty
;
12138 llvm::Type
*FTy
= Is64
? DoubleTy
: FloatTy
;
12139 Ops
[0] = Builder
.CreateBitCast(Ops
[0], InTy
);
12141 return Builder
.CreateUIToFP(Ops
[0], FTy
);
12142 return Builder
.CreateSIToFP(Ops
[0], FTy
);
12144 case NEON::BI__builtin_neon_vcvth_f16_u16
:
12145 case NEON::BI__builtin_neon_vcvth_f16_u32
:
12146 case NEON::BI__builtin_neon_vcvth_f16_u64
:
12149 case NEON::BI__builtin_neon_vcvth_f16_s16
:
12150 case NEON::BI__builtin_neon_vcvth_f16_s32
:
12151 case NEON::BI__builtin_neon_vcvth_f16_s64
: {
12152 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
12153 llvm::Type
*FTy
= HalfTy
;
12155 if (Ops
[0]->getType()->getPrimitiveSizeInBits() == 64)
12157 else if (Ops
[0]->getType()->getPrimitiveSizeInBits() == 32)
12161 Ops
[0] = Builder
.CreateBitCast(Ops
[0], InTy
);
12163 return Builder
.CreateUIToFP(Ops
[0], FTy
);
12164 return Builder
.CreateSIToFP(Ops
[0], FTy
);
12166 case NEON::BI__builtin_neon_vcvtah_u16_f16
:
12167 case NEON::BI__builtin_neon_vcvtmh_u16_f16
:
12168 case NEON::BI__builtin_neon_vcvtnh_u16_f16
:
12169 case NEON::BI__builtin_neon_vcvtph_u16_f16
:
12170 case NEON::BI__builtin_neon_vcvth_u16_f16
:
12171 case NEON::BI__builtin_neon_vcvtah_s16_f16
:
12172 case NEON::BI__builtin_neon_vcvtmh_s16_f16
:
12173 case NEON::BI__builtin_neon_vcvtnh_s16_f16
:
12174 case NEON::BI__builtin_neon_vcvtph_s16_f16
:
12175 case NEON::BI__builtin_neon_vcvth_s16_f16
: {
12177 llvm::Type
* InTy
= Int32Ty
;
12178 llvm::Type
* FTy
= HalfTy
;
12179 llvm::Type
*Tys
[2] = {InTy
, FTy
};
12180 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
12181 switch (BuiltinID
) {
12182 default: llvm_unreachable("missing builtin ID in switch!");
12183 case NEON::BI__builtin_neon_vcvtah_u16_f16
:
12184 Int
= Intrinsic::aarch64_neon_fcvtau
; break;
12185 case NEON::BI__builtin_neon_vcvtmh_u16_f16
:
12186 Int
= Intrinsic::aarch64_neon_fcvtmu
; break;
12187 case NEON::BI__builtin_neon_vcvtnh_u16_f16
:
12188 Int
= Intrinsic::aarch64_neon_fcvtnu
; break;
12189 case NEON::BI__builtin_neon_vcvtph_u16_f16
:
12190 Int
= Intrinsic::aarch64_neon_fcvtpu
; break;
12191 case NEON::BI__builtin_neon_vcvth_u16_f16
:
12192 Int
= Intrinsic::aarch64_neon_fcvtzu
; break;
12193 case NEON::BI__builtin_neon_vcvtah_s16_f16
:
12194 Int
= Intrinsic::aarch64_neon_fcvtas
; break;
12195 case NEON::BI__builtin_neon_vcvtmh_s16_f16
:
12196 Int
= Intrinsic::aarch64_neon_fcvtms
; break;
12197 case NEON::BI__builtin_neon_vcvtnh_s16_f16
:
12198 Int
= Intrinsic::aarch64_neon_fcvtns
; break;
12199 case NEON::BI__builtin_neon_vcvtph_s16_f16
:
12200 Int
= Intrinsic::aarch64_neon_fcvtps
; break;
12201 case NEON::BI__builtin_neon_vcvth_s16_f16
:
12202 Int
= Intrinsic::aarch64_neon_fcvtzs
; break;
12204 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "fcvt");
12205 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
12207 case NEON::BI__builtin_neon_vcaleh_f16
:
12208 case NEON::BI__builtin_neon_vcalth_f16
:
12209 case NEON::BI__builtin_neon_vcageh_f16
:
12210 case NEON::BI__builtin_neon_vcagth_f16
: {
12212 llvm::Type
* InTy
= Int32Ty
;
12213 llvm::Type
* FTy
= HalfTy
;
12214 llvm::Type
*Tys
[2] = {InTy
, FTy
};
12215 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12216 switch (BuiltinID
) {
12217 default: llvm_unreachable("missing builtin ID in switch!");
12218 case NEON::BI__builtin_neon_vcageh_f16
:
12219 Int
= Intrinsic::aarch64_neon_facge
; break;
12220 case NEON::BI__builtin_neon_vcagth_f16
:
12221 Int
= Intrinsic::aarch64_neon_facgt
; break;
12222 case NEON::BI__builtin_neon_vcaleh_f16
:
12223 Int
= Intrinsic::aarch64_neon_facge
; std::swap(Ops
[0], Ops
[1]); break;
12224 case NEON::BI__builtin_neon_vcalth_f16
:
12225 Int
= Intrinsic::aarch64_neon_facgt
; std::swap(Ops
[0], Ops
[1]); break;
12227 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "facg");
12228 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
12230 case NEON::BI__builtin_neon_vcvth_n_s16_f16
:
12231 case NEON::BI__builtin_neon_vcvth_n_u16_f16
: {
12233 llvm::Type
* InTy
= Int32Ty
;
12234 llvm::Type
* FTy
= HalfTy
;
12235 llvm::Type
*Tys
[2] = {InTy
, FTy
};
12236 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12237 switch (BuiltinID
) {
12238 default: llvm_unreachable("missing builtin ID in switch!");
12239 case NEON::BI__builtin_neon_vcvth_n_s16_f16
:
12240 Int
= Intrinsic::aarch64_neon_vcvtfp2fxs
; break;
12241 case NEON::BI__builtin_neon_vcvth_n_u16_f16
:
12242 Int
= Intrinsic::aarch64_neon_vcvtfp2fxu
; break;
12244 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "fcvth_n");
12245 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
12247 case NEON::BI__builtin_neon_vcvth_n_f16_s16
:
12248 case NEON::BI__builtin_neon_vcvth_n_f16_u16
: {
12250 llvm::Type
* FTy
= HalfTy
;
12251 llvm::Type
* InTy
= Int32Ty
;
12252 llvm::Type
*Tys
[2] = {FTy
, InTy
};
12253 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12254 switch (BuiltinID
) {
12255 default: llvm_unreachable("missing builtin ID in switch!");
12256 case NEON::BI__builtin_neon_vcvth_n_f16_s16
:
12257 Int
= Intrinsic::aarch64_neon_vcvtfxs2fp
;
12258 Ops
[0] = Builder
.CreateSExt(Ops
[0], InTy
, "sext");
12260 case NEON::BI__builtin_neon_vcvth_n_f16_u16
:
12261 Int
= Intrinsic::aarch64_neon_vcvtfxu2fp
;
12262 Ops
[0] = Builder
.CreateZExt(Ops
[0], InTy
);
12265 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "fcvth_n");
12267 case NEON::BI__builtin_neon_vpaddd_s64
: {
12268 auto *Ty
= llvm::FixedVectorType::get(Int64Ty
, 2);
12269 Value
*Vec
= EmitScalarExpr(E
->getArg(0));
12270 // The vector is v2i64, so make sure it's bitcast to that.
12271 Vec
= Builder
.CreateBitCast(Vec
, Ty
, "v2i64");
12272 llvm::Value
*Idx0
= llvm::ConstantInt::get(SizeTy
, 0);
12273 llvm::Value
*Idx1
= llvm::ConstantInt::get(SizeTy
, 1);
12274 Value
*Op0
= Builder
.CreateExtractElement(Vec
, Idx0
, "lane0");
12275 Value
*Op1
= Builder
.CreateExtractElement(Vec
, Idx1
, "lane1");
12276 // Pairwise addition of a v2i64 into a scalar i64.
12277 return Builder
.CreateAdd(Op0
, Op1
, "vpaddd");
12279 case NEON::BI__builtin_neon_vpaddd_f64
: {
12280 auto *Ty
= llvm::FixedVectorType::get(DoubleTy
, 2);
12281 Value
*Vec
= EmitScalarExpr(E
->getArg(0));
12282 // The vector is v2f64, so make sure it's bitcast to that.
12283 Vec
= Builder
.CreateBitCast(Vec
, Ty
, "v2f64");
12284 llvm::Value
*Idx0
= llvm::ConstantInt::get(SizeTy
, 0);
12285 llvm::Value
*Idx1
= llvm::ConstantInt::get(SizeTy
, 1);
12286 Value
*Op0
= Builder
.CreateExtractElement(Vec
, Idx0
, "lane0");
12287 Value
*Op1
= Builder
.CreateExtractElement(Vec
, Idx1
, "lane1");
12288 // Pairwise addition of a v2f64 into a scalar f64.
12289 return Builder
.CreateFAdd(Op0
, Op1
, "vpaddd");
12291 case NEON::BI__builtin_neon_vpadds_f32
: {
12292 auto *Ty
= llvm::FixedVectorType::get(FloatTy
, 2);
12293 Value
*Vec
= EmitScalarExpr(E
->getArg(0));
12294 // The vector is v2f32, so make sure it's bitcast to that.
12295 Vec
= Builder
.CreateBitCast(Vec
, Ty
, "v2f32");
12296 llvm::Value
*Idx0
= llvm::ConstantInt::get(SizeTy
, 0);
12297 llvm::Value
*Idx1
= llvm::ConstantInt::get(SizeTy
, 1);
12298 Value
*Op0
= Builder
.CreateExtractElement(Vec
, Idx0
, "lane0");
12299 Value
*Op1
= Builder
.CreateExtractElement(Vec
, Idx1
, "lane1");
12300 // Pairwise addition of a v2f32 into a scalar f32.
12301 return Builder
.CreateFAdd(Op0
, Op1
, "vpaddd");
12303 case NEON::BI__builtin_neon_vceqzd_s64
:
12304 case NEON::BI__builtin_neon_vceqzd_f64
:
12305 case NEON::BI__builtin_neon_vceqzs_f32
:
12306 case NEON::BI__builtin_neon_vceqzh_f16
:
12307 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
12308 return EmitAArch64CompareBuiltinExpr(
12309 Ops
[0], ConvertType(E
->getCallReturnType(getContext())),
12310 ICmpInst::FCMP_OEQ
, ICmpInst::ICMP_EQ
, "vceqz");
12311 case NEON::BI__builtin_neon_vcgezd_s64
:
12312 case NEON::BI__builtin_neon_vcgezd_f64
:
12313 case NEON::BI__builtin_neon_vcgezs_f32
:
12314 case NEON::BI__builtin_neon_vcgezh_f16
:
12315 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
12316 return EmitAArch64CompareBuiltinExpr(
12317 Ops
[0], ConvertType(E
->getCallReturnType(getContext())),
12318 ICmpInst::FCMP_OGE
, ICmpInst::ICMP_SGE
, "vcgez");
12319 case NEON::BI__builtin_neon_vclezd_s64
:
12320 case NEON::BI__builtin_neon_vclezd_f64
:
12321 case NEON::BI__builtin_neon_vclezs_f32
:
12322 case NEON::BI__builtin_neon_vclezh_f16
:
12323 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
12324 return EmitAArch64CompareBuiltinExpr(
12325 Ops
[0], ConvertType(E
->getCallReturnType(getContext())),
12326 ICmpInst::FCMP_OLE
, ICmpInst::ICMP_SLE
, "vclez");
12327 case NEON::BI__builtin_neon_vcgtzd_s64
:
12328 case NEON::BI__builtin_neon_vcgtzd_f64
:
12329 case NEON::BI__builtin_neon_vcgtzs_f32
:
12330 case NEON::BI__builtin_neon_vcgtzh_f16
:
12331 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
12332 return EmitAArch64CompareBuiltinExpr(
12333 Ops
[0], ConvertType(E
->getCallReturnType(getContext())),
12334 ICmpInst::FCMP_OGT
, ICmpInst::ICMP_SGT
, "vcgtz");
12335 case NEON::BI__builtin_neon_vcltzd_s64
:
12336 case NEON::BI__builtin_neon_vcltzd_f64
:
12337 case NEON::BI__builtin_neon_vcltzs_f32
:
12338 case NEON::BI__builtin_neon_vcltzh_f16
:
12339 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
12340 return EmitAArch64CompareBuiltinExpr(
12341 Ops
[0], ConvertType(E
->getCallReturnType(getContext())),
12342 ICmpInst::FCMP_OLT
, ICmpInst::ICMP_SLT
, "vcltz");
12344 case NEON::BI__builtin_neon_vceqzd_u64
: {
12345 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
12346 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Int64Ty
);
12348 Builder
.CreateICmpEQ(Ops
[0], llvm::Constant::getNullValue(Int64Ty
));
12349 return Builder
.CreateSExt(Ops
[0], Int64Ty
, "vceqzd");
12351 case NEON::BI__builtin_neon_vceqd_f64
:
12352 case NEON::BI__builtin_neon_vcled_f64
:
12353 case NEON::BI__builtin_neon_vcltd_f64
:
12354 case NEON::BI__builtin_neon_vcged_f64
:
12355 case NEON::BI__builtin_neon_vcgtd_f64
: {
12356 llvm::CmpInst::Predicate P
;
12357 switch (BuiltinID
) {
12358 default: llvm_unreachable("missing builtin ID in switch!");
12359 case NEON::BI__builtin_neon_vceqd_f64
: P
= llvm::FCmpInst::FCMP_OEQ
; break;
12360 case NEON::BI__builtin_neon_vcled_f64
: P
= llvm::FCmpInst::FCMP_OLE
; break;
12361 case NEON::BI__builtin_neon_vcltd_f64
: P
= llvm::FCmpInst::FCMP_OLT
; break;
12362 case NEON::BI__builtin_neon_vcged_f64
: P
= llvm::FCmpInst::FCMP_OGE
; break;
12363 case NEON::BI__builtin_neon_vcgtd_f64
: P
= llvm::FCmpInst::FCMP_OGT
; break;
12365 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12366 Ops
[0] = Builder
.CreateBitCast(Ops
[0], DoubleTy
);
12367 Ops
[1] = Builder
.CreateBitCast(Ops
[1], DoubleTy
);
12368 if (P
== llvm::FCmpInst::FCMP_OEQ
)
12369 Ops
[0] = Builder
.CreateFCmp(P
, Ops
[0], Ops
[1]);
12371 Ops
[0] = Builder
.CreateFCmpS(P
, Ops
[0], Ops
[1]);
12372 return Builder
.CreateSExt(Ops
[0], Int64Ty
, "vcmpd");
12374 case NEON::BI__builtin_neon_vceqs_f32
:
12375 case NEON::BI__builtin_neon_vcles_f32
:
12376 case NEON::BI__builtin_neon_vclts_f32
:
12377 case NEON::BI__builtin_neon_vcges_f32
:
12378 case NEON::BI__builtin_neon_vcgts_f32
: {
12379 llvm::CmpInst::Predicate P
;
12380 switch (BuiltinID
) {
12381 default: llvm_unreachable("missing builtin ID in switch!");
12382 case NEON::BI__builtin_neon_vceqs_f32
: P
= llvm::FCmpInst::FCMP_OEQ
; break;
12383 case NEON::BI__builtin_neon_vcles_f32
: P
= llvm::FCmpInst::FCMP_OLE
; break;
12384 case NEON::BI__builtin_neon_vclts_f32
: P
= llvm::FCmpInst::FCMP_OLT
; break;
12385 case NEON::BI__builtin_neon_vcges_f32
: P
= llvm::FCmpInst::FCMP_OGE
; break;
12386 case NEON::BI__builtin_neon_vcgts_f32
: P
= llvm::FCmpInst::FCMP_OGT
; break;
12388 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12389 Ops
[0] = Builder
.CreateBitCast(Ops
[0], FloatTy
);
12390 Ops
[1] = Builder
.CreateBitCast(Ops
[1], FloatTy
);
12391 if (P
== llvm::FCmpInst::FCMP_OEQ
)
12392 Ops
[0] = Builder
.CreateFCmp(P
, Ops
[0], Ops
[1]);
12394 Ops
[0] = Builder
.CreateFCmpS(P
, Ops
[0], Ops
[1]);
12395 return Builder
.CreateSExt(Ops
[0], Int32Ty
, "vcmpd");
12397 case NEON::BI__builtin_neon_vceqh_f16
:
12398 case NEON::BI__builtin_neon_vcleh_f16
:
12399 case NEON::BI__builtin_neon_vclth_f16
:
12400 case NEON::BI__builtin_neon_vcgeh_f16
:
12401 case NEON::BI__builtin_neon_vcgth_f16
: {
12402 llvm::CmpInst::Predicate P
;
12403 switch (BuiltinID
) {
12404 default: llvm_unreachable("missing builtin ID in switch!");
12405 case NEON::BI__builtin_neon_vceqh_f16
: P
= llvm::FCmpInst::FCMP_OEQ
; break;
12406 case NEON::BI__builtin_neon_vcleh_f16
: P
= llvm::FCmpInst::FCMP_OLE
; break;
12407 case NEON::BI__builtin_neon_vclth_f16
: P
= llvm::FCmpInst::FCMP_OLT
; break;
12408 case NEON::BI__builtin_neon_vcgeh_f16
: P
= llvm::FCmpInst::FCMP_OGE
; break;
12409 case NEON::BI__builtin_neon_vcgth_f16
: P
= llvm::FCmpInst::FCMP_OGT
; break;
12411 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12412 Ops
[0] = Builder
.CreateBitCast(Ops
[0], HalfTy
);
12413 Ops
[1] = Builder
.CreateBitCast(Ops
[1], HalfTy
);
12414 if (P
== llvm::FCmpInst::FCMP_OEQ
)
12415 Ops
[0] = Builder
.CreateFCmp(P
, Ops
[0], Ops
[1]);
12417 Ops
[0] = Builder
.CreateFCmpS(P
, Ops
[0], Ops
[1]);
12418 return Builder
.CreateSExt(Ops
[0], Int16Ty
, "vcmpd");
12420 case NEON::BI__builtin_neon_vceqd_s64
:
12421 case NEON::BI__builtin_neon_vceqd_u64
:
12422 case NEON::BI__builtin_neon_vcgtd_s64
:
12423 case NEON::BI__builtin_neon_vcgtd_u64
:
12424 case NEON::BI__builtin_neon_vcltd_s64
:
12425 case NEON::BI__builtin_neon_vcltd_u64
:
12426 case NEON::BI__builtin_neon_vcged_u64
:
12427 case NEON::BI__builtin_neon_vcged_s64
:
12428 case NEON::BI__builtin_neon_vcled_u64
:
12429 case NEON::BI__builtin_neon_vcled_s64
: {
12430 llvm::CmpInst::Predicate P
;
12431 switch (BuiltinID
) {
12432 default: llvm_unreachable("missing builtin ID in switch!");
12433 case NEON::BI__builtin_neon_vceqd_s64
:
12434 case NEON::BI__builtin_neon_vceqd_u64
:P
= llvm::ICmpInst::ICMP_EQ
;break;
12435 case NEON::BI__builtin_neon_vcgtd_s64
:P
= llvm::ICmpInst::ICMP_SGT
;break;
12436 case NEON::BI__builtin_neon_vcgtd_u64
:P
= llvm::ICmpInst::ICMP_UGT
;break;
12437 case NEON::BI__builtin_neon_vcltd_s64
:P
= llvm::ICmpInst::ICMP_SLT
;break;
12438 case NEON::BI__builtin_neon_vcltd_u64
:P
= llvm::ICmpInst::ICMP_ULT
;break;
12439 case NEON::BI__builtin_neon_vcged_u64
:P
= llvm::ICmpInst::ICMP_UGE
;break;
12440 case NEON::BI__builtin_neon_vcged_s64
:P
= llvm::ICmpInst::ICMP_SGE
;break;
12441 case NEON::BI__builtin_neon_vcled_u64
:P
= llvm::ICmpInst::ICMP_ULE
;break;
12442 case NEON::BI__builtin_neon_vcled_s64
:P
= llvm::ICmpInst::ICMP_SLE
;break;
12444 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12445 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Int64Ty
);
12446 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Int64Ty
);
12447 Ops
[0] = Builder
.CreateICmp(P
, Ops
[0], Ops
[1]);
12448 return Builder
.CreateSExt(Ops
[0], Int64Ty
, "vceqd");
12450 case NEON::BI__builtin_neon_vtstd_s64
:
12451 case NEON::BI__builtin_neon_vtstd_u64
: {
12452 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12453 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Int64Ty
);
12454 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Int64Ty
);
12455 Ops
[0] = Builder
.CreateAnd(Ops
[0], Ops
[1]);
12456 Ops
[0] = Builder
.CreateICmp(ICmpInst::ICMP_NE
, Ops
[0],
12457 llvm::Constant::getNullValue(Int64Ty
));
12458 return Builder
.CreateSExt(Ops
[0], Int64Ty
, "vtstd");
12460 case NEON::BI__builtin_neon_vset_lane_i8
:
12461 case NEON::BI__builtin_neon_vset_lane_i16
:
12462 case NEON::BI__builtin_neon_vset_lane_i32
:
12463 case NEON::BI__builtin_neon_vset_lane_i64
:
12464 case NEON::BI__builtin_neon_vset_lane_bf16
:
12465 case NEON::BI__builtin_neon_vset_lane_f32
:
12466 case NEON::BI__builtin_neon_vsetq_lane_i8
:
12467 case NEON::BI__builtin_neon_vsetq_lane_i16
:
12468 case NEON::BI__builtin_neon_vsetq_lane_i32
:
12469 case NEON::BI__builtin_neon_vsetq_lane_i64
:
12470 case NEON::BI__builtin_neon_vsetq_lane_bf16
:
12471 case NEON::BI__builtin_neon_vsetq_lane_f32
:
12472 Ops
.push_back(EmitScalarExpr(E
->getArg(2)));
12473 return Builder
.CreateInsertElement(Ops
[1], Ops
[0], Ops
[2], "vset_lane");
12474 case NEON::BI__builtin_neon_vset_lane_f64
:
12475 // The vector type needs a cast for the v1f64 variant.
12477 Builder
.CreateBitCast(Ops
[1], llvm::FixedVectorType::get(DoubleTy
, 1));
12478 Ops
.push_back(EmitScalarExpr(E
->getArg(2)));
12479 return Builder
.CreateInsertElement(Ops
[1], Ops
[0], Ops
[2], "vset_lane");
12480 case NEON::BI__builtin_neon_vsetq_lane_f64
:
12481 // The vector type needs a cast for the v2f64 variant.
12483 Builder
.CreateBitCast(Ops
[1], llvm::FixedVectorType::get(DoubleTy
, 2));
12484 Ops
.push_back(EmitScalarExpr(E
->getArg(2)));
12485 return Builder
.CreateInsertElement(Ops
[1], Ops
[0], Ops
[2], "vset_lane");
12487 case NEON::BI__builtin_neon_vget_lane_i8
:
12488 case NEON::BI__builtin_neon_vdupb_lane_i8
:
12490 Builder
.CreateBitCast(Ops
[0], llvm::FixedVectorType::get(Int8Ty
, 8));
12491 return Builder
.CreateExtractElement(Ops
[0], EmitScalarExpr(E
->getArg(1)),
12493 case NEON::BI__builtin_neon_vgetq_lane_i8
:
12494 case NEON::BI__builtin_neon_vdupb_laneq_i8
:
12496 Builder
.CreateBitCast(Ops
[0], llvm::FixedVectorType::get(Int8Ty
, 16));
12497 return Builder
.CreateExtractElement(Ops
[0], EmitScalarExpr(E
->getArg(1)),
12499 case NEON::BI__builtin_neon_vget_lane_i16
:
12500 case NEON::BI__builtin_neon_vduph_lane_i16
:
12502 Builder
.CreateBitCast(Ops
[0], llvm::FixedVectorType::get(Int16Ty
, 4));
12503 return Builder
.CreateExtractElement(Ops
[0], EmitScalarExpr(E
->getArg(1)),
12505 case NEON::BI__builtin_neon_vgetq_lane_i16
:
12506 case NEON::BI__builtin_neon_vduph_laneq_i16
:
12508 Builder
.CreateBitCast(Ops
[0], llvm::FixedVectorType::get(Int16Ty
, 8));
12509 return Builder
.CreateExtractElement(Ops
[0], EmitScalarExpr(E
->getArg(1)),
12511 case NEON::BI__builtin_neon_vget_lane_i32
:
12512 case NEON::BI__builtin_neon_vdups_lane_i32
:
12514 Builder
.CreateBitCast(Ops
[0], llvm::FixedVectorType::get(Int32Ty
, 2));
12515 return Builder
.CreateExtractElement(Ops
[0], EmitScalarExpr(E
->getArg(1)),
12517 case NEON::BI__builtin_neon_vdups_lane_f32
:
12519 Builder
.CreateBitCast(Ops
[0], llvm::FixedVectorType::get(FloatTy
, 2));
12520 return Builder
.CreateExtractElement(Ops
[0], EmitScalarExpr(E
->getArg(1)),
12522 case NEON::BI__builtin_neon_vgetq_lane_i32
:
12523 case NEON::BI__builtin_neon_vdups_laneq_i32
:
12525 Builder
.CreateBitCast(Ops
[0], llvm::FixedVectorType::get(Int32Ty
, 4));
12526 return Builder
.CreateExtractElement(Ops
[0], EmitScalarExpr(E
->getArg(1)),
12528 case NEON::BI__builtin_neon_vget_lane_i64
:
12529 case NEON::BI__builtin_neon_vdupd_lane_i64
:
12531 Builder
.CreateBitCast(Ops
[0], llvm::FixedVectorType::get(Int64Ty
, 1));
12532 return Builder
.CreateExtractElement(Ops
[0], EmitScalarExpr(E
->getArg(1)),
12534 case NEON::BI__builtin_neon_vdupd_lane_f64
:
12536 Builder
.CreateBitCast(Ops
[0], llvm::FixedVectorType::get(DoubleTy
, 1));
12537 return Builder
.CreateExtractElement(Ops
[0], EmitScalarExpr(E
->getArg(1)),
12539 case NEON::BI__builtin_neon_vgetq_lane_i64
:
12540 case NEON::BI__builtin_neon_vdupd_laneq_i64
:
12542 Builder
.CreateBitCast(Ops
[0], llvm::FixedVectorType::get(Int64Ty
, 2));
12543 return Builder
.CreateExtractElement(Ops
[0], EmitScalarExpr(E
->getArg(1)),
12545 case NEON::BI__builtin_neon_vget_lane_f32
:
12547 Builder
.CreateBitCast(Ops
[0], llvm::FixedVectorType::get(FloatTy
, 2));
12548 return Builder
.CreateExtractElement(Ops
[0], EmitScalarExpr(E
->getArg(1)),
12550 case NEON::BI__builtin_neon_vget_lane_f64
:
12552 Builder
.CreateBitCast(Ops
[0], llvm::FixedVectorType::get(DoubleTy
, 1));
12553 return Builder
.CreateExtractElement(Ops
[0], EmitScalarExpr(E
->getArg(1)),
12555 case NEON::BI__builtin_neon_vgetq_lane_f32
:
12556 case NEON::BI__builtin_neon_vdups_laneq_f32
:
12558 Builder
.CreateBitCast(Ops
[0], llvm::FixedVectorType::get(FloatTy
, 4));
12559 return Builder
.CreateExtractElement(Ops
[0], EmitScalarExpr(E
->getArg(1)),
12561 case NEON::BI__builtin_neon_vgetq_lane_f64
:
12562 case NEON::BI__builtin_neon_vdupd_laneq_f64
:
12564 Builder
.CreateBitCast(Ops
[0], llvm::FixedVectorType::get(DoubleTy
, 2));
12565 return Builder
.CreateExtractElement(Ops
[0], EmitScalarExpr(E
->getArg(1)),
12567 case NEON::BI__builtin_neon_vaddh_f16
:
12568 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12569 return Builder
.CreateFAdd(Ops
[0], Ops
[1], "vaddh");
12570 case NEON::BI__builtin_neon_vsubh_f16
:
12571 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12572 return Builder
.CreateFSub(Ops
[0], Ops
[1], "vsubh");
12573 case NEON::BI__builtin_neon_vmulh_f16
:
12574 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12575 return Builder
.CreateFMul(Ops
[0], Ops
[1], "vmulh");
12576 case NEON::BI__builtin_neon_vdivh_f16
:
12577 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12578 return Builder
.CreateFDiv(Ops
[0], Ops
[1], "vdivh");
12579 case NEON::BI__builtin_neon_vfmah_f16
:
12580 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12581 return emitCallMaybeConstrainedFPBuiltin(
12582 *this, Intrinsic::fma
, Intrinsic::experimental_constrained_fma
, HalfTy
,
12583 {EmitScalarExpr(E
->getArg(1)), EmitScalarExpr(E
->getArg(2)), Ops
[0]});
12584 case NEON::BI__builtin_neon_vfmsh_f16
: {
12585 Value
* Neg
= Builder
.CreateFNeg(EmitScalarExpr(E
->getArg(1)), "vsubh");
12587 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12588 return emitCallMaybeConstrainedFPBuiltin(
12589 *this, Intrinsic::fma
, Intrinsic::experimental_constrained_fma
, HalfTy
,
12590 {Neg
, EmitScalarExpr(E
->getArg(2)), Ops
[0]});
12592 case NEON::BI__builtin_neon_vaddd_s64
:
12593 case NEON::BI__builtin_neon_vaddd_u64
:
12594 return Builder
.CreateAdd(Ops
[0], EmitScalarExpr(E
->getArg(1)), "vaddd");
12595 case NEON::BI__builtin_neon_vsubd_s64
:
12596 case NEON::BI__builtin_neon_vsubd_u64
:
12597 return Builder
.CreateSub(Ops
[0], EmitScalarExpr(E
->getArg(1)), "vsubd");
12598 case NEON::BI__builtin_neon_vqdmlalh_s16
:
12599 case NEON::BI__builtin_neon_vqdmlslh_s16
: {
12600 SmallVector
<Value
*, 2> ProductOps
;
12601 ProductOps
.push_back(vectorWrapScalar16(Ops
[1]));
12602 ProductOps
.push_back(vectorWrapScalar16(EmitScalarExpr(E
->getArg(2))));
12603 auto *VTy
= llvm::FixedVectorType::get(Int32Ty
, 4);
12604 Ops
[1] = EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_sqdmull
, VTy
),
12605 ProductOps
, "vqdmlXl");
12606 Constant
*CI
= ConstantInt::get(SizeTy
, 0);
12607 Ops
[1] = Builder
.CreateExtractElement(Ops
[1], CI
, "lane0");
12609 unsigned AccumInt
= BuiltinID
== NEON::BI__builtin_neon_vqdmlalh_s16
12610 ? Intrinsic::aarch64_neon_sqadd
12611 : Intrinsic::aarch64_neon_sqsub
;
12612 return EmitNeonCall(CGM
.getIntrinsic(AccumInt
, Int32Ty
), Ops
, "vqdmlXl");
12614 case NEON::BI__builtin_neon_vqshlud_n_s64
: {
12615 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12616 Ops
[1] = Builder
.CreateZExt(Ops
[1], Int64Ty
);
12617 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_sqshlu
, Int64Ty
),
12620 case NEON::BI__builtin_neon_vqshld_n_u64
:
12621 case NEON::BI__builtin_neon_vqshld_n_s64
: {
12622 unsigned Int
= BuiltinID
== NEON::BI__builtin_neon_vqshld_n_u64
12623 ? Intrinsic::aarch64_neon_uqshl
12624 : Intrinsic::aarch64_neon_sqshl
;
12625 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12626 Ops
[1] = Builder
.CreateZExt(Ops
[1], Int64Ty
);
12627 return EmitNeonCall(CGM
.getIntrinsic(Int
, Int64Ty
), Ops
, "vqshl_n");
12629 case NEON::BI__builtin_neon_vrshrd_n_u64
:
12630 case NEON::BI__builtin_neon_vrshrd_n_s64
: {
12631 unsigned Int
= BuiltinID
== NEON::BI__builtin_neon_vrshrd_n_u64
12632 ? Intrinsic::aarch64_neon_urshl
12633 : Intrinsic::aarch64_neon_srshl
;
12634 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12635 int SV
= cast
<ConstantInt
>(Ops
[1])->getSExtValue();
12636 Ops
[1] = ConstantInt::get(Int64Ty
, -SV
);
12637 return EmitNeonCall(CGM
.getIntrinsic(Int
, Int64Ty
), Ops
, "vrshr_n");
12639 case NEON::BI__builtin_neon_vrsrad_n_u64
:
12640 case NEON::BI__builtin_neon_vrsrad_n_s64
: {
12641 unsigned Int
= BuiltinID
== NEON::BI__builtin_neon_vrsrad_n_u64
12642 ? Intrinsic::aarch64_neon_urshl
12643 : Intrinsic::aarch64_neon_srshl
;
12644 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Int64Ty
);
12645 Ops
.push_back(Builder
.CreateNeg(EmitScalarExpr(E
->getArg(2))));
12646 Ops
[1] = Builder
.CreateCall(CGM
.getIntrinsic(Int
, Int64Ty
),
12647 {Ops
[1], Builder
.CreateSExt(Ops
[2], Int64Ty
)});
12648 return Builder
.CreateAdd(Ops
[0], Builder
.CreateBitCast(Ops
[1], Int64Ty
));
12650 case NEON::BI__builtin_neon_vshld_n_s64
:
12651 case NEON::BI__builtin_neon_vshld_n_u64
: {
12652 llvm::ConstantInt
*Amt
= cast
<ConstantInt
>(EmitScalarExpr(E
->getArg(1)));
12653 return Builder
.CreateShl(
12654 Ops
[0], ConstantInt::get(Int64Ty
, Amt
->getZExtValue()), "shld_n");
12656 case NEON::BI__builtin_neon_vshrd_n_s64
: {
12657 llvm::ConstantInt
*Amt
= cast
<ConstantInt
>(EmitScalarExpr(E
->getArg(1)));
12658 return Builder
.CreateAShr(
12659 Ops
[0], ConstantInt::get(Int64Ty
, std::min(static_cast<uint64_t>(63),
12660 Amt
->getZExtValue())),
12663 case NEON::BI__builtin_neon_vshrd_n_u64
: {
12664 llvm::ConstantInt
*Amt
= cast
<ConstantInt
>(EmitScalarExpr(E
->getArg(1)));
12665 uint64_t ShiftAmt
= Amt
->getZExtValue();
12666 // Right-shifting an unsigned value by its size yields 0.
12667 if (ShiftAmt
== 64)
12668 return ConstantInt::get(Int64Ty
, 0);
12669 return Builder
.CreateLShr(Ops
[0], ConstantInt::get(Int64Ty
, ShiftAmt
),
12672 case NEON::BI__builtin_neon_vsrad_n_s64
: {
12673 llvm::ConstantInt
*Amt
= cast
<ConstantInt
>(EmitScalarExpr(E
->getArg(2)));
12674 Ops
[1] = Builder
.CreateAShr(
12675 Ops
[1], ConstantInt::get(Int64Ty
, std::min(static_cast<uint64_t>(63),
12676 Amt
->getZExtValue())),
12678 return Builder
.CreateAdd(Ops
[0], Ops
[1]);
12680 case NEON::BI__builtin_neon_vsrad_n_u64
: {
12681 llvm::ConstantInt
*Amt
= cast
<ConstantInt
>(EmitScalarExpr(E
->getArg(2)));
12682 uint64_t ShiftAmt
= Amt
->getZExtValue();
12683 // Right-shifting an unsigned value by its size yields 0.
12684 // As Op + 0 = Op, return Ops[0] directly.
12685 if (ShiftAmt
== 64)
12687 Ops
[1] = Builder
.CreateLShr(Ops
[1], ConstantInt::get(Int64Ty
, ShiftAmt
),
12689 return Builder
.CreateAdd(Ops
[0], Ops
[1]);
12691 case NEON::BI__builtin_neon_vqdmlalh_lane_s16
:
12692 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16
:
12693 case NEON::BI__builtin_neon_vqdmlslh_lane_s16
:
12694 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16
: {
12695 Ops
[2] = Builder
.CreateExtractElement(Ops
[2], EmitScalarExpr(E
->getArg(3)),
12697 SmallVector
<Value
*, 2> ProductOps
;
12698 ProductOps
.push_back(vectorWrapScalar16(Ops
[1]));
12699 ProductOps
.push_back(vectorWrapScalar16(Ops
[2]));
12700 auto *VTy
= llvm::FixedVectorType::get(Int32Ty
, 4);
12701 Ops
[1] = EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_sqdmull
, VTy
),
12702 ProductOps
, "vqdmlXl");
12703 Constant
*CI
= ConstantInt::get(SizeTy
, 0);
12704 Ops
[1] = Builder
.CreateExtractElement(Ops
[1], CI
, "lane0");
12707 unsigned AccInt
= (BuiltinID
== NEON::BI__builtin_neon_vqdmlalh_lane_s16
||
12708 BuiltinID
== NEON::BI__builtin_neon_vqdmlalh_laneq_s16
)
12709 ? Intrinsic::aarch64_neon_sqadd
12710 : Intrinsic::aarch64_neon_sqsub
;
12711 return EmitNeonCall(CGM
.getIntrinsic(AccInt
, Int32Ty
), Ops
, "vqdmlXl");
12713 case NEON::BI__builtin_neon_vqdmlals_s32
:
12714 case NEON::BI__builtin_neon_vqdmlsls_s32
: {
12715 SmallVector
<Value
*, 2> ProductOps
;
12716 ProductOps
.push_back(Ops
[1]);
12717 ProductOps
.push_back(EmitScalarExpr(E
->getArg(2)));
12719 EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar
),
12720 ProductOps
, "vqdmlXl");
12722 unsigned AccumInt
= BuiltinID
== NEON::BI__builtin_neon_vqdmlals_s32
12723 ? Intrinsic::aarch64_neon_sqadd
12724 : Intrinsic::aarch64_neon_sqsub
;
12725 return EmitNeonCall(CGM
.getIntrinsic(AccumInt
, Int64Ty
), Ops
, "vqdmlXl");
12727 case NEON::BI__builtin_neon_vqdmlals_lane_s32
:
12728 case NEON::BI__builtin_neon_vqdmlals_laneq_s32
:
12729 case NEON::BI__builtin_neon_vqdmlsls_lane_s32
:
12730 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32
: {
12731 Ops
[2] = Builder
.CreateExtractElement(Ops
[2], EmitScalarExpr(E
->getArg(3)),
12733 SmallVector
<Value
*, 2> ProductOps
;
12734 ProductOps
.push_back(Ops
[1]);
12735 ProductOps
.push_back(Ops
[2]);
12737 EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar
),
12738 ProductOps
, "vqdmlXl");
12741 unsigned AccInt
= (BuiltinID
== NEON::BI__builtin_neon_vqdmlals_lane_s32
||
12742 BuiltinID
== NEON::BI__builtin_neon_vqdmlals_laneq_s32
)
12743 ? Intrinsic::aarch64_neon_sqadd
12744 : Intrinsic::aarch64_neon_sqsub
;
12745 return EmitNeonCall(CGM
.getIntrinsic(AccInt
, Int64Ty
), Ops
, "vqdmlXl");
12747 case NEON::BI__builtin_neon_vget_lane_bf16
:
12748 case NEON::BI__builtin_neon_vduph_lane_bf16
:
12749 case NEON::BI__builtin_neon_vduph_lane_f16
: {
12750 return Builder
.CreateExtractElement(Ops
[0], EmitScalarExpr(E
->getArg(1)),
12753 case NEON::BI__builtin_neon_vgetq_lane_bf16
:
12754 case NEON::BI__builtin_neon_vduph_laneq_bf16
:
12755 case NEON::BI__builtin_neon_vduph_laneq_f16
: {
12756 return Builder
.CreateExtractElement(Ops
[0], EmitScalarExpr(E
->getArg(1)),
12760 case clang::AArch64::BI_InterlockedAdd
:
12761 case clang::AArch64::BI_InterlockedAdd64
: {
12762 Address DestAddr
= CheckAtomicAlignment(*this, E
);
12763 Value
*Val
= EmitScalarExpr(E
->getArg(1));
12764 AtomicRMWInst
*RMWI
=
12765 Builder
.CreateAtomicRMW(AtomicRMWInst::Add
, DestAddr
, Val
,
12766 llvm::AtomicOrdering::SequentiallyConsistent
);
12767 return Builder
.CreateAdd(RMWI
, Val
);
12771 llvm::FixedVectorType
*VTy
= GetNeonType(this, Type
);
12772 llvm::Type
*Ty
= VTy
;
12776 // Not all intrinsics handled by the common case work for AArch64 yet, so only
12777 // defer to common code if it's been added to our special map.
12778 Builtin
= findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap
, BuiltinID
,
12779 AArch64SIMDIntrinsicsProvenSorted
);
12782 return EmitCommonNeonBuiltinExpr(
12783 Builtin
->BuiltinID
, Builtin
->LLVMIntrinsic
, Builtin
->AltLLVMIntrinsic
,
12784 Builtin
->NameHint
, Builtin
->TypeModifier
, E
, Ops
,
12785 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch
);
12787 if (Value
*V
= EmitAArch64TblBuiltinExpr(*this, BuiltinID
, E
, Ops
, Arch
))
12791 switch (BuiltinID
) {
12792 default: return nullptr;
12793 case NEON::BI__builtin_neon_vbsl_v
:
12794 case NEON::BI__builtin_neon_vbslq_v
: {
12795 llvm::Type
*BitTy
= llvm::VectorType::getInteger(VTy
);
12796 Ops
[0] = Builder
.CreateBitCast(Ops
[0], BitTy
, "vbsl");
12797 Ops
[1] = Builder
.CreateBitCast(Ops
[1], BitTy
, "vbsl");
12798 Ops
[2] = Builder
.CreateBitCast(Ops
[2], BitTy
, "vbsl");
12800 Ops
[1] = Builder
.CreateAnd(Ops
[0], Ops
[1], "vbsl");
12801 Ops
[2] = Builder
.CreateAnd(Builder
.CreateNot(Ops
[0]), Ops
[2], "vbsl");
12802 Ops
[0] = Builder
.CreateOr(Ops
[1], Ops
[2], "vbsl");
12803 return Builder
.CreateBitCast(Ops
[0], Ty
);
12805 case NEON::BI__builtin_neon_vfma_lane_v
:
12806 case NEON::BI__builtin_neon_vfmaq_lane_v
: { // Only used for FP types
12807 // The ARM builtins (and instructions) have the addend as the first
12808 // operand, but the 'fma' intrinsics have it last. Swap it around here.
12809 Value
*Addend
= Ops
[0];
12810 Value
*Multiplicand
= Ops
[1];
12811 Value
*LaneSource
= Ops
[2];
12812 Ops
[0] = Multiplicand
;
12813 Ops
[1] = LaneSource
;
12816 // Now adjust things to handle the lane access.
12817 auto *SourceTy
= BuiltinID
== NEON::BI__builtin_neon_vfmaq_lane_v
12818 ? llvm::FixedVectorType::get(VTy
->getElementType(),
12819 VTy
->getNumElements() / 2)
12821 llvm::Constant
*cst
= cast
<Constant
>(Ops
[3]);
12822 Value
*SV
= llvm::ConstantVector::getSplat(VTy
->getElementCount(), cst
);
12823 Ops
[1] = Builder
.CreateBitCast(Ops
[1], SourceTy
);
12824 Ops
[1] = Builder
.CreateShuffleVector(Ops
[1], Ops
[1], SV
, "lane");
12827 Int
= Builder
.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12829 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "fmla");
12831 case NEON::BI__builtin_neon_vfma_laneq_v
: {
12832 auto *VTy
= cast
<llvm::FixedVectorType
>(Ty
);
12833 // v1f64 fma should be mapped to Neon scalar f64 fma
12834 if (VTy
&& VTy
->getElementType() == DoubleTy
) {
12835 Ops
[0] = Builder
.CreateBitCast(Ops
[0], DoubleTy
);
12836 Ops
[1] = Builder
.CreateBitCast(Ops
[1], DoubleTy
);
12837 llvm::FixedVectorType
*VTy
=
12838 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64
, false, true));
12839 Ops
[2] = Builder
.CreateBitCast(Ops
[2], VTy
);
12840 Ops
[2] = Builder
.CreateExtractElement(Ops
[2], Ops
[3], "extract");
12842 Result
= emitCallMaybeConstrainedFPBuiltin(
12843 *this, Intrinsic::fma
, Intrinsic::experimental_constrained_fma
,
12844 DoubleTy
, {Ops
[1], Ops
[2], Ops
[0]});
12845 return Builder
.CreateBitCast(Result
, Ty
);
12847 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
12848 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
12850 auto *STy
= llvm::FixedVectorType::get(VTy
->getElementType(),
12851 VTy
->getNumElements() * 2);
12852 Ops
[2] = Builder
.CreateBitCast(Ops
[2], STy
);
12853 Value
*SV
= llvm::ConstantVector::getSplat(VTy
->getElementCount(),
12854 cast
<ConstantInt
>(Ops
[3]));
12855 Ops
[2] = Builder
.CreateShuffleVector(Ops
[2], Ops
[2], SV
, "lane");
12857 return emitCallMaybeConstrainedFPBuiltin(
12858 *this, Intrinsic::fma
, Intrinsic::experimental_constrained_fma
, Ty
,
12859 {Ops
[2], Ops
[1], Ops
[0]});
12861 case NEON::BI__builtin_neon_vfmaq_laneq_v
: {
12862 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
12863 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
12865 Ops
[2] = Builder
.CreateBitCast(Ops
[2], Ty
);
12866 Ops
[2] = EmitNeonSplat(Ops
[2], cast
<ConstantInt
>(Ops
[3]));
12867 return emitCallMaybeConstrainedFPBuiltin(
12868 *this, Intrinsic::fma
, Intrinsic::experimental_constrained_fma
, Ty
,
12869 {Ops
[2], Ops
[1], Ops
[0]});
12871 case NEON::BI__builtin_neon_vfmah_lane_f16
:
12872 case NEON::BI__builtin_neon_vfmas_lane_f32
:
12873 case NEON::BI__builtin_neon_vfmah_laneq_f16
:
12874 case NEON::BI__builtin_neon_vfmas_laneq_f32
:
12875 case NEON::BI__builtin_neon_vfmad_lane_f64
:
12876 case NEON::BI__builtin_neon_vfmad_laneq_f64
: {
12877 Ops
.push_back(EmitScalarExpr(E
->getArg(3)));
12878 llvm::Type
*Ty
= ConvertType(E
->getCallReturnType(getContext()));
12879 Ops
[2] = Builder
.CreateExtractElement(Ops
[2], Ops
[3], "extract");
12880 return emitCallMaybeConstrainedFPBuiltin(
12881 *this, Intrinsic::fma
, Intrinsic::experimental_constrained_fma
, Ty
,
12882 {Ops
[1], Ops
[2], Ops
[0]});
12884 case NEON::BI__builtin_neon_vmull_v
:
12885 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12886 Int
= usgn
? Intrinsic::aarch64_neon_umull
: Intrinsic::aarch64_neon_smull
;
12887 if (Type
.isPoly()) Int
= Intrinsic::aarch64_neon_pmull
;
12888 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vmull");
12889 case NEON::BI__builtin_neon_vmax_v
:
12890 case NEON::BI__builtin_neon_vmaxq_v
:
12891 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12892 Int
= usgn
? Intrinsic::aarch64_neon_umax
: Intrinsic::aarch64_neon_smax
;
12893 if (Ty
->isFPOrFPVectorTy()) Int
= Intrinsic::aarch64_neon_fmax
;
12894 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vmax");
12895 case NEON::BI__builtin_neon_vmaxh_f16
: {
12896 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12897 Int
= Intrinsic::aarch64_neon_fmax
;
12898 return EmitNeonCall(CGM
.getIntrinsic(Int
, HalfTy
), Ops
, "vmax");
12900 case NEON::BI__builtin_neon_vmin_v
:
12901 case NEON::BI__builtin_neon_vminq_v
:
12902 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12903 Int
= usgn
? Intrinsic::aarch64_neon_umin
: Intrinsic::aarch64_neon_smin
;
12904 if (Ty
->isFPOrFPVectorTy()) Int
= Intrinsic::aarch64_neon_fmin
;
12905 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vmin");
12906 case NEON::BI__builtin_neon_vminh_f16
: {
12907 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12908 Int
= Intrinsic::aarch64_neon_fmin
;
12909 return EmitNeonCall(CGM
.getIntrinsic(Int
, HalfTy
), Ops
, "vmin");
12911 case NEON::BI__builtin_neon_vabd_v
:
12912 case NEON::BI__builtin_neon_vabdq_v
:
12913 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12914 Int
= usgn
? Intrinsic::aarch64_neon_uabd
: Intrinsic::aarch64_neon_sabd
;
12915 if (Ty
->isFPOrFPVectorTy()) Int
= Intrinsic::aarch64_neon_fabd
;
12916 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vabd");
12917 case NEON::BI__builtin_neon_vpadal_v
:
12918 case NEON::BI__builtin_neon_vpadalq_v
: {
12919 unsigned ArgElts
= VTy
->getNumElements();
12920 llvm::IntegerType
*EltTy
= cast
<IntegerType
>(VTy
->getElementType());
12921 unsigned BitWidth
= EltTy
->getBitWidth();
12922 auto *ArgTy
= llvm::FixedVectorType::get(
12923 llvm::IntegerType::get(getLLVMContext(), BitWidth
/ 2), 2 * ArgElts
);
12924 llvm::Type
* Tys
[2] = { VTy
, ArgTy
};
12925 Int
= usgn
? Intrinsic::aarch64_neon_uaddlp
: Intrinsic::aarch64_neon_saddlp
;
12926 SmallVector
<llvm::Value
*, 1> TmpOps
;
12927 TmpOps
.push_back(Ops
[1]);
12928 Function
*F
= CGM
.getIntrinsic(Int
, Tys
);
12929 llvm::Value
*tmp
= EmitNeonCall(F
, TmpOps
, "vpadal");
12930 llvm::Value
*addend
= Builder
.CreateBitCast(Ops
[0], tmp
->getType());
12931 return Builder
.CreateAdd(tmp
, addend
);
12933 case NEON::BI__builtin_neon_vpmin_v
:
12934 case NEON::BI__builtin_neon_vpminq_v
:
12935 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12936 Int
= usgn
? Intrinsic::aarch64_neon_uminp
: Intrinsic::aarch64_neon_sminp
;
12937 if (Ty
->isFPOrFPVectorTy()) Int
= Intrinsic::aarch64_neon_fminp
;
12938 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vpmin");
12939 case NEON::BI__builtin_neon_vpmax_v
:
12940 case NEON::BI__builtin_neon_vpmaxq_v
:
12941 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12942 Int
= usgn
? Intrinsic::aarch64_neon_umaxp
: Intrinsic::aarch64_neon_smaxp
;
12943 if (Ty
->isFPOrFPVectorTy()) Int
= Intrinsic::aarch64_neon_fmaxp
;
12944 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vpmax");
12945 case NEON::BI__builtin_neon_vminnm_v
:
12946 case NEON::BI__builtin_neon_vminnmq_v
:
12947 Int
= Intrinsic::aarch64_neon_fminnm
;
12948 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vminnm");
12949 case NEON::BI__builtin_neon_vminnmh_f16
:
12950 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12951 Int
= Intrinsic::aarch64_neon_fminnm
;
12952 return EmitNeonCall(CGM
.getIntrinsic(Int
, HalfTy
), Ops
, "vminnm");
12953 case NEON::BI__builtin_neon_vmaxnm_v
:
12954 case NEON::BI__builtin_neon_vmaxnmq_v
:
12955 Int
= Intrinsic::aarch64_neon_fmaxnm
;
12956 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vmaxnm");
12957 case NEON::BI__builtin_neon_vmaxnmh_f16
:
12958 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12959 Int
= Intrinsic::aarch64_neon_fmaxnm
;
12960 return EmitNeonCall(CGM
.getIntrinsic(Int
, HalfTy
), Ops
, "vmaxnm");
12961 case NEON::BI__builtin_neon_vrecpss_f32
: {
12962 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12963 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_frecps
, FloatTy
),
12966 case NEON::BI__builtin_neon_vrecpsd_f64
:
12967 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12968 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_frecps
, DoubleTy
),
12970 case NEON::BI__builtin_neon_vrecpsh_f16
:
12971 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
12972 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_frecps
, HalfTy
),
12974 case NEON::BI__builtin_neon_vqshrun_n_v
:
12975 Int
= Intrinsic::aarch64_neon_sqshrun
;
12976 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vqshrun_n");
12977 case NEON::BI__builtin_neon_vqrshrun_n_v
:
12978 Int
= Intrinsic::aarch64_neon_sqrshrun
;
12979 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vqrshrun_n");
12980 case NEON::BI__builtin_neon_vqshrn_n_v
:
12981 Int
= usgn
? Intrinsic::aarch64_neon_uqshrn
: Intrinsic::aarch64_neon_sqshrn
;
12982 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vqshrn_n");
12983 case NEON::BI__builtin_neon_vrshrn_n_v
:
12984 Int
= Intrinsic::aarch64_neon_rshrn
;
12985 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vrshrn_n");
12986 case NEON::BI__builtin_neon_vqrshrn_n_v
:
12987 Int
= usgn
? Intrinsic::aarch64_neon_uqrshrn
: Intrinsic::aarch64_neon_sqrshrn
;
12988 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vqrshrn_n");
12989 case NEON::BI__builtin_neon_vrndah_f16
: {
12990 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
12991 Int
= Builder
.getIsFPConstrained()
12992 ? Intrinsic::experimental_constrained_round
12993 : Intrinsic::round
;
12994 return EmitNeonCall(CGM
.getIntrinsic(Int
, HalfTy
), Ops
, "vrnda");
12996 case NEON::BI__builtin_neon_vrnda_v
:
12997 case NEON::BI__builtin_neon_vrndaq_v
: {
12998 Int
= Builder
.getIsFPConstrained()
12999 ? Intrinsic::experimental_constrained_round
13000 : Intrinsic::round
;
13001 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vrnda");
13003 case NEON::BI__builtin_neon_vrndih_f16
: {
13004 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13005 Int
= Builder
.getIsFPConstrained()
13006 ? Intrinsic::experimental_constrained_nearbyint
13007 : Intrinsic::nearbyint
;
13008 return EmitNeonCall(CGM
.getIntrinsic(Int
, HalfTy
), Ops
, "vrndi");
13010 case NEON::BI__builtin_neon_vrndmh_f16
: {
13011 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13012 Int
= Builder
.getIsFPConstrained()
13013 ? Intrinsic::experimental_constrained_floor
13014 : Intrinsic::floor
;
13015 return EmitNeonCall(CGM
.getIntrinsic(Int
, HalfTy
), Ops
, "vrndm");
13017 case NEON::BI__builtin_neon_vrndm_v
:
13018 case NEON::BI__builtin_neon_vrndmq_v
: {
13019 Int
= Builder
.getIsFPConstrained()
13020 ? Intrinsic::experimental_constrained_floor
13021 : Intrinsic::floor
;
13022 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vrndm");
13024 case NEON::BI__builtin_neon_vrndnh_f16
: {
13025 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13026 Int
= Builder
.getIsFPConstrained()
13027 ? Intrinsic::experimental_constrained_roundeven
13028 : Intrinsic::roundeven
;
13029 return EmitNeonCall(CGM
.getIntrinsic(Int
, HalfTy
), Ops
, "vrndn");
13031 case NEON::BI__builtin_neon_vrndn_v
:
13032 case NEON::BI__builtin_neon_vrndnq_v
: {
13033 Int
= Builder
.getIsFPConstrained()
13034 ? Intrinsic::experimental_constrained_roundeven
13035 : Intrinsic::roundeven
;
13036 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vrndn");
13038 case NEON::BI__builtin_neon_vrndns_f32
: {
13039 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13040 Int
= Builder
.getIsFPConstrained()
13041 ? Intrinsic::experimental_constrained_roundeven
13042 : Intrinsic::roundeven
;
13043 return EmitNeonCall(CGM
.getIntrinsic(Int
, FloatTy
), Ops
, "vrndn");
13045 case NEON::BI__builtin_neon_vrndph_f16
: {
13046 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13047 Int
= Builder
.getIsFPConstrained()
13048 ? Intrinsic::experimental_constrained_ceil
13050 return EmitNeonCall(CGM
.getIntrinsic(Int
, HalfTy
), Ops
, "vrndp");
13052 case NEON::BI__builtin_neon_vrndp_v
:
13053 case NEON::BI__builtin_neon_vrndpq_v
: {
13054 Int
= Builder
.getIsFPConstrained()
13055 ? Intrinsic::experimental_constrained_ceil
13057 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vrndp");
13059 case NEON::BI__builtin_neon_vrndxh_f16
: {
13060 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13061 Int
= Builder
.getIsFPConstrained()
13062 ? Intrinsic::experimental_constrained_rint
13064 return EmitNeonCall(CGM
.getIntrinsic(Int
, HalfTy
), Ops
, "vrndx");
13066 case NEON::BI__builtin_neon_vrndx_v
:
13067 case NEON::BI__builtin_neon_vrndxq_v
: {
13068 Int
= Builder
.getIsFPConstrained()
13069 ? Intrinsic::experimental_constrained_rint
13071 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vrndx");
13073 case NEON::BI__builtin_neon_vrndh_f16
: {
13074 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13075 Int
= Builder
.getIsFPConstrained()
13076 ? Intrinsic::experimental_constrained_trunc
13077 : Intrinsic::trunc
;
13078 return EmitNeonCall(CGM
.getIntrinsic(Int
, HalfTy
), Ops
, "vrndz");
13080 case NEON::BI__builtin_neon_vrnd32x_f32
:
13081 case NEON::BI__builtin_neon_vrnd32xq_f32
:
13082 case NEON::BI__builtin_neon_vrnd32x_f64
:
13083 case NEON::BI__builtin_neon_vrnd32xq_f64
: {
13084 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13085 Int
= Intrinsic::aarch64_neon_frint32x
;
13086 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vrnd32x");
13088 case NEON::BI__builtin_neon_vrnd32z_f32
:
13089 case NEON::BI__builtin_neon_vrnd32zq_f32
:
13090 case NEON::BI__builtin_neon_vrnd32z_f64
:
13091 case NEON::BI__builtin_neon_vrnd32zq_f64
: {
13092 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13093 Int
= Intrinsic::aarch64_neon_frint32z
;
13094 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vrnd32z");
13096 case NEON::BI__builtin_neon_vrnd64x_f32
:
13097 case NEON::BI__builtin_neon_vrnd64xq_f32
:
13098 case NEON::BI__builtin_neon_vrnd64x_f64
:
13099 case NEON::BI__builtin_neon_vrnd64xq_f64
: {
13100 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13101 Int
= Intrinsic::aarch64_neon_frint64x
;
13102 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vrnd64x");
13104 case NEON::BI__builtin_neon_vrnd64z_f32
:
13105 case NEON::BI__builtin_neon_vrnd64zq_f32
:
13106 case NEON::BI__builtin_neon_vrnd64z_f64
:
13107 case NEON::BI__builtin_neon_vrnd64zq_f64
: {
13108 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13109 Int
= Intrinsic::aarch64_neon_frint64z
;
13110 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vrnd64z");
13112 case NEON::BI__builtin_neon_vrnd_v
:
13113 case NEON::BI__builtin_neon_vrndq_v
: {
13114 Int
= Builder
.getIsFPConstrained()
13115 ? Intrinsic::experimental_constrained_trunc
13116 : Intrinsic::trunc
;
13117 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vrndz");
13119 case NEON::BI__builtin_neon_vcvt_f64_v
:
13120 case NEON::BI__builtin_neon_vcvtq_f64_v
:
13121 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
13122 Ty
= GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64
, false, quad
));
13123 return usgn
? Builder
.CreateUIToFP(Ops
[0], Ty
, "vcvt")
13124 : Builder
.CreateSIToFP(Ops
[0], Ty
, "vcvt");
13125 case NEON::BI__builtin_neon_vcvt_f64_f32
: {
13126 assert(Type
.getEltType() == NeonTypeFlags::Float64
&& quad
&&
13127 "unexpected vcvt_f64_f32 builtin");
13128 NeonTypeFlags SrcFlag
= NeonTypeFlags(NeonTypeFlags::Float32
, false, false);
13129 Ops
[0] = Builder
.CreateBitCast(Ops
[0], GetNeonType(this, SrcFlag
));
13131 return Builder
.CreateFPExt(Ops
[0], Ty
, "vcvt");
13133 case NEON::BI__builtin_neon_vcvt_f32_f64
: {
13134 assert(Type
.getEltType() == NeonTypeFlags::Float32
&&
13135 "unexpected vcvt_f32_f64 builtin");
13136 NeonTypeFlags SrcFlag
= NeonTypeFlags(NeonTypeFlags::Float64
, false, true);
13137 Ops
[0] = Builder
.CreateBitCast(Ops
[0], GetNeonType(this, SrcFlag
));
13139 return Builder
.CreateFPTrunc(Ops
[0], Ty
, "vcvt");
13141 case NEON::BI__builtin_neon_vcvt_s32_v
:
13142 case NEON::BI__builtin_neon_vcvt_u32_v
:
13143 case NEON::BI__builtin_neon_vcvt_s64_v
:
13144 case NEON::BI__builtin_neon_vcvt_u64_v
:
13145 case NEON::BI__builtin_neon_vcvt_s16_f16
:
13146 case NEON::BI__builtin_neon_vcvt_u16_f16
:
13147 case NEON::BI__builtin_neon_vcvtq_s32_v
:
13148 case NEON::BI__builtin_neon_vcvtq_u32_v
:
13149 case NEON::BI__builtin_neon_vcvtq_s64_v
:
13150 case NEON::BI__builtin_neon_vcvtq_u64_v
:
13151 case NEON::BI__builtin_neon_vcvtq_s16_f16
:
13152 case NEON::BI__builtin_neon_vcvtq_u16_f16
: {
13154 usgn
? Intrinsic::aarch64_neon_fcvtzu
: Intrinsic::aarch64_neon_fcvtzs
;
13155 llvm::Type
*Tys
[2] = {Ty
, GetFloatNeonType(this, Type
)};
13156 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vcvtz");
13158 case NEON::BI__builtin_neon_vcvta_s16_f16
:
13159 case NEON::BI__builtin_neon_vcvta_u16_f16
:
13160 case NEON::BI__builtin_neon_vcvta_s32_v
:
13161 case NEON::BI__builtin_neon_vcvtaq_s16_f16
:
13162 case NEON::BI__builtin_neon_vcvtaq_s32_v
:
13163 case NEON::BI__builtin_neon_vcvta_u32_v
:
13164 case NEON::BI__builtin_neon_vcvtaq_u16_f16
:
13165 case NEON::BI__builtin_neon_vcvtaq_u32_v
:
13166 case NEON::BI__builtin_neon_vcvta_s64_v
:
13167 case NEON::BI__builtin_neon_vcvtaq_s64_v
:
13168 case NEON::BI__builtin_neon_vcvta_u64_v
:
13169 case NEON::BI__builtin_neon_vcvtaq_u64_v
: {
13170 Int
= usgn
? Intrinsic::aarch64_neon_fcvtau
: Intrinsic::aarch64_neon_fcvtas
;
13171 llvm::Type
*Tys
[2] = { Ty
, GetFloatNeonType(this, Type
) };
13172 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vcvta");
13174 case NEON::BI__builtin_neon_vcvtm_s16_f16
:
13175 case NEON::BI__builtin_neon_vcvtm_s32_v
:
13176 case NEON::BI__builtin_neon_vcvtmq_s16_f16
:
13177 case NEON::BI__builtin_neon_vcvtmq_s32_v
:
13178 case NEON::BI__builtin_neon_vcvtm_u16_f16
:
13179 case NEON::BI__builtin_neon_vcvtm_u32_v
:
13180 case NEON::BI__builtin_neon_vcvtmq_u16_f16
:
13181 case NEON::BI__builtin_neon_vcvtmq_u32_v
:
13182 case NEON::BI__builtin_neon_vcvtm_s64_v
:
13183 case NEON::BI__builtin_neon_vcvtmq_s64_v
:
13184 case NEON::BI__builtin_neon_vcvtm_u64_v
:
13185 case NEON::BI__builtin_neon_vcvtmq_u64_v
: {
13186 Int
= usgn
? Intrinsic::aarch64_neon_fcvtmu
: Intrinsic::aarch64_neon_fcvtms
;
13187 llvm::Type
*Tys
[2] = { Ty
, GetFloatNeonType(this, Type
) };
13188 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vcvtm");
13190 case NEON::BI__builtin_neon_vcvtn_s16_f16
:
13191 case NEON::BI__builtin_neon_vcvtn_s32_v
:
13192 case NEON::BI__builtin_neon_vcvtnq_s16_f16
:
13193 case NEON::BI__builtin_neon_vcvtnq_s32_v
:
13194 case NEON::BI__builtin_neon_vcvtn_u16_f16
:
13195 case NEON::BI__builtin_neon_vcvtn_u32_v
:
13196 case NEON::BI__builtin_neon_vcvtnq_u16_f16
:
13197 case NEON::BI__builtin_neon_vcvtnq_u32_v
:
13198 case NEON::BI__builtin_neon_vcvtn_s64_v
:
13199 case NEON::BI__builtin_neon_vcvtnq_s64_v
:
13200 case NEON::BI__builtin_neon_vcvtn_u64_v
:
13201 case NEON::BI__builtin_neon_vcvtnq_u64_v
: {
13202 Int
= usgn
? Intrinsic::aarch64_neon_fcvtnu
: Intrinsic::aarch64_neon_fcvtns
;
13203 llvm::Type
*Tys
[2] = { Ty
, GetFloatNeonType(this, Type
) };
13204 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vcvtn");
13206 case NEON::BI__builtin_neon_vcvtp_s16_f16
:
13207 case NEON::BI__builtin_neon_vcvtp_s32_v
:
13208 case NEON::BI__builtin_neon_vcvtpq_s16_f16
:
13209 case NEON::BI__builtin_neon_vcvtpq_s32_v
:
13210 case NEON::BI__builtin_neon_vcvtp_u16_f16
:
13211 case NEON::BI__builtin_neon_vcvtp_u32_v
:
13212 case NEON::BI__builtin_neon_vcvtpq_u16_f16
:
13213 case NEON::BI__builtin_neon_vcvtpq_u32_v
:
13214 case NEON::BI__builtin_neon_vcvtp_s64_v
:
13215 case NEON::BI__builtin_neon_vcvtpq_s64_v
:
13216 case NEON::BI__builtin_neon_vcvtp_u64_v
:
13217 case NEON::BI__builtin_neon_vcvtpq_u64_v
: {
13218 Int
= usgn
? Intrinsic::aarch64_neon_fcvtpu
: Intrinsic::aarch64_neon_fcvtps
;
13219 llvm::Type
*Tys
[2] = { Ty
, GetFloatNeonType(this, Type
) };
13220 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vcvtp");
13222 case NEON::BI__builtin_neon_vmulx_v
:
13223 case NEON::BI__builtin_neon_vmulxq_v
: {
13224 Int
= Intrinsic::aarch64_neon_fmulx
;
13225 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vmulx");
13227 case NEON::BI__builtin_neon_vmulxh_lane_f16
:
13228 case NEON::BI__builtin_neon_vmulxh_laneq_f16
: {
13229 // vmulx_lane should be mapped to Neon scalar mulx after
13230 // extracting the scalar element
13231 Ops
.push_back(EmitScalarExpr(E
->getArg(2)));
13232 Ops
[1] = Builder
.CreateExtractElement(Ops
[1], Ops
[2], "extract");
13234 Int
= Intrinsic::aarch64_neon_fmulx
;
13235 return EmitNeonCall(CGM
.getIntrinsic(Int
, HalfTy
), Ops
, "vmulx");
13237 case NEON::BI__builtin_neon_vmul_lane_v
:
13238 case NEON::BI__builtin_neon_vmul_laneq_v
: {
13239 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
13241 if (BuiltinID
== NEON::BI__builtin_neon_vmul_laneq_v
)
13243 Ops
[0] = Builder
.CreateBitCast(Ops
[0], DoubleTy
);
13244 llvm::FixedVectorType
*VTy
=
13245 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64
, false, Quad
));
13246 Ops
[1] = Builder
.CreateBitCast(Ops
[1], VTy
);
13247 Ops
[1] = Builder
.CreateExtractElement(Ops
[1], Ops
[2], "extract");
13248 Value
*Result
= Builder
.CreateFMul(Ops
[0], Ops
[1]);
13249 return Builder
.CreateBitCast(Result
, Ty
);
13251 case NEON::BI__builtin_neon_vnegd_s64
:
13252 return Builder
.CreateNeg(EmitScalarExpr(E
->getArg(0)), "vnegd");
13253 case NEON::BI__builtin_neon_vnegh_f16
:
13254 return Builder
.CreateFNeg(EmitScalarExpr(E
->getArg(0)), "vnegh");
13255 case NEON::BI__builtin_neon_vpmaxnm_v
:
13256 case NEON::BI__builtin_neon_vpmaxnmq_v
: {
13257 Int
= Intrinsic::aarch64_neon_fmaxnmp
;
13258 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vpmaxnm");
13260 case NEON::BI__builtin_neon_vpminnm_v
:
13261 case NEON::BI__builtin_neon_vpminnmq_v
: {
13262 Int
= Intrinsic::aarch64_neon_fminnmp
;
13263 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vpminnm");
13265 case NEON::BI__builtin_neon_vsqrth_f16
: {
13266 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13267 Int
= Builder
.getIsFPConstrained()
13268 ? Intrinsic::experimental_constrained_sqrt
13270 return EmitNeonCall(CGM
.getIntrinsic(Int
, HalfTy
), Ops
, "vsqrt");
13272 case NEON::BI__builtin_neon_vsqrt_v
:
13273 case NEON::BI__builtin_neon_vsqrtq_v
: {
13274 Int
= Builder
.getIsFPConstrained()
13275 ? Intrinsic::experimental_constrained_sqrt
13277 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
13278 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vsqrt");
13280 case NEON::BI__builtin_neon_vrbit_v
:
13281 case NEON::BI__builtin_neon_vrbitq_v
: {
13282 Int
= Intrinsic::bitreverse
;
13283 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vrbit");
13285 case NEON::BI__builtin_neon_vaddv_u8
:
13286 // FIXME: These are handled by the AArch64 scalar code.
13289 case NEON::BI__builtin_neon_vaddv_s8
: {
13290 Int
= usgn
? Intrinsic::aarch64_neon_uaddv
: Intrinsic::aarch64_neon_saddv
;
13292 VTy
= llvm::FixedVectorType::get(Int8Ty
, 8);
13293 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13294 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13295 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vaddv");
13296 return Builder
.CreateTrunc(Ops
[0], Int8Ty
);
13298 case NEON::BI__builtin_neon_vaddv_u16
:
13301 case NEON::BI__builtin_neon_vaddv_s16
: {
13302 Int
= usgn
? Intrinsic::aarch64_neon_uaddv
: Intrinsic::aarch64_neon_saddv
;
13304 VTy
= llvm::FixedVectorType::get(Int16Ty
, 4);
13305 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13306 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13307 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vaddv");
13308 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
13310 case NEON::BI__builtin_neon_vaddvq_u8
:
13313 case NEON::BI__builtin_neon_vaddvq_s8
: {
13314 Int
= usgn
? Intrinsic::aarch64_neon_uaddv
: Intrinsic::aarch64_neon_saddv
;
13316 VTy
= llvm::FixedVectorType::get(Int8Ty
, 16);
13317 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13318 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13319 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vaddv");
13320 return Builder
.CreateTrunc(Ops
[0], Int8Ty
);
13322 case NEON::BI__builtin_neon_vaddvq_u16
:
13325 case NEON::BI__builtin_neon_vaddvq_s16
: {
13326 Int
= usgn
? Intrinsic::aarch64_neon_uaddv
: Intrinsic::aarch64_neon_saddv
;
13328 VTy
= llvm::FixedVectorType::get(Int16Ty
, 8);
13329 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13330 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13331 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vaddv");
13332 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
13334 case NEON::BI__builtin_neon_vmaxv_u8
: {
13335 Int
= Intrinsic::aarch64_neon_umaxv
;
13337 VTy
= llvm::FixedVectorType::get(Int8Ty
, 8);
13338 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13339 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13340 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
13341 return Builder
.CreateTrunc(Ops
[0], Int8Ty
);
13343 case NEON::BI__builtin_neon_vmaxv_u16
: {
13344 Int
= Intrinsic::aarch64_neon_umaxv
;
13346 VTy
= llvm::FixedVectorType::get(Int16Ty
, 4);
13347 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13348 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13349 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
13350 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
13352 case NEON::BI__builtin_neon_vmaxvq_u8
: {
13353 Int
= Intrinsic::aarch64_neon_umaxv
;
13355 VTy
= llvm::FixedVectorType::get(Int8Ty
, 16);
13356 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13357 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13358 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
13359 return Builder
.CreateTrunc(Ops
[0], Int8Ty
);
13361 case NEON::BI__builtin_neon_vmaxvq_u16
: {
13362 Int
= Intrinsic::aarch64_neon_umaxv
;
13364 VTy
= llvm::FixedVectorType::get(Int16Ty
, 8);
13365 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13366 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13367 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
13368 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
13370 case NEON::BI__builtin_neon_vmaxv_s8
: {
13371 Int
= Intrinsic::aarch64_neon_smaxv
;
13373 VTy
= llvm::FixedVectorType::get(Int8Ty
, 8);
13374 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13375 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13376 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
13377 return Builder
.CreateTrunc(Ops
[0], Int8Ty
);
13379 case NEON::BI__builtin_neon_vmaxv_s16
: {
13380 Int
= Intrinsic::aarch64_neon_smaxv
;
13382 VTy
= llvm::FixedVectorType::get(Int16Ty
, 4);
13383 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13384 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13385 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
13386 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
13388 case NEON::BI__builtin_neon_vmaxvq_s8
: {
13389 Int
= Intrinsic::aarch64_neon_smaxv
;
13391 VTy
= llvm::FixedVectorType::get(Int8Ty
, 16);
13392 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13393 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13394 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
13395 return Builder
.CreateTrunc(Ops
[0], Int8Ty
);
13397 case NEON::BI__builtin_neon_vmaxvq_s16
: {
13398 Int
= Intrinsic::aarch64_neon_smaxv
;
13400 VTy
= llvm::FixedVectorType::get(Int16Ty
, 8);
13401 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13402 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13403 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
13404 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
13406 case NEON::BI__builtin_neon_vmaxv_f16
: {
13407 Int
= Intrinsic::aarch64_neon_fmaxv
;
13409 VTy
= llvm::FixedVectorType::get(HalfTy
, 4);
13410 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13411 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13412 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
13413 return Builder
.CreateTrunc(Ops
[0], HalfTy
);
13415 case NEON::BI__builtin_neon_vmaxvq_f16
: {
13416 Int
= Intrinsic::aarch64_neon_fmaxv
;
13418 VTy
= llvm::FixedVectorType::get(HalfTy
, 8);
13419 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13420 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13421 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
13422 return Builder
.CreateTrunc(Ops
[0], HalfTy
);
13424 case NEON::BI__builtin_neon_vminv_u8
: {
13425 Int
= Intrinsic::aarch64_neon_uminv
;
13427 VTy
= llvm::FixedVectorType::get(Int8Ty
, 8);
13428 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13429 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13430 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vminv");
13431 return Builder
.CreateTrunc(Ops
[0], Int8Ty
);
13433 case NEON::BI__builtin_neon_vminv_u16
: {
13434 Int
= Intrinsic::aarch64_neon_uminv
;
13436 VTy
= llvm::FixedVectorType::get(Int16Ty
, 4);
13437 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13438 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13439 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vminv");
13440 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
13442 case NEON::BI__builtin_neon_vminvq_u8
: {
13443 Int
= Intrinsic::aarch64_neon_uminv
;
13445 VTy
= llvm::FixedVectorType::get(Int8Ty
, 16);
13446 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13447 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13448 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vminv");
13449 return Builder
.CreateTrunc(Ops
[0], Int8Ty
);
13451 case NEON::BI__builtin_neon_vminvq_u16
: {
13452 Int
= Intrinsic::aarch64_neon_uminv
;
13454 VTy
= llvm::FixedVectorType::get(Int16Ty
, 8);
13455 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13456 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13457 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vminv");
13458 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
13460 case NEON::BI__builtin_neon_vminv_s8
: {
13461 Int
= Intrinsic::aarch64_neon_sminv
;
13463 VTy
= llvm::FixedVectorType::get(Int8Ty
, 8);
13464 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13465 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13466 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vminv");
13467 return Builder
.CreateTrunc(Ops
[0], Int8Ty
);
13469 case NEON::BI__builtin_neon_vminv_s16
: {
13470 Int
= Intrinsic::aarch64_neon_sminv
;
13472 VTy
= llvm::FixedVectorType::get(Int16Ty
, 4);
13473 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13474 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13475 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vminv");
13476 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
13478 case NEON::BI__builtin_neon_vminvq_s8
: {
13479 Int
= Intrinsic::aarch64_neon_sminv
;
13481 VTy
= llvm::FixedVectorType::get(Int8Ty
, 16);
13482 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13483 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13484 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vminv");
13485 return Builder
.CreateTrunc(Ops
[0], Int8Ty
);
13487 case NEON::BI__builtin_neon_vminvq_s16
: {
13488 Int
= Intrinsic::aarch64_neon_sminv
;
13490 VTy
= llvm::FixedVectorType::get(Int16Ty
, 8);
13491 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13492 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13493 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vminv");
13494 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
13496 case NEON::BI__builtin_neon_vminv_f16
: {
13497 Int
= Intrinsic::aarch64_neon_fminv
;
13499 VTy
= llvm::FixedVectorType::get(HalfTy
, 4);
13500 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13501 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13502 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vminv");
13503 return Builder
.CreateTrunc(Ops
[0], HalfTy
);
13505 case NEON::BI__builtin_neon_vminvq_f16
: {
13506 Int
= Intrinsic::aarch64_neon_fminv
;
13508 VTy
= llvm::FixedVectorType::get(HalfTy
, 8);
13509 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13510 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13511 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vminv");
13512 return Builder
.CreateTrunc(Ops
[0], HalfTy
);
13514 case NEON::BI__builtin_neon_vmaxnmv_f16
: {
13515 Int
= Intrinsic::aarch64_neon_fmaxnmv
;
13517 VTy
= llvm::FixedVectorType::get(HalfTy
, 4);
13518 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13519 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13520 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxnmv");
13521 return Builder
.CreateTrunc(Ops
[0], HalfTy
);
13523 case NEON::BI__builtin_neon_vmaxnmvq_f16
: {
13524 Int
= Intrinsic::aarch64_neon_fmaxnmv
;
13526 VTy
= llvm::FixedVectorType::get(HalfTy
, 8);
13527 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13528 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13529 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxnmv");
13530 return Builder
.CreateTrunc(Ops
[0], HalfTy
);
13532 case NEON::BI__builtin_neon_vminnmv_f16
: {
13533 Int
= Intrinsic::aarch64_neon_fminnmv
;
13535 VTy
= llvm::FixedVectorType::get(HalfTy
, 4);
13536 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13537 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13538 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vminnmv");
13539 return Builder
.CreateTrunc(Ops
[0], HalfTy
);
13541 case NEON::BI__builtin_neon_vminnmvq_f16
: {
13542 Int
= Intrinsic::aarch64_neon_fminnmv
;
13544 VTy
= llvm::FixedVectorType::get(HalfTy
, 8);
13545 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13546 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13547 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vminnmv");
13548 return Builder
.CreateTrunc(Ops
[0], HalfTy
);
13550 case NEON::BI__builtin_neon_vmul_n_f64
: {
13551 Ops
[0] = Builder
.CreateBitCast(Ops
[0], DoubleTy
);
13552 Value
*RHS
= Builder
.CreateBitCast(EmitScalarExpr(E
->getArg(1)), DoubleTy
);
13553 return Builder
.CreateFMul(Ops
[0], RHS
);
13555 case NEON::BI__builtin_neon_vaddlv_u8
: {
13556 Int
= Intrinsic::aarch64_neon_uaddlv
;
13558 VTy
= llvm::FixedVectorType::get(Int8Ty
, 8);
13559 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13560 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13561 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vaddlv");
13562 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
13564 case NEON::BI__builtin_neon_vaddlv_u16
: {
13565 Int
= Intrinsic::aarch64_neon_uaddlv
;
13567 VTy
= llvm::FixedVectorType::get(Int16Ty
, 4);
13568 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13569 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13570 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vaddlv");
13572 case NEON::BI__builtin_neon_vaddlvq_u8
: {
13573 Int
= Intrinsic::aarch64_neon_uaddlv
;
13575 VTy
= llvm::FixedVectorType::get(Int8Ty
, 16);
13576 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13577 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13578 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vaddlv");
13579 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
13581 case NEON::BI__builtin_neon_vaddlvq_u16
: {
13582 Int
= Intrinsic::aarch64_neon_uaddlv
;
13584 VTy
= llvm::FixedVectorType::get(Int16Ty
, 8);
13585 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13586 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13587 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vaddlv");
13589 case NEON::BI__builtin_neon_vaddlv_s8
: {
13590 Int
= Intrinsic::aarch64_neon_saddlv
;
13592 VTy
= llvm::FixedVectorType::get(Int8Ty
, 8);
13593 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13594 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13595 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vaddlv");
13596 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
13598 case NEON::BI__builtin_neon_vaddlv_s16
: {
13599 Int
= Intrinsic::aarch64_neon_saddlv
;
13601 VTy
= llvm::FixedVectorType::get(Int16Ty
, 4);
13602 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13603 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13604 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vaddlv");
13606 case NEON::BI__builtin_neon_vaddlvq_s8
: {
13607 Int
= Intrinsic::aarch64_neon_saddlv
;
13609 VTy
= llvm::FixedVectorType::get(Int8Ty
, 16);
13610 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13611 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13612 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vaddlv");
13613 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
13615 case NEON::BI__builtin_neon_vaddlvq_s16
: {
13616 Int
= Intrinsic::aarch64_neon_saddlv
;
13618 VTy
= llvm::FixedVectorType::get(Int16Ty
, 8);
13619 llvm::Type
*Tys
[2] = { Ty
, VTy
};
13620 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
13621 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vaddlv");
13623 case NEON::BI__builtin_neon_vsri_n_v
:
13624 case NEON::BI__builtin_neon_vsriq_n_v
: {
13625 Int
= Intrinsic::aarch64_neon_vsri
;
13626 llvm::Function
*Intrin
= CGM
.getIntrinsic(Int
, Ty
);
13627 return EmitNeonCall(Intrin
, Ops
, "vsri_n");
13629 case NEON::BI__builtin_neon_vsli_n_v
:
13630 case NEON::BI__builtin_neon_vsliq_n_v
: {
13631 Int
= Intrinsic::aarch64_neon_vsli
;
13632 llvm::Function
*Intrin
= CGM
.getIntrinsic(Int
, Ty
);
13633 return EmitNeonCall(Intrin
, Ops
, "vsli_n");
13635 case NEON::BI__builtin_neon_vsra_n_v
:
13636 case NEON::BI__builtin_neon_vsraq_n_v
:
13637 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
13638 Ops
[1] = EmitNeonRShiftImm(Ops
[1], Ops
[2], Ty
, usgn
, "vsra_n");
13639 return Builder
.CreateAdd(Ops
[0], Ops
[1]);
13640 case NEON::BI__builtin_neon_vrsra_n_v
:
13641 case NEON::BI__builtin_neon_vrsraq_n_v
: {
13642 Int
= usgn
? Intrinsic::aarch64_neon_urshl
: Intrinsic::aarch64_neon_srshl
;
13643 SmallVector
<llvm::Value
*,2> TmpOps
;
13644 TmpOps
.push_back(Ops
[1]);
13645 TmpOps
.push_back(Ops
[2]);
13646 Function
* F
= CGM
.getIntrinsic(Int
, Ty
);
13647 llvm::Value
*tmp
= EmitNeonCall(F
, TmpOps
, "vrshr_n", 1, true);
13648 Ops
[0] = Builder
.CreateBitCast(Ops
[0], VTy
);
13649 return Builder
.CreateAdd(Ops
[0], tmp
);
13651 case NEON::BI__builtin_neon_vld1_v
:
13652 case NEON::BI__builtin_neon_vld1q_v
: {
13653 return Builder
.CreateAlignedLoad(VTy
, Ops
[0], PtrOp0
.getAlignment());
13655 case NEON::BI__builtin_neon_vst1_v
:
13656 case NEON::BI__builtin_neon_vst1q_v
:
13657 Ops
[1] = Builder
.CreateBitCast(Ops
[1], VTy
);
13658 return Builder
.CreateAlignedStore(Ops
[1], Ops
[0], PtrOp0
.getAlignment());
13659 case NEON::BI__builtin_neon_vld1_lane_v
:
13660 case NEON::BI__builtin_neon_vld1q_lane_v
: {
13661 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
13662 Ops
[0] = Builder
.CreateAlignedLoad(VTy
->getElementType(), Ops
[0],
13663 PtrOp0
.getAlignment());
13664 return Builder
.CreateInsertElement(Ops
[1], Ops
[0], Ops
[2], "vld1_lane");
13666 case NEON::BI__builtin_neon_vldap1_lane_s64
:
13667 case NEON::BI__builtin_neon_vldap1q_lane_s64
: {
13668 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
13669 llvm::LoadInst
*LI
= Builder
.CreateAlignedLoad(
13670 VTy
->getElementType(), Ops
[0], PtrOp0
.getAlignment());
13671 LI
->setAtomic(llvm::AtomicOrdering::Acquire
);
13673 return Builder
.CreateInsertElement(Ops
[1], Ops
[0], Ops
[2], "vldap1_lane");
13675 case NEON::BI__builtin_neon_vld1_dup_v
:
13676 case NEON::BI__builtin_neon_vld1q_dup_v
: {
13677 Value
*V
= PoisonValue::get(Ty
);
13678 Ops
[0] = Builder
.CreateAlignedLoad(VTy
->getElementType(), Ops
[0],
13679 PtrOp0
.getAlignment());
13680 llvm::Constant
*CI
= ConstantInt::get(Int32Ty
, 0);
13681 Ops
[0] = Builder
.CreateInsertElement(V
, Ops
[0], CI
);
13682 return EmitNeonSplat(Ops
[0], CI
);
13684 case NEON::BI__builtin_neon_vst1_lane_v
:
13685 case NEON::BI__builtin_neon_vst1q_lane_v
:
13686 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
13687 Ops
[1] = Builder
.CreateExtractElement(Ops
[1], Ops
[2]);
13688 return Builder
.CreateAlignedStore(Ops
[1], Ops
[0], PtrOp0
.getAlignment());
13689 case NEON::BI__builtin_neon_vstl1_lane_s64
:
13690 case NEON::BI__builtin_neon_vstl1q_lane_s64
: {
13691 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
13692 Ops
[1] = Builder
.CreateExtractElement(Ops
[1], Ops
[2]);
13693 llvm::StoreInst
*SI
=
13694 Builder
.CreateAlignedStore(Ops
[1], Ops
[0], PtrOp0
.getAlignment());
13695 SI
->setAtomic(llvm::AtomicOrdering::Release
);
13698 case NEON::BI__builtin_neon_vld2_v
:
13699 case NEON::BI__builtin_neon_vld2q_v
: {
13700 llvm::Type
*Tys
[2] = {VTy
, UnqualPtrTy
};
13701 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_neon_ld2
, Tys
);
13702 Ops
[1] = Builder
.CreateCall(F
, Ops
[1], "vld2");
13703 return Builder
.CreateDefaultAlignedStore(Ops
[1], Ops
[0]);
13705 case NEON::BI__builtin_neon_vld3_v
:
13706 case NEON::BI__builtin_neon_vld3q_v
: {
13707 llvm::Type
*Tys
[2] = {VTy
, UnqualPtrTy
};
13708 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_neon_ld3
, Tys
);
13709 Ops
[1] = Builder
.CreateCall(F
, Ops
[1], "vld3");
13710 return Builder
.CreateDefaultAlignedStore(Ops
[1], Ops
[0]);
13712 case NEON::BI__builtin_neon_vld4_v
:
13713 case NEON::BI__builtin_neon_vld4q_v
: {
13714 llvm::Type
*Tys
[2] = {VTy
, UnqualPtrTy
};
13715 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_neon_ld4
, Tys
);
13716 Ops
[1] = Builder
.CreateCall(F
, Ops
[1], "vld4");
13717 return Builder
.CreateDefaultAlignedStore(Ops
[1], Ops
[0]);
13719 case NEON::BI__builtin_neon_vld2_dup_v
:
13720 case NEON::BI__builtin_neon_vld2q_dup_v
: {
13721 llvm::Type
*Tys
[2] = {VTy
, UnqualPtrTy
};
13722 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_neon_ld2r
, Tys
);
13723 Ops
[1] = Builder
.CreateCall(F
, Ops
[1], "vld2");
13724 return Builder
.CreateDefaultAlignedStore(Ops
[1], Ops
[0]);
13726 case NEON::BI__builtin_neon_vld3_dup_v
:
13727 case NEON::BI__builtin_neon_vld3q_dup_v
: {
13728 llvm::Type
*Tys
[2] = {VTy
, UnqualPtrTy
};
13729 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_neon_ld3r
, Tys
);
13730 Ops
[1] = Builder
.CreateCall(F
, Ops
[1], "vld3");
13731 return Builder
.CreateDefaultAlignedStore(Ops
[1], Ops
[0]);
13733 case NEON::BI__builtin_neon_vld4_dup_v
:
13734 case NEON::BI__builtin_neon_vld4q_dup_v
: {
13735 llvm::Type
*Tys
[2] = {VTy
, UnqualPtrTy
};
13736 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_neon_ld4r
, Tys
);
13737 Ops
[1] = Builder
.CreateCall(F
, Ops
[1], "vld4");
13738 return Builder
.CreateDefaultAlignedStore(Ops
[1], Ops
[0]);
13740 case NEON::BI__builtin_neon_vld2_lane_v
:
13741 case NEON::BI__builtin_neon_vld2q_lane_v
: {
13742 llvm::Type
*Tys
[2] = { VTy
, Ops
[1]->getType() };
13743 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_neon_ld2lane
, Tys
);
13744 std::rotate(Ops
.begin() + 1, Ops
.begin() + 2, Ops
.end());
13745 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
13746 Ops
[2] = Builder
.CreateBitCast(Ops
[2], Ty
);
13747 Ops
[3] = Builder
.CreateZExt(Ops
[3], Int64Ty
);
13748 Ops
[1] = Builder
.CreateCall(F
, ArrayRef(Ops
).slice(1), "vld2_lane");
13749 return Builder
.CreateDefaultAlignedStore(Ops
[1], Ops
[0]);
13751 case NEON::BI__builtin_neon_vld3_lane_v
:
13752 case NEON::BI__builtin_neon_vld3q_lane_v
: {
13753 llvm::Type
*Tys
[2] = { VTy
, Ops
[1]->getType() };
13754 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_neon_ld3lane
, Tys
);
13755 std::rotate(Ops
.begin() + 1, Ops
.begin() + 2, Ops
.end());
13756 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
13757 Ops
[2] = Builder
.CreateBitCast(Ops
[2], Ty
);
13758 Ops
[3] = Builder
.CreateBitCast(Ops
[3], Ty
);
13759 Ops
[4] = Builder
.CreateZExt(Ops
[4], Int64Ty
);
13760 Ops
[1] = Builder
.CreateCall(F
, ArrayRef(Ops
).slice(1), "vld3_lane");
13761 return Builder
.CreateDefaultAlignedStore(Ops
[1], Ops
[0]);
13763 case NEON::BI__builtin_neon_vld4_lane_v
:
13764 case NEON::BI__builtin_neon_vld4q_lane_v
: {
13765 llvm::Type
*Tys
[2] = { VTy
, Ops
[1]->getType() };
13766 Function
*F
= CGM
.getIntrinsic(Intrinsic::aarch64_neon_ld4lane
, Tys
);
13767 std::rotate(Ops
.begin() + 1, Ops
.begin() + 2, Ops
.end());
13768 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
13769 Ops
[2] = Builder
.CreateBitCast(Ops
[2], Ty
);
13770 Ops
[3] = Builder
.CreateBitCast(Ops
[3], Ty
);
13771 Ops
[4] = Builder
.CreateBitCast(Ops
[4], Ty
);
13772 Ops
[5] = Builder
.CreateZExt(Ops
[5], Int64Ty
);
13773 Ops
[1] = Builder
.CreateCall(F
, ArrayRef(Ops
).slice(1), "vld4_lane");
13774 return Builder
.CreateDefaultAlignedStore(Ops
[1], Ops
[0]);
13776 case NEON::BI__builtin_neon_vst2_v
:
13777 case NEON::BI__builtin_neon_vst2q_v
: {
13778 std::rotate(Ops
.begin(), Ops
.begin() + 1, Ops
.end());
13779 llvm::Type
*Tys
[2] = { VTy
, Ops
[2]->getType() };
13780 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_st2
, Tys
),
13783 case NEON::BI__builtin_neon_vst2_lane_v
:
13784 case NEON::BI__builtin_neon_vst2q_lane_v
: {
13785 std::rotate(Ops
.begin(), Ops
.begin() + 1, Ops
.end());
13786 Ops
[2] = Builder
.CreateZExt(Ops
[2], Int64Ty
);
13787 llvm::Type
*Tys
[2] = { VTy
, Ops
[3]->getType() };
13788 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_st2lane
, Tys
),
13791 case NEON::BI__builtin_neon_vst3_v
:
13792 case NEON::BI__builtin_neon_vst3q_v
: {
13793 std::rotate(Ops
.begin(), Ops
.begin() + 1, Ops
.end());
13794 llvm::Type
*Tys
[2] = { VTy
, Ops
[3]->getType() };
13795 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_st3
, Tys
),
13798 case NEON::BI__builtin_neon_vst3_lane_v
:
13799 case NEON::BI__builtin_neon_vst3q_lane_v
: {
13800 std::rotate(Ops
.begin(), Ops
.begin() + 1, Ops
.end());
13801 Ops
[3] = Builder
.CreateZExt(Ops
[3], Int64Ty
);
13802 llvm::Type
*Tys
[2] = { VTy
, Ops
[4]->getType() };
13803 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_st3lane
, Tys
),
13806 case NEON::BI__builtin_neon_vst4_v
:
13807 case NEON::BI__builtin_neon_vst4q_v
: {
13808 std::rotate(Ops
.begin(), Ops
.begin() + 1, Ops
.end());
13809 llvm::Type
*Tys
[2] = { VTy
, Ops
[4]->getType() };
13810 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_st4
, Tys
),
13813 case NEON::BI__builtin_neon_vst4_lane_v
:
13814 case NEON::BI__builtin_neon_vst4q_lane_v
: {
13815 std::rotate(Ops
.begin(), Ops
.begin() + 1, Ops
.end());
13816 Ops
[4] = Builder
.CreateZExt(Ops
[4], Int64Ty
);
13817 llvm::Type
*Tys
[2] = { VTy
, Ops
[5]->getType() };
13818 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_st4lane
, Tys
),
13821 case NEON::BI__builtin_neon_vtrn_v
:
13822 case NEON::BI__builtin_neon_vtrnq_v
: {
13823 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
13824 Ops
[2] = Builder
.CreateBitCast(Ops
[2], Ty
);
13825 Value
*SV
= nullptr;
13827 for (unsigned vi
= 0; vi
!= 2; ++vi
) {
13828 SmallVector
<int, 16> Indices
;
13829 for (unsigned i
= 0, e
= VTy
->getNumElements(); i
!= e
; i
+= 2) {
13830 Indices
.push_back(i
+vi
);
13831 Indices
.push_back(i
+e
+vi
);
13833 Value
*Addr
= Builder
.CreateConstInBoundsGEP1_32(Ty
, Ops
[0], vi
);
13834 SV
= Builder
.CreateShuffleVector(Ops
[1], Ops
[2], Indices
, "vtrn");
13835 SV
= Builder
.CreateDefaultAlignedStore(SV
, Addr
);
13839 case NEON::BI__builtin_neon_vuzp_v
:
13840 case NEON::BI__builtin_neon_vuzpq_v
: {
13841 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
13842 Ops
[2] = Builder
.CreateBitCast(Ops
[2], Ty
);
13843 Value
*SV
= nullptr;
13845 for (unsigned vi
= 0; vi
!= 2; ++vi
) {
13846 SmallVector
<int, 16> Indices
;
13847 for (unsigned i
= 0, e
= VTy
->getNumElements(); i
!= e
; ++i
)
13848 Indices
.push_back(2*i
+vi
);
13850 Value
*Addr
= Builder
.CreateConstInBoundsGEP1_32(Ty
, Ops
[0], vi
);
13851 SV
= Builder
.CreateShuffleVector(Ops
[1], Ops
[2], Indices
, "vuzp");
13852 SV
= Builder
.CreateDefaultAlignedStore(SV
, Addr
);
13856 case NEON::BI__builtin_neon_vzip_v
:
13857 case NEON::BI__builtin_neon_vzipq_v
: {
13858 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
13859 Ops
[2] = Builder
.CreateBitCast(Ops
[2], Ty
);
13860 Value
*SV
= nullptr;
13862 for (unsigned vi
= 0; vi
!= 2; ++vi
) {
13863 SmallVector
<int, 16> Indices
;
13864 for (unsigned i
= 0, e
= VTy
->getNumElements(); i
!= e
; i
+= 2) {
13865 Indices
.push_back((i
+ vi
*e
) >> 1);
13866 Indices
.push_back(((i
+ vi
*e
) >> 1)+e
);
13868 Value
*Addr
= Builder
.CreateConstInBoundsGEP1_32(Ty
, Ops
[0], vi
);
13869 SV
= Builder
.CreateShuffleVector(Ops
[1], Ops
[2], Indices
, "vzip");
13870 SV
= Builder
.CreateDefaultAlignedStore(SV
, Addr
);
13874 case NEON::BI__builtin_neon_vqtbl1q_v
: {
13875 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_tbl1
, Ty
),
13878 case NEON::BI__builtin_neon_vqtbl2q_v
: {
13879 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_tbl2
, Ty
),
13882 case NEON::BI__builtin_neon_vqtbl3q_v
: {
13883 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_tbl3
, Ty
),
13886 case NEON::BI__builtin_neon_vqtbl4q_v
: {
13887 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_tbl4
, Ty
),
13890 case NEON::BI__builtin_neon_vqtbx1q_v
: {
13891 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_tbx1
, Ty
),
13894 case NEON::BI__builtin_neon_vqtbx2q_v
: {
13895 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_tbx2
, Ty
),
13898 case NEON::BI__builtin_neon_vqtbx3q_v
: {
13899 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_tbx3
, Ty
),
13902 case NEON::BI__builtin_neon_vqtbx4q_v
: {
13903 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::aarch64_neon_tbx4
, Ty
),
13906 case NEON::BI__builtin_neon_vsqadd_v
:
13907 case NEON::BI__builtin_neon_vsqaddq_v
: {
13908 Int
= Intrinsic::aarch64_neon_usqadd
;
13909 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vsqadd");
13911 case NEON::BI__builtin_neon_vuqadd_v
:
13912 case NEON::BI__builtin_neon_vuqaddq_v
: {
13913 Int
= Intrinsic::aarch64_neon_suqadd
;
13914 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vuqadd");
13917 case NEON::BI__builtin_neon_vluti2_laneq_bf16
:
13918 case NEON::BI__builtin_neon_vluti2_laneq_f16
:
13919 case NEON::BI__builtin_neon_vluti2_laneq_p16
:
13920 case NEON::BI__builtin_neon_vluti2_laneq_p8
:
13921 case NEON::BI__builtin_neon_vluti2_laneq_s16
:
13922 case NEON::BI__builtin_neon_vluti2_laneq_s8
:
13923 case NEON::BI__builtin_neon_vluti2_laneq_u16
:
13924 case NEON::BI__builtin_neon_vluti2_laneq_u8
: {
13925 Int
= Intrinsic::aarch64_neon_vluti2_laneq
;
13926 llvm::Type
*Tys
[2];
13928 Tys
[1] = GetNeonType(this, NeonTypeFlags(Type
.getEltType(), false,
13929 /*isQuad*/ false));
13930 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vluti2_laneq");
13932 case NEON::BI__builtin_neon_vluti2q_laneq_bf16
:
13933 case NEON::BI__builtin_neon_vluti2q_laneq_f16
:
13934 case NEON::BI__builtin_neon_vluti2q_laneq_p16
:
13935 case NEON::BI__builtin_neon_vluti2q_laneq_p8
:
13936 case NEON::BI__builtin_neon_vluti2q_laneq_s16
:
13937 case NEON::BI__builtin_neon_vluti2q_laneq_s8
:
13938 case NEON::BI__builtin_neon_vluti2q_laneq_u16
:
13939 case NEON::BI__builtin_neon_vluti2q_laneq_u8
: {
13940 Int
= Intrinsic::aarch64_neon_vluti2_laneq
;
13941 llvm::Type
*Tys
[2];
13943 Tys
[1] = GetNeonType(this, NeonTypeFlags(Type
.getEltType(), false,
13945 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vluti2_laneq");
13947 case NEON::BI__builtin_neon_vluti2_lane_bf16
:
13948 case NEON::BI__builtin_neon_vluti2_lane_f16
:
13949 case NEON::BI__builtin_neon_vluti2_lane_p16
:
13950 case NEON::BI__builtin_neon_vluti2_lane_p8
:
13951 case NEON::BI__builtin_neon_vluti2_lane_s16
:
13952 case NEON::BI__builtin_neon_vluti2_lane_s8
:
13953 case NEON::BI__builtin_neon_vluti2_lane_u16
:
13954 case NEON::BI__builtin_neon_vluti2_lane_u8
: {
13955 Int
= Intrinsic::aarch64_neon_vluti2_lane
;
13956 llvm::Type
*Tys
[2];
13958 Tys
[1] = GetNeonType(this, NeonTypeFlags(Type
.getEltType(), false,
13959 /*isQuad*/ false));
13960 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vluti2_lane");
13962 case NEON::BI__builtin_neon_vluti2q_lane_bf16
:
13963 case NEON::BI__builtin_neon_vluti2q_lane_f16
:
13964 case NEON::BI__builtin_neon_vluti2q_lane_p16
:
13965 case NEON::BI__builtin_neon_vluti2q_lane_p8
:
13966 case NEON::BI__builtin_neon_vluti2q_lane_s16
:
13967 case NEON::BI__builtin_neon_vluti2q_lane_s8
:
13968 case NEON::BI__builtin_neon_vluti2q_lane_u16
:
13969 case NEON::BI__builtin_neon_vluti2q_lane_u8
: {
13970 Int
= Intrinsic::aarch64_neon_vluti2_lane
;
13971 llvm::Type
*Tys
[2];
13973 Tys
[1] = GetNeonType(this, NeonTypeFlags(Type
.getEltType(), false,
13975 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vluti2_lane");
13977 case NEON::BI__builtin_neon_vluti4q_lane_p8
:
13978 case NEON::BI__builtin_neon_vluti4q_lane_s8
:
13979 case NEON::BI__builtin_neon_vluti4q_lane_u8
: {
13980 Int
= Intrinsic::aarch64_neon_vluti4q_lane
;
13981 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vluti4q_lane");
13983 case NEON::BI__builtin_neon_vluti4q_laneq_p8
:
13984 case NEON::BI__builtin_neon_vluti4q_laneq_s8
:
13985 case NEON::BI__builtin_neon_vluti4q_laneq_u8
: {
13986 Int
= Intrinsic::aarch64_neon_vluti4q_laneq
;
13987 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vluti4q_laneq");
13989 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2
:
13990 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2
:
13991 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2
:
13992 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2
:
13993 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2
: {
13994 Int
= Intrinsic::aarch64_neon_vluti4q_lane_x2
;
13995 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vluti4q_lane_x2");
13997 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2
:
13998 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2
:
13999 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2
:
14000 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2
:
14001 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2
: {
14002 Int
= Intrinsic::aarch64_neon_vluti4q_laneq_x2
;
14003 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vluti4q_laneq_x2");
14006 case NEON::BI__builtin_neon_vamin_f16
:
14007 case NEON::BI__builtin_neon_vaminq_f16
:
14008 case NEON::BI__builtin_neon_vamin_f32
:
14009 case NEON::BI__builtin_neon_vaminq_f32
:
14010 case NEON::BI__builtin_neon_vaminq_f64
: {
14011 Int
= Intrinsic::aarch64_neon_famin
;
14012 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "famin");
14014 case NEON::BI__builtin_neon_vamax_f16
:
14015 case NEON::BI__builtin_neon_vamaxq_f16
:
14016 case NEON::BI__builtin_neon_vamax_f32
:
14017 case NEON::BI__builtin_neon_vamaxq_f32
:
14018 case NEON::BI__builtin_neon_vamaxq_f64
: {
14019 Int
= Intrinsic::aarch64_neon_famax
;
14020 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "famax");
14022 case NEON::BI__builtin_neon_vscale_f16
:
14023 case NEON::BI__builtin_neon_vscaleq_f16
:
14024 case NEON::BI__builtin_neon_vscale_f32
:
14025 case NEON::BI__builtin_neon_vscaleq_f32
:
14026 case NEON::BI__builtin_neon_vscaleq_f64
: {
14027 Int
= Intrinsic::aarch64_neon_fp8_fscale
;
14028 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "fscale");
14033 Value
*CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID
,
14034 const CallExpr
*E
) {
14035 assert((BuiltinID
== BPF::BI__builtin_preserve_field_info
||
14036 BuiltinID
== BPF::BI__builtin_btf_type_id
||
14037 BuiltinID
== BPF::BI__builtin_preserve_type_info
||
14038 BuiltinID
== BPF::BI__builtin_preserve_enum_value
) &&
14039 "unexpected BPF builtin");
14041 // A sequence number, injected into IR builtin functions, to
14042 // prevent CSE given the only difference of the function
14043 // may just be the debuginfo metadata.
14044 static uint32_t BuiltinSeqNum
;
14046 switch (BuiltinID
) {
14048 llvm_unreachable("Unexpected BPF builtin");
14049 case BPF::BI__builtin_preserve_field_info
: {
14050 const Expr
*Arg
= E
->getArg(0);
14051 bool IsBitField
= Arg
->IgnoreParens()->getObjectKind() == OK_BitField
;
14053 if (!getDebugInfo()) {
14054 CGM
.Error(E
->getExprLoc(),
14055 "using __builtin_preserve_field_info() without -g");
14056 return IsBitField
? EmitLValue(Arg
).getRawBitFieldPointer(*this)
14057 : EmitLValue(Arg
).emitRawPointer(*this);
14060 // Enable underlying preserve_*_access_index() generation.
14061 bool OldIsInPreservedAIRegion
= IsInPreservedAIRegion
;
14062 IsInPreservedAIRegion
= true;
14063 Value
*FieldAddr
= IsBitField
? EmitLValue(Arg
).getRawBitFieldPointer(*this)
14064 : EmitLValue(Arg
).emitRawPointer(*this);
14065 IsInPreservedAIRegion
= OldIsInPreservedAIRegion
;
14067 ConstantInt
*C
= cast
<ConstantInt
>(EmitScalarExpr(E
->getArg(1)));
14068 Value
*InfoKind
= ConstantInt::get(Int64Ty
, C
->getSExtValue());
14070 // Built the IR for the preserve_field_info intrinsic.
14071 llvm::Function
*FnGetFieldInfo
= llvm::Intrinsic::getOrInsertDeclaration(
14072 &CGM
.getModule(), llvm::Intrinsic::bpf_preserve_field_info
,
14073 {FieldAddr
->getType()});
14074 return Builder
.CreateCall(FnGetFieldInfo
, {FieldAddr
, InfoKind
});
14076 case BPF::BI__builtin_btf_type_id
:
14077 case BPF::BI__builtin_preserve_type_info
: {
14078 if (!getDebugInfo()) {
14079 CGM
.Error(E
->getExprLoc(), "using builtin function without -g");
14083 const Expr
*Arg0
= E
->getArg(0);
14084 llvm::DIType
*DbgInfo
= getDebugInfo()->getOrCreateStandaloneType(
14085 Arg0
->getType(), Arg0
->getExprLoc());
14087 ConstantInt
*Flag
= cast
<ConstantInt
>(EmitScalarExpr(E
->getArg(1)));
14088 Value
*FlagValue
= ConstantInt::get(Int64Ty
, Flag
->getSExtValue());
14089 Value
*SeqNumVal
= ConstantInt::get(Int32Ty
, BuiltinSeqNum
++);
14091 llvm::Function
*FnDecl
;
14092 if (BuiltinID
== BPF::BI__builtin_btf_type_id
)
14093 FnDecl
= llvm::Intrinsic::getOrInsertDeclaration(
14094 &CGM
.getModule(), llvm::Intrinsic::bpf_btf_type_id
, {});
14096 FnDecl
= llvm::Intrinsic::getOrInsertDeclaration(
14097 &CGM
.getModule(), llvm::Intrinsic::bpf_preserve_type_info
, {});
14098 CallInst
*Fn
= Builder
.CreateCall(FnDecl
, {SeqNumVal
, FlagValue
});
14099 Fn
->setMetadata(LLVMContext::MD_preserve_access_index
, DbgInfo
);
14102 case BPF::BI__builtin_preserve_enum_value
: {
14103 if (!getDebugInfo()) {
14104 CGM
.Error(E
->getExprLoc(), "using builtin function without -g");
14108 const Expr
*Arg0
= E
->getArg(0);
14109 llvm::DIType
*DbgInfo
= getDebugInfo()->getOrCreateStandaloneType(
14110 Arg0
->getType(), Arg0
->getExprLoc());
14113 const auto *UO
= cast
<UnaryOperator
>(Arg0
->IgnoreParens());
14114 const auto *CE
= cast
<CStyleCastExpr
>(UO
->getSubExpr());
14115 const auto *DR
= cast
<DeclRefExpr
>(CE
->getSubExpr());
14116 const auto *Enumerator
= cast
<EnumConstantDecl
>(DR
->getDecl());
14118 auto InitVal
= Enumerator
->getInitVal();
14119 std::string InitValStr
;
14120 if (InitVal
.isNegative() || InitVal
> uint64_t(INT64_MAX
))
14121 InitValStr
= std::to_string(InitVal
.getSExtValue());
14123 InitValStr
= std::to_string(InitVal
.getZExtValue());
14124 std::string EnumStr
= Enumerator
->getNameAsString() + ":" + InitValStr
;
14125 Value
*EnumStrVal
= Builder
.CreateGlobalString(EnumStr
);
14127 ConstantInt
*Flag
= cast
<ConstantInt
>(EmitScalarExpr(E
->getArg(1)));
14128 Value
*FlagValue
= ConstantInt::get(Int64Ty
, Flag
->getSExtValue());
14129 Value
*SeqNumVal
= ConstantInt::get(Int32Ty
, BuiltinSeqNum
++);
14131 llvm::Function
*IntrinsicFn
= llvm::Intrinsic::getOrInsertDeclaration(
14132 &CGM
.getModule(), llvm::Intrinsic::bpf_preserve_enum_value
, {});
14134 Builder
.CreateCall(IntrinsicFn
, {SeqNumVal
, EnumStrVal
, FlagValue
});
14135 Fn
->setMetadata(LLVMContext::MD_preserve_access_index
, DbgInfo
);
14141 llvm::Value
*CodeGenFunction::
14142 BuildVector(ArrayRef
<llvm::Value
*> Ops
) {
14143 assert((Ops
.size() & (Ops
.size() - 1)) == 0 &&
14144 "Not a power-of-two sized vector!");
14145 bool AllConstants
= true;
14146 for (unsigned i
= 0, e
= Ops
.size(); i
!= e
&& AllConstants
; ++i
)
14147 AllConstants
&= isa
<Constant
>(Ops
[i
]);
14149 // If this is a constant vector, create a ConstantVector.
14150 if (AllConstants
) {
14151 SmallVector
<llvm::Constant
*, 16> CstOps
;
14152 for (unsigned i
= 0, e
= Ops
.size(); i
!= e
; ++i
)
14153 CstOps
.push_back(cast
<Constant
>(Ops
[i
]));
14154 return llvm::ConstantVector::get(CstOps
);
14157 // Otherwise, insertelement the values to build the vector.
14158 Value
*Result
= llvm::PoisonValue::get(
14159 llvm::FixedVectorType::get(Ops
[0]->getType(), Ops
.size()));
14161 for (unsigned i
= 0, e
= Ops
.size(); i
!= e
; ++i
)
14162 Result
= Builder
.CreateInsertElement(Result
, Ops
[i
], Builder
.getInt64(i
));
14167 // Convert the mask from an integer type to a vector of i1.
14168 static Value
*getMaskVecValue(CodeGenFunction
&CGF
, Value
*Mask
,
14169 unsigned NumElts
) {
14171 auto *MaskTy
= llvm::FixedVectorType::get(
14172 CGF
.Builder
.getInt1Ty(),
14173 cast
<IntegerType
>(Mask
->getType())->getBitWidth());
14174 Value
*MaskVec
= CGF
.Builder
.CreateBitCast(Mask
, MaskTy
);
14176 // If we have less than 8 elements, then the starting mask was an i8 and
14177 // we need to extract down to the right number of elements.
14180 for (unsigned i
= 0; i
!= NumElts
; ++i
)
14182 MaskVec
= CGF
.Builder
.CreateShuffleVector(
14183 MaskVec
, MaskVec
, ArrayRef(Indices
, NumElts
), "extract");
14188 static Value
*EmitX86MaskedStore(CodeGenFunction
&CGF
, ArrayRef
<Value
*> Ops
,
14190 Value
*Ptr
= Ops
[0];
14192 Value
*MaskVec
= getMaskVecValue(
14194 cast
<llvm::FixedVectorType
>(Ops
[1]->getType())->getNumElements());
14196 return CGF
.Builder
.CreateMaskedStore(Ops
[1], Ptr
, Alignment
, MaskVec
);
14199 static Value
*EmitX86MaskedLoad(CodeGenFunction
&CGF
, ArrayRef
<Value
*> Ops
,
14201 llvm::Type
*Ty
= Ops
[1]->getType();
14202 Value
*Ptr
= Ops
[0];
14204 Value
*MaskVec
= getMaskVecValue(
14205 CGF
, Ops
[2], cast
<llvm::FixedVectorType
>(Ty
)->getNumElements());
14207 return CGF
.Builder
.CreateMaskedLoad(Ty
, Ptr
, Alignment
, MaskVec
, Ops
[1]);
14210 static Value
*EmitX86ExpandLoad(CodeGenFunction
&CGF
,
14211 ArrayRef
<Value
*> Ops
) {
14212 auto *ResultTy
= cast
<llvm::VectorType
>(Ops
[1]->getType());
14213 Value
*Ptr
= Ops
[0];
14215 Value
*MaskVec
= getMaskVecValue(
14216 CGF
, Ops
[2], cast
<FixedVectorType
>(ResultTy
)->getNumElements());
14218 llvm::Function
*F
= CGF
.CGM
.getIntrinsic(Intrinsic::masked_expandload
,
14220 return CGF
.Builder
.CreateCall(F
, { Ptr
, MaskVec
, Ops
[1] });
14223 static Value
*EmitX86CompressExpand(CodeGenFunction
&CGF
,
14224 ArrayRef
<Value
*> Ops
,
14226 auto *ResultTy
= cast
<llvm::FixedVectorType
>(Ops
[1]->getType());
14228 Value
*MaskVec
= getMaskVecValue(CGF
, Ops
[2], ResultTy
->getNumElements());
14230 Intrinsic::ID IID
= IsCompress
? Intrinsic::x86_avx512_mask_compress
14231 : Intrinsic::x86_avx512_mask_expand
;
14232 llvm::Function
*F
= CGF
.CGM
.getIntrinsic(IID
, ResultTy
);
14233 return CGF
.Builder
.CreateCall(F
, { Ops
[0], Ops
[1], MaskVec
});
14236 static Value
*EmitX86CompressStore(CodeGenFunction
&CGF
,
14237 ArrayRef
<Value
*> Ops
) {
14238 auto *ResultTy
= cast
<llvm::FixedVectorType
>(Ops
[1]->getType());
14239 Value
*Ptr
= Ops
[0];
14241 Value
*MaskVec
= getMaskVecValue(CGF
, Ops
[2], ResultTy
->getNumElements());
14243 llvm::Function
*F
= CGF
.CGM
.getIntrinsic(Intrinsic::masked_compressstore
,
14245 return CGF
.Builder
.CreateCall(F
, { Ops
[1], Ptr
, MaskVec
});
14248 static Value
*EmitX86MaskLogic(CodeGenFunction
&CGF
, Instruction::BinaryOps Opc
,
14249 ArrayRef
<Value
*> Ops
,
14250 bool InvertLHS
= false) {
14251 unsigned NumElts
= Ops
[0]->getType()->getIntegerBitWidth();
14252 Value
*LHS
= getMaskVecValue(CGF
, Ops
[0], NumElts
);
14253 Value
*RHS
= getMaskVecValue(CGF
, Ops
[1], NumElts
);
14256 LHS
= CGF
.Builder
.CreateNot(LHS
);
14258 return CGF
.Builder
.CreateBitCast(CGF
.Builder
.CreateBinOp(Opc
, LHS
, RHS
),
14259 Ops
[0]->getType());
14262 static Value
*EmitX86FunnelShift(CodeGenFunction
&CGF
, Value
*Op0
, Value
*Op1
,
14263 Value
*Amt
, bool IsRight
) {
14264 llvm::Type
*Ty
= Op0
->getType();
14266 // Amount may be scalar immediate, in which case create a splat vector.
14267 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
14268 // we only care about the lowest log2 bits anyway.
14269 if (Amt
->getType() != Ty
) {
14270 unsigned NumElts
= cast
<llvm::FixedVectorType
>(Ty
)->getNumElements();
14271 Amt
= CGF
.Builder
.CreateIntCast(Amt
, Ty
->getScalarType(), false);
14272 Amt
= CGF
.Builder
.CreateVectorSplat(NumElts
, Amt
);
14275 unsigned IID
= IsRight
? Intrinsic::fshr
: Intrinsic::fshl
;
14276 Function
*F
= CGF
.CGM
.getIntrinsic(IID
, Ty
);
14277 return CGF
.Builder
.CreateCall(F
, {Op0
, Op1
, Amt
});
14280 static Value
*EmitX86vpcom(CodeGenFunction
&CGF
, ArrayRef
<Value
*> Ops
,
14282 Value
*Op0
= Ops
[0];
14283 Value
*Op1
= Ops
[1];
14284 llvm::Type
*Ty
= Op0
->getType();
14285 uint64_t Imm
= cast
<llvm::ConstantInt
>(Ops
[2])->getZExtValue() & 0x7;
14287 CmpInst::Predicate Pred
;
14290 Pred
= IsSigned
? ICmpInst::ICMP_SLT
: ICmpInst::ICMP_ULT
;
14293 Pred
= IsSigned
? ICmpInst::ICMP_SLE
: ICmpInst::ICMP_ULE
;
14296 Pred
= IsSigned
? ICmpInst::ICMP_SGT
: ICmpInst::ICMP_UGT
;
14299 Pred
= IsSigned
? ICmpInst::ICMP_SGE
: ICmpInst::ICMP_UGE
;
14302 Pred
= ICmpInst::ICMP_EQ
;
14305 Pred
= ICmpInst::ICMP_NE
;
14308 return llvm::Constant::getNullValue(Ty
); // FALSE
14310 return llvm::Constant::getAllOnesValue(Ty
); // TRUE
14312 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
14315 Value
*Cmp
= CGF
.Builder
.CreateICmp(Pred
, Op0
, Op1
);
14316 Value
*Res
= CGF
.Builder
.CreateSExt(Cmp
, Ty
);
14320 static Value
*EmitX86Select(CodeGenFunction
&CGF
,
14321 Value
*Mask
, Value
*Op0
, Value
*Op1
) {
14323 // If the mask is all ones just return first argument.
14324 if (const auto *C
= dyn_cast
<Constant
>(Mask
))
14325 if (C
->isAllOnesValue())
14328 Mask
= getMaskVecValue(
14329 CGF
, Mask
, cast
<llvm::FixedVectorType
>(Op0
->getType())->getNumElements());
14331 return CGF
.Builder
.CreateSelect(Mask
, Op0
, Op1
);
14334 static Value
*EmitX86ScalarSelect(CodeGenFunction
&CGF
,
14335 Value
*Mask
, Value
*Op0
, Value
*Op1
) {
14336 // If the mask is all ones just return first argument.
14337 if (const auto *C
= dyn_cast
<Constant
>(Mask
))
14338 if (C
->isAllOnesValue())
14341 auto *MaskTy
= llvm::FixedVectorType::get(
14342 CGF
.Builder
.getInt1Ty(), Mask
->getType()->getIntegerBitWidth());
14343 Mask
= CGF
.Builder
.CreateBitCast(Mask
, MaskTy
);
14344 Mask
= CGF
.Builder
.CreateExtractElement(Mask
, (uint64_t)0);
14345 return CGF
.Builder
.CreateSelect(Mask
, Op0
, Op1
);
14348 static Value
*EmitX86MaskedCompareResult(CodeGenFunction
&CGF
, Value
*Cmp
,
14349 unsigned NumElts
, Value
*MaskIn
) {
14351 const auto *C
= dyn_cast
<Constant
>(MaskIn
);
14352 if (!C
|| !C
->isAllOnesValue())
14353 Cmp
= CGF
.Builder
.CreateAnd(Cmp
, getMaskVecValue(CGF
, MaskIn
, NumElts
));
14358 for (unsigned i
= 0; i
!= NumElts
; ++i
)
14360 for (unsigned i
= NumElts
; i
!= 8; ++i
)
14361 Indices
[i
] = i
% NumElts
+ NumElts
;
14362 Cmp
= CGF
.Builder
.CreateShuffleVector(
14363 Cmp
, llvm::Constant::getNullValue(Cmp
->getType()), Indices
);
14366 return CGF
.Builder
.CreateBitCast(Cmp
,
14367 IntegerType::get(CGF
.getLLVMContext(),
14368 std::max(NumElts
, 8U)));
14371 static Value
*EmitX86MaskedCompare(CodeGenFunction
&CGF
, unsigned CC
,
14372 bool Signed
, ArrayRef
<Value
*> Ops
) {
14373 assert((Ops
.size() == 2 || Ops
.size() == 4) &&
14374 "Unexpected number of arguments");
14376 cast
<llvm::FixedVectorType
>(Ops
[0]->getType())->getNumElements();
14380 Cmp
= Constant::getNullValue(
14381 llvm::FixedVectorType::get(CGF
.Builder
.getInt1Ty(), NumElts
));
14382 } else if (CC
== 7) {
14383 Cmp
= Constant::getAllOnesValue(
14384 llvm::FixedVectorType::get(CGF
.Builder
.getInt1Ty(), NumElts
));
14386 ICmpInst::Predicate Pred
;
14388 default: llvm_unreachable("Unknown condition code");
14389 case 0: Pred
= ICmpInst::ICMP_EQ
; break;
14390 case 1: Pred
= Signed
? ICmpInst::ICMP_SLT
: ICmpInst::ICMP_ULT
; break;
14391 case 2: Pred
= Signed
? ICmpInst::ICMP_SLE
: ICmpInst::ICMP_ULE
; break;
14392 case 4: Pred
= ICmpInst::ICMP_NE
; break;
14393 case 5: Pred
= Signed
? ICmpInst::ICMP_SGE
: ICmpInst::ICMP_UGE
; break;
14394 case 6: Pred
= Signed
? ICmpInst::ICMP_SGT
: ICmpInst::ICMP_UGT
; break;
14396 Cmp
= CGF
.Builder
.CreateICmp(Pred
, Ops
[0], Ops
[1]);
14399 Value
*MaskIn
= nullptr;
14400 if (Ops
.size() == 4)
14403 return EmitX86MaskedCompareResult(CGF
, Cmp
, NumElts
, MaskIn
);
14406 static Value
*EmitX86ConvertToMask(CodeGenFunction
&CGF
, Value
*In
) {
14407 Value
*Zero
= Constant::getNullValue(In
->getType());
14408 return EmitX86MaskedCompare(CGF
, 1, true, { In
, Zero
});
14411 static Value
*EmitX86ConvertIntToFp(CodeGenFunction
&CGF
, const CallExpr
*E
,
14412 ArrayRef
<Value
*> Ops
, bool IsSigned
) {
14413 unsigned Rnd
= cast
<llvm::ConstantInt
>(Ops
[3])->getZExtValue();
14414 llvm::Type
*Ty
= Ops
[1]->getType();
14418 Intrinsic::ID IID
= IsSigned
? Intrinsic::x86_avx512_sitofp_round
14419 : Intrinsic::x86_avx512_uitofp_round
;
14420 Function
*F
= CGF
.CGM
.getIntrinsic(IID
, { Ty
, Ops
[0]->getType() });
14421 Res
= CGF
.Builder
.CreateCall(F
, { Ops
[0], Ops
[3] });
14423 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(CGF
, E
);
14424 Res
= IsSigned
? CGF
.Builder
.CreateSIToFP(Ops
[0], Ty
)
14425 : CGF
.Builder
.CreateUIToFP(Ops
[0], Ty
);
14428 return EmitX86Select(CGF
, Ops
[2], Res
, Ops
[1]);
14431 // Lowers X86 FMA intrinsics to IR.
14432 static Value
*EmitX86FMAExpr(CodeGenFunction
&CGF
, const CallExpr
*E
,
14433 ArrayRef
<Value
*> Ops
, unsigned BuiltinID
,
14436 bool Subtract
= false;
14437 Intrinsic::ID IID
= Intrinsic::not_intrinsic
;
14438 switch (BuiltinID
) {
14440 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3
:
14443 case clang::X86::BI__builtin_ia32_vfmaddph512_mask
:
14444 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz
:
14445 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3
:
14446 IID
= llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512
;
14448 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3
:
14451 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask
:
14452 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz
:
14453 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3
:
14454 IID
= llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512
;
14456 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3
:
14459 case clang::X86::BI__builtin_ia32_vfmaddps512_mask
:
14460 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz
:
14461 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3
:
14462 IID
= llvm::Intrinsic::x86_avx512_vfmadd_ps_512
; break;
14463 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3
:
14466 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask
:
14467 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz
:
14468 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3
:
14469 IID
= llvm::Intrinsic::x86_avx512_vfmadd_pd_512
; break;
14470 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3
:
14473 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask
:
14474 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz
:
14475 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3
:
14476 IID
= llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512
;
14478 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3
:
14481 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask
:
14482 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz
:
14483 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3
:
14484 IID
= llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512
;
14486 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3
:
14489 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask
:
14490 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz
:
14491 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3
:
14492 IID
= llvm::Intrinsic::x86_avx10_vfmaddph256
;
14494 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3
:
14497 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask
:
14498 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz
:
14499 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3
:
14500 IID
= llvm::Intrinsic::x86_avx10_vfmaddsubph256
;
14502 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3
:
14505 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask
:
14506 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz
:
14507 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3
:
14508 IID
= llvm::Intrinsic::x86_avx10_vfmaddps256
;
14510 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3
:
14513 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask
:
14514 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz
:
14515 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3
:
14516 IID
= llvm::Intrinsic::x86_avx10_vfmaddpd256
;
14518 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3
:
14521 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask
:
14522 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz
:
14523 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3
:
14524 IID
= llvm::Intrinsic::x86_avx10_vfmaddsubps256
;
14526 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3
:
14529 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask
:
14530 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz
:
14531 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3
:
14532 IID
= llvm::Intrinsic::x86_avx10_vfmaddsubpd256
;
14541 C
= CGF
.Builder
.CreateFNeg(C
);
14545 // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
14546 if (IID
!= Intrinsic::not_intrinsic
&&
14547 (cast
<llvm::ConstantInt
>(Ops
.back())->getZExtValue() != (uint64_t)4 ||
14549 Function
*Intr
= CGF
.CGM
.getIntrinsic(IID
);
14550 Res
= CGF
.Builder
.CreateCall(Intr
, {A
, B
, C
, Ops
.back() });
14552 llvm::Type
*Ty
= A
->getType();
14554 if (CGF
.Builder
.getIsFPConstrained()) {
14555 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(CGF
, E
);
14556 FMA
= CGF
.CGM
.getIntrinsic(Intrinsic::experimental_constrained_fma
, Ty
);
14557 Res
= CGF
.Builder
.CreateConstrainedFPCall(FMA
, {A
, B
, C
});
14559 FMA
= CGF
.CGM
.getIntrinsic(Intrinsic::fma
, Ty
);
14560 Res
= CGF
.Builder
.CreateCall(FMA
, {A
, B
, C
});
14564 // Handle any required masking.
14565 Value
*MaskFalseVal
= nullptr;
14566 switch (BuiltinID
) {
14567 case clang::X86::BI__builtin_ia32_vfmaddph512_mask
:
14568 case clang::X86::BI__builtin_ia32_vfmaddps512_mask
:
14569 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask
:
14570 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask
:
14571 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask
:
14572 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask
:
14573 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask
:
14574 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask
:
14575 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask
:
14576 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask
:
14577 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask
:
14578 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask
:
14579 MaskFalseVal
= Ops
[0];
14581 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz
:
14582 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz
:
14583 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz
:
14584 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz
:
14585 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz
:
14586 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz
:
14587 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz
:
14588 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz
:
14589 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz
:
14590 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz
:
14591 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz
:
14592 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz
:
14593 MaskFalseVal
= Constant::getNullValue(Ops
[0]->getType());
14595 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3
:
14596 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3
:
14597 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3
:
14598 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3
:
14599 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3
:
14600 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3
:
14601 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3
:
14602 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3
:
14603 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3
:
14604 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3
:
14605 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3
:
14606 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3
:
14607 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3
:
14608 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3
:
14609 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3
:
14610 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3
:
14611 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3
:
14612 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3
:
14613 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3
:
14614 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3
:
14615 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3
:
14616 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3
:
14617 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3
:
14618 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3
:
14619 MaskFalseVal
= Ops
[2];
14624 return EmitX86Select(CGF
, Ops
[3], Res
, MaskFalseVal
);
14629 static Value
*EmitScalarFMAExpr(CodeGenFunction
&CGF
, const CallExpr
*E
,
14630 MutableArrayRef
<Value
*> Ops
, Value
*Upper
,
14631 bool ZeroMask
= false, unsigned PTIdx
= 0,
14632 bool NegAcc
= false) {
14634 if (Ops
.size() > 4)
14635 Rnd
= cast
<llvm::ConstantInt
>(Ops
[4])->getZExtValue();
14638 Ops
[2] = CGF
.Builder
.CreateFNeg(Ops
[2]);
14640 Ops
[0] = CGF
.Builder
.CreateExtractElement(Ops
[0], (uint64_t)0);
14641 Ops
[1] = CGF
.Builder
.CreateExtractElement(Ops
[1], (uint64_t)0);
14642 Ops
[2] = CGF
.Builder
.CreateExtractElement(Ops
[2], (uint64_t)0);
14647 switch (Ops
[0]->getType()->getPrimitiveSizeInBits()) {
14649 IID
= Intrinsic::x86_avx512fp16_vfmadd_f16
;
14652 IID
= Intrinsic::x86_avx512_vfmadd_f32
;
14655 IID
= Intrinsic::x86_avx512_vfmadd_f64
;
14658 llvm_unreachable("Unexpected size");
14660 Res
= CGF
.Builder
.CreateCall(CGF
.CGM
.getIntrinsic(IID
),
14661 {Ops
[0], Ops
[1], Ops
[2], Ops
[4]});
14662 } else if (CGF
.Builder
.getIsFPConstrained()) {
14663 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(CGF
, E
);
14664 Function
*FMA
= CGF
.CGM
.getIntrinsic(
14665 Intrinsic::experimental_constrained_fma
, Ops
[0]->getType());
14666 Res
= CGF
.Builder
.CreateConstrainedFPCall(FMA
, Ops
.slice(0, 3));
14668 Function
*FMA
= CGF
.CGM
.getIntrinsic(Intrinsic::fma
, Ops
[0]->getType());
14669 Res
= CGF
.Builder
.CreateCall(FMA
, Ops
.slice(0, 3));
14671 // If we have more than 3 arguments, we need to do masking.
14672 if (Ops
.size() > 3) {
14673 Value
*PassThru
= ZeroMask
? Constant::getNullValue(Res
->getType())
14676 // If we negated the accumulator and the its the PassThru value we need to
14677 // bypass the negate. Conveniently Upper should be the same thing in this
14679 if (NegAcc
&& PTIdx
== 2)
14680 PassThru
= CGF
.Builder
.CreateExtractElement(Upper
, (uint64_t)0);
14682 Res
= EmitX86ScalarSelect(CGF
, Ops
[3], Res
, PassThru
);
14684 return CGF
.Builder
.CreateInsertElement(Upper
, Res
, (uint64_t)0);
14687 static Value
*EmitX86Muldq(CodeGenFunction
&CGF
, bool IsSigned
,
14688 ArrayRef
<Value
*> Ops
) {
14689 llvm::Type
*Ty
= Ops
[0]->getType();
14690 // Arguments have a vXi32 type so cast to vXi64.
14691 Ty
= llvm::FixedVectorType::get(CGF
.Int64Ty
,
14692 Ty
->getPrimitiveSizeInBits() / 64);
14693 Value
*LHS
= CGF
.Builder
.CreateBitCast(Ops
[0], Ty
);
14694 Value
*RHS
= CGF
.Builder
.CreateBitCast(Ops
[1], Ty
);
14697 // Shift left then arithmetic shift right.
14698 Constant
*ShiftAmt
= ConstantInt::get(Ty
, 32);
14699 LHS
= CGF
.Builder
.CreateShl(LHS
, ShiftAmt
);
14700 LHS
= CGF
.Builder
.CreateAShr(LHS
, ShiftAmt
);
14701 RHS
= CGF
.Builder
.CreateShl(RHS
, ShiftAmt
);
14702 RHS
= CGF
.Builder
.CreateAShr(RHS
, ShiftAmt
);
14704 // Clear the upper bits.
14705 Constant
*Mask
= ConstantInt::get(Ty
, 0xffffffff);
14706 LHS
= CGF
.Builder
.CreateAnd(LHS
, Mask
);
14707 RHS
= CGF
.Builder
.CreateAnd(RHS
, Mask
);
14710 return CGF
.Builder
.CreateMul(LHS
, RHS
);
14713 // Emit a masked pternlog intrinsic. This only exists because the header has to
14714 // use a macro and we aren't able to pass the input argument to a pternlog
14715 // builtin and a select builtin without evaluating it twice.
14716 static Value
*EmitX86Ternlog(CodeGenFunction
&CGF
, bool ZeroMask
,
14717 ArrayRef
<Value
*> Ops
) {
14718 llvm::Type
*Ty
= Ops
[0]->getType();
14720 unsigned VecWidth
= Ty
->getPrimitiveSizeInBits();
14721 unsigned EltWidth
= Ty
->getScalarSizeInBits();
14723 if (VecWidth
== 128 && EltWidth
== 32)
14724 IID
= Intrinsic::x86_avx512_pternlog_d_128
;
14725 else if (VecWidth
== 256 && EltWidth
== 32)
14726 IID
= Intrinsic::x86_avx512_pternlog_d_256
;
14727 else if (VecWidth
== 512 && EltWidth
== 32)
14728 IID
= Intrinsic::x86_avx512_pternlog_d_512
;
14729 else if (VecWidth
== 128 && EltWidth
== 64)
14730 IID
= Intrinsic::x86_avx512_pternlog_q_128
;
14731 else if (VecWidth
== 256 && EltWidth
== 64)
14732 IID
= Intrinsic::x86_avx512_pternlog_q_256
;
14733 else if (VecWidth
== 512 && EltWidth
== 64)
14734 IID
= Intrinsic::x86_avx512_pternlog_q_512
;
14736 llvm_unreachable("Unexpected intrinsic");
14738 Value
*Ternlog
= CGF
.Builder
.CreateCall(CGF
.CGM
.getIntrinsic(IID
),
14740 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(Ty
) : Ops
[0];
14741 return EmitX86Select(CGF
, Ops
[4], Ternlog
, PassThru
);
14744 static Value
*EmitX86SExtMask(CodeGenFunction
&CGF
, Value
*Op
,
14745 llvm::Type
*DstTy
) {
14746 unsigned NumberOfElements
=
14747 cast
<llvm::FixedVectorType
>(DstTy
)->getNumElements();
14748 Value
*Mask
= getMaskVecValue(CGF
, Op
, NumberOfElements
);
14749 return CGF
.Builder
.CreateSExt(Mask
, DstTy
, "vpmovm2");
14752 Value
*CodeGenFunction::EmitX86CpuIs(const CallExpr
*E
) {
14753 const Expr
*CPUExpr
= E
->getArg(0)->IgnoreParenCasts();
14754 StringRef CPUStr
= cast
<clang::StringLiteral
>(CPUExpr
)->getString();
14755 return EmitX86CpuIs(CPUStr
);
14758 // Convert F16 halfs to floats.
14759 static Value
*EmitX86CvtF16ToFloatExpr(CodeGenFunction
&CGF
,
14760 ArrayRef
<Value
*> Ops
,
14761 llvm::Type
*DstTy
) {
14762 assert((Ops
.size() == 1 || Ops
.size() == 3 || Ops
.size() == 4) &&
14763 "Unknown cvtph2ps intrinsic");
14765 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
14766 if (Ops
.size() == 4 && cast
<llvm::ConstantInt
>(Ops
[3])->getZExtValue() != 4) {
14768 CGF
.CGM
.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512
);
14769 return CGF
.Builder
.CreateCall(F
, {Ops
[0], Ops
[1], Ops
[2], Ops
[3]});
14772 unsigned NumDstElts
= cast
<llvm::FixedVectorType
>(DstTy
)->getNumElements();
14773 Value
*Src
= Ops
[0];
14775 // Extract the subvector.
14777 cast
<llvm::FixedVectorType
>(Src
->getType())->getNumElements()) {
14778 assert(NumDstElts
== 4 && "Unexpected vector size");
14779 Src
= CGF
.Builder
.CreateShuffleVector(Src
, ArrayRef
<int>{0, 1, 2, 3});
14782 // Bitcast from vXi16 to vXf16.
14783 auto *HalfTy
= llvm::FixedVectorType::get(
14784 llvm::Type::getHalfTy(CGF
.getLLVMContext()), NumDstElts
);
14785 Src
= CGF
.Builder
.CreateBitCast(Src
, HalfTy
);
14787 // Perform the fp-extension.
14788 Value
*Res
= CGF
.Builder
.CreateFPExt(Src
, DstTy
, "cvtph2ps");
14790 if (Ops
.size() >= 3)
14791 Res
= EmitX86Select(CGF
, Ops
[2], Res
, Ops
[1]);
14795 Value
*CodeGenFunction::EmitX86CpuIs(StringRef CPUStr
) {
14797 llvm::Type
*Int32Ty
= Builder
.getInt32Ty();
14799 // Matching the struct layout from the compiler-rt/libgcc structure that is
14801 // unsigned int __cpu_vendor;
14802 // unsigned int __cpu_type;
14803 // unsigned int __cpu_subtype;
14804 // unsigned int __cpu_features[1];
14805 llvm::Type
*STy
= llvm::StructType::get(Int32Ty
, Int32Ty
, Int32Ty
,
14806 llvm::ArrayType::get(Int32Ty
, 1));
14808 // Grab the global __cpu_model.
14809 llvm::Constant
*CpuModel
= CGM
.CreateRuntimeVariable(STy
, "__cpu_model");
14810 cast
<llvm::GlobalValue
>(CpuModel
)->setDSOLocal(true);
14812 // Calculate the index needed to access the correct field based on the
14813 // range. Also adjust the expected value.
14816 std::tie(Index
, Value
) = StringSwitch
<std::pair
<unsigned, unsigned>>(CPUStr
)
14817 #define X86_VENDOR(ENUM, STRING) \
14818 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14819 #define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14820 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14821 #define X86_CPU_TYPE(ENUM, STR) \
14822 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14823 #define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14824 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14825 #define X86_CPU_SUBTYPE(ENUM, STR) \
14826 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14827 #include "llvm/TargetParser/X86TargetParser.def"
14829 assert(Value
!= 0 && "Invalid CPUStr passed to CpuIs");
14831 // Grab the appropriate field from __cpu_model.
14832 llvm::Value
*Idxs
[] = {ConstantInt::get(Int32Ty
, 0),
14833 ConstantInt::get(Int32Ty
, Index
)};
14834 llvm::Value
*CpuValue
= Builder
.CreateInBoundsGEP(STy
, CpuModel
, Idxs
);
14835 CpuValue
= Builder
.CreateAlignedLoad(Int32Ty
, CpuValue
,
14836 CharUnits::fromQuantity(4));
14838 // Check the value of the field against the requested value.
14839 return Builder
.CreateICmpEQ(CpuValue
,
14840 llvm::ConstantInt::get(Int32Ty
, Value
));
14843 Value
*CodeGenFunction::EmitX86CpuSupports(const CallExpr
*E
) {
14844 const Expr
*FeatureExpr
= E
->getArg(0)->IgnoreParenCasts();
14845 StringRef FeatureStr
= cast
<StringLiteral
>(FeatureExpr
)->getString();
14846 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr
))
14847 return Builder
.getFalse();
14848 return EmitX86CpuSupports(FeatureStr
);
14851 Value
*CodeGenFunction::EmitX86CpuSupports(ArrayRef
<StringRef
> FeatureStrs
) {
14852 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs
));
14856 CodeGenFunction::EmitX86CpuSupports(std::array
<uint32_t, 4> FeatureMask
) {
14857 Value
*Result
= Builder
.getTrue();
14858 if (FeatureMask
[0] != 0) {
14859 // Matching the struct layout from the compiler-rt/libgcc structure that is
14861 // unsigned int __cpu_vendor;
14862 // unsigned int __cpu_type;
14863 // unsigned int __cpu_subtype;
14864 // unsigned int __cpu_features[1];
14865 llvm::Type
*STy
= llvm::StructType::get(Int32Ty
, Int32Ty
, Int32Ty
,
14866 llvm::ArrayType::get(Int32Ty
, 1));
14868 // Grab the global __cpu_model.
14869 llvm::Constant
*CpuModel
= CGM
.CreateRuntimeVariable(STy
, "__cpu_model");
14870 cast
<llvm::GlobalValue
>(CpuModel
)->setDSOLocal(true);
14872 // Grab the first (0th) element from the field __cpu_features off of the
14873 // global in the struct STy.
14874 Value
*Idxs
[] = {Builder
.getInt32(0), Builder
.getInt32(3),
14875 Builder
.getInt32(0)};
14876 Value
*CpuFeatures
= Builder
.CreateInBoundsGEP(STy
, CpuModel
, Idxs
);
14877 Value
*Features
= Builder
.CreateAlignedLoad(Int32Ty
, CpuFeatures
,
14878 CharUnits::fromQuantity(4));
14880 // Check the value of the bit corresponding to the feature requested.
14881 Value
*Mask
= Builder
.getInt32(FeatureMask
[0]);
14882 Value
*Bitset
= Builder
.CreateAnd(Features
, Mask
);
14883 Value
*Cmp
= Builder
.CreateICmpEQ(Bitset
, Mask
);
14884 Result
= Builder
.CreateAnd(Result
, Cmp
);
14887 llvm::Type
*ATy
= llvm::ArrayType::get(Int32Ty
, 3);
14888 llvm::Constant
*CpuFeatures2
=
14889 CGM
.CreateRuntimeVariable(ATy
, "__cpu_features2");
14890 cast
<llvm::GlobalValue
>(CpuFeatures2
)->setDSOLocal(true);
14891 for (int i
= 1; i
!= 4; ++i
) {
14892 const uint32_t M
= FeatureMask
[i
];
14895 Value
*Idxs
[] = {Builder
.getInt32(0), Builder
.getInt32(i
- 1)};
14896 Value
*Features
= Builder
.CreateAlignedLoad(
14897 Int32Ty
, Builder
.CreateInBoundsGEP(ATy
, CpuFeatures2
, Idxs
),
14898 CharUnits::fromQuantity(4));
14899 // Check the value of the bit corresponding to the feature requested.
14900 Value
*Mask
= Builder
.getInt32(M
);
14901 Value
*Bitset
= Builder
.CreateAnd(Features
, Mask
);
14902 Value
*Cmp
= Builder
.CreateICmpEQ(Bitset
, Mask
);
14903 Result
= Builder
.CreateAnd(Result
, Cmp
);
14909 Value
*CodeGenFunction::EmitAArch64CpuInit() {
14910 llvm::FunctionType
*FTy
= llvm::FunctionType::get(VoidTy
, false);
14911 llvm::FunctionCallee Func
=
14912 CGM
.CreateRuntimeFunction(FTy
, "__init_cpu_features_resolver");
14913 cast
<llvm::GlobalValue
>(Func
.getCallee())->setDSOLocal(true);
14914 cast
<llvm::GlobalValue
>(Func
.getCallee())
14915 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass
);
14916 return Builder
.CreateCall(Func
);
14919 Value
*CodeGenFunction::EmitRISCVCpuInit() {
14920 llvm::FunctionType
*FTy
= llvm::FunctionType::get(VoidTy
, {VoidPtrTy
}, false);
14921 llvm::FunctionCallee Func
=
14922 CGM
.CreateRuntimeFunction(FTy
, "__init_riscv_feature_bits");
14923 auto *CalleeGV
= cast
<llvm::GlobalValue
>(Func
.getCallee());
14924 CalleeGV
->setDSOLocal(true);
14925 CalleeGV
->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass
);
14926 return Builder
.CreateCall(Func
, {llvm::ConstantPointerNull::get(VoidPtrTy
)});
14929 Value
*CodeGenFunction::EmitX86CpuInit() {
14930 llvm::FunctionType
*FTy
= llvm::FunctionType::get(VoidTy
,
14931 /*Variadic*/ false);
14932 llvm::FunctionCallee Func
=
14933 CGM
.CreateRuntimeFunction(FTy
, "__cpu_indicator_init");
14934 cast
<llvm::GlobalValue
>(Func
.getCallee())->setDSOLocal(true);
14935 cast
<llvm::GlobalValue
>(Func
.getCallee())
14936 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass
);
14937 return Builder
.CreateCall(Func
);
14940 Value
*CodeGenFunction::EmitAArch64CpuSupports(const CallExpr
*E
) {
14941 const Expr
*ArgExpr
= E
->getArg(0)->IgnoreParenCasts();
14942 StringRef ArgStr
= cast
<StringLiteral
>(ArgExpr
)->getString();
14943 llvm::SmallVector
<StringRef
, 8> Features
;
14944 ArgStr
.split(Features
, "+");
14945 for (auto &Feature
: Features
) {
14946 Feature
= Feature
.trim();
14947 if (!llvm::AArch64::parseFMVExtension(Feature
))
14948 return Builder
.getFalse();
14949 if (Feature
!= "default")
14950 Features
.push_back(Feature
);
14952 return EmitAArch64CpuSupports(Features
);
14956 CodeGenFunction::EmitAArch64CpuSupports(ArrayRef
<StringRef
> FeaturesStrs
) {
14957 uint64_t FeaturesMask
= llvm::AArch64::getCpuSupportsMask(FeaturesStrs
);
14958 Value
*Result
= Builder
.getTrue();
14959 if (FeaturesMask
!= 0) {
14960 // Get features from structure in runtime library
14962 // unsigned long long features;
14963 // } __aarch64_cpu_features;
14964 llvm::Type
*STy
= llvm::StructType::get(Int64Ty
);
14965 llvm::Constant
*AArch64CPUFeatures
=
14966 CGM
.CreateRuntimeVariable(STy
, "__aarch64_cpu_features");
14967 cast
<llvm::GlobalValue
>(AArch64CPUFeatures
)->setDSOLocal(true);
14968 llvm::Value
*CpuFeatures
= Builder
.CreateGEP(
14969 STy
, AArch64CPUFeatures
,
14970 {ConstantInt::get(Int32Ty
, 0), ConstantInt::get(Int32Ty
, 0)});
14971 Value
*Features
= Builder
.CreateAlignedLoad(Int64Ty
, CpuFeatures
,
14972 CharUnits::fromQuantity(8));
14973 Value
*Mask
= Builder
.getInt64(FeaturesMask
);
14974 Value
*Bitset
= Builder
.CreateAnd(Features
, Mask
);
14975 Value
*Cmp
= Builder
.CreateICmpEQ(Bitset
, Mask
);
14976 Result
= Builder
.CreateAnd(Result
, Cmp
);
14981 Value
*CodeGenFunction::EmitRISCVCpuSupports(const CallExpr
*E
) {
14983 const Expr
*FeatureExpr
= E
->getArg(0)->IgnoreParenCasts();
14984 StringRef FeatureStr
= cast
<StringLiteral
>(FeatureExpr
)->getString();
14985 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr
))
14986 return Builder
.getFalse();
14988 return EmitRISCVCpuSupports(ArrayRef
<StringRef
>(FeatureStr
));
14991 static Value
*loadRISCVFeatureBits(unsigned Index
, CGBuilderTy
&Builder
,
14992 CodeGenModule
&CGM
) {
14993 llvm::Type
*Int32Ty
= Builder
.getInt32Ty();
14994 llvm::Type
*Int64Ty
= Builder
.getInt64Ty();
14995 llvm::ArrayType
*ArrayOfInt64Ty
=
14996 llvm::ArrayType::get(Int64Ty
, llvm::RISCVISAInfo::FeatureBitSize
);
14997 llvm::Type
*StructTy
= llvm::StructType::get(Int32Ty
, ArrayOfInt64Ty
);
14998 llvm::Constant
*RISCVFeaturesBits
=
14999 CGM
.CreateRuntimeVariable(StructTy
, "__riscv_feature_bits");
15000 cast
<llvm::GlobalValue
>(RISCVFeaturesBits
)->setDSOLocal(true);
15001 Value
*IndexVal
= llvm::ConstantInt::get(Int32Ty
, Index
);
15002 llvm::Value
*GEPIndices
[] = {Builder
.getInt32(0), Builder
.getInt32(1),
15005 Builder
.CreateInBoundsGEP(StructTy
, RISCVFeaturesBits
, GEPIndices
);
15006 Value
*FeaturesBit
=
15007 Builder
.CreateAlignedLoad(Int64Ty
, Ptr
, CharUnits::fromQuantity(8));
15008 return FeaturesBit
;
15011 Value
*CodeGenFunction::EmitRISCVCpuSupports(ArrayRef
<StringRef
> FeaturesStrs
) {
15012 const unsigned RISCVFeatureLength
= llvm::RISCVISAInfo::FeatureBitSize
;
15013 uint64_t RequireBitMasks
[RISCVFeatureLength
] = {0};
15015 for (auto Feat
: FeaturesStrs
) {
15016 auto [GroupID
, BitPos
] = RISCVISAInfo::getRISCVFeaturesBitsInfo(Feat
);
15018 // If there isn't BitPos for this feature, skip this version.
15019 // It also report the warning to user during compilation.
15021 return Builder
.getFalse();
15023 RequireBitMasks
[GroupID
] |= (1ULL << BitPos
);
15026 Value
*Result
= nullptr;
15027 for (unsigned Idx
= 0; Idx
< RISCVFeatureLength
; Idx
++) {
15028 if (RequireBitMasks
[Idx
] == 0)
15031 Value
*Mask
= Builder
.getInt64(RequireBitMasks
[Idx
]);
15033 Builder
.CreateAnd(loadRISCVFeatureBits(Idx
, Builder
, CGM
), Mask
);
15034 Value
*CmpV
= Builder
.CreateICmpEQ(Bitset
, Mask
);
15035 Result
= (!Result
) ? CmpV
: Builder
.CreateAnd(Result
, CmpV
);
15038 assert(Result
&& "Should have value here.");
15043 Value
*CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID
,
15044 const CallExpr
*E
) {
15045 if (BuiltinID
== Builtin::BI__builtin_cpu_is
)
15046 return EmitX86CpuIs(E
);
15047 if (BuiltinID
== Builtin::BI__builtin_cpu_supports
)
15048 return EmitX86CpuSupports(E
);
15049 if (BuiltinID
== Builtin::BI__builtin_cpu_init
)
15050 return EmitX86CpuInit();
15052 // Handle MSVC intrinsics before argument evaluation to prevent double
15054 if (std::optional
<MSVCIntrin
> MsvcIntId
= translateX86ToMsvcIntrin(BuiltinID
))
15055 return EmitMSVCBuiltinExpr(*MsvcIntId
, E
);
15057 SmallVector
<Value
*, 4> Ops
;
15058 bool IsMaskFCmp
= false;
15059 bool IsConjFMA
= false;
15061 // Find out if any arguments are required to be integer constant expressions.
15062 unsigned ICEArguments
= 0;
15063 ASTContext::GetBuiltinTypeError Error
;
15064 getContext().GetBuiltinType(BuiltinID
, Error
, &ICEArguments
);
15065 assert(Error
== ASTContext::GE_None
&& "Should not codegen an error");
15067 for (unsigned i
= 0, e
= E
->getNumArgs(); i
!= e
; i
++) {
15068 Ops
.push_back(EmitScalarOrConstFoldImmArg(ICEArguments
, i
, E
));
15071 // These exist so that the builtin that takes an immediate can be bounds
15072 // checked by clang to avoid passing bad immediates to the backend. Since
15073 // AVX has a larger immediate than SSE we would need separate builtins to
15074 // do the different bounds checking. Rather than create a clang specific
15075 // SSE only builtin, this implements eight separate builtins to match gcc
15077 auto getCmpIntrinsicCall
= [this, &Ops
](Intrinsic::ID ID
, unsigned Imm
) {
15078 Ops
.push_back(llvm::ConstantInt::get(Int8Ty
, Imm
));
15079 llvm::Function
*F
= CGM
.getIntrinsic(ID
);
15080 return Builder
.CreateCall(F
, Ops
);
15083 // For the vector forms of FP comparisons, translate the builtins directly to
15085 // TODO: The builtins could be removed if the SSE header files used vector
15086 // extension comparisons directly (vector ordered/unordered may need
15087 // additional support via __builtin_isnan()).
15088 auto getVectorFCmpIR
= [this, &Ops
, E
](CmpInst::Predicate Pred
,
15089 bool IsSignaling
) {
15090 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
15093 Cmp
= Builder
.CreateFCmpS(Pred
, Ops
[0], Ops
[1]);
15095 Cmp
= Builder
.CreateFCmp(Pred
, Ops
[0], Ops
[1]);
15096 llvm::VectorType
*FPVecTy
= cast
<llvm::VectorType
>(Ops
[0]->getType());
15097 llvm::VectorType
*IntVecTy
= llvm::VectorType::getInteger(FPVecTy
);
15098 Value
*Sext
= Builder
.CreateSExt(Cmp
, IntVecTy
);
15099 return Builder
.CreateBitCast(Sext
, FPVecTy
);
15102 switch (BuiltinID
) {
15103 default: return nullptr;
15104 case X86::BI_mm_prefetch
: {
15105 Value
*Address
= Ops
[0];
15106 ConstantInt
*C
= cast
<ConstantInt
>(Ops
[1]);
15107 Value
*RW
= ConstantInt::get(Int32Ty
, (C
->getZExtValue() >> 2) & 0x1);
15108 Value
*Locality
= ConstantInt::get(Int32Ty
, C
->getZExtValue() & 0x3);
15109 Value
*Data
= ConstantInt::get(Int32Ty
, 1);
15110 Function
*F
= CGM
.getIntrinsic(Intrinsic::prefetch
, Address
->getType());
15111 return Builder
.CreateCall(F
, {Address
, RW
, Locality
, Data
});
15113 case X86::BI_mm_clflush
: {
15114 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::x86_sse2_clflush
),
15117 case X86::BI_mm_lfence
: {
15118 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::x86_sse2_lfence
));
15120 case X86::BI_mm_mfence
: {
15121 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::x86_sse2_mfence
));
15123 case X86::BI_mm_sfence
: {
15124 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::x86_sse_sfence
));
15126 case X86::BI_mm_pause
: {
15127 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::x86_sse2_pause
));
15129 case X86::BI__rdtsc
: {
15130 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::x86_rdtsc
));
15132 case X86::BI__builtin_ia32_rdtscp
: {
15133 Value
*Call
= Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::x86_rdtscp
));
15134 Builder
.CreateDefaultAlignedStore(Builder
.CreateExtractValue(Call
, 1),
15136 return Builder
.CreateExtractValue(Call
, 0);
15138 case X86::BI__builtin_ia32_lzcnt_u16
:
15139 case X86::BI__builtin_ia32_lzcnt_u32
:
15140 case X86::BI__builtin_ia32_lzcnt_u64
: {
15141 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctlz
, Ops
[0]->getType());
15142 return Builder
.CreateCall(F
, {Ops
[0], Builder
.getInt1(false)});
15144 case X86::BI__builtin_ia32_tzcnt_u16
:
15145 case X86::BI__builtin_ia32_tzcnt_u32
:
15146 case X86::BI__builtin_ia32_tzcnt_u64
: {
15147 Function
*F
= CGM
.getIntrinsic(Intrinsic::cttz
, Ops
[0]->getType());
15148 return Builder
.CreateCall(F
, {Ops
[0], Builder
.getInt1(false)});
15150 case X86::BI__builtin_ia32_undef128
:
15151 case X86::BI__builtin_ia32_undef256
:
15152 case X86::BI__builtin_ia32_undef512
:
15153 // The x86 definition of "undef" is not the same as the LLVM definition
15154 // (PR32176). We leave optimizing away an unnecessary zero constant to the
15155 // IR optimizer and backend.
15156 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
15157 // value, we should use that here instead of a zero.
15158 return llvm::Constant::getNullValue(ConvertType(E
->getType()));
15159 case X86::BI__builtin_ia32_vec_ext_v4hi
:
15160 case X86::BI__builtin_ia32_vec_ext_v16qi
:
15161 case X86::BI__builtin_ia32_vec_ext_v8hi
:
15162 case X86::BI__builtin_ia32_vec_ext_v4si
:
15163 case X86::BI__builtin_ia32_vec_ext_v4sf
:
15164 case X86::BI__builtin_ia32_vec_ext_v2di
:
15165 case X86::BI__builtin_ia32_vec_ext_v32qi
:
15166 case X86::BI__builtin_ia32_vec_ext_v16hi
:
15167 case X86::BI__builtin_ia32_vec_ext_v8si
:
15168 case X86::BI__builtin_ia32_vec_ext_v4di
: {
15170 cast
<llvm::FixedVectorType
>(Ops
[0]->getType())->getNumElements();
15171 uint64_t Index
= cast
<ConstantInt
>(Ops
[1])->getZExtValue();
15172 Index
&= NumElts
- 1;
15173 // These builtins exist so we can ensure the index is an ICE and in range.
15174 // Otherwise we could just do this in the header file.
15175 return Builder
.CreateExtractElement(Ops
[0], Index
);
15177 case X86::BI__builtin_ia32_vec_set_v4hi
:
15178 case X86::BI__builtin_ia32_vec_set_v16qi
:
15179 case X86::BI__builtin_ia32_vec_set_v8hi
:
15180 case X86::BI__builtin_ia32_vec_set_v4si
:
15181 case X86::BI__builtin_ia32_vec_set_v2di
:
15182 case X86::BI__builtin_ia32_vec_set_v32qi
:
15183 case X86::BI__builtin_ia32_vec_set_v16hi
:
15184 case X86::BI__builtin_ia32_vec_set_v8si
:
15185 case X86::BI__builtin_ia32_vec_set_v4di
: {
15187 cast
<llvm::FixedVectorType
>(Ops
[0]->getType())->getNumElements();
15188 unsigned Index
= cast
<ConstantInt
>(Ops
[2])->getZExtValue();
15189 Index
&= NumElts
- 1;
15190 // These builtins exist so we can ensure the index is an ICE and in range.
15191 // Otherwise we could just do this in the header file.
15192 return Builder
.CreateInsertElement(Ops
[0], Ops
[1], Index
);
15194 case X86::BI_mm_setcsr
:
15195 case X86::BI__builtin_ia32_ldmxcsr
: {
15196 RawAddress Tmp
= CreateMemTemp(E
->getArg(0)->getType());
15197 Builder
.CreateStore(Ops
[0], Tmp
);
15198 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::x86_sse_ldmxcsr
),
15201 case X86::BI_mm_getcsr
:
15202 case X86::BI__builtin_ia32_stmxcsr
: {
15203 RawAddress Tmp
= CreateMemTemp(E
->getType());
15204 Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::x86_sse_stmxcsr
),
15206 return Builder
.CreateLoad(Tmp
, "stmxcsr");
15208 case X86::BI__builtin_ia32_xsave
:
15209 case X86::BI__builtin_ia32_xsave64
:
15210 case X86::BI__builtin_ia32_xrstor
:
15211 case X86::BI__builtin_ia32_xrstor64
:
15212 case X86::BI__builtin_ia32_xsaveopt
:
15213 case X86::BI__builtin_ia32_xsaveopt64
:
15214 case X86::BI__builtin_ia32_xrstors
:
15215 case X86::BI__builtin_ia32_xrstors64
:
15216 case X86::BI__builtin_ia32_xsavec
:
15217 case X86::BI__builtin_ia32_xsavec64
:
15218 case X86::BI__builtin_ia32_xsaves
:
15219 case X86::BI__builtin_ia32_xsaves64
:
15220 case X86::BI__builtin_ia32_xsetbv
:
15221 case X86::BI_xsetbv
: {
15223 #define INTRINSIC_X86_XSAVE_ID(NAME) \
15224 case X86::BI__builtin_ia32_##NAME: \
15225 ID = Intrinsic::x86_##NAME; \
15227 switch (BuiltinID
) {
15228 default: llvm_unreachable("Unsupported intrinsic!");
15229 INTRINSIC_X86_XSAVE_ID(xsave
);
15230 INTRINSIC_X86_XSAVE_ID(xsave64
);
15231 INTRINSIC_X86_XSAVE_ID(xrstor
);
15232 INTRINSIC_X86_XSAVE_ID(xrstor64
);
15233 INTRINSIC_X86_XSAVE_ID(xsaveopt
);
15234 INTRINSIC_X86_XSAVE_ID(xsaveopt64
);
15235 INTRINSIC_X86_XSAVE_ID(xrstors
);
15236 INTRINSIC_X86_XSAVE_ID(xrstors64
);
15237 INTRINSIC_X86_XSAVE_ID(xsavec
);
15238 INTRINSIC_X86_XSAVE_ID(xsavec64
);
15239 INTRINSIC_X86_XSAVE_ID(xsaves
);
15240 INTRINSIC_X86_XSAVE_ID(xsaves64
);
15241 INTRINSIC_X86_XSAVE_ID(xsetbv
);
15242 case X86::BI_xsetbv
:
15243 ID
= Intrinsic::x86_xsetbv
;
15246 #undef INTRINSIC_X86_XSAVE_ID
15247 Value
*Mhi
= Builder
.CreateTrunc(
15248 Builder
.CreateLShr(Ops
[1], ConstantInt::get(Int64Ty
, 32)), Int32Ty
);
15249 Value
*Mlo
= Builder
.CreateTrunc(Ops
[1], Int32Ty
);
15251 Ops
.push_back(Mlo
);
15252 return Builder
.CreateCall(CGM
.getIntrinsic(ID
), Ops
);
15254 case X86::BI__builtin_ia32_xgetbv
:
15255 case X86::BI_xgetbv
:
15256 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::x86_xgetbv
), Ops
);
15257 case X86::BI__builtin_ia32_storedqudi128_mask
:
15258 case X86::BI__builtin_ia32_storedqusi128_mask
:
15259 case X86::BI__builtin_ia32_storedquhi128_mask
:
15260 case X86::BI__builtin_ia32_storedquqi128_mask
:
15261 case X86::BI__builtin_ia32_storeupd128_mask
:
15262 case X86::BI__builtin_ia32_storeups128_mask
:
15263 case X86::BI__builtin_ia32_storedqudi256_mask
:
15264 case X86::BI__builtin_ia32_storedqusi256_mask
:
15265 case X86::BI__builtin_ia32_storedquhi256_mask
:
15266 case X86::BI__builtin_ia32_storedquqi256_mask
:
15267 case X86::BI__builtin_ia32_storeupd256_mask
:
15268 case X86::BI__builtin_ia32_storeups256_mask
:
15269 case X86::BI__builtin_ia32_storedqudi512_mask
:
15270 case X86::BI__builtin_ia32_storedqusi512_mask
:
15271 case X86::BI__builtin_ia32_storedquhi512_mask
:
15272 case X86::BI__builtin_ia32_storedquqi512_mask
:
15273 case X86::BI__builtin_ia32_storeupd512_mask
:
15274 case X86::BI__builtin_ia32_storeups512_mask
:
15275 return EmitX86MaskedStore(*this, Ops
, Align(1));
15277 case X86::BI__builtin_ia32_storesbf16128_mask
:
15278 case X86::BI__builtin_ia32_storesh128_mask
:
15279 case X86::BI__builtin_ia32_storess128_mask
:
15280 case X86::BI__builtin_ia32_storesd128_mask
:
15281 return EmitX86MaskedStore(*this, Ops
, Align(1));
15283 case X86::BI__builtin_ia32_cvtmask2b128
:
15284 case X86::BI__builtin_ia32_cvtmask2b256
:
15285 case X86::BI__builtin_ia32_cvtmask2b512
:
15286 case X86::BI__builtin_ia32_cvtmask2w128
:
15287 case X86::BI__builtin_ia32_cvtmask2w256
:
15288 case X86::BI__builtin_ia32_cvtmask2w512
:
15289 case X86::BI__builtin_ia32_cvtmask2d128
:
15290 case X86::BI__builtin_ia32_cvtmask2d256
:
15291 case X86::BI__builtin_ia32_cvtmask2d512
:
15292 case X86::BI__builtin_ia32_cvtmask2q128
:
15293 case X86::BI__builtin_ia32_cvtmask2q256
:
15294 case X86::BI__builtin_ia32_cvtmask2q512
:
15295 return EmitX86SExtMask(*this, Ops
[0], ConvertType(E
->getType()));
15297 case X86::BI__builtin_ia32_cvtb2mask128
:
15298 case X86::BI__builtin_ia32_cvtb2mask256
:
15299 case X86::BI__builtin_ia32_cvtb2mask512
:
15300 case X86::BI__builtin_ia32_cvtw2mask128
:
15301 case X86::BI__builtin_ia32_cvtw2mask256
:
15302 case X86::BI__builtin_ia32_cvtw2mask512
:
15303 case X86::BI__builtin_ia32_cvtd2mask128
:
15304 case X86::BI__builtin_ia32_cvtd2mask256
:
15305 case X86::BI__builtin_ia32_cvtd2mask512
:
15306 case X86::BI__builtin_ia32_cvtq2mask128
:
15307 case X86::BI__builtin_ia32_cvtq2mask256
:
15308 case X86::BI__builtin_ia32_cvtq2mask512
:
15309 return EmitX86ConvertToMask(*this, Ops
[0]);
15311 case X86::BI__builtin_ia32_cvtdq2ps512_mask
:
15312 case X86::BI__builtin_ia32_cvtqq2ps512_mask
:
15313 case X86::BI__builtin_ia32_cvtqq2pd512_mask
:
15314 case X86::BI__builtin_ia32_vcvtw2ph512_mask
:
15315 case X86::BI__builtin_ia32_vcvtdq2ph512_mask
:
15316 case X86::BI__builtin_ia32_vcvtqq2ph512_mask
:
15317 case X86::BI__builtin_ia32_vcvtdq2ph256_round_mask
:
15318 case X86::BI__builtin_ia32_vcvtdq2ps256_round_mask
:
15319 case X86::BI__builtin_ia32_vcvtqq2pd256_round_mask
:
15320 case X86::BI__builtin_ia32_vcvtqq2ph256_round_mask
:
15321 case X86::BI__builtin_ia32_vcvtqq2ps256_round_mask
:
15322 case X86::BI__builtin_ia32_vcvtw2ph256_round_mask
:
15323 return EmitX86ConvertIntToFp(*this, E
, Ops
, /*IsSigned*/ true);
15324 case X86::BI__builtin_ia32_cvtudq2ps512_mask
:
15325 case X86::BI__builtin_ia32_cvtuqq2ps512_mask
:
15326 case X86::BI__builtin_ia32_cvtuqq2pd512_mask
:
15327 case X86::BI__builtin_ia32_vcvtuw2ph512_mask
:
15328 case X86::BI__builtin_ia32_vcvtudq2ph512_mask
:
15329 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask
:
15330 case X86::BI__builtin_ia32_vcvtudq2ph256_round_mask
:
15331 case X86::BI__builtin_ia32_vcvtudq2ps256_round_mask
:
15332 case X86::BI__builtin_ia32_vcvtuqq2pd256_round_mask
:
15333 case X86::BI__builtin_ia32_vcvtuqq2ph256_round_mask
:
15334 case X86::BI__builtin_ia32_vcvtuqq2ps256_round_mask
:
15335 case X86::BI__builtin_ia32_vcvtuw2ph256_round_mask
:
15336 return EmitX86ConvertIntToFp(*this, E
, Ops
, /*IsSigned*/ false);
15338 case X86::BI__builtin_ia32_vfmaddss3
:
15339 case X86::BI__builtin_ia32_vfmaddsd3
:
15340 case X86::BI__builtin_ia32_vfmaddsh3_mask
:
15341 case X86::BI__builtin_ia32_vfmaddss3_mask
:
15342 case X86::BI__builtin_ia32_vfmaddsd3_mask
:
15343 return EmitScalarFMAExpr(*this, E
, Ops
, Ops
[0]);
15344 case X86::BI__builtin_ia32_vfmaddss
:
15345 case X86::BI__builtin_ia32_vfmaddsd
:
15346 return EmitScalarFMAExpr(*this, E
, Ops
,
15347 Constant::getNullValue(Ops
[0]->getType()));
15348 case X86::BI__builtin_ia32_vfmaddsh3_maskz
:
15349 case X86::BI__builtin_ia32_vfmaddss3_maskz
:
15350 case X86::BI__builtin_ia32_vfmaddsd3_maskz
:
15351 return EmitScalarFMAExpr(*this, E
, Ops
, Ops
[0], /*ZeroMask*/ true);
15352 case X86::BI__builtin_ia32_vfmaddsh3_mask3
:
15353 case X86::BI__builtin_ia32_vfmaddss3_mask3
:
15354 case X86::BI__builtin_ia32_vfmaddsd3_mask3
:
15355 return EmitScalarFMAExpr(*this, E
, Ops
, Ops
[2], /*ZeroMask*/ false, 2);
15356 case X86::BI__builtin_ia32_vfmsubsh3_mask3
:
15357 case X86::BI__builtin_ia32_vfmsubss3_mask3
:
15358 case X86::BI__builtin_ia32_vfmsubsd3_mask3
:
15359 return EmitScalarFMAExpr(*this, E
, Ops
, Ops
[2], /*ZeroMask*/ false, 2,
15361 case X86::BI__builtin_ia32_vfmaddph
:
15362 case X86::BI__builtin_ia32_vfmaddps
:
15363 case X86::BI__builtin_ia32_vfmaddpd
:
15364 case X86::BI__builtin_ia32_vfmaddph256
:
15365 case X86::BI__builtin_ia32_vfmaddps256
:
15366 case X86::BI__builtin_ia32_vfmaddpd256
:
15367 case X86::BI__builtin_ia32_vfmaddph512_mask
:
15368 case X86::BI__builtin_ia32_vfmaddph512_maskz
:
15369 case X86::BI__builtin_ia32_vfmaddph512_mask3
:
15370 case X86::BI__builtin_ia32_vfmaddnepbh128
:
15371 case X86::BI__builtin_ia32_vfmaddnepbh256
:
15372 case X86::BI__builtin_ia32_vfmaddnepbh512
:
15373 case X86::BI__builtin_ia32_vfmaddps512_mask
:
15374 case X86::BI__builtin_ia32_vfmaddps512_maskz
:
15375 case X86::BI__builtin_ia32_vfmaddps512_mask3
:
15376 case X86::BI__builtin_ia32_vfmsubps512_mask3
:
15377 case X86::BI__builtin_ia32_vfmaddpd512_mask
:
15378 case X86::BI__builtin_ia32_vfmaddpd512_maskz
:
15379 case X86::BI__builtin_ia32_vfmaddpd512_mask3
:
15380 case X86::BI__builtin_ia32_vfmsubpd512_mask3
:
15381 case X86::BI__builtin_ia32_vfmsubph512_mask3
:
15382 case X86::BI__builtin_ia32_vfmaddph256_round_mask
:
15383 case X86::BI__builtin_ia32_vfmaddph256_round_maskz
:
15384 case X86::BI__builtin_ia32_vfmaddph256_round_mask3
:
15385 case X86::BI__builtin_ia32_vfmaddps256_round_mask
:
15386 case X86::BI__builtin_ia32_vfmaddps256_round_maskz
:
15387 case X86::BI__builtin_ia32_vfmaddps256_round_mask3
:
15388 case X86::BI__builtin_ia32_vfmsubps256_round_mask3
:
15389 case X86::BI__builtin_ia32_vfmaddpd256_round_mask
:
15390 case X86::BI__builtin_ia32_vfmaddpd256_round_maskz
:
15391 case X86::BI__builtin_ia32_vfmaddpd256_round_mask3
:
15392 case X86::BI__builtin_ia32_vfmsubpd256_round_mask3
:
15393 case X86::BI__builtin_ia32_vfmsubph256_round_mask3
:
15394 return EmitX86FMAExpr(*this, E
, Ops
, BuiltinID
, /*IsAddSub*/ false);
15395 case X86::BI__builtin_ia32_vfmaddsubph512_mask
:
15396 case X86::BI__builtin_ia32_vfmaddsubph512_maskz
:
15397 case X86::BI__builtin_ia32_vfmaddsubph512_mask3
:
15398 case X86::BI__builtin_ia32_vfmsubaddph512_mask3
:
15399 case X86::BI__builtin_ia32_vfmaddsubps512_mask
:
15400 case X86::BI__builtin_ia32_vfmaddsubps512_maskz
:
15401 case X86::BI__builtin_ia32_vfmaddsubps512_mask3
:
15402 case X86::BI__builtin_ia32_vfmsubaddps512_mask3
:
15403 case X86::BI__builtin_ia32_vfmaddsubpd512_mask
:
15404 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz
:
15405 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3
:
15406 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3
:
15407 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask
:
15408 case X86::BI__builtin_ia32_vfmaddsubph256_round_maskz
:
15409 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask3
:
15410 case X86::BI__builtin_ia32_vfmsubaddph256_round_mask3
:
15411 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask
:
15412 case X86::BI__builtin_ia32_vfmaddsubps256_round_maskz
:
15413 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask3
:
15414 case X86::BI__builtin_ia32_vfmsubaddps256_round_mask3
:
15415 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask
:
15416 case X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz
:
15417 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3
:
15418 case X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3
:
15419 return EmitX86FMAExpr(*this, E
, Ops
, BuiltinID
, /*IsAddSub*/ true);
15421 case X86::BI__builtin_ia32_movdqa32store128_mask
:
15422 case X86::BI__builtin_ia32_movdqa64store128_mask
:
15423 case X86::BI__builtin_ia32_storeaps128_mask
:
15424 case X86::BI__builtin_ia32_storeapd128_mask
:
15425 case X86::BI__builtin_ia32_movdqa32store256_mask
:
15426 case X86::BI__builtin_ia32_movdqa64store256_mask
:
15427 case X86::BI__builtin_ia32_storeaps256_mask
:
15428 case X86::BI__builtin_ia32_storeapd256_mask
:
15429 case X86::BI__builtin_ia32_movdqa32store512_mask
:
15430 case X86::BI__builtin_ia32_movdqa64store512_mask
:
15431 case X86::BI__builtin_ia32_storeaps512_mask
:
15432 case X86::BI__builtin_ia32_storeapd512_mask
:
15433 return EmitX86MaskedStore(
15435 getContext().getTypeAlignInChars(E
->getArg(1)->getType()).getAsAlign());
15437 case X86::BI__builtin_ia32_loadups128_mask
:
15438 case X86::BI__builtin_ia32_loadups256_mask
:
15439 case X86::BI__builtin_ia32_loadups512_mask
:
15440 case X86::BI__builtin_ia32_loadupd128_mask
:
15441 case X86::BI__builtin_ia32_loadupd256_mask
:
15442 case X86::BI__builtin_ia32_loadupd512_mask
:
15443 case X86::BI__builtin_ia32_loaddquqi128_mask
:
15444 case X86::BI__builtin_ia32_loaddquqi256_mask
:
15445 case X86::BI__builtin_ia32_loaddquqi512_mask
:
15446 case X86::BI__builtin_ia32_loaddquhi128_mask
:
15447 case X86::BI__builtin_ia32_loaddquhi256_mask
:
15448 case X86::BI__builtin_ia32_loaddquhi512_mask
:
15449 case X86::BI__builtin_ia32_loaddqusi128_mask
:
15450 case X86::BI__builtin_ia32_loaddqusi256_mask
:
15451 case X86::BI__builtin_ia32_loaddqusi512_mask
:
15452 case X86::BI__builtin_ia32_loaddqudi128_mask
:
15453 case X86::BI__builtin_ia32_loaddqudi256_mask
:
15454 case X86::BI__builtin_ia32_loaddqudi512_mask
:
15455 return EmitX86MaskedLoad(*this, Ops
, Align(1));
15457 case X86::BI__builtin_ia32_loadsbf16128_mask
:
15458 case X86::BI__builtin_ia32_loadsh128_mask
:
15459 case X86::BI__builtin_ia32_loadss128_mask
:
15460 case X86::BI__builtin_ia32_loadsd128_mask
:
15461 return EmitX86MaskedLoad(*this, Ops
, Align(1));
15463 case X86::BI__builtin_ia32_loadaps128_mask
:
15464 case X86::BI__builtin_ia32_loadaps256_mask
:
15465 case X86::BI__builtin_ia32_loadaps512_mask
:
15466 case X86::BI__builtin_ia32_loadapd128_mask
:
15467 case X86::BI__builtin_ia32_loadapd256_mask
:
15468 case X86::BI__builtin_ia32_loadapd512_mask
:
15469 case X86::BI__builtin_ia32_movdqa32load128_mask
:
15470 case X86::BI__builtin_ia32_movdqa32load256_mask
:
15471 case X86::BI__builtin_ia32_movdqa32load512_mask
:
15472 case X86::BI__builtin_ia32_movdqa64load128_mask
:
15473 case X86::BI__builtin_ia32_movdqa64load256_mask
:
15474 case X86::BI__builtin_ia32_movdqa64load512_mask
:
15475 return EmitX86MaskedLoad(
15477 getContext().getTypeAlignInChars(E
->getArg(1)->getType()).getAsAlign());
15479 case X86::BI__builtin_ia32_expandloaddf128_mask
:
15480 case X86::BI__builtin_ia32_expandloaddf256_mask
:
15481 case X86::BI__builtin_ia32_expandloaddf512_mask
:
15482 case X86::BI__builtin_ia32_expandloadsf128_mask
:
15483 case X86::BI__builtin_ia32_expandloadsf256_mask
:
15484 case X86::BI__builtin_ia32_expandloadsf512_mask
:
15485 case X86::BI__builtin_ia32_expandloaddi128_mask
:
15486 case X86::BI__builtin_ia32_expandloaddi256_mask
:
15487 case X86::BI__builtin_ia32_expandloaddi512_mask
:
15488 case X86::BI__builtin_ia32_expandloadsi128_mask
:
15489 case X86::BI__builtin_ia32_expandloadsi256_mask
:
15490 case X86::BI__builtin_ia32_expandloadsi512_mask
:
15491 case X86::BI__builtin_ia32_expandloadhi128_mask
:
15492 case X86::BI__builtin_ia32_expandloadhi256_mask
:
15493 case X86::BI__builtin_ia32_expandloadhi512_mask
:
15494 case X86::BI__builtin_ia32_expandloadqi128_mask
:
15495 case X86::BI__builtin_ia32_expandloadqi256_mask
:
15496 case X86::BI__builtin_ia32_expandloadqi512_mask
:
15497 return EmitX86ExpandLoad(*this, Ops
);
15499 case X86::BI__builtin_ia32_compressstoredf128_mask
:
15500 case X86::BI__builtin_ia32_compressstoredf256_mask
:
15501 case X86::BI__builtin_ia32_compressstoredf512_mask
:
15502 case X86::BI__builtin_ia32_compressstoresf128_mask
:
15503 case X86::BI__builtin_ia32_compressstoresf256_mask
:
15504 case X86::BI__builtin_ia32_compressstoresf512_mask
:
15505 case X86::BI__builtin_ia32_compressstoredi128_mask
:
15506 case X86::BI__builtin_ia32_compressstoredi256_mask
:
15507 case X86::BI__builtin_ia32_compressstoredi512_mask
:
15508 case X86::BI__builtin_ia32_compressstoresi128_mask
:
15509 case X86::BI__builtin_ia32_compressstoresi256_mask
:
15510 case X86::BI__builtin_ia32_compressstoresi512_mask
:
15511 case X86::BI__builtin_ia32_compressstorehi128_mask
:
15512 case X86::BI__builtin_ia32_compressstorehi256_mask
:
15513 case X86::BI__builtin_ia32_compressstorehi512_mask
:
15514 case X86::BI__builtin_ia32_compressstoreqi128_mask
:
15515 case X86::BI__builtin_ia32_compressstoreqi256_mask
:
15516 case X86::BI__builtin_ia32_compressstoreqi512_mask
:
15517 return EmitX86CompressStore(*this, Ops
);
15519 case X86::BI__builtin_ia32_expanddf128_mask
:
15520 case X86::BI__builtin_ia32_expanddf256_mask
:
15521 case X86::BI__builtin_ia32_expanddf512_mask
:
15522 case X86::BI__builtin_ia32_expandsf128_mask
:
15523 case X86::BI__builtin_ia32_expandsf256_mask
:
15524 case X86::BI__builtin_ia32_expandsf512_mask
:
15525 case X86::BI__builtin_ia32_expanddi128_mask
:
15526 case X86::BI__builtin_ia32_expanddi256_mask
:
15527 case X86::BI__builtin_ia32_expanddi512_mask
:
15528 case X86::BI__builtin_ia32_expandsi128_mask
:
15529 case X86::BI__builtin_ia32_expandsi256_mask
:
15530 case X86::BI__builtin_ia32_expandsi512_mask
:
15531 case X86::BI__builtin_ia32_expandhi128_mask
:
15532 case X86::BI__builtin_ia32_expandhi256_mask
:
15533 case X86::BI__builtin_ia32_expandhi512_mask
:
15534 case X86::BI__builtin_ia32_expandqi128_mask
:
15535 case X86::BI__builtin_ia32_expandqi256_mask
:
15536 case X86::BI__builtin_ia32_expandqi512_mask
:
15537 return EmitX86CompressExpand(*this, Ops
, /*IsCompress*/false);
15539 case X86::BI__builtin_ia32_compressdf128_mask
:
15540 case X86::BI__builtin_ia32_compressdf256_mask
:
15541 case X86::BI__builtin_ia32_compressdf512_mask
:
15542 case X86::BI__builtin_ia32_compresssf128_mask
:
15543 case X86::BI__builtin_ia32_compresssf256_mask
:
15544 case X86::BI__builtin_ia32_compresssf512_mask
:
15545 case X86::BI__builtin_ia32_compressdi128_mask
:
15546 case X86::BI__builtin_ia32_compressdi256_mask
:
15547 case X86::BI__builtin_ia32_compressdi512_mask
:
15548 case X86::BI__builtin_ia32_compresssi128_mask
:
15549 case X86::BI__builtin_ia32_compresssi256_mask
:
15550 case X86::BI__builtin_ia32_compresssi512_mask
:
15551 case X86::BI__builtin_ia32_compresshi128_mask
:
15552 case X86::BI__builtin_ia32_compresshi256_mask
:
15553 case X86::BI__builtin_ia32_compresshi512_mask
:
15554 case X86::BI__builtin_ia32_compressqi128_mask
:
15555 case X86::BI__builtin_ia32_compressqi256_mask
:
15556 case X86::BI__builtin_ia32_compressqi512_mask
:
15557 return EmitX86CompressExpand(*this, Ops
, /*IsCompress*/true);
15559 case X86::BI__builtin_ia32_gather3div2df
:
15560 case X86::BI__builtin_ia32_gather3div2di
:
15561 case X86::BI__builtin_ia32_gather3div4df
:
15562 case X86::BI__builtin_ia32_gather3div4di
:
15563 case X86::BI__builtin_ia32_gather3div4sf
:
15564 case X86::BI__builtin_ia32_gather3div4si
:
15565 case X86::BI__builtin_ia32_gather3div8sf
:
15566 case X86::BI__builtin_ia32_gather3div8si
:
15567 case X86::BI__builtin_ia32_gather3siv2df
:
15568 case X86::BI__builtin_ia32_gather3siv2di
:
15569 case X86::BI__builtin_ia32_gather3siv4df
:
15570 case X86::BI__builtin_ia32_gather3siv4di
:
15571 case X86::BI__builtin_ia32_gather3siv4sf
:
15572 case X86::BI__builtin_ia32_gather3siv4si
:
15573 case X86::BI__builtin_ia32_gather3siv8sf
:
15574 case X86::BI__builtin_ia32_gather3siv8si
:
15575 case X86::BI__builtin_ia32_gathersiv8df
:
15576 case X86::BI__builtin_ia32_gathersiv16sf
:
15577 case X86::BI__builtin_ia32_gatherdiv8df
:
15578 case X86::BI__builtin_ia32_gatherdiv16sf
:
15579 case X86::BI__builtin_ia32_gathersiv8di
:
15580 case X86::BI__builtin_ia32_gathersiv16si
:
15581 case X86::BI__builtin_ia32_gatherdiv8di
:
15582 case X86::BI__builtin_ia32_gatherdiv16si
: {
15584 switch (BuiltinID
) {
15585 default: llvm_unreachable("Unexpected builtin");
15586 case X86::BI__builtin_ia32_gather3div2df
:
15587 IID
= Intrinsic::x86_avx512_mask_gather3div2_df
;
15589 case X86::BI__builtin_ia32_gather3div2di
:
15590 IID
= Intrinsic::x86_avx512_mask_gather3div2_di
;
15592 case X86::BI__builtin_ia32_gather3div4df
:
15593 IID
= Intrinsic::x86_avx512_mask_gather3div4_df
;
15595 case X86::BI__builtin_ia32_gather3div4di
:
15596 IID
= Intrinsic::x86_avx512_mask_gather3div4_di
;
15598 case X86::BI__builtin_ia32_gather3div4sf
:
15599 IID
= Intrinsic::x86_avx512_mask_gather3div4_sf
;
15601 case X86::BI__builtin_ia32_gather3div4si
:
15602 IID
= Intrinsic::x86_avx512_mask_gather3div4_si
;
15604 case X86::BI__builtin_ia32_gather3div8sf
:
15605 IID
= Intrinsic::x86_avx512_mask_gather3div8_sf
;
15607 case X86::BI__builtin_ia32_gather3div8si
:
15608 IID
= Intrinsic::x86_avx512_mask_gather3div8_si
;
15610 case X86::BI__builtin_ia32_gather3siv2df
:
15611 IID
= Intrinsic::x86_avx512_mask_gather3siv2_df
;
15613 case X86::BI__builtin_ia32_gather3siv2di
:
15614 IID
= Intrinsic::x86_avx512_mask_gather3siv2_di
;
15616 case X86::BI__builtin_ia32_gather3siv4df
:
15617 IID
= Intrinsic::x86_avx512_mask_gather3siv4_df
;
15619 case X86::BI__builtin_ia32_gather3siv4di
:
15620 IID
= Intrinsic::x86_avx512_mask_gather3siv4_di
;
15622 case X86::BI__builtin_ia32_gather3siv4sf
:
15623 IID
= Intrinsic::x86_avx512_mask_gather3siv4_sf
;
15625 case X86::BI__builtin_ia32_gather3siv4si
:
15626 IID
= Intrinsic::x86_avx512_mask_gather3siv4_si
;
15628 case X86::BI__builtin_ia32_gather3siv8sf
:
15629 IID
= Intrinsic::x86_avx512_mask_gather3siv8_sf
;
15631 case X86::BI__builtin_ia32_gather3siv8si
:
15632 IID
= Intrinsic::x86_avx512_mask_gather3siv8_si
;
15634 case X86::BI__builtin_ia32_gathersiv8df
:
15635 IID
= Intrinsic::x86_avx512_mask_gather_dpd_512
;
15637 case X86::BI__builtin_ia32_gathersiv16sf
:
15638 IID
= Intrinsic::x86_avx512_mask_gather_dps_512
;
15640 case X86::BI__builtin_ia32_gatherdiv8df
:
15641 IID
= Intrinsic::x86_avx512_mask_gather_qpd_512
;
15643 case X86::BI__builtin_ia32_gatherdiv16sf
:
15644 IID
= Intrinsic::x86_avx512_mask_gather_qps_512
;
15646 case X86::BI__builtin_ia32_gathersiv8di
:
15647 IID
= Intrinsic::x86_avx512_mask_gather_dpq_512
;
15649 case X86::BI__builtin_ia32_gathersiv16si
:
15650 IID
= Intrinsic::x86_avx512_mask_gather_dpi_512
;
15652 case X86::BI__builtin_ia32_gatherdiv8di
:
15653 IID
= Intrinsic::x86_avx512_mask_gather_qpq_512
;
15655 case X86::BI__builtin_ia32_gatherdiv16si
:
15656 IID
= Intrinsic::x86_avx512_mask_gather_qpi_512
;
15660 unsigned MinElts
= std::min(
15661 cast
<llvm::FixedVectorType
>(Ops
[0]->getType())->getNumElements(),
15662 cast
<llvm::FixedVectorType
>(Ops
[2]->getType())->getNumElements());
15663 Ops
[3] = getMaskVecValue(*this, Ops
[3], MinElts
);
15664 Function
*Intr
= CGM
.getIntrinsic(IID
);
15665 return Builder
.CreateCall(Intr
, Ops
);
15668 case X86::BI__builtin_ia32_scattersiv8df
:
15669 case X86::BI__builtin_ia32_scattersiv16sf
:
15670 case X86::BI__builtin_ia32_scatterdiv8df
:
15671 case X86::BI__builtin_ia32_scatterdiv16sf
:
15672 case X86::BI__builtin_ia32_scattersiv8di
:
15673 case X86::BI__builtin_ia32_scattersiv16si
:
15674 case X86::BI__builtin_ia32_scatterdiv8di
:
15675 case X86::BI__builtin_ia32_scatterdiv16si
:
15676 case X86::BI__builtin_ia32_scatterdiv2df
:
15677 case X86::BI__builtin_ia32_scatterdiv2di
:
15678 case X86::BI__builtin_ia32_scatterdiv4df
:
15679 case X86::BI__builtin_ia32_scatterdiv4di
:
15680 case X86::BI__builtin_ia32_scatterdiv4sf
:
15681 case X86::BI__builtin_ia32_scatterdiv4si
:
15682 case X86::BI__builtin_ia32_scatterdiv8sf
:
15683 case X86::BI__builtin_ia32_scatterdiv8si
:
15684 case X86::BI__builtin_ia32_scattersiv2df
:
15685 case X86::BI__builtin_ia32_scattersiv2di
:
15686 case X86::BI__builtin_ia32_scattersiv4df
:
15687 case X86::BI__builtin_ia32_scattersiv4di
:
15688 case X86::BI__builtin_ia32_scattersiv4sf
:
15689 case X86::BI__builtin_ia32_scattersiv4si
:
15690 case X86::BI__builtin_ia32_scattersiv8sf
:
15691 case X86::BI__builtin_ia32_scattersiv8si
: {
15693 switch (BuiltinID
) {
15694 default: llvm_unreachable("Unexpected builtin");
15695 case X86::BI__builtin_ia32_scattersiv8df
:
15696 IID
= Intrinsic::x86_avx512_mask_scatter_dpd_512
;
15698 case X86::BI__builtin_ia32_scattersiv16sf
:
15699 IID
= Intrinsic::x86_avx512_mask_scatter_dps_512
;
15701 case X86::BI__builtin_ia32_scatterdiv8df
:
15702 IID
= Intrinsic::x86_avx512_mask_scatter_qpd_512
;
15704 case X86::BI__builtin_ia32_scatterdiv16sf
:
15705 IID
= Intrinsic::x86_avx512_mask_scatter_qps_512
;
15707 case X86::BI__builtin_ia32_scattersiv8di
:
15708 IID
= Intrinsic::x86_avx512_mask_scatter_dpq_512
;
15710 case X86::BI__builtin_ia32_scattersiv16si
:
15711 IID
= Intrinsic::x86_avx512_mask_scatter_dpi_512
;
15713 case X86::BI__builtin_ia32_scatterdiv8di
:
15714 IID
= Intrinsic::x86_avx512_mask_scatter_qpq_512
;
15716 case X86::BI__builtin_ia32_scatterdiv16si
:
15717 IID
= Intrinsic::x86_avx512_mask_scatter_qpi_512
;
15719 case X86::BI__builtin_ia32_scatterdiv2df
:
15720 IID
= Intrinsic::x86_avx512_mask_scatterdiv2_df
;
15722 case X86::BI__builtin_ia32_scatterdiv2di
:
15723 IID
= Intrinsic::x86_avx512_mask_scatterdiv2_di
;
15725 case X86::BI__builtin_ia32_scatterdiv4df
:
15726 IID
= Intrinsic::x86_avx512_mask_scatterdiv4_df
;
15728 case X86::BI__builtin_ia32_scatterdiv4di
:
15729 IID
= Intrinsic::x86_avx512_mask_scatterdiv4_di
;
15731 case X86::BI__builtin_ia32_scatterdiv4sf
:
15732 IID
= Intrinsic::x86_avx512_mask_scatterdiv4_sf
;
15734 case X86::BI__builtin_ia32_scatterdiv4si
:
15735 IID
= Intrinsic::x86_avx512_mask_scatterdiv4_si
;
15737 case X86::BI__builtin_ia32_scatterdiv8sf
:
15738 IID
= Intrinsic::x86_avx512_mask_scatterdiv8_sf
;
15740 case X86::BI__builtin_ia32_scatterdiv8si
:
15741 IID
= Intrinsic::x86_avx512_mask_scatterdiv8_si
;
15743 case X86::BI__builtin_ia32_scattersiv2df
:
15744 IID
= Intrinsic::x86_avx512_mask_scattersiv2_df
;
15746 case X86::BI__builtin_ia32_scattersiv2di
:
15747 IID
= Intrinsic::x86_avx512_mask_scattersiv2_di
;
15749 case X86::BI__builtin_ia32_scattersiv4df
:
15750 IID
= Intrinsic::x86_avx512_mask_scattersiv4_df
;
15752 case X86::BI__builtin_ia32_scattersiv4di
:
15753 IID
= Intrinsic::x86_avx512_mask_scattersiv4_di
;
15755 case X86::BI__builtin_ia32_scattersiv4sf
:
15756 IID
= Intrinsic::x86_avx512_mask_scattersiv4_sf
;
15758 case X86::BI__builtin_ia32_scattersiv4si
:
15759 IID
= Intrinsic::x86_avx512_mask_scattersiv4_si
;
15761 case X86::BI__builtin_ia32_scattersiv8sf
:
15762 IID
= Intrinsic::x86_avx512_mask_scattersiv8_sf
;
15764 case X86::BI__builtin_ia32_scattersiv8si
:
15765 IID
= Intrinsic::x86_avx512_mask_scattersiv8_si
;
15769 unsigned MinElts
= std::min(
15770 cast
<llvm::FixedVectorType
>(Ops
[2]->getType())->getNumElements(),
15771 cast
<llvm::FixedVectorType
>(Ops
[3]->getType())->getNumElements());
15772 Ops
[1] = getMaskVecValue(*this, Ops
[1], MinElts
);
15773 Function
*Intr
= CGM
.getIntrinsic(IID
);
15774 return Builder
.CreateCall(Intr
, Ops
);
15777 case X86::BI__builtin_ia32_vextractf128_pd256
:
15778 case X86::BI__builtin_ia32_vextractf128_ps256
:
15779 case X86::BI__builtin_ia32_vextractf128_si256
:
15780 case X86::BI__builtin_ia32_extract128i256
:
15781 case X86::BI__builtin_ia32_extractf64x4_mask
:
15782 case X86::BI__builtin_ia32_extractf32x4_mask
:
15783 case X86::BI__builtin_ia32_extracti64x4_mask
:
15784 case X86::BI__builtin_ia32_extracti32x4_mask
:
15785 case X86::BI__builtin_ia32_extractf32x8_mask
:
15786 case X86::BI__builtin_ia32_extracti32x8_mask
:
15787 case X86::BI__builtin_ia32_extractf32x4_256_mask
:
15788 case X86::BI__builtin_ia32_extracti32x4_256_mask
:
15789 case X86::BI__builtin_ia32_extractf64x2_256_mask
:
15790 case X86::BI__builtin_ia32_extracti64x2_256_mask
:
15791 case X86::BI__builtin_ia32_extractf64x2_512_mask
:
15792 case X86::BI__builtin_ia32_extracti64x2_512_mask
: {
15793 auto *DstTy
= cast
<llvm::FixedVectorType
>(ConvertType(E
->getType()));
15794 unsigned NumElts
= DstTy
->getNumElements();
15795 unsigned SrcNumElts
=
15796 cast
<llvm::FixedVectorType
>(Ops
[0]->getType())->getNumElements();
15797 unsigned SubVectors
= SrcNumElts
/ NumElts
;
15798 unsigned Index
= cast
<ConstantInt
>(Ops
[1])->getZExtValue();
15799 assert(llvm::isPowerOf2_32(SubVectors
) && "Expected power of 2 subvectors");
15800 Index
&= SubVectors
- 1; // Remove any extra bits.
15804 for (unsigned i
= 0; i
!= NumElts
; ++i
)
15805 Indices
[i
] = i
+ Index
;
15807 Value
*Res
= Builder
.CreateShuffleVector(Ops
[0], ArrayRef(Indices
, NumElts
),
15810 if (Ops
.size() == 4)
15811 Res
= EmitX86Select(*this, Ops
[3], Res
, Ops
[2]);
15815 case X86::BI__builtin_ia32_vinsertf128_pd256
:
15816 case X86::BI__builtin_ia32_vinsertf128_ps256
:
15817 case X86::BI__builtin_ia32_vinsertf128_si256
:
15818 case X86::BI__builtin_ia32_insert128i256
:
15819 case X86::BI__builtin_ia32_insertf64x4
:
15820 case X86::BI__builtin_ia32_insertf32x4
:
15821 case X86::BI__builtin_ia32_inserti64x4
:
15822 case X86::BI__builtin_ia32_inserti32x4
:
15823 case X86::BI__builtin_ia32_insertf32x8
:
15824 case X86::BI__builtin_ia32_inserti32x8
:
15825 case X86::BI__builtin_ia32_insertf32x4_256
:
15826 case X86::BI__builtin_ia32_inserti32x4_256
:
15827 case X86::BI__builtin_ia32_insertf64x2_256
:
15828 case X86::BI__builtin_ia32_inserti64x2_256
:
15829 case X86::BI__builtin_ia32_insertf64x2_512
:
15830 case X86::BI__builtin_ia32_inserti64x2_512
: {
15831 unsigned DstNumElts
=
15832 cast
<llvm::FixedVectorType
>(Ops
[0]->getType())->getNumElements();
15833 unsigned SrcNumElts
=
15834 cast
<llvm::FixedVectorType
>(Ops
[1]->getType())->getNumElements();
15835 unsigned SubVectors
= DstNumElts
/ SrcNumElts
;
15836 unsigned Index
= cast
<ConstantInt
>(Ops
[2])->getZExtValue();
15837 assert(llvm::isPowerOf2_32(SubVectors
) && "Expected power of 2 subvectors");
15838 Index
&= SubVectors
- 1; // Remove any extra bits.
15839 Index
*= SrcNumElts
;
15842 for (unsigned i
= 0; i
!= DstNumElts
; ++i
)
15843 Indices
[i
] = (i
>= SrcNumElts
) ? SrcNumElts
+ (i
% SrcNumElts
) : i
;
15845 Value
*Op1
= Builder
.CreateShuffleVector(
15846 Ops
[1], ArrayRef(Indices
, DstNumElts
), "widen");
15848 for (unsigned i
= 0; i
!= DstNumElts
; ++i
) {
15849 if (i
>= Index
&& i
< (Index
+ SrcNumElts
))
15850 Indices
[i
] = (i
- Index
) + DstNumElts
;
15855 return Builder
.CreateShuffleVector(Ops
[0], Op1
,
15856 ArrayRef(Indices
, DstNumElts
), "insert");
15858 case X86::BI__builtin_ia32_pmovqd512_mask
:
15859 case X86::BI__builtin_ia32_pmovwb512_mask
: {
15860 Value
*Res
= Builder
.CreateTrunc(Ops
[0], Ops
[1]->getType());
15861 return EmitX86Select(*this, Ops
[2], Res
, Ops
[1]);
15863 case X86::BI__builtin_ia32_pmovdb512_mask
:
15864 case X86::BI__builtin_ia32_pmovdw512_mask
:
15865 case X86::BI__builtin_ia32_pmovqw512_mask
: {
15866 if (const auto *C
= dyn_cast
<Constant
>(Ops
[2]))
15867 if (C
->isAllOnesValue())
15868 return Builder
.CreateTrunc(Ops
[0], Ops
[1]->getType());
15871 switch (BuiltinID
) {
15872 default: llvm_unreachable("Unsupported intrinsic!");
15873 case X86::BI__builtin_ia32_pmovdb512_mask
:
15874 IID
= Intrinsic::x86_avx512_mask_pmov_db_512
;
15876 case X86::BI__builtin_ia32_pmovdw512_mask
:
15877 IID
= Intrinsic::x86_avx512_mask_pmov_dw_512
;
15879 case X86::BI__builtin_ia32_pmovqw512_mask
:
15880 IID
= Intrinsic::x86_avx512_mask_pmov_qw_512
;
15884 Function
*Intr
= CGM
.getIntrinsic(IID
);
15885 return Builder
.CreateCall(Intr
, Ops
);
15887 case X86::BI__builtin_ia32_pblendw128
:
15888 case X86::BI__builtin_ia32_blendpd
:
15889 case X86::BI__builtin_ia32_blendps
:
15890 case X86::BI__builtin_ia32_blendpd256
:
15891 case X86::BI__builtin_ia32_blendps256
:
15892 case X86::BI__builtin_ia32_pblendw256
:
15893 case X86::BI__builtin_ia32_pblendd128
:
15894 case X86::BI__builtin_ia32_pblendd256
: {
15896 cast
<llvm::FixedVectorType
>(Ops
[0]->getType())->getNumElements();
15897 unsigned Imm
= cast
<llvm::ConstantInt
>(Ops
[2])->getZExtValue();
15900 // If there are more than 8 elements, the immediate is used twice so make
15901 // sure we handle that.
15902 for (unsigned i
= 0; i
!= NumElts
; ++i
)
15903 Indices
[i
] = ((Imm
>> (i
% 8)) & 0x1) ? NumElts
+ i
: i
;
15905 return Builder
.CreateShuffleVector(Ops
[0], Ops
[1],
15906 ArrayRef(Indices
, NumElts
), "blend");
15908 case X86::BI__builtin_ia32_pshuflw
:
15909 case X86::BI__builtin_ia32_pshuflw256
:
15910 case X86::BI__builtin_ia32_pshuflw512
: {
15911 uint32_t Imm
= cast
<llvm::ConstantInt
>(Ops
[1])->getZExtValue();
15912 auto *Ty
= cast
<llvm::FixedVectorType
>(Ops
[0]->getType());
15913 unsigned NumElts
= Ty
->getNumElements();
15915 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15916 Imm
= (Imm
& 0xff) * 0x01010101;
15919 for (unsigned l
= 0; l
!= NumElts
; l
+= 8) {
15920 for (unsigned i
= 0; i
!= 4; ++i
) {
15921 Indices
[l
+ i
] = l
+ (Imm
& 3);
15924 for (unsigned i
= 4; i
!= 8; ++i
)
15925 Indices
[l
+ i
] = l
+ i
;
15928 return Builder
.CreateShuffleVector(Ops
[0], ArrayRef(Indices
, NumElts
),
15931 case X86::BI__builtin_ia32_pshufhw
:
15932 case X86::BI__builtin_ia32_pshufhw256
:
15933 case X86::BI__builtin_ia32_pshufhw512
: {
15934 uint32_t Imm
= cast
<llvm::ConstantInt
>(Ops
[1])->getZExtValue();
15935 auto *Ty
= cast
<llvm::FixedVectorType
>(Ops
[0]->getType());
15936 unsigned NumElts
= Ty
->getNumElements();
15938 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15939 Imm
= (Imm
& 0xff) * 0x01010101;
15942 for (unsigned l
= 0; l
!= NumElts
; l
+= 8) {
15943 for (unsigned i
= 0; i
!= 4; ++i
)
15944 Indices
[l
+ i
] = l
+ i
;
15945 for (unsigned i
= 4; i
!= 8; ++i
) {
15946 Indices
[l
+ i
] = l
+ 4 + (Imm
& 3);
15951 return Builder
.CreateShuffleVector(Ops
[0], ArrayRef(Indices
, NumElts
),
15954 case X86::BI__builtin_ia32_pshufd
:
15955 case X86::BI__builtin_ia32_pshufd256
:
15956 case X86::BI__builtin_ia32_pshufd512
:
15957 case X86::BI__builtin_ia32_vpermilpd
:
15958 case X86::BI__builtin_ia32_vpermilps
:
15959 case X86::BI__builtin_ia32_vpermilpd256
:
15960 case X86::BI__builtin_ia32_vpermilps256
:
15961 case X86::BI__builtin_ia32_vpermilpd512
:
15962 case X86::BI__builtin_ia32_vpermilps512
: {
15963 uint32_t Imm
= cast
<llvm::ConstantInt
>(Ops
[1])->getZExtValue();
15964 auto *Ty
= cast
<llvm::FixedVectorType
>(Ops
[0]->getType());
15965 unsigned NumElts
= Ty
->getNumElements();
15966 unsigned NumLanes
= Ty
->getPrimitiveSizeInBits() / 128;
15967 unsigned NumLaneElts
= NumElts
/ NumLanes
;
15969 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15970 Imm
= (Imm
& 0xff) * 0x01010101;
15973 for (unsigned l
= 0; l
!= NumElts
; l
+= NumLaneElts
) {
15974 for (unsigned i
= 0; i
!= NumLaneElts
; ++i
) {
15975 Indices
[i
+ l
] = (Imm
% NumLaneElts
) + l
;
15976 Imm
/= NumLaneElts
;
15980 return Builder
.CreateShuffleVector(Ops
[0], ArrayRef(Indices
, NumElts
),
15983 case X86::BI__builtin_ia32_shufpd
:
15984 case X86::BI__builtin_ia32_shufpd256
:
15985 case X86::BI__builtin_ia32_shufpd512
:
15986 case X86::BI__builtin_ia32_shufps
:
15987 case X86::BI__builtin_ia32_shufps256
:
15988 case X86::BI__builtin_ia32_shufps512
: {
15989 uint32_t Imm
= cast
<llvm::ConstantInt
>(Ops
[2])->getZExtValue();
15990 auto *Ty
= cast
<llvm::FixedVectorType
>(Ops
[0]->getType());
15991 unsigned NumElts
= Ty
->getNumElements();
15992 unsigned NumLanes
= Ty
->getPrimitiveSizeInBits() / 128;
15993 unsigned NumLaneElts
= NumElts
/ NumLanes
;
15995 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15996 Imm
= (Imm
& 0xff) * 0x01010101;
15999 for (unsigned l
= 0; l
!= NumElts
; l
+= NumLaneElts
) {
16000 for (unsigned i
= 0; i
!= NumLaneElts
; ++i
) {
16001 unsigned Index
= Imm
% NumLaneElts
;
16002 Imm
/= NumLaneElts
;
16003 if (i
>= (NumLaneElts
/ 2))
16005 Indices
[l
+ i
] = l
+ Index
;
16009 return Builder
.CreateShuffleVector(Ops
[0], Ops
[1],
16010 ArrayRef(Indices
, NumElts
), "shufp");
16012 case X86::BI__builtin_ia32_permdi256
:
16013 case X86::BI__builtin_ia32_permdf256
:
16014 case X86::BI__builtin_ia32_permdi512
:
16015 case X86::BI__builtin_ia32_permdf512
: {
16016 unsigned Imm
= cast
<llvm::ConstantInt
>(Ops
[1])->getZExtValue();
16017 auto *Ty
= cast
<llvm::FixedVectorType
>(Ops
[0]->getType());
16018 unsigned NumElts
= Ty
->getNumElements();
16020 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
16022 for (unsigned l
= 0; l
!= NumElts
; l
+= 4)
16023 for (unsigned i
= 0; i
!= 4; ++i
)
16024 Indices
[l
+ i
] = l
+ ((Imm
>> (2 * i
)) & 0x3);
16026 return Builder
.CreateShuffleVector(Ops
[0], ArrayRef(Indices
, NumElts
),
16029 case X86::BI__builtin_ia32_palignr128
:
16030 case X86::BI__builtin_ia32_palignr256
:
16031 case X86::BI__builtin_ia32_palignr512
: {
16032 unsigned ShiftVal
= cast
<llvm::ConstantInt
>(Ops
[2])->getZExtValue() & 0xff;
16035 cast
<llvm::FixedVectorType
>(Ops
[0]->getType())->getNumElements();
16036 assert(NumElts
% 16 == 0);
16038 // If palignr is shifting the pair of vectors more than the size of two
16039 // lanes, emit zero.
16040 if (ShiftVal
>= 32)
16041 return llvm::Constant::getNullValue(ConvertType(E
->getType()));
16043 // If palignr is shifting the pair of input vectors more than one lane,
16044 // but less than two lanes, convert to shifting in zeroes.
16045 if (ShiftVal
> 16) {
16048 Ops
[0] = llvm::Constant::getNullValue(Ops
[0]->getType());
16052 // 256-bit palignr operates on 128-bit lanes so we need to handle that
16053 for (unsigned l
= 0; l
!= NumElts
; l
+= 16) {
16054 for (unsigned i
= 0; i
!= 16; ++i
) {
16055 unsigned Idx
= ShiftVal
+ i
;
16057 Idx
+= NumElts
- 16; // End of lane, switch operand.
16058 Indices
[l
+ i
] = Idx
+ l
;
16062 return Builder
.CreateShuffleVector(Ops
[1], Ops
[0],
16063 ArrayRef(Indices
, NumElts
), "palignr");
16065 case X86::BI__builtin_ia32_alignd128
:
16066 case X86::BI__builtin_ia32_alignd256
:
16067 case X86::BI__builtin_ia32_alignd512
:
16068 case X86::BI__builtin_ia32_alignq128
:
16069 case X86::BI__builtin_ia32_alignq256
:
16070 case X86::BI__builtin_ia32_alignq512
: {
16072 cast
<llvm::FixedVectorType
>(Ops
[0]->getType())->getNumElements();
16073 unsigned ShiftVal
= cast
<llvm::ConstantInt
>(Ops
[2])->getZExtValue() & 0xff;
16075 // Mask the shift amount to width of a vector.
16076 ShiftVal
&= NumElts
- 1;
16079 for (unsigned i
= 0; i
!= NumElts
; ++i
)
16080 Indices
[i
] = i
+ ShiftVal
;
16082 return Builder
.CreateShuffleVector(Ops
[1], Ops
[0],
16083 ArrayRef(Indices
, NumElts
), "valign");
16085 case X86::BI__builtin_ia32_shuf_f32x4_256
:
16086 case X86::BI__builtin_ia32_shuf_f64x2_256
:
16087 case X86::BI__builtin_ia32_shuf_i32x4_256
:
16088 case X86::BI__builtin_ia32_shuf_i64x2_256
:
16089 case X86::BI__builtin_ia32_shuf_f32x4
:
16090 case X86::BI__builtin_ia32_shuf_f64x2
:
16091 case X86::BI__builtin_ia32_shuf_i32x4
:
16092 case X86::BI__builtin_ia32_shuf_i64x2
: {
16093 unsigned Imm
= cast
<llvm::ConstantInt
>(Ops
[2])->getZExtValue();
16094 auto *Ty
= cast
<llvm::FixedVectorType
>(Ops
[0]->getType());
16095 unsigned NumElts
= Ty
->getNumElements();
16096 unsigned NumLanes
= Ty
->getPrimitiveSizeInBits() == 512 ? 4 : 2;
16097 unsigned NumLaneElts
= NumElts
/ NumLanes
;
16100 for (unsigned l
= 0; l
!= NumElts
; l
+= NumLaneElts
) {
16101 unsigned Index
= (Imm
% NumLanes
) * NumLaneElts
;
16102 Imm
/= NumLanes
; // Discard the bits we just used.
16103 if (l
>= (NumElts
/ 2))
16104 Index
+= NumElts
; // Switch to other source.
16105 for (unsigned i
= 0; i
!= NumLaneElts
; ++i
) {
16106 Indices
[l
+ i
] = Index
+ i
;
16110 return Builder
.CreateShuffleVector(Ops
[0], Ops
[1],
16111 ArrayRef(Indices
, NumElts
), "shuf");
16114 case X86::BI__builtin_ia32_vperm2f128_pd256
:
16115 case X86::BI__builtin_ia32_vperm2f128_ps256
:
16116 case X86::BI__builtin_ia32_vperm2f128_si256
:
16117 case X86::BI__builtin_ia32_permti256
: {
16118 unsigned Imm
= cast
<llvm::ConstantInt
>(Ops
[2])->getZExtValue();
16120 cast
<llvm::FixedVectorType
>(Ops
[0]->getType())->getNumElements();
16122 // This takes a very simple approach since there are two lanes and a
16123 // shuffle can have 2 inputs. So we reserve the first input for the first
16124 // lane and the second input for the second lane. This may result in
16125 // duplicate sources, but this can be dealt with in the backend.
16129 for (unsigned l
= 0; l
!= 2; ++l
) {
16130 // Determine the source for this lane.
16131 if (Imm
& (1 << ((l
* 4) + 3)))
16132 OutOps
[l
] = llvm::ConstantAggregateZero::get(Ops
[0]->getType());
16133 else if (Imm
& (1 << ((l
* 4) + 1)))
16134 OutOps
[l
] = Ops
[1];
16136 OutOps
[l
] = Ops
[0];
16138 for (unsigned i
= 0; i
!= NumElts
/2; ++i
) {
16139 // Start with ith element of the source for this lane.
16140 unsigned Idx
= (l
* NumElts
) + i
;
16141 // If bit 0 of the immediate half is set, switch to the high half of
16143 if (Imm
& (1 << (l
* 4)))
16145 Indices
[(l
* (NumElts
/2)) + i
] = Idx
;
16149 return Builder
.CreateShuffleVector(OutOps
[0], OutOps
[1],
16150 ArrayRef(Indices
, NumElts
), "vperm");
16153 case X86::BI__builtin_ia32_pslldqi128_byteshift
:
16154 case X86::BI__builtin_ia32_pslldqi256_byteshift
:
16155 case X86::BI__builtin_ia32_pslldqi512_byteshift
: {
16156 unsigned ShiftVal
= cast
<llvm::ConstantInt
>(Ops
[1])->getZExtValue() & 0xff;
16157 auto *ResultType
= cast
<llvm::FixedVectorType
>(Ops
[0]->getType());
16158 // Builtin type is vXi64 so multiply by 8 to get bytes.
16159 unsigned NumElts
= ResultType
->getNumElements() * 8;
16161 // If pslldq is shifting the vector more than 15 bytes, emit zero.
16162 if (ShiftVal
>= 16)
16163 return llvm::Constant::getNullValue(ResultType
);
16166 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
16167 for (unsigned l
= 0; l
!= NumElts
; l
+= 16) {
16168 for (unsigned i
= 0; i
!= 16; ++i
) {
16169 unsigned Idx
= NumElts
+ i
- ShiftVal
;
16170 if (Idx
< NumElts
) Idx
-= NumElts
- 16; // end of lane, switch operand.
16171 Indices
[l
+ i
] = Idx
+ l
;
16175 auto *VecTy
= llvm::FixedVectorType::get(Int8Ty
, NumElts
);
16176 Value
*Cast
= Builder
.CreateBitCast(Ops
[0], VecTy
, "cast");
16177 Value
*Zero
= llvm::Constant::getNullValue(VecTy
);
16178 Value
*SV
= Builder
.CreateShuffleVector(
16179 Zero
, Cast
, ArrayRef(Indices
, NumElts
), "pslldq");
16180 return Builder
.CreateBitCast(SV
, Ops
[0]->getType(), "cast");
16182 case X86::BI__builtin_ia32_psrldqi128_byteshift
:
16183 case X86::BI__builtin_ia32_psrldqi256_byteshift
:
16184 case X86::BI__builtin_ia32_psrldqi512_byteshift
: {
16185 unsigned ShiftVal
= cast
<llvm::ConstantInt
>(Ops
[1])->getZExtValue() & 0xff;
16186 auto *ResultType
= cast
<llvm::FixedVectorType
>(Ops
[0]->getType());
16187 // Builtin type is vXi64 so multiply by 8 to get bytes.
16188 unsigned NumElts
= ResultType
->getNumElements() * 8;
16190 // If psrldq is shifting the vector more than 15 bytes, emit zero.
16191 if (ShiftVal
>= 16)
16192 return llvm::Constant::getNullValue(ResultType
);
16195 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
16196 for (unsigned l
= 0; l
!= NumElts
; l
+= 16) {
16197 for (unsigned i
= 0; i
!= 16; ++i
) {
16198 unsigned Idx
= i
+ ShiftVal
;
16199 if (Idx
>= 16) Idx
+= NumElts
- 16; // end of lane, switch operand.
16200 Indices
[l
+ i
] = Idx
+ l
;
16204 auto *VecTy
= llvm::FixedVectorType::get(Int8Ty
, NumElts
);
16205 Value
*Cast
= Builder
.CreateBitCast(Ops
[0], VecTy
, "cast");
16206 Value
*Zero
= llvm::Constant::getNullValue(VecTy
);
16207 Value
*SV
= Builder
.CreateShuffleVector(
16208 Cast
, Zero
, ArrayRef(Indices
, NumElts
), "psrldq");
16209 return Builder
.CreateBitCast(SV
, ResultType
, "cast");
16211 case X86::BI__builtin_ia32_kshiftliqi
:
16212 case X86::BI__builtin_ia32_kshiftlihi
:
16213 case X86::BI__builtin_ia32_kshiftlisi
:
16214 case X86::BI__builtin_ia32_kshiftlidi
: {
16215 unsigned ShiftVal
= cast
<llvm::ConstantInt
>(Ops
[1])->getZExtValue() & 0xff;
16216 unsigned NumElts
= Ops
[0]->getType()->getIntegerBitWidth();
16218 if (ShiftVal
>= NumElts
)
16219 return llvm::Constant::getNullValue(Ops
[0]->getType());
16221 Value
*In
= getMaskVecValue(*this, Ops
[0], NumElts
);
16224 for (unsigned i
= 0; i
!= NumElts
; ++i
)
16225 Indices
[i
] = NumElts
+ i
- ShiftVal
;
16227 Value
*Zero
= llvm::Constant::getNullValue(In
->getType());
16228 Value
*SV
= Builder
.CreateShuffleVector(
16229 Zero
, In
, ArrayRef(Indices
, NumElts
), "kshiftl");
16230 return Builder
.CreateBitCast(SV
, Ops
[0]->getType());
16232 case X86::BI__builtin_ia32_kshiftriqi
:
16233 case X86::BI__builtin_ia32_kshiftrihi
:
16234 case X86::BI__builtin_ia32_kshiftrisi
:
16235 case X86::BI__builtin_ia32_kshiftridi
: {
16236 unsigned ShiftVal
= cast
<llvm::ConstantInt
>(Ops
[1])->getZExtValue() & 0xff;
16237 unsigned NumElts
= Ops
[0]->getType()->getIntegerBitWidth();
16239 if (ShiftVal
>= NumElts
)
16240 return llvm::Constant::getNullValue(Ops
[0]->getType());
16242 Value
*In
= getMaskVecValue(*this, Ops
[0], NumElts
);
16245 for (unsigned i
= 0; i
!= NumElts
; ++i
)
16246 Indices
[i
] = i
+ ShiftVal
;
16248 Value
*Zero
= llvm::Constant::getNullValue(In
->getType());
16249 Value
*SV
= Builder
.CreateShuffleVector(
16250 In
, Zero
, ArrayRef(Indices
, NumElts
), "kshiftr");
16251 return Builder
.CreateBitCast(SV
, Ops
[0]->getType());
16253 case X86::BI__builtin_ia32_movnti
:
16254 case X86::BI__builtin_ia32_movnti64
:
16255 case X86::BI__builtin_ia32_movntsd
:
16256 case X86::BI__builtin_ia32_movntss
: {
16257 llvm::MDNode
*Node
= llvm::MDNode::get(
16258 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder
.getInt32(1)));
16260 Value
*Ptr
= Ops
[0];
16261 Value
*Src
= Ops
[1];
16263 // Extract the 0'th element of the source vector.
16264 if (BuiltinID
== X86::BI__builtin_ia32_movntsd
||
16265 BuiltinID
== X86::BI__builtin_ia32_movntss
)
16266 Src
= Builder
.CreateExtractElement(Src
, (uint64_t)0, "extract");
16268 // Unaligned nontemporal store of the scalar value.
16269 StoreInst
*SI
= Builder
.CreateDefaultAlignedStore(Src
, Ptr
);
16270 SI
->setMetadata(llvm::LLVMContext::MD_nontemporal
, Node
);
16271 SI
->setAlignment(llvm::Align(1));
16274 // Rotate is a special case of funnel shift - 1st 2 args are the same.
16275 case X86::BI__builtin_ia32_vprotb
:
16276 case X86::BI__builtin_ia32_vprotw
:
16277 case X86::BI__builtin_ia32_vprotd
:
16278 case X86::BI__builtin_ia32_vprotq
:
16279 case X86::BI__builtin_ia32_vprotbi
:
16280 case X86::BI__builtin_ia32_vprotwi
:
16281 case X86::BI__builtin_ia32_vprotdi
:
16282 case X86::BI__builtin_ia32_vprotqi
:
16283 case X86::BI__builtin_ia32_prold128
:
16284 case X86::BI__builtin_ia32_prold256
:
16285 case X86::BI__builtin_ia32_prold512
:
16286 case X86::BI__builtin_ia32_prolq128
:
16287 case X86::BI__builtin_ia32_prolq256
:
16288 case X86::BI__builtin_ia32_prolq512
:
16289 case X86::BI__builtin_ia32_prolvd128
:
16290 case X86::BI__builtin_ia32_prolvd256
:
16291 case X86::BI__builtin_ia32_prolvd512
:
16292 case X86::BI__builtin_ia32_prolvq128
:
16293 case X86::BI__builtin_ia32_prolvq256
:
16294 case X86::BI__builtin_ia32_prolvq512
:
16295 return EmitX86FunnelShift(*this, Ops
[0], Ops
[0], Ops
[1], false);
16296 case X86::BI__builtin_ia32_prord128
:
16297 case X86::BI__builtin_ia32_prord256
:
16298 case X86::BI__builtin_ia32_prord512
:
16299 case X86::BI__builtin_ia32_prorq128
:
16300 case X86::BI__builtin_ia32_prorq256
:
16301 case X86::BI__builtin_ia32_prorq512
:
16302 case X86::BI__builtin_ia32_prorvd128
:
16303 case X86::BI__builtin_ia32_prorvd256
:
16304 case X86::BI__builtin_ia32_prorvd512
:
16305 case X86::BI__builtin_ia32_prorvq128
:
16306 case X86::BI__builtin_ia32_prorvq256
:
16307 case X86::BI__builtin_ia32_prorvq512
:
16308 return EmitX86FunnelShift(*this, Ops
[0], Ops
[0], Ops
[1], true);
16309 case X86::BI__builtin_ia32_selectb_128
:
16310 case X86::BI__builtin_ia32_selectb_256
:
16311 case X86::BI__builtin_ia32_selectb_512
:
16312 case X86::BI__builtin_ia32_selectw_128
:
16313 case X86::BI__builtin_ia32_selectw_256
:
16314 case X86::BI__builtin_ia32_selectw_512
:
16315 case X86::BI__builtin_ia32_selectd_128
:
16316 case X86::BI__builtin_ia32_selectd_256
:
16317 case X86::BI__builtin_ia32_selectd_512
:
16318 case X86::BI__builtin_ia32_selectq_128
:
16319 case X86::BI__builtin_ia32_selectq_256
:
16320 case X86::BI__builtin_ia32_selectq_512
:
16321 case X86::BI__builtin_ia32_selectph_128
:
16322 case X86::BI__builtin_ia32_selectph_256
:
16323 case X86::BI__builtin_ia32_selectph_512
:
16324 case X86::BI__builtin_ia32_selectpbf_128
:
16325 case X86::BI__builtin_ia32_selectpbf_256
:
16326 case X86::BI__builtin_ia32_selectpbf_512
:
16327 case X86::BI__builtin_ia32_selectps_128
:
16328 case X86::BI__builtin_ia32_selectps_256
:
16329 case X86::BI__builtin_ia32_selectps_512
:
16330 case X86::BI__builtin_ia32_selectpd_128
:
16331 case X86::BI__builtin_ia32_selectpd_256
:
16332 case X86::BI__builtin_ia32_selectpd_512
:
16333 return EmitX86Select(*this, Ops
[0], Ops
[1], Ops
[2]);
16334 case X86::BI__builtin_ia32_selectsh_128
:
16335 case X86::BI__builtin_ia32_selectsbf_128
:
16336 case X86::BI__builtin_ia32_selectss_128
:
16337 case X86::BI__builtin_ia32_selectsd_128
: {
16338 Value
*A
= Builder
.CreateExtractElement(Ops
[1], (uint64_t)0);
16339 Value
*B
= Builder
.CreateExtractElement(Ops
[2], (uint64_t)0);
16340 A
= EmitX86ScalarSelect(*this, Ops
[0], A
, B
);
16341 return Builder
.CreateInsertElement(Ops
[1], A
, (uint64_t)0);
16343 case X86::BI__builtin_ia32_cmpb128_mask
:
16344 case X86::BI__builtin_ia32_cmpb256_mask
:
16345 case X86::BI__builtin_ia32_cmpb512_mask
:
16346 case X86::BI__builtin_ia32_cmpw128_mask
:
16347 case X86::BI__builtin_ia32_cmpw256_mask
:
16348 case X86::BI__builtin_ia32_cmpw512_mask
:
16349 case X86::BI__builtin_ia32_cmpd128_mask
:
16350 case X86::BI__builtin_ia32_cmpd256_mask
:
16351 case X86::BI__builtin_ia32_cmpd512_mask
:
16352 case X86::BI__builtin_ia32_cmpq128_mask
:
16353 case X86::BI__builtin_ia32_cmpq256_mask
:
16354 case X86::BI__builtin_ia32_cmpq512_mask
: {
16355 unsigned CC
= cast
<llvm::ConstantInt
>(Ops
[2])->getZExtValue() & 0x7;
16356 return EmitX86MaskedCompare(*this, CC
, true, Ops
);
16358 case X86::BI__builtin_ia32_ucmpb128_mask
:
16359 case X86::BI__builtin_ia32_ucmpb256_mask
:
16360 case X86::BI__builtin_ia32_ucmpb512_mask
:
16361 case X86::BI__builtin_ia32_ucmpw128_mask
:
16362 case X86::BI__builtin_ia32_ucmpw256_mask
:
16363 case X86::BI__builtin_ia32_ucmpw512_mask
:
16364 case X86::BI__builtin_ia32_ucmpd128_mask
:
16365 case X86::BI__builtin_ia32_ucmpd256_mask
:
16366 case X86::BI__builtin_ia32_ucmpd512_mask
:
16367 case X86::BI__builtin_ia32_ucmpq128_mask
:
16368 case X86::BI__builtin_ia32_ucmpq256_mask
:
16369 case X86::BI__builtin_ia32_ucmpq512_mask
: {
16370 unsigned CC
= cast
<llvm::ConstantInt
>(Ops
[2])->getZExtValue() & 0x7;
16371 return EmitX86MaskedCompare(*this, CC
, false, Ops
);
16373 case X86::BI__builtin_ia32_vpcomb
:
16374 case X86::BI__builtin_ia32_vpcomw
:
16375 case X86::BI__builtin_ia32_vpcomd
:
16376 case X86::BI__builtin_ia32_vpcomq
:
16377 return EmitX86vpcom(*this, Ops
, true);
16378 case X86::BI__builtin_ia32_vpcomub
:
16379 case X86::BI__builtin_ia32_vpcomuw
:
16380 case X86::BI__builtin_ia32_vpcomud
:
16381 case X86::BI__builtin_ia32_vpcomuq
:
16382 return EmitX86vpcom(*this, Ops
, false);
16384 case X86::BI__builtin_ia32_kortestcqi
:
16385 case X86::BI__builtin_ia32_kortestchi
:
16386 case X86::BI__builtin_ia32_kortestcsi
:
16387 case X86::BI__builtin_ia32_kortestcdi
: {
16388 Value
*Or
= EmitX86MaskLogic(*this, Instruction::Or
, Ops
);
16389 Value
*C
= llvm::Constant::getAllOnesValue(Ops
[0]->getType());
16390 Value
*Cmp
= Builder
.CreateICmpEQ(Or
, C
);
16391 return Builder
.CreateZExt(Cmp
, ConvertType(E
->getType()));
16393 case X86::BI__builtin_ia32_kortestzqi
:
16394 case X86::BI__builtin_ia32_kortestzhi
:
16395 case X86::BI__builtin_ia32_kortestzsi
:
16396 case X86::BI__builtin_ia32_kortestzdi
: {
16397 Value
*Or
= EmitX86MaskLogic(*this, Instruction::Or
, Ops
);
16398 Value
*C
= llvm::Constant::getNullValue(Ops
[0]->getType());
16399 Value
*Cmp
= Builder
.CreateICmpEQ(Or
, C
);
16400 return Builder
.CreateZExt(Cmp
, ConvertType(E
->getType()));
16403 case X86::BI__builtin_ia32_ktestcqi
:
16404 case X86::BI__builtin_ia32_ktestzqi
:
16405 case X86::BI__builtin_ia32_ktestchi
:
16406 case X86::BI__builtin_ia32_ktestzhi
:
16407 case X86::BI__builtin_ia32_ktestcsi
:
16408 case X86::BI__builtin_ia32_ktestzsi
:
16409 case X86::BI__builtin_ia32_ktestcdi
:
16410 case X86::BI__builtin_ia32_ktestzdi
: {
16412 switch (BuiltinID
) {
16413 default: llvm_unreachable("Unsupported intrinsic!");
16414 case X86::BI__builtin_ia32_ktestcqi
:
16415 IID
= Intrinsic::x86_avx512_ktestc_b
;
16417 case X86::BI__builtin_ia32_ktestzqi
:
16418 IID
= Intrinsic::x86_avx512_ktestz_b
;
16420 case X86::BI__builtin_ia32_ktestchi
:
16421 IID
= Intrinsic::x86_avx512_ktestc_w
;
16423 case X86::BI__builtin_ia32_ktestzhi
:
16424 IID
= Intrinsic::x86_avx512_ktestz_w
;
16426 case X86::BI__builtin_ia32_ktestcsi
:
16427 IID
= Intrinsic::x86_avx512_ktestc_d
;
16429 case X86::BI__builtin_ia32_ktestzsi
:
16430 IID
= Intrinsic::x86_avx512_ktestz_d
;
16432 case X86::BI__builtin_ia32_ktestcdi
:
16433 IID
= Intrinsic::x86_avx512_ktestc_q
;
16435 case X86::BI__builtin_ia32_ktestzdi
:
16436 IID
= Intrinsic::x86_avx512_ktestz_q
;
16440 unsigned NumElts
= Ops
[0]->getType()->getIntegerBitWidth();
16441 Value
*LHS
= getMaskVecValue(*this, Ops
[0], NumElts
);
16442 Value
*RHS
= getMaskVecValue(*this, Ops
[1], NumElts
);
16443 Function
*Intr
= CGM
.getIntrinsic(IID
);
16444 return Builder
.CreateCall(Intr
, {LHS
, RHS
});
16447 case X86::BI__builtin_ia32_kaddqi
:
16448 case X86::BI__builtin_ia32_kaddhi
:
16449 case X86::BI__builtin_ia32_kaddsi
:
16450 case X86::BI__builtin_ia32_kadddi
: {
16452 switch (BuiltinID
) {
16453 default: llvm_unreachable("Unsupported intrinsic!");
16454 case X86::BI__builtin_ia32_kaddqi
:
16455 IID
= Intrinsic::x86_avx512_kadd_b
;
16457 case X86::BI__builtin_ia32_kaddhi
:
16458 IID
= Intrinsic::x86_avx512_kadd_w
;
16460 case X86::BI__builtin_ia32_kaddsi
:
16461 IID
= Intrinsic::x86_avx512_kadd_d
;
16463 case X86::BI__builtin_ia32_kadddi
:
16464 IID
= Intrinsic::x86_avx512_kadd_q
;
16468 unsigned NumElts
= Ops
[0]->getType()->getIntegerBitWidth();
16469 Value
*LHS
= getMaskVecValue(*this, Ops
[0], NumElts
);
16470 Value
*RHS
= getMaskVecValue(*this, Ops
[1], NumElts
);
16471 Function
*Intr
= CGM
.getIntrinsic(IID
);
16472 Value
*Res
= Builder
.CreateCall(Intr
, {LHS
, RHS
});
16473 return Builder
.CreateBitCast(Res
, Ops
[0]->getType());
16475 case X86::BI__builtin_ia32_kandqi
:
16476 case X86::BI__builtin_ia32_kandhi
:
16477 case X86::BI__builtin_ia32_kandsi
:
16478 case X86::BI__builtin_ia32_kanddi
:
16479 return EmitX86MaskLogic(*this, Instruction::And
, Ops
);
16480 case X86::BI__builtin_ia32_kandnqi
:
16481 case X86::BI__builtin_ia32_kandnhi
:
16482 case X86::BI__builtin_ia32_kandnsi
:
16483 case X86::BI__builtin_ia32_kandndi
:
16484 return EmitX86MaskLogic(*this, Instruction::And
, Ops
, true);
16485 case X86::BI__builtin_ia32_korqi
:
16486 case X86::BI__builtin_ia32_korhi
:
16487 case X86::BI__builtin_ia32_korsi
:
16488 case X86::BI__builtin_ia32_kordi
:
16489 return EmitX86MaskLogic(*this, Instruction::Or
, Ops
);
16490 case X86::BI__builtin_ia32_kxnorqi
:
16491 case X86::BI__builtin_ia32_kxnorhi
:
16492 case X86::BI__builtin_ia32_kxnorsi
:
16493 case X86::BI__builtin_ia32_kxnordi
:
16494 return EmitX86MaskLogic(*this, Instruction::Xor
, Ops
, true);
16495 case X86::BI__builtin_ia32_kxorqi
:
16496 case X86::BI__builtin_ia32_kxorhi
:
16497 case X86::BI__builtin_ia32_kxorsi
:
16498 case X86::BI__builtin_ia32_kxordi
:
16499 return EmitX86MaskLogic(*this, Instruction::Xor
, Ops
);
16500 case X86::BI__builtin_ia32_knotqi
:
16501 case X86::BI__builtin_ia32_knothi
:
16502 case X86::BI__builtin_ia32_knotsi
:
16503 case X86::BI__builtin_ia32_knotdi
: {
16504 unsigned NumElts
= Ops
[0]->getType()->getIntegerBitWidth();
16505 Value
*Res
= getMaskVecValue(*this, Ops
[0], NumElts
);
16506 return Builder
.CreateBitCast(Builder
.CreateNot(Res
),
16507 Ops
[0]->getType());
16509 case X86::BI__builtin_ia32_kmovb
:
16510 case X86::BI__builtin_ia32_kmovw
:
16511 case X86::BI__builtin_ia32_kmovd
:
16512 case X86::BI__builtin_ia32_kmovq
: {
16513 // Bitcast to vXi1 type and then back to integer. This gets the mask
16514 // register type into the IR, but might be optimized out depending on
16515 // what's around it.
16516 unsigned NumElts
= Ops
[0]->getType()->getIntegerBitWidth();
16517 Value
*Res
= getMaskVecValue(*this, Ops
[0], NumElts
);
16518 return Builder
.CreateBitCast(Res
, Ops
[0]->getType());
16521 case X86::BI__builtin_ia32_kunpckdi
:
16522 case X86::BI__builtin_ia32_kunpcksi
:
16523 case X86::BI__builtin_ia32_kunpckhi
: {
16524 unsigned NumElts
= Ops
[0]->getType()->getIntegerBitWidth();
16525 Value
*LHS
= getMaskVecValue(*this, Ops
[0], NumElts
);
16526 Value
*RHS
= getMaskVecValue(*this, Ops
[1], NumElts
);
16528 for (unsigned i
= 0; i
!= NumElts
; ++i
)
16531 // First extract half of each vector. This gives better codegen than
16532 // doing it in a single shuffle.
16533 LHS
= Builder
.CreateShuffleVector(LHS
, LHS
, ArrayRef(Indices
, NumElts
/ 2));
16534 RHS
= Builder
.CreateShuffleVector(RHS
, RHS
, ArrayRef(Indices
, NumElts
/ 2));
16535 // Concat the vectors.
16536 // NOTE: Operands are swapped to match the intrinsic definition.
16538 Builder
.CreateShuffleVector(RHS
, LHS
, ArrayRef(Indices
, NumElts
));
16539 return Builder
.CreateBitCast(Res
, Ops
[0]->getType());
16542 case X86::BI__builtin_ia32_vplzcntd_128
:
16543 case X86::BI__builtin_ia32_vplzcntd_256
:
16544 case X86::BI__builtin_ia32_vplzcntd_512
:
16545 case X86::BI__builtin_ia32_vplzcntq_128
:
16546 case X86::BI__builtin_ia32_vplzcntq_256
:
16547 case X86::BI__builtin_ia32_vplzcntq_512
: {
16548 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctlz
, Ops
[0]->getType());
16549 return Builder
.CreateCall(F
, {Ops
[0],Builder
.getInt1(false)});
16551 case X86::BI__builtin_ia32_sqrtss
:
16552 case X86::BI__builtin_ia32_sqrtsd
: {
16553 Value
*A
= Builder
.CreateExtractElement(Ops
[0], (uint64_t)0);
16555 if (Builder
.getIsFPConstrained()) {
16556 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
16557 F
= CGM
.getIntrinsic(Intrinsic::experimental_constrained_sqrt
,
16559 A
= Builder
.CreateConstrainedFPCall(F
, {A
});
16561 F
= CGM
.getIntrinsic(Intrinsic::sqrt
, A
->getType());
16562 A
= Builder
.CreateCall(F
, {A
});
16564 return Builder
.CreateInsertElement(Ops
[0], A
, (uint64_t)0);
16566 case X86::BI__builtin_ia32_sqrtsh_round_mask
:
16567 case X86::BI__builtin_ia32_sqrtsd_round_mask
:
16568 case X86::BI__builtin_ia32_sqrtss_round_mask
: {
16569 unsigned CC
= cast
<llvm::ConstantInt
>(Ops
[4])->getZExtValue();
16570 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
16571 // otherwise keep the intrinsic.
16575 switch (BuiltinID
) {
16577 llvm_unreachable("Unsupported intrinsic!");
16578 case X86::BI__builtin_ia32_sqrtsh_round_mask
:
16579 IID
= Intrinsic::x86_avx512fp16_mask_sqrt_sh
;
16581 case X86::BI__builtin_ia32_sqrtsd_round_mask
:
16582 IID
= Intrinsic::x86_avx512_mask_sqrt_sd
;
16584 case X86::BI__builtin_ia32_sqrtss_round_mask
:
16585 IID
= Intrinsic::x86_avx512_mask_sqrt_ss
;
16588 return Builder
.CreateCall(CGM
.getIntrinsic(IID
), Ops
);
16590 Value
*A
= Builder
.CreateExtractElement(Ops
[1], (uint64_t)0);
16592 if (Builder
.getIsFPConstrained()) {
16593 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
16594 F
= CGM
.getIntrinsic(Intrinsic::experimental_constrained_sqrt
,
16596 A
= Builder
.CreateConstrainedFPCall(F
, A
);
16598 F
= CGM
.getIntrinsic(Intrinsic::sqrt
, A
->getType());
16599 A
= Builder
.CreateCall(F
, A
);
16601 Value
*Src
= Builder
.CreateExtractElement(Ops
[2], (uint64_t)0);
16602 A
= EmitX86ScalarSelect(*this, Ops
[3], A
, Src
);
16603 return Builder
.CreateInsertElement(Ops
[0], A
, (uint64_t)0);
16605 case X86::BI__builtin_ia32_sqrtpd256
:
16606 case X86::BI__builtin_ia32_sqrtpd
:
16607 case X86::BI__builtin_ia32_sqrtps256
:
16608 case X86::BI__builtin_ia32_sqrtps
:
16609 case X86::BI__builtin_ia32_sqrtph256
:
16610 case X86::BI__builtin_ia32_sqrtph
:
16611 case X86::BI__builtin_ia32_sqrtph512
:
16612 case X86::BI__builtin_ia32_vsqrtnepbf16256
:
16613 case X86::BI__builtin_ia32_vsqrtnepbf16
:
16614 case X86::BI__builtin_ia32_vsqrtnepbf16512
:
16615 case X86::BI__builtin_ia32_sqrtps512
:
16616 case X86::BI__builtin_ia32_sqrtpd512
: {
16617 if (Ops
.size() == 2) {
16618 unsigned CC
= cast
<llvm::ConstantInt
>(Ops
[1])->getZExtValue();
16619 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
16620 // otherwise keep the intrinsic.
16624 switch (BuiltinID
) {
16626 llvm_unreachable("Unsupported intrinsic!");
16627 case X86::BI__builtin_ia32_sqrtph512
:
16628 IID
= Intrinsic::x86_avx512fp16_sqrt_ph_512
;
16630 case X86::BI__builtin_ia32_sqrtps512
:
16631 IID
= Intrinsic::x86_avx512_sqrt_ps_512
;
16633 case X86::BI__builtin_ia32_sqrtpd512
:
16634 IID
= Intrinsic::x86_avx512_sqrt_pd_512
;
16637 return Builder
.CreateCall(CGM
.getIntrinsic(IID
), Ops
);
16640 if (Builder
.getIsFPConstrained()) {
16641 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
16642 Function
*F
= CGM
.getIntrinsic(Intrinsic::experimental_constrained_sqrt
,
16643 Ops
[0]->getType());
16644 return Builder
.CreateConstrainedFPCall(F
, Ops
[0]);
16646 Function
*F
= CGM
.getIntrinsic(Intrinsic::sqrt
, Ops
[0]->getType());
16647 return Builder
.CreateCall(F
, Ops
[0]);
16651 case X86::BI__builtin_ia32_pmuludq128
:
16652 case X86::BI__builtin_ia32_pmuludq256
:
16653 case X86::BI__builtin_ia32_pmuludq512
:
16654 return EmitX86Muldq(*this, /*IsSigned*/false, Ops
);
16656 case X86::BI__builtin_ia32_pmuldq128
:
16657 case X86::BI__builtin_ia32_pmuldq256
:
16658 case X86::BI__builtin_ia32_pmuldq512
:
16659 return EmitX86Muldq(*this, /*IsSigned*/true, Ops
);
16661 case X86::BI__builtin_ia32_pternlogd512_mask
:
16662 case X86::BI__builtin_ia32_pternlogq512_mask
:
16663 case X86::BI__builtin_ia32_pternlogd128_mask
:
16664 case X86::BI__builtin_ia32_pternlogd256_mask
:
16665 case X86::BI__builtin_ia32_pternlogq128_mask
:
16666 case X86::BI__builtin_ia32_pternlogq256_mask
:
16667 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops
);
16669 case X86::BI__builtin_ia32_pternlogd512_maskz
:
16670 case X86::BI__builtin_ia32_pternlogq512_maskz
:
16671 case X86::BI__builtin_ia32_pternlogd128_maskz
:
16672 case X86::BI__builtin_ia32_pternlogd256_maskz
:
16673 case X86::BI__builtin_ia32_pternlogq128_maskz
:
16674 case X86::BI__builtin_ia32_pternlogq256_maskz
:
16675 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops
);
16677 case X86::BI__builtin_ia32_vpshldd128
:
16678 case X86::BI__builtin_ia32_vpshldd256
:
16679 case X86::BI__builtin_ia32_vpshldd512
:
16680 case X86::BI__builtin_ia32_vpshldq128
:
16681 case X86::BI__builtin_ia32_vpshldq256
:
16682 case X86::BI__builtin_ia32_vpshldq512
:
16683 case X86::BI__builtin_ia32_vpshldw128
:
16684 case X86::BI__builtin_ia32_vpshldw256
:
16685 case X86::BI__builtin_ia32_vpshldw512
:
16686 return EmitX86FunnelShift(*this, Ops
[0], Ops
[1], Ops
[2], false);
16688 case X86::BI__builtin_ia32_vpshrdd128
:
16689 case X86::BI__builtin_ia32_vpshrdd256
:
16690 case X86::BI__builtin_ia32_vpshrdd512
:
16691 case X86::BI__builtin_ia32_vpshrdq128
:
16692 case X86::BI__builtin_ia32_vpshrdq256
:
16693 case X86::BI__builtin_ia32_vpshrdq512
:
16694 case X86::BI__builtin_ia32_vpshrdw128
:
16695 case X86::BI__builtin_ia32_vpshrdw256
:
16696 case X86::BI__builtin_ia32_vpshrdw512
:
16697 // Ops 0 and 1 are swapped.
16698 return EmitX86FunnelShift(*this, Ops
[1], Ops
[0], Ops
[2], true);
16700 case X86::BI__builtin_ia32_vpshldvd128
:
16701 case X86::BI__builtin_ia32_vpshldvd256
:
16702 case X86::BI__builtin_ia32_vpshldvd512
:
16703 case X86::BI__builtin_ia32_vpshldvq128
:
16704 case X86::BI__builtin_ia32_vpshldvq256
:
16705 case X86::BI__builtin_ia32_vpshldvq512
:
16706 case X86::BI__builtin_ia32_vpshldvw128
:
16707 case X86::BI__builtin_ia32_vpshldvw256
:
16708 case X86::BI__builtin_ia32_vpshldvw512
:
16709 return EmitX86FunnelShift(*this, Ops
[0], Ops
[1], Ops
[2], false);
16711 case X86::BI__builtin_ia32_vpshrdvd128
:
16712 case X86::BI__builtin_ia32_vpshrdvd256
:
16713 case X86::BI__builtin_ia32_vpshrdvd512
:
16714 case X86::BI__builtin_ia32_vpshrdvq128
:
16715 case X86::BI__builtin_ia32_vpshrdvq256
:
16716 case X86::BI__builtin_ia32_vpshrdvq512
:
16717 case X86::BI__builtin_ia32_vpshrdvw128
:
16718 case X86::BI__builtin_ia32_vpshrdvw256
:
16719 case X86::BI__builtin_ia32_vpshrdvw512
:
16720 // Ops 0 and 1 are swapped.
16721 return EmitX86FunnelShift(*this, Ops
[1], Ops
[0], Ops
[2], true);
16724 case X86::BI__builtin_ia32_reduce_fadd_pd512
:
16725 case X86::BI__builtin_ia32_reduce_fadd_ps512
:
16726 case X86::BI__builtin_ia32_reduce_fadd_ph512
:
16727 case X86::BI__builtin_ia32_reduce_fadd_ph256
:
16728 case X86::BI__builtin_ia32_reduce_fadd_ph128
: {
16730 CGM
.getIntrinsic(Intrinsic::vector_reduce_fadd
, Ops
[1]->getType());
16731 IRBuilder
<>::FastMathFlagGuard
FMFGuard(Builder
);
16732 Builder
.getFastMathFlags().setAllowReassoc();
16733 return Builder
.CreateCall(F
, {Ops
[0], Ops
[1]});
16735 case X86::BI__builtin_ia32_reduce_fmul_pd512
:
16736 case X86::BI__builtin_ia32_reduce_fmul_ps512
:
16737 case X86::BI__builtin_ia32_reduce_fmul_ph512
:
16738 case X86::BI__builtin_ia32_reduce_fmul_ph256
:
16739 case X86::BI__builtin_ia32_reduce_fmul_ph128
: {
16741 CGM
.getIntrinsic(Intrinsic::vector_reduce_fmul
, Ops
[1]->getType());
16742 IRBuilder
<>::FastMathFlagGuard
FMFGuard(Builder
);
16743 Builder
.getFastMathFlags().setAllowReassoc();
16744 return Builder
.CreateCall(F
, {Ops
[0], Ops
[1]});
16746 case X86::BI__builtin_ia32_reduce_fmax_pd512
:
16747 case X86::BI__builtin_ia32_reduce_fmax_ps512
:
16748 case X86::BI__builtin_ia32_reduce_fmax_ph512
:
16749 case X86::BI__builtin_ia32_reduce_fmax_ph256
:
16750 case X86::BI__builtin_ia32_reduce_fmax_ph128
: {
16752 CGM
.getIntrinsic(Intrinsic::vector_reduce_fmax
, Ops
[0]->getType());
16753 IRBuilder
<>::FastMathFlagGuard
FMFGuard(Builder
);
16754 Builder
.getFastMathFlags().setNoNaNs();
16755 return Builder
.CreateCall(F
, {Ops
[0]});
16757 case X86::BI__builtin_ia32_reduce_fmin_pd512
:
16758 case X86::BI__builtin_ia32_reduce_fmin_ps512
:
16759 case X86::BI__builtin_ia32_reduce_fmin_ph512
:
16760 case X86::BI__builtin_ia32_reduce_fmin_ph256
:
16761 case X86::BI__builtin_ia32_reduce_fmin_ph128
: {
16763 CGM
.getIntrinsic(Intrinsic::vector_reduce_fmin
, Ops
[0]->getType());
16764 IRBuilder
<>::FastMathFlagGuard
FMFGuard(Builder
);
16765 Builder
.getFastMathFlags().setNoNaNs();
16766 return Builder
.CreateCall(F
, {Ops
[0]});
16769 case X86::BI__builtin_ia32_rdrand16_step
:
16770 case X86::BI__builtin_ia32_rdrand32_step
:
16771 case X86::BI__builtin_ia32_rdrand64_step
:
16772 case X86::BI__builtin_ia32_rdseed16_step
:
16773 case X86::BI__builtin_ia32_rdseed32_step
:
16774 case X86::BI__builtin_ia32_rdseed64_step
: {
16776 switch (BuiltinID
) {
16777 default: llvm_unreachable("Unsupported intrinsic!");
16778 case X86::BI__builtin_ia32_rdrand16_step
:
16779 ID
= Intrinsic::x86_rdrand_16
;
16781 case X86::BI__builtin_ia32_rdrand32_step
:
16782 ID
= Intrinsic::x86_rdrand_32
;
16784 case X86::BI__builtin_ia32_rdrand64_step
:
16785 ID
= Intrinsic::x86_rdrand_64
;
16787 case X86::BI__builtin_ia32_rdseed16_step
:
16788 ID
= Intrinsic::x86_rdseed_16
;
16790 case X86::BI__builtin_ia32_rdseed32_step
:
16791 ID
= Intrinsic::x86_rdseed_32
;
16793 case X86::BI__builtin_ia32_rdseed64_step
:
16794 ID
= Intrinsic::x86_rdseed_64
;
16798 Value
*Call
= Builder
.CreateCall(CGM
.getIntrinsic(ID
));
16799 Builder
.CreateDefaultAlignedStore(Builder
.CreateExtractValue(Call
, 0),
16801 return Builder
.CreateExtractValue(Call
, 1);
16803 case X86::BI__builtin_ia32_addcarryx_u32
:
16804 case X86::BI__builtin_ia32_addcarryx_u64
:
16805 case X86::BI__builtin_ia32_subborrow_u32
:
16806 case X86::BI__builtin_ia32_subborrow_u64
: {
16808 switch (BuiltinID
) {
16809 default: llvm_unreachable("Unsupported intrinsic!");
16810 case X86::BI__builtin_ia32_addcarryx_u32
:
16811 IID
= Intrinsic::x86_addcarry_32
;
16813 case X86::BI__builtin_ia32_addcarryx_u64
:
16814 IID
= Intrinsic::x86_addcarry_64
;
16816 case X86::BI__builtin_ia32_subborrow_u32
:
16817 IID
= Intrinsic::x86_subborrow_32
;
16819 case X86::BI__builtin_ia32_subborrow_u64
:
16820 IID
= Intrinsic::x86_subborrow_64
;
16824 Value
*Call
= Builder
.CreateCall(CGM
.getIntrinsic(IID
),
16825 { Ops
[0], Ops
[1], Ops
[2] });
16826 Builder
.CreateDefaultAlignedStore(Builder
.CreateExtractValue(Call
, 1),
16828 return Builder
.CreateExtractValue(Call
, 0);
16831 case X86::BI__builtin_ia32_fpclassps128_mask
:
16832 case X86::BI__builtin_ia32_fpclassps256_mask
:
16833 case X86::BI__builtin_ia32_fpclassps512_mask
:
16834 case X86::BI__builtin_ia32_vfpclasspbf16128_mask
:
16835 case X86::BI__builtin_ia32_vfpclasspbf16256_mask
:
16836 case X86::BI__builtin_ia32_vfpclasspbf16512_mask
:
16837 case X86::BI__builtin_ia32_fpclassph128_mask
:
16838 case X86::BI__builtin_ia32_fpclassph256_mask
:
16839 case X86::BI__builtin_ia32_fpclassph512_mask
:
16840 case X86::BI__builtin_ia32_fpclasspd128_mask
:
16841 case X86::BI__builtin_ia32_fpclasspd256_mask
:
16842 case X86::BI__builtin_ia32_fpclasspd512_mask
: {
16844 cast
<llvm::FixedVectorType
>(Ops
[0]->getType())->getNumElements();
16845 Value
*MaskIn
= Ops
[2];
16846 Ops
.erase(&Ops
[2]);
16849 switch (BuiltinID
) {
16850 default: llvm_unreachable("Unsupported intrinsic!");
16851 case X86::BI__builtin_ia32_vfpclasspbf16128_mask
:
16852 ID
= Intrinsic::x86_avx10_fpclass_nepbf16_128
;
16854 case X86::BI__builtin_ia32_vfpclasspbf16256_mask
:
16855 ID
= Intrinsic::x86_avx10_fpclass_nepbf16_256
;
16857 case X86::BI__builtin_ia32_vfpclasspbf16512_mask
:
16858 ID
= Intrinsic::x86_avx10_fpclass_nepbf16_512
;
16860 case X86::BI__builtin_ia32_fpclassph128_mask
:
16861 ID
= Intrinsic::x86_avx512fp16_fpclass_ph_128
;
16863 case X86::BI__builtin_ia32_fpclassph256_mask
:
16864 ID
= Intrinsic::x86_avx512fp16_fpclass_ph_256
;
16866 case X86::BI__builtin_ia32_fpclassph512_mask
:
16867 ID
= Intrinsic::x86_avx512fp16_fpclass_ph_512
;
16869 case X86::BI__builtin_ia32_fpclassps128_mask
:
16870 ID
= Intrinsic::x86_avx512_fpclass_ps_128
;
16872 case X86::BI__builtin_ia32_fpclassps256_mask
:
16873 ID
= Intrinsic::x86_avx512_fpclass_ps_256
;
16875 case X86::BI__builtin_ia32_fpclassps512_mask
:
16876 ID
= Intrinsic::x86_avx512_fpclass_ps_512
;
16878 case X86::BI__builtin_ia32_fpclasspd128_mask
:
16879 ID
= Intrinsic::x86_avx512_fpclass_pd_128
;
16881 case X86::BI__builtin_ia32_fpclasspd256_mask
:
16882 ID
= Intrinsic::x86_avx512_fpclass_pd_256
;
16884 case X86::BI__builtin_ia32_fpclasspd512_mask
:
16885 ID
= Intrinsic::x86_avx512_fpclass_pd_512
;
16889 Value
*Fpclass
= Builder
.CreateCall(CGM
.getIntrinsic(ID
), Ops
);
16890 return EmitX86MaskedCompareResult(*this, Fpclass
, NumElts
, MaskIn
);
16893 case X86::BI__builtin_ia32_vp2intersect_q_512
:
16894 case X86::BI__builtin_ia32_vp2intersect_q_256
:
16895 case X86::BI__builtin_ia32_vp2intersect_q_128
:
16896 case X86::BI__builtin_ia32_vp2intersect_d_512
:
16897 case X86::BI__builtin_ia32_vp2intersect_d_256
:
16898 case X86::BI__builtin_ia32_vp2intersect_d_128
: {
16900 cast
<llvm::FixedVectorType
>(Ops
[0]->getType())->getNumElements();
16903 switch (BuiltinID
) {
16904 default: llvm_unreachable("Unsupported intrinsic!");
16905 case X86::BI__builtin_ia32_vp2intersect_q_512
:
16906 ID
= Intrinsic::x86_avx512_vp2intersect_q_512
;
16908 case X86::BI__builtin_ia32_vp2intersect_q_256
:
16909 ID
= Intrinsic::x86_avx512_vp2intersect_q_256
;
16911 case X86::BI__builtin_ia32_vp2intersect_q_128
:
16912 ID
= Intrinsic::x86_avx512_vp2intersect_q_128
;
16914 case X86::BI__builtin_ia32_vp2intersect_d_512
:
16915 ID
= Intrinsic::x86_avx512_vp2intersect_d_512
;
16917 case X86::BI__builtin_ia32_vp2intersect_d_256
:
16918 ID
= Intrinsic::x86_avx512_vp2intersect_d_256
;
16920 case X86::BI__builtin_ia32_vp2intersect_d_128
:
16921 ID
= Intrinsic::x86_avx512_vp2intersect_d_128
;
16925 Value
*Call
= Builder
.CreateCall(CGM
.getIntrinsic(ID
), {Ops
[0], Ops
[1]});
16926 Value
*Result
= Builder
.CreateExtractValue(Call
, 0);
16927 Result
= EmitX86MaskedCompareResult(*this, Result
, NumElts
, nullptr);
16928 Builder
.CreateDefaultAlignedStore(Result
, Ops
[2]);
16930 Result
= Builder
.CreateExtractValue(Call
, 1);
16931 Result
= EmitX86MaskedCompareResult(*this, Result
, NumElts
, nullptr);
16932 return Builder
.CreateDefaultAlignedStore(Result
, Ops
[3]);
16935 case X86::BI__builtin_ia32_vpmultishiftqb128
:
16936 case X86::BI__builtin_ia32_vpmultishiftqb256
:
16937 case X86::BI__builtin_ia32_vpmultishiftqb512
: {
16939 switch (BuiltinID
) {
16940 default: llvm_unreachable("Unsupported intrinsic!");
16941 case X86::BI__builtin_ia32_vpmultishiftqb128
:
16942 ID
= Intrinsic::x86_avx512_pmultishift_qb_128
;
16944 case X86::BI__builtin_ia32_vpmultishiftqb256
:
16945 ID
= Intrinsic::x86_avx512_pmultishift_qb_256
;
16947 case X86::BI__builtin_ia32_vpmultishiftqb512
:
16948 ID
= Intrinsic::x86_avx512_pmultishift_qb_512
;
16952 return Builder
.CreateCall(CGM
.getIntrinsic(ID
), Ops
);
16955 case X86::BI__builtin_ia32_vpshufbitqmb128_mask
:
16956 case X86::BI__builtin_ia32_vpshufbitqmb256_mask
:
16957 case X86::BI__builtin_ia32_vpshufbitqmb512_mask
: {
16959 cast
<llvm::FixedVectorType
>(Ops
[0]->getType())->getNumElements();
16960 Value
*MaskIn
= Ops
[2];
16961 Ops
.erase(&Ops
[2]);
16964 switch (BuiltinID
) {
16965 default: llvm_unreachable("Unsupported intrinsic!");
16966 case X86::BI__builtin_ia32_vpshufbitqmb128_mask
:
16967 ID
= Intrinsic::x86_avx512_vpshufbitqmb_128
;
16969 case X86::BI__builtin_ia32_vpshufbitqmb256_mask
:
16970 ID
= Intrinsic::x86_avx512_vpshufbitqmb_256
;
16972 case X86::BI__builtin_ia32_vpshufbitqmb512_mask
:
16973 ID
= Intrinsic::x86_avx512_vpshufbitqmb_512
;
16977 Value
*Shufbit
= Builder
.CreateCall(CGM
.getIntrinsic(ID
), Ops
);
16978 return EmitX86MaskedCompareResult(*this, Shufbit
, NumElts
, MaskIn
);
16981 // packed comparison intrinsics
16982 case X86::BI__builtin_ia32_cmpeqps
:
16983 case X86::BI__builtin_ia32_cmpeqpd
:
16984 return getVectorFCmpIR(CmpInst::FCMP_OEQ
, /*IsSignaling*/false);
16985 case X86::BI__builtin_ia32_cmpltps
:
16986 case X86::BI__builtin_ia32_cmpltpd
:
16987 return getVectorFCmpIR(CmpInst::FCMP_OLT
, /*IsSignaling*/true);
16988 case X86::BI__builtin_ia32_cmpleps
:
16989 case X86::BI__builtin_ia32_cmplepd
:
16990 return getVectorFCmpIR(CmpInst::FCMP_OLE
, /*IsSignaling*/true);
16991 case X86::BI__builtin_ia32_cmpunordps
:
16992 case X86::BI__builtin_ia32_cmpunordpd
:
16993 return getVectorFCmpIR(CmpInst::FCMP_UNO
, /*IsSignaling*/false);
16994 case X86::BI__builtin_ia32_cmpneqps
:
16995 case X86::BI__builtin_ia32_cmpneqpd
:
16996 return getVectorFCmpIR(CmpInst::FCMP_UNE
, /*IsSignaling*/false);
16997 case X86::BI__builtin_ia32_cmpnltps
:
16998 case X86::BI__builtin_ia32_cmpnltpd
:
16999 return getVectorFCmpIR(CmpInst::FCMP_UGE
, /*IsSignaling*/true);
17000 case X86::BI__builtin_ia32_cmpnleps
:
17001 case X86::BI__builtin_ia32_cmpnlepd
:
17002 return getVectorFCmpIR(CmpInst::FCMP_UGT
, /*IsSignaling*/true);
17003 case X86::BI__builtin_ia32_cmpordps
:
17004 case X86::BI__builtin_ia32_cmpordpd
:
17005 return getVectorFCmpIR(CmpInst::FCMP_ORD
, /*IsSignaling*/false);
17006 case X86::BI__builtin_ia32_cmpph128_mask
:
17007 case X86::BI__builtin_ia32_cmpph256_mask
:
17008 case X86::BI__builtin_ia32_cmpph512_mask
:
17009 case X86::BI__builtin_ia32_cmpps128_mask
:
17010 case X86::BI__builtin_ia32_cmpps256_mask
:
17011 case X86::BI__builtin_ia32_cmpps512_mask
:
17012 case X86::BI__builtin_ia32_cmppd128_mask
:
17013 case X86::BI__builtin_ia32_cmppd256_mask
:
17014 case X86::BI__builtin_ia32_cmppd512_mask
:
17015 case X86::BI__builtin_ia32_vcmppd256_round_mask
:
17016 case X86::BI__builtin_ia32_vcmpps256_round_mask
:
17017 case X86::BI__builtin_ia32_vcmpph256_round_mask
:
17018 case X86::BI__builtin_ia32_vcmppbf16512_mask
:
17019 case X86::BI__builtin_ia32_vcmppbf16256_mask
:
17020 case X86::BI__builtin_ia32_vcmppbf16128_mask
:
17023 case X86::BI__builtin_ia32_cmpps
:
17024 case X86::BI__builtin_ia32_cmpps256
:
17025 case X86::BI__builtin_ia32_cmppd
:
17026 case X86::BI__builtin_ia32_cmppd256
: {
17027 // Lowering vector comparisons to fcmp instructions, while
17028 // ignoring signalling behaviour requested
17029 // ignoring rounding mode requested
17030 // This is only possible if fp-model is not strict and FENV_ACCESS is off.
17032 // The third argument is the comparison condition, an integer in the
17034 unsigned CC
= cast
<llvm::ConstantInt
>(Ops
[2])->getZExtValue() & 0x1f;
17036 // Lowering to IR fcmp instruction.
17037 // Ignoring requested signaling behaviour,
17038 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
17039 FCmpInst::Predicate Pred
;
17041 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
17042 // behavior is inverted. We'll handle that after the switch.
17043 switch (CC
& 0xf) {
17044 case 0x00: Pred
= FCmpInst::FCMP_OEQ
; IsSignaling
= false; break;
17045 case 0x01: Pred
= FCmpInst::FCMP_OLT
; IsSignaling
= true; break;
17046 case 0x02: Pred
= FCmpInst::FCMP_OLE
; IsSignaling
= true; break;
17047 case 0x03: Pred
= FCmpInst::FCMP_UNO
; IsSignaling
= false; break;
17048 case 0x04: Pred
= FCmpInst::FCMP_UNE
; IsSignaling
= false; break;
17049 case 0x05: Pred
= FCmpInst::FCMP_UGE
; IsSignaling
= true; break;
17050 case 0x06: Pred
= FCmpInst::FCMP_UGT
; IsSignaling
= true; break;
17051 case 0x07: Pred
= FCmpInst::FCMP_ORD
; IsSignaling
= false; break;
17052 case 0x08: Pred
= FCmpInst::FCMP_UEQ
; IsSignaling
= false; break;
17053 case 0x09: Pred
= FCmpInst::FCMP_ULT
; IsSignaling
= true; break;
17054 case 0x0a: Pred
= FCmpInst::FCMP_ULE
; IsSignaling
= true; break;
17055 case 0x0b: Pred
= FCmpInst::FCMP_FALSE
; IsSignaling
= false; break;
17056 case 0x0c: Pred
= FCmpInst::FCMP_ONE
; IsSignaling
= false; break;
17057 case 0x0d: Pred
= FCmpInst::FCMP_OGE
; IsSignaling
= true; break;
17058 case 0x0e: Pred
= FCmpInst::FCMP_OGT
; IsSignaling
= true; break;
17059 case 0x0f: Pred
= FCmpInst::FCMP_TRUE
; IsSignaling
= false; break;
17060 default: llvm_unreachable("Unhandled CC");
17063 // Invert the signalling behavior for 16-31.
17065 IsSignaling
= !IsSignaling
;
17067 // If the predicate is true or false and we're using constrained intrinsics,
17068 // we don't have a compare intrinsic we can use. Just use the legacy X86
17069 // specific intrinsic.
17070 // If the intrinsic is mask enabled and we're using constrained intrinsics,
17071 // use the legacy X86 specific intrinsic.
17072 if (Builder
.getIsFPConstrained() &&
17073 (Pred
== FCmpInst::FCMP_TRUE
|| Pred
== FCmpInst::FCMP_FALSE
||
17077 switch (BuiltinID
) {
17078 default: llvm_unreachable("Unexpected builtin");
17079 case X86::BI__builtin_ia32_cmpps
:
17080 IID
= Intrinsic::x86_sse_cmp_ps
;
17082 case X86::BI__builtin_ia32_cmpps256
:
17083 IID
= Intrinsic::x86_avx_cmp_ps_256
;
17085 case X86::BI__builtin_ia32_cmppd
:
17086 IID
= Intrinsic::x86_sse2_cmp_pd
;
17088 case X86::BI__builtin_ia32_cmppd256
:
17089 IID
= Intrinsic::x86_avx_cmp_pd_256
;
17091 case X86::BI__builtin_ia32_cmpph128_mask
:
17092 IID
= Intrinsic::x86_avx512fp16_mask_cmp_ph_128
;
17094 case X86::BI__builtin_ia32_cmpph256_mask
:
17095 IID
= Intrinsic::x86_avx512fp16_mask_cmp_ph_256
;
17097 case X86::BI__builtin_ia32_cmpph512_mask
:
17098 IID
= Intrinsic::x86_avx512fp16_mask_cmp_ph_512
;
17100 case X86::BI__builtin_ia32_cmpps512_mask
:
17101 IID
= Intrinsic::x86_avx512_mask_cmp_ps_512
;
17103 case X86::BI__builtin_ia32_cmppd512_mask
:
17104 IID
= Intrinsic::x86_avx512_mask_cmp_pd_512
;
17106 case X86::BI__builtin_ia32_cmpps128_mask
:
17107 IID
= Intrinsic::x86_avx512_mask_cmp_ps_128
;
17109 case X86::BI__builtin_ia32_cmpps256_mask
:
17110 IID
= Intrinsic::x86_avx512_mask_cmp_ps_256
;
17112 case X86::BI__builtin_ia32_cmppd128_mask
:
17113 IID
= Intrinsic::x86_avx512_mask_cmp_pd_128
;
17115 case X86::BI__builtin_ia32_cmppd256_mask
:
17116 IID
= Intrinsic::x86_avx512_mask_cmp_pd_256
;
17120 Function
*Intr
= CGM
.getIntrinsic(IID
);
17123 cast
<llvm::FixedVectorType
>(Ops
[0]->getType())->getNumElements();
17124 Ops
[3] = getMaskVecValue(*this, Ops
[3], NumElts
);
17125 Value
*Cmp
= Builder
.CreateCall(Intr
, Ops
);
17126 return EmitX86MaskedCompareResult(*this, Cmp
, NumElts
, nullptr);
17129 return Builder
.CreateCall(Intr
, Ops
);
17132 // Builtins without the _mask suffix return a vector of integers
17133 // of the same width as the input vectors
17135 // We ignore SAE if strict FP is disabled. We only keep precise
17136 // exception behavior under strict FP.
17137 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
17138 // object will be required.
17140 cast
<llvm::FixedVectorType
>(Ops
[0]->getType())->getNumElements();
17143 Cmp
= Builder
.CreateFCmpS(Pred
, Ops
[0], Ops
[1]);
17145 Cmp
= Builder
.CreateFCmp(Pred
, Ops
[0], Ops
[1]);
17146 return EmitX86MaskedCompareResult(*this, Cmp
, NumElts
, Ops
[3]);
17149 return getVectorFCmpIR(Pred
, IsSignaling
);
17152 // SSE scalar comparison intrinsics
17153 case X86::BI__builtin_ia32_cmpeqss
:
17154 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss
, 0);
17155 case X86::BI__builtin_ia32_cmpltss
:
17156 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss
, 1);
17157 case X86::BI__builtin_ia32_cmpless
:
17158 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss
, 2);
17159 case X86::BI__builtin_ia32_cmpunordss
:
17160 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss
, 3);
17161 case X86::BI__builtin_ia32_cmpneqss
:
17162 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss
, 4);
17163 case X86::BI__builtin_ia32_cmpnltss
:
17164 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss
, 5);
17165 case X86::BI__builtin_ia32_cmpnless
:
17166 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss
, 6);
17167 case X86::BI__builtin_ia32_cmpordss
:
17168 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss
, 7);
17169 case X86::BI__builtin_ia32_cmpeqsd
:
17170 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd
, 0);
17171 case X86::BI__builtin_ia32_cmpltsd
:
17172 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd
, 1);
17173 case X86::BI__builtin_ia32_cmplesd
:
17174 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd
, 2);
17175 case X86::BI__builtin_ia32_cmpunordsd
:
17176 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd
, 3);
17177 case X86::BI__builtin_ia32_cmpneqsd
:
17178 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd
, 4);
17179 case X86::BI__builtin_ia32_cmpnltsd
:
17180 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd
, 5);
17181 case X86::BI__builtin_ia32_cmpnlesd
:
17182 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd
, 6);
17183 case X86::BI__builtin_ia32_cmpordsd
:
17184 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd
, 7);
17186 // f16c half2float intrinsics
17187 case X86::BI__builtin_ia32_vcvtph2ps
:
17188 case X86::BI__builtin_ia32_vcvtph2ps256
:
17189 case X86::BI__builtin_ia32_vcvtph2ps_mask
:
17190 case X86::BI__builtin_ia32_vcvtph2ps256_mask
:
17191 case X86::BI__builtin_ia32_vcvtph2ps512_mask
: {
17192 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
17193 return EmitX86CvtF16ToFloatExpr(*this, Ops
, ConvertType(E
->getType()));
17196 // AVX512 bf16 intrinsics
17197 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask
: {
17198 Ops
[2] = getMaskVecValue(
17200 cast
<llvm::FixedVectorType
>(Ops
[0]->getType())->getNumElements());
17201 Intrinsic::ID IID
= Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128
;
17202 return Builder
.CreateCall(CGM
.getIntrinsic(IID
), Ops
);
17204 case X86::BI__builtin_ia32_cvtsbf162ss_32
:
17205 return Builder
.CreateFPExt(Ops
[0], Builder
.getFloatTy());
17207 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask
:
17208 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask
: {
17210 switch (BuiltinID
) {
17211 default: llvm_unreachable("Unsupported intrinsic!");
17212 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask
:
17213 IID
= Intrinsic::x86_avx512bf16_cvtneps2bf16_256
;
17215 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask
:
17216 IID
= Intrinsic::x86_avx512bf16_cvtneps2bf16_512
;
17219 Value
*Res
= Builder
.CreateCall(CGM
.getIntrinsic(IID
), Ops
[0]);
17220 return EmitX86Select(*this, Ops
[2], Res
, Ops
[1]);
17223 case X86::BI__cpuid
:
17224 case X86::BI__cpuidex
: {
17225 Value
*FuncId
= EmitScalarExpr(E
->getArg(1));
17226 Value
*SubFuncId
= BuiltinID
== X86::BI__cpuidex
17227 ? EmitScalarExpr(E
->getArg(2))
17228 : llvm::ConstantInt::get(Int32Ty
, 0);
17230 llvm::StructType
*CpuidRetTy
=
17231 llvm::StructType::get(Int32Ty
, Int32Ty
, Int32Ty
, Int32Ty
);
17232 llvm::FunctionType
*FTy
=
17233 llvm::FunctionType::get(CpuidRetTy
, {Int32Ty
, Int32Ty
}, false);
17235 StringRef Asm
, Constraints
;
17236 if (getTarget().getTriple().getArch() == llvm::Triple::x86
) {
17238 Constraints
= "={ax},={bx},={cx},={dx},{ax},{cx}";
17240 // x86-64 uses %rbx as the base register, so preserve it.
17241 Asm
= "xchgq %rbx, ${1:q}\n"
17243 "xchgq %rbx, ${1:q}";
17244 Constraints
= "={ax},=r,={cx},={dx},0,2";
17247 llvm::InlineAsm
*IA
= llvm::InlineAsm::get(FTy
, Asm
, Constraints
,
17248 /*hasSideEffects=*/false);
17249 Value
*IACall
= Builder
.CreateCall(IA
, {FuncId
, SubFuncId
});
17250 Value
*BasePtr
= EmitScalarExpr(E
->getArg(0));
17251 Value
*Store
= nullptr;
17252 for (unsigned i
= 0; i
< 4; i
++) {
17253 Value
*Extracted
= Builder
.CreateExtractValue(IACall
, i
);
17254 Value
*StorePtr
= Builder
.CreateConstInBoundsGEP1_32(Int32Ty
, BasePtr
, i
);
17255 Store
= Builder
.CreateAlignedStore(Extracted
, StorePtr
, getIntAlign());
17258 // Return the last store instruction to signal that we have emitted the
17263 case X86::BI__emul
:
17264 case X86::BI__emulu
: {
17265 llvm::Type
*Int64Ty
= llvm::IntegerType::get(getLLVMContext(), 64);
17266 bool isSigned
= (BuiltinID
== X86::BI__emul
);
17267 Value
*LHS
= Builder
.CreateIntCast(Ops
[0], Int64Ty
, isSigned
);
17268 Value
*RHS
= Builder
.CreateIntCast(Ops
[1], Int64Ty
, isSigned
);
17269 return Builder
.CreateMul(LHS
, RHS
, "", !isSigned
, isSigned
);
17271 case X86::BI__mulh
:
17272 case X86::BI__umulh
:
17273 case X86::BI_mul128
:
17274 case X86::BI_umul128
: {
17275 llvm::Type
*ResType
= ConvertType(E
->getType());
17276 llvm::Type
*Int128Ty
= llvm::IntegerType::get(getLLVMContext(), 128);
17278 bool IsSigned
= (BuiltinID
== X86::BI__mulh
|| BuiltinID
== X86::BI_mul128
);
17279 Value
*LHS
= Builder
.CreateIntCast(Ops
[0], Int128Ty
, IsSigned
);
17280 Value
*RHS
= Builder
.CreateIntCast(Ops
[1], Int128Ty
, IsSigned
);
17282 Value
*MulResult
, *HigherBits
;
17284 MulResult
= Builder
.CreateNSWMul(LHS
, RHS
);
17285 HigherBits
= Builder
.CreateAShr(MulResult
, 64);
17287 MulResult
= Builder
.CreateNUWMul(LHS
, RHS
);
17288 HigherBits
= Builder
.CreateLShr(MulResult
, 64);
17290 HigherBits
= Builder
.CreateIntCast(HigherBits
, ResType
, IsSigned
);
17292 if (BuiltinID
== X86::BI__mulh
|| BuiltinID
== X86::BI__umulh
)
17295 Address HighBitsAddress
= EmitPointerWithAlignment(E
->getArg(2));
17296 Builder
.CreateStore(HigherBits
, HighBitsAddress
);
17297 return Builder
.CreateIntCast(MulResult
, ResType
, IsSigned
);
17300 case X86::BI__faststorefence
: {
17301 return Builder
.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent
,
17302 llvm::SyncScope::System
);
17304 case X86::BI__shiftleft128
:
17305 case X86::BI__shiftright128
: {
17306 llvm::Function
*F
= CGM
.getIntrinsic(
17307 BuiltinID
== X86::BI__shiftleft128
? Intrinsic::fshl
: Intrinsic::fshr
,
17309 // Flip low/high ops and zero-extend amount to matching type.
17310 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
17311 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
17312 std::swap(Ops
[0], Ops
[1]);
17313 Ops
[2] = Builder
.CreateZExt(Ops
[2], Int64Ty
);
17314 return Builder
.CreateCall(F
, Ops
);
17316 case X86::BI_ReadWriteBarrier
:
17317 case X86::BI_ReadBarrier
:
17318 case X86::BI_WriteBarrier
: {
17319 return Builder
.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent
,
17320 llvm::SyncScope::SingleThread
);
17323 case X86::BI_AddressOfReturnAddress
: {
17325 CGM
.getIntrinsic(Intrinsic::addressofreturnaddress
, AllocaInt8PtrTy
);
17326 return Builder
.CreateCall(F
);
17328 case X86::BI__stosb
: {
17329 // We treat __stosb as a volatile memset - it may not generate "rep stosb"
17330 // instruction, but it will create a memset that won't be optimized away.
17331 return Builder
.CreateMemSet(Ops
[0], Ops
[1], Ops
[2], Align(1), true);
17333 // Corresponding to intrinsics which will return 2 tiles (tile0_tile1).
17334 case X86::BI__builtin_ia32_t2rpntlvwz0_internal
:
17335 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal
:
17336 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal
:
17337 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal
:
17338 case X86::BI__builtin_ia32_t2rpntlvwz1_internal
:
17339 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal
:
17340 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal
:
17341 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal
: {
17343 switch (BuiltinID
) {
17345 llvm_unreachable("Unsupported intrinsic!");
17346 case X86::BI__builtin_ia32_t2rpntlvwz0_internal
:
17347 IID
= Intrinsic::x86_t2rpntlvwz0_internal
;
17349 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal
:
17350 IID
= Intrinsic::x86_t2rpntlvwz0rs_internal
;
17352 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal
:
17353 IID
= Intrinsic::x86_t2rpntlvwz0t1_internal
;
17355 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal
:
17356 IID
= Intrinsic::x86_t2rpntlvwz0rst1_internal
;
17358 case X86::BI__builtin_ia32_t2rpntlvwz1_internal
:
17359 IID
= Intrinsic::x86_t2rpntlvwz1_internal
;
17361 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal
:
17362 IID
= Intrinsic::x86_t2rpntlvwz1rs_internal
;
17364 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal
:
17365 IID
= Intrinsic::x86_t2rpntlvwz1t1_internal
;
17367 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal
:
17368 IID
= Intrinsic::x86_t2rpntlvwz1rst1_internal
;
17372 // Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)
17373 Value
*Call
= Builder
.CreateCall(CGM
.getIntrinsic(IID
),
17374 {Ops
[0], Ops
[1], Ops
[2], Ops
[5], Ops
[6]});
17376 auto *PtrTy
= E
->getArg(3)->getType()->getAs
<PointerType
>();
17377 assert(PtrTy
&& "arg3 must be of pointer type");
17378 QualType PtreeTy
= PtrTy
->getPointeeType();
17379 llvm::Type
*TyPtee
= ConvertType(PtreeTy
);
17381 // Bitcast amx type (x86_amx) to vector type (256 x i32)
17382 // Then store tile0 into DstPtr0
17383 Value
*T0
= Builder
.CreateExtractValue(Call
, 0);
17384 Value
*VecT0
= Builder
.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector
,
17386 Builder
.CreateDefaultAlignedStore(VecT0
, Ops
[3]);
17388 // Then store tile1 into DstPtr1
17389 Value
*T1
= Builder
.CreateExtractValue(Call
, 1);
17390 Value
*VecT1
= Builder
.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector
,
17392 Value
*Store
= Builder
.CreateDefaultAlignedStore(VecT1
, Ops
[4]);
17394 // Note: We deliberately avoid using x86_tilestored64_internal to store the
17395 // results here, because the scope of the memory it writes cannot be
17396 // guaranteed. That could cause shape reloads after the first AMX intrinsic,
17397 // which the current AMX register allocation is unable to handle.
17402 // llvm.trap makes a ud2a instruction on x86.
17403 return EmitTrapCall(Intrinsic::trap
);
17404 case X86::BI__int2c
: {
17405 // This syscall signals a driver assertion failure in x86 NT kernels.
17406 llvm::FunctionType
*FTy
= llvm::FunctionType::get(VoidTy
, false);
17407 llvm::InlineAsm
*IA
=
17408 llvm::InlineAsm::get(FTy
, "int $$0x2c", "", /*hasSideEffects=*/true);
17409 llvm::AttributeList NoReturnAttr
= llvm::AttributeList::get(
17410 getLLVMContext(), llvm::AttributeList::FunctionIndex
,
17411 llvm::Attribute::NoReturn
);
17412 llvm::CallInst
*CI
= Builder
.CreateCall(IA
);
17413 CI
->setAttributes(NoReturnAttr
);
17416 case X86::BI__readfsbyte
:
17417 case X86::BI__readfsword
:
17418 case X86::BI__readfsdword
:
17419 case X86::BI__readfsqword
: {
17420 llvm::Type
*IntTy
= ConvertType(E
->getType());
17421 Value
*Ptr
= Builder
.CreateIntToPtr(
17422 Ops
[0], llvm::PointerType::get(getLLVMContext(), 257));
17423 LoadInst
*Load
= Builder
.CreateAlignedLoad(
17424 IntTy
, Ptr
, getContext().getTypeAlignInChars(E
->getType()));
17425 Load
->setVolatile(true);
17428 case X86::BI__readgsbyte
:
17429 case X86::BI__readgsword
:
17430 case X86::BI__readgsdword
:
17431 case X86::BI__readgsqword
: {
17432 llvm::Type
*IntTy
= ConvertType(E
->getType());
17433 Value
*Ptr
= Builder
.CreateIntToPtr(
17434 Ops
[0], llvm::PointerType::get(getLLVMContext(), 256));
17435 LoadInst
*Load
= Builder
.CreateAlignedLoad(
17436 IntTy
, Ptr
, getContext().getTypeAlignInChars(E
->getType()));
17437 Load
->setVolatile(true);
17440 case X86::BI__builtin_ia32_encodekey128_u32
: {
17441 Intrinsic::ID IID
= Intrinsic::x86_encodekey128
;
17443 Value
*Call
= Builder
.CreateCall(CGM
.getIntrinsic(IID
), {Ops
[0], Ops
[1]});
17445 for (int i
= 0; i
< 3; ++i
) {
17446 Value
*Extract
= Builder
.CreateExtractValue(Call
, i
+ 1);
17447 Value
*Ptr
= Builder
.CreateConstGEP1_32(Int8Ty
, Ops
[2], i
* 16);
17448 Builder
.CreateAlignedStore(Extract
, Ptr
, Align(1));
17451 return Builder
.CreateExtractValue(Call
, 0);
17453 case X86::BI__builtin_ia32_encodekey256_u32
: {
17454 Intrinsic::ID IID
= Intrinsic::x86_encodekey256
;
17457 Builder
.CreateCall(CGM
.getIntrinsic(IID
), {Ops
[0], Ops
[1], Ops
[2]});
17459 for (int i
= 0; i
< 4; ++i
) {
17460 Value
*Extract
= Builder
.CreateExtractValue(Call
, i
+ 1);
17461 Value
*Ptr
= Builder
.CreateConstGEP1_32(Int8Ty
, Ops
[3], i
* 16);
17462 Builder
.CreateAlignedStore(Extract
, Ptr
, Align(1));
17465 return Builder
.CreateExtractValue(Call
, 0);
17467 case X86::BI__builtin_ia32_aesenc128kl_u8
:
17468 case X86::BI__builtin_ia32_aesdec128kl_u8
:
17469 case X86::BI__builtin_ia32_aesenc256kl_u8
:
17470 case X86::BI__builtin_ia32_aesdec256kl_u8
: {
17472 StringRef BlockName
;
17473 switch (BuiltinID
) {
17475 llvm_unreachable("Unexpected builtin");
17476 case X86::BI__builtin_ia32_aesenc128kl_u8
:
17477 IID
= Intrinsic::x86_aesenc128kl
;
17478 BlockName
= "aesenc128kl";
17480 case X86::BI__builtin_ia32_aesdec128kl_u8
:
17481 IID
= Intrinsic::x86_aesdec128kl
;
17482 BlockName
= "aesdec128kl";
17484 case X86::BI__builtin_ia32_aesenc256kl_u8
:
17485 IID
= Intrinsic::x86_aesenc256kl
;
17486 BlockName
= "aesenc256kl";
17488 case X86::BI__builtin_ia32_aesdec256kl_u8
:
17489 IID
= Intrinsic::x86_aesdec256kl
;
17490 BlockName
= "aesdec256kl";
17494 Value
*Call
= Builder
.CreateCall(CGM
.getIntrinsic(IID
), {Ops
[1], Ops
[2]});
17496 BasicBlock
*NoError
=
17497 createBasicBlock(BlockName
+ "_no_error", this->CurFn
);
17498 BasicBlock
*Error
= createBasicBlock(BlockName
+ "_error", this->CurFn
);
17499 BasicBlock
*End
= createBasicBlock(BlockName
+ "_end", this->CurFn
);
17501 Value
*Ret
= Builder
.CreateExtractValue(Call
, 0);
17502 Value
*Succ
= Builder
.CreateTrunc(Ret
, Builder
.getInt1Ty());
17503 Value
*Out
= Builder
.CreateExtractValue(Call
, 1);
17504 Builder
.CreateCondBr(Succ
, NoError
, Error
);
17506 Builder
.SetInsertPoint(NoError
);
17507 Builder
.CreateDefaultAlignedStore(Out
, Ops
[0]);
17508 Builder
.CreateBr(End
);
17510 Builder
.SetInsertPoint(Error
);
17511 Constant
*Zero
= llvm::Constant::getNullValue(Out
->getType());
17512 Builder
.CreateDefaultAlignedStore(Zero
, Ops
[0]);
17513 Builder
.CreateBr(End
);
17515 Builder
.SetInsertPoint(End
);
17516 return Builder
.CreateExtractValue(Call
, 0);
17518 case X86::BI__builtin_ia32_aesencwide128kl_u8
:
17519 case X86::BI__builtin_ia32_aesdecwide128kl_u8
:
17520 case X86::BI__builtin_ia32_aesencwide256kl_u8
:
17521 case X86::BI__builtin_ia32_aesdecwide256kl_u8
: {
17523 StringRef BlockName
;
17524 switch (BuiltinID
) {
17525 case X86::BI__builtin_ia32_aesencwide128kl_u8
:
17526 IID
= Intrinsic::x86_aesencwide128kl
;
17527 BlockName
= "aesencwide128kl";
17529 case X86::BI__builtin_ia32_aesdecwide128kl_u8
:
17530 IID
= Intrinsic::x86_aesdecwide128kl
;
17531 BlockName
= "aesdecwide128kl";
17533 case X86::BI__builtin_ia32_aesencwide256kl_u8
:
17534 IID
= Intrinsic::x86_aesencwide256kl
;
17535 BlockName
= "aesencwide256kl";
17537 case X86::BI__builtin_ia32_aesdecwide256kl_u8
:
17538 IID
= Intrinsic::x86_aesdecwide256kl
;
17539 BlockName
= "aesdecwide256kl";
17543 llvm::Type
*Ty
= FixedVectorType::get(Builder
.getInt64Ty(), 2);
17546 for (int i
= 0; i
!= 8; ++i
) {
17547 Value
*Ptr
= Builder
.CreateConstGEP1_32(Ty
, Ops
[1], i
);
17548 InOps
[i
+ 1] = Builder
.CreateAlignedLoad(Ty
, Ptr
, Align(16));
17551 Value
*Call
= Builder
.CreateCall(CGM
.getIntrinsic(IID
), InOps
);
17553 BasicBlock
*NoError
=
17554 createBasicBlock(BlockName
+ "_no_error", this->CurFn
);
17555 BasicBlock
*Error
= createBasicBlock(BlockName
+ "_error", this->CurFn
);
17556 BasicBlock
*End
= createBasicBlock(BlockName
+ "_end", this->CurFn
);
17558 Value
*Ret
= Builder
.CreateExtractValue(Call
, 0);
17559 Value
*Succ
= Builder
.CreateTrunc(Ret
, Builder
.getInt1Ty());
17560 Builder
.CreateCondBr(Succ
, NoError
, Error
);
17562 Builder
.SetInsertPoint(NoError
);
17563 for (int i
= 0; i
!= 8; ++i
) {
17564 Value
*Extract
= Builder
.CreateExtractValue(Call
, i
+ 1);
17565 Value
*Ptr
= Builder
.CreateConstGEP1_32(Extract
->getType(), Ops
[0], i
);
17566 Builder
.CreateAlignedStore(Extract
, Ptr
, Align(16));
17568 Builder
.CreateBr(End
);
17570 Builder
.SetInsertPoint(Error
);
17571 for (int i
= 0; i
!= 8; ++i
) {
17572 Value
*Out
= Builder
.CreateExtractValue(Call
, i
+ 1);
17573 Constant
*Zero
= llvm::Constant::getNullValue(Out
->getType());
17574 Value
*Ptr
= Builder
.CreateConstGEP1_32(Out
->getType(), Ops
[0], i
);
17575 Builder
.CreateAlignedStore(Zero
, Ptr
, Align(16));
17577 Builder
.CreateBr(End
);
17579 Builder
.SetInsertPoint(End
);
17580 return Builder
.CreateExtractValue(Call
, 0);
17582 case X86::BI__builtin_ia32_vfcmaddcph512_mask
:
17585 case X86::BI__builtin_ia32_vfmaddcph512_mask
: {
17586 Intrinsic::ID IID
= IsConjFMA
17587 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
17588 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512
;
17589 Value
*Call
= Builder
.CreateCall(CGM
.getIntrinsic(IID
), Ops
);
17590 return EmitX86Select(*this, Ops
[3], Call
, Ops
[0]);
17592 case X86::BI__builtin_ia32_vfcmaddcph256_round_mask
:
17595 case X86::BI__builtin_ia32_vfmaddcph256_round_mask
: {
17596 Intrinsic::ID IID
= IsConjFMA
? Intrinsic::x86_avx10_mask_vfcmaddcph256
17597 : Intrinsic::x86_avx10_mask_vfmaddcph256
;
17598 Value
*Call
= Builder
.CreateCall(CGM
.getIntrinsic(IID
), Ops
);
17599 return EmitX86Select(*this, Ops
[3], Call
, Ops
[0]);
17601 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask
:
17604 case X86::BI__builtin_ia32_vfmaddcsh_round_mask
: {
17605 Intrinsic::ID IID
= IsConjFMA
? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17606 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh
;
17607 Value
*Call
= Builder
.CreateCall(CGM
.getIntrinsic(IID
), Ops
);
17608 Value
*And
= Builder
.CreateAnd(Ops
[3], llvm::ConstantInt::get(Int8Ty
, 1));
17609 return EmitX86Select(*this, And
, Call
, Ops
[0]);
17611 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3
:
17614 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3
: {
17615 Intrinsic::ID IID
= IsConjFMA
? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17616 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh
;
17617 Value
*Call
= Builder
.CreateCall(CGM
.getIntrinsic(IID
), Ops
);
17618 static constexpr int Mask
[] = {0, 5, 6, 7};
17619 return Builder
.CreateShuffleVector(Call
, Ops
[2], Mask
);
17621 case X86::BI__builtin_ia32_prefetchi
:
17622 return Builder
.CreateCall(
17623 CGM
.getIntrinsic(Intrinsic::prefetch
, Ops
[0]->getType()),
17624 {Ops
[0], llvm::ConstantInt::get(Int32Ty
, 0), Ops
[1],
17625 llvm::ConstantInt::get(Int32Ty
, 0)});
17629 Value
*CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID
,
17630 const CallExpr
*E
) {
17631 // Do not emit the builtin arguments in the arguments of a function call,
17632 // because the evaluation order of function arguments is not specified in C++.
17633 // This is important when testing to ensure the arguments are emitted in the
17634 // same order every time. Eg:
17636 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
17637 // EmitScalarExpr(E->getArg(1)), "swdiv");
17639 // Value *Op0 = EmitScalarExpr(E->getArg(0));
17640 // Value *Op1 = EmitScalarExpr(E->getArg(1));
17641 // return Builder.CreateFDiv(Op0, Op1, "swdiv")
17643 Intrinsic::ID ID
= Intrinsic::not_intrinsic
;
17645 #include "llvm/TargetParser/PPCTargetParser.def"
17646 auto GenAIXPPCBuiltinCpuExpr
= [&](unsigned SupportMethod
, unsigned FieldIdx
,
17647 unsigned Mask
, CmpInst::Predicate CompOp
,
17648 unsigned OpValue
) -> Value
* {
17649 if (SupportMethod
== BUILTIN_PPC_FALSE
)
17650 return llvm::ConstantInt::getFalse(ConvertType(E
->getType()));
17652 if (SupportMethod
== BUILTIN_PPC_TRUE
)
17653 return llvm::ConstantInt::getTrue(ConvertType(E
->getType()));
17655 assert(SupportMethod
<= SYS_CALL
&& "Invalid value for SupportMethod.");
17657 llvm::Value
*FieldValue
= nullptr;
17658 if (SupportMethod
== USE_SYS_CONF
) {
17659 llvm::Type
*STy
= llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE
);
17660 llvm::Constant
*SysConf
=
17661 CGM
.CreateRuntimeVariable(STy
, "_system_configuration");
17663 // Grab the appropriate field from _system_configuration.
17664 llvm::Value
*Idxs
[] = {ConstantInt::get(Int32Ty
, 0),
17665 ConstantInt::get(Int32Ty
, FieldIdx
)};
17667 FieldValue
= Builder
.CreateInBoundsGEP(STy
, SysConf
, Idxs
);
17668 FieldValue
= Builder
.CreateAlignedLoad(Int32Ty
, FieldValue
,
17669 CharUnits::fromQuantity(4));
17670 } else if (SupportMethod
== SYS_CALL
) {
17671 llvm::FunctionType
*FTy
=
17672 llvm::FunctionType::get(Int64Ty
, Int32Ty
, false);
17673 llvm::FunctionCallee Func
=
17674 CGM
.CreateRuntimeFunction(FTy
, "getsystemcfg");
17677 Builder
.CreateCall(Func
, {ConstantInt::get(Int32Ty
, FieldIdx
)});
17679 assert(FieldValue
&&
17680 "SupportMethod value is not defined in PPCTargetParser.def.");
17683 FieldValue
= Builder
.CreateAnd(FieldValue
, Mask
);
17685 llvm::Type
*ValueType
= FieldValue
->getType();
17686 bool IsValueType64Bit
= ValueType
->isIntegerTy(64);
17688 (IsValueType64Bit
|| ValueType
->isIntegerTy(32)) &&
17689 "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
17691 return Builder
.CreateICmp(
17692 CompOp
, FieldValue
,
17693 ConstantInt::get(IsValueType64Bit
? Int64Ty
: Int32Ty
, OpValue
));
17696 switch (BuiltinID
) {
17697 default: return nullptr;
17699 case Builtin::BI__builtin_cpu_is
: {
17700 const Expr
*CPUExpr
= E
->getArg(0)->IgnoreParenCasts();
17701 StringRef CPUStr
= cast
<clang::StringLiteral
>(CPUExpr
)->getString();
17702 llvm::Triple Triple
= getTarget().getTriple();
17704 unsigned LinuxSupportMethod
, LinuxIDValue
, AIXSupportMethod
, AIXIDValue
;
17705 typedef std::tuple
<unsigned, unsigned, unsigned, unsigned> CPUInfo
;
17707 std::tie(LinuxSupportMethod
, LinuxIDValue
, AIXSupportMethod
, AIXIDValue
) =
17708 static_cast<CPUInfo
>(StringSwitch
<CPUInfo
>(CPUStr
)
17709 #define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \
17711 .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID})
17712 #include "llvm/TargetParser/PPCTargetParser.def"
17713 .Default({BUILTIN_PPC_UNSUPPORTED
, 0,
17714 BUILTIN_PPC_UNSUPPORTED
, 0}));
17716 if (Triple
.isOSAIX()) {
17717 assert((AIXSupportMethod
!= BUILTIN_PPC_UNSUPPORTED
) &&
17718 "Invalid CPU name. Missed by SemaChecking?");
17719 return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod
, AIX_SYSCON_IMPL_IDX
, 0,
17720 ICmpInst::ICMP_EQ
, AIXIDValue
);
17723 assert(Triple
.isOSLinux() &&
17724 "__builtin_cpu_is() is only supported for AIX and Linux.");
17726 assert((LinuxSupportMethod
!= BUILTIN_PPC_UNSUPPORTED
) &&
17727 "Invalid CPU name. Missed by SemaChecking?");
17729 if (LinuxSupportMethod
== BUILTIN_PPC_FALSE
)
17730 return llvm::ConstantInt::getFalse(ConvertType(E
->getType()));
17732 Value
*Op0
= llvm::ConstantInt::get(Int32Ty
, PPC_FAWORD_CPUID
);
17733 llvm::Function
*F
= CGM
.getIntrinsic(Intrinsic::ppc_fixed_addr_ld
);
17734 Value
*TheCall
= Builder
.CreateCall(F
, {Op0
}, "cpu_is");
17735 return Builder
.CreateICmpEQ(TheCall
,
17736 llvm::ConstantInt::get(Int32Ty
, LinuxIDValue
));
17738 case Builtin::BI__builtin_cpu_supports
: {
17739 llvm::Triple Triple
= getTarget().getTriple();
17740 const Expr
*CPUExpr
= E
->getArg(0)->IgnoreParenCasts();
17741 StringRef CPUStr
= cast
<clang::StringLiteral
>(CPUExpr
)->getString();
17742 if (Triple
.isOSAIX()) {
17743 unsigned SupportMethod
, FieldIdx
, Mask
, Value
;
17744 CmpInst::Predicate CompOp
;
17745 typedef std::tuple
<unsigned, unsigned, unsigned, CmpInst::Predicate
,
17748 std::tie(SupportMethod
, FieldIdx
, Mask
, CompOp
, Value
) =
17749 static_cast<CPUSupportType
>(StringSwitch
<CPUSupportType
>(CPUStr
)
17750 #define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
17752 .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
17753 #include "llvm/TargetParser/PPCTargetParser.def"
17754 .Default({BUILTIN_PPC_FALSE
, 0, 0,
17755 CmpInst::Predicate(), 0}));
17756 return GenAIXPPCBuiltinCpuExpr(SupportMethod
, FieldIdx
, Mask
, CompOp
,
17760 assert(Triple
.isOSLinux() &&
17761 "__builtin_cpu_supports() is only supported for AIX and Linux.");
17762 unsigned FeatureWord
;
17764 std::tie(FeatureWord
, BitMask
) =
17765 StringSwitch
<std::pair
<unsigned, unsigned>>(CPUStr
)
17766 #define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
17767 .Case(Name, {FA_WORD, Bitmask})
17768 #include "llvm/TargetParser/PPCTargetParser.def"
17771 return Builder
.getFalse();
17772 Value
*Op0
= llvm::ConstantInt::get(Int32Ty
, FeatureWord
);
17773 llvm::Function
*F
= CGM
.getIntrinsic(Intrinsic::ppc_fixed_addr_ld
);
17774 Value
*TheCall
= Builder
.CreateCall(F
, {Op0
}, "cpu_supports");
17776 Builder
.CreateAnd(TheCall
, llvm::ConstantInt::get(Int32Ty
, BitMask
));
17777 return Builder
.CreateICmpNE(Mask
, llvm::Constant::getNullValue(Int32Ty
));
17778 #undef PPC_FAWORD_HWCAP
17779 #undef PPC_FAWORD_HWCAP2
17780 #undef PPC_FAWORD_CPUID
17783 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
17784 // call __builtin_readcyclecounter.
17785 case PPC::BI__builtin_ppc_get_timebase
:
17786 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::readcyclecounter
));
17788 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
17789 case PPC::BI__builtin_altivec_lvx
:
17790 case PPC::BI__builtin_altivec_lvxl
:
17791 case PPC::BI__builtin_altivec_lvebx
:
17792 case PPC::BI__builtin_altivec_lvehx
:
17793 case PPC::BI__builtin_altivec_lvewx
:
17794 case PPC::BI__builtin_altivec_lvsl
:
17795 case PPC::BI__builtin_altivec_lvsr
:
17796 case PPC::BI__builtin_vsx_lxvd2x
:
17797 case PPC::BI__builtin_vsx_lxvw4x
:
17798 case PPC::BI__builtin_vsx_lxvd2x_be
:
17799 case PPC::BI__builtin_vsx_lxvw4x_be
:
17800 case PPC::BI__builtin_vsx_lxvl
:
17801 case PPC::BI__builtin_vsx_lxvll
:
17803 SmallVector
<Value
*, 2> Ops
;
17804 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
17805 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
17806 if (!(BuiltinID
== PPC::BI__builtin_vsx_lxvl
||
17807 BuiltinID
== PPC::BI__builtin_vsx_lxvll
)) {
17808 Ops
[0] = Builder
.CreateGEP(Int8Ty
, Ops
[1], Ops
[0]);
17812 switch (BuiltinID
) {
17813 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
17814 case PPC::BI__builtin_altivec_lvx
:
17815 ID
= Intrinsic::ppc_altivec_lvx
;
17817 case PPC::BI__builtin_altivec_lvxl
:
17818 ID
= Intrinsic::ppc_altivec_lvxl
;
17820 case PPC::BI__builtin_altivec_lvebx
:
17821 ID
= Intrinsic::ppc_altivec_lvebx
;
17823 case PPC::BI__builtin_altivec_lvehx
:
17824 ID
= Intrinsic::ppc_altivec_lvehx
;
17826 case PPC::BI__builtin_altivec_lvewx
:
17827 ID
= Intrinsic::ppc_altivec_lvewx
;
17829 case PPC::BI__builtin_altivec_lvsl
:
17830 ID
= Intrinsic::ppc_altivec_lvsl
;
17832 case PPC::BI__builtin_altivec_lvsr
:
17833 ID
= Intrinsic::ppc_altivec_lvsr
;
17835 case PPC::BI__builtin_vsx_lxvd2x
:
17836 ID
= Intrinsic::ppc_vsx_lxvd2x
;
17838 case PPC::BI__builtin_vsx_lxvw4x
:
17839 ID
= Intrinsic::ppc_vsx_lxvw4x
;
17841 case PPC::BI__builtin_vsx_lxvd2x_be
:
17842 ID
= Intrinsic::ppc_vsx_lxvd2x_be
;
17844 case PPC::BI__builtin_vsx_lxvw4x_be
:
17845 ID
= Intrinsic::ppc_vsx_lxvw4x_be
;
17847 case PPC::BI__builtin_vsx_lxvl
:
17848 ID
= Intrinsic::ppc_vsx_lxvl
;
17850 case PPC::BI__builtin_vsx_lxvll
:
17851 ID
= Intrinsic::ppc_vsx_lxvll
;
17854 llvm::Function
*F
= CGM
.getIntrinsic(ID
);
17855 return Builder
.CreateCall(F
, Ops
, "");
17858 // vec_st, vec_xst_be
17859 case PPC::BI__builtin_altivec_stvx
:
17860 case PPC::BI__builtin_altivec_stvxl
:
17861 case PPC::BI__builtin_altivec_stvebx
:
17862 case PPC::BI__builtin_altivec_stvehx
:
17863 case PPC::BI__builtin_altivec_stvewx
:
17864 case PPC::BI__builtin_vsx_stxvd2x
:
17865 case PPC::BI__builtin_vsx_stxvw4x
:
17866 case PPC::BI__builtin_vsx_stxvd2x_be
:
17867 case PPC::BI__builtin_vsx_stxvw4x_be
:
17868 case PPC::BI__builtin_vsx_stxvl
:
17869 case PPC::BI__builtin_vsx_stxvll
:
17871 SmallVector
<Value
*, 3> Ops
;
17872 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
17873 Ops
.push_back(EmitScalarExpr(E
->getArg(1)));
17874 Ops
.push_back(EmitScalarExpr(E
->getArg(2)));
17875 if (!(BuiltinID
== PPC::BI__builtin_vsx_stxvl
||
17876 BuiltinID
== PPC::BI__builtin_vsx_stxvll
)) {
17877 Ops
[1] = Builder
.CreateGEP(Int8Ty
, Ops
[2], Ops
[1]);
17881 switch (BuiltinID
) {
17882 default: llvm_unreachable("Unsupported st intrinsic!");
17883 case PPC::BI__builtin_altivec_stvx
:
17884 ID
= Intrinsic::ppc_altivec_stvx
;
17886 case PPC::BI__builtin_altivec_stvxl
:
17887 ID
= Intrinsic::ppc_altivec_stvxl
;
17889 case PPC::BI__builtin_altivec_stvebx
:
17890 ID
= Intrinsic::ppc_altivec_stvebx
;
17892 case PPC::BI__builtin_altivec_stvehx
:
17893 ID
= Intrinsic::ppc_altivec_stvehx
;
17895 case PPC::BI__builtin_altivec_stvewx
:
17896 ID
= Intrinsic::ppc_altivec_stvewx
;
17898 case PPC::BI__builtin_vsx_stxvd2x
:
17899 ID
= Intrinsic::ppc_vsx_stxvd2x
;
17901 case PPC::BI__builtin_vsx_stxvw4x
:
17902 ID
= Intrinsic::ppc_vsx_stxvw4x
;
17904 case PPC::BI__builtin_vsx_stxvd2x_be
:
17905 ID
= Intrinsic::ppc_vsx_stxvd2x_be
;
17907 case PPC::BI__builtin_vsx_stxvw4x_be
:
17908 ID
= Intrinsic::ppc_vsx_stxvw4x_be
;
17910 case PPC::BI__builtin_vsx_stxvl
:
17911 ID
= Intrinsic::ppc_vsx_stxvl
;
17913 case PPC::BI__builtin_vsx_stxvll
:
17914 ID
= Intrinsic::ppc_vsx_stxvll
;
17917 llvm::Function
*F
= CGM
.getIntrinsic(ID
);
17918 return Builder
.CreateCall(F
, Ops
, "");
17920 case PPC::BI__builtin_vsx_ldrmb
: {
17921 // Essentially boils down to performing an unaligned VMX load sequence so
17922 // as to avoid crossing a page boundary and then shuffling the elements
17923 // into the right side of the vector register.
17924 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
17925 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
17926 int64_t NumBytes
= cast
<ConstantInt
>(Op1
)->getZExtValue();
17927 llvm::Type
*ResTy
= ConvertType(E
->getType());
17928 bool IsLE
= getTarget().isLittleEndian();
17930 // If the user wants the entire vector, just load the entire vector.
17931 if (NumBytes
== 16) {
17933 Builder
.CreateLoad(Address(Op0
, ResTy
, CharUnits::fromQuantity(1)));
17937 // Reverse the bytes on LE.
17938 SmallVector
<int, 16> RevMask
;
17939 for (int Idx
= 0; Idx
< 16; Idx
++)
17940 RevMask
.push_back(15 - Idx
);
17941 return Builder
.CreateShuffleVector(LD
, LD
, RevMask
);
17944 llvm::Function
*Lvx
= CGM
.getIntrinsic(Intrinsic::ppc_altivec_lvx
);
17945 llvm::Function
*Lvs
= CGM
.getIntrinsic(IsLE
? Intrinsic::ppc_altivec_lvsr
17946 : Intrinsic::ppc_altivec_lvsl
);
17947 llvm::Function
*Vperm
= CGM
.getIntrinsic(Intrinsic::ppc_altivec_vperm
);
17948 Value
*HiMem
= Builder
.CreateGEP(
17949 Int8Ty
, Op0
, ConstantInt::get(Op1
->getType(), NumBytes
- 1));
17950 Value
*LoLd
= Builder
.CreateCall(Lvx
, Op0
, "ld.lo");
17951 Value
*HiLd
= Builder
.CreateCall(Lvx
, HiMem
, "ld.hi");
17952 Value
*Mask1
= Builder
.CreateCall(Lvs
, Op0
, "mask1");
17954 Op0
= IsLE
? HiLd
: LoLd
;
17955 Op1
= IsLE
? LoLd
: HiLd
;
17956 Value
*AllElts
= Builder
.CreateCall(Vperm
, {Op0
, Op1
, Mask1
}, "shuffle1");
17957 Constant
*Zero
= llvm::Constant::getNullValue(IsLE
? ResTy
: AllElts
->getType());
17960 SmallVector
<int, 16> Consts
;
17961 for (int Idx
= 0; Idx
< 16; Idx
++) {
17962 int Val
= (NumBytes
- Idx
- 1 >= 0) ? (NumBytes
- Idx
- 1)
17963 : 16 - (NumBytes
- Idx
);
17964 Consts
.push_back(Val
);
17966 return Builder
.CreateShuffleVector(Builder
.CreateBitCast(AllElts
, ResTy
),
17969 SmallVector
<Constant
*, 16> Consts
;
17970 for (int Idx
= 0; Idx
< 16; Idx
++)
17971 Consts
.push_back(Builder
.getInt8(NumBytes
+ Idx
));
17972 Value
*Mask2
= ConstantVector::get(Consts
);
17973 return Builder
.CreateBitCast(
17974 Builder
.CreateCall(Vperm
, {Zero
, AllElts
, Mask2
}, "shuffle2"), ResTy
);
17976 case PPC::BI__builtin_vsx_strmb
: {
17977 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
17978 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
17979 Value
*Op2
= EmitScalarExpr(E
->getArg(2));
17980 int64_t NumBytes
= cast
<ConstantInt
>(Op1
)->getZExtValue();
17981 bool IsLE
= getTarget().isLittleEndian();
17982 auto StoreSubVec
= [&](unsigned Width
, unsigned Offset
, unsigned EltNo
) {
17983 // Storing the whole vector, simply store it on BE and reverse bytes and
17986 Value
*StVec
= Op2
;
17988 SmallVector
<int, 16> RevMask
;
17989 for (int Idx
= 0; Idx
< 16; Idx
++)
17990 RevMask
.push_back(15 - Idx
);
17991 StVec
= Builder
.CreateShuffleVector(Op2
, Op2
, RevMask
);
17993 return Builder
.CreateStore(
17994 StVec
, Address(Op0
, Op2
->getType(), CharUnits::fromQuantity(1)));
17996 auto *ConvTy
= Int64Ty
;
17997 unsigned NumElts
= 0;
18000 llvm_unreachable("width for stores must be a power of 2");
18018 Value
*Vec
= Builder
.CreateBitCast(
18019 Op2
, llvm::FixedVectorType::get(ConvTy
, NumElts
));
18021 Builder
.CreateGEP(Int8Ty
, Op0
, ConstantInt::get(Int64Ty
, Offset
));
18022 Value
*Elt
= Builder
.CreateExtractElement(Vec
, EltNo
);
18023 if (IsLE
&& Width
> 1) {
18024 Function
*F
= CGM
.getIntrinsic(Intrinsic::bswap
, ConvTy
);
18025 Elt
= Builder
.CreateCall(F
, Elt
);
18027 return Builder
.CreateStore(
18028 Elt
, Address(Ptr
, ConvTy
, CharUnits::fromQuantity(1)));
18030 unsigned Stored
= 0;
18031 unsigned RemainingBytes
= NumBytes
;
18033 if (NumBytes
== 16)
18034 return StoreSubVec(16, 0, 0);
18035 if (NumBytes
>= 8) {
18036 Result
= StoreSubVec(8, NumBytes
- 8, IsLE
? 0 : 1);
18037 RemainingBytes
-= 8;
18040 if (RemainingBytes
>= 4) {
18041 Result
= StoreSubVec(4, NumBytes
- Stored
- 4,
18042 IsLE
? (Stored
>> 2) : 3 - (Stored
>> 2));
18043 RemainingBytes
-= 4;
18046 if (RemainingBytes
>= 2) {
18047 Result
= StoreSubVec(2, NumBytes
- Stored
- 2,
18048 IsLE
? (Stored
>> 1) : 7 - (Stored
>> 1));
18049 RemainingBytes
-= 2;
18052 if (RemainingBytes
)
18054 StoreSubVec(1, NumBytes
- Stored
- 1, IsLE
? Stored
: 15 - Stored
);
18058 case PPC::BI__builtin_vsx_xvsqrtsp
:
18059 case PPC::BI__builtin_vsx_xvsqrtdp
: {
18060 llvm::Type
*ResultType
= ConvertType(E
->getType());
18061 Value
*X
= EmitScalarExpr(E
->getArg(0));
18062 if (Builder
.getIsFPConstrained()) {
18063 llvm::Function
*F
= CGM
.getIntrinsic(
18064 Intrinsic::experimental_constrained_sqrt
, ResultType
);
18065 return Builder
.CreateConstrainedFPCall(F
, X
);
18067 llvm::Function
*F
= CGM
.getIntrinsic(Intrinsic::sqrt
, ResultType
);
18068 return Builder
.CreateCall(F
, X
);
18071 // Count leading zeros
18072 case PPC::BI__builtin_altivec_vclzb
:
18073 case PPC::BI__builtin_altivec_vclzh
:
18074 case PPC::BI__builtin_altivec_vclzw
:
18075 case PPC::BI__builtin_altivec_vclzd
: {
18076 llvm::Type
*ResultType
= ConvertType(E
->getType());
18077 Value
*X
= EmitScalarExpr(E
->getArg(0));
18078 Value
*Undef
= ConstantInt::get(Builder
.getInt1Ty(), false);
18079 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctlz
, ResultType
);
18080 return Builder
.CreateCall(F
, {X
, Undef
});
18082 case PPC::BI__builtin_altivec_vctzb
:
18083 case PPC::BI__builtin_altivec_vctzh
:
18084 case PPC::BI__builtin_altivec_vctzw
:
18085 case PPC::BI__builtin_altivec_vctzd
: {
18086 llvm::Type
*ResultType
= ConvertType(E
->getType());
18087 Value
*X
= EmitScalarExpr(E
->getArg(0));
18088 Value
*Undef
= ConstantInt::get(Builder
.getInt1Ty(), false);
18089 Function
*F
= CGM
.getIntrinsic(Intrinsic::cttz
, ResultType
);
18090 return Builder
.CreateCall(F
, {X
, Undef
});
18092 case PPC::BI__builtin_altivec_vinsd
:
18093 case PPC::BI__builtin_altivec_vinsw
:
18094 case PPC::BI__builtin_altivec_vinsd_elt
:
18095 case PPC::BI__builtin_altivec_vinsw_elt
: {
18096 llvm::Type
*ResultType
= ConvertType(E
->getType());
18097 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18098 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18099 Value
*Op2
= EmitScalarExpr(E
->getArg(2));
18101 bool IsUnaligned
= (BuiltinID
== PPC::BI__builtin_altivec_vinsw
||
18102 BuiltinID
== PPC::BI__builtin_altivec_vinsd
);
18104 bool Is32bit
= (BuiltinID
== PPC::BI__builtin_altivec_vinsw
||
18105 BuiltinID
== PPC::BI__builtin_altivec_vinsw_elt
);
18107 // The third argument must be a compile time constant.
18108 ConstantInt
*ArgCI
= dyn_cast
<ConstantInt
>(Op2
);
18110 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
18112 // Valid value for the third argument is dependent on the input type and
18114 int ValidMaxValue
= 0;
18116 ValidMaxValue
= (Is32bit
) ? 12 : 8;
18118 ValidMaxValue
= (Is32bit
) ? 3 : 1;
18120 // Get value of third argument.
18121 int64_t ConstArg
= ArgCI
->getSExtValue();
18123 // Compose range checking error message.
18124 std::string RangeErrMsg
= IsUnaligned
? "byte" : "element";
18125 RangeErrMsg
+= " number " + llvm::to_string(ConstArg
);
18126 RangeErrMsg
+= " is outside of the valid range [0, ";
18127 RangeErrMsg
+= llvm::to_string(ValidMaxValue
) + "]";
18129 // Issue error if third argument is not within the valid range.
18130 if (ConstArg
< 0 || ConstArg
> ValidMaxValue
)
18131 CGM
.Error(E
->getExprLoc(), RangeErrMsg
);
18133 // Input to vec_replace_elt is an element index, convert to byte index.
18134 if (!IsUnaligned
) {
18135 ConstArg
*= Is32bit
? 4 : 8;
18136 // Fix the constant according to endianness.
18137 if (getTarget().isLittleEndian())
18138 ConstArg
= (Is32bit
? 12 : 8) - ConstArg
;
18141 ID
= Is32bit
? Intrinsic::ppc_altivec_vinsw
: Intrinsic::ppc_altivec_vinsd
;
18142 Op2
= ConstantInt::getSigned(Int32Ty
, ConstArg
);
18143 // Casting input to vector int as per intrinsic definition.
18146 ? Builder
.CreateBitCast(Op0
, llvm::FixedVectorType::get(Int32Ty
, 4))
18147 : Builder
.CreateBitCast(Op0
,
18148 llvm::FixedVectorType::get(Int64Ty
, 2));
18149 return Builder
.CreateBitCast(
18150 Builder
.CreateCall(CGM
.getIntrinsic(ID
), {Op0
, Op1
, Op2
}), ResultType
);
18152 case PPC::BI__builtin_altivec_vadduqm
:
18153 case PPC::BI__builtin_altivec_vsubuqm
: {
18154 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18155 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18156 llvm::Type
*Int128Ty
= llvm::IntegerType::get(getLLVMContext(), 128);
18157 Op0
= Builder
.CreateBitCast(Op0
, llvm::FixedVectorType::get(Int128Ty
, 1));
18158 Op1
= Builder
.CreateBitCast(Op1
, llvm::FixedVectorType::get(Int128Ty
, 1));
18159 if (BuiltinID
== PPC::BI__builtin_altivec_vadduqm
)
18160 return Builder
.CreateAdd(Op0
, Op1
, "vadduqm");
18162 return Builder
.CreateSub(Op0
, Op1
, "vsubuqm");
18164 case PPC::BI__builtin_altivec_vaddcuq_c
:
18165 case PPC::BI__builtin_altivec_vsubcuq_c
: {
18166 SmallVector
<Value
*, 2> Ops
;
18167 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18168 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18169 llvm::Type
*V1I128Ty
= llvm::FixedVectorType::get(
18170 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18171 Ops
.push_back(Builder
.CreateBitCast(Op0
, V1I128Ty
));
18172 Ops
.push_back(Builder
.CreateBitCast(Op1
, V1I128Ty
));
18173 ID
= (BuiltinID
== PPC::BI__builtin_altivec_vaddcuq_c
)
18174 ? Intrinsic::ppc_altivec_vaddcuq
18175 : Intrinsic::ppc_altivec_vsubcuq
;
18176 return Builder
.CreateCall(CGM
.getIntrinsic(ID
), Ops
, "");
18178 case PPC::BI__builtin_altivec_vaddeuqm_c
:
18179 case PPC::BI__builtin_altivec_vaddecuq_c
:
18180 case PPC::BI__builtin_altivec_vsubeuqm_c
:
18181 case PPC::BI__builtin_altivec_vsubecuq_c
: {
18182 SmallVector
<Value
*, 3> Ops
;
18183 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18184 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18185 Value
*Op2
= EmitScalarExpr(E
->getArg(2));
18186 llvm::Type
*V1I128Ty
= llvm::FixedVectorType::get(
18187 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18188 Ops
.push_back(Builder
.CreateBitCast(Op0
, V1I128Ty
));
18189 Ops
.push_back(Builder
.CreateBitCast(Op1
, V1I128Ty
));
18190 Ops
.push_back(Builder
.CreateBitCast(Op2
, V1I128Ty
));
18191 switch (BuiltinID
) {
18193 llvm_unreachable("Unsupported intrinsic!");
18194 case PPC::BI__builtin_altivec_vaddeuqm_c
:
18195 ID
= Intrinsic::ppc_altivec_vaddeuqm
;
18197 case PPC::BI__builtin_altivec_vaddecuq_c
:
18198 ID
= Intrinsic::ppc_altivec_vaddecuq
;
18200 case PPC::BI__builtin_altivec_vsubeuqm_c
:
18201 ID
= Intrinsic::ppc_altivec_vsubeuqm
;
18203 case PPC::BI__builtin_altivec_vsubecuq_c
:
18204 ID
= Intrinsic::ppc_altivec_vsubecuq
;
18207 return Builder
.CreateCall(CGM
.getIntrinsic(ID
), Ops
, "");
18209 case PPC::BI__builtin_ppc_rldimi
:
18210 case PPC::BI__builtin_ppc_rlwimi
: {
18211 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18212 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18213 Value
*Op2
= EmitScalarExpr(E
->getArg(2));
18214 Value
*Op3
= EmitScalarExpr(E
->getArg(3));
18215 // rldimi is 64-bit instruction, expand the intrinsic before isel to
18216 // leverage peephole and avoid legalization efforts.
18217 if (BuiltinID
== PPC::BI__builtin_ppc_rldimi
&&
18218 !getTarget().getTriple().isPPC64()) {
18219 Function
*F
= CGM
.getIntrinsic(Intrinsic::fshl
, Op0
->getType());
18220 Op2
= Builder
.CreateZExt(Op2
, Int64Ty
);
18221 Value
*Shift
= Builder
.CreateCall(F
, {Op0
, Op0
, Op2
});
18222 return Builder
.CreateOr(Builder
.CreateAnd(Shift
, Op3
),
18223 Builder
.CreateAnd(Op1
, Builder
.CreateNot(Op3
)));
18225 return Builder
.CreateCall(
18226 CGM
.getIntrinsic(BuiltinID
== PPC::BI__builtin_ppc_rldimi
18227 ? Intrinsic::ppc_rldimi
18228 : Intrinsic::ppc_rlwimi
),
18229 {Op0
, Op1
, Op2
, Op3
});
18231 case PPC::BI__builtin_ppc_rlwnm
: {
18232 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18233 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18234 Value
*Op2
= EmitScalarExpr(E
->getArg(2));
18235 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::ppc_rlwnm
),
18238 case PPC::BI__builtin_ppc_poppar4
:
18239 case PPC::BI__builtin_ppc_poppar8
: {
18240 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18241 llvm::Type
*ArgType
= Op0
->getType();
18242 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctpop
, ArgType
);
18243 Value
*Tmp
= Builder
.CreateCall(F
, Op0
);
18245 llvm::Type
*ResultType
= ConvertType(E
->getType());
18246 Value
*Result
= Builder
.CreateAnd(Tmp
, llvm::ConstantInt::get(ArgType
, 1));
18247 if (Result
->getType() != ResultType
)
18248 Result
= Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/true,
18252 case PPC::BI__builtin_ppc_cmpb
: {
18253 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18254 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18255 if (getTarget().getTriple().isPPC64()) {
18257 CGM
.getIntrinsic(Intrinsic::ppc_cmpb
, {Int64Ty
, Int64Ty
, Int64Ty
});
18258 return Builder
.CreateCall(F
, {Op0
, Op1
}, "cmpb");
18260 // For 32 bit, emit the code as below:
18261 // %conv = trunc i64 %a to i32
18262 // %conv1 = trunc i64 %b to i32
18263 // %shr = lshr i64 %a, 32
18264 // %conv2 = trunc i64 %shr to i32
18265 // %shr3 = lshr i64 %b, 32
18266 // %conv4 = trunc i64 %shr3 to i32
18267 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
18268 // %conv5 = zext i32 %0 to i64
18269 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
18270 // %conv614 = zext i32 %1 to i64
18271 // %shl = shl nuw i64 %conv614, 32
18272 // %or = or i64 %shl, %conv5
18275 CGM
.getIntrinsic(Intrinsic::ppc_cmpb
, {Int32Ty
, Int32Ty
, Int32Ty
});
18276 Value
*ArgOneLo
= Builder
.CreateTrunc(Op0
, Int32Ty
);
18277 Value
*ArgTwoLo
= Builder
.CreateTrunc(Op1
, Int32Ty
);
18278 Constant
*ShiftAmt
= ConstantInt::get(Int64Ty
, 32);
18280 Builder
.CreateTrunc(Builder
.CreateLShr(Op0
, ShiftAmt
), Int32Ty
);
18282 Builder
.CreateTrunc(Builder
.CreateLShr(Op1
, ShiftAmt
), Int32Ty
);
18283 Value
*ResLo
= Builder
.CreateZExt(
18284 Builder
.CreateCall(F
, {ArgOneLo
, ArgTwoLo
}, "cmpb"), Int64Ty
);
18285 Value
*ResHiShift
= Builder
.CreateZExt(
18286 Builder
.CreateCall(F
, {ArgOneHi
, ArgTwoHi
}, "cmpb"), Int64Ty
);
18287 Value
*ResHi
= Builder
.CreateShl(ResHiShift
, ShiftAmt
);
18288 return Builder
.CreateOr(ResLo
, ResHi
);
18291 case PPC::BI__builtin_vsx_xvcpsgnsp
:
18292 case PPC::BI__builtin_vsx_xvcpsgndp
: {
18293 llvm::Type
*ResultType
= ConvertType(E
->getType());
18294 Value
*X
= EmitScalarExpr(E
->getArg(0));
18295 Value
*Y
= EmitScalarExpr(E
->getArg(1));
18296 ID
= Intrinsic::copysign
;
18297 llvm::Function
*F
= CGM
.getIntrinsic(ID
, ResultType
);
18298 return Builder
.CreateCall(F
, {X
, Y
});
18300 // Rounding/truncation
18301 case PPC::BI__builtin_vsx_xvrspip
:
18302 case PPC::BI__builtin_vsx_xvrdpip
:
18303 case PPC::BI__builtin_vsx_xvrdpim
:
18304 case PPC::BI__builtin_vsx_xvrspim
:
18305 case PPC::BI__builtin_vsx_xvrdpi
:
18306 case PPC::BI__builtin_vsx_xvrspi
:
18307 case PPC::BI__builtin_vsx_xvrdpic
:
18308 case PPC::BI__builtin_vsx_xvrspic
:
18309 case PPC::BI__builtin_vsx_xvrdpiz
:
18310 case PPC::BI__builtin_vsx_xvrspiz
: {
18311 llvm::Type
*ResultType
= ConvertType(E
->getType());
18312 Value
*X
= EmitScalarExpr(E
->getArg(0));
18313 if (BuiltinID
== PPC::BI__builtin_vsx_xvrdpim
||
18314 BuiltinID
== PPC::BI__builtin_vsx_xvrspim
)
18315 ID
= Builder
.getIsFPConstrained()
18316 ? Intrinsic::experimental_constrained_floor
18317 : Intrinsic::floor
;
18318 else if (BuiltinID
== PPC::BI__builtin_vsx_xvrdpi
||
18319 BuiltinID
== PPC::BI__builtin_vsx_xvrspi
)
18320 ID
= Builder
.getIsFPConstrained()
18321 ? Intrinsic::experimental_constrained_round
18322 : Intrinsic::round
;
18323 else if (BuiltinID
== PPC::BI__builtin_vsx_xvrdpic
||
18324 BuiltinID
== PPC::BI__builtin_vsx_xvrspic
)
18325 ID
= Builder
.getIsFPConstrained()
18326 ? Intrinsic::experimental_constrained_rint
18328 else if (BuiltinID
== PPC::BI__builtin_vsx_xvrdpip
||
18329 BuiltinID
== PPC::BI__builtin_vsx_xvrspip
)
18330 ID
= Builder
.getIsFPConstrained()
18331 ? Intrinsic::experimental_constrained_ceil
18333 else if (BuiltinID
== PPC::BI__builtin_vsx_xvrdpiz
||
18334 BuiltinID
== PPC::BI__builtin_vsx_xvrspiz
)
18335 ID
= Builder
.getIsFPConstrained()
18336 ? Intrinsic::experimental_constrained_trunc
18337 : Intrinsic::trunc
;
18338 llvm::Function
*F
= CGM
.getIntrinsic(ID
, ResultType
);
18339 return Builder
.getIsFPConstrained() ? Builder
.CreateConstrainedFPCall(F
, X
)
18340 : Builder
.CreateCall(F
, X
);
18344 case PPC::BI__builtin_vsx_xvabsdp
:
18345 case PPC::BI__builtin_vsx_xvabssp
: {
18346 llvm::Type
*ResultType
= ConvertType(E
->getType());
18347 Value
*X
= EmitScalarExpr(E
->getArg(0));
18348 llvm::Function
*F
= CGM
.getIntrinsic(Intrinsic::fabs
, ResultType
);
18349 return Builder
.CreateCall(F
, X
);
18352 // Fastmath by default
18353 case PPC::BI__builtin_ppc_recipdivf
:
18354 case PPC::BI__builtin_ppc_recipdivd
:
18355 case PPC::BI__builtin_ppc_rsqrtf
:
18356 case PPC::BI__builtin_ppc_rsqrtd
: {
18357 FastMathFlags FMF
= Builder
.getFastMathFlags();
18358 Builder
.getFastMathFlags().setFast();
18359 llvm::Type
*ResultType
= ConvertType(E
->getType());
18360 Value
*X
= EmitScalarExpr(E
->getArg(0));
18362 if (BuiltinID
== PPC::BI__builtin_ppc_recipdivf
||
18363 BuiltinID
== PPC::BI__builtin_ppc_recipdivd
) {
18364 Value
*Y
= EmitScalarExpr(E
->getArg(1));
18365 Value
*FDiv
= Builder
.CreateFDiv(X
, Y
, "recipdiv");
18366 Builder
.getFastMathFlags() &= (FMF
);
18369 auto *One
= ConstantFP::get(ResultType
, 1.0);
18370 llvm::Function
*F
= CGM
.getIntrinsic(Intrinsic::sqrt
, ResultType
);
18371 Value
*FDiv
= Builder
.CreateFDiv(One
, Builder
.CreateCall(F
, X
), "rsqrt");
18372 Builder
.getFastMathFlags() &= (FMF
);
18375 case PPC::BI__builtin_ppc_alignx
: {
18376 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18377 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18378 ConstantInt
*AlignmentCI
= cast
<ConstantInt
>(Op0
);
18379 if (AlignmentCI
->getValue().ugt(llvm::Value::MaximumAlignment
))
18380 AlignmentCI
= ConstantInt::get(AlignmentCI
->getIntegerType(),
18381 llvm::Value::MaximumAlignment
);
18383 emitAlignmentAssumption(Op1
, E
->getArg(1),
18384 /*The expr loc is sufficient.*/ SourceLocation(),
18385 AlignmentCI
, nullptr);
18388 case PPC::BI__builtin_ppc_rdlam
: {
18389 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18390 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18391 Value
*Op2
= EmitScalarExpr(E
->getArg(2));
18392 llvm::Type
*Ty
= Op0
->getType();
18393 Value
*ShiftAmt
= Builder
.CreateIntCast(Op1
, Ty
, false);
18394 Function
*F
= CGM
.getIntrinsic(Intrinsic::fshl
, Ty
);
18395 Value
*Rotate
= Builder
.CreateCall(F
, {Op0
, Op0
, ShiftAmt
});
18396 return Builder
.CreateAnd(Rotate
, Op2
);
18398 case PPC::BI__builtin_ppc_load2r
: {
18399 Function
*F
= CGM
.getIntrinsic(Intrinsic::ppc_load2r
);
18400 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18401 Value
*LoadIntrinsic
= Builder
.CreateCall(F
, {Op0
});
18402 return Builder
.CreateTrunc(LoadIntrinsic
, Int16Ty
);
18405 case PPC::BI__builtin_ppc_fnmsub
:
18406 case PPC::BI__builtin_ppc_fnmsubs
:
18407 case PPC::BI__builtin_vsx_xvmaddadp
:
18408 case PPC::BI__builtin_vsx_xvmaddasp
:
18409 case PPC::BI__builtin_vsx_xvnmaddadp
:
18410 case PPC::BI__builtin_vsx_xvnmaddasp
:
18411 case PPC::BI__builtin_vsx_xvmsubadp
:
18412 case PPC::BI__builtin_vsx_xvmsubasp
:
18413 case PPC::BI__builtin_vsx_xvnmsubadp
:
18414 case PPC::BI__builtin_vsx_xvnmsubasp
: {
18415 llvm::Type
*ResultType
= ConvertType(E
->getType());
18416 Value
*X
= EmitScalarExpr(E
->getArg(0));
18417 Value
*Y
= EmitScalarExpr(E
->getArg(1));
18418 Value
*Z
= EmitScalarExpr(E
->getArg(2));
18420 if (Builder
.getIsFPConstrained())
18421 F
= CGM
.getIntrinsic(Intrinsic::experimental_constrained_fma
, ResultType
);
18423 F
= CGM
.getIntrinsic(Intrinsic::fma
, ResultType
);
18424 switch (BuiltinID
) {
18425 case PPC::BI__builtin_vsx_xvmaddadp
:
18426 case PPC::BI__builtin_vsx_xvmaddasp
:
18427 if (Builder
.getIsFPConstrained())
18428 return Builder
.CreateConstrainedFPCall(F
, {X
, Y
, Z
});
18430 return Builder
.CreateCall(F
, {X
, Y
, Z
});
18431 case PPC::BI__builtin_vsx_xvnmaddadp
:
18432 case PPC::BI__builtin_vsx_xvnmaddasp
:
18433 if (Builder
.getIsFPConstrained())
18434 return Builder
.CreateFNeg(
18435 Builder
.CreateConstrainedFPCall(F
, {X
, Y
, Z
}), "neg");
18437 return Builder
.CreateFNeg(Builder
.CreateCall(F
, {X
, Y
, Z
}), "neg");
18438 case PPC::BI__builtin_vsx_xvmsubadp
:
18439 case PPC::BI__builtin_vsx_xvmsubasp
:
18440 if (Builder
.getIsFPConstrained())
18441 return Builder
.CreateConstrainedFPCall(
18442 F
, {X
, Y
, Builder
.CreateFNeg(Z
, "neg")});
18444 return Builder
.CreateCall(F
, {X
, Y
, Builder
.CreateFNeg(Z
, "neg")});
18445 case PPC::BI__builtin_ppc_fnmsub
:
18446 case PPC::BI__builtin_ppc_fnmsubs
:
18447 case PPC::BI__builtin_vsx_xvnmsubadp
:
18448 case PPC::BI__builtin_vsx_xvnmsubasp
:
18449 if (Builder
.getIsFPConstrained())
18450 return Builder
.CreateFNeg(
18451 Builder
.CreateConstrainedFPCall(
18452 F
, {X
, Y
, Builder
.CreateFNeg(Z
, "neg")}),
18455 return Builder
.CreateCall(
18456 CGM
.getIntrinsic(Intrinsic::ppc_fnmsub
, ResultType
), {X
, Y
, Z
});
18458 llvm_unreachable("Unknown FMA operation");
18459 return nullptr; // Suppress no-return warning
18462 case PPC::BI__builtin_vsx_insertword
: {
18463 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18464 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18465 Value
*Op2
= EmitScalarExpr(E
->getArg(2));
18466 llvm::Function
*F
= CGM
.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw
);
18468 // Third argument is a compile time constant int. It must be clamped to
18469 // to the range [0, 12].
18470 ConstantInt
*ArgCI
= dyn_cast
<ConstantInt
>(Op2
);
18472 "Third arg to xxinsertw intrinsic must be constant integer");
18473 const int64_t MaxIndex
= 12;
18474 int64_t Index
= std::clamp(ArgCI
->getSExtValue(), (int64_t)0, MaxIndex
);
18476 // The builtin semantics don't exactly match the xxinsertw instructions
18477 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
18478 // word from the first argument, and inserts it in the second argument. The
18479 // instruction extracts the word from its second input register and inserts
18480 // it into its first input register, so swap the first and second arguments.
18481 std::swap(Op0
, Op1
);
18483 // Need to cast the second argument from a vector of unsigned int to a
18484 // vector of long long.
18485 Op1
= Builder
.CreateBitCast(Op1
, llvm::FixedVectorType::get(Int64Ty
, 2));
18487 if (getTarget().isLittleEndian()) {
18488 // Reverse the double words in the vector we will extract from.
18489 Op0
= Builder
.CreateBitCast(Op0
, llvm::FixedVectorType::get(Int64Ty
, 2));
18490 Op0
= Builder
.CreateShuffleVector(Op0
, Op0
, ArrayRef
<int>{1, 0});
18492 // Reverse the index.
18493 Index
= MaxIndex
- Index
;
18496 // Intrinsic expects the first arg to be a vector of int.
18497 Op0
= Builder
.CreateBitCast(Op0
, llvm::FixedVectorType::get(Int32Ty
, 4));
18498 Op2
= ConstantInt::getSigned(Int32Ty
, Index
);
18499 return Builder
.CreateCall(F
, {Op0
, Op1
, Op2
});
18502 case PPC::BI__builtin_vsx_extractuword
: {
18503 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18504 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18505 llvm::Function
*F
= CGM
.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw
);
18507 // Intrinsic expects the first argument to be a vector of doublewords.
18508 Op0
= Builder
.CreateBitCast(Op0
, llvm::FixedVectorType::get(Int64Ty
, 2));
18510 // The second argument is a compile time constant int that needs to
18511 // be clamped to the range [0, 12].
18512 ConstantInt
*ArgCI
= dyn_cast
<ConstantInt
>(Op1
);
18514 "Second Arg to xxextractuw intrinsic must be a constant integer!");
18515 const int64_t MaxIndex
= 12;
18516 int64_t Index
= std::clamp(ArgCI
->getSExtValue(), (int64_t)0, MaxIndex
);
18518 if (getTarget().isLittleEndian()) {
18519 // Reverse the index.
18520 Index
= MaxIndex
- Index
;
18521 Op1
= ConstantInt::getSigned(Int32Ty
, Index
);
18523 // Emit the call, then reverse the double words of the results vector.
18524 Value
*Call
= Builder
.CreateCall(F
, {Op0
, Op1
});
18526 Value
*ShuffleCall
=
18527 Builder
.CreateShuffleVector(Call
, Call
, ArrayRef
<int>{1, 0});
18528 return ShuffleCall
;
18530 Op1
= ConstantInt::getSigned(Int32Ty
, Index
);
18531 return Builder
.CreateCall(F
, {Op0
, Op1
});
18535 case PPC::BI__builtin_vsx_xxpermdi
: {
18536 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18537 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18538 Value
*Op2
= EmitScalarExpr(E
->getArg(2));
18539 ConstantInt
*ArgCI
= dyn_cast
<ConstantInt
>(Op2
);
18540 assert(ArgCI
&& "Third arg must be constant integer!");
18542 unsigned Index
= ArgCI
->getZExtValue();
18543 Op0
= Builder
.CreateBitCast(Op0
, llvm::FixedVectorType::get(Int64Ty
, 2));
18544 Op1
= Builder
.CreateBitCast(Op1
, llvm::FixedVectorType::get(Int64Ty
, 2));
18546 // Account for endianness by treating this as just a shuffle. So we use the
18547 // same indices for both LE and BE in order to produce expected results in
18549 int ElemIdx0
= (Index
& 2) >> 1;
18550 int ElemIdx1
= 2 + (Index
& 1);
18552 int ShuffleElts
[2] = {ElemIdx0
, ElemIdx1
};
18553 Value
*ShuffleCall
= Builder
.CreateShuffleVector(Op0
, Op1
, ShuffleElts
);
18554 QualType BIRetType
= E
->getType();
18555 auto RetTy
= ConvertType(BIRetType
);
18556 return Builder
.CreateBitCast(ShuffleCall
, RetTy
);
18559 case PPC::BI__builtin_vsx_xxsldwi
: {
18560 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18561 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18562 Value
*Op2
= EmitScalarExpr(E
->getArg(2));
18563 ConstantInt
*ArgCI
= dyn_cast
<ConstantInt
>(Op2
);
18564 assert(ArgCI
&& "Third argument must be a compile time constant");
18565 unsigned Index
= ArgCI
->getZExtValue() & 0x3;
18566 Op0
= Builder
.CreateBitCast(Op0
, llvm::FixedVectorType::get(Int32Ty
, 4));
18567 Op1
= Builder
.CreateBitCast(Op1
, llvm::FixedVectorType::get(Int32Ty
, 4));
18569 // Create a shuffle mask
18574 if (getTarget().isLittleEndian()) {
18575 // Little endian element N comes from element 8+N-Index of the
18576 // concatenated wide vector (of course, using modulo arithmetic on
18577 // the total number of elements).
18578 ElemIdx0
= (8 - Index
) % 8;
18579 ElemIdx1
= (9 - Index
) % 8;
18580 ElemIdx2
= (10 - Index
) % 8;
18581 ElemIdx3
= (11 - Index
) % 8;
18583 // Big endian ElemIdx<N> = Index + N
18585 ElemIdx1
= Index
+ 1;
18586 ElemIdx2
= Index
+ 2;
18587 ElemIdx3
= Index
+ 3;
18590 int ShuffleElts
[4] = {ElemIdx0
, ElemIdx1
, ElemIdx2
, ElemIdx3
};
18591 Value
*ShuffleCall
= Builder
.CreateShuffleVector(Op0
, Op1
, ShuffleElts
);
18592 QualType BIRetType
= E
->getType();
18593 auto RetTy
= ConvertType(BIRetType
);
18594 return Builder
.CreateBitCast(ShuffleCall
, RetTy
);
18597 case PPC::BI__builtin_pack_vector_int128
: {
18598 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18599 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18600 bool isLittleEndian
= getTarget().isLittleEndian();
18601 Value
*PoisonValue
=
18602 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0
->getType(), 2));
18603 Value
*Res
= Builder
.CreateInsertElement(
18604 PoisonValue
, Op0
, (uint64_t)(isLittleEndian
? 1 : 0));
18605 Res
= Builder
.CreateInsertElement(Res
, Op1
,
18606 (uint64_t)(isLittleEndian
? 0 : 1));
18607 return Builder
.CreateBitCast(Res
, ConvertType(E
->getType()));
18610 case PPC::BI__builtin_unpack_vector_int128
: {
18611 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18612 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18613 ConstantInt
*Index
= cast
<ConstantInt
>(Op1
);
18614 Value
*Unpacked
= Builder
.CreateBitCast(
18615 Op0
, llvm::FixedVectorType::get(ConvertType(E
->getType()), 2));
18617 if (getTarget().isLittleEndian())
18619 ConstantInt::get(Index
->getIntegerType(), 1 - Index
->getZExtValue());
18621 return Builder
.CreateExtractElement(Unpacked
, Index
);
18624 case PPC::BI__builtin_ppc_sthcx
: {
18625 llvm::Function
*F
= CGM
.getIntrinsic(Intrinsic::ppc_sthcx
);
18626 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18627 Value
*Op1
= Builder
.CreateSExt(EmitScalarExpr(E
->getArg(1)), Int32Ty
);
18628 return Builder
.CreateCall(F
, {Op0
, Op1
});
18631 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
18632 // Some of the MMA instructions accumulate their result into an existing
18633 // accumulator whereas the others generate a new accumulator. So we need to
18634 // use custom code generation to expand a builtin call with a pointer to a
18635 // load (if the corresponding instruction accumulates its result) followed by
18636 // the call to the intrinsic and a store of the result.
18637 #define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
18638 case PPC::BI__builtin_##Name:
18639 #include "clang/Basic/BuiltinsPPC.def"
18641 SmallVector
<Value
*, 4> Ops
;
18642 for (unsigned i
= 0, e
= E
->getNumArgs(); i
!= e
; i
++)
18643 if (E
->getArg(i
)->getType()->isArrayType())
18645 EmitArrayToPointerDecay(E
->getArg(i
)).emitRawPointer(*this));
18647 Ops
.push_back(EmitScalarExpr(E
->getArg(i
)));
18648 // The first argument of these two builtins is a pointer used to store their
18649 // result. However, the llvm intrinsics return their result in multiple
18650 // return values. So, here we emit code extracting these values from the
18651 // intrinsic results and storing them using that pointer.
18652 if (BuiltinID
== PPC::BI__builtin_mma_disassemble_acc
||
18653 BuiltinID
== PPC::BI__builtin_vsx_disassemble_pair
||
18654 BuiltinID
== PPC::BI__builtin_mma_disassemble_pair
) {
18655 unsigned NumVecs
= 2;
18656 auto Intrinsic
= Intrinsic::ppc_vsx_disassemble_pair
;
18657 if (BuiltinID
== PPC::BI__builtin_mma_disassemble_acc
) {
18659 Intrinsic
= Intrinsic::ppc_mma_disassemble_acc
;
18661 llvm::Function
*F
= CGM
.getIntrinsic(Intrinsic
);
18662 Address Addr
= EmitPointerWithAlignment(E
->getArg(1));
18663 Value
*Vec
= Builder
.CreateLoad(Addr
);
18664 Value
*Call
= Builder
.CreateCall(F
, {Vec
});
18665 llvm::Type
*VTy
= llvm::FixedVectorType::get(Int8Ty
, 16);
18666 Value
*Ptr
= Ops
[0];
18667 for (unsigned i
=0; i
<NumVecs
; i
++) {
18668 Value
*Vec
= Builder
.CreateExtractValue(Call
, i
);
18669 llvm::ConstantInt
* Index
= llvm::ConstantInt::get(IntTy
, i
);
18670 Value
*GEP
= Builder
.CreateInBoundsGEP(VTy
, Ptr
, Index
);
18671 Builder
.CreateAlignedStore(Vec
, GEP
, MaybeAlign(16));
18675 if (BuiltinID
== PPC::BI__builtin_vsx_build_pair
||
18676 BuiltinID
== PPC::BI__builtin_mma_build_acc
) {
18677 // Reverse the order of the operands for LE, so the
18678 // same builtin call can be used on both LE and BE
18679 // without the need for the programmer to swap operands.
18680 // The operands are reversed starting from the second argument,
18681 // the first operand is the pointer to the pair/accumulator
18682 // that is being built.
18683 if (getTarget().isLittleEndian())
18684 std::reverse(Ops
.begin() + 1, Ops
.end());
18687 switch (BuiltinID
) {
18688 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
18689 case PPC::BI__builtin_##Name: \
18690 ID = Intrinsic::ppc_##Intr; \
18691 Accumulate = Acc; \
18693 #include "clang/Basic/BuiltinsPPC.def"
18695 if (BuiltinID
== PPC::BI__builtin_vsx_lxvp
||
18696 BuiltinID
== PPC::BI__builtin_vsx_stxvp
||
18697 BuiltinID
== PPC::BI__builtin_mma_lxvp
||
18698 BuiltinID
== PPC::BI__builtin_mma_stxvp
) {
18699 if (BuiltinID
== PPC::BI__builtin_vsx_lxvp
||
18700 BuiltinID
== PPC::BI__builtin_mma_lxvp
) {
18701 Ops
[0] = Builder
.CreateGEP(Int8Ty
, Ops
[1], Ops
[0]);
18703 Ops
[1] = Builder
.CreateGEP(Int8Ty
, Ops
[2], Ops
[1]);
18706 llvm::Function
*F
= CGM
.getIntrinsic(ID
);
18707 return Builder
.CreateCall(F
, Ops
, "");
18709 SmallVector
<Value
*, 4> CallOps
;
18711 Address Addr
= EmitPointerWithAlignment(E
->getArg(0));
18712 Value
*Acc
= Builder
.CreateLoad(Addr
);
18713 CallOps
.push_back(Acc
);
18715 for (unsigned i
=1; i
<Ops
.size(); i
++)
18716 CallOps
.push_back(Ops
[i
]);
18717 llvm::Function
*F
= CGM
.getIntrinsic(ID
);
18718 Value
*Call
= Builder
.CreateCall(F
, CallOps
);
18719 return Builder
.CreateAlignedStore(Call
, Ops
[0], MaybeAlign());
18722 case PPC::BI__builtin_ppc_compare_and_swap
:
18723 case PPC::BI__builtin_ppc_compare_and_swaplp
: {
18724 Address Addr
= EmitPointerWithAlignment(E
->getArg(0));
18725 Address OldValAddr
= EmitPointerWithAlignment(E
->getArg(1));
18726 Value
*OldVal
= Builder
.CreateLoad(OldValAddr
);
18727 QualType AtomicTy
= E
->getArg(0)->getType()->getPointeeType();
18728 LValue LV
= MakeAddrLValue(Addr
, AtomicTy
);
18729 Value
*Op2
= EmitScalarExpr(E
->getArg(2));
18730 auto Pair
= EmitAtomicCompareExchange(
18731 LV
, RValue::get(OldVal
), RValue::get(Op2
), E
->getExprLoc(),
18732 llvm::AtomicOrdering::Monotonic
, llvm::AtomicOrdering::Monotonic
, true);
18733 // Unlike c11's atomic_compare_exchange, according to
18734 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
18735 // > In either case, the contents of the memory location specified by addr
18736 // > are copied into the memory location specified by old_val_addr.
18737 // But it hasn't specified storing to OldValAddr is atomic or not and
18738 // which order to use. Now following XL's codegen, treat it as a normal
18740 Value
*LoadedVal
= Pair
.first
.getScalarVal();
18741 Builder
.CreateStore(LoadedVal
, OldValAddr
);
18742 return Builder
.CreateZExt(Pair
.second
, Builder
.getInt32Ty());
18744 case PPC::BI__builtin_ppc_fetch_and_add
:
18745 case PPC::BI__builtin_ppc_fetch_and_addlp
: {
18746 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add
, E
,
18747 llvm::AtomicOrdering::Monotonic
);
18749 case PPC::BI__builtin_ppc_fetch_and_and
:
18750 case PPC::BI__builtin_ppc_fetch_and_andlp
: {
18751 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And
, E
,
18752 llvm::AtomicOrdering::Monotonic
);
18755 case PPC::BI__builtin_ppc_fetch_and_or
:
18756 case PPC::BI__builtin_ppc_fetch_and_orlp
: {
18757 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or
, E
,
18758 llvm::AtomicOrdering::Monotonic
);
18760 case PPC::BI__builtin_ppc_fetch_and_swap
:
18761 case PPC::BI__builtin_ppc_fetch_and_swaplp
: {
18762 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg
, E
,
18763 llvm::AtomicOrdering::Monotonic
);
18765 case PPC::BI__builtin_ppc_ldarx
:
18766 case PPC::BI__builtin_ppc_lwarx
:
18767 case PPC::BI__builtin_ppc_lharx
:
18768 case PPC::BI__builtin_ppc_lbarx
:
18769 return emitPPCLoadReserveIntrinsic(*this, BuiltinID
, E
);
18770 case PPC::BI__builtin_ppc_mfspr
: {
18771 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18772 llvm::Type
*RetType
= CGM
.getDataLayout().getTypeSizeInBits(VoidPtrTy
) == 32
18775 Function
*F
= CGM
.getIntrinsic(Intrinsic::ppc_mfspr
, RetType
);
18776 return Builder
.CreateCall(F
, {Op0
});
18778 case PPC::BI__builtin_ppc_mtspr
: {
18779 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18780 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18781 llvm::Type
*RetType
= CGM
.getDataLayout().getTypeSizeInBits(VoidPtrTy
) == 32
18784 Function
*F
= CGM
.getIntrinsic(Intrinsic::ppc_mtspr
, RetType
);
18785 return Builder
.CreateCall(F
, {Op0
, Op1
});
18787 case PPC::BI__builtin_ppc_popcntb
: {
18788 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
18789 llvm::Type
*ArgType
= ArgValue
->getType();
18790 Function
*F
= CGM
.getIntrinsic(Intrinsic::ppc_popcntb
, {ArgType
, ArgType
});
18791 return Builder
.CreateCall(F
, {ArgValue
}, "popcntb");
18793 case PPC::BI__builtin_ppc_mtfsf
: {
18794 // The builtin takes a uint32 that needs to be cast to an
18795 // f64 to be passed to the intrinsic.
18796 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18797 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18798 Value
*Cast
= Builder
.CreateUIToFP(Op1
, DoubleTy
);
18799 llvm::Function
*F
= CGM
.getIntrinsic(Intrinsic::ppc_mtfsf
);
18800 return Builder
.CreateCall(F
, {Op0
, Cast
}, "");
18803 case PPC::BI__builtin_ppc_swdiv_nochk
:
18804 case PPC::BI__builtin_ppc_swdivs_nochk
: {
18805 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18806 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18807 FastMathFlags FMF
= Builder
.getFastMathFlags();
18808 Builder
.getFastMathFlags().setFast();
18809 Value
*FDiv
= Builder
.CreateFDiv(Op0
, Op1
, "swdiv_nochk");
18810 Builder
.getFastMathFlags() &= (FMF
);
18813 case PPC::BI__builtin_ppc_fric
:
18814 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18815 *this, E
, Intrinsic::rint
,
18816 Intrinsic::experimental_constrained_rint
))
18818 case PPC::BI__builtin_ppc_frim
:
18819 case PPC::BI__builtin_ppc_frims
:
18820 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18821 *this, E
, Intrinsic::floor
,
18822 Intrinsic::experimental_constrained_floor
))
18824 case PPC::BI__builtin_ppc_frin
:
18825 case PPC::BI__builtin_ppc_frins
:
18826 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18827 *this, E
, Intrinsic::round
,
18828 Intrinsic::experimental_constrained_round
))
18830 case PPC::BI__builtin_ppc_frip
:
18831 case PPC::BI__builtin_ppc_frips
:
18832 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18833 *this, E
, Intrinsic::ceil
,
18834 Intrinsic::experimental_constrained_ceil
))
18836 case PPC::BI__builtin_ppc_friz
:
18837 case PPC::BI__builtin_ppc_frizs
:
18838 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18839 *this, E
, Intrinsic::trunc
,
18840 Intrinsic::experimental_constrained_trunc
))
18842 case PPC::BI__builtin_ppc_fsqrt
:
18843 case PPC::BI__builtin_ppc_fsqrts
:
18844 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18845 *this, E
, Intrinsic::sqrt
,
18846 Intrinsic::experimental_constrained_sqrt
))
18848 case PPC::BI__builtin_ppc_test_data_class
: {
18849 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18850 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18851 return Builder
.CreateCall(
18852 CGM
.getIntrinsic(Intrinsic::ppc_test_data_class
, Op0
->getType()),
18853 {Op0
, Op1
}, "test_data_class");
18855 case PPC::BI__builtin_ppc_maxfe
: {
18856 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18857 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18858 Value
*Op2
= EmitScalarExpr(E
->getArg(2));
18859 Value
*Op3
= EmitScalarExpr(E
->getArg(3));
18860 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::ppc_maxfe
),
18861 {Op0
, Op1
, Op2
, Op3
});
18863 case PPC::BI__builtin_ppc_maxfl
: {
18864 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18865 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18866 Value
*Op2
= EmitScalarExpr(E
->getArg(2));
18867 Value
*Op3
= EmitScalarExpr(E
->getArg(3));
18868 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::ppc_maxfl
),
18869 {Op0
, Op1
, Op2
, Op3
});
18871 case PPC::BI__builtin_ppc_maxfs
: {
18872 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18873 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18874 Value
*Op2
= EmitScalarExpr(E
->getArg(2));
18875 Value
*Op3
= EmitScalarExpr(E
->getArg(3));
18876 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::ppc_maxfs
),
18877 {Op0
, Op1
, Op2
, Op3
});
18879 case PPC::BI__builtin_ppc_minfe
: {
18880 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18881 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18882 Value
*Op2
= EmitScalarExpr(E
->getArg(2));
18883 Value
*Op3
= EmitScalarExpr(E
->getArg(3));
18884 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::ppc_minfe
),
18885 {Op0
, Op1
, Op2
, Op3
});
18887 case PPC::BI__builtin_ppc_minfl
: {
18888 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18889 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18890 Value
*Op2
= EmitScalarExpr(E
->getArg(2));
18891 Value
*Op3
= EmitScalarExpr(E
->getArg(3));
18892 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::ppc_minfl
),
18893 {Op0
, Op1
, Op2
, Op3
});
18895 case PPC::BI__builtin_ppc_minfs
: {
18896 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18897 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18898 Value
*Op2
= EmitScalarExpr(E
->getArg(2));
18899 Value
*Op3
= EmitScalarExpr(E
->getArg(3));
18900 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::ppc_minfs
),
18901 {Op0
, Op1
, Op2
, Op3
});
18903 case PPC::BI__builtin_ppc_swdiv
:
18904 case PPC::BI__builtin_ppc_swdivs
: {
18905 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
18906 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
18907 return Builder
.CreateFDiv(Op0
, Op1
, "swdiv");
18909 case PPC::BI__builtin_ppc_set_fpscr_rn
:
18910 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::ppc_setrnd
),
18911 {EmitScalarExpr(E
->getArg(0))});
18912 case PPC::BI__builtin_ppc_mffs
:
18913 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::ppc_readflm
));
18918 // If \p E is not null pointer, insert address space cast to match return
18919 // type of \p E if necessary.
18920 Value
*EmitAMDGPUDispatchPtr(CodeGenFunction
&CGF
,
18921 const CallExpr
*E
= nullptr) {
18922 auto *F
= CGF
.CGM
.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr
);
18923 auto *Call
= CGF
.Builder
.CreateCall(F
);
18925 Attribute::getWithDereferenceableBytes(Call
->getContext(), 64));
18926 Call
->addRetAttr(Attribute::getWithAlignment(Call
->getContext(), Align(4)));
18929 QualType BuiltinRetType
= E
->getType();
18930 auto *RetTy
= cast
<llvm::PointerType
>(CGF
.ConvertType(BuiltinRetType
));
18931 if (RetTy
== Call
->getType())
18933 return CGF
.Builder
.CreateAddrSpaceCast(Call
, RetTy
);
18936 Value
*EmitAMDGPUImplicitArgPtr(CodeGenFunction
&CGF
) {
18937 auto *F
= CGF
.CGM
.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr
);
18938 auto *Call
= CGF
.Builder
.CreateCall(F
);
18940 Attribute::getWithDereferenceableBytes(Call
->getContext(), 256));
18941 Call
->addRetAttr(Attribute::getWithAlignment(Call
->getContext(), Align(8)));
18945 // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
18946 /// Emit code based on Code Object ABI version.
18947 /// COV_4 : Emit code to use dispatch ptr
18948 /// COV_5+ : Emit code to use implicitarg ptr
18949 /// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
18950 /// and use its value for COV_4 or COV_5+ approach. It is used for
18951 /// compiling device libraries in an ABI-agnostic way.
18953 /// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by
18954 /// clang during compilation of user code.
18955 Value
*EmitAMDGPUWorkGroupSize(CodeGenFunction
&CGF
, unsigned Index
) {
18956 llvm::LoadInst
*LD
;
18958 auto Cov
= CGF
.getTarget().getTargetOpts().CodeObjectVersion
;
18960 if (Cov
== CodeObjectVersionKind::COV_None
) {
18961 StringRef Name
= "__oclc_ABI_version";
18962 auto *ABIVersionC
= CGF
.CGM
.getModule().getNamedGlobal(Name
);
18964 ABIVersionC
= new llvm::GlobalVariable(
18965 CGF
.CGM
.getModule(), CGF
.Int32Ty
, false,
18966 llvm::GlobalValue::ExternalLinkage
, nullptr, Name
, nullptr,
18967 llvm::GlobalVariable::NotThreadLocal
,
18968 CGF
.CGM
.getContext().getTargetAddressSpace(LangAS::opencl_constant
));
18970 // This load will be eliminated by the IPSCCP because it is constant
18971 // weak_odr without externally_initialized. Either changing it to weak or
18972 // adding externally_initialized will keep the load.
18973 Value
*ABIVersion
= CGF
.Builder
.CreateAlignedLoad(CGF
.Int32Ty
, ABIVersionC
,
18974 CGF
.CGM
.getIntAlign());
18976 Value
*IsCOV5
= CGF
.Builder
.CreateICmpSGE(
18978 llvm::ConstantInt::get(CGF
.Int32Ty
, CodeObjectVersionKind::COV_5
));
18980 // Indexing the implicit kernarg segment.
18981 Value
*ImplicitGEP
= CGF
.Builder
.CreateConstGEP1_32(
18982 CGF
.Int8Ty
, EmitAMDGPUImplicitArgPtr(CGF
), 12 + Index
* 2);
18984 // Indexing the HSA kernel_dispatch_packet struct.
18985 Value
*DispatchGEP
= CGF
.Builder
.CreateConstGEP1_32(
18986 CGF
.Int8Ty
, EmitAMDGPUDispatchPtr(CGF
), 4 + Index
* 2);
18988 auto Result
= CGF
.Builder
.CreateSelect(IsCOV5
, ImplicitGEP
, DispatchGEP
);
18989 LD
= CGF
.Builder
.CreateLoad(
18990 Address(Result
, CGF
.Int16Ty
, CharUnits::fromQuantity(2)));
18992 Value
*GEP
= nullptr;
18993 if (Cov
>= CodeObjectVersionKind::COV_5
) {
18994 // Indexing the implicit kernarg segment.
18995 GEP
= CGF
.Builder
.CreateConstGEP1_32(
18996 CGF
.Int8Ty
, EmitAMDGPUImplicitArgPtr(CGF
), 12 + Index
* 2);
18998 // Indexing the HSA kernel_dispatch_packet struct.
18999 GEP
= CGF
.Builder
.CreateConstGEP1_32(
19000 CGF
.Int8Ty
, EmitAMDGPUDispatchPtr(CGF
), 4 + Index
* 2);
19002 LD
= CGF
.Builder
.CreateLoad(
19003 Address(GEP
, CGF
.Int16Ty
, CharUnits::fromQuantity(2)));
19006 llvm::MDBuilder
MDHelper(CGF
.getLLVMContext());
19007 llvm::MDNode
*RNode
= MDHelper
.createRange(APInt(16, 1),
19008 APInt(16, CGF
.getTarget().getMaxOpenCLWorkGroupSize() + 1));
19009 LD
->setMetadata(llvm::LLVMContext::MD_range
, RNode
);
19010 LD
->setMetadata(llvm::LLVMContext::MD_noundef
,
19011 llvm::MDNode::get(CGF
.getLLVMContext(), {}));
19012 LD
->setMetadata(llvm::LLVMContext::MD_invariant_load
,
19013 llvm::MDNode::get(CGF
.getLLVMContext(), {}));
19017 // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
19018 Value
*EmitAMDGPUGridSize(CodeGenFunction
&CGF
, unsigned Index
) {
19019 const unsigned XOffset
= 12;
19020 auto *DP
= EmitAMDGPUDispatchPtr(CGF
);
19021 // Indexing the HSA kernel_dispatch_packet struct.
19022 auto *Offset
= llvm::ConstantInt::get(CGF
.Int32Ty
, XOffset
+ Index
* 4);
19023 auto *GEP
= CGF
.Builder
.CreateGEP(CGF
.Int8Ty
, DP
, Offset
);
19024 auto *LD
= CGF
.Builder
.CreateLoad(
19025 Address(GEP
, CGF
.Int32Ty
, CharUnits::fromQuantity(4)));
19027 llvm::MDBuilder
MDB(CGF
.getLLVMContext());
19030 LD
->setMetadata(llvm::LLVMContext::MD_range
,
19031 MDB
.createRange(APInt(32, 1), APInt::getZero(32)));
19032 LD
->setMetadata(llvm::LLVMContext::MD_invariant_load
,
19033 llvm::MDNode::get(CGF
.getLLVMContext(), {}));
19038 // For processing memory ordering and memory scope arguments of various
19039 // amdgcn builtins.
19040 // \p Order takes a C++11 comptabile memory-ordering specifier and converts
19041 // it into LLVM's memory ordering specifier using atomic C ABI, and writes
19042 // to \p AO. \p Scope takes a const char * and converts it into AMDGCN
19043 // specific SyncScopeID and writes it to \p SSID.
19044 void CodeGenFunction::ProcessOrderScopeAMDGCN(Value
*Order
, Value
*Scope
,
19045 llvm::AtomicOrdering
&AO
,
19046 llvm::SyncScope::ID
&SSID
) {
19047 int ord
= cast
<llvm::ConstantInt
>(Order
)->getZExtValue();
19049 // Map C11/C++11 memory ordering to LLVM memory ordering
19050 assert(llvm::isValidAtomicOrderingCABI(ord
));
19051 switch (static_cast<llvm::AtomicOrderingCABI
>(ord
)) {
19052 case llvm::AtomicOrderingCABI::acquire
:
19053 case llvm::AtomicOrderingCABI::consume
:
19054 AO
= llvm::AtomicOrdering::Acquire
;
19056 case llvm::AtomicOrderingCABI::release
:
19057 AO
= llvm::AtomicOrdering::Release
;
19059 case llvm::AtomicOrderingCABI::acq_rel
:
19060 AO
= llvm::AtomicOrdering::AcquireRelease
;
19062 case llvm::AtomicOrderingCABI::seq_cst
:
19063 AO
= llvm::AtomicOrdering::SequentiallyConsistent
;
19065 case llvm::AtomicOrderingCABI::relaxed
:
19066 AO
= llvm::AtomicOrdering::Monotonic
;
19070 // Some of the atomic builtins take the scope as a string name.
19072 if (llvm::getConstantStringInfo(Scope
, scp
)) {
19073 SSID
= getLLVMContext().getOrInsertSyncScopeID(scp
);
19077 // Older builtins had an enum argument for the memory scope.
19078 int scope
= cast
<llvm::ConstantInt
>(Scope
)->getZExtValue();
19080 case 0: // __MEMORY_SCOPE_SYSTEM
19081 SSID
= llvm::SyncScope::System
;
19083 case 1: // __MEMORY_SCOPE_DEVICE
19084 SSID
= getLLVMContext().getOrInsertSyncScopeID("agent");
19086 case 2: // __MEMORY_SCOPE_WRKGRP
19087 SSID
= getLLVMContext().getOrInsertSyncScopeID("workgroup");
19089 case 3: // __MEMORY_SCOPE_WVFRNT
19090 SSID
= getLLVMContext().getOrInsertSyncScopeID("wavefront");
19092 case 4: // __MEMORY_SCOPE_SINGLE
19093 SSID
= llvm::SyncScope::SingleThread
;
19096 SSID
= llvm::SyncScope::System
;
19101 llvm::Value
*CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments
,
19103 const CallExpr
*E
) {
19104 llvm::Value
*Arg
= nullptr;
19105 if ((ICEArguments
& (1 << Idx
)) == 0) {
19106 Arg
= EmitScalarExpr(E
->getArg(Idx
));
19108 // If this is required to be a constant, constant fold it so that we
19109 // know that the generated intrinsic gets a ConstantInt.
19110 std::optional
<llvm::APSInt
> Result
=
19111 E
->getArg(Idx
)->getIntegerConstantExpr(getContext());
19112 assert(Result
&& "Expected argument to be a constant");
19113 Arg
= llvm::ConstantInt::get(getLLVMContext(), *Result
);
19118 // Return dot product intrinsic that corresponds to the QT scalar type
19119 static Intrinsic::ID
getDotProductIntrinsic(CGHLSLRuntime
&RT
, QualType QT
) {
19120 if (QT
->isFloatingType())
19121 return RT
.getFDotIntrinsic();
19122 if (QT
->isSignedIntegerType())
19123 return RT
.getSDotIntrinsic();
19124 assert(QT
->isUnsignedIntegerType());
19125 return RT
.getUDotIntrinsic();
19128 static Intrinsic::ID
getFirstBitHighIntrinsic(CGHLSLRuntime
&RT
, QualType QT
) {
19129 if (QT
->hasSignedIntegerRepresentation()) {
19130 return RT
.getFirstBitSHighIntrinsic();
19133 assert(QT
->hasUnsignedIntegerRepresentation());
19134 return RT
.getFirstBitUHighIntrinsic();
19137 Value
*CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID
,
19139 ReturnValueSlot ReturnValue
) {
19140 if (!getLangOpts().HLSL
)
19143 switch (BuiltinID
) {
19144 case Builtin::BI__builtin_hlsl_resource_getpointer
: {
19145 Value
*HandleOp
= EmitScalarExpr(E
->getArg(0));
19146 Value
*IndexOp
= EmitScalarExpr(E
->getArg(1));
19148 // TODO: Map to an hlsl_device address space.
19149 llvm::Type
*RetTy
= llvm::PointerType::getUnqual(getLLVMContext());
19151 return Builder
.CreateIntrinsic(RetTy
, Intrinsic::dx_resource_getpointer
,
19152 ArrayRef
<Value
*>{HandleOp
, IndexOp
});
19154 case Builtin::BI__builtin_hlsl_all
: {
19155 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
19156 return Builder
.CreateIntrinsic(
19157 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19158 CGM
.getHLSLRuntime().getAllIntrinsic(), ArrayRef
<Value
*>{Op0
}, nullptr,
19161 case Builtin::BI__builtin_hlsl_any
: {
19162 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
19163 return Builder
.CreateIntrinsic(
19164 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19165 CGM
.getHLSLRuntime().getAnyIntrinsic(), ArrayRef
<Value
*>{Op0
}, nullptr,
19168 case Builtin::BI__builtin_hlsl_asdouble
:
19169 return handleAsDoubleBuiltin(*this, E
);
19170 case Builtin::BI__builtin_hlsl_elementwise_clamp
: {
19171 Value
*OpX
= EmitScalarExpr(E
->getArg(0));
19172 Value
*OpMin
= EmitScalarExpr(E
->getArg(1));
19173 Value
*OpMax
= EmitScalarExpr(E
->getArg(2));
19175 QualType Ty
= E
->getArg(0)->getType();
19176 if (auto *VecTy
= Ty
->getAs
<VectorType
>())
19177 Ty
= VecTy
->getElementType();
19179 Intrinsic::ID Intr
;
19180 if (Ty
->isFloatingType()) {
19181 Intr
= CGM
.getHLSLRuntime().getNClampIntrinsic();
19182 } else if (Ty
->isUnsignedIntegerType()) {
19183 Intr
= CGM
.getHLSLRuntime().getUClampIntrinsic();
19185 assert(Ty
->isSignedIntegerType());
19186 Intr
= CGM
.getHLSLRuntime().getSClampIntrinsic();
19188 return Builder
.CreateIntrinsic(
19189 /*ReturnType=*/OpX
->getType(), Intr
,
19190 ArrayRef
<Value
*>{OpX
, OpMin
, OpMax
}, nullptr, "hlsl.clamp");
19192 case Builtin::BI__builtin_hlsl_cross
: {
19193 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
19194 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
19195 assert(E
->getArg(0)->getType()->hasFloatingRepresentation() &&
19196 E
->getArg(1)->getType()->hasFloatingRepresentation() &&
19197 "cross operands must have a float representation");
19198 // make sure each vector has exactly 3 elements
19200 E
->getArg(0)->getType()->castAs
<VectorType
>()->getNumElements() == 3 &&
19201 E
->getArg(1)->getType()->castAs
<VectorType
>()->getNumElements() == 3 &&
19202 "input vectors must have 3 elements each");
19203 return Builder
.CreateIntrinsic(
19204 /*ReturnType=*/Op0
->getType(), CGM
.getHLSLRuntime().getCrossIntrinsic(),
19205 ArrayRef
<Value
*>{Op0
, Op1
}, nullptr, "hlsl.cross");
19207 case Builtin::BI__builtin_hlsl_dot
: {
19208 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
19209 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
19210 llvm::Type
*T0
= Op0
->getType();
19211 llvm::Type
*T1
= Op1
->getType();
19213 // If the arguments are scalars, just emit a multiply
19214 if (!T0
->isVectorTy() && !T1
->isVectorTy()) {
19215 if (T0
->isFloatingPointTy())
19216 return Builder
.CreateFMul(Op0
, Op1
, "hlsl.dot");
19218 if (T0
->isIntegerTy())
19219 return Builder
.CreateMul(Op0
, Op1
, "hlsl.dot");
19222 "Scalar dot product is only supported on ints and floats.");
19224 // For vectors, validate types and emit the appropriate intrinsic
19226 // A VectorSplat should have happened
19227 assert(T0
->isVectorTy() && T1
->isVectorTy() &&
19228 "Dot product of vector and scalar is not supported.");
19230 auto *VecTy0
= E
->getArg(0)->getType()->getAs
<VectorType
>();
19231 [[maybe_unused
]] auto *VecTy1
=
19232 E
->getArg(1)->getType()->getAs
<VectorType
>();
19234 assert(VecTy0
->getElementType() == VecTy1
->getElementType() &&
19235 "Dot product of vectors need the same element types.");
19237 assert(VecTy0
->getNumElements() == VecTy1
->getNumElements() &&
19238 "Dot product requires vectors to be of the same size.");
19240 return Builder
.CreateIntrinsic(
19241 /*ReturnType=*/T0
->getScalarType(),
19242 getDotProductIntrinsic(CGM
.getHLSLRuntime(), VecTy0
->getElementType()),
19243 ArrayRef
<Value
*>{Op0
, Op1
}, nullptr, "hlsl.dot");
19245 case Builtin::BI__builtin_hlsl_dot4add_i8packed
: {
19246 Value
*A
= EmitScalarExpr(E
->getArg(0));
19247 Value
*B
= EmitScalarExpr(E
->getArg(1));
19248 Value
*C
= EmitScalarExpr(E
->getArg(2));
19250 Intrinsic::ID ID
= CGM
.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
19251 return Builder
.CreateIntrinsic(
19252 /*ReturnType=*/C
->getType(), ID
, ArrayRef
<Value
*>{A
, B
, C
}, nullptr,
19253 "hlsl.dot4add.i8packed");
19255 case Builtin::BI__builtin_hlsl_dot4add_u8packed
: {
19256 Value
*A
= EmitScalarExpr(E
->getArg(0));
19257 Value
*B
= EmitScalarExpr(E
->getArg(1));
19258 Value
*C
= EmitScalarExpr(E
->getArg(2));
19260 Intrinsic::ID ID
= CGM
.getHLSLRuntime().getDot4AddU8PackedIntrinsic();
19261 return Builder
.CreateIntrinsic(
19262 /*ReturnType=*/C
->getType(), ID
, ArrayRef
<Value
*>{A
, B
, C
}, nullptr,
19263 "hlsl.dot4add.u8packed");
19265 case Builtin::BI__builtin_hlsl_elementwise_firstbithigh
: {
19267 Value
*X
= EmitScalarExpr(E
->getArg(0));
19269 return Builder
.CreateIntrinsic(
19270 /*ReturnType=*/ConvertType(E
->getType()),
19271 getFirstBitHighIntrinsic(CGM
.getHLSLRuntime(), E
->getArg(0)->getType()),
19272 ArrayRef
<Value
*>{X
}, nullptr, "hlsl.firstbithigh");
19274 case Builtin::BI__builtin_hlsl_lerp
: {
19275 Value
*X
= EmitScalarExpr(E
->getArg(0));
19276 Value
*Y
= EmitScalarExpr(E
->getArg(1));
19277 Value
*S
= EmitScalarExpr(E
->getArg(2));
19278 if (!E
->getArg(0)->getType()->hasFloatingRepresentation())
19279 llvm_unreachable("lerp operand must have a float representation");
19280 return Builder
.CreateIntrinsic(
19281 /*ReturnType=*/X
->getType(), CGM
.getHLSLRuntime().getLerpIntrinsic(),
19282 ArrayRef
<Value
*>{X
, Y
, S
}, nullptr, "hlsl.lerp");
19284 case Builtin::BI__builtin_hlsl_length
: {
19285 Value
*X
= EmitScalarExpr(E
->getArg(0));
19287 assert(E
->getArg(0)->getType()->hasFloatingRepresentation() &&
19288 "length operand must have a float representation");
19289 // if the operand is a scalar, we can use the fabs llvm intrinsic directly
19290 if (!E
->getArg(0)->getType()->isVectorType())
19291 return EmitFAbs(*this, X
);
19293 return Builder
.CreateIntrinsic(
19294 /*ReturnType=*/X
->getType()->getScalarType(),
19295 CGM
.getHLSLRuntime().getLengthIntrinsic(), ArrayRef
<Value
*>{X
},
19296 nullptr, "hlsl.length");
19298 case Builtin::BI__builtin_hlsl_normalize
: {
19299 Value
*X
= EmitScalarExpr(E
->getArg(0));
19301 assert(E
->getArg(0)->getType()->hasFloatingRepresentation() &&
19302 "normalize operand must have a float representation");
19304 return Builder
.CreateIntrinsic(
19305 /*ReturnType=*/X
->getType(),
19306 CGM
.getHLSLRuntime().getNormalizeIntrinsic(), ArrayRef
<Value
*>{X
},
19307 nullptr, "hlsl.normalize");
19309 case Builtin::BI__builtin_hlsl_elementwise_degrees
: {
19310 Value
*X
= EmitScalarExpr(E
->getArg(0));
19312 assert(E
->getArg(0)->getType()->hasFloatingRepresentation() &&
19313 "degree operand must have a float representation");
19315 return Builder
.CreateIntrinsic(
19316 /*ReturnType=*/X
->getType(), CGM
.getHLSLRuntime().getDegreesIntrinsic(),
19317 ArrayRef
<Value
*>{X
}, nullptr, "hlsl.degrees");
19319 case Builtin::BI__builtin_hlsl_elementwise_frac
: {
19320 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
19321 if (!E
->getArg(0)->getType()->hasFloatingRepresentation())
19322 llvm_unreachable("frac operand must have a float representation");
19323 return Builder
.CreateIntrinsic(
19324 /*ReturnType=*/Op0
->getType(), CGM
.getHLSLRuntime().getFracIntrinsic(),
19325 ArrayRef
<Value
*>{Op0
}, nullptr, "hlsl.frac");
19327 case Builtin::BI__builtin_hlsl_elementwise_isinf
: {
19328 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
19329 llvm::Type
*Xty
= Op0
->getType();
19330 llvm::Type
*retType
= llvm::Type::getInt1Ty(this->getLLVMContext());
19331 if (Xty
->isVectorTy()) {
19332 auto *XVecTy
= E
->getArg(0)->getType()->getAs
<VectorType
>();
19333 retType
= llvm::VectorType::get(
19334 retType
, ElementCount::getFixed(XVecTy
->getNumElements()));
19336 if (!E
->getArg(0)->getType()->hasFloatingRepresentation())
19337 llvm_unreachable("isinf operand must have a float representation");
19338 return Builder
.CreateIntrinsic(retType
, Intrinsic::dx_isinf
,
19339 ArrayRef
<Value
*>{Op0
}, nullptr, "dx.isinf");
19341 case Builtin::BI__builtin_hlsl_mad
: {
19342 Value
*M
= EmitScalarExpr(E
->getArg(0));
19343 Value
*A
= EmitScalarExpr(E
->getArg(1));
19344 Value
*B
= EmitScalarExpr(E
->getArg(2));
19345 if (E
->getArg(0)->getType()->hasFloatingRepresentation())
19346 return Builder
.CreateIntrinsic(
19347 /*ReturnType*/ M
->getType(), Intrinsic::fmuladd
,
19348 ArrayRef
<Value
*>{M
, A
, B
}, nullptr, "hlsl.fmad");
19350 if (E
->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
19351 if (CGM
.getTarget().getTriple().getArch() == llvm::Triple::dxil
)
19352 return Builder
.CreateIntrinsic(
19353 /*ReturnType*/ M
->getType(), Intrinsic::dx_imad
,
19354 ArrayRef
<Value
*>{M
, A
, B
}, nullptr, "dx.imad");
19356 Value
*Mul
= Builder
.CreateNSWMul(M
, A
);
19357 return Builder
.CreateNSWAdd(Mul
, B
);
19359 assert(E
->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
19360 if (CGM
.getTarget().getTriple().getArch() == llvm::Triple::dxil
)
19361 return Builder
.CreateIntrinsic(
19362 /*ReturnType=*/M
->getType(), Intrinsic::dx_umad
,
19363 ArrayRef
<Value
*>{M
, A
, B
}, nullptr, "dx.umad");
19365 Value
*Mul
= Builder
.CreateNUWMul(M
, A
);
19366 return Builder
.CreateNUWAdd(Mul
, B
);
19368 case Builtin::BI__builtin_hlsl_elementwise_rcp
: {
19369 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
19370 if (!E
->getArg(0)->getType()->hasFloatingRepresentation())
19371 llvm_unreachable("rcp operand must have a float representation");
19372 llvm::Type
*Ty
= Op0
->getType();
19373 llvm::Type
*EltTy
= Ty
->getScalarType();
19374 Constant
*One
= Ty
->isVectorTy()
19375 ? ConstantVector::getSplat(
19376 ElementCount::getFixed(
19377 cast
<FixedVectorType
>(Ty
)->getNumElements()),
19378 ConstantFP::get(EltTy
, 1.0))
19379 : ConstantFP::get(EltTy
, 1.0);
19380 return Builder
.CreateFDiv(One
, Op0
, "hlsl.rcp");
19382 case Builtin::BI__builtin_hlsl_elementwise_rsqrt
: {
19383 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
19384 if (!E
->getArg(0)->getType()->hasFloatingRepresentation())
19385 llvm_unreachable("rsqrt operand must have a float representation");
19386 return Builder
.CreateIntrinsic(
19387 /*ReturnType=*/Op0
->getType(), CGM
.getHLSLRuntime().getRsqrtIntrinsic(),
19388 ArrayRef
<Value
*>{Op0
}, nullptr, "hlsl.rsqrt");
19390 case Builtin::BI__builtin_hlsl_elementwise_saturate
: {
19391 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
19392 assert(E
->getArg(0)->getType()->hasFloatingRepresentation() &&
19393 "saturate operand must have a float representation");
19394 return Builder
.CreateIntrinsic(
19395 /*ReturnType=*/Op0
->getType(),
19396 CGM
.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef
<Value
*>{Op0
},
19397 nullptr, "hlsl.saturate");
19399 case Builtin::BI__builtin_hlsl_select
: {
19400 Value
*OpCond
= EmitScalarExpr(E
->getArg(0));
19401 RValue RValTrue
= EmitAnyExpr(E
->getArg(1));
19403 RValTrue
.isScalar()
19404 ? RValTrue
.getScalarVal()
19405 : RValTrue
.getAggregatePointer(E
->getArg(1)->getType(), *this);
19406 RValue RValFalse
= EmitAnyExpr(E
->getArg(2));
19408 RValFalse
.isScalar()
19409 ? RValFalse
.getScalarVal()
19410 : RValFalse
.getAggregatePointer(E
->getArg(2)->getType(), *this);
19413 Builder
.CreateSelect(OpCond
, OpTrue
, OpFalse
, "hlsl.select");
19414 if (!RValTrue
.isScalar())
19415 Builder
.CreateStore(SelectVal
, ReturnValue
.getAddress(),
19416 ReturnValue
.isVolatile());
19420 case Builtin::BI__builtin_hlsl_step
: {
19421 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
19422 Value
*Op1
= EmitScalarExpr(E
->getArg(1));
19423 assert(E
->getArg(0)->getType()->hasFloatingRepresentation() &&
19424 E
->getArg(1)->getType()->hasFloatingRepresentation() &&
19425 "step operands must have a float representation");
19426 return Builder
.CreateIntrinsic(
19427 /*ReturnType=*/Op0
->getType(), CGM
.getHLSLRuntime().getStepIntrinsic(),
19428 ArrayRef
<Value
*>{Op0
, Op1
}, nullptr, "hlsl.step");
19430 case Builtin::BI__builtin_hlsl_wave_active_any_true
: {
19431 Value
*Op
= EmitScalarExpr(E
->getArg(0));
19432 assert(Op
->getType()->isIntegerTy(1) &&
19433 "Intrinsic WaveActiveAnyTrue operand must be a bool");
19435 Intrinsic::ID ID
= CGM
.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic();
19436 return EmitRuntimeCall(
19437 Intrinsic::getOrInsertDeclaration(&CGM
.getModule(), ID
), {Op
});
19439 case Builtin::BI__builtin_hlsl_wave_active_count_bits
: {
19440 Value
*OpExpr
= EmitScalarExpr(E
->getArg(0));
19441 Intrinsic::ID ID
= CGM
.getHLSLRuntime().getWaveActiveCountBitsIntrinsic();
19442 return EmitRuntimeCall(
19443 Intrinsic::getOrInsertDeclaration(&CGM
.getModule(), ID
),
19446 case Builtin::BI__builtin_hlsl_wave_get_lane_index
: {
19447 // We don't define a SPIR-V intrinsic; instead, it is a SPIR-V built-in
19448 // defined in SPIRVBuiltins.td. So we manually get the matching name
19449 // for the DirectX intrinsic and the demangled builtin name.
19450 switch (CGM
.getTarget().getTriple().getArch()) {
19451 case llvm::Triple::dxil
:
19452 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
19453 &CGM
.getModule(), Intrinsic::dx_wave_getlaneindex
));
19454 case llvm::Triple::spirv
:
19455 return EmitRuntimeCall(CGM
.CreateRuntimeFunction(
19456 llvm::FunctionType::get(IntTy
, {}, false),
19457 "__hlsl_wave_get_lane_index", {}, false, true));
19460 "Intrinsic WaveGetLaneIndex not supported by target architecture");
19463 case Builtin::BI__builtin_hlsl_wave_is_first_lane
: {
19464 Intrinsic::ID ID
= CGM
.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
19465 return EmitRuntimeCall(
19466 Intrinsic::getOrInsertDeclaration(&CGM
.getModule(), ID
));
19468 case Builtin::BI__builtin_hlsl_wave_read_lane_at
: {
19469 // Due to the use of variadic arguments we must explicitly retrieve them and
19470 // create our function type.
19471 Value
*OpExpr
= EmitScalarExpr(E
->getArg(0));
19472 Value
*OpIndex
= EmitScalarExpr(E
->getArg(1));
19473 llvm::FunctionType
*FT
= llvm::FunctionType::get(
19474 OpExpr
->getType(), ArrayRef
{OpExpr
->getType(), OpIndex
->getType()},
19477 // Get overloaded name
19479 Intrinsic::getName(CGM
.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
19480 ArrayRef
{OpExpr
->getType()}, &CGM
.getModule());
19481 return EmitRuntimeCall(CGM
.CreateRuntimeFunction(FT
, Name
, {},
19483 /*AssumeConvergent=*/true),
19484 ArrayRef
{OpExpr
, OpIndex
}, "hlsl.wave.readlane");
19486 case Builtin::BI__builtin_hlsl_elementwise_sign
: {
19487 auto *Arg0
= E
->getArg(0);
19488 Value
*Op0
= EmitScalarExpr(Arg0
);
19489 llvm::Type
*Xty
= Op0
->getType();
19490 llvm::Type
*retType
= llvm::Type::getInt32Ty(this->getLLVMContext());
19491 if (Xty
->isVectorTy()) {
19492 auto *XVecTy
= Arg0
->getType()->getAs
<VectorType
>();
19493 retType
= llvm::VectorType::get(
19494 retType
, ElementCount::getFixed(XVecTy
->getNumElements()));
19496 assert((Arg0
->getType()->hasFloatingRepresentation() ||
19497 Arg0
->getType()->hasIntegerRepresentation()) &&
19498 "sign operand must have a float or int representation");
19500 if (Arg0
->getType()->hasUnsignedIntegerRepresentation()) {
19501 Value
*Cmp
= Builder
.CreateICmpEQ(Op0
, ConstantInt::get(Xty
, 0));
19502 return Builder
.CreateSelect(Cmp
, ConstantInt::get(retType
, 0),
19503 ConstantInt::get(retType
, 1), "hlsl.sign");
19506 return Builder
.CreateIntrinsic(
19507 retType
, CGM
.getHLSLRuntime().getSignIntrinsic(),
19508 ArrayRef
<Value
*>{Op0
}, nullptr, "hlsl.sign");
19510 case Builtin::BI__builtin_hlsl_elementwise_radians
: {
19511 Value
*Op0
= EmitScalarExpr(E
->getArg(0));
19512 assert(E
->getArg(0)->getType()->hasFloatingRepresentation() &&
19513 "radians operand must have a float representation");
19514 return Builder
.CreateIntrinsic(
19515 /*ReturnType=*/Op0
->getType(),
19516 CGM
.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef
<Value
*>{Op0
},
19517 nullptr, "hlsl.radians");
19519 case Builtin::BI__builtin_hlsl_buffer_update_counter
: {
19520 Value
*ResHandle
= EmitScalarExpr(E
->getArg(0));
19521 Value
*Offset
= EmitScalarExpr(E
->getArg(1));
19522 Value
*OffsetI8
= Builder
.CreateIntCast(Offset
, Int8Ty
, true);
19523 return Builder
.CreateIntrinsic(
19524 /*ReturnType=*/Offset
->getType(),
19525 CGM
.getHLSLRuntime().getBufferUpdateCounterIntrinsic(),
19526 ArrayRef
<Value
*>{ResHandle
, OffsetI8
}, nullptr);
19528 case Builtin::BI__builtin_hlsl_elementwise_splitdouble
: {
19530 assert((E
->getArg(0)->getType()->hasFloatingRepresentation() &&
19531 E
->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
19532 E
->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
19533 "asuint operands types mismatch");
19534 return handleHlslSplitdouble(E
, this);
19536 case Builtin::BI__builtin_hlsl_elementwise_clip
:
19537 assert(E
->getArg(0)->getType()->hasFloatingRepresentation() &&
19538 "clip operands types mismatch");
19539 return handleHlslClip(E
, this);
19540 case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync
: {
19542 CGM
.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
19543 return EmitRuntimeCall(
19544 Intrinsic::getOrInsertDeclaration(&CGM
.getModule(), ID
));
/// Attach address-space MMRA tags to \p Inst, taken from the trailing
/// arguments of the builtin call \p E.
///
/// Each argument of \p E from index 2 onward is emitted as a scalar value.
/// When that value is a constant string, a {"amdgpu-as", <string>} tag is
/// appended to a local list. The list then has adjacent duplicates removed
/// (llvm::unique + erase) and is stored on \p Inst under the
/// LLVMContext::MD_mmra metadata kind. The diagnostic "expected an address
/// space name as a string literal" is issued through CGM.Error at the call's
/// source location.
///
/// \param Inst instruction that receives the metadata; its LLVMContext is the
///             one used to build the metadata node.
/// \param E    the call expression whose arguments [2, getNumArgs()) are read.
///
/// NOTE(review): this listing skips several original line numbers inside the
/// function (19558, 19562-19563, 19566-19568). The declaration of `AS`, the
/// control flow that separates the success path from the CGM.Error call, and
/// anything that runs between the loop and llvm::unique are therefore not
/// visible here — confirm against the complete file before relying on the
/// details below.
19550 void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction
*Inst
,
19551 const CallExpr
*E
) {
// Tag name shared by every entry pushed into MMRAs below.
19552 constexpr const char *Tag
= "amdgpu-as";
19554 LLVMContext
&Ctx
= Inst
->getContext();
19555 SmallVector
<MMRAMetadata::TagT
, 3> MMRAs
;
// Scanning starts at argument index 2; presumably the earlier arguments are
// the fence's own operands — TODO confirm against the builtin's signature.
19556 for (unsigned K
= 2; K
< E
->getNumArgs(); ++K
) {
19557 llvm::Value
*V
= EmitScalarExpr(E
->getArg(K
));
// On success the constant string behind V is made available through AS and
// recorded as one {Tag, AS} pair.
19559 if (llvm::getConstantStringInfo(V
, AS
)) {
19560 MMRAs
.push_back({Tag
, AS
});
19561 // TODO: Delete the resulting unused constant?
// Report the problem at the location of the call expression itself.
// NOTE(review): how this is kept off the success path is on lines not shown.
19564 CGM
.Error(E
->getExprLoc(),
19565 "expected an address space name as a string literal");
// llvm::unique only collapses neighbouring equal elements, so this removes
// repeated tags only where they sit next to each other.
// NOTE(review): verify that the tags are ordered before this point.
19569 MMRAs
.erase(llvm::unique(MMRAs
), MMRAs
.end());
// Build the metadata node in Inst's context and attach it as MD_mmra.
19570 Inst
->setMetadata(LLVMContext::MD_mmra
, MMRAMetadata::getMD(Ctx
, MMRAs
));
19573 Value
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID
,
19574 const CallExpr
*E
) {
19575 llvm::AtomicOrdering AO
= llvm::AtomicOrdering::SequentiallyConsistent
;
19576 llvm::SyncScope::ID SSID
;
19577 switch (BuiltinID
) {
19578 case AMDGPU::BI__builtin_amdgcn_div_scale
:
19579 case AMDGPU::BI__builtin_amdgcn_div_scalef
: {
19580 // Translate from the intrinsic's struct return to the builtin's out
19583 Address FlagOutPtr
= EmitPointerWithAlignment(E
->getArg(3));
19585 llvm::Value
*X
= EmitScalarExpr(E
->getArg(0));
19586 llvm::Value
*Y
= EmitScalarExpr(E
->getArg(1));
19587 llvm::Value
*Z
= EmitScalarExpr(E
->getArg(2));
19589 llvm::Function
*Callee
= CGM
.getIntrinsic(Intrinsic::amdgcn_div_scale
,
19592 llvm::Value
*Tmp
= Builder
.CreateCall(Callee
, {X
, Y
, Z
});
19594 llvm::Value
*Result
= Builder
.CreateExtractValue(Tmp
, 0);
19595 llvm::Value
*Flag
= Builder
.CreateExtractValue(Tmp
, 1);
19597 llvm::Type
*RealFlagType
= FlagOutPtr
.getElementType();
19599 llvm::Value
*FlagExt
= Builder
.CreateZExt(Flag
, RealFlagType
);
19600 Builder
.CreateStore(FlagExt
, FlagOutPtr
);
19603 case AMDGPU::BI__builtin_amdgcn_div_fmas
:
19604 case AMDGPU::BI__builtin_amdgcn_div_fmasf
: {
19605 llvm::Value
*Src0
= EmitScalarExpr(E
->getArg(0));
19606 llvm::Value
*Src1
= EmitScalarExpr(E
->getArg(1));
19607 llvm::Value
*Src2
= EmitScalarExpr(E
->getArg(2));
19608 llvm::Value
*Src3
= EmitScalarExpr(E
->getArg(3));
19610 llvm::Function
*F
= CGM
.getIntrinsic(Intrinsic::amdgcn_div_fmas
,
19612 llvm::Value
*Src3ToBool
= Builder
.CreateIsNotNull(Src3
);
19613 return Builder
.CreateCall(F
, {Src0
, Src1
, Src2
, Src3ToBool
});
19616 case AMDGPU::BI__builtin_amdgcn_ds_swizzle
:
19617 return emitBuiltinWithOneOverloadedType
<2>(*this, E
,
19618 Intrinsic::amdgcn_ds_swizzle
);
19619 case AMDGPU::BI__builtin_amdgcn_mov_dpp8
:
19620 case AMDGPU::BI__builtin_amdgcn_mov_dpp
:
19621 case AMDGPU::BI__builtin_amdgcn_update_dpp
: {
19622 llvm::SmallVector
<llvm::Value
*, 6> Args
;
19623 // Find out if any arguments are required to be integer constant
19625 unsigned ICEArguments
= 0;
19626 ASTContext::GetBuiltinTypeError Error
;
19627 getContext().GetBuiltinType(BuiltinID
, Error
, &ICEArguments
);
19628 assert(Error
== ASTContext::GE_None
&& "Should not codegen an error");
19629 llvm::Type
*DataTy
= ConvertType(E
->getArg(0)->getType());
19630 unsigned Size
= DataTy
->getPrimitiveSizeInBits();
19631 llvm::Type
*IntTy
=
19632 llvm::IntegerType::get(Builder
.getContext(), std::max(Size
, 32u));
19634 CGM
.getIntrinsic(BuiltinID
== AMDGPU::BI__builtin_amdgcn_mov_dpp8
19635 ? Intrinsic::amdgcn_mov_dpp8
19636 : Intrinsic::amdgcn_update_dpp
,
19638 assert(E
->getNumArgs() == 5 || E
->getNumArgs() == 6 ||
19639 E
->getNumArgs() == 2);
19640 bool InsertOld
= BuiltinID
== AMDGPU::BI__builtin_amdgcn_mov_dpp
;
19642 Args
.push_back(llvm::PoisonValue::get(IntTy
));
19643 for (unsigned I
= 0; I
!= E
->getNumArgs(); ++I
) {
19644 llvm::Value
*V
= EmitScalarOrConstFoldImmArg(ICEArguments
, I
, E
);
19645 if (I
< (BuiltinID
== AMDGPU::BI__builtin_amdgcn_update_dpp
? 2u : 1u) &&
19647 if (!DataTy
->isIntegerTy())
19648 V
= Builder
.CreateBitCast(
19649 V
, llvm::IntegerType::get(Builder
.getContext(), Size
));
19650 V
= Builder
.CreateZExtOrBitCast(V
, IntTy
);
19652 llvm::Type
*ExpTy
=
19653 F
->getFunctionType()->getFunctionParamType(I
+ InsertOld
);
19654 Args
.push_back(Builder
.CreateTruncOrBitCast(V
, ExpTy
));
19656 Value
*V
= Builder
.CreateCall(F
, Args
);
19657 if (Size
< 32 && !DataTy
->isIntegerTy())
19658 V
= Builder
.CreateTrunc(
19659 V
, llvm::IntegerType::get(Builder
.getContext(), Size
));
19660 return Builder
.CreateTruncOrBitCast(V
, DataTy
);
19662 case AMDGPU::BI__builtin_amdgcn_permlane16
:
19663 case AMDGPU::BI__builtin_amdgcn_permlanex16
:
19664 return emitBuiltinWithOneOverloadedType
<6>(
19666 BuiltinID
== AMDGPU::BI__builtin_amdgcn_permlane16
19667 ? Intrinsic::amdgcn_permlane16
19668 : Intrinsic::amdgcn_permlanex16
);
19669 case AMDGPU::BI__builtin_amdgcn_permlane64
:
19670 return emitBuiltinWithOneOverloadedType
<1>(*this, E
,
19671 Intrinsic::amdgcn_permlane64
);
19672 case AMDGPU::BI__builtin_amdgcn_readlane
:
19673 return emitBuiltinWithOneOverloadedType
<2>(*this, E
,
19674 Intrinsic::amdgcn_readlane
);
19675 case AMDGPU::BI__builtin_amdgcn_readfirstlane
:
19676 return emitBuiltinWithOneOverloadedType
<1>(*this, E
,
19677 Intrinsic::amdgcn_readfirstlane
);
19678 case AMDGPU::BI__builtin_amdgcn_div_fixup
:
19679 case AMDGPU::BI__builtin_amdgcn_div_fixupf
:
19680 case AMDGPU::BI__builtin_amdgcn_div_fixuph
:
19681 return emitBuiltinWithOneOverloadedType
<3>(*this, E
,
19682 Intrinsic::amdgcn_div_fixup
);
19683 case AMDGPU::BI__builtin_amdgcn_trig_preop
:
19684 case AMDGPU::BI__builtin_amdgcn_trig_preopf
:
19685 return emitFPIntBuiltin(*this, E
, Intrinsic::amdgcn_trig_preop
);
19686 case AMDGPU::BI__builtin_amdgcn_rcp
:
19687 case AMDGPU::BI__builtin_amdgcn_rcpf
:
19688 case AMDGPU::BI__builtin_amdgcn_rcph
:
19689 return emitBuiltinWithOneOverloadedType
<1>(*this, E
, Intrinsic::amdgcn_rcp
);
19690 case AMDGPU::BI__builtin_amdgcn_sqrt
:
19691 case AMDGPU::BI__builtin_amdgcn_sqrtf
:
19692 case AMDGPU::BI__builtin_amdgcn_sqrth
:
19693 return emitBuiltinWithOneOverloadedType
<1>(*this, E
,
19694 Intrinsic::amdgcn_sqrt
);
19695 case AMDGPU::BI__builtin_amdgcn_rsq
:
19696 case AMDGPU::BI__builtin_amdgcn_rsqf
:
19697 case AMDGPU::BI__builtin_amdgcn_rsqh
:
19698 return emitBuiltinWithOneOverloadedType
<1>(*this, E
, Intrinsic::amdgcn_rsq
);
19699 case AMDGPU::BI__builtin_amdgcn_rsq_clamp
:
19700 case AMDGPU::BI__builtin_amdgcn_rsq_clampf
:
19701 return emitBuiltinWithOneOverloadedType
<1>(*this, E
,
19702 Intrinsic::amdgcn_rsq_clamp
);
19703 case AMDGPU::BI__builtin_amdgcn_sinf
:
19704 case AMDGPU::BI__builtin_amdgcn_sinh
:
19705 return emitBuiltinWithOneOverloadedType
<1>(*this, E
, Intrinsic::amdgcn_sin
);
19706 case AMDGPU::BI__builtin_amdgcn_cosf
:
19707 case AMDGPU::BI__builtin_amdgcn_cosh
:
19708 return emitBuiltinWithOneOverloadedType
<1>(*this, E
, Intrinsic::amdgcn_cos
);
19709 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr
:
19710 return EmitAMDGPUDispatchPtr(*this, E
);
19711 case AMDGPU::BI__builtin_amdgcn_logf
:
19712 return emitBuiltinWithOneOverloadedType
<1>(*this, E
, Intrinsic::amdgcn_log
);
19713 case AMDGPU::BI__builtin_amdgcn_exp2f
:
19714 return emitBuiltinWithOneOverloadedType
<1>(*this, E
,
19715 Intrinsic::amdgcn_exp2
);
19716 case AMDGPU::BI__builtin_amdgcn_log_clampf
:
19717 return emitBuiltinWithOneOverloadedType
<1>(*this, E
,
19718 Intrinsic::amdgcn_log_clamp
);
19719 case AMDGPU::BI__builtin_amdgcn_ldexp
:
19720 case AMDGPU::BI__builtin_amdgcn_ldexpf
: {
19721 llvm::Value
*Src0
= EmitScalarExpr(E
->getArg(0));
19722 llvm::Value
*Src1
= EmitScalarExpr(E
->getArg(1));
19723 llvm::Function
*F
=
19724 CGM
.getIntrinsic(Intrinsic::ldexp
, {Src0
->getType(), Src1
->getType()});
19725 return Builder
.CreateCall(F
, {Src0
, Src1
});
19727 case AMDGPU::BI__builtin_amdgcn_ldexph
: {
19728 // The raw instruction has a different behavior for out of bounds exponent
19729 // values (implicit truncation instead of saturate to short_min/short_max).
19730 llvm::Value
*Src0
= EmitScalarExpr(E
->getArg(0));
19731 llvm::Value
*Src1
= EmitScalarExpr(E
->getArg(1));
19732 llvm::Function
*F
=
19733 CGM
.getIntrinsic(Intrinsic::ldexp
, {Src0
->getType(), Int16Ty
});
19734 return Builder
.CreateCall(F
, {Src0
, Builder
.CreateTrunc(Src1
, Int16Ty
)});
19736 case AMDGPU::BI__builtin_amdgcn_frexp_mant
:
19737 case AMDGPU::BI__builtin_amdgcn_frexp_mantf
:
19738 case AMDGPU::BI__builtin_amdgcn_frexp_manth
:
19739 return emitBuiltinWithOneOverloadedType
<1>(*this, E
,
19740 Intrinsic::amdgcn_frexp_mant
);
19741 case AMDGPU::BI__builtin_amdgcn_frexp_exp
:
19742 case AMDGPU::BI__builtin_amdgcn_frexp_expf
: {
19743 Value
*Src0
= EmitScalarExpr(E
->getArg(0));
19744 Function
*F
= CGM
.getIntrinsic(Intrinsic::amdgcn_frexp_exp
,
19745 { Builder
.getInt32Ty(), Src0
->getType() });
19746 return Builder
.CreateCall(F
, Src0
);
19748 case AMDGPU::BI__builtin_amdgcn_frexp_exph
: {
19749 Value
*Src0
= EmitScalarExpr(E
->getArg(0));
19750 Function
*F
= CGM
.getIntrinsic(Intrinsic::amdgcn_frexp_exp
,
19751 { Builder
.getInt16Ty(), Src0
->getType() });
19752 return Builder
.CreateCall(F
, Src0
);
19754 case AMDGPU::BI__builtin_amdgcn_fract
:
19755 case AMDGPU::BI__builtin_amdgcn_fractf
:
19756 case AMDGPU::BI__builtin_amdgcn_fracth
:
19757 return emitBuiltinWithOneOverloadedType
<1>(*this, E
,
19758 Intrinsic::amdgcn_fract
);
19759 case AMDGPU::BI__builtin_amdgcn_lerp
:
19760 return emitBuiltinWithOneOverloadedType
<3>(*this, E
,
19761 Intrinsic::amdgcn_lerp
);
19762 case AMDGPU::BI__builtin_amdgcn_ubfe
:
19763 return emitBuiltinWithOneOverloadedType
<3>(*this, E
,
19764 Intrinsic::amdgcn_ubfe
);
19765 case AMDGPU::BI__builtin_amdgcn_sbfe
:
19766 return emitBuiltinWithOneOverloadedType
<3>(*this, E
,
19767 Intrinsic::amdgcn_sbfe
);
19768 case AMDGPU::BI__builtin_amdgcn_ballot_w32
:
19769 case AMDGPU::BI__builtin_amdgcn_ballot_w64
: {
19770 llvm::Type
*ResultType
= ConvertType(E
->getType());
19771 llvm::Value
*Src
= EmitScalarExpr(E
->getArg(0));
19772 Function
*F
= CGM
.getIntrinsic(Intrinsic::amdgcn_ballot
, { ResultType
});
19773 return Builder
.CreateCall(F
, { Src
});
19775 case AMDGPU::BI__builtin_amdgcn_uicmp
:
19776 case AMDGPU::BI__builtin_amdgcn_uicmpl
:
19777 case AMDGPU::BI__builtin_amdgcn_sicmp
:
19778 case AMDGPU::BI__builtin_amdgcn_sicmpl
: {
19779 llvm::Value
*Src0
= EmitScalarExpr(E
->getArg(0));
19780 llvm::Value
*Src1
= EmitScalarExpr(E
->getArg(1));
19781 llvm::Value
*Src2
= EmitScalarExpr(E
->getArg(2));
19783 // FIXME-GFX10: How should 32 bit mask be handled?
19784 Function
*F
= CGM
.getIntrinsic(Intrinsic::amdgcn_icmp
,
19785 { Builder
.getInt64Ty(), Src0
->getType() });
19786 return Builder
.CreateCall(F
, { Src0
, Src1
, Src2
});
19788 case AMDGPU::BI__builtin_amdgcn_fcmp
:
19789 case AMDGPU::BI__builtin_amdgcn_fcmpf
: {
19790 llvm::Value
*Src0
= EmitScalarExpr(E
->getArg(0));
19791 llvm::Value
*Src1
= EmitScalarExpr(E
->getArg(1));
19792 llvm::Value
*Src2
= EmitScalarExpr(E
->getArg(2));
19794 // FIXME-GFX10: How should 32 bit mask be handled?
19795 Function
*F
= CGM
.getIntrinsic(Intrinsic::amdgcn_fcmp
,
19796 { Builder
.getInt64Ty(), Src0
->getType() });
19797 return Builder
.CreateCall(F
, { Src0
, Src1
, Src2
});
19799 case AMDGPU::BI__builtin_amdgcn_class
:
19800 case AMDGPU::BI__builtin_amdgcn_classf
:
19801 case AMDGPU::BI__builtin_amdgcn_classh
:
19802 return emitFPIntBuiltin(*this, E
, Intrinsic::amdgcn_class
);
19803 case AMDGPU::BI__builtin_amdgcn_fmed3f
:
19804 case AMDGPU::BI__builtin_amdgcn_fmed3h
:
19805 return emitBuiltinWithOneOverloadedType
<3>(*this, E
,
19806 Intrinsic::amdgcn_fmed3
);
19807 case AMDGPU::BI__builtin_amdgcn_ds_append
:
19808 case AMDGPU::BI__builtin_amdgcn_ds_consume
: {
19809 Intrinsic::ID Intrin
= BuiltinID
== AMDGPU::BI__builtin_amdgcn_ds_append
?
19810 Intrinsic::amdgcn_ds_append
: Intrinsic::amdgcn_ds_consume
;
19811 Value
*Src0
= EmitScalarExpr(E
->getArg(0));
19812 Function
*F
= CGM
.getIntrinsic(Intrin
, { Src0
->getType() });
19813 return Builder
.CreateCall(F
, { Src0
, Builder
.getFalse() });
19815 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32
:
19816 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32
:
19817 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16
:
19818 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16
:
19819 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16
:
19820 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16
:
19821 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16
:
19822 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16
:
19823 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32
:
19824 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32
:
19825 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32
:
19826 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16
:
19827 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16
:
19828 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16
: {
19830 switch (BuiltinID
) {
19831 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32
:
19832 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32
:
19833 IID
= Intrinsic::amdgcn_global_load_tr_b64
;
19835 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16
:
19836 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16
:
19837 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16
:
19838 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16
:
19839 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16
:
19840 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16
:
19841 IID
= Intrinsic::amdgcn_global_load_tr_b128
;
19843 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32
:
19844 IID
= Intrinsic::amdgcn_ds_read_tr4_b64
;
19846 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32
:
19847 IID
= Intrinsic::amdgcn_ds_read_tr8_b64
;
19849 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32
:
19850 IID
= Intrinsic::amdgcn_ds_read_tr6_b96
;
19852 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16
:
19853 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16
:
19854 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16
:
19855 IID
= Intrinsic::amdgcn_ds_read_tr16_b64
;
19858 llvm::Type
*LoadTy
= ConvertType(E
->getType());
19859 llvm::Value
*Addr
= EmitScalarExpr(E
->getArg(0));
19860 llvm::Function
*F
= CGM
.getIntrinsic(IID
, {LoadTy
});
19861 return Builder
.CreateCall(F
, {Addr
});
19863 case AMDGPU::BI__builtin_amdgcn_get_fpenv
: {
19864 Function
*F
= CGM
.getIntrinsic(Intrinsic::get_fpenv
,
19865 {llvm::Type::getInt64Ty(getLLVMContext())});
19866 return Builder
.CreateCall(F
);
19868 case AMDGPU::BI__builtin_amdgcn_set_fpenv
: {
19869 Function
*F
= CGM
.getIntrinsic(Intrinsic::set_fpenv
,
19870 {llvm::Type::getInt64Ty(getLLVMContext())});
19871 llvm::Value
*Env
= EmitScalarExpr(E
->getArg(0));
19872 return Builder
.CreateCall(F
, {Env
});
19874 case AMDGPU::BI__builtin_amdgcn_read_exec
:
19875 return EmitAMDGCNBallotForExec(*this, E
, Int64Ty
, Int64Ty
, false);
19876 case AMDGPU::BI__builtin_amdgcn_read_exec_lo
:
19877 return EmitAMDGCNBallotForExec(*this, E
, Int32Ty
, Int32Ty
, false);
19878 case AMDGPU::BI__builtin_amdgcn_read_exec_hi
:
19879 return EmitAMDGCNBallotForExec(*this, E
, Int64Ty
, Int64Ty
, true);
19880 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray
:
19881 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h
:
19882 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l
:
19883 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh
: {
19884 llvm::Value
*NodePtr
= EmitScalarExpr(E
->getArg(0));
19885 llvm::Value
*RayExtent
= EmitScalarExpr(E
->getArg(1));
19886 llvm::Value
*RayOrigin
= EmitScalarExpr(E
->getArg(2));
19887 llvm::Value
*RayDir
= EmitScalarExpr(E
->getArg(3));
19888 llvm::Value
*RayInverseDir
= EmitScalarExpr(E
->getArg(4));
19889 llvm::Value
*TextureDescr
= EmitScalarExpr(E
->getArg(5));
19891 // The builtins take these arguments as vec4 where the last element is
19892 // ignored. The intrinsic takes them as vec3.
19893 RayOrigin
= Builder
.CreateShuffleVector(RayOrigin
, RayOrigin
,
19894 ArrayRef
<int>{0, 1, 2});
19896 Builder
.CreateShuffleVector(RayDir
, RayDir
, ArrayRef
<int>{0, 1, 2});
19897 RayInverseDir
= Builder
.CreateShuffleVector(RayInverseDir
, RayInverseDir
,
19898 ArrayRef
<int>{0, 1, 2});
19900 Function
*F
= CGM
.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray
,
19901 {NodePtr
->getType(), RayDir
->getType()});
19902 return Builder
.CreateCall(F
, {NodePtr
, RayExtent
, RayOrigin
, RayDir
,
19903 RayInverseDir
, TextureDescr
});
19906 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn
: {
19907 SmallVector
<Value
*, 4> Args
;
19908 for (int i
= 0, e
= E
->getNumArgs(); i
!= e
; ++i
)
19909 Args
.push_back(EmitScalarExpr(E
->getArg(i
)));
19911 Function
*F
= CGM
.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn
);
19912 Value
*Call
= Builder
.CreateCall(F
, Args
);
19913 Value
*Rtn
= Builder
.CreateExtractValue(Call
, 0);
19914 Value
*A
= Builder
.CreateExtractValue(Call
, 1);
19915 llvm::Type
*RetTy
= ConvertType(E
->getType());
19916 Value
*I0
= Builder
.CreateInsertElement(PoisonValue::get(RetTy
), Rtn
,
19918 return Builder
.CreateInsertElement(I0
, A
, 1);
19920 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4
:
19921 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
: {
19922 llvm::FixedVectorType
*VT
= FixedVectorType::get(Builder
.getInt32Ty(), 8);
19923 Function
*F
= CGM
.getIntrinsic(
19924 BuiltinID
== AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
19925 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
19926 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4
,
19929 SmallVector
<Value
*, 9> Args
;
19930 for (unsigned I
= 0, N
= E
->getNumArgs(); I
!= N
; ++I
)
19931 Args
.push_back(EmitScalarExpr(E
->getArg(I
)));
19932 return Builder
.CreateCall(F
, Args
);
19934 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32
:
19935 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32
:
19936 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64
:
19937 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64
:
19938 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32
:
19939 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32
:
19940 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64
:
19941 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64
:
19942 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32
:
19943 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64
:
19944 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32
:
19945 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64
:
19946 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32
:
19947 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64
:
19948 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32
:
19949 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64
:
19950 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12
:
19951 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12
:
19952 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12
:
19953 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12
:
19954 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12
:
19955 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12
:
19956 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12
:
19957 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12
:
19958 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12
:
19959 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12
:
19960 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12
:
19961 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12
:
19962 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12
:
19963 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12
:
19964 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12
:
19965 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12
:
19966 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12
:
19967 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12
:
19968 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12
:
19969 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12
:
19970 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12
:
19971 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12
:
19972 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32
:
19973 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64
:
19974 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32
:
19975 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64
:
19976 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32
:
19977 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64
:
19978 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32
:
19979 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64
:
19980 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32
:
19981 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64
:
19982 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32
:
19983 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64
:
19984 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32
:
19985 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64
:
19986 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32
:
19987 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64
:
19988 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32
:
19989 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64
:
19990 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32
:
19991 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64
:
19992 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32
:
19993 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64
: {
19995 // These operations perform a matrix multiplication and accumulation of
19998 // We need to specify one type for matrices AB and one for matrices CD.
19999 // Sparse matrix operations can have different types for A and B as well as
20000 // an additional type for sparsity index.
20001 // Destination type should be put before types used for source operands.
20002 SmallVector
<unsigned, 2> ArgsForMatchingMatrixTypes
;
20003 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
20004 // There is no need for the variable opsel argument, so always set it to
20006 bool AppendFalseForOpselArg
= false;
20007 unsigned BuiltinWMMAOp
;
20009 switch (BuiltinID
) {
20010 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32
:
20011 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64
:
20012 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12
:
20013 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12
:
20014 ArgsForMatchingMatrixTypes
= {2, 0}; // CD, AB
20015 BuiltinWMMAOp
= Intrinsic::amdgcn_wmma_f32_16x16x16_f16
;
20017 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32
:
20018 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64
:
20019 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12
:
20020 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12
:
20021 ArgsForMatchingMatrixTypes
= {2, 0}; // CD, AB
20022 BuiltinWMMAOp
= Intrinsic::amdgcn_wmma_f32_16x16x16_bf16
;
20024 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12
:
20025 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12
:
20026 AppendFalseForOpselArg
= true;
20028 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32
:
20029 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64
:
20030 ArgsForMatchingMatrixTypes
= {2, 0}; // CD, AB
20031 BuiltinWMMAOp
= Intrinsic::amdgcn_wmma_f16_16x16x16_f16
;
20033 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12
:
20034 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12
:
20035 AppendFalseForOpselArg
= true;
20037 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32
:
20038 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64
:
20039 ArgsForMatchingMatrixTypes
= {2, 0}; // CD, AB
20040 BuiltinWMMAOp
= Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16
;
20042 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32
:
20043 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64
:
20044 ArgsForMatchingMatrixTypes
= {2, 0}; // CD, AB
20045 BuiltinWMMAOp
= Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied
;
20047 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32
:
20048 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64
:
20049 ArgsForMatchingMatrixTypes
= {2, 0}; // CD, AB
20050 BuiltinWMMAOp
= Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied
;
20052 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32
:
20053 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64
:
20054 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12
:
20055 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12
:
20056 ArgsForMatchingMatrixTypes
= {4, 1}; // CD, AB
20057 BuiltinWMMAOp
= Intrinsic::amdgcn_wmma_i32_16x16x16_iu8
;
20059 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32
:
20060 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64
:
20061 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12
:
20062 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12
:
20063 ArgsForMatchingMatrixTypes
= {4, 1}; // CD, AB
20064 BuiltinWMMAOp
= Intrinsic::amdgcn_wmma_i32_16x16x16_iu4
;
20066 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12
:
20067 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12
:
20068 ArgsForMatchingMatrixTypes
= {2, 0}; // CD, AB
20069 BuiltinWMMAOp
= Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8
;
20071 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12
:
20072 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12
:
20073 ArgsForMatchingMatrixTypes
= {2, 0}; // CD, AB
20074 BuiltinWMMAOp
= Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8
;
20076 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12
:
20077 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12
:
20078 ArgsForMatchingMatrixTypes
= {2, 0}; // CD, AB
20079 BuiltinWMMAOp
= Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8
;
20081 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12
:
20082 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12
:
20083 ArgsForMatchingMatrixTypes
= {2, 0}; // CD, AB
20084 BuiltinWMMAOp
= Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8
;
20086 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12
:
20087 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12
:
20088 ArgsForMatchingMatrixTypes
= {4, 1}; // CD, AB
20089 BuiltinWMMAOp
= Intrinsic::amdgcn_wmma_i32_16x16x32_iu4
;
20091 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32
:
20092 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64
:
20093 ArgsForMatchingMatrixTypes
= {2, 0, 1, 3}; // CD, A, B, Index
20094 BuiltinWMMAOp
= Intrinsic::amdgcn_swmmac_f32_16x16x32_f16
;
20096 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32
:
20097 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64
:
20098 ArgsForMatchingMatrixTypes
= {2, 0, 1, 3}; // CD, A, B, Index
20099 BuiltinWMMAOp
= Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16
;
20101 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32
:
20102 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64
:
20103 ArgsForMatchingMatrixTypes
= {2, 0, 1, 3}; // CD, A, B, Index
20104 BuiltinWMMAOp
= Intrinsic::amdgcn_swmmac_f16_16x16x32_f16
;
20106 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32
:
20107 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64
:
20108 ArgsForMatchingMatrixTypes
= {2, 0, 1, 3}; // CD, A, B, Index
20109 BuiltinWMMAOp
= Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16
;
20111 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32
:
20112 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64
:
20113 ArgsForMatchingMatrixTypes
= {4, 1, 3, 5}; // CD, A, B, Index
20114 BuiltinWMMAOp
= Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8
;
20116 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32
:
20117 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64
:
20118 ArgsForMatchingMatrixTypes
= {4, 1, 3, 5}; // CD, A, B, Index
20119 BuiltinWMMAOp
= Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4
;
20121 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32
:
20122 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64
:
20123 ArgsForMatchingMatrixTypes
= {4, 1, 3, 5}; // CD, A, B, Index
20124 BuiltinWMMAOp
= Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4
;
20126 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32
:
20127 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64
:
20128 ArgsForMatchingMatrixTypes
= {2, 0, 1, 3}; // CD, A, B, Index
20129 BuiltinWMMAOp
= Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8
;
20131 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32
:
20132 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64
:
20133 ArgsForMatchingMatrixTypes
= {2, 0, 1, 3}; // CD, A, B, Index
20134 BuiltinWMMAOp
= Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8
;
20136 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32
:
20137 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64
:
20138 ArgsForMatchingMatrixTypes
= {2, 0, 1, 3}; // CD, A, B, Index
20139 BuiltinWMMAOp
= Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8
;
20141 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32
:
20142 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64
:
20143 ArgsForMatchingMatrixTypes
= {2, 0, 1, 3}; // CD, A, B, Index
20144 BuiltinWMMAOp
= Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8
;
20148 SmallVector
<Value
*, 6> Args
;
20149 for (int i
= 0, e
= E
->getNumArgs(); i
!= e
; ++i
)
20150 Args
.push_back(EmitScalarExpr(E
->getArg(i
)));
20151 if (AppendFalseForOpselArg
)
20152 Args
.push_back(Builder
.getFalse());
20154 SmallVector
<llvm::Type
*, 6> ArgTypes
;
20155 for (auto ArgIdx
: ArgsForMatchingMatrixTypes
)
20156 ArgTypes
.push_back(Args
[ArgIdx
]->getType());
20158 Function
*F
= CGM
.getIntrinsic(BuiltinWMMAOp
, ArgTypes
);
20159 return Builder
.CreateCall(F
, Args
);
20163 case AMDGPU::BI__builtin_amdgcn_workitem_id_x
:
20164 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x
, 0, 1024);
20165 case AMDGPU::BI__builtin_amdgcn_workitem_id_y
:
20166 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y
, 0, 1024);
20167 case AMDGPU::BI__builtin_amdgcn_workitem_id_z
:
20168 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z
, 0, 1024);
20170 // amdgcn workgroup size
20171 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x
:
20172 return EmitAMDGPUWorkGroupSize(*this, 0);
20173 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y
:
20174 return EmitAMDGPUWorkGroupSize(*this, 1);
20175 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z
:
20176 return EmitAMDGPUWorkGroupSize(*this, 2);
20178 // amdgcn grid size
20179 case AMDGPU::BI__builtin_amdgcn_grid_size_x
:
20180 return EmitAMDGPUGridSize(*this, 0);
20181 case AMDGPU::BI__builtin_amdgcn_grid_size_y
:
20182 return EmitAMDGPUGridSize(*this, 1);
20183 case AMDGPU::BI__builtin_amdgcn_grid_size_z
:
20184 return EmitAMDGPUGridSize(*this, 2);
20187 case AMDGPU::BI__builtin_r600_recipsqrt_ieee
:
20188 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef
:
20189 return emitBuiltinWithOneOverloadedType
<1>(*this, E
,
20190 Intrinsic::r600_recipsqrt_ieee
);
20191 case AMDGPU::BI__builtin_r600_read_tidig_x
:
20192 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x
, 0, 1024);
20193 case AMDGPU::BI__builtin_r600_read_tidig_y
:
20194 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y
, 0, 1024);
20195 case AMDGPU::BI__builtin_r600_read_tidig_z
:
20196 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z
, 0, 1024);
20197 case AMDGPU::BI__builtin_amdgcn_alignbit
: {
20198 llvm::Value
*Src0
= EmitScalarExpr(E
->getArg(0));
20199 llvm::Value
*Src1
= EmitScalarExpr(E
->getArg(1));
20200 llvm::Value
*Src2
= EmitScalarExpr(E
->getArg(2));
20201 Function
*F
= CGM
.getIntrinsic(Intrinsic::fshr
, Src0
->getType());
20202 return Builder
.CreateCall(F
, { Src0
, Src1
, Src2
});
20204 case AMDGPU::BI__builtin_amdgcn_fence
: {
20205 ProcessOrderScopeAMDGCN(EmitScalarExpr(E
->getArg(0)),
20206 EmitScalarExpr(E
->getArg(1)), AO
, SSID
);
20207 FenceInst
*Fence
= Builder
.CreateFence(AO
, SSID
);
20208 if (E
->getNumArgs() > 2)
20209 AddAMDGPUFenceAddressSpaceMMRA(Fence
, E
);
20212 case AMDGPU::BI__builtin_amdgcn_atomic_inc32
:
20213 case AMDGPU::BI__builtin_amdgcn_atomic_inc64
:
20214 case AMDGPU::BI__builtin_amdgcn_atomic_dec32
:
20215 case AMDGPU::BI__builtin_amdgcn_atomic_dec64
:
20216 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64
:
20217 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32
:
20218 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16
:
20219 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16
:
20220 case AMDGPU::BI__builtin_amdgcn_ds_faddf
:
20221 case AMDGPU::BI__builtin_amdgcn_ds_fminf
:
20222 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf
:
20223 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32
:
20224 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64
:
20225 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16
:
20226 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16
:
20227 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32
:
20228 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64
:
20229 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16
:
20230 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16
:
20231 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64
:
20232 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64
:
20233 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64
:
20234 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64
: {
20235 llvm::AtomicRMWInst::BinOp BinOp
;
20236 switch (BuiltinID
) {
20237 case AMDGPU::BI__builtin_amdgcn_atomic_inc32
:
20238 case AMDGPU::BI__builtin_amdgcn_atomic_inc64
:
20239 BinOp
= llvm::AtomicRMWInst::UIncWrap
;
20241 case AMDGPU::BI__builtin_amdgcn_atomic_dec32
:
20242 case AMDGPU::BI__builtin_amdgcn_atomic_dec64
:
20243 BinOp
= llvm::AtomicRMWInst::UDecWrap
;
20245 case AMDGPU::BI__builtin_amdgcn_ds_faddf
:
20246 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64
:
20247 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32
:
20248 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16
:
20249 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16
:
20250 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32
:
20251 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64
:
20252 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16
:
20253 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16
:
20254 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32
:
20255 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64
:
20256 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16
:
20257 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16
:
20258 BinOp
= llvm::AtomicRMWInst::FAdd
;
20260 case AMDGPU::BI__builtin_amdgcn_ds_fminf
:
20261 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64
:
20262 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64
:
20263 BinOp
= llvm::AtomicRMWInst::FMin
;
20265 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64
:
20266 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64
:
20267 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf
:
20268 BinOp
= llvm::AtomicRMWInst::FMax
;
20272 Address Ptr
= CheckAtomicAlignment(*this, E
);
20273 Value
*Val
= EmitScalarExpr(E
->getArg(1));
20274 llvm::Type
*OrigTy
= Val
->getType();
20275 QualType PtrTy
= E
->getArg(0)->IgnoreImpCasts()->getType();
20279 if (BuiltinID
== AMDGPU::BI__builtin_amdgcn_ds_faddf
||
20280 BuiltinID
== AMDGPU::BI__builtin_amdgcn_ds_fminf
||
20281 BuiltinID
== AMDGPU::BI__builtin_amdgcn_ds_fmaxf
) {
20282 // __builtin_amdgcn_ds_faddf/fminf/fmaxf has an explicit volatile argument
20284 cast
<ConstantInt
>(EmitScalarExpr(E
->getArg(4)))->getZExtValue();
20286 // Infer volatile from the passed type.
20288 PtrTy
->castAs
<PointerType
>()->getPointeeType().isVolatileQualified();
20291 if (E
->getNumArgs() >= 4) {
20292 // Some of the builtins have explicit ordering and scope arguments.
20293 ProcessOrderScopeAMDGCN(EmitScalarExpr(E
->getArg(2)),
20294 EmitScalarExpr(E
->getArg(3)), AO
, SSID
);
20296 // Most of the builtins do not have syncscope/order arguments. For DS
20297 // atomics the scope doesn't really matter, as they implicitly operate at
20298 // workgroup scope.
20300 // The global/flat cases need to use agent scope to consistently produce
20301 // the native instruction instead of a cmpxchg expansion.
20302 SSID
= getLLVMContext().getOrInsertSyncScopeID("agent");
20303 AO
= AtomicOrdering::Monotonic
;
20305 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
20306 if (BuiltinID
== AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16
||
20307 BuiltinID
== AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16
||
20308 BuiltinID
== AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16
) {
20309 llvm::Type
*V2BF16Ty
= FixedVectorType::get(
20310 llvm::Type::getBFloatTy(Builder
.getContext()), 2);
20311 Val
= Builder
.CreateBitCast(Val
, V2BF16Ty
);
20315 llvm::AtomicRMWInst
*RMW
=
20316 Builder
.CreateAtomicRMW(BinOp
, Ptr
, Val
, AO
, SSID
);
20318 RMW
->setVolatile(true);
20320 unsigned AddrSpace
= Ptr
.getType()->getAddressSpace();
20321 if (AddrSpace
!= llvm::AMDGPUAS::LOCAL_ADDRESS
) {
20322 // Most targets require "amdgpu.no.fine.grained.memory" to emit the native
20323 // instruction for flat and global operations.
20324 llvm::MDTuple
*EmptyMD
= MDNode::get(getLLVMContext(), {});
20325 RMW
->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD
);
20327 // Most targets require "amdgpu.ignore.denormal.mode" to emit the native
20328 // instruction, but this only matters for float fadd.
20329 if (BinOp
== llvm::AtomicRMWInst::FAdd
&& Val
->getType()->isFloatTy())
20330 RMW
->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD
);
20333 return Builder
.CreateBitCast(RMW
, OrigTy
);
20335 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn
:
20336 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl
: {
20337 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
20338 llvm::Type
*ResultType
= ConvertType(E
->getType());
20339 // s_sendmsg_rtn is mangled using return type only.
20341 CGM
.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn
, {ResultType
});
20342 return Builder
.CreateCall(F
, {Arg
});
20344 case AMDGPU::BI__builtin_amdgcn_permlane16_swap
:
20345 case AMDGPU::BI__builtin_amdgcn_permlane32_swap
: {
20346 // Because builtin types are limited, and the intrinsic uses a struct/pair
20347 // output, marshal the pair-of-i32 to <2 x i32>.
20348 Value
*VDstOld
= EmitScalarExpr(E
->getArg(0));
20349 Value
*VSrcOld
= EmitScalarExpr(E
->getArg(1));
20350 Value
*FI
= EmitScalarExpr(E
->getArg(2));
20351 Value
*BoundCtrl
= EmitScalarExpr(E
->getArg(3));
20353 CGM
.getIntrinsic(BuiltinID
== AMDGPU::BI__builtin_amdgcn_permlane16_swap
20354 ? Intrinsic::amdgcn_permlane16_swap
20355 : Intrinsic::amdgcn_permlane32_swap
);
20356 llvm::CallInst
*Call
=
20357 Builder
.CreateCall(F
, {VDstOld
, VSrcOld
, FI
, BoundCtrl
});
20359 llvm::Value
*Elt0
= Builder
.CreateExtractValue(Call
, 0);
20360 llvm::Value
*Elt1
= Builder
.CreateExtractValue(Call
, 1);
20362 llvm::Type
*ResultType
= ConvertType(E
->getType());
20364 llvm::Value
*Insert0
= Builder
.CreateInsertElement(
20365 llvm::PoisonValue::get(ResultType
), Elt0
, UINT64_C(0));
20366 llvm::Value
*AsVector
=
20367 Builder
.CreateInsertElement(Insert0
, Elt1
, UINT64_C(1));
20370 case AMDGPU::BI__builtin_amdgcn_bitop3_b32
:
20371 case AMDGPU::BI__builtin_amdgcn_bitop3_b16
:
20372 return emitQuaternaryBuiltin(*this, E
, Intrinsic::amdgcn_bitop3
);
20373 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc
:
20374 return emitBuiltinWithOneOverloadedType
<4>(
20375 *this, E
, Intrinsic::amdgcn_make_buffer_rsrc
);
20376 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8
:
20377 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16
:
20378 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32
:
20379 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64
:
20380 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96
:
20381 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128
:
20382 return emitBuiltinWithOneOverloadedType
<5>(
20383 *this, E
, Intrinsic::amdgcn_raw_ptr_buffer_store
);
20384 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8
:
20385 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16
:
20386 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32
:
20387 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64
:
20388 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96
:
20389 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128
: {
20390 llvm::Type
*RetTy
= nullptr;
20391 switch (BuiltinID
) {
20392 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8
:
20395 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16
:
20398 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32
:
20401 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64
:
20402 RetTy
= llvm::FixedVectorType::get(Int32Ty
, /*NumElements=*/2);
20404 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96
:
20405 RetTy
= llvm::FixedVectorType::get(Int32Ty
, /*NumElements=*/3);
20407 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128
:
20408 RetTy
= llvm::FixedVectorType::get(Int32Ty
, /*NumElements=*/4);
20412 CGM
.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load
, RetTy
);
20413 return Builder
.CreateCall(
20414 F
, {EmitScalarExpr(E
->getArg(0)), EmitScalarExpr(E
->getArg(1)),
20415 EmitScalarExpr(E
->getArg(2)), EmitScalarExpr(E
->getArg(3))});
20417 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data
:
20418 return emitBuiltinWithOneOverloadedType
<2>(
20419 *this, E
, Intrinsic::amdgcn_s_prefetch_data
);
20425 /// Handle a SystemZ function in which the final argument is a pointer
20426 /// to an int that receives the post-instruction CC value. At the LLVM level
20427 /// this is represented as a function that returns a {result, cc} pair.
20428 static Value
*EmitSystemZIntrinsicWithCC(CodeGenFunction
&CGF
,
20429 unsigned IntrinsicID
,
20430 const CallExpr
*E
) {
20431 unsigned NumArgs
= E
->getNumArgs() - 1;
20432 SmallVector
<Value
*, 8> Args(NumArgs
);
20433 for (unsigned I
= 0; I
< NumArgs
; ++I
)
20434 Args
[I
] = CGF
.EmitScalarExpr(E
->getArg(I
));
20435 Address CCPtr
= CGF
.EmitPointerWithAlignment(E
->getArg(NumArgs
));
20436 Function
*F
= CGF
.CGM
.getIntrinsic(IntrinsicID
);
20437 Value
*Call
= CGF
.Builder
.CreateCall(F
, Args
);
20438 Value
*CC
= CGF
.Builder
.CreateExtractValue(Call
, 1);
20439 CGF
.Builder
.CreateStore(CC
, CCPtr
);
20440 return CGF
.Builder
.CreateExtractValue(Call
, 0);
20443 Value
*CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID
,
20444 const CallExpr
*E
) {
20445 switch (BuiltinID
) {
20446 case SystemZ::BI__builtin_tbegin
: {
20447 Value
*TDB
= EmitScalarExpr(E
->getArg(0));
20448 Value
*Control
= llvm::ConstantInt::get(Int32Ty
, 0xff0c);
20449 Function
*F
= CGM
.getIntrinsic(Intrinsic::s390_tbegin
);
20450 return Builder
.CreateCall(F
, {TDB
, Control
});
20452 case SystemZ::BI__builtin_tbegin_nofloat
: {
20453 Value
*TDB
= EmitScalarExpr(E
->getArg(0));
20454 Value
*Control
= llvm::ConstantInt::get(Int32Ty
, 0xff0c);
20455 Function
*F
= CGM
.getIntrinsic(Intrinsic::s390_tbegin_nofloat
);
20456 return Builder
.CreateCall(F
, {TDB
, Control
});
20458 case SystemZ::BI__builtin_tbeginc
: {
20459 Value
*TDB
= llvm::ConstantPointerNull::get(Int8PtrTy
);
20460 Value
*Control
= llvm::ConstantInt::get(Int32Ty
, 0xff08);
20461 Function
*F
= CGM
.getIntrinsic(Intrinsic::s390_tbeginc
);
20462 return Builder
.CreateCall(F
, {TDB
, Control
});
20464 case SystemZ::BI__builtin_tabort
: {
20465 Value
*Data
= EmitScalarExpr(E
->getArg(0));
20466 Function
*F
= CGM
.getIntrinsic(Intrinsic::s390_tabort
);
20467 return Builder
.CreateCall(F
, Builder
.CreateSExt(Data
, Int64Ty
, "tabort"));
20469 case SystemZ::BI__builtin_non_tx_store
: {
20470 Value
*Address
= EmitScalarExpr(E
->getArg(0));
20471 Value
*Data
= EmitScalarExpr(E
->getArg(1));
20472 Function
*F
= CGM
.getIntrinsic(Intrinsic::s390_ntstg
);
20473 return Builder
.CreateCall(F
, {Data
, Address
});
20476 // Vector builtins. Note that most vector builtins are mapped automatically
20477 // to target-specific LLVM intrinsics. The ones handled specially here can
20478 // be represented via standard LLVM IR, which is preferable to enable common
20479 // LLVM optimizations.
20481 case SystemZ::BI__builtin_s390_vclzb
:
20482 case SystemZ::BI__builtin_s390_vclzh
:
20483 case SystemZ::BI__builtin_s390_vclzf
:
20484 case SystemZ::BI__builtin_s390_vclzg
: {
20485 llvm::Type
*ResultType
= ConvertType(E
->getType());
20486 Value
*X
= EmitScalarExpr(E
->getArg(0));
20487 Value
*Undef
= ConstantInt::get(Builder
.getInt1Ty(), false);
20488 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctlz
, ResultType
);
20489 return Builder
.CreateCall(F
, {X
, Undef
});
20492 case SystemZ::BI__builtin_s390_vctzb
:
20493 case SystemZ::BI__builtin_s390_vctzh
:
20494 case SystemZ::BI__builtin_s390_vctzf
:
20495 case SystemZ::BI__builtin_s390_vctzg
: {
20496 llvm::Type
*ResultType
= ConvertType(E
->getType());
20497 Value
*X
= EmitScalarExpr(E
->getArg(0));
20498 Value
*Undef
= ConstantInt::get(Builder
.getInt1Ty(), false);
20499 Function
*F
= CGM
.getIntrinsic(Intrinsic::cttz
, ResultType
);
20500 return Builder
.CreateCall(F
, {X
, Undef
});
20503 case SystemZ::BI__builtin_s390_verllb
:
20504 case SystemZ::BI__builtin_s390_verllh
:
20505 case SystemZ::BI__builtin_s390_verllf
:
20506 case SystemZ::BI__builtin_s390_verllg
: {
20507 llvm::Type
*ResultType
= ConvertType(E
->getType());
20508 llvm::Value
*Src
= EmitScalarExpr(E
->getArg(0));
20509 llvm::Value
*Amt
= EmitScalarExpr(E
->getArg(1));
20510 // Splat scalar rotate amount to vector type.
20511 unsigned NumElts
= cast
<llvm::FixedVectorType
>(ResultType
)->getNumElements();
20512 Amt
= Builder
.CreateIntCast(Amt
, ResultType
->getScalarType(), false);
20513 Amt
= Builder
.CreateVectorSplat(NumElts
, Amt
);
20514 Function
*F
= CGM
.getIntrinsic(Intrinsic::fshl
, ResultType
);
20515 return Builder
.CreateCall(F
, { Src
, Src
, Amt
});
20518 case SystemZ::BI__builtin_s390_verllvb
:
20519 case SystemZ::BI__builtin_s390_verllvh
:
20520 case SystemZ::BI__builtin_s390_verllvf
:
20521 case SystemZ::BI__builtin_s390_verllvg
: {
20522 llvm::Type
*ResultType
= ConvertType(E
->getType());
20523 llvm::Value
*Src
= EmitScalarExpr(E
->getArg(0));
20524 llvm::Value
*Amt
= EmitScalarExpr(E
->getArg(1));
20525 Function
*F
= CGM
.getIntrinsic(Intrinsic::fshl
, ResultType
);
20526 return Builder
.CreateCall(F
, { Src
, Src
, Amt
});
20529 case SystemZ::BI__builtin_s390_vfsqsb
:
20530 case SystemZ::BI__builtin_s390_vfsqdb
: {
20531 llvm::Type
*ResultType
= ConvertType(E
->getType());
20532 Value
*X
= EmitScalarExpr(E
->getArg(0));
20533 if (Builder
.getIsFPConstrained()) {
20534 Function
*F
= CGM
.getIntrinsic(Intrinsic::experimental_constrained_sqrt
, ResultType
);
20535 return Builder
.CreateConstrainedFPCall(F
, { X
});
20537 Function
*F
= CGM
.getIntrinsic(Intrinsic::sqrt
, ResultType
);
20538 return Builder
.CreateCall(F
, X
);
20541 case SystemZ::BI__builtin_s390_vfmasb
:
20542 case SystemZ::BI__builtin_s390_vfmadb
: {
20543 llvm::Type
*ResultType
= ConvertType(E
->getType());
20544 Value
*X
= EmitScalarExpr(E
->getArg(0));
20545 Value
*Y
= EmitScalarExpr(E
->getArg(1));
20546 Value
*Z
= EmitScalarExpr(E
->getArg(2));
20547 if (Builder
.getIsFPConstrained()) {
20548 Function
*F
= CGM
.getIntrinsic(Intrinsic::experimental_constrained_fma
, ResultType
);
20549 return Builder
.CreateConstrainedFPCall(F
, {X
, Y
, Z
});
20551 Function
*F
= CGM
.getIntrinsic(Intrinsic::fma
, ResultType
);
20552 return Builder
.CreateCall(F
, {X
, Y
, Z
});
20555 case SystemZ::BI__builtin_s390_vfmssb
:
20556 case SystemZ::BI__builtin_s390_vfmsdb
: {
20557 llvm::Type
*ResultType
= ConvertType(E
->getType());
20558 Value
*X
= EmitScalarExpr(E
->getArg(0));
20559 Value
*Y
= EmitScalarExpr(E
->getArg(1));
20560 Value
*Z
= EmitScalarExpr(E
->getArg(2));
20561 if (Builder
.getIsFPConstrained()) {
20562 Function
*F
= CGM
.getIntrinsic(Intrinsic::experimental_constrained_fma
, ResultType
);
20563 return Builder
.CreateConstrainedFPCall(F
, {X
, Y
, Builder
.CreateFNeg(Z
, "neg")});
20565 Function
*F
= CGM
.getIntrinsic(Intrinsic::fma
, ResultType
);
20566 return Builder
.CreateCall(F
, {X
, Y
, Builder
.CreateFNeg(Z
, "neg")});
20569 case SystemZ::BI__builtin_s390_vfnmasb
:
20570 case SystemZ::BI__builtin_s390_vfnmadb
: {
20571 llvm::Type
*ResultType
= ConvertType(E
->getType());
20572 Value
*X
= EmitScalarExpr(E
->getArg(0));
20573 Value
*Y
= EmitScalarExpr(E
->getArg(1));
20574 Value
*Z
= EmitScalarExpr(E
->getArg(2));
20575 if (Builder
.getIsFPConstrained()) {
20576 Function
*F
= CGM
.getIntrinsic(Intrinsic::experimental_constrained_fma
, ResultType
);
20577 return Builder
.CreateFNeg(Builder
.CreateConstrainedFPCall(F
, {X
, Y
, Z
}), "neg");
20579 Function
*F
= CGM
.getIntrinsic(Intrinsic::fma
, ResultType
);
20580 return Builder
.CreateFNeg(Builder
.CreateCall(F
, {X
, Y
, Z
}), "neg");
20583 case SystemZ::BI__builtin_s390_vfnmssb
:
20584 case SystemZ::BI__builtin_s390_vfnmsdb
: {
20585 llvm::Type
*ResultType
= ConvertType(E
->getType());
20586 Value
*X
= EmitScalarExpr(E
->getArg(0));
20587 Value
*Y
= EmitScalarExpr(E
->getArg(1));
20588 Value
*Z
= EmitScalarExpr(E
->getArg(2));
20589 if (Builder
.getIsFPConstrained()) {
20590 Function
*F
= CGM
.getIntrinsic(Intrinsic::experimental_constrained_fma
, ResultType
);
20591 Value
*NegZ
= Builder
.CreateFNeg(Z
, "sub");
20592 return Builder
.CreateFNeg(Builder
.CreateConstrainedFPCall(F
, {X
, Y
, NegZ
}));
20594 Function
*F
= CGM
.getIntrinsic(Intrinsic::fma
, ResultType
);
20595 Value
*NegZ
= Builder
.CreateFNeg(Z
, "neg");
20596 return Builder
.CreateFNeg(Builder
.CreateCall(F
, {X
, Y
, NegZ
}));
20599 case SystemZ::BI__builtin_s390_vflpsb
:
20600 case SystemZ::BI__builtin_s390_vflpdb
: {
20601 llvm::Type
*ResultType
= ConvertType(E
->getType());
20602 Value
*X
= EmitScalarExpr(E
->getArg(0));
20603 Function
*F
= CGM
.getIntrinsic(Intrinsic::fabs
, ResultType
);
20604 return Builder
.CreateCall(F
, X
);
20606 case SystemZ::BI__builtin_s390_vflnsb
:
20607 case SystemZ::BI__builtin_s390_vflndb
: {
20608 llvm::Type
*ResultType
= ConvertType(E
->getType());
20609 Value
*X
= EmitScalarExpr(E
->getArg(0));
20610 Function
*F
= CGM
.getIntrinsic(Intrinsic::fabs
, ResultType
);
20611 return Builder
.CreateFNeg(Builder
.CreateCall(F
, X
), "neg");
20613 case SystemZ::BI__builtin_s390_vfisb
:
20614 case SystemZ::BI__builtin_s390_vfidb
: {
20615 llvm::Type
*ResultType
= ConvertType(E
->getType());
20616 Value
*X
= EmitScalarExpr(E
->getArg(0));
20617 // Constant-fold the M4 and M5 mask arguments.
20618 llvm::APSInt M4
= *E
->getArg(1)->getIntegerConstantExpr(getContext());
20619 llvm::APSInt M5
= *E
->getArg(2)->getIntegerConstantExpr(getContext());
20620 // Check whether this instance can be represented via a LLVM standard
20621 // intrinsic. We only support some combinations of M4 and M5.
20622 Intrinsic::ID ID
= Intrinsic::not_intrinsic
;
20624 switch (M4
.getZExtValue()) {
20626 case 0: // IEEE-inexact exception allowed
20627 switch (M5
.getZExtValue()) {
20629 case 0: ID
= Intrinsic::rint
;
20630 CI
= Intrinsic::experimental_constrained_rint
; break;
20633 case 4: // IEEE-inexact exception suppressed
20634 switch (M5
.getZExtValue()) {
20636 case 0: ID
= Intrinsic::nearbyint
;
20637 CI
= Intrinsic::experimental_constrained_nearbyint
; break;
20638 case 1: ID
= Intrinsic::round
;
20639 CI
= Intrinsic::experimental_constrained_round
; break;
20640 case 5: ID
= Intrinsic::trunc
;
20641 CI
= Intrinsic::experimental_constrained_trunc
; break;
20642 case 6: ID
= Intrinsic::ceil
;
20643 CI
= Intrinsic::experimental_constrained_ceil
; break;
20644 case 7: ID
= Intrinsic::floor
;
20645 CI
= Intrinsic::experimental_constrained_floor
; break;
20649 if (ID
!= Intrinsic::not_intrinsic
) {
20650 if (Builder
.getIsFPConstrained()) {
20651 Function
*F
= CGM
.getIntrinsic(CI
, ResultType
);
20652 return Builder
.CreateConstrainedFPCall(F
, X
);
20654 Function
*F
= CGM
.getIntrinsic(ID
, ResultType
);
20655 return Builder
.CreateCall(F
, X
);
20658 switch (BuiltinID
) { // FIXME: constrained version?
20659 case SystemZ::BI__builtin_s390_vfisb
: ID
= Intrinsic::s390_vfisb
; break;
20660 case SystemZ::BI__builtin_s390_vfidb
: ID
= Intrinsic::s390_vfidb
; break;
20661 default: llvm_unreachable("Unknown BuiltinID");
20663 Function
*F
= CGM
.getIntrinsic(ID
);
20664 Value
*M4Value
= llvm::ConstantInt::get(getLLVMContext(), M4
);
20665 Value
*M5Value
= llvm::ConstantInt::get(getLLVMContext(), M5
);
20666 return Builder
.CreateCall(F
, {X
, M4Value
, M5Value
});
20668 case SystemZ::BI__builtin_s390_vfmaxsb
:
20669 case SystemZ::BI__builtin_s390_vfmaxdb
: {
20670 llvm::Type
*ResultType
= ConvertType(E
->getType());
20671 Value
*X
= EmitScalarExpr(E
->getArg(0));
20672 Value
*Y
= EmitScalarExpr(E
->getArg(1));
20673 // Constant-fold the M4 mask argument.
20674 llvm::APSInt M4
= *E
->getArg(2)->getIntegerConstantExpr(getContext());
20675 // Check whether this instance can be represented via a LLVM standard
20676 // intrinsic. We only support some values of M4.
20677 Intrinsic::ID ID
= Intrinsic::not_intrinsic
;
20679 switch (M4
.getZExtValue()) {
20681 case 4: ID
= Intrinsic::maxnum
;
20682 CI
= Intrinsic::experimental_constrained_maxnum
; break;
20684 if (ID
!= Intrinsic::not_intrinsic
) {
20685 if (Builder
.getIsFPConstrained()) {
20686 Function
*F
= CGM
.getIntrinsic(CI
, ResultType
);
20687 return Builder
.CreateConstrainedFPCall(F
, {X
, Y
});
20689 Function
*F
= CGM
.getIntrinsic(ID
, ResultType
);
20690 return Builder
.CreateCall(F
, {X
, Y
});
20693 switch (BuiltinID
) {
20694 case SystemZ::BI__builtin_s390_vfmaxsb
: ID
= Intrinsic::s390_vfmaxsb
; break;
20695 case SystemZ::BI__builtin_s390_vfmaxdb
: ID
= Intrinsic::s390_vfmaxdb
; break;
20696 default: llvm_unreachable("Unknown BuiltinID");
20698 Function
*F
= CGM
.getIntrinsic(ID
);
20699 Value
*M4Value
= llvm::ConstantInt::get(getLLVMContext(), M4
);
20700 return Builder
.CreateCall(F
, {X
, Y
, M4Value
});
20702 case SystemZ::BI__builtin_s390_vfminsb
:
20703 case SystemZ::BI__builtin_s390_vfmindb
: {
20704 llvm::Type
*ResultType
= ConvertType(E
->getType());
20705 Value
*X
= EmitScalarExpr(E
->getArg(0));
20706 Value
*Y
= EmitScalarExpr(E
->getArg(1));
20707 // Constant-fold the M4 mask argument.
20708 llvm::APSInt M4
= *E
->getArg(2)->getIntegerConstantExpr(getContext());
20709 // Check whether this instance can be represented via a LLVM standard
20710 // intrinsic. We only support some values of M4.
20711 Intrinsic::ID ID
= Intrinsic::not_intrinsic
;
20713 switch (M4
.getZExtValue()) {
20715 case 4: ID
= Intrinsic::minnum
;
20716 CI
= Intrinsic::experimental_constrained_minnum
; break;
20718 if (ID
!= Intrinsic::not_intrinsic
) {
20719 if (Builder
.getIsFPConstrained()) {
20720 Function
*F
= CGM
.getIntrinsic(CI
, ResultType
);
20721 return Builder
.CreateConstrainedFPCall(F
, {X
, Y
});
20723 Function
*F
= CGM
.getIntrinsic(ID
, ResultType
);
20724 return Builder
.CreateCall(F
, {X
, Y
});
20727 switch (BuiltinID
) {
20728 case SystemZ::BI__builtin_s390_vfminsb
: ID
= Intrinsic::s390_vfminsb
; break;
20729 case SystemZ::BI__builtin_s390_vfmindb
: ID
= Intrinsic::s390_vfmindb
; break;
20730 default: llvm_unreachable("Unknown BuiltinID");
20732 Function
*F
= CGM
.getIntrinsic(ID
);
20733 Value
*M4Value
= llvm::ConstantInt::get(getLLVMContext(), M4
);
20734 return Builder
.CreateCall(F
, {X
, Y
, M4Value
});
20737 case SystemZ::BI__builtin_s390_vlbrh
:
20738 case SystemZ::BI__builtin_s390_vlbrf
:
20739 case SystemZ::BI__builtin_s390_vlbrg
: {
20740 llvm::Type
*ResultType
= ConvertType(E
->getType());
20741 Value
*X
= EmitScalarExpr(E
->getArg(0));
20742 Function
*F
= CGM
.getIntrinsic(Intrinsic::bswap
, ResultType
);
20743 return Builder
.CreateCall(F
, X
);
20746 // Vector intrinsics that output the post-instruction CC value.
20748 #define INTRINSIC_WITH_CC(NAME) \
20749 case SystemZ::BI__builtin_##NAME: \
20750 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
20752 INTRINSIC_WITH_CC(s390_vpkshs
);
20753 INTRINSIC_WITH_CC(s390_vpksfs
);
20754 INTRINSIC_WITH_CC(s390_vpksgs
);
20756 INTRINSIC_WITH_CC(s390_vpklshs
);
20757 INTRINSIC_WITH_CC(s390_vpklsfs
);
20758 INTRINSIC_WITH_CC(s390_vpklsgs
);
20760 INTRINSIC_WITH_CC(s390_vceqbs
);
20761 INTRINSIC_WITH_CC(s390_vceqhs
);
20762 INTRINSIC_WITH_CC(s390_vceqfs
);
20763 INTRINSIC_WITH_CC(s390_vceqgs
);
20765 INTRINSIC_WITH_CC(s390_vchbs
);
20766 INTRINSIC_WITH_CC(s390_vchhs
);
20767 INTRINSIC_WITH_CC(s390_vchfs
);
20768 INTRINSIC_WITH_CC(s390_vchgs
);
20770 INTRINSIC_WITH_CC(s390_vchlbs
);
20771 INTRINSIC_WITH_CC(s390_vchlhs
);
20772 INTRINSIC_WITH_CC(s390_vchlfs
);
20773 INTRINSIC_WITH_CC(s390_vchlgs
);
20775 INTRINSIC_WITH_CC(s390_vfaebs
);
20776 INTRINSIC_WITH_CC(s390_vfaehs
);
20777 INTRINSIC_WITH_CC(s390_vfaefs
);
20779 INTRINSIC_WITH_CC(s390_vfaezbs
);
20780 INTRINSIC_WITH_CC(s390_vfaezhs
);
20781 INTRINSIC_WITH_CC(s390_vfaezfs
);
20783 INTRINSIC_WITH_CC(s390_vfeebs
);
20784 INTRINSIC_WITH_CC(s390_vfeehs
);
20785 INTRINSIC_WITH_CC(s390_vfeefs
);
20787 INTRINSIC_WITH_CC(s390_vfeezbs
);
20788 INTRINSIC_WITH_CC(s390_vfeezhs
);
20789 INTRINSIC_WITH_CC(s390_vfeezfs
);
20791 INTRINSIC_WITH_CC(s390_vfenebs
);
20792 INTRINSIC_WITH_CC(s390_vfenehs
);
20793 INTRINSIC_WITH_CC(s390_vfenefs
);
20795 INTRINSIC_WITH_CC(s390_vfenezbs
);
20796 INTRINSIC_WITH_CC(s390_vfenezhs
);
20797 INTRINSIC_WITH_CC(s390_vfenezfs
);
20799 INTRINSIC_WITH_CC(s390_vistrbs
);
20800 INTRINSIC_WITH_CC(s390_vistrhs
);
20801 INTRINSIC_WITH_CC(s390_vistrfs
);
20803 INTRINSIC_WITH_CC(s390_vstrcbs
);
20804 INTRINSIC_WITH_CC(s390_vstrchs
);
20805 INTRINSIC_WITH_CC(s390_vstrcfs
);
20807 INTRINSIC_WITH_CC(s390_vstrczbs
);
20808 INTRINSIC_WITH_CC(s390_vstrczhs
);
20809 INTRINSIC_WITH_CC(s390_vstrczfs
);
20811 INTRINSIC_WITH_CC(s390_vfcesbs
);
20812 INTRINSIC_WITH_CC(s390_vfcedbs
);
20813 INTRINSIC_WITH_CC(s390_vfchsbs
);
20814 INTRINSIC_WITH_CC(s390_vfchdbs
);
20815 INTRINSIC_WITH_CC(s390_vfchesbs
);
20816 INTRINSIC_WITH_CC(s390_vfchedbs
);
20818 INTRINSIC_WITH_CC(s390_vftcisb
);
20819 INTRINSIC_WITH_CC(s390_vftcidb
);
20821 INTRINSIC_WITH_CC(s390_vstrsb
);
20822 INTRINSIC_WITH_CC(s390_vstrsh
);
20823 INTRINSIC_WITH_CC(s390_vstrsf
);
20825 INTRINSIC_WITH_CC(s390_vstrszb
);
20826 INTRINSIC_WITH_CC(s390_vstrszh
);
20827 INTRINSIC_WITH_CC(s390_vstrszf
);
20829 #undef INTRINSIC_WITH_CC
// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant.

// Describes how one MMA load/store builtin maps onto LLVM intrinsics.
struct NVPTXMmaLdstInfo {
  unsigned NumResults; // Number of elements to load/store.
  // Intrinsic IDs for the col/row layout variants, in the order MMA_LDST
  // initializes them. 0 if a particular layout is unsupported.
  unsigned IID_col;
  unsigned IID_row;
};

// Expands to the ID of the strided WMMA intrinsic for the given
// geometry/operation/type combination and layout.
#define MMA_INTR(geom_op_type, layout) \
  Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
// Expands to an NVPTXMmaLdstInfo initializer: the element count followed by
// the col and row intrinsic IDs.
#define MMA_LDST(n, geom_op_type) \
  { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
20850 static NVPTXMmaLdstInfo
getNVPTXMmaLdstInfo(unsigned BuiltinID
) {
20851 switch (BuiltinID
) {
20853 case NVPTX::BI__hmma_m16n16k16_ld_a
:
20854 return MMA_LDST(8, m16n16k16_load_a_f16
);
20855 case NVPTX::BI__hmma_m16n16k16_ld_b
:
20856 return MMA_LDST(8, m16n16k16_load_b_f16
);
20857 case NVPTX::BI__hmma_m16n16k16_ld_c_f16
:
20858 return MMA_LDST(4, m16n16k16_load_c_f16
);
20859 case NVPTX::BI__hmma_m16n16k16_ld_c_f32
:
20860 return MMA_LDST(8, m16n16k16_load_c_f32
);
20861 case NVPTX::BI__hmma_m32n8k16_ld_a
:
20862 return MMA_LDST(8, m32n8k16_load_a_f16
);
20863 case NVPTX::BI__hmma_m32n8k16_ld_b
:
20864 return MMA_LDST(8, m32n8k16_load_b_f16
);
20865 case NVPTX::BI__hmma_m32n8k16_ld_c_f16
:
20866 return MMA_LDST(4, m32n8k16_load_c_f16
);
20867 case NVPTX::BI__hmma_m32n8k16_ld_c_f32
:
20868 return MMA_LDST(8, m32n8k16_load_c_f32
);
20869 case NVPTX::BI__hmma_m8n32k16_ld_a
:
20870 return MMA_LDST(8, m8n32k16_load_a_f16
);
20871 case NVPTX::BI__hmma_m8n32k16_ld_b
:
20872 return MMA_LDST(8, m8n32k16_load_b_f16
);
20873 case NVPTX::BI__hmma_m8n32k16_ld_c_f16
:
20874 return MMA_LDST(4, m8n32k16_load_c_f16
);
20875 case NVPTX::BI__hmma_m8n32k16_ld_c_f32
:
20876 return MMA_LDST(8, m8n32k16_load_c_f32
);
20878 // Integer MMA loads
20879 case NVPTX::BI__imma_m16n16k16_ld_a_s8
:
20880 return MMA_LDST(2, m16n16k16_load_a_s8
);
20881 case NVPTX::BI__imma_m16n16k16_ld_a_u8
:
20882 return MMA_LDST(2, m16n16k16_load_a_u8
);
20883 case NVPTX::BI__imma_m16n16k16_ld_b_s8
:
20884 return MMA_LDST(2, m16n16k16_load_b_s8
);
20885 case NVPTX::BI__imma_m16n16k16_ld_b_u8
:
20886 return MMA_LDST(2, m16n16k16_load_b_u8
);
20887 case NVPTX::BI__imma_m16n16k16_ld_c
:
20888 return MMA_LDST(8, m16n16k16_load_c_s32
);
20889 case NVPTX::BI__imma_m32n8k16_ld_a_s8
:
20890 return MMA_LDST(4, m32n8k16_load_a_s8
);
20891 case NVPTX::BI__imma_m32n8k16_ld_a_u8
:
20892 return MMA_LDST(4, m32n8k16_load_a_u8
);
20893 case NVPTX::BI__imma_m32n8k16_ld_b_s8
:
20894 return MMA_LDST(1, m32n8k16_load_b_s8
);
20895 case NVPTX::BI__imma_m32n8k16_ld_b_u8
:
20896 return MMA_LDST(1, m32n8k16_load_b_u8
);
20897 case NVPTX::BI__imma_m32n8k16_ld_c
:
20898 return MMA_LDST(8, m32n8k16_load_c_s32
);
20899 case NVPTX::BI__imma_m8n32k16_ld_a_s8
:
20900 return MMA_LDST(1, m8n32k16_load_a_s8
);
20901 case NVPTX::BI__imma_m8n32k16_ld_a_u8
:
20902 return MMA_LDST(1, m8n32k16_load_a_u8
);
20903 case NVPTX::BI__imma_m8n32k16_ld_b_s8
:
20904 return MMA_LDST(4, m8n32k16_load_b_s8
);
20905 case NVPTX::BI__imma_m8n32k16_ld_b_u8
:
20906 return MMA_LDST(4, m8n32k16_load_b_u8
);
20907 case NVPTX::BI__imma_m8n32k16_ld_c
:
20908 return MMA_LDST(8, m8n32k16_load_c_s32
);
20910 // Sub-integer MMA loads.
20911 // Only row/col layout is supported by A/B fragments.
20912 case NVPTX::BI__imma_m8n8k32_ld_a_s4
:
20913 return {1, 0, MMA_INTR(m8n8k32_load_a_s4
, row
)};
20914 case NVPTX::BI__imma_m8n8k32_ld_a_u4
:
20915 return {1, 0, MMA_INTR(m8n8k32_load_a_u4
, row
)};
20916 case NVPTX::BI__imma_m8n8k32_ld_b_s4
:
20917 return {1, MMA_INTR(m8n8k32_load_b_s4
, col
), 0};
20918 case NVPTX::BI__imma_m8n8k32_ld_b_u4
:
20919 return {1, MMA_INTR(m8n8k32_load_b_u4
, col
), 0};
20920 case NVPTX::BI__imma_m8n8k32_ld_c
:
20921 return MMA_LDST(2, m8n8k32_load_c_s32
);
20922 case NVPTX::BI__bmma_m8n8k128_ld_a_b1
:
20923 return {1, 0, MMA_INTR(m8n8k128_load_a_b1
, row
)};
20924 case NVPTX::BI__bmma_m8n8k128_ld_b_b1
:
20925 return {1, MMA_INTR(m8n8k128_load_b_b1
, col
), 0};
20926 case NVPTX::BI__bmma_m8n8k128_ld_c
:
20927 return MMA_LDST(2, m8n8k128_load_c_s32
);
20929 // Double MMA loads
20930 case NVPTX::BI__dmma_m8n8k4_ld_a
:
20931 return MMA_LDST(1, m8n8k4_load_a_f64
);
20932 case NVPTX::BI__dmma_m8n8k4_ld_b
:
20933 return MMA_LDST(1, m8n8k4_load_b_f64
);
20934 case NVPTX::BI__dmma_m8n8k4_ld_c
:
20935 return MMA_LDST(2, m8n8k4_load_c_f64
);
20937 // Alternate float MMA loads
20938 case NVPTX::BI__mma_bf16_m16n16k16_ld_a
:
20939 return MMA_LDST(4, m16n16k16_load_a_bf16
);
20940 case NVPTX::BI__mma_bf16_m16n16k16_ld_b
:
20941 return MMA_LDST(4, m16n16k16_load_b_bf16
);
20942 case NVPTX::BI__mma_bf16_m8n32k16_ld_a
:
20943 return MMA_LDST(2, m8n32k16_load_a_bf16
);
20944 case NVPTX::BI__mma_bf16_m8n32k16_ld_b
:
20945 return MMA_LDST(8, m8n32k16_load_b_bf16
);
20946 case NVPTX::BI__mma_bf16_m32n8k16_ld_a
:
20947 return MMA_LDST(8, m32n8k16_load_a_bf16
);
20948 case NVPTX::BI__mma_bf16_m32n8k16_ld_b
:
20949 return MMA_LDST(2, m32n8k16_load_b_bf16
);
20950 case NVPTX::BI__mma_tf32_m16n16k8_ld_a
:
20951 return MMA_LDST(4, m16n16k8_load_a_tf32
);
20952 case NVPTX::BI__mma_tf32_m16n16k8_ld_b
:
20953 return MMA_LDST(4, m16n16k8_load_b_tf32
);
20954 case NVPTX::BI__mma_tf32_m16n16k8_ld_c
:
20955 return MMA_LDST(8, m16n16k8_load_c_f32
);
20957 // NOTE: We need to follow inconsitent naming scheme used by NVCC. Unlike
20958 // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
20959 // use fragment C for both loads and stores.
20961 case NVPTX::BI__hmma_m16n16k16_st_c_f16
:
20962 return MMA_LDST(4, m16n16k16_store_d_f16
);
20963 case NVPTX::BI__hmma_m16n16k16_st_c_f32
:
20964 return MMA_LDST(8, m16n16k16_store_d_f32
);
20965 case NVPTX::BI__hmma_m32n8k16_st_c_f16
:
20966 return MMA_LDST(4, m32n8k16_store_d_f16
);
20967 case NVPTX::BI__hmma_m32n8k16_st_c_f32
:
20968 return MMA_LDST(8, m32n8k16_store_d_f32
);
20969 case NVPTX::BI__hmma_m8n32k16_st_c_f16
:
20970 return MMA_LDST(4, m8n32k16_store_d_f16
);
20971 case NVPTX::BI__hmma_m8n32k16_st_c_f32
:
20972 return MMA_LDST(8, m8n32k16_store_d_f32
);
20974 // Integer and sub-integer MMA stores.
20975 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
20976 // name, integer loads/stores use LLVM's i32.
20977 case NVPTX::BI__imma_m16n16k16_st_c_i32
:
20978 return MMA_LDST(8, m16n16k16_store_d_s32
);
20979 case NVPTX::BI__imma_m32n8k16_st_c_i32
:
20980 return MMA_LDST(8, m32n8k16_store_d_s32
);
20981 case NVPTX::BI__imma_m8n32k16_st_c_i32
:
20982 return MMA_LDST(8, m8n32k16_store_d_s32
);
20983 case NVPTX::BI__imma_m8n8k32_st_c_i32
:
20984 return MMA_LDST(2, m8n8k32_store_d_s32
);
20985 case NVPTX::BI__bmma_m8n8k128_st_c_i32
:
20986 return MMA_LDST(2, m8n8k128_store_d_s32
);
20988 // Double MMA store
20989 case NVPTX::BI__dmma_m8n8k4_st_c_f64
:
20990 return MMA_LDST(2, m8n8k4_store_d_f64
);
20992 // Alternate float MMA store
20993 case NVPTX::BI__mma_m16n16k8_st_c_f32
:
20994 return MMA_LDST(8, m16n16k8_store_d_f32
);
20997 llvm_unreachable("Unknown MMA builtin");
// Describes how one MMA builtin maps onto LLVM intrinsics: the element counts
// of its fragments plus a table of intrinsic IDs, one per layout/satf
// combination.
struct NVPTXMmaInfo {
  // Number of elements in the A, B, C and D fragments, in that order.
  unsigned NumEltsA;
  unsigned NumEltsB;
  unsigned NumEltsC;
  unsigned NumEltsD;

  // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
  // over 'col' for layout. The index of non-satf variants is expected to match
  // the undocumented layout constants used by CUDA's mma.hpp.
  std::array<unsigned, 8> Variants;

  // Returns the entry of Variants selected by Layout (0..3) and Satf, or 0
  // when Layout is outside that range. The explicit range check keeps an
  // invalid Layout from aliasing another slot of the table (e.g. 5/non-satf
  // landing on a satfinite entry, or -1/satf wrapping to slot 3).
  unsigned getMMAIntrinsic(int Layout, bool Satf) const {
    constexpr int NumLayouts = 4;
    static_assert(2 * NumLayouts == 8,
                  "Variants holds one satf and one non-satf entry per layout");
    if (Layout < 0 || Layout >= NumLayouts)
      return 0;
    return Variants[static_cast<unsigned>(Layout) + (Satf ? NumLayouts : 0)];
  }
};
21023 // Returns an intrinsic that matches Layout and Satf for valid combinations of
21024 // Layout and Satf, 0 otherwise.
21025 static NVPTXMmaInfo
getNVPTXMmaInfo(unsigned BuiltinID
) {
21026 // clang-format off
21027 #define MMA_VARIANTS(geom, type) \
21028 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
21029 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21030 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
21031 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
21032 #define MMA_SATF_VARIANTS(geom, type) \
21033 MMA_VARIANTS(geom, type), \
21034 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
21035 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21036 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
21037 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
21038 // Sub-integer MMA only supports row.col layout.
21039 #define MMA_VARIANTS_I4(geom, type) \
21041 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21045 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21048 // b1 MMA does not support .satfinite.
21049 #define MMA_VARIANTS_B1_XOR(geom, type) \
21051 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
21058 #define MMA_VARIANTS_B1_AND(geom, type) \
21060 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
21068 switch (BuiltinID
) {
21070 // Note that 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
21071 // NumEltsN of return value are ordered as A,B,C,D.
21072 case NVPTX::BI__hmma_m16n16k16_mma_f16f16
:
21073 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16
, f16_f16
)}}};
21074 case NVPTX::BI__hmma_m16n16k16_mma_f32f16
:
21075 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16
, f32_f16
)}}};
21076 case NVPTX::BI__hmma_m16n16k16_mma_f16f32
:
21077 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16
, f16_f32
)}}};
21078 case NVPTX::BI__hmma_m16n16k16_mma_f32f32
:
21079 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16
, f32_f32
)}}};
21080 case NVPTX::BI__hmma_m32n8k16_mma_f16f16
:
21081 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16
, f16_f16
)}}};
21082 case NVPTX::BI__hmma_m32n8k16_mma_f32f16
:
21083 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16
, f32_f16
)}}};
21084 case NVPTX::BI__hmma_m32n8k16_mma_f16f32
:
21085 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16
, f16_f32
)}}};
21086 case NVPTX::BI__hmma_m32n8k16_mma_f32f32
:
21087 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16
, f32_f32
)}}};
21088 case NVPTX::BI__hmma_m8n32k16_mma_f16f16
:
21089 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16
, f16_f16
)}}};
21090 case NVPTX::BI__hmma_m8n32k16_mma_f32f16
:
21091 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16
, f32_f16
)}}};
21092 case NVPTX::BI__hmma_m8n32k16_mma_f16f32
:
21093 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16
, f16_f32
)}}};
21094 case NVPTX::BI__hmma_m8n32k16_mma_f32f32
:
21095 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16
, f32_f32
)}}};
21098 case NVPTX::BI__imma_m16n16k16_mma_s8
:
21099 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16
, s8
)}}};
21100 case NVPTX::BI__imma_m16n16k16_mma_u8
:
21101 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16
, u8
)}}};
21102 case NVPTX::BI__imma_m32n8k16_mma_s8
:
21103 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16
, s8
)}}};
21104 case NVPTX::BI__imma_m32n8k16_mma_u8
:
21105 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16
, u8
)}}};
21106 case NVPTX::BI__imma_m8n32k16_mma_s8
:
21107 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16
, s8
)}}};
21108 case NVPTX::BI__imma_m8n32k16_mma_u8
:
21109 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16
, u8
)}}};
21112 case NVPTX::BI__imma_m8n8k32_mma_s4
:
21113 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32
, s4
)}}};
21114 case NVPTX::BI__imma_m8n8k32_mma_u4
:
21115 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32
, u4
)}}};
21116 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1
:
21117 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128
, b1
)}}};
21118 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1
:
21119 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128
, b1
)}}};
21122 case NVPTX::BI__dmma_m8n8k4_mma_f64
:
21123 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4
, f64
)}}};
21125 // Alternate FP MMA
21126 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32
:
21127 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16
, bf16
)}}};
21128 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32
:
21129 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16
, bf16
)}}};
21130 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32
:
21131 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16
, bf16
)}}};
21132 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32
:
21133 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8
, tf32
)}}};
21135 llvm_unreachable("Unexpected builtin ID.");
21137 #undef MMA_VARIANTS
21138 #undef MMA_SATF_VARIANTS
21139 #undef MMA_VARIANTS_I4
21140 #undef MMA_VARIANTS_B1_AND
21141 #undef MMA_VARIANTS_B1_XOR
21144 static Value
*MakeLdu(unsigned IntrinsicID
, CodeGenFunction
&CGF
,
21145 const CallExpr
*E
) {
21146 Value
*Ptr
= CGF
.EmitScalarExpr(E
->getArg(0));
21147 QualType ArgType
= E
->getArg(0)->getType();
21148 clang::CharUnits Align
= CGF
.CGM
.getNaturalPointeeTypeAlignment(ArgType
);
21149 llvm::Type
*ElemTy
= CGF
.ConvertTypeForMem(ArgType
->getPointeeType());
21150 return CGF
.Builder
.CreateCall(
21151 CGF
.CGM
.getIntrinsic(IntrinsicID
, {ElemTy
, Ptr
->getType()}),
21152 {Ptr
, ConstantInt::get(CGF
.Builder
.getInt32Ty(), Align
.getQuantity())});
21155 static Value
*MakeLdg(CodeGenFunction
&CGF
, const CallExpr
*E
) {
21156 Value
*Ptr
= CGF
.EmitScalarExpr(E
->getArg(0));
21157 QualType ArgType
= E
->getArg(0)->getType();
21158 clang::CharUnits AlignV
= CGF
.CGM
.getNaturalPointeeTypeAlignment(ArgType
);
21159 llvm::Type
*ElemTy
= CGF
.ConvertTypeForMem(ArgType
->getPointeeType());
21161 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
21162 auto *ASC
= CGF
.Builder
.CreateAddrSpaceCast(Ptr
, CGF
.Builder
.getPtrTy(1));
21163 auto *LD
= CGF
.Builder
.CreateAlignedLoad(ElemTy
, ASC
, AlignV
.getAsAlign());
21164 MDNode
*MD
= MDNode::get(CGF
.Builder
.getContext(), {});
21165 LD
->setMetadata(LLVMContext::MD_invariant_load
, MD
);
21170 static Value
*MakeScopedAtomic(unsigned IntrinsicID
, CodeGenFunction
&CGF
,
21171 const CallExpr
*E
) {
21172 Value
*Ptr
= CGF
.EmitScalarExpr(E
->getArg(0));
21173 llvm::Type
*ElemTy
=
21174 CGF
.ConvertTypeForMem(E
->getArg(0)->getType()->getPointeeType());
21175 return CGF
.Builder
.CreateCall(
21176 CGF
.CGM
.getIntrinsic(IntrinsicID
, {ElemTy
, Ptr
->getType()}),
21177 {Ptr
, CGF
.EmitScalarExpr(E
->getArg(1))});
// Emits the intrinsic call for a builtin that exists in a two-argument and a
// three-argument form. When the call expression E has exactly three arguments,
// IntrinsicIDS is called with all three of them; otherwise IntrinsicID is
// called with the first two. Arguments are emitted left to right as scalars.
// NOTE(review): the tail of the parameter list and the function's braces are
// not visible in this view of the file — confirm against the full source.
21180 static Value
*MakeCpAsync(unsigned IntrinsicID
, unsigned IntrinsicIDS
,
21181 CodeGenFunction
&CGF
, const CallExpr
*E
,
// Pick the intrinsic by argument count.
21183 return E
->getNumArgs() == 3
// Three-argument form: IntrinsicIDS.
21184 ? CGF
.Builder
.CreateCall(CGF
.CGM
.getIntrinsic(IntrinsicIDS
),
21185 {CGF
.EmitScalarExpr(E
->getArg(0)),
21186 CGF
.EmitScalarExpr(E
->getArg(1)),
21187 CGF
.EmitScalarExpr(E
->getArg(2))})
// Any other argument count: IntrinsicID with the first two arguments.
21188 : CGF
.Builder
.CreateCall(CGF
.CGM
.getIntrinsic(IntrinsicID
),
21189 {CGF
.EmitScalarExpr(E
->getArg(0)),
21190 CGF
.EmitScalarExpr(E
->getArg(1))});
21193 static Value
*MakeHalfType(unsigned IntrinsicID
, unsigned BuiltinID
,
21194 const CallExpr
*E
, CodeGenFunction
&CGF
) {
21195 auto &C
= CGF
.CGM
.getContext();
21196 if (!(C
.getLangOpts().NativeHalfType
||
21197 !C
.getTargetInfo().useFP16ConversionIntrinsics())) {
21198 CGF
.CGM
.Error(E
->getExprLoc(), C
.BuiltinInfo
.getName(BuiltinID
).str() +
21199 " requires native half type support.");
21203 if (BuiltinID
== NVPTX::BI__nvvm_ldg_h
|| BuiltinID
== NVPTX::BI__nvvm_ldg_h2
)
21204 return MakeLdg(CGF
, E
);
21206 if (IntrinsicID
== Intrinsic::nvvm_ldu_global_f
)
21207 return MakeLdu(IntrinsicID
, CGF
, E
);
21209 SmallVector
<Value
*, 16> Args
;
21210 auto *F
= CGF
.CGM
.getIntrinsic(IntrinsicID
);
21211 auto *FTy
= F
->getFunctionType();
21212 unsigned ICEArguments
= 0;
21213 ASTContext::GetBuiltinTypeError Error
;
21214 C
.GetBuiltinType(BuiltinID
, Error
, &ICEArguments
);
21215 assert(Error
== ASTContext::GE_None
&& "Should not codegen an error");
21216 for (unsigned i
= 0, e
= E
->getNumArgs(); i
!= e
; ++i
) {
21217 assert((ICEArguments
& (1 << i
)) == 0);
21218 auto *ArgValue
= CGF
.EmitScalarExpr(E
->getArg(i
));
21219 auto *PTy
= FTy
->getParamType(i
);
21220 if (PTy
!= ArgValue
->getType())
21221 ArgValue
= CGF
.Builder
.CreateBitCast(ArgValue
, PTy
);
21222 Args
.push_back(ArgValue
);
21225 return CGF
.Builder
.CreateCall(F
, Args
);
21229 Value
*CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID
,
21230 const CallExpr
*E
) {
21231 switch (BuiltinID
) {
21232 case NVPTX::BI__nvvm_atom_add_gen_i
:
21233 case NVPTX::BI__nvvm_atom_add_gen_l
:
21234 case NVPTX::BI__nvvm_atom_add_gen_ll
:
21235 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add
, E
);
21237 case NVPTX::BI__nvvm_atom_sub_gen_i
:
21238 case NVPTX::BI__nvvm_atom_sub_gen_l
:
21239 case NVPTX::BI__nvvm_atom_sub_gen_ll
:
21240 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub
, E
);
21242 case NVPTX::BI__nvvm_atom_and_gen_i
:
21243 case NVPTX::BI__nvvm_atom_and_gen_l
:
21244 case NVPTX::BI__nvvm_atom_and_gen_ll
:
21245 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And
, E
);
21247 case NVPTX::BI__nvvm_atom_or_gen_i
:
21248 case NVPTX::BI__nvvm_atom_or_gen_l
:
21249 case NVPTX::BI__nvvm_atom_or_gen_ll
:
21250 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or
, E
);
21252 case NVPTX::BI__nvvm_atom_xor_gen_i
:
21253 case NVPTX::BI__nvvm_atom_xor_gen_l
:
21254 case NVPTX::BI__nvvm_atom_xor_gen_ll
:
21255 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor
, E
);
21257 case NVPTX::BI__nvvm_atom_xchg_gen_i
:
21258 case NVPTX::BI__nvvm_atom_xchg_gen_l
:
21259 case NVPTX::BI__nvvm_atom_xchg_gen_ll
:
21260 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg
, E
);
21262 case NVPTX::BI__nvvm_atom_max_gen_i
:
21263 case NVPTX::BI__nvvm_atom_max_gen_l
:
21264 case NVPTX::BI__nvvm_atom_max_gen_ll
:
21265 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max
, E
);
21267 case NVPTX::BI__nvvm_atom_max_gen_ui
:
21268 case NVPTX::BI__nvvm_atom_max_gen_ul
:
21269 case NVPTX::BI__nvvm_atom_max_gen_ull
:
21270 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax
, E
);
21272 case NVPTX::BI__nvvm_atom_min_gen_i
:
21273 case NVPTX::BI__nvvm_atom_min_gen_l
:
21274 case NVPTX::BI__nvvm_atom_min_gen_ll
:
21275 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min
, E
);
21277 case NVPTX::BI__nvvm_atom_min_gen_ui
:
21278 case NVPTX::BI__nvvm_atom_min_gen_ul
:
21279 case NVPTX::BI__nvvm_atom_min_gen_ull
:
21280 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin
, E
);
21282 case NVPTX::BI__nvvm_atom_cas_gen_us
:
21283 case NVPTX::BI__nvvm_atom_cas_gen_i
:
21284 case NVPTX::BI__nvvm_atom_cas_gen_l
:
21285 case NVPTX::BI__nvvm_atom_cas_gen_ll
:
21286 // __nvvm_atom_cas_gen_* should return the old value rather than a
// success flag, hence ReturnBool=false below.
21288 return MakeAtomicCmpXchgValue(*this, E
, /*ReturnBool=*/false);
21290 case NVPTX::BI__nvvm_atom_add_gen_f
:
21291 case NVPTX::BI__nvvm_atom_add_gen_d
: {
21292 Address DestAddr
= EmitPointerWithAlignment(E
->getArg(0));
21293 Value
*Val
= EmitScalarExpr(E
->getArg(1));
21295 return Builder
.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd
, DestAddr
, Val
,
21296 AtomicOrdering::SequentiallyConsistent
);
21299 case NVPTX::BI__nvvm_atom_inc_gen_ui
: {
21300 Value
*Ptr
= EmitScalarExpr(E
->getArg(0));
21301 Value
*Val
= EmitScalarExpr(E
->getArg(1));
21302 Function
*FnALI32
=
21303 CGM
.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32
, Ptr
->getType());
21304 return Builder
.CreateCall(FnALI32
, {Ptr
, Val
});
21307 case NVPTX::BI__nvvm_atom_dec_gen_ui
: {
21308 Value
*Ptr
= EmitScalarExpr(E
->getArg(0));
21309 Value
*Val
= EmitScalarExpr(E
->getArg(1));
21310 Function
*FnALD32
=
21311 CGM
.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32
, Ptr
->getType());
21312 return Builder
.CreateCall(FnALD32
, {Ptr
, Val
});
21315 case NVPTX::BI__nvvm_ldg_c
:
21316 case NVPTX::BI__nvvm_ldg_sc
:
21317 case NVPTX::BI__nvvm_ldg_c2
:
21318 case NVPTX::BI__nvvm_ldg_sc2
:
21319 case NVPTX::BI__nvvm_ldg_c4
:
21320 case NVPTX::BI__nvvm_ldg_sc4
:
21321 case NVPTX::BI__nvvm_ldg_s
:
21322 case NVPTX::BI__nvvm_ldg_s2
:
21323 case NVPTX::BI__nvvm_ldg_s4
:
21324 case NVPTX::BI__nvvm_ldg_i
:
21325 case NVPTX::BI__nvvm_ldg_i2
:
21326 case NVPTX::BI__nvvm_ldg_i4
:
21327 case NVPTX::BI__nvvm_ldg_l
:
21328 case NVPTX::BI__nvvm_ldg_l2
:
21329 case NVPTX::BI__nvvm_ldg_ll
:
21330 case NVPTX::BI__nvvm_ldg_ll2
:
21331 case NVPTX::BI__nvvm_ldg_uc
:
21332 case NVPTX::BI__nvvm_ldg_uc2
:
21333 case NVPTX::BI__nvvm_ldg_uc4
:
21334 case NVPTX::BI__nvvm_ldg_us
:
21335 case NVPTX::BI__nvvm_ldg_us2
:
21336 case NVPTX::BI__nvvm_ldg_us4
:
21337 case NVPTX::BI__nvvm_ldg_ui
:
21338 case NVPTX::BI__nvvm_ldg_ui2
:
21339 case NVPTX::BI__nvvm_ldg_ui4
:
21340 case NVPTX::BI__nvvm_ldg_ul
:
21341 case NVPTX::BI__nvvm_ldg_ul2
:
21342 case NVPTX::BI__nvvm_ldg_ull
:
21343 case NVPTX::BI__nvvm_ldg_ull2
:
21344 case NVPTX::BI__nvvm_ldg_f
:
21345 case NVPTX::BI__nvvm_ldg_f2
:
21346 case NVPTX::BI__nvvm_ldg_f4
:
21347 case NVPTX::BI__nvvm_ldg_d
:
21348 case NVPTX::BI__nvvm_ldg_d2
:
21349 // PTX Interoperability section 2.2: "For a vector with an even number of
21350 // elements, its alignment is set to number of elements times the alignment
21351 // of its member: n*alignof(t)."
21352 return MakeLdg(*this, E
);
21354 case NVPTX::BI__nvvm_ldu_c
:
21355 case NVPTX::BI__nvvm_ldu_sc
:
21356 case NVPTX::BI__nvvm_ldu_c2
:
21357 case NVPTX::BI__nvvm_ldu_sc2
:
21358 case NVPTX::BI__nvvm_ldu_c4
:
21359 case NVPTX::BI__nvvm_ldu_sc4
:
21360 case NVPTX::BI__nvvm_ldu_s
:
21361 case NVPTX::BI__nvvm_ldu_s2
:
21362 case NVPTX::BI__nvvm_ldu_s4
:
21363 case NVPTX::BI__nvvm_ldu_i
:
21364 case NVPTX::BI__nvvm_ldu_i2
:
21365 case NVPTX::BI__nvvm_ldu_i4
:
21366 case NVPTX::BI__nvvm_ldu_l
:
21367 case NVPTX::BI__nvvm_ldu_l2
:
21368 case NVPTX::BI__nvvm_ldu_ll
:
21369 case NVPTX::BI__nvvm_ldu_ll2
:
21370 case NVPTX::BI__nvvm_ldu_uc
:
21371 case NVPTX::BI__nvvm_ldu_uc2
:
21372 case NVPTX::BI__nvvm_ldu_uc4
:
21373 case NVPTX::BI__nvvm_ldu_us
:
21374 case NVPTX::BI__nvvm_ldu_us2
:
21375 case NVPTX::BI__nvvm_ldu_us4
:
21376 case NVPTX::BI__nvvm_ldu_ui
:
21377 case NVPTX::BI__nvvm_ldu_ui2
:
21378 case NVPTX::BI__nvvm_ldu_ui4
:
21379 case NVPTX::BI__nvvm_ldu_ul
:
21380 case NVPTX::BI__nvvm_ldu_ul2
:
21381 case NVPTX::BI__nvvm_ldu_ull
:
21382 case NVPTX::BI__nvvm_ldu_ull2
:
21383 return MakeLdu(Intrinsic::nvvm_ldu_global_i
, *this, E
);
21384 case NVPTX::BI__nvvm_ldu_f
:
21385 case NVPTX::BI__nvvm_ldu_f2
:
21386 case NVPTX::BI__nvvm_ldu_f4
:
21387 case NVPTX::BI__nvvm_ldu_d
:
21388 case NVPTX::BI__nvvm_ldu_d2
:
21389 return MakeLdu(Intrinsic::nvvm_ldu_global_f
, *this, E
);
21391 case NVPTX::BI__nvvm_atom_cta_add_gen_i
:
21392 case NVPTX::BI__nvvm_atom_cta_add_gen_l
:
21393 case NVPTX::BI__nvvm_atom_cta_add_gen_ll
:
21394 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta
, *this, E
);
21395 case NVPTX::BI__nvvm_atom_sys_add_gen_i
:
21396 case NVPTX::BI__nvvm_atom_sys_add_gen_l
:
21397 case NVPTX::BI__nvvm_atom_sys_add_gen_ll
:
21398 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys
, *this, E
);
21399 case NVPTX::BI__nvvm_atom_cta_add_gen_f
:
21400 case NVPTX::BI__nvvm_atom_cta_add_gen_d
:
21401 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta
, *this, E
);
21402 case NVPTX::BI__nvvm_atom_sys_add_gen_f
:
21403 case NVPTX::BI__nvvm_atom_sys_add_gen_d
:
21404 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys
, *this, E
);
21405 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i
:
21406 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l
:
21407 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll
:
21408 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta
, *this, E
);
21409 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i
:
21410 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l
:
21411 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll
:
21412 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys
, *this, E
);
21413 case NVPTX::BI__nvvm_atom_cta_max_gen_i
:
21414 case NVPTX::BI__nvvm_atom_cta_max_gen_ui
:
21415 case NVPTX::BI__nvvm_atom_cta_max_gen_l
:
21416 case NVPTX::BI__nvvm_atom_cta_max_gen_ul
:
21417 case NVPTX::BI__nvvm_atom_cta_max_gen_ll
:
21418 case NVPTX::BI__nvvm_atom_cta_max_gen_ull
:
21419 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta
, *this, E
);
21420 case NVPTX::BI__nvvm_atom_sys_max_gen_i
:
21421 case NVPTX::BI__nvvm_atom_sys_max_gen_ui
:
21422 case NVPTX::BI__nvvm_atom_sys_max_gen_l
:
21423 case NVPTX::BI__nvvm_atom_sys_max_gen_ul
:
21424 case NVPTX::BI__nvvm_atom_sys_max_gen_ll
:
21425 case NVPTX::BI__nvvm_atom_sys_max_gen_ull
:
21426 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys
, *this, E
);
21427 case NVPTX::BI__nvvm_atom_cta_min_gen_i
:
21428 case NVPTX::BI__nvvm_atom_cta_min_gen_ui
:
21429 case NVPTX::BI__nvvm_atom_cta_min_gen_l
:
21430 case NVPTX::BI__nvvm_atom_cta_min_gen_ul
:
21431 case NVPTX::BI__nvvm_atom_cta_min_gen_ll
:
21432 case NVPTX::BI__nvvm_atom_cta_min_gen_ull
:
21433 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta
, *this, E
);
21434 case NVPTX::BI__nvvm_atom_sys_min_gen_i
:
21435 case NVPTX::BI__nvvm_atom_sys_min_gen_ui
:
21436 case NVPTX::BI__nvvm_atom_sys_min_gen_l
:
21437 case NVPTX::BI__nvvm_atom_sys_min_gen_ul
:
21438 case NVPTX::BI__nvvm_atom_sys_min_gen_ll
:
21439 case NVPTX::BI__nvvm_atom_sys_min_gen_ull
:
21440 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys
, *this, E
);
21441 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui
:
21442 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta
, *this, E
);
21443 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui
:
21444 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta
, *this, E
);
21445 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui
:
21446 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys
, *this, E
);
21447 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui
:
21448 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys
, *this, E
);
21449 case NVPTX::BI__nvvm_atom_cta_and_gen_i
:
21450 case NVPTX::BI__nvvm_atom_cta_and_gen_l
:
21451 case NVPTX::BI__nvvm_atom_cta_and_gen_ll
:
21452 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta
, *this, E
);
21453 case NVPTX::BI__nvvm_atom_sys_and_gen_i
:
21454 case NVPTX::BI__nvvm_atom_sys_and_gen_l
:
21455 case NVPTX::BI__nvvm_atom_sys_and_gen_ll
:
21456 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys
, *this, E
);
21457 case NVPTX::BI__nvvm_atom_cta_or_gen_i
:
21458 case NVPTX::BI__nvvm_atom_cta_or_gen_l
:
21459 case NVPTX::BI__nvvm_atom_cta_or_gen_ll
:
21460 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta
, *this, E
);
21461 case NVPTX::BI__nvvm_atom_sys_or_gen_i
:
21462 case NVPTX::BI__nvvm_atom_sys_or_gen_l
:
21463 case NVPTX::BI__nvvm_atom_sys_or_gen_ll
:
21464 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys
, *this, E
);
21465 case NVPTX::BI__nvvm_atom_cta_xor_gen_i
:
21466 case NVPTX::BI__nvvm_atom_cta_xor_gen_l
:
21467 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll
:
21468 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta
, *this, E
);
21469 case NVPTX::BI__nvvm_atom_sys_xor_gen_i
:
21470 case NVPTX::BI__nvvm_atom_sys_xor_gen_l
:
21471 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll
:
21472 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys
, *this, E
);
21473 case NVPTX::BI__nvvm_atom_cta_cas_gen_us
:
21474 case NVPTX::BI__nvvm_atom_cta_cas_gen_i
:
21475 case NVPTX::BI__nvvm_atom_cta_cas_gen_l
:
21476 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll
: {
21477 Value
*Ptr
= EmitScalarExpr(E
->getArg(0));
21478 llvm::Type
*ElemTy
=
21479 ConvertTypeForMem(E
->getArg(0)->getType()->getPointeeType());
21480 return Builder
.CreateCall(
21482 Intrinsic::nvvm_atomic_cas_gen_i_cta
, {ElemTy
, Ptr
->getType()}),
21483 {Ptr
, EmitScalarExpr(E
->getArg(1)), EmitScalarExpr(E
->getArg(2))});
21485 case NVPTX::BI__nvvm_atom_sys_cas_gen_us
:
21486 case NVPTX::BI__nvvm_atom_sys_cas_gen_i
:
21487 case NVPTX::BI__nvvm_atom_sys_cas_gen_l
:
21488 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll
: {
21489 Value
*Ptr
= EmitScalarExpr(E
->getArg(0));
21490 llvm::Type
*ElemTy
=
21491 ConvertTypeForMem(E
->getArg(0)->getType()->getPointeeType());
21492 return Builder
.CreateCall(
21494 Intrinsic::nvvm_atomic_cas_gen_i_sys
, {ElemTy
, Ptr
->getType()}),
21495 {Ptr
, EmitScalarExpr(E
->getArg(1)), EmitScalarExpr(E
->getArg(2))});
21497 case NVPTX::BI__nvvm_match_all_sync_i32p
:
21498 case NVPTX::BI__nvvm_match_all_sync_i64p
: {
21499 Value
*Mask
= EmitScalarExpr(E
->getArg(0));
21500 Value
*Val
= EmitScalarExpr(E
->getArg(1));
21501 Address PredOutPtr
= EmitPointerWithAlignment(E
->getArg(2));
21502 Value
*ResultPair
= Builder
.CreateCall(
21503 CGM
.getIntrinsic(BuiltinID
== NVPTX::BI__nvvm_match_all_sync_i32p
21504 ? Intrinsic::nvvm_match_all_sync_i32p
21505 : Intrinsic::nvvm_match_all_sync_i64p
),
21507 Value
*Pred
= Builder
.CreateZExt(Builder
.CreateExtractValue(ResultPair
, 1),
21508 PredOutPtr
.getElementType());
21509 Builder
.CreateStore(Pred
, PredOutPtr
);
21510 return Builder
.CreateExtractValue(ResultPair
, 0);
21514 case NVPTX::BI__hmma_m16n16k16_ld_a
:
21515 case NVPTX::BI__hmma_m16n16k16_ld_b
:
21516 case NVPTX::BI__hmma_m16n16k16_ld_c_f16
:
21517 case NVPTX::BI__hmma_m16n16k16_ld_c_f32
:
21518 case NVPTX::BI__hmma_m32n8k16_ld_a
:
21519 case NVPTX::BI__hmma_m32n8k16_ld_b
:
21520 case NVPTX::BI__hmma_m32n8k16_ld_c_f16
:
21521 case NVPTX::BI__hmma_m32n8k16_ld_c_f32
:
21522 case NVPTX::BI__hmma_m8n32k16_ld_a
:
21523 case NVPTX::BI__hmma_m8n32k16_ld_b
:
21524 case NVPTX::BI__hmma_m8n32k16_ld_c_f16
:
21525 case NVPTX::BI__hmma_m8n32k16_ld_c_f32
:
21526 // Integer MMA loads.
21527 case NVPTX::BI__imma_m16n16k16_ld_a_s8
:
21528 case NVPTX::BI__imma_m16n16k16_ld_a_u8
:
21529 case NVPTX::BI__imma_m16n16k16_ld_b_s8
:
21530 case NVPTX::BI__imma_m16n16k16_ld_b_u8
:
21531 case NVPTX::BI__imma_m16n16k16_ld_c
:
21532 case NVPTX::BI__imma_m32n8k16_ld_a_s8
:
21533 case NVPTX::BI__imma_m32n8k16_ld_a_u8
:
21534 case NVPTX::BI__imma_m32n8k16_ld_b_s8
:
21535 case NVPTX::BI__imma_m32n8k16_ld_b_u8
:
21536 case NVPTX::BI__imma_m32n8k16_ld_c
:
21537 case NVPTX::BI__imma_m8n32k16_ld_a_s8
:
21538 case NVPTX::BI__imma_m8n32k16_ld_a_u8
:
21539 case NVPTX::BI__imma_m8n32k16_ld_b_s8
:
21540 case NVPTX::BI__imma_m8n32k16_ld_b_u8
:
21541 case NVPTX::BI__imma_m8n32k16_ld_c
:
21542 // Sub-integer MMA loads.
21543 case NVPTX::BI__imma_m8n8k32_ld_a_s4
:
21544 case NVPTX::BI__imma_m8n8k32_ld_a_u4
:
21545 case NVPTX::BI__imma_m8n8k32_ld_b_s4
:
21546 case NVPTX::BI__imma_m8n8k32_ld_b_u4
:
21547 case NVPTX::BI__imma_m8n8k32_ld_c
:
21548 case NVPTX::BI__bmma_m8n8k128_ld_a_b1
:
21549 case NVPTX::BI__bmma_m8n8k128_ld_b_b1
:
21550 case NVPTX::BI__bmma_m8n8k128_ld_c
:
21551 // Double MMA loads.
21552 case NVPTX::BI__dmma_m8n8k4_ld_a
:
21553 case NVPTX::BI__dmma_m8n8k4_ld_b
:
21554 case NVPTX::BI__dmma_m8n8k4_ld_c
:
21555 // Alternate float MMA loads.
21556 case NVPTX::BI__mma_bf16_m16n16k16_ld_a
:
21557 case NVPTX::BI__mma_bf16_m16n16k16_ld_b
:
21558 case NVPTX::BI__mma_bf16_m8n32k16_ld_a
:
21559 case NVPTX::BI__mma_bf16_m8n32k16_ld_b
:
21560 case NVPTX::BI__mma_bf16_m32n8k16_ld_a
:
21561 case NVPTX::BI__mma_bf16_m32n8k16_ld_b
:
21562 case NVPTX::BI__mma_tf32_m16n16k8_ld_a
:
21563 case NVPTX::BI__mma_tf32_m16n16k8_ld_b
:
21564 case NVPTX::BI__mma_tf32_m16n16k8_ld_c
: {
21565 Address Dst
= EmitPointerWithAlignment(E
->getArg(0));
21566 Value
*Src
= EmitScalarExpr(E
->getArg(1));
21567 Value
*Ldm
= EmitScalarExpr(E
->getArg(2));
21568 std::optional
<llvm::APSInt
> isColMajorArg
=
21569 E
->getArg(3)->getIntegerConstantExpr(getContext());
21570 if (!isColMajorArg
)
21572 bool isColMajor
= isColMajorArg
->getSExtValue();
21573 NVPTXMmaLdstInfo II
= getNVPTXMmaLdstInfo(BuiltinID
);
21574 unsigned IID
= isColMajor
? II
.IID_col
: II
.IID_row
;
21579 Builder
.CreateCall(CGM
.getIntrinsic(IID
, Src
->getType()), {Src
, Ldm
});
21581 // Save returned values.
21582 assert(II
.NumResults
);
21583 if (II
.NumResults
== 1) {
21584 Builder
.CreateAlignedStore(Result
, Dst
.emitRawPointer(*this),
21585 CharUnits::fromQuantity(4));
21587 for (unsigned i
= 0; i
< II
.NumResults
; ++i
) {
21588 Builder
.CreateAlignedStore(
21589 Builder
.CreateBitCast(Builder
.CreateExtractValue(Result
, i
),
21590 Dst
.getElementType()),
21591 Builder
.CreateGEP(Dst
.getElementType(), Dst
.emitRawPointer(*this),
21592 llvm::ConstantInt::get(IntTy
, i
)),
21593 CharUnits::fromQuantity(4));
21599 case NVPTX::BI__hmma_m16n16k16_st_c_f16
:
21600 case NVPTX::BI__hmma_m16n16k16_st_c_f32
:
21601 case NVPTX::BI__hmma_m32n8k16_st_c_f16
:
21602 case NVPTX::BI__hmma_m32n8k16_st_c_f32
:
21603 case NVPTX::BI__hmma_m8n32k16_st_c_f16
:
21604 case NVPTX::BI__hmma_m8n32k16_st_c_f32
:
21605 case NVPTX::BI__imma_m16n16k16_st_c_i32
:
21606 case NVPTX::BI__imma_m32n8k16_st_c_i32
:
21607 case NVPTX::BI__imma_m8n32k16_st_c_i32
:
21608 case NVPTX::BI__imma_m8n8k32_st_c_i32
:
21609 case NVPTX::BI__bmma_m8n8k128_st_c_i32
:
21610 case NVPTX::BI__dmma_m8n8k4_st_c_f64
:
21611 case NVPTX::BI__mma_m16n16k8_st_c_f32
: {
21612 Value
*Dst
= EmitScalarExpr(E
->getArg(0));
21613 Address Src
= EmitPointerWithAlignment(E
->getArg(1));
21614 Value
*Ldm
= EmitScalarExpr(E
->getArg(2));
21615 std::optional
<llvm::APSInt
> isColMajorArg
=
21616 E
->getArg(3)->getIntegerConstantExpr(getContext());
21617 if (!isColMajorArg
)
21619 bool isColMajor
= isColMajorArg
->getSExtValue();
21620 NVPTXMmaLdstInfo II
= getNVPTXMmaLdstInfo(BuiltinID
);
21621 unsigned IID
= isColMajor
? II
.IID_col
: II
.IID_row
;
21624 Function
*Intrinsic
=
21625 CGM
.getIntrinsic(IID
, Dst
->getType());
21626 llvm::Type
*ParamType
= Intrinsic
->getFunctionType()->getParamType(1);
21627 SmallVector
<Value
*, 10> Values
= {Dst
};
21628 for (unsigned i
= 0; i
< II
.NumResults
; ++i
) {
21629 Value
*V
= Builder
.CreateAlignedLoad(
21630 Src
.getElementType(),
21631 Builder
.CreateGEP(Src
.getElementType(), Src
.emitRawPointer(*this),
21632 llvm::ConstantInt::get(IntTy
, i
)),
21633 CharUnits::fromQuantity(4));
21634 Values
.push_back(Builder
.CreateBitCast(V
, ParamType
));
21636 Values
.push_back(Ldm
);
21637 Value
*Result
= Builder
.CreateCall(Intrinsic
, Values
);
21641 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
21642 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
21643 case NVPTX::BI__hmma_m16n16k16_mma_f16f16
:
21644 case NVPTX::BI__hmma_m16n16k16_mma_f32f16
:
21645 case NVPTX::BI__hmma_m16n16k16_mma_f32f32
:
21646 case NVPTX::BI__hmma_m16n16k16_mma_f16f32
:
21647 case NVPTX::BI__hmma_m32n8k16_mma_f16f16
:
21648 case NVPTX::BI__hmma_m32n8k16_mma_f32f16
:
21649 case NVPTX::BI__hmma_m32n8k16_mma_f32f32
:
21650 case NVPTX::BI__hmma_m32n8k16_mma_f16f32
:
21651 case NVPTX::BI__hmma_m8n32k16_mma_f16f16
:
21652 case NVPTX::BI__hmma_m8n32k16_mma_f32f16
:
21653 case NVPTX::BI__hmma_m8n32k16_mma_f32f32
:
21654 case NVPTX::BI__hmma_m8n32k16_mma_f16f32
:
21655 case NVPTX::BI__imma_m16n16k16_mma_s8
:
21656 case NVPTX::BI__imma_m16n16k16_mma_u8
:
21657 case NVPTX::BI__imma_m32n8k16_mma_s8
:
21658 case NVPTX::BI__imma_m32n8k16_mma_u8
:
21659 case NVPTX::BI__imma_m8n32k16_mma_s8
:
21660 case NVPTX::BI__imma_m8n32k16_mma_u8
:
21661 case NVPTX::BI__imma_m8n8k32_mma_s4
:
21662 case NVPTX::BI__imma_m8n8k32_mma_u4
:
21663 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1
:
21664 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1
:
21665 case NVPTX::BI__dmma_m8n8k4_mma_f64
:
21666 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32
:
21667 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32
:
21668 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32
:
21669 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32
: {
21670 Address Dst
= EmitPointerWithAlignment(E
->getArg(0));
21671 Address SrcA
= EmitPointerWithAlignment(E
->getArg(1));
21672 Address SrcB
= EmitPointerWithAlignment(E
->getArg(2));
21673 Address SrcC
= EmitPointerWithAlignment(E
->getArg(3));
21674 std::optional
<llvm::APSInt
> LayoutArg
=
21675 E
->getArg(4)->getIntegerConstantExpr(getContext());
21678 int Layout
= LayoutArg
->getSExtValue();
21679 if (Layout
< 0 || Layout
> 3)
21681 llvm::APSInt SatfArg
;
21682 if (BuiltinID
== NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1
||
21683 BuiltinID
== NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1
)
21684 SatfArg
= 0; // .b1 does not have satf argument.
21685 else if (std::optional
<llvm::APSInt
> OptSatfArg
=
21686 E
->getArg(5)->getIntegerConstantExpr(getContext()))
21687 SatfArg
= *OptSatfArg
;
21690 bool Satf
= SatfArg
.getSExtValue();
21691 NVPTXMmaInfo MI
= getNVPTXMmaInfo(BuiltinID
);
21692 unsigned IID
= MI
.getMMAIntrinsic(Layout
, Satf
);
21693 if (IID
== 0) // Unsupported combination of Layout/Satf.
21696 SmallVector
<Value
*, 24> Values
;
21697 Function
*Intrinsic
= CGM
.getIntrinsic(IID
);
21698 llvm::Type
*AType
= Intrinsic
->getFunctionType()->getParamType(0);
21700 for (unsigned i
= 0; i
< MI
.NumEltsA
; ++i
) {
21701 Value
*V
= Builder
.CreateAlignedLoad(
21702 SrcA
.getElementType(),
21703 Builder
.CreateGEP(SrcA
.getElementType(), SrcA
.emitRawPointer(*this),
21704 llvm::ConstantInt::get(IntTy
, i
)),
21705 CharUnits::fromQuantity(4));
21706 Values
.push_back(Builder
.CreateBitCast(V
, AType
));
21709 llvm::Type
*BType
= Intrinsic
->getFunctionType()->getParamType(MI
.NumEltsA
);
21710 for (unsigned i
= 0; i
< MI
.NumEltsB
; ++i
) {
21711 Value
*V
= Builder
.CreateAlignedLoad(
21712 SrcB
.getElementType(),
21713 Builder
.CreateGEP(SrcB
.getElementType(), SrcB
.emitRawPointer(*this),
21714 llvm::ConstantInt::get(IntTy
, i
)),
21715 CharUnits::fromQuantity(4));
21716 Values
.push_back(Builder
.CreateBitCast(V
, BType
));
21719 llvm::Type
*CType
=
21720 Intrinsic
->getFunctionType()->getParamType(MI
.NumEltsA
+ MI
.NumEltsB
);
21721 for (unsigned i
= 0; i
< MI
.NumEltsC
; ++i
) {
21722 Value
*V
= Builder
.CreateAlignedLoad(
21723 SrcC
.getElementType(),
21724 Builder
.CreateGEP(SrcC
.getElementType(), SrcC
.emitRawPointer(*this),
21725 llvm::ConstantInt::get(IntTy
, i
)),
21726 CharUnits::fromQuantity(4));
21727 Values
.push_back(Builder
.CreateBitCast(V
, CType
));
21729 Value
*Result
= Builder
.CreateCall(Intrinsic
, Values
);
21730 llvm::Type
*DType
= Dst
.getElementType();
21731 for (unsigned i
= 0; i
< MI
.NumEltsD
; ++i
)
21732 Builder
.CreateAlignedStore(
21733 Builder
.CreateBitCast(Builder
.CreateExtractValue(Result
, i
), DType
),
21734 Builder
.CreateGEP(Dst
.getElementType(), Dst
.emitRawPointer(*this),
21735 llvm::ConstantInt::get(IntTy
, i
)),
21736 CharUnits::fromQuantity(4));
21739 // The following builtins require half type support
21740 case NVPTX::BI__nvvm_ex2_approx_f16
:
21741 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16
, BuiltinID
, E
, *this);
21742 case NVPTX::BI__nvvm_ex2_approx_f16x2
:
21743 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2
, BuiltinID
, E
, *this);
21744 case NVPTX::BI__nvvm_ff2f16x2_rn
:
21745 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn
, BuiltinID
, E
, *this);
21746 case NVPTX::BI__nvvm_ff2f16x2_rn_relu
:
21747 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu
, BuiltinID
, E
, *this);
21748 case NVPTX::BI__nvvm_ff2f16x2_rz
:
21749 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz
, BuiltinID
, E
, *this);
21750 case NVPTX::BI__nvvm_ff2f16x2_rz_relu
:
21751 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu
, BuiltinID
, E
, *this);
21752 case NVPTX::BI__nvvm_fma_rn_f16
:
21753 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16
, BuiltinID
, E
, *this);
21754 case NVPTX::BI__nvvm_fma_rn_f16x2
:
21755 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2
, BuiltinID
, E
, *this);
21756 case NVPTX::BI__nvvm_fma_rn_ftz_f16
:
21757 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16
, BuiltinID
, E
, *this);
21758 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2
:
21759 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2
, BuiltinID
, E
, *this);
21760 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16
:
21761 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16
, BuiltinID
, E
,
21763 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2
:
21764 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2
, BuiltinID
, E
,
21766 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16
:
21767 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16
, BuiltinID
, E
,
21769 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2
:
21770 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2
, BuiltinID
, E
,
21772 case NVPTX::BI__nvvm_fma_rn_relu_f16
:
21773 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16
, BuiltinID
, E
, *this);
21774 case NVPTX::BI__nvvm_fma_rn_relu_f16x2
:
21775 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2
, BuiltinID
, E
, *this);
21776 case NVPTX::BI__nvvm_fma_rn_sat_f16
:
21777 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16
, BuiltinID
, E
, *this);
21778 case NVPTX::BI__nvvm_fma_rn_sat_f16x2
:
21779 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2
, BuiltinID
, E
, *this);
21780 case NVPTX::BI__nvvm_fmax_f16
:
21781 return MakeHalfType(Intrinsic::nvvm_fmax_f16
, BuiltinID
, E
, *this);
21782 case NVPTX::BI__nvvm_fmax_f16x2
:
21783 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2
, BuiltinID
, E
, *this);
21784 case NVPTX::BI__nvvm_fmax_ftz_f16
:
21785 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16
, BuiltinID
, E
, *this);
21786 case NVPTX::BI__nvvm_fmax_ftz_f16x2
:
21787 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2
, BuiltinID
, E
, *this);
21788 case NVPTX::BI__nvvm_fmax_ftz_nan_f16
:
21789 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16
, BuiltinID
, E
, *this);
21790 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2
:
21791 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2
, BuiltinID
, E
,
21793 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16
:
21794 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16
, BuiltinID
,
21796 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2
:
21797 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2
,
21798 BuiltinID
, E
, *this);
21799 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16
:
21800 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16
, BuiltinID
, E
,
21802 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2
:
21803 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2
, BuiltinID
,
21805 case NVPTX::BI__nvvm_fmax_nan_f16
:
21806 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16
, BuiltinID
, E
, *this);
21807 case NVPTX::BI__nvvm_fmax_nan_f16x2
:
21808 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2
, BuiltinID
, E
, *this);
21809 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16
:
21810 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16
, BuiltinID
, E
,
21812 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2
:
21813 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2
, BuiltinID
,
21815 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16
:
21816 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16
, BuiltinID
, E
,
21818 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2
:
21819 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2
, BuiltinID
, E
,
21821 case NVPTX::BI__nvvm_fmin_f16
:
21822 return MakeHalfType(Intrinsic::nvvm_fmin_f16
, BuiltinID
, E
, *this);
21823 case NVPTX::BI__nvvm_fmin_f16x2
:
21824 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2
, BuiltinID
, E
, *this);
21825 case NVPTX::BI__nvvm_fmin_ftz_f16
:
21826 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16
, BuiltinID
, E
, *this);
21827 case NVPTX::BI__nvvm_fmin_ftz_f16x2
:
21828 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2
, BuiltinID
, E
, *this);
21829 case NVPTX::BI__nvvm_fmin_ftz_nan_f16
:
21830 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16
, BuiltinID
, E
, *this);
21831 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2
:
21832 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2
, BuiltinID
, E
,
21834 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16
:
21835 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16
, BuiltinID
,
21837 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2
:
21838 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2
,
21839 BuiltinID
, E
, *this);
21840 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16
:
21841 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16
, BuiltinID
, E
,
21843 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2
:
21844 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2
, BuiltinID
,
21846 case NVPTX::BI__nvvm_fmin_nan_f16
:
21847 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16
, BuiltinID
, E
, *this);
21848 case NVPTX::BI__nvvm_fmin_nan_f16x2
:
21849 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2
, BuiltinID
, E
, *this);
21850 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16
:
21851 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16
, BuiltinID
, E
,
21853 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2
:
21854 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2
, BuiltinID
,
21856 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16
:
21857 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16
, BuiltinID
, E
,
21859 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2
:
21860 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2
, BuiltinID
, E
,
21862 case NVPTX::BI__nvvm_ldg_h
:
21863 case NVPTX::BI__nvvm_ldg_h2
:
21864 return MakeHalfType(Intrinsic::not_intrinsic
, BuiltinID
, E
, *this);
21865 case NVPTX::BI__nvvm_ldu_h
:
21866 case NVPTX::BI__nvvm_ldu_h2
:
21867 return MakeHalfType(Intrinsic::nvvm_ldu_global_f
, BuiltinID
, E
, *this);
21868 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4
:
21869 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4
,
21870 Intrinsic::nvvm_cp_async_ca_shared_global_4_s
, *this, E
,
21872 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8
:
21873 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8
,
21874 Intrinsic::nvvm_cp_async_ca_shared_global_8_s
, *this, E
,
21876 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16
:
21877 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16
,
21878 Intrinsic::nvvm_cp_async_ca_shared_global_16_s
, *this, E
,
21880 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16
:
21881 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16
,
21882 Intrinsic::nvvm_cp_async_cg_shared_global_16_s
, *this, E
,
21884 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x
:
21885 return Builder
.CreateCall(
21886 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x
));
21887 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y
:
21888 return Builder
.CreateCall(
21889 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y
));
21890 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z
:
21891 return Builder
.CreateCall(
21892 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z
));
21893 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w
:
21894 return Builder
.CreateCall(
21895 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w
));
21896 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x
:
21897 return Builder
.CreateCall(
21898 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x
));
21899 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y
:
21900 return Builder
.CreateCall(
21901 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y
));
21902 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z
:
21903 return Builder
.CreateCall(
21904 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z
));
21905 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w
:
21906 return Builder
.CreateCall(
21907 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w
));
21908 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x
:
21909 return Builder
.CreateCall(
21910 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x
));
21911 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y
:
21912 return Builder
.CreateCall(
21913 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y
));
21914 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z
:
21915 return Builder
.CreateCall(
21916 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z
));
21917 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w
:
21918 return Builder
.CreateCall(
21919 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w
));
21920 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x
:
21921 return Builder
.CreateCall(
21922 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x
));
21923 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y
:
21924 return Builder
.CreateCall(
21925 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y
));
21926 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z
:
21927 return Builder
.CreateCall(
21928 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z
));
21929 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w
:
21930 return Builder
.CreateCall(
21931 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w
));
21932 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank
:
21933 return Builder
.CreateCall(
21934 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank
));
21935 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank
:
21936 return Builder
.CreateCall(
21937 CGM
.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank
));
21938 case NVPTX::BI__nvvm_is_explicit_cluster
:
21939 return Builder
.CreateCall(
21940 CGM
.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster
));
21941 case NVPTX::BI__nvvm_isspacep_shared_cluster
:
21942 return Builder
.CreateCall(
21943 CGM
.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster
),
21944 EmitScalarExpr(E
->getArg(0)));
21945 case NVPTX::BI__nvvm_mapa
:
21946 return Builder
.CreateCall(
21947 CGM
.getIntrinsic(Intrinsic::nvvm_mapa
),
21948 {EmitScalarExpr(E
->getArg(0)), EmitScalarExpr(E
->getArg(1))});
21949 case NVPTX::BI__nvvm_mapa_shared_cluster
:
21950 return Builder
.CreateCall(
21951 CGM
.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster
),
21952 {EmitScalarExpr(E
->getArg(0)), EmitScalarExpr(E
->getArg(1))});
21953 case NVPTX::BI__nvvm_getctarank
:
21954 return Builder
.CreateCall(
21955 CGM
.getIntrinsic(Intrinsic::nvvm_getctarank
),
21956 EmitScalarExpr(E
->getArg(0)));
21957 case NVPTX::BI__nvvm_getctarank_shared_cluster
:
21958 return Builder
.CreateCall(
21959 CGM
.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster
),
21960 EmitScalarExpr(E
->getArg(0)));
21961 case NVPTX::BI__nvvm_barrier_cluster_arrive
:
21962 return Builder
.CreateCall(
21963 CGM
.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive
));
21964 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed
:
21965 return Builder
.CreateCall(
21966 CGM
.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed
));
21967 case NVPTX::BI__nvvm_barrier_cluster_wait
:
21968 return Builder
.CreateCall(
21969 CGM
.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait
));
21970 case NVPTX::BI__nvvm_fence_sc_cluster
:
21971 return Builder
.CreateCall(
21972 CGM
.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster
));
21979 struct BuiltinAlignArgs
{
21980 llvm::Value
*Src
= nullptr;
21981 llvm::Type
*SrcType
= nullptr;
21982 llvm::Value
*Alignment
= nullptr;
21983 llvm::Value
*Mask
= nullptr;
21984 llvm::IntegerType
*IntType
= nullptr;
21986 BuiltinAlignArgs(const CallExpr
*E
, CodeGenFunction
&CGF
) {
21987 QualType AstType
= E
->getArg(0)->getType();
21988 if (AstType
->isArrayType())
21989 Src
= CGF
.EmitArrayToPointerDecay(E
->getArg(0)).emitRawPointer(CGF
);
21991 Src
= CGF
.EmitScalarExpr(E
->getArg(0));
21992 SrcType
= Src
->getType();
21993 if (SrcType
->isPointerTy()) {
21994 IntType
= IntegerType::get(
21995 CGF
.getLLVMContext(),
21996 CGF
.CGM
.getDataLayout().getIndexTypeSizeInBits(SrcType
));
21998 assert(SrcType
->isIntegerTy());
21999 IntType
= cast
<llvm::IntegerType
>(SrcType
);
22001 Alignment
= CGF
.EmitScalarExpr(E
->getArg(1));
22002 Alignment
= CGF
.Builder
.CreateZExtOrTrunc(Alignment
, IntType
, "alignment");
22003 auto *One
= llvm::ConstantInt::get(IntType
, 1);
22004 Mask
= CGF
.Builder
.CreateSub(Alignment
, One
, "mask");
22009 /// Generate (x & (y-1)) == 0.
22010 RValue
CodeGenFunction::EmitBuiltinIsAligned(const CallExpr
*E
) {
22011 BuiltinAlignArgs
Args(E
, *this);
22012 llvm::Value
*SrcAddress
= Args
.Src
;
22013 if (Args
.SrcType
->isPointerTy())
22015 Builder
.CreateBitOrPointerCast(Args
.Src
, Args
.IntType
, "src_addr");
22016 return RValue::get(Builder
.CreateICmpEQ(
22017 Builder
.CreateAnd(SrcAddress
, Args
.Mask
, "set_bits"),
22018 llvm::Constant::getNullValue(Args
.IntType
), "is_aligned"));
22021 /// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
22022 /// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
22023 /// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
22024 RValue
CodeGenFunction::EmitBuiltinAlignTo(const CallExpr
*E
, bool AlignUp
) {
22025 BuiltinAlignArgs
Args(E
, *this);
22026 llvm::Value
*SrcForMask
= Args
.Src
;
22028 // When aligning up we have to first add the mask to ensure we go over the
22029 // next alignment value and then align down to the next valid multiple.
22030 // By adding the mask, we ensure that align_up on an already aligned
22031 // value will not change the value.
22032 if (Args
.Src
->getType()->isPointerTy()) {
22033 if (getLangOpts().isSignedOverflowDefined())
22035 Builder
.CreateGEP(Int8Ty
, SrcForMask
, Args
.Mask
, "over_boundary");
22037 SrcForMask
= EmitCheckedInBoundsGEP(Int8Ty
, SrcForMask
, Args
.Mask
,
22038 /*SignedIndices=*/true,
22039 /*isSubtraction=*/false,
22040 E
->getExprLoc(), "over_boundary");
22042 SrcForMask
= Builder
.CreateAdd(SrcForMask
, Args
.Mask
, "over_boundary");
22045 // Invert the mask to only clear the lower bits.
22046 llvm::Value
*InvertedMask
= Builder
.CreateNot(Args
.Mask
, "inverted_mask");
22047 llvm::Value
*Result
= nullptr;
22048 if (Args
.Src
->getType()->isPointerTy()) {
22049 Result
= Builder
.CreateIntrinsic(
22050 Intrinsic::ptrmask
, {Args
.SrcType
, Args
.IntType
},
22051 {SrcForMask
, InvertedMask
}, nullptr, "aligned_result");
22053 Result
= Builder
.CreateAnd(SrcForMask
, InvertedMask
, "aligned_result");
22055 assert(Result
->getType() == Args
.SrcType
);
22056 return RValue::get(Result
);
22059 Value
*CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID
,
22060 const CallExpr
*E
) {
22061 switch (BuiltinID
) {
22062 case WebAssembly::BI__builtin_wasm_memory_size
: {
22063 llvm::Type
*ResultType
= ConvertType(E
->getType());
22064 Value
*I
= EmitScalarExpr(E
->getArg(0));
22066 CGM
.getIntrinsic(Intrinsic::wasm_memory_size
, ResultType
);
22067 return Builder
.CreateCall(Callee
, I
);
22069 case WebAssembly::BI__builtin_wasm_memory_grow
: {
22070 llvm::Type
*ResultType
= ConvertType(E
->getType());
22071 Value
*Args
[] = {EmitScalarExpr(E
->getArg(0)),
22072 EmitScalarExpr(E
->getArg(1))};
22074 CGM
.getIntrinsic(Intrinsic::wasm_memory_grow
, ResultType
);
22075 return Builder
.CreateCall(Callee
, Args
);
22077 case WebAssembly::BI__builtin_wasm_tls_size
: {
22078 llvm::Type
*ResultType
= ConvertType(E
->getType());
22079 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_tls_size
, ResultType
);
22080 return Builder
.CreateCall(Callee
);
22082 case WebAssembly::BI__builtin_wasm_tls_align
: {
22083 llvm::Type
*ResultType
= ConvertType(E
->getType());
22084 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_tls_align
, ResultType
);
22085 return Builder
.CreateCall(Callee
);
22087 case WebAssembly::BI__builtin_wasm_tls_base
: {
22088 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_tls_base
);
22089 return Builder
.CreateCall(Callee
);
22091 case WebAssembly::BI__builtin_wasm_throw
: {
22092 Value
*Tag
= EmitScalarExpr(E
->getArg(0));
22093 Value
*Obj
= EmitScalarExpr(E
->getArg(1));
22094 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_throw
);
22095 return Builder
.CreateCall(Callee
, {Tag
, Obj
});
22097 case WebAssembly::BI__builtin_wasm_rethrow
: {
22098 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_rethrow
);
22099 return Builder
.CreateCall(Callee
);
22101 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32
: {
22102 Value
*Addr
= EmitScalarExpr(E
->getArg(0));
22103 Value
*Expected
= EmitScalarExpr(E
->getArg(1));
22104 Value
*Timeout
= EmitScalarExpr(E
->getArg(2));
22105 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32
);
22106 return Builder
.CreateCall(Callee
, {Addr
, Expected
, Timeout
});
22108 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64
: {
22109 Value
*Addr
= EmitScalarExpr(E
->getArg(0));
22110 Value
*Expected
= EmitScalarExpr(E
->getArg(1));
22111 Value
*Timeout
= EmitScalarExpr(E
->getArg(2));
22112 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64
);
22113 return Builder
.CreateCall(Callee
, {Addr
, Expected
, Timeout
});
22115 case WebAssembly::BI__builtin_wasm_memory_atomic_notify
: {
22116 Value
*Addr
= EmitScalarExpr(E
->getArg(0));
22117 Value
*Count
= EmitScalarExpr(E
->getArg(1));
22118 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_memory_atomic_notify
);
22119 return Builder
.CreateCall(Callee
, {Addr
, Count
});
22121 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32
:
22122 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64
:
22123 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32
:
22124 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64
: {
22125 Value
*Src
= EmitScalarExpr(E
->getArg(0));
22126 llvm::Type
*ResT
= ConvertType(E
->getType());
22128 CGM
.getIntrinsic(Intrinsic::wasm_trunc_signed
, {ResT
, Src
->getType()});
22129 return Builder
.CreateCall(Callee
, {Src
});
22131 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32
:
22132 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64
:
22133 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32
:
22134 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64
: {
22135 Value
*Src
= EmitScalarExpr(E
->getArg(0));
22136 llvm::Type
*ResT
= ConvertType(E
->getType());
22137 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_trunc_unsigned
,
22138 {ResT
, Src
->getType()});
22139 return Builder
.CreateCall(Callee
, {Src
});
22141 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32
:
22142 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64
:
22143 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32
:
22144 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64
:
22145 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i16x8_f16x8
:
22146 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4
: {
22147 Value
*Src
= EmitScalarExpr(E
->getArg(0));
22148 llvm::Type
*ResT
= ConvertType(E
->getType());
22150 CGM
.getIntrinsic(Intrinsic::fptosi_sat
, {ResT
, Src
->getType()});
22151 return Builder
.CreateCall(Callee
, {Src
});
22153 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32
:
22154 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64
:
22155 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32
:
22156 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64
:
22157 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i16x8_f16x8
:
22158 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4
: {
22159 Value
*Src
= EmitScalarExpr(E
->getArg(0));
22160 llvm::Type
*ResT
= ConvertType(E
->getType());
22162 CGM
.getIntrinsic(Intrinsic::fptoui_sat
, {ResT
, Src
->getType()});
22163 return Builder
.CreateCall(Callee
, {Src
});
22165 case WebAssembly::BI__builtin_wasm_min_f32
:
22166 case WebAssembly::BI__builtin_wasm_min_f64
:
22167 case WebAssembly::BI__builtin_wasm_min_f16x8
:
22168 case WebAssembly::BI__builtin_wasm_min_f32x4
:
22169 case WebAssembly::BI__builtin_wasm_min_f64x2
: {
22170 Value
*LHS
= EmitScalarExpr(E
->getArg(0));
22171 Value
*RHS
= EmitScalarExpr(E
->getArg(1));
22173 CGM
.getIntrinsic(Intrinsic::minimum
, ConvertType(E
->getType()));
22174 return Builder
.CreateCall(Callee
, {LHS
, RHS
});
22176 case WebAssembly::BI__builtin_wasm_max_f32
:
22177 case WebAssembly::BI__builtin_wasm_max_f64
:
22178 case WebAssembly::BI__builtin_wasm_max_f16x8
:
22179 case WebAssembly::BI__builtin_wasm_max_f32x4
:
22180 case WebAssembly::BI__builtin_wasm_max_f64x2
: {
22181 Value
*LHS
= EmitScalarExpr(E
->getArg(0));
22182 Value
*RHS
= EmitScalarExpr(E
->getArg(1));
22184 CGM
.getIntrinsic(Intrinsic::maximum
, ConvertType(E
->getType()));
22185 return Builder
.CreateCall(Callee
, {LHS
, RHS
});
22187 case WebAssembly::BI__builtin_wasm_pmin_f16x8
:
22188 case WebAssembly::BI__builtin_wasm_pmin_f32x4
:
22189 case WebAssembly::BI__builtin_wasm_pmin_f64x2
: {
22190 Value
*LHS
= EmitScalarExpr(E
->getArg(0));
22191 Value
*RHS
= EmitScalarExpr(E
->getArg(1));
22193 CGM
.getIntrinsic(Intrinsic::wasm_pmin
, ConvertType(E
->getType()));
22194 return Builder
.CreateCall(Callee
, {LHS
, RHS
});
22196 case WebAssembly::BI__builtin_wasm_pmax_f16x8
:
22197 case WebAssembly::BI__builtin_wasm_pmax_f32x4
:
22198 case WebAssembly::BI__builtin_wasm_pmax_f64x2
: {
22199 Value
*LHS
= EmitScalarExpr(E
->getArg(0));
22200 Value
*RHS
= EmitScalarExpr(E
->getArg(1));
22202 CGM
.getIntrinsic(Intrinsic::wasm_pmax
, ConvertType(E
->getType()));
22203 return Builder
.CreateCall(Callee
, {LHS
, RHS
});
22205 case WebAssembly::BI__builtin_wasm_ceil_f16x8
:
22206 case WebAssembly::BI__builtin_wasm_floor_f16x8
:
22207 case WebAssembly::BI__builtin_wasm_trunc_f16x8
:
22208 case WebAssembly::BI__builtin_wasm_nearest_f16x8
:
22209 case WebAssembly::BI__builtin_wasm_ceil_f32x4
:
22210 case WebAssembly::BI__builtin_wasm_floor_f32x4
:
22211 case WebAssembly::BI__builtin_wasm_trunc_f32x4
:
22212 case WebAssembly::BI__builtin_wasm_nearest_f32x4
:
22213 case WebAssembly::BI__builtin_wasm_ceil_f64x2
:
22214 case WebAssembly::BI__builtin_wasm_floor_f64x2
:
22215 case WebAssembly::BI__builtin_wasm_trunc_f64x2
:
22216 case WebAssembly::BI__builtin_wasm_nearest_f64x2
: {
22218 switch (BuiltinID
) {
22219 case WebAssembly::BI__builtin_wasm_ceil_f16x8
:
22220 case WebAssembly::BI__builtin_wasm_ceil_f32x4
:
22221 case WebAssembly::BI__builtin_wasm_ceil_f64x2
:
22222 IntNo
= Intrinsic::ceil
;
22224 case WebAssembly::BI__builtin_wasm_floor_f16x8
:
22225 case WebAssembly::BI__builtin_wasm_floor_f32x4
:
22226 case WebAssembly::BI__builtin_wasm_floor_f64x2
:
22227 IntNo
= Intrinsic::floor
;
22229 case WebAssembly::BI__builtin_wasm_trunc_f16x8
:
22230 case WebAssembly::BI__builtin_wasm_trunc_f32x4
:
22231 case WebAssembly::BI__builtin_wasm_trunc_f64x2
:
22232 IntNo
= Intrinsic::trunc
;
22234 case WebAssembly::BI__builtin_wasm_nearest_f16x8
:
22235 case WebAssembly::BI__builtin_wasm_nearest_f32x4
:
22236 case WebAssembly::BI__builtin_wasm_nearest_f64x2
:
22237 IntNo
= Intrinsic::nearbyint
;
22240 llvm_unreachable("unexpected builtin ID");
22242 Value
*Value
= EmitScalarExpr(E
->getArg(0));
22243 Function
*Callee
= CGM
.getIntrinsic(IntNo
, ConvertType(E
->getType()));
22244 return Builder
.CreateCall(Callee
, Value
);
22246 case WebAssembly::BI__builtin_wasm_ref_null_extern
: {
22247 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_ref_null_extern
);
22248 return Builder
.CreateCall(Callee
);
22250 case WebAssembly::BI__builtin_wasm_ref_null_func
: {
22251 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_ref_null_func
);
22252 return Builder
.CreateCall(Callee
);
22254 case WebAssembly::BI__builtin_wasm_swizzle_i8x16
: {
22255 Value
*Src
= EmitScalarExpr(E
->getArg(0));
22256 Value
*Indices
= EmitScalarExpr(E
->getArg(1));
22257 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_swizzle
);
22258 return Builder
.CreateCall(Callee
, {Src
, Indices
});
22260 case WebAssembly::BI__builtin_wasm_abs_i8x16
:
22261 case WebAssembly::BI__builtin_wasm_abs_i16x8
:
22262 case WebAssembly::BI__builtin_wasm_abs_i32x4
:
22263 case WebAssembly::BI__builtin_wasm_abs_i64x2
: {
22264 Value
*Vec
= EmitScalarExpr(E
->getArg(0));
22265 Value
*Neg
= Builder
.CreateNeg(Vec
, "neg");
22266 Constant
*Zero
= llvm::Constant::getNullValue(Vec
->getType());
22267 Value
*ICmp
= Builder
.CreateICmpSLT(Vec
, Zero
, "abscond");
22268 return Builder
.CreateSelect(ICmp
, Neg
, Vec
, "abs");
22270 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16
:
22271 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8
: {
22272 Value
*LHS
= EmitScalarExpr(E
->getArg(0));
22273 Value
*RHS
= EmitScalarExpr(E
->getArg(1));
22274 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_avgr_unsigned
,
22275 ConvertType(E
->getType()));
22276 return Builder
.CreateCall(Callee
, {LHS
, RHS
});
22278 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8
: {
22279 Value
*LHS
= EmitScalarExpr(E
->getArg(0));
22280 Value
*RHS
= EmitScalarExpr(E
->getArg(1));
22281 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed
);
22282 return Builder
.CreateCall(Callee
, {LHS
, RHS
});
22284 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8
:
22285 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8
:
22286 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4
:
22287 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4
: {
22288 Value
*Vec
= EmitScalarExpr(E
->getArg(0));
22290 switch (BuiltinID
) {
22291 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8
:
22292 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4
:
22293 IntNo
= Intrinsic::wasm_extadd_pairwise_signed
;
22295 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8
:
22296 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4
:
22297 IntNo
= Intrinsic::wasm_extadd_pairwise_unsigned
;
22300 llvm_unreachable("unexpected builtin ID");
22303 Function
*Callee
= CGM
.getIntrinsic(IntNo
, ConvertType(E
->getType()));
22304 return Builder
.CreateCall(Callee
, Vec
);
22306 case WebAssembly::BI__builtin_wasm_bitselect
: {
22307 Value
*V1
= EmitScalarExpr(E
->getArg(0));
22308 Value
*V2
= EmitScalarExpr(E
->getArg(1));
22309 Value
*C
= EmitScalarExpr(E
->getArg(2));
22311 CGM
.getIntrinsic(Intrinsic::wasm_bitselect
, ConvertType(E
->getType()));
22312 return Builder
.CreateCall(Callee
, {V1
, V2
, C
});
22314 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8
: {
22315 Value
*LHS
= EmitScalarExpr(E
->getArg(0));
22316 Value
*RHS
= EmitScalarExpr(E
->getArg(1));
22317 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_dot
);
22318 return Builder
.CreateCall(Callee
, {LHS
, RHS
});
22320 case WebAssembly::BI__builtin_wasm_any_true_v128
:
22321 case WebAssembly::BI__builtin_wasm_all_true_i8x16
:
22322 case WebAssembly::BI__builtin_wasm_all_true_i16x8
:
22323 case WebAssembly::BI__builtin_wasm_all_true_i32x4
:
22324 case WebAssembly::BI__builtin_wasm_all_true_i64x2
: {
22326 switch (BuiltinID
) {
22327 case WebAssembly::BI__builtin_wasm_any_true_v128
:
22328 IntNo
= Intrinsic::wasm_anytrue
;
22330 case WebAssembly::BI__builtin_wasm_all_true_i8x16
:
22331 case WebAssembly::BI__builtin_wasm_all_true_i16x8
:
22332 case WebAssembly::BI__builtin_wasm_all_true_i32x4
:
22333 case WebAssembly::BI__builtin_wasm_all_true_i64x2
:
22334 IntNo
= Intrinsic::wasm_alltrue
;
22337 llvm_unreachable("unexpected builtin ID");
22339 Value
*Vec
= EmitScalarExpr(E
->getArg(0));
22340 Function
*Callee
= CGM
.getIntrinsic(IntNo
, Vec
->getType());
22341 return Builder
.CreateCall(Callee
, {Vec
});
22343 case WebAssembly::BI__builtin_wasm_bitmask_i8x16
:
22344 case WebAssembly::BI__builtin_wasm_bitmask_i16x8
:
22345 case WebAssembly::BI__builtin_wasm_bitmask_i32x4
:
22346 case WebAssembly::BI__builtin_wasm_bitmask_i64x2
: {
22347 Value
*Vec
= EmitScalarExpr(E
->getArg(0));
22349 CGM
.getIntrinsic(Intrinsic::wasm_bitmask
, Vec
->getType());
22350 return Builder
.CreateCall(Callee
, {Vec
});
22352 case WebAssembly::BI__builtin_wasm_abs_f16x8
:
22353 case WebAssembly::BI__builtin_wasm_abs_f32x4
:
22354 case WebAssembly::BI__builtin_wasm_abs_f64x2
: {
22355 Value
*Vec
= EmitScalarExpr(E
->getArg(0));
22356 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::fabs
, Vec
->getType());
22357 return Builder
.CreateCall(Callee
, {Vec
});
22359 case WebAssembly::BI__builtin_wasm_sqrt_f16x8
:
22360 case WebAssembly::BI__builtin_wasm_sqrt_f32x4
:
22361 case WebAssembly::BI__builtin_wasm_sqrt_f64x2
: {
22362 Value
*Vec
= EmitScalarExpr(E
->getArg(0));
22363 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::sqrt
, Vec
->getType());
22364 return Builder
.CreateCall(Callee
, {Vec
});
22366 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8
:
22367 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8
:
22368 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4
:
22369 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4
: {
22370 Value
*Low
= EmitScalarExpr(E
->getArg(0));
22371 Value
*High
= EmitScalarExpr(E
->getArg(1));
22373 switch (BuiltinID
) {
22374 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8
:
22375 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4
:
22376 IntNo
= Intrinsic::wasm_narrow_signed
;
22378 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8
:
22379 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4
:
22380 IntNo
= Intrinsic::wasm_narrow_unsigned
;
22383 llvm_unreachable("unexpected builtin ID");
22386 CGM
.getIntrinsic(IntNo
, {ConvertType(E
->getType()), Low
->getType()});
22387 return Builder
.CreateCall(Callee
, {Low
, High
});
22389 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4
:
22390 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4
: {
22391 Value
*Vec
= EmitScalarExpr(E
->getArg(0));
22393 switch (BuiltinID
) {
22394 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4
:
22395 IntNo
= Intrinsic::fptosi_sat
;
22397 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4
:
22398 IntNo
= Intrinsic::fptoui_sat
;
22401 llvm_unreachable("unexpected builtin ID");
22403 llvm::Type
*SrcT
= Vec
->getType();
22404 llvm::Type
*TruncT
= SrcT
->getWithNewType(Builder
.getInt32Ty());
22405 Function
*Callee
= CGM
.getIntrinsic(IntNo
, {TruncT
, SrcT
});
22406 Value
*Trunc
= Builder
.CreateCall(Callee
, Vec
);
22407 Value
*Splat
= Constant::getNullValue(TruncT
);
22408 return Builder
.CreateShuffleVector(Trunc
, Splat
, ArrayRef
<int>{0, 1, 2, 3});
22410 case WebAssembly::BI__builtin_wasm_shuffle_i8x16
: {
22413 Ops
[OpIdx
++] = EmitScalarExpr(E
->getArg(0));
22414 Ops
[OpIdx
++] = EmitScalarExpr(E
->getArg(1));
22415 while (OpIdx
< 18) {
22416 std::optional
<llvm::APSInt
> LaneConst
=
22417 E
->getArg(OpIdx
)->getIntegerConstantExpr(getContext());
22418 assert(LaneConst
&& "Constant arg isn't actually constant?");
22419 Ops
[OpIdx
++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst
);
22421 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_shuffle
);
22422 return Builder
.CreateCall(Callee
, Ops
);
22424 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8
:
22425 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8
:
22426 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4
:
22427 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4
:
22428 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2
:
22429 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2
: {
22430 Value
*A
= EmitScalarExpr(E
->getArg(0));
22431 Value
*B
= EmitScalarExpr(E
->getArg(1));
22432 Value
*C
= EmitScalarExpr(E
->getArg(2));
22434 switch (BuiltinID
) {
22435 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8
:
22436 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4
:
22437 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2
:
22438 IntNo
= Intrinsic::wasm_relaxed_madd
;
22440 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8
:
22441 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4
:
22442 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2
:
22443 IntNo
= Intrinsic::wasm_relaxed_nmadd
;
22446 llvm_unreachable("unexpected builtin ID");
22448 Function
*Callee
= CGM
.getIntrinsic(IntNo
, A
->getType());
22449 return Builder
.CreateCall(Callee
, {A
, B
, C
});
22451 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16
:
22452 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8
:
22453 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4
:
22454 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2
: {
22455 Value
*A
= EmitScalarExpr(E
->getArg(0));
22456 Value
*B
= EmitScalarExpr(E
->getArg(1));
22457 Value
*C
= EmitScalarExpr(E
->getArg(2));
22459 CGM
.getIntrinsic(Intrinsic::wasm_relaxed_laneselect
, A
->getType());
22460 return Builder
.CreateCall(Callee
, {A
, B
, C
});
22462 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16
: {
22463 Value
*Src
= EmitScalarExpr(E
->getArg(0));
22464 Value
*Indices
= EmitScalarExpr(E
->getArg(1));
22465 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_relaxed_swizzle
);
22466 return Builder
.CreateCall(Callee
, {Src
, Indices
});
22468 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4
:
22469 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4
:
22470 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2
:
22471 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2
: {
22472 Value
*LHS
= EmitScalarExpr(E
->getArg(0));
22473 Value
*RHS
= EmitScalarExpr(E
->getArg(1));
22475 switch (BuiltinID
) {
22476 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4
:
22477 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2
:
22478 IntNo
= Intrinsic::wasm_relaxed_min
;
22480 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4
:
22481 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2
:
22482 IntNo
= Intrinsic::wasm_relaxed_max
;
22485 llvm_unreachable("unexpected builtin ID");
22487 Function
*Callee
= CGM
.getIntrinsic(IntNo
, LHS
->getType());
22488 return Builder
.CreateCall(Callee
, {LHS
, RHS
});
22490 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4
:
22491 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4
:
22492 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2
:
22493 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2
: {
22494 Value
*Vec
= EmitScalarExpr(E
->getArg(0));
22496 switch (BuiltinID
) {
22497 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4
:
22498 IntNo
= Intrinsic::wasm_relaxed_trunc_signed
;
22500 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4
:
22501 IntNo
= Intrinsic::wasm_relaxed_trunc_unsigned
;
22503 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2
:
22504 IntNo
= Intrinsic::wasm_relaxed_trunc_signed_zero
;
22506 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2
:
22507 IntNo
= Intrinsic::wasm_relaxed_trunc_unsigned_zero
;
22510 llvm_unreachable("unexpected builtin ID");
22512 Function
*Callee
= CGM
.getIntrinsic(IntNo
);
22513 return Builder
.CreateCall(Callee
, {Vec
});
22515 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8
: {
22516 Value
*LHS
= EmitScalarExpr(E
->getArg(0));
22517 Value
*RHS
= EmitScalarExpr(E
->getArg(1));
22518 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed
);
22519 return Builder
.CreateCall(Callee
, {LHS
, RHS
});
22521 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8
: {
22522 Value
*LHS
= EmitScalarExpr(E
->getArg(0));
22523 Value
*RHS
= EmitScalarExpr(E
->getArg(1));
22525 CGM
.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed
);
22526 return Builder
.CreateCall(Callee
, {LHS
, RHS
});
22528 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4
: {
22529 Value
*LHS
= EmitScalarExpr(E
->getArg(0));
22530 Value
*RHS
= EmitScalarExpr(E
->getArg(1));
22531 Value
*Acc
= EmitScalarExpr(E
->getArg(2));
22533 CGM
.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed
);
22534 return Builder
.CreateCall(Callee
, {LHS
, RHS
, Acc
});
22536 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4
: {
22537 Value
*LHS
= EmitScalarExpr(E
->getArg(0));
22538 Value
*RHS
= EmitScalarExpr(E
->getArg(1));
22539 Value
*Acc
= EmitScalarExpr(E
->getArg(2));
22541 CGM
.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32
);
22542 return Builder
.CreateCall(Callee
, {LHS
, RHS
, Acc
});
22544 case WebAssembly::BI__builtin_wasm_loadf16_f32
: {
22545 Value
*Addr
= EmitScalarExpr(E
->getArg(0));
22546 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_loadf16_f32
);
22547 return Builder
.CreateCall(Callee
, {Addr
});
22549 case WebAssembly::BI__builtin_wasm_storef16_f32
: {
22550 Value
*Val
= EmitScalarExpr(E
->getArg(0));
22551 Value
*Addr
= EmitScalarExpr(E
->getArg(1));
22552 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_storef16_f32
);
22553 return Builder
.CreateCall(Callee
, {Val
, Addr
});
22555 case WebAssembly::BI__builtin_wasm_splat_f16x8
: {
22556 Value
*Val
= EmitScalarExpr(E
->getArg(0));
22557 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_splat_f16x8
);
22558 return Builder
.CreateCall(Callee
, {Val
});
22560 case WebAssembly::BI__builtin_wasm_extract_lane_f16x8
: {
22561 Value
*Vector
= EmitScalarExpr(E
->getArg(0));
22562 Value
*Index
= EmitScalarExpr(E
->getArg(1));
22563 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8
);
22564 return Builder
.CreateCall(Callee
, {Vector
, Index
});
22566 case WebAssembly::BI__builtin_wasm_replace_lane_f16x8
: {
22567 Value
*Vector
= EmitScalarExpr(E
->getArg(0));
22568 Value
*Index
= EmitScalarExpr(E
->getArg(1));
22569 Value
*Val
= EmitScalarExpr(E
->getArg(2));
22570 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_replace_lane_f16x8
);
22571 return Builder
.CreateCall(Callee
, {Vector
, Index
, Val
});
22573 case WebAssembly::BI__builtin_wasm_table_get
: {
22574 assert(E
->getArg(0)->getType()->isArrayType());
22575 Value
*Table
= EmitArrayToPointerDecay(E
->getArg(0)).emitRawPointer(*this);
22576 Value
*Index
= EmitScalarExpr(E
->getArg(1));
22578 if (E
->getType().isWebAssemblyExternrefType())
22579 Callee
= CGM
.getIntrinsic(Intrinsic::wasm_table_get_externref
);
22580 else if (E
->getType().isWebAssemblyFuncrefType())
22581 Callee
= CGM
.getIntrinsic(Intrinsic::wasm_table_get_funcref
);
22584 "Unexpected reference type for __builtin_wasm_table_get");
22585 return Builder
.CreateCall(Callee
, {Table
, Index
});
22587 case WebAssembly::BI__builtin_wasm_table_set
: {
22588 assert(E
->getArg(0)->getType()->isArrayType());
22589 Value
*Table
= EmitArrayToPointerDecay(E
->getArg(0)).emitRawPointer(*this);
22590 Value
*Index
= EmitScalarExpr(E
->getArg(1));
22591 Value
*Val
= EmitScalarExpr(E
->getArg(2));
22593 if (E
->getArg(2)->getType().isWebAssemblyExternrefType())
22594 Callee
= CGM
.getIntrinsic(Intrinsic::wasm_table_set_externref
);
22595 else if (E
->getArg(2)->getType().isWebAssemblyFuncrefType())
22596 Callee
= CGM
.getIntrinsic(Intrinsic::wasm_table_set_funcref
);
22599 "Unexpected reference type for __builtin_wasm_table_set");
22600 return Builder
.CreateCall(Callee
, {Table
, Index
, Val
});
22602 case WebAssembly::BI__builtin_wasm_table_size
: {
22603 assert(E
->getArg(0)->getType()->isArrayType());
22604 Value
*Value
= EmitArrayToPointerDecay(E
->getArg(0)).emitRawPointer(*this);
22605 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_table_size
);
22606 return Builder
.CreateCall(Callee
, Value
);
22608 case WebAssembly::BI__builtin_wasm_table_grow
: {
22609 assert(E
->getArg(0)->getType()->isArrayType());
22610 Value
*Table
= EmitArrayToPointerDecay(E
->getArg(0)).emitRawPointer(*this);
22611 Value
*Val
= EmitScalarExpr(E
->getArg(1));
22612 Value
*NElems
= EmitScalarExpr(E
->getArg(2));
22615 if (E
->getArg(1)->getType().isWebAssemblyExternrefType())
22616 Callee
= CGM
.getIntrinsic(Intrinsic::wasm_table_grow_externref
);
22617 else if (E
->getArg(2)->getType().isWebAssemblyFuncrefType())
22618 Callee
= CGM
.getIntrinsic(Intrinsic::wasm_table_fill_funcref
);
22621 "Unexpected reference type for __builtin_wasm_table_grow");
22623 return Builder
.CreateCall(Callee
, {Table
, Val
, NElems
});
22625 case WebAssembly::BI__builtin_wasm_table_fill
: {
22626 assert(E
->getArg(0)->getType()->isArrayType());
22627 Value
*Table
= EmitArrayToPointerDecay(E
->getArg(0)).emitRawPointer(*this);
22628 Value
*Index
= EmitScalarExpr(E
->getArg(1));
22629 Value
*Val
= EmitScalarExpr(E
->getArg(2));
22630 Value
*NElems
= EmitScalarExpr(E
->getArg(3));
22633 if (E
->getArg(2)->getType().isWebAssemblyExternrefType())
22634 Callee
= CGM
.getIntrinsic(Intrinsic::wasm_table_fill_externref
);
22635 else if (E
->getArg(2)->getType().isWebAssemblyFuncrefType())
22636 Callee
= CGM
.getIntrinsic(Intrinsic::wasm_table_fill_funcref
);
22639 "Unexpected reference type for __builtin_wasm_table_fill");
22641 return Builder
.CreateCall(Callee
, {Table
, Index
, Val
, NElems
});
22643 case WebAssembly::BI__builtin_wasm_table_copy
: {
22644 assert(E
->getArg(0)->getType()->isArrayType());
22645 Value
*TableX
= EmitArrayToPointerDecay(E
->getArg(0)).emitRawPointer(*this);
22646 Value
*TableY
= EmitArrayToPointerDecay(E
->getArg(1)).emitRawPointer(*this);
22647 Value
*DstIdx
= EmitScalarExpr(E
->getArg(2));
22648 Value
*SrcIdx
= EmitScalarExpr(E
->getArg(3));
22649 Value
*NElems
= EmitScalarExpr(E
->getArg(4));
22651 Function
*Callee
= CGM
.getIntrinsic(Intrinsic::wasm_table_copy
);
22653 return Builder
.CreateCall(Callee
, {TableX
, TableY
, SrcIdx
, DstIdx
, NElems
});
22660 static std::pair
<Intrinsic::ID
, unsigned>
22661 getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID
) {
22663 unsigned BuiltinID
;
22664 Intrinsic::ID IntrinsicID
;
22667 static Info Infos
[] = {
22668 #define CUSTOM_BUILTIN_MAPPING(x,s) \
22669 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
22670 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci
, 0)
22671 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci
, 0)
22672 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci
, 0)
22673 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci
, 0)
22674 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci
, 0)
22675 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci
, 0)
22676 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr
, 0)
22677 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr
, 0)
22678 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr
, 0)
22679 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr
, 0)
22680 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr
, 0)
22681 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr
, 0)
22682 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci
, 0)
22683 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci
, 0)
22684 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci
, 0)
22685 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci
, 0)
22686 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci
, 0)
22687 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr
, 0)
22688 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr
, 0)
22689 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr
, 0)
22690 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr
, 0)
22691 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr
, 0)
22692 // Legacy builtins that take a vector in place of a vector predicate.
22693 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq
, 64)
22694 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq
, 64)
22695 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq
, 64)
22696 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq
, 64)
22697 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B
, 128)
22698 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B
, 128)
22699 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B
, 128)
22700 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B
, 128)
22701 #include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
22702 #undef CUSTOM_BUILTIN_MAPPING
22705 auto CmpInfo
= [] (Info A
, Info B
) { return A
.BuiltinID
< B
.BuiltinID
; };
22706 static const bool SortOnce
= (llvm::sort(Infos
, CmpInfo
), true);
22709 const Info
*F
= llvm::lower_bound(Infos
, Info
{BuiltinID
, 0, 0}, CmpInfo
);
22710 if (F
== std::end(Infos
) || F
->BuiltinID
!= BuiltinID
)
22711 return {Intrinsic::not_intrinsic
, 0};
22713 return {F
->IntrinsicID
, F
->VecLen
};
22716 Value
*CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID
,
22717 const CallExpr
*E
) {
22720 std::tie(ID
, VecLen
) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID
);
22722 auto MakeCircOp
= [this, E
](unsigned IntID
, bool IsLoad
) {
22723 // The base pointer is passed by address, so it needs to be loaded.
22724 Address A
= EmitPointerWithAlignment(E
->getArg(0));
22725 Address BP
= Address(A
.emitRawPointer(*this), Int8PtrTy
, A
.getAlignment());
22726 llvm::Value
*Base
= Builder
.CreateLoad(BP
);
22727 // The treatment of both loads and stores is the same: the arguments for
22728 // the builtin are the same as the arguments for the intrinsic.
22730 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
22731 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
22733 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
22734 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
22735 SmallVector
<llvm::Value
*,5> Ops
= { Base
};
22736 for (unsigned i
= 1, e
= E
->getNumArgs(); i
!= e
; ++i
)
22737 Ops
.push_back(EmitScalarExpr(E
->getArg(i
)));
22739 llvm::Value
*Result
= Builder
.CreateCall(CGM
.getIntrinsic(IntID
), Ops
);
22740 // The load intrinsics generate two results (Value, NewBase), stores
22741 // generate one (NewBase). The new base address needs to be stored.
22742 llvm::Value
*NewBase
= IsLoad
? Builder
.CreateExtractValue(Result
, 1)
22744 llvm::Value
*LV
= EmitScalarExpr(E
->getArg(0));
22745 Address Dest
= EmitPointerWithAlignment(E
->getArg(0));
22746 llvm::Value
*RetVal
=
22747 Builder
.CreateAlignedStore(NewBase
, LV
, Dest
.getAlignment());
22749 RetVal
= Builder
.CreateExtractValue(Result
, 0);
22753 // Handle the conversion of bit-reverse load intrinsics to bit code.
22754 // The intrinsic call after this function only reads from memory and the
22755 // write to memory is dealt by the store instruction.
22756 auto MakeBrevLd
= [this, E
](unsigned IntID
, llvm::Type
*DestTy
) {
22757 // The intrinsic generates one result, which is the new value for the base
22758 // pointer. It needs to be returned. The result of the load instruction is
22759 // passed to intrinsic by address, so the value needs to be stored.
22760 llvm::Value
*BaseAddress
= EmitScalarExpr(E
->getArg(0));
22762 // Expressions like &(*pt++) will be incremented per evaluation.
22763 // EmitPointerWithAlignment and EmitScalarExpr evaluates the expression
22765 Address DestAddr
= EmitPointerWithAlignment(E
->getArg(1));
22766 DestAddr
= DestAddr
.withElementType(Int8Ty
);
22767 llvm::Value
*DestAddress
= DestAddr
.emitRawPointer(*this);
22769 // Operands are Base, Dest, Modifier.
22770 // The intrinsic format in LLVM IR is defined as
22771 // { ValueType, i8* } (i8*, i32).
22772 llvm::Value
*Result
= Builder
.CreateCall(
22773 CGM
.getIntrinsic(IntID
), {BaseAddress
, EmitScalarExpr(E
->getArg(2))});
22775 // The value needs to be stored as the variable is passed by reference.
22776 llvm::Value
*DestVal
= Builder
.CreateExtractValue(Result
, 0);
22778 // The store needs to be truncated to fit the destination type.
22779 // While i32 and i64 are natively supported on Hexagon, i8 and i16 needs
22780 // to be handled with stores of respective destination type.
22781 DestVal
= Builder
.CreateTrunc(DestVal
, DestTy
);
22783 Builder
.CreateAlignedStore(DestVal
, DestAddress
, DestAddr
.getAlignment());
22784 // The updated value of the base pointer is returned.
22785 return Builder
.CreateExtractValue(Result
, 1);
22788 auto V2Q
= [this, VecLen
] (llvm::Value
*Vec
) {
22789 Intrinsic::ID ID
= VecLen
== 128 ? Intrinsic::hexagon_V6_vandvrt_128B
22790 : Intrinsic::hexagon_V6_vandvrt
;
22791 return Builder
.CreateCall(CGM
.getIntrinsic(ID
),
22792 {Vec
, Builder
.getInt32(-1)});
22794 auto Q2V
= [this, VecLen
] (llvm::Value
*Pred
) {
22795 Intrinsic::ID ID
= VecLen
== 128 ? Intrinsic::hexagon_V6_vandqrt_128B
22796 : Intrinsic::hexagon_V6_vandqrt
;
22797 return Builder
.CreateCall(CGM
.getIntrinsic(ID
),
22798 {Pred
, Builder
.getInt32(-1)});
22801 switch (BuiltinID
) {
22802 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
22803 // and the corresponding C/C++ builtins use loads/stores to update
22805 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry
:
22806 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B
:
22807 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry
:
22808 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B
: {
22809 // Get the type from the 0-th argument.
22810 llvm::Type
*VecType
= ConvertType(E
->getArg(0)->getType());
22812 EmitPointerWithAlignment(E
->getArg(2)).withElementType(VecType
);
22813 llvm::Value
*PredIn
= V2Q(Builder
.CreateLoad(PredAddr
));
22814 llvm::Value
*Result
= Builder
.CreateCall(CGM
.getIntrinsic(ID
),
22815 {EmitScalarExpr(E
->getArg(0)), EmitScalarExpr(E
->getArg(1)), PredIn
});
22817 llvm::Value
*PredOut
= Builder
.CreateExtractValue(Result
, 1);
22818 Builder
.CreateAlignedStore(Q2V(PredOut
), PredAddr
.emitRawPointer(*this),
22819 PredAddr
.getAlignment());
22820 return Builder
.CreateExtractValue(Result
, 0);
22822 // These are identical to the builtins above, except they don't consume
22823 // input carry, only generate carry-out. Since they still produce two
22824 // outputs, generate the store of the predicate, but no load.
22825 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo
:
22826 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B
:
22827 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo
:
22828 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B
: {
22829 // Get the type from the 0-th argument.
22830 llvm::Type
*VecType
= ConvertType(E
->getArg(0)->getType());
22832 EmitPointerWithAlignment(E
->getArg(2)).withElementType(VecType
);
22833 llvm::Value
*Result
= Builder
.CreateCall(CGM
.getIntrinsic(ID
),
22834 {EmitScalarExpr(E
->getArg(0)), EmitScalarExpr(E
->getArg(1))});
22836 llvm::Value
*PredOut
= Builder
.CreateExtractValue(Result
, 1);
22837 Builder
.CreateAlignedStore(Q2V(PredOut
), PredAddr
.emitRawPointer(*this),
22838 PredAddr
.getAlignment());
22839 return Builder
.CreateExtractValue(Result
, 0);
22842 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq
:
22843 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq
:
22844 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq
:
22845 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq
:
22846 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B
:
22847 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B
:
22848 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B
:
22849 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B
: {
22850 SmallVector
<llvm::Value
*,4> Ops
;
22851 const Expr
*PredOp
= E
->getArg(0);
22852 // There will be an implicit cast to a boolean vector. Strip it.
22853 if (auto *Cast
= dyn_cast
<ImplicitCastExpr
>(PredOp
)) {
22854 if (Cast
->getCastKind() == CK_BitCast
)
22855 PredOp
= Cast
->getSubExpr();
22856 Ops
.push_back(V2Q(EmitScalarExpr(PredOp
)));
22858 for (int i
= 1, e
= E
->getNumArgs(); i
!= e
; ++i
)
22859 Ops
.push_back(EmitScalarExpr(E
->getArg(i
)));
22860 return Builder
.CreateCall(CGM
.getIntrinsic(ID
), Ops
);
22863 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci
:
22864 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci
:
22865 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci
:
22866 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci
:
22867 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci
:
22868 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci
:
22869 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr
:
22870 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr
:
22871 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr
:
22872 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr
:
22873 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr
:
22874 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr
:
22875 return MakeCircOp(ID
, /*IsLoad=*/true);
22876 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci
:
22877 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci
:
22878 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci
:
22879 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci
:
22880 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci
:
22881 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr
:
22882 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr
:
22883 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr
:
22884 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr
:
22885 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr
:
22886 return MakeCircOp(ID
, /*IsLoad=*/false);
22887 case Hexagon::BI__builtin_brev_ldub
:
22888 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr
, Int8Ty
);
22889 case Hexagon::BI__builtin_brev_ldb
:
22890 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr
, Int8Ty
);
22891 case Hexagon::BI__builtin_brev_lduh
:
22892 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr
, Int16Ty
);
22893 case Hexagon::BI__builtin_brev_ldh
:
22894 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr
, Int16Ty
);
22895 case Hexagon::BI__builtin_brev_ldw
:
22896 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr
, Int32Ty
);
22897 case Hexagon::BI__builtin_brev_ldd
:
22898 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr
, Int64Ty
);
22904 Value
*CodeGenFunction::EmitRISCVCpuIs(const CallExpr
*E
) {
22905 const Expr
*CPUExpr
= E
->getArg(0)->IgnoreParenCasts();
22906 StringRef CPUStr
= cast
<clang::StringLiteral
>(CPUExpr
)->getString();
22907 return EmitRISCVCpuIs(CPUStr
);
22910 Value
*CodeGenFunction::EmitRISCVCpuIs(StringRef CPUStr
) {
22911 llvm::Type
*Int32Ty
= Builder
.getInt32Ty();
22912 llvm::Type
*Int64Ty
= Builder
.getInt64Ty();
22913 llvm::StructType
*StructTy
= llvm::StructType::get(Int32Ty
, Int64Ty
, Int64Ty
);
22914 llvm::Constant
*RISCVCPUModel
=
22915 CGM
.CreateRuntimeVariable(StructTy
, "__riscv_cpu_model");
22916 cast
<llvm::GlobalValue
>(RISCVCPUModel
)->setDSOLocal(true);
22918 auto loadRISCVCPUID
= [&](unsigned Index
) {
22919 Value
*Ptr
= Builder
.CreateStructGEP(StructTy
, RISCVCPUModel
, Index
);
22920 Value
*CPUID
= Builder
.CreateAlignedLoad(StructTy
->getTypeAtIndex(Index
),
22921 Ptr
, llvm::MaybeAlign());
22925 const llvm::RISCV::CPUModel Model
= llvm::RISCV::getCPUModel(CPUStr
);
22927 // Compare mvendorid.
22928 Value
*VendorID
= loadRISCVCPUID(0);
22930 Builder
.CreateICmpEQ(VendorID
, Builder
.getInt32(Model
.MVendorID
));
22932 // Compare marchid.
22933 Value
*ArchID
= loadRISCVCPUID(1);
22934 Result
= Builder
.CreateAnd(
22935 Result
, Builder
.CreateICmpEQ(ArchID
, Builder
.getInt64(Model
.MArchID
)));
22938 Value
*ImpID
= loadRISCVCPUID(2);
22939 Result
= Builder
.CreateAnd(
22940 Result
, Builder
.CreateICmpEQ(ImpID
, Builder
.getInt64(Model
.MImpID
)));
// Emit LLVM IR for a call to the RISC-V builtin identified by BuiltinID.
// The CPU-detection builtins are forwarded to dedicated emitters; for every
// other builtin the call arguments are evaluated into Ops and the switch below
// either emits IR directly and returns it, or selects an intrinsic ID that is
// called with Ops at the bottom of the function.
// NOTE(review): this text looks like a lossy extract -- `E` is used without a
// visible declaration and several statements (e.g. `break`s, closing braces)
// are not present here; confirm against the upstream file before editing.
22945 Value
*CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID
,
22947 ReturnValueSlot ReturnValue
) {
// __builtin_cpu_supports / __builtin_cpu_init / __builtin_cpu_is are handled
// up front, before any argument is evaluated.
22949 if (BuiltinID
== Builtin::BI__builtin_cpu_supports
)
22950 return EmitRISCVCpuSupports(E
);
22951 if (BuiltinID
== Builtin::BI__builtin_cpu_init
)
22952 return EmitRISCVCpuInit();
22953 if (BuiltinID
== Builtin::BI__builtin_cpu_is
)
22954 return EmitRISCVCpuIs(E
);
// Ops collects the evaluated call arguments; ResultType is the LLVM type
// converted from the call expression's type.
22956 SmallVector
<Value
*, 4> Ops
;
22957 llvm::Type
*ResultType
= ConvertType(E
->getType());
22959 // Find out if any arguments are required to be integer constant expressions.
22960 unsigned ICEArguments
= 0;
22961 ASTContext::GetBuiltinTypeError Error
;
22962 getContext().GetBuiltinType(BuiltinID
, Error
, &ICEArguments
);
22963 if (Error
== ASTContext::GE_Missing_type
) {
22964 // Vector intrinsics don't have a type string.
22965 assert(BuiltinID
>= clang::RISCV::FirstRVVBuiltin
&&
22966 BuiltinID
<= clang::RISCV::LastRVVBuiltin
);
// vget_v / vset_v get bit 1 of the mask set by hand (presumably bit N marks
// argument N as a required constant -- confirm against
// EmitScalarOrConstFoldImmArg).
22968 if (BuiltinID
== RISCVVector::BI__builtin_rvv_vget_v
||
22969 BuiltinID
== RISCVVector::BI__builtin_rvv_vset_v
)
22970 ICEArguments
= 1 << 1;
22972 assert(Error
== ASTContext::GE_None
&& "Unexpected error");
// The optional domain operand of ntl_load (Ops[1]) and ntl_store (Ops[2]) is
// read back as a ConstantInt further down, so its bit is set in the mask here.
22975 if (BuiltinID
== RISCV::BI__builtin_riscv_ntl_load
)
22976 ICEArguments
|= (1 << 1);
22977 if (BuiltinID
== RISCV::BI__builtin_riscv_ntl_store
)
22978 ICEArguments
|= (1 << 2);
// Evaluate every call argument, in order, into Ops.
22980 for (unsigned i
= 0, e
= E
->getNumArgs(); i
!= e
; i
++) {
22981 // Handle aggregate argument, namely RVV tuple types in segment load/store
22982 if (hasAggregateEvaluationKind(E
->getArg(i
)->getType())) {
22983 LValue L
= EmitAggExprToLValue(E
->getArg(i
));
22984 llvm::Value
*AggValue
= Builder
.CreateLoad(L
.getAddress());
22985 Ops
.push_back(AggValue
);
// Non-aggregate arguments go through EmitScalarOrConstFoldImmArg together
// with the constant-argument mask computed above.
22988 Ops
.push_back(EmitScalarOrConstFoldImmArg(ICEArguments
, i
, E
));
// ID stays not_intrinsic unless a switch case below selects an intrinsic; the
// assert after the switch checks that one was chosen.
22991 Intrinsic::ID ID
= Intrinsic::not_intrinsic
;
22992 // The 0th bit simulates the `vta` of RVV
22993 // The 1st bit simulates the `vma` of RVV
// NOTE(review): RVV_VTA, RVV_VMA, PolicyAttrs, IsMasked and SegInstSEW are not
// referenced in the visible code; presumably they are consumed by the .inc
// files included inside the switch -- confirm.
22994 constexpr unsigned RVV_VTA
= 0x1;
22995 constexpr unsigned RVV_VMA
= 0x2;
22996 int PolicyAttrs
= 0;
22997 bool IsMasked
= false;
22998 // This is used by segment load/store to determine its llvm type.
22999 unsigned SegInstSEW
= 8;
23001 // Required for overloaded intrinsics.
23002 llvm::SmallVector
<llvm::Type
*, 2> IntrinsicTypes
;
23003 switch (BuiltinID
) {
23004 default: llvm_unreachable("unexpected builtin ID");
// Scalar builtins that share one code path: the nested switch picks the
// intrinsic ID and IntrinsicTypes is then set to {ResultType}.
23005 case RISCV::BI__builtin_riscv_orc_b_32
:
23006 case RISCV::BI__builtin_riscv_orc_b_64
:
23007 case RISCV::BI__builtin_riscv_clmul_32
:
23008 case RISCV::BI__builtin_riscv_clmul_64
:
23009 case RISCV::BI__builtin_riscv_clmulh_32
:
23010 case RISCV::BI__builtin_riscv_clmulh_64
:
23011 case RISCV::BI__builtin_riscv_clmulr_32
:
23012 case RISCV::BI__builtin_riscv_clmulr_64
:
23013 case RISCV::BI__builtin_riscv_xperm4_32
:
23014 case RISCV::BI__builtin_riscv_xperm4_64
:
23015 case RISCV::BI__builtin_riscv_xperm8_32
:
23016 case RISCV::BI__builtin_riscv_xperm8_64
:
23017 case RISCV::BI__builtin_riscv_brev8_32
:
23018 case RISCV::BI__builtin_riscv_brev8_64
:
23019 case RISCV::BI__builtin_riscv_zip_32
:
23020 case RISCV::BI__builtin_riscv_unzip_32
: {
// The 32- and 64-bit variants of each builtin select the same intrinsic.
23021 switch (BuiltinID
) {
23022 default: llvm_unreachable("unexpected builtin ID");
23024 case RISCV::BI__builtin_riscv_orc_b_32
:
23025 case RISCV::BI__builtin_riscv_orc_b_64
:
23026 ID
= Intrinsic::riscv_orc_b
;
23030 case RISCV::BI__builtin_riscv_clmul_32
:
23031 case RISCV::BI__builtin_riscv_clmul_64
:
23032 ID
= Intrinsic::riscv_clmul
;
23034 case RISCV::BI__builtin_riscv_clmulh_32
:
23035 case RISCV::BI__builtin_riscv_clmulh_64
:
23036 ID
= Intrinsic::riscv_clmulh
;
23038 case RISCV::BI__builtin_riscv_clmulr_32
:
23039 case RISCV::BI__builtin_riscv_clmulr_64
:
23040 ID
= Intrinsic::riscv_clmulr
;
23044 case RISCV::BI__builtin_riscv_xperm8_32
:
23045 case RISCV::BI__builtin_riscv_xperm8_64
:
23046 ID
= Intrinsic::riscv_xperm8
;
23048 case RISCV::BI__builtin_riscv_xperm4_32
:
23049 case RISCV::BI__builtin_riscv_xperm4_64
:
23050 ID
= Intrinsic::riscv_xperm4
;
23054 case RISCV::BI__builtin_riscv_brev8_32
:
23055 case RISCV::BI__builtin_riscv_brev8_64
:
23056 ID
= Intrinsic::riscv_brev8
;
23058 case RISCV::BI__builtin_riscv_zip_32
:
23059 ID
= Intrinsic::riscv_zip
;
23061 case RISCV::BI__builtin_riscv_unzip_32
:
23062 ID
= Intrinsic::riscv_unzip
;
// The intrinsics selected above are overloaded on the result type.
23066 IntrinsicTypes
= {ResultType
};
// SHA-256, SM4 and SM3 builtins: each case only assigns the matching
// intrinsic ID (IntrinsicTypes is not touched in the visible code).
23073 case RISCV::BI__builtin_riscv_sha256sig0
:
23074 ID
= Intrinsic::riscv_sha256sig0
;
23076 case RISCV::BI__builtin_riscv_sha256sig1
:
23077 ID
= Intrinsic::riscv_sha256sig1
;
23079 case RISCV::BI__builtin_riscv_sha256sum0
:
23080 ID
= Intrinsic::riscv_sha256sum0
;
23082 case RISCV::BI__builtin_riscv_sha256sum1
:
23083 ID
= Intrinsic::riscv_sha256sum1
;
23087 case RISCV::BI__builtin_riscv_sm4ks
:
23088 ID
= Intrinsic::riscv_sm4ks
;
23090 case RISCV::BI__builtin_riscv_sm4ed
:
23091 ID
= Intrinsic::riscv_sm4ed
;
23095 case RISCV::BI__builtin_riscv_sm3p0
:
23096 ID
= Intrinsic::riscv_sm3p0
;
23098 case RISCV::BI__builtin_riscv_sm3p1
:
23099 ID
= Intrinsic::riscv_sm3p1
;
// clz: emitted as a call to the generic ctlz intrinsic (overloaded on the
// operand type) with its i1 operand set to false; an unsigned integer cast to
// ResultType is created when the call's type differs from ResultType.
23102 case RISCV::BI__builtin_riscv_clz_32
:
23103 case RISCV::BI__builtin_riscv_clz_64
: {
23104 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctlz
, Ops
[0]->getType());
23105 Value
*Result
= Builder
.CreateCall(F
, {Ops
[0], Builder
.getInt1(false)});
23106 if (Result
->getType() != ResultType
)
23108 Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/ false, "cast");
// ctz: same shape as clz above, using the generic cttz intrinsic.
23111 case RISCV::BI__builtin_riscv_ctz_32
:
23112 case RISCV::BI__builtin_riscv_ctz_64
: {
23113 Function
*F
= CGM
.getIntrinsic(Intrinsic::cttz
, Ops
[0]->getType());
23114 Value
*Result
= Builder
.CreateCall(F
, {Ops
[0], Builder
.getInt1(false)});
23115 if (Result
->getType() != ResultType
)
23117 Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/ false, "cast");
// ntl_load: a plain load from Ops[0] tagged with !nontemporal and
// "riscv-nontemporal-domain" metadata. The domain comes from Ops[1] when two
// operands were passed, otherwise it defaults to 5.
23122 case RISCV::BI__builtin_riscv_ntl_load
: {
23123 llvm::Type
*ResTy
= ConvertType(E
->getType());
23124 unsigned DomainVal
= 5; // Default __RISCV_NTLH_ALL
23125 if (Ops
.size() == 2)
23126 DomainVal
= cast
<ConstantInt
>(Ops
[1])->getZExtValue();
23128 llvm::MDNode
*RISCVDomainNode
= llvm::MDNode::get(
23130 llvm::ConstantAsMetadata::get(Builder
.getInt32(DomainVal
)));
23131 llvm::MDNode
*NontemporalNode
= llvm::MDNode::get(
23132 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder
.getInt32(1)));
// Width is the loaded type's size in bits; for scalable vectors it is the
// element size times the known-minimum element count. Width / 8 is used as
// the alignment of the load address.
23135 if(ResTy
->isScalableTy()) {
23136 const ScalableVectorType
*SVTy
= cast
<ScalableVectorType
>(ResTy
);
23137 llvm::Type
*ScalarTy
= ResTy
->getScalarType();
23138 Width
= ScalarTy
->getPrimitiveSizeInBits() *
23139 SVTy
->getElementCount().getKnownMinValue();
23141 Width
= ResTy
->getPrimitiveSizeInBits();
23142 LoadInst
*Load
= Builder
.CreateLoad(
23143 Address(Ops
[0], ResTy
, CharUnits::fromQuantity(Width
/ 8)));
23145 Load
->setMetadata(llvm::LLVMContext::MD_nontemporal
, NontemporalNode
);
23146 Load
->setMetadata(CGM
.getModule().getMDKindID("riscv-nontemporal-domain"),
// ntl_store: default-aligned store of Ops[1] to the address Ops[0], tagged
// with the same two metadata kinds; the domain comes from Ops[2] when three
// operands were passed, otherwise it defaults to 5.
23151 case RISCV::BI__builtin_riscv_ntl_store
: {
23152 unsigned DomainVal
= 5; // Default __RISCV_NTLH_ALL
23153 if (Ops
.size() == 3)
23154 DomainVal
= cast
<ConstantInt
>(Ops
[2])->getZExtValue();
23156 llvm::MDNode
*RISCVDomainNode
= llvm::MDNode::get(
23158 llvm::ConstantAsMetadata::get(Builder
.getInt32(DomainVal
)));
23159 llvm::MDNode
*NontemporalNode
= llvm::MDNode::get(
23160 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder
.getInt32(1)));
23162 StoreInst
*Store
= Builder
.CreateDefaultAlignedStore(Ops
[1], Ops
[0]);
23163 Store
->setMetadata(llvm::LLVMContext::MD_nontemporal
, NontemporalNode
);
23164 Store
->setMetadata(CGM
.getModule().getMDKindID("riscv-nontemporal-domain"),
// cv_alu builtins: most cases only assign an intrinsic ID; the ext* and
// slet* cases further down return IR built directly instead.
23170 case RISCV::BI__builtin_riscv_cv_alu_addN
:
23171 ID
= Intrinsic::riscv_cv_alu_addN
;
23173 case RISCV::BI__builtin_riscv_cv_alu_addRN
:
23174 ID
= Intrinsic::riscv_cv_alu_addRN
;
23176 case RISCV::BI__builtin_riscv_cv_alu_adduN
:
23177 ID
= Intrinsic::riscv_cv_alu_adduN
;
23179 case RISCV::BI__builtin_riscv_cv_alu_adduRN
:
23180 ID
= Intrinsic::riscv_cv_alu_adduRN
;
23182 case RISCV::BI__builtin_riscv_cv_alu_clip
:
23183 ID
= Intrinsic::riscv_cv_alu_clip
;
23185 case RISCV::BI__builtin_riscv_cv_alu_clipu
:
23186 ID
= Intrinsic::riscv_cv_alu_clipu
;
// extbs/extbz/exths/exthz: truncate Ops[0] to 8 or 16 bits, then sign- or
// zero-extend the result to 32 bits.
23188 case RISCV::BI__builtin_riscv_cv_alu_extbs
:
23189 return Builder
.CreateSExt(Builder
.CreateTrunc(Ops
[0], Int8Ty
), Int32Ty
,
23191 case RISCV::BI__builtin_riscv_cv_alu_extbz
:
23192 return Builder
.CreateZExt(Builder
.CreateTrunc(Ops
[0], Int8Ty
), Int32Ty
,
23194 case RISCV::BI__builtin_riscv_cv_alu_exths
:
23195 return Builder
.CreateSExt(Builder
.CreateTrunc(Ops
[0], Int16Ty
), Int32Ty
,
23197 case RISCV::BI__builtin_riscv_cv_alu_exthz
:
23198 return Builder
.CreateZExt(Builder
.CreateTrunc(Ops
[0], Int16Ty
), Int32Ty
,
// slet/sletu: signed / unsigned "less than or equal" comparison of Ops[0] and
// Ops[1], zero-extended to 32 bits.
23200 case RISCV::BI__builtin_riscv_cv_alu_slet
:
23201 return Builder
.CreateZExt(Builder
.CreateICmpSLE(Ops
[0], Ops
[1]), Int32Ty
,
23203 case RISCV::BI__builtin_riscv_cv_alu_sletu
:
23204 return Builder
.CreateZExt(Builder
.CreateICmpULE(Ops
[0], Ops
[1]), Int32Ty
,
23206 case RISCV::BI__builtin_riscv_cv_alu_subN
:
23207 ID
= Intrinsic::riscv_cv_alu_subN
;
23209 case RISCV::BI__builtin_riscv_cv_alu_subRN
:
23210 ID
= Intrinsic::riscv_cv_alu_subRN
;
23212 case RISCV::BI__builtin_riscv_cv_alu_subuN
:
23213 ID
= Intrinsic::riscv_cv_alu_subuN
;
23215 case RISCV::BI__builtin_riscv_cv_alu_subuRN
:
23216 ID
= Intrinsic::riscv_cv_alu_subuRN
;
23219 // Vector builtins are handled from here.
23220 #include "clang/Basic/riscv_vector_builtin_cg.inc"
23222 // SiFive Vector builtins are handled from here.
23223 #include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
// Common tail for cases that did not return on their own: an intrinsic must
// have been selected; it is looked up with the collected overload types and
// called with the evaluated operands.
23226 assert(ID
!= Intrinsic::not_intrinsic
);
23228 llvm::Function
*F
= CGM
.getIntrinsic(ID
, IntrinsicTypes
);
23229 return Builder
.CreateCall(F
, Ops
, "");