//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//
14 #include "CGCUDARuntime.h"
16 #include "CGObjCRuntime.h"
17 #include "CGOpenCLRuntime.h"
18 #include "CGRecordLayout.h"
19 #include "CodeGenFunction.h"
20 #include "CodeGenModule.h"
21 #include "ConstantEmitter.h"
22 #include "PatternInit.h"
23 #include "TargetInfo.h"
24 #include "clang/AST/ASTContext.h"
25 #include "clang/AST/Attr.h"
26 #include "clang/AST/Decl.h"
27 #include "clang/AST/OSLog.h"
28 #include "clang/Basic/TargetBuiltins.h"
29 #include "clang/Basic/TargetInfo.h"
30 #include "clang/CodeGen/CGFunctionInfo.h"
31 #include "clang/Frontend/FrontendDiagnostic.h"
32 #include "llvm/ADT/APFloat.h"
33 #include "llvm/ADT/APInt.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/StringExtras.h"
36 #include "llvm/Analysis/ValueTracking.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/InlineAsm.h"
39 #include "llvm/IR/Intrinsics.h"
40 #include "llvm/IR/IntrinsicsAArch64.h"
41 #include "llvm/IR/IntrinsicsAMDGPU.h"
42 #include "llvm/IR/IntrinsicsARM.h"
43 #include "llvm/IR/IntrinsicsBPF.h"
44 #include "llvm/IR/IntrinsicsHexagon.h"
45 #include "llvm/IR/IntrinsicsLoongArch.h"
46 #include "llvm/IR/IntrinsicsNVPTX.h"
47 #include "llvm/IR/IntrinsicsPowerPC.h"
48 #include "llvm/IR/IntrinsicsR600.h"
49 #include "llvm/IR/IntrinsicsRISCV.h"
50 #include "llvm/IR/IntrinsicsS390.h"
51 #include "llvm/IR/IntrinsicsVE.h"
52 #include "llvm/IR/IntrinsicsWebAssembly.h"
53 #include "llvm/IR/IntrinsicsX86.h"
54 #include "llvm/IR/MDBuilder.h"
55 #include "llvm/IR/MatrixBuilder.h"
56 #include "llvm/Support/ConvertUTF.h"
57 #include "llvm/Support/ScopedPrinter.h"
58 #include "llvm/TargetParser/AArch64TargetParser.h"
59 #include "llvm/TargetParser/X86TargetParser.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm;
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
                             Align AlignmentInBytes) {
  ConstantInt *Byte;
  switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
  case LangOptions::TrivialAutoVarInitKind::Uninitialized:
    // Nothing to initialize.
    return;
  case LangOptions::TrivialAutoVarInitKind::Zero:
    Byte = CGF.Builder.getInt8(0x00);
    break;
  case LangOptions::TrivialAutoVarInitKind::Pattern: {
    llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
    Byte = llvm::dyn_cast<llvm::ConstantInt>(
        initializationPatternFor(CGF.CGM, Int8));
    break;
  }
  }
  if (CGF.CGM.stopAutoInit())
    return;
  auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
  I->addAnnotationMetadata("auto-init");
}
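
// Illustrative note (not part of the original source): with
// -ftrivial-auto-var-init=zero, a call such as
//
//   void *p = __builtin_alloca(n);
//
// is lowered to an `alloca` followed by a memset of 0x00 over `n` bytes, and
// the memset is tagged with "auto-init" metadata; CGM.stopAutoInit() models
// the -ftrivial-auto-var-init-stop-after= cutoff that suppresses it.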
/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                     unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // TODO: This list should be expanded or refactored after all GCC-compatible
  // std libcall builtins are implemented.
  static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
      {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
      {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
      {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
      {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
      {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
      {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
      {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
      {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
      {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
      {Builtin::BI__builtin_printf, "__printfieee128"},
      {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
      {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
      {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
      {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
      {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
      {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
      {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
      {Builtin::BI__builtin_scanf, "__scanfieee128"},
      {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
      {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
      {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
      {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
      {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
  };

  // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
  // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
  // if it is 64-bit 'long double' mode.
  static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
      {Builtin::BI__builtin_frexpl, "frexp"},
      {Builtin::BI__builtin_ldexpl, "ldexp"},
      {Builtin::BI__builtin_modfl, "modf"},
  };

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else {
    // TODO: This mutation should also be applied to other targets other than
    // PPC, after backend supports IEEE 128-bit style libcalls.
    if (getTriple().isPPC64() &&
        &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
        F128Builtins.find(BuiltinID) != F128Builtins.end())
      Name = F128Builtins[BuiltinID];
    else if (getTriple().isOSAIX() &&
             &getTarget().getLongDoubleFormat() ==
                 &llvm::APFloat::IEEEdouble() &&
             AIXLongDouble64Builtins.find(BuiltinID) !=
                 AIXLongDouble64Builtins.end())
      Name = AIXLongDouble64Builtins[BuiltinID];
    else
      Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
  }

  llvm::FunctionType *Ty =
      cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}
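
// Illustrative summary of the mapping above (no additional behavior): a call
// to __builtin_fabsf resolves to the library function "fabsf" (the
// "__builtin_" prefix is the 10 characters dropped by substr(10)); on a PPC64
// target whose 'long double' is IEEE quad, __builtin_printf is instead
// redirected to "__printfieee128".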
/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}
static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}
static llvm::Value *CheckAtomicAlignment(CodeGenFunction &CGF,
                                         const CallExpr *E) {
  ASTContext &Ctx = CGF.getContext();
  Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
  unsigned Bytes = Ptr.getElementType()->isPointerTy()
                       ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
                       : Ptr.getElementType()->getScalarSizeInBits() / 8;
  unsigned Align = Ptr.getAlignment().getQuantity();
  if (Align % Bytes != 0) {
    DiagnosticsEngine &Diags = CGF.CGM.getDiags();
    Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
  }
  return Ptr.getPointer();
}
/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static Value *MakeBinaryAtomicValue(
    CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {

  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E);
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], Ordering);
  return EmitFromInt(CGF, Result, T, ValueType);
}
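
// Illustrative lowering (assuming 32-bit int operands): a GNU builtin such as
//
//   __sync_fetch_and_add(&x, 1);
//
// reaches this helper with Kind == AtomicRMWInst::Add and becomes roughly
//
//   %old = atomicrmw add ptr %x, i32 1 seq_cst
//
// and the *old* value is what is returned, matching fetch-and-op semantics.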
static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
  Value *Address = CGF.EmitScalarExpr(E->getArg(1));

  // Convert the type of the pointer to a pointer to the stored type.
  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
  unsigned SrcAddrSpace = Address->getType()->getPointerAddressSpace();
  Value *BC = CGF.Builder.CreateBitCast(
      Address, llvm::PointerType::get(Val->getType(), SrcAddrSpace), "cast");
  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
  LV.setNontemporal(true);
  CGF.EmitStoreOfScalar(Val, LV, false);
  return nullptr;
}
static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Address = CGF.EmitScalarExpr(E->getArg(0));

  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
  LV.setNontemporal(true);
  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
}
static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}
/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op,
                                   bool Invert = false) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E);
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  if (Invert)
    Result =
        CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
                                llvm::ConstantInt::getAllOnesValue(IntType));
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}
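
// Illustrative contrast with MakeBinaryAtomicValue (a sketch, not extra
// behavior): for __sync_add_and_fetch(&x, 1) the atomicrmw still yields the
// old value, so the helper re-applies Op (an add of 1 here) to produce the
// *new* value; Invert serves __sync_nand_and_fetch, whose post-operation
// result must additionally be bitwise-negated.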
/// Utility to insert an atomic cmpxchg instruction.
///
/// @param CGF The current codegen function.
/// @param E   Builtin call expression to convert to cmpxchg.
///            arg0 - address to operate on
///            arg1 - value to compare with
///            arg2 - new value
/// @param ReturnBool Specifies whether to return success flag of
///                   cmpxchg result or the old value.
///
/// @returns result of cmpxchg, according to ReturnBool
///
/// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics
/// invoke the function EmitAtomicCmpXchgForMSIntrin.
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
                                     bool ReturnBool) {
  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
  llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E);
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  Value *Args[3];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);

  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
      Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
      llvm::AtomicOrdering::SequentiallyConsistent);
  if (ReturnBool)
    // Extract boolean success flag and zext it to int.
    return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
                                  CGF.ConvertType(E->getType()));
  else
    // Extract old value and emit it using the same type as compare value.
    return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
                       ValueType);
}
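
// Illustrative use of ReturnBool: __sync_bool_compare_and_swap(&x, old, new)
// returns the i1 success flag (extractvalue index 1) zero-extended to the
// call's result type, while __sync_val_compare_and_swap(&x, old, new) returns
// the previously stored value (extractvalue index 0) converted back to the
// operand type.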
/// This function should be invoked to emit atomic cmpxchg for Microsoft's
/// _InterlockedCompareExchange* intrinsics which have the following signature:
/// T _InterlockedCompareExchange(T volatile *Destination,
///                               T Exchange,
///                               T Comparand);
///
/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
/// cmpxchg *Destination, Comparand, Exchange.
/// So we need to swap Comparand and Exchange when invoking
/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
/// function MakeAtomicCmpXchgValue since it expects the arguments to be
/// already swapped.
static
Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(
      E->getType(), E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
                                                 E->getArg(1)->getType()));
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
                                                 E->getArg(2)->getType()));

  auto *Destination = CGF.EmitScalarExpr(E->getArg(0));
  auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
  auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));

  // For Release ordering, the failure ordering should be Monotonic.
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
                         AtomicOrdering::Monotonic :
                         SuccessOrdering;

  // The atomic instruction is marked volatile for consistency with MSVC. This
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
  // _Interlocked* operations in the future, we will have to remove the volatile
  // marking.
  auto *Result = CGF.Builder.CreateAtomicCmpXchg(
      Destination, Comparand, Exchange,
      SuccessOrdering, FailureOrdering);
  Result->setVolatile(true);
  return CGF.Builder.CreateExtractValue(Result, 0);
}
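
// Illustrative operand swap described above: for
//
//   long prev = _InterlockedCompareExchange(&x, Exchange, Comparand);
//
// the emitted instruction is roughly
//
//   %pair = cmpxchg volatile ptr %x, i32 %Comparand, i32 %Exchange seq_cst seq_cst
//   %prev = extractvalue { i32, i1 } %pair, 0
//
// i.e. the intrinsic's (Exchange, Comparand) order is reversed to match
// cmpxchg's (compare, new) order.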
// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
// prototyped like this:
//
// unsigned char _InterlockedCompareExchange128...(
//     __int64 volatile * _Destination,
//     __int64 _ExchangeHigh,
//     __int64 _ExchangeLow,
//     __int64 * _ComparandResult);
static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
                                              const CallExpr *E,
                                              AtomicOrdering SuccessOrdering) {
  assert(E->getNumArgs() == 4);
  llvm::Value *Destination = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
  llvm::Value *ComparandPtr = CGF.EmitScalarExpr(E->getArg(3));

  assert(Destination->getType()->isPointerTy());
  assert(!ExchangeHigh->getType()->isPointerTy());
  assert(!ExchangeLow->getType()->isPointerTy());
  assert(ComparandPtr->getType()->isPointerTy());

  // For Release ordering, the failure ordering should be Monotonic.
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
                             ? AtomicOrdering::Monotonic
                             : SuccessOrdering;

  // Convert to i128 pointers and values.
  llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
  llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
  Destination = CGF.Builder.CreateBitCast(Destination, Int128PtrTy);
  Address ComparandResult(CGF.Builder.CreateBitCast(ComparandPtr, Int128PtrTy),
                          Int128Ty, CGF.getContext().toCharUnitsFromBits(128));

  // (((i128)hi) << 64) | ((i128)lo)
  ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
  ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
  ExchangeHigh =
      CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
  llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);

  // Load the comparand for the instruction.
  llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandResult);

  auto *CXI = CGF.Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
                                              SuccessOrdering, FailureOrdering);

  // The atomic instruction is marked volatile for consistency with MSVC. This
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
  // _Interlocked* operations in the future, we will have to remove the volatile
  // marking.
  CXI->setVolatile(true);

  // Store the result as an outparameter.
  CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
                          ComparandResult);

  // Get the success boolean and zero extend it to i8.
  Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
  return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
}
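
// Illustrative call pattern for the 128-bit variant (user code, not this
// file):
//
//   __int64 expected[2] = { /* current value */ };
//   unsigned char ok =
//       _InterlockedCompareExchange128(dst, new_hi, new_lo, expected);
//
// The two 64-bit halves are glued into a single i128 exchange value, the
// comparand is loaded from (and the previous value stored back to) the
// out-parameter, and the i1 success flag is widened to the i8 return value.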
static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());

  auto *IntTy = CGF.ConvertType(E->getType());
  auto *Result = CGF.Builder.CreateAtomicRMW(
      AtomicRMWInst::Add,
      CGF.EmitScalarExpr(E->getArg(0)),
      ConstantInt::get(IntTy, 1),
      Ordering);
  return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
}

static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());

  auto *IntTy = CGF.ConvertType(E->getType());
  auto *Result = CGF.Builder.CreateAtomicRMW(
      AtomicRMWInst::Sub,
      CGF.EmitScalarExpr(E->getArg(0)),
      ConstantInt::get(IntTy, 1),
      Ordering);
  return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
}
// Build a plain volatile load.
static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
  llvm::Type *ITy =
      llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
  Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo());
  llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
  Load->setVolatile(true);
  return Load;
}

// Build a plain volatile store.
static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
  Value *Value = CGF.EmitScalarExpr(E->getArg(1));
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
  llvm::Type *ITy =
      llvm::IntegerType::get(CGF.getLLVMContext(), StoreSize.getQuantity() * 8);
  Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo());
  llvm::StoreInst *Store =
      CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
  Store->setVolatile(true);
  return Store;
}
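
// Illustrative mapping: __iso_volatile_load32(p) and
// __iso_volatile_store32(p, v) become a plain `load volatile i32` and
// `store volatile i32` at the natural width of the pointee, with no added
// atomicity or fences.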
// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type. Depending on mode, this may be a constrained
// floating-point intrinsic.
static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E, unsigned IntrinsicID,
                                unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, Src0);
  }
}

// Emit an intrinsic that has 2 operands of the same type as its result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E, unsigned IntrinsicID,
                                unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, { Src0, Src1 });
  }
}

// Has second type mangled argument.
static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
    CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
    llvm::Intrinsic::ID ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
                                       {Src0->getType(), Src1->getType()});
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
  }

  Function *F =
      CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

// Emit an intrinsic that has 3 operands of the same type as its result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E, unsigned IntrinsicID,
                                 unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
  }
}

// Emit an intrinsic where all operands are of the same type as the result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                unsigned IntrinsicID,
                                                unsigned ConstrainedIntrinsicID,
                                                llvm::Type *Ty,
                                                ArrayRef<Value *> Args) {
  Function *F;
  if (CGF.Builder.getIsFPConstrained())
    F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
  else
    F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);

  if (CGF.Builder.getIsFPConstrained())
    return CGF.Builder.CreateConstrainedFPCall(F, Args);
  else
    return CGF.Builder.CreateCall(F, Args);
}
// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type.
static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                               unsigned IntrinsicID,
                               llvm::StringRef Name = "") {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, Src0, Name);
}

// Emit an intrinsic that has 2 operands of the same type as its result.
static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E,
                                unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1 });
}

// Emit an intrinsic that has 3 operands of the same type as its result.
static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E,
                                 unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
}

// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

// Emit an intrinsic that has overloaded integer result and fp operand.
static Value *
emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                                        unsigned IntrinsicID,
                                        unsigned ConstrainedIntrinsicID) {
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
                                       {ResultType, Src0->getType()});
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
  } else {
    Function *F =
        CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
    return CGF.Builder.CreateCall(F, Src0);
  }
}
/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
  Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
  Call->setDoesNotAccessMemory();
  return Call;
}

/// Emit the computation of the sign bit for a floating point value. Returns
/// the i1 sign bit value.
static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
  LLVMContext &C = CGF.CGM.getLLVMContext();

  llvm::Type *Ty = V->getType();
  int Width = Ty->getPrimitiveSizeInBits();
  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
  V = CGF.Builder.CreateBitCast(V, IntTy);
  if (Ty->isPPC_FP128Ty()) {
    // We want the sign bit of the higher-order double. The bitcast we just
    // did works as if the double-double was stored to memory and then
    // read as an i128. The "store" will put the higher-order double in the
    // lower address in both little- and big-Endian modes, but the "load"
    // will treat those bits as a different part of the i128: the low bits in
    // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
    // we need to shift the high bits down to the low before truncating.
    Width >>= 1;
    if (CGF.getTarget().isBigEndian()) {
      Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
      V = CGF.Builder.CreateLShr(V, ShiftCst);
    }
    // We are truncating value in order to extract the higher-order
    // double, which we will be using to extract the sign from.
    IntTy = llvm::IntegerType::get(C, Width);
    V = CGF.Builder.CreateTrunc(V, IntTy);
  }
  Value *Zero = llvm::Constant::getNullValue(IntTy);
  return CGF.Builder.CreateICmpSLT(V, Zero);
}
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
                              const CallExpr *E, llvm::Constant *calleeValue) {
  CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
}
/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
/// depending on IntrinsicID.
///
/// \arg CGF The current codegen function.
/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
/// \arg X The first argument to the llvm.*.with.overflow.*.
/// \arg Y The second argument to the llvm.*.with.overflow.*.
/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
/// \returns The result (i.e. sum/product) returned by the intrinsic.
static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
                                          const llvm::Intrinsic::ID IntrinsicID,
                                          llvm::Value *X, llvm::Value *Y,
                                          llvm::Value *&Carry) {
  // Make sure we have integers of the same width.
  assert(X->getType() == Y->getType() &&
         "Arguments must be the same type. (Did you forget to make sure both "
         "arguments have the same integer width?)");

  Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
  return CGF.Builder.CreateExtractValue(Tmp, 0);
}
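
// Illustrative lowering (assuming 32-bit signed operands): callers of this
// helper turn
//
//   bool ovf = __builtin_sadd_overflow(a, b, &sum);
//
// into roughly
//
//   %pair = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %sum  = extractvalue { i32, i1 } %pair, 0   ; returned by this helper
//   %ovf  = extractvalue { i32, i1 } %pair, 1   ; written to Carry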
static Value *emitRangedBuiltin(CodeGenFunction &CGF,
                                unsigned IntrinsicID,
                                int low, int high) {
  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
  Call->setMetadata(llvm::LLVMContext::MD_noundef,
                    llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
  return Call;
}
struct WidthAndSignedness {
  unsigned Width;
  bool Signed;
};

static WidthAndSignedness
getIntegerWidthAndSignedness(const clang::ASTContext &context,
                             const clang::QualType Type) {
  assert(Type->isIntegerType() && "Given type is not an integer.");
  unsigned Width = Type->isBooleanType()  ? 1
                   : Type->isBitIntType() ? context.getIntWidth(Type)
                                          : context.getTypeInfo(Type).Width;
  bool Signed = Type->isSignedIntegerType();
  return {Width, Signed};
}
// Given one or more integer types, this function produces an integer type that
// encompasses them: any value in one of the given types could be expressed in
// the encompassing type.
static struct WidthAndSignedness
EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
  assert(Types.size() > 0 && "Empty list of types.");

  // If any of the given types is signed, we must return a signed type.
  bool Signed = false;
  for (const auto &Type : Types) {
    Signed |= Type.Signed;
  }

  // The encompassing type must have a width greater than or equal to the width
  // of the specified types. Additionally, if the encompassing type is signed,
  // its width must be strictly greater than the width of any unsigned types
  // given.
  unsigned Width = 0;
  for (const auto &Type : Types) {
    unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
    if (Width < MinWidth) {
      Width = MinWidth;
    }
  }

  return {Width, Signed};
}
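
// Worked example of the rule above: for {unsigned 32-bit, signed 16-bit} the
// result must be signed, and the unsigned 32-bit member forces a minimum
// width of 33 bits (32 + 1, because the result is signed and that operand is
// not), so the encompassing type is a signed 33-bit integer.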
Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
  llvm::Type *DestType = Int8PtrTy;
  if (ArgValue->getType() != DestType)
    ArgValue =
        Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());

  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
}
/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
}

static llvm::Value *
getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
}

llvm::Value *
CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
                                                 llvm::IntegerType *ResType,
                                                 llvm::Value *EmittedE,
                                                 bool IsDynamic) {
  uint64_t ObjectSize;
  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
    return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
}
829 /// This Value may be either of the following:
830 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
832 /// - A call to the @llvm.objectsize intrinsic
834 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
835 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
836 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
838 CodeGenFunction::emitBuiltinObjectSize(const Expr
*E
, unsigned Type
,
839 llvm::IntegerType
*ResType
,
840 llvm::Value
*EmittedE
, bool IsDynamic
) {
841 // We need to reference an argument if the pointer is a parameter with the
842 // pass_object_size attribute.
843 if (auto *D
= dyn_cast
<DeclRefExpr
>(E
->IgnoreParenImpCasts())) {
844 auto *Param
= dyn_cast
<ParmVarDecl
>(D
->getDecl());
845 auto *PS
= D
->getDecl()->getAttr
<PassObjectSizeAttr
>();
846 if (Param
!= nullptr && PS
!= nullptr &&
847 areBOSTypesCompatible(PS
->getType(), Type
)) {
848 auto Iter
= SizeArguments
.find(Param
);
849 assert(Iter
!= SizeArguments
.end());
851 const ImplicitParamDecl
*D
= Iter
->second
;
852 auto DIter
= LocalDeclMap
.find(D
);
853 assert(DIter
!= LocalDeclMap
.end());
855 return EmitLoadOfScalar(DIter
->second
, /*Volatile=*/false,
856 getContext().getSizeType(), E
->getBeginLoc());
860 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
861 // evaluate E for side-effects. In either case, we shouldn't lower to
863 if (Type
== 3 || (!EmittedE
&& E
->HasSideEffects(getContext())))
864 return getDefaultBuiltinObjectSizeResult(Type
, ResType
);
866 Value
*Ptr
= EmittedE
? EmittedE
: EmitScalarExpr(E
);
867 assert(Ptr
->getType()->isPointerTy() &&
868 "Non-pointer passed to __builtin_object_size?");
871 CGM
.getIntrinsic(Intrinsic::objectsize
, {ResType
, Ptr
->getType()});
873 // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
874 Value
*Min
= Builder
.getInt1((Type
& 2) != 0);
875 // For GCC compatibility, __builtin_object_size treat NULL as unknown size.
876 Value
*NullIsUnknown
= Builder
.getTrue();
877 Value
*Dynamic
= Builder
.getInt1(IsDynamic
);
878 return Builder
.CreateCall(F
, {Ptr
, Min
, NullIsUnknown
, Dynamic
});
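
// Illustrative lowering when no pass_object_size parameter applies (assuming
// Type == 0 and a 64-bit size_t):
//
//   size_t n = __builtin_object_size(p, 0);
//
// becomes roughly
//
//   %n = call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false, i1 true, i1 false)
//
// where the i1 flags are (min, nullunknown, dynamic) as constructed above.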
/// A struct to generically describe a bit test intrinsic.
struct BitTest {
  enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
  enum InterlockingKind : uint8_t {
    Unlocked,
    Sequential,
    Acquire,
    Release,
    NoFence
  };

  ActionKind Action;
  InterlockingKind Interlocking;
  bool Is64Bit;

  static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};

BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
  switch (BuiltinID) {
    // Main portable variants.
  case Builtin::BI_bittest:
    return {TestOnly, Unlocked, false};
  case Builtin::BI_bittestandcomplement:
    return {Complement, Unlocked, false};
  case Builtin::BI_bittestandreset:
    return {Reset, Unlocked, false};
  case Builtin::BI_bittestandset:
    return {Set, Unlocked, false};
  case Builtin::BI_interlockedbittestandreset:
    return {Reset, Sequential, false};
  case Builtin::BI_interlockedbittestandset:
    return {Set, Sequential, false};

    // X86-specific 64-bit variants.
  case Builtin::BI_bittest64:
    return {TestOnly, Unlocked, true};
  case Builtin::BI_bittestandcomplement64:
    return {Complement, Unlocked, true};
  case Builtin::BI_bittestandreset64:
    return {Reset, Unlocked, true};
  case Builtin::BI_bittestandset64:
    return {Set, Unlocked, true};
  case Builtin::BI_interlockedbittestandreset64:
    return {Reset, Sequential, true};
  case Builtin::BI_interlockedbittestandset64:
    return {Set, Sequential, true};

    // ARM/AArch64-specific ordering variants.
  case Builtin::BI_interlockedbittestandset_acq:
    return {Set, Acquire, false};
  case Builtin::BI_interlockedbittestandset_rel:
    return {Set, Release, false};
  case Builtin::BI_interlockedbittestandset_nf:
    return {Set, NoFence, false};
  case Builtin::BI_interlockedbittestandreset_acq:
    return {Reset, Acquire, false};
  case Builtin::BI_interlockedbittestandreset_rel:
    return {Reset, Release, false};
  case Builtin::BI_interlockedbittestandreset_nf:
    return {Reset, NoFence, false};
  }
  llvm_unreachable("expected only bittest intrinsics");
}

static char bitActionToX86BTCode(BitTest::ActionKind A) {
  switch (A) {
  case BitTest::TestOnly:   return '\0';
  case BitTest::Complement: return 'c';
  case BitTest::Reset:      return 'r';
  case BitTest::Set:        return 's';
  }
  llvm_unreachable("invalid action");
}
static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
                                            BitTest BT,
                                            const CallExpr *E, Value *BitBase,
                                            Value *BitPos) {
  char Action = bitActionToX86BTCode(BT.Action);
  char SizeSuffix = BT.Is64Bit ? 'q' : 'l';

  // Build the assembly.
  SmallString<64> Asm;
  raw_svector_ostream AsmOS(Asm);
  if (BT.Interlocking != BitTest::Unlocked)
    AsmOS << "lock ";
  AsmOS << "bt";
  if (Action)
    AsmOS << Action;
  AsmOS << SizeSuffix << " $2, ($1)";

  // Build the constraints. FIXME: We should support immediates when possible.
  std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
  std::string_view MachineClobbers = CGF.getTarget().getClobbers();
  if (!MachineClobbers.empty()) {
    Constraints += ',';
    Constraints += MachineClobbers;
  }
  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(),
      CGF.getContext().getTypeSize(E->getArg(1)->getType()));
  llvm::Type *IntPtrType = IntType->getPointerTo();
  llvm::FunctionType *FTy =
      llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false);

  llvm::InlineAsm *IA =
      llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
  return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
}

static llvm::AtomicOrdering
getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
  switch (I) {
  case BitTest::Unlocked:   return llvm::AtomicOrdering::NotAtomic;
  case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
  case BitTest::Acquire:    return llvm::AtomicOrdering::Acquire;
  case BitTest::Release:    return llvm::AtomicOrdering::Release;
  case BitTest::NoFence:    return llvm::AtomicOrdering::Monotonic;
  }
  llvm_unreachable("invalid interlocking");
}
/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
/// bits and a bit position and read and optionally modify the bit at that
/// position. The position index can be arbitrarily large, i.e. it can be larger
/// than 31 or 63, so we need an indexed load in the general case.
static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
                                         unsigned BuiltinID,
                                         const CallExpr *E) {
  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));

  BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);

  // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
  // indexing operation internally. Use them if possible.
  if (CGF.getTarget().getTriple().isX86())
    return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);

  // Otherwise, use generic code to load one byte and test the bit. Use all but
  // the bottom three bits as the array index, and the bottom three bits to form
  // a mask.
  // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
  Value *ByteIndex = CGF.Builder.CreateAShr(
      BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
  Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
  Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
                                                 ByteIndex, "bittest.byteaddr"),
                   CGF.Int8Ty, CharUnits::One());
  Value *PosLow =
      CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
                            llvm::ConstantInt::get(CGF.Int8Ty, 0x7));

  // The updating instructions will need a mask.
  Value *Mask = nullptr;
  if (BT.Action != BitTest::TestOnly) {
    Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
                                 "bittest.mask");
  }

  // Check the action and ordering of the interlocked intrinsics.
  llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);

  Value *OldByte = nullptr;
  if (Ordering != llvm::AtomicOrdering::NotAtomic) {
    // Emit a combined atomicrmw load/store operation for the interlocked
    // intrinsics.
    llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
    if (BT.Action == BitTest::Reset) {
      Mask = CGF.Builder.CreateNot(Mask);
      RMWOp = llvm::AtomicRMWInst::And;
    }
    OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask,
                                          Ordering);
  } else {
    // Emit a plain load for the non-interlocked intrinsics.
    OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
    Value *NewByte = nullptr;
    switch (BT.Action) {
    case BitTest::TestOnly:
      // Don't store anything.
      break;
    case BitTest::Complement:
      NewByte = CGF.Builder.CreateXor(OldByte, Mask);
      break;
    case BitTest::Reset:
      NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
      break;
    case BitTest::Set:
      NewByte = CGF.Builder.CreateOr(OldByte, Mask);
      break;
    }
    if (NewByte)
      CGF.Builder.CreateStore(NewByte, ByteAddr);
  }

  // However we loaded the old byte, either by plain load or atomicrmw, shift
  // the bit into the low position and mask it to 0 or 1.
  Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
  return CGF.Builder.CreateAnd(
      ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
}
static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
                                                unsigned BuiltinID,
                                                const CallExpr *E) {
  Value *Addr = CGF.EmitScalarExpr(E->getArg(0));

  SmallString<64> Asm;
  raw_svector_ostream AsmOS(Asm);
  llvm::IntegerType *RetType = CGF.Int32Ty;

  switch (BuiltinID) {
  case clang::PPC::BI__builtin_ppc_ldarx:
    AsmOS << "ldarx ";
    RetType = CGF.Int64Ty;
    break;
  case clang::PPC::BI__builtin_ppc_lwarx:
    AsmOS << "lwarx ";
    RetType = CGF.Int32Ty;
    break;
  case clang::PPC::BI__builtin_ppc_lharx:
    AsmOS << "lharx ";
    RetType = CGF.Int16Ty;
    break;
  case clang::PPC::BI__builtin_ppc_lbarx:
    AsmOS << "lbarx ";
    RetType = CGF.Int8Ty;
    break;
  default:
    llvm_unreachable("Expected only PowerPC load reserve intrinsics");
  }

  AsmOS << "$0, ${1:y}";

  std::string Constraints = "=r,*Z,~{memory}";
  std::string_view MachineClobbers = CGF.getTarget().getClobbers();
  if (!MachineClobbers.empty()) {
    Constraints += ',';
    Constraints += MachineClobbers;
  }

  llvm::Type *IntPtrType = RetType->getPointerTo();
  llvm::FunctionType *FTy =
      llvm::FunctionType::get(RetType, {IntPtrType}, false);

  llvm::InlineAsm *IA =
      llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
  llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
  CI->addParamAttr(
      0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
  return CI;
}
enum class MSVCSetJmpKind {
  _setjmpex,
  _setjmp3,
  _setjmp
};

/// MSVC handles setjmp a bit differently on different platforms. On every
/// architecture except 32-bit x86, the frame address is passed. On x86, extra
/// parameters can be passed as variadic arguments, but we always pass none.
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
                               const CallExpr *E) {
  llvm::Value *Arg1 = nullptr;
  llvm::Type *Arg1Ty = nullptr;
  StringRef Name;
  bool IsVarArg = false;
  if (SJKind == MSVCSetJmpKind::_setjmp3) {
    Name = "_setjmp3";
    Arg1Ty = CGF.Int32Ty;
    Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
    IsVarArg = true;
  } else {
    Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
    Arg1Ty = CGF.Int8PtrTy;
    if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
      Arg1 = CGF.Builder.CreateCall(
          CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
    } else
      Arg1 = CGF.Builder.CreateCall(
          CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
          llvm::ConstantInt::get(CGF.Int32Ty, 0));
  }

  // Mark the call site and declaration with ReturnsTwice.
  llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
      CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
      llvm::Attribute::ReturnsTwice);
  llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
      ReturnsTwiceAttr, /*Local=*/true);

  llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
      CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
  llvm::Value *Args[] = {Buf, Arg1};
  llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
  CB->setAttributes(ReturnsTwiceAttr);
  return RValue::get(CB);
}
// Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code,
// we handle them here.
enum class CodeGenFunction::MSVCIntrin {
  _BitScanForward,
  _BitScanReverse,
  _InterlockedAnd,
  _InterlockedDecrement,
  _InterlockedExchange,
  _InterlockedExchangeAdd,
  _InterlockedExchangeSub,
  _InterlockedIncrement,
  _InterlockedOr,
  _InterlockedXor,
  _InterlockedExchangeAdd_acq,
  _InterlockedExchangeAdd_rel,
  _InterlockedExchangeAdd_nf,
  _InterlockedExchange_acq,
  _InterlockedExchange_rel,
  _InterlockedExchange_nf,
  _InterlockedCompareExchange_acq,
  _InterlockedCompareExchange_rel,
  _InterlockedCompareExchange_nf,
  _InterlockedCompareExchange128,
  _InterlockedCompareExchange128_acq,
  _InterlockedCompareExchange128_rel,
  _InterlockedCompareExchange128_nf,
  _InterlockedOr_acq,
  _InterlockedOr_rel,
  _InterlockedOr_nf,
  _InterlockedXor_acq,
  _InterlockedXor_rel,
  _InterlockedXor_nf,
  _InterlockedAnd_acq,
  _InterlockedAnd_rel,
  _InterlockedAnd_nf,
  _InterlockedIncrement_acq,
  _InterlockedIncrement_rel,
  _InterlockedIncrement_nf,
  _InterlockedDecrement_acq,
  _InterlockedDecrement_rel,
  _InterlockedDecrement_nf,
};
static std::optional<CodeGenFunction::MSVCIntrin>
translateArmToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::ARM::BI_BitScanForward:
  case clang::ARM::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::ARM::BI_BitScanReverse:
  case clang::ARM::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::ARM::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::ARM::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::ARM::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::ARM::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::ARM::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::ARM::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::ARM::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::ARM::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::ARM::BI_InterlockedExchangeAdd8_acq:
  case clang::ARM::BI_InterlockedExchangeAdd16_acq:
  case clang::ARM::BI_InterlockedExchangeAdd_acq:
  case clang::ARM::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::ARM::BI_InterlockedExchangeAdd8_rel:
  case clang::ARM::BI_InterlockedExchangeAdd16_rel:
  case clang::ARM::BI_InterlockedExchangeAdd_rel:
  case clang::ARM::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::ARM::BI_InterlockedExchangeAdd8_nf:
  case clang::ARM::BI_InterlockedExchangeAdd16_nf:
  case clang::ARM::BI_InterlockedExchangeAdd_nf:
  case clang::ARM::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::ARM::BI_InterlockedExchange8_acq:
  case clang::ARM::BI_InterlockedExchange16_acq:
  case clang::ARM::BI_InterlockedExchange_acq:
  case clang::ARM::BI_InterlockedExchange64_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::ARM::BI_InterlockedExchange8_rel:
  case clang::ARM::BI_InterlockedExchange16_rel:
  case clang::ARM::BI_InterlockedExchange_rel:
  case clang::ARM::BI_InterlockedExchange64_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::ARM::BI_InterlockedExchange8_nf:
  case clang::ARM::BI_InterlockedExchange16_nf:
  case clang::ARM::BI_InterlockedExchange_nf:
  case clang::ARM::BI_InterlockedExchange64_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::ARM::BI_InterlockedCompareExchange8_acq:
  case clang::ARM::BI_InterlockedCompareExchange16_acq:
  case clang::ARM::BI_InterlockedCompareExchange_acq:
  case clang::ARM::BI_InterlockedCompareExchange64_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::ARM::BI_InterlockedCompareExchange8_rel:
  case clang::ARM::BI_InterlockedCompareExchange16_rel:
  case clang::ARM::BI_InterlockedCompareExchange_rel:
  case clang::ARM::BI_InterlockedCompareExchange64_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::ARM::BI_InterlockedCompareExchange8_nf:
  case clang::ARM::BI_InterlockedCompareExchange16_nf:
  case clang::ARM::BI_InterlockedCompareExchange_nf:
  case clang::ARM::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::ARM::BI_InterlockedOr8_acq:
  case clang::ARM::BI_InterlockedOr16_acq:
  case clang::ARM::BI_InterlockedOr_acq:
  case clang::ARM::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::ARM::BI_InterlockedOr8_rel:
  case clang::ARM::BI_InterlockedOr16_rel:
  case clang::ARM::BI_InterlockedOr_rel:
  case clang::ARM::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::ARM::BI_InterlockedOr8_nf:
  case clang::ARM::BI_InterlockedOr16_nf:
  case clang::ARM::BI_InterlockedOr_nf:
  case clang::ARM::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::ARM::BI_InterlockedXor8_acq:
  case clang::ARM::BI_InterlockedXor16_acq:
  case clang::ARM::BI_InterlockedXor_acq:
  case clang::ARM::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::ARM::BI_InterlockedXor8_rel:
  case clang::ARM::BI_InterlockedXor16_rel:
  case clang::ARM::BI_InterlockedXor_rel:
  case clang::ARM::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::ARM::BI_InterlockedXor8_nf:
  case clang::ARM::BI_InterlockedXor16_nf:
  case clang::ARM::BI_InterlockedXor_nf:
  case clang::ARM::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::ARM::BI_InterlockedAnd8_acq:
  case clang::ARM::BI_InterlockedAnd16_acq:
  case clang::ARM::BI_InterlockedAnd_acq:
  case clang::ARM::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::ARM::BI_InterlockedAnd8_rel:
  case clang::ARM::BI_InterlockedAnd16_rel:
  case clang::ARM::BI_InterlockedAnd_rel:
  case clang::ARM::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::ARM::BI_InterlockedAnd8_nf:
  case clang::ARM::BI_InterlockedAnd16_nf:
  case clang::ARM::BI_InterlockedAnd_nf:
  case clang::ARM::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::ARM::BI_InterlockedIncrement16_acq:
  case clang::ARM::BI_InterlockedIncrement_acq:
  case clang::ARM::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::ARM::BI_InterlockedIncrement16_rel:
  case clang::ARM::BI_InterlockedIncrement_rel:
  case clang::ARM::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::ARM::BI_InterlockedIncrement16_nf:
  case clang::ARM::BI_InterlockedIncrement_nf:
  case clang::ARM::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::ARM::BI_InterlockedDecrement16_acq:
  case clang::ARM::BI_InterlockedDecrement_acq:
  case clang::ARM::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::ARM::BI_InterlockedDecrement16_rel:
  case clang::ARM::BI_InterlockedDecrement_rel:
  case clang::ARM::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::ARM::BI_InterlockedDecrement16_nf:
  case clang::ARM::BI_InterlockedDecrement_nf:
  case clang::ARM::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}
static std::optional<CodeGenFunction::MSVCIntrin>
translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::AArch64::BI_BitScanForward:
  case clang::AArch64::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::AArch64::BI_BitScanReverse:
  case clang::AArch64::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::AArch64::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::AArch64::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::AArch64::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::AArch64::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::AArch64::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::AArch64::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::AArch64::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::AArch64::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::AArch64::BI_InterlockedExchange8_acq:
  case clang::AArch64::BI_InterlockedExchange16_acq:
  case clang::AArch64::BI_InterlockedExchange_acq:
  case clang::AArch64::BI_InterlockedExchange64_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::AArch64::BI_InterlockedExchange8_rel:
  case clang::AArch64::BI_InterlockedExchange16_rel:
  case clang::AArch64::BI_InterlockedExchange_rel:
  case clang::AArch64::BI_InterlockedExchange64_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::AArch64::BI_InterlockedExchange8_nf:
  case clang::AArch64::BI_InterlockedExchange16_nf:
  case clang::AArch64::BI_InterlockedExchange_nf:
  case clang::AArch64::BI_InterlockedExchange64_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange8_acq:
  case clang::AArch64::BI_InterlockedCompareExchange16_acq:
  case clang::AArch64::BI_InterlockedCompareExchange_acq:
  case clang::AArch64::BI_InterlockedCompareExchange64_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::AArch64::BI_InterlockedCompareExchange8_rel:
  case clang::AArch64::BI_InterlockedCompareExchange16_rel:
  case clang::AArch64::BI_InterlockedCompareExchange_rel:
  case clang::AArch64::BI_InterlockedCompareExchange64_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::AArch64::BI_InterlockedCompareExchange8_nf:
  case clang::AArch64::BI_InterlockedCompareExchange16_nf:
  case clang::AArch64::BI_InterlockedCompareExchange_nf:
  case clang::AArch64::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128:
    return MSVCIntrin::_InterlockedCompareExchange128;
  case clang::AArch64::BI_InterlockedCompareExchange128_acq:
    return MSVCIntrin::_InterlockedCompareExchange128_acq;
  case clang::AArch64::BI_InterlockedCompareExchange128_nf:
    return MSVCIntrin::_InterlockedCompareExchange128_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128_rel:
    return MSVCIntrin::_InterlockedCompareExchange128_rel;
  case clang::AArch64::BI_InterlockedOr8_acq:
  case clang::AArch64::BI_InterlockedOr16_acq:
  case clang::AArch64::BI_InterlockedOr_acq:
  case clang::AArch64::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::AArch64::BI_InterlockedOr8_rel:
  case clang::AArch64::BI_InterlockedOr16_rel:
  case clang::AArch64::BI_InterlockedOr_rel:
  case clang::AArch64::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::AArch64::BI_InterlockedOr8_nf:
  case clang::AArch64::BI_InterlockedOr16_nf:
  case clang::AArch64::BI_InterlockedOr_nf:
  case clang::AArch64::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::AArch64::BI_InterlockedXor8_acq:
  case clang::AArch64::BI_InterlockedXor16_acq:
  case clang::AArch64::BI_InterlockedXor_acq:
  case clang::AArch64::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::AArch64::BI_InterlockedXor8_rel:
  case clang::AArch64::BI_InterlockedXor16_rel:
  case clang::AArch64::BI_InterlockedXor_rel:
  case clang::AArch64::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::AArch64::BI_InterlockedXor8_nf:
  case clang::AArch64::BI_InterlockedXor16_nf:
  case clang::AArch64::BI_InterlockedXor_nf:
  case clang::AArch64::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::AArch64::BI_InterlockedAnd8_acq:
  case clang::AArch64::BI_InterlockedAnd16_acq:
  case clang::AArch64::BI_InterlockedAnd_acq:
  case clang::AArch64::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::AArch64::BI_InterlockedAnd8_rel:
  case clang::AArch64::BI_InterlockedAnd16_rel:
  case clang::AArch64::BI_InterlockedAnd_rel:
  case clang::AArch64::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::AArch64::BI_InterlockedAnd8_nf:
  case clang::AArch64::BI_InterlockedAnd16_nf:
  case clang::AArch64::BI_InterlockedAnd_nf:
  case clang::AArch64::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::AArch64::BI_InterlockedIncrement16_acq:
  case clang::AArch64::BI_InterlockedIncrement_acq:
  case clang::AArch64::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::AArch64::BI_InterlockedIncrement16_rel:
  case clang::AArch64::BI_InterlockedIncrement_rel:
  case clang::AArch64::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::AArch64::BI_InterlockedIncrement16_nf:
  case clang::AArch64::BI_InterlockedIncrement_nf:
  case clang::AArch64::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::AArch64::BI_InterlockedDecrement16_acq:
  case clang::AArch64::BI_InterlockedDecrement_acq:
  case clang::AArch64::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::AArch64::BI_InterlockedDecrement16_rel:
  case clang::AArch64::BI_InterlockedDecrement_rel:
  case clang::AArch64::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::AArch64::BI_InterlockedDecrement16_nf:
  case clang::AArch64::BI_InterlockedDecrement_nf:
  case clang::AArch64::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}
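
// Same mapping as above, but for the x86 spellings of the MSVC builtins.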
static std::optional<CodeGenFunction::MSVCIntrin>
translateX86ToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::X86::BI_BitScanForward:
  case clang::X86::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::X86::BI_BitScanReverse:
  case clang::X86::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::X86::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::X86::BI_InterlockedCompareExchange128:
    return MSVCIntrin::_InterlockedCompareExchange128;
  case clang::X86::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::X86::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::X86::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::X86::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::X86::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::X86::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::X86::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  }
  llvm_unreachable("must return from switch");
}
// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
                                            const CallExpr *E) {
  switch (BuiltinID) {
  case MSVCIntrin::_BitScanForward:
  case MSVCIntrin::_BitScanReverse: {
    Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
    Value *ArgValue = EmitScalarExpr(E->getArg(1));

    llvm::Type *ArgType = ArgValue->getType();
    llvm::Type *IndexType = IndexAddress.getElementType();
    llvm::Type *ResultType = ConvertType(E->getType());

    Value *ArgZero = llvm::Constant::getNullValue(ArgType);
    Value *ResZero = llvm::Constant::getNullValue(ResultType);
    Value *ResOne = llvm::ConstantInt::get(ResultType, 1);

    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");

    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
    BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ResZero, Begin);

    Builder.SetInsertPoint(NotZero);

    if (BuiltinID == MSVCIntrin::_BitScanForward) {
      Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
      Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
      ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
      Builder.CreateStore(ZeroCount, IndexAddress, false);
    } else {
      unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
      Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);

      Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
      Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
      ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
      Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
      Builder.CreateStore(Index, IndexAddress, false);
    }
    Builder.CreateBr(End);
    Result->addIncoming(ResOne, NotZero);

    Builder.SetInsertPoint(End);
    return Result;
  }
  case MSVCIntrin::_InterlockedAnd:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
  case MSVCIntrin::_InterlockedExchange:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
  case MSVCIntrin::_InterlockedExchangeAdd:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
  case MSVCIntrin::_InterlockedExchangeSub:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
  case MSVCIntrin::_InterlockedOr:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
  case MSVCIntrin::_InterlockedXor:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
  case MSVCIntrin::_InterlockedExchangeAdd_acq:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
                                 AtomicOrdering::Acquire);
  case MSVCIntrin::_InterlockedExchangeAdd_rel:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
                                 AtomicOrdering::Release);
  case MSVCIntrin::_InterlockedExchangeAdd_nf:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
                                 AtomicOrdering::Monotonic);
  case MSVCIntrin::_InterlockedExchange_acq:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
                                 AtomicOrdering::Acquire);
  case MSVCIntrin::_InterlockedExchange_rel:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
                                 AtomicOrdering::Release);
  case MSVCIntrin::_InterlockedExchange_nf:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
                                 AtomicOrdering::Monotonic);
  case MSVCIntrin::_InterlockedCompareExchange_acq:
    return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
  case MSVCIntrin::_InterlockedCompareExchange_rel:
    return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
  case MSVCIntrin::_InterlockedCompareExchange_nf:
    return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
  case MSVCIntrin::_InterlockedCompareExchange128:
    return EmitAtomicCmpXchg128ForMSIntrin(
        *this, E, AtomicOrdering::SequentiallyConsistent);
  case MSVCIntrin::_InterlockedCompareExchange128_acq:
    return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
  case MSVCIntrin::_InterlockedCompareExchange128_rel:
    return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
  case MSVCIntrin::_InterlockedCompareExchange128_nf:
    return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
  case MSVCIntrin::_InterlockedOr_acq:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
                                 AtomicOrdering::Acquire);
  case MSVCIntrin::_InterlockedOr_rel:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
                                 AtomicOrdering::Release);
  case MSVCIntrin::_InterlockedOr_nf:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
                                 AtomicOrdering::Monotonic);
  case MSVCIntrin::_InterlockedXor_acq:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
                                 AtomicOrdering::Acquire);
  case MSVCIntrin::_InterlockedXor_rel:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
                                 AtomicOrdering::Release);
  case MSVCIntrin::_InterlockedXor_nf:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
                                 AtomicOrdering::Monotonic);
  case MSVCIntrin::_InterlockedAnd_acq:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
                                 AtomicOrdering::Acquire);
  case MSVCIntrin::_InterlockedAnd_rel:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
                                 AtomicOrdering::Release);
  case MSVCIntrin::_InterlockedAnd_nf:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
                                 AtomicOrdering::Monotonic);
  case MSVCIntrin::_InterlockedIncrement_acq:
    return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
  case MSVCIntrin::_InterlockedIncrement_rel:
    return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
  case MSVCIntrin::_InterlockedIncrement_nf:
    return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
  case MSVCIntrin::_InterlockedDecrement_acq:
    return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
  case MSVCIntrin::_InterlockedDecrement_rel:
    return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
  case MSVCIntrin::_InterlockedDecrement_nf:
    return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);

  case MSVCIntrin::_InterlockedDecrement:
    return EmitAtomicDecrementValue(*this, E);
  case MSVCIntrin::_InterlockedIncrement:
    return EmitAtomicIncrementValue(*this, E);

  case MSVCIntrin::__fastfail: {
    // Request immediate process termination from the kernel. The instruction
    // sequences to do this are documented on MSDN:
    // https://msdn.microsoft.com/en-us/library/dn774154.aspx
    llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
    StringRef Asm, Constraints;
    switch (ISA) {
    default:
      ErrorUnsupported(E, "__fastfail call for this architecture");
      break;
    case llvm::Triple::x86:
    case llvm::Triple::x86_64:
      Asm = "int $$0x29";
      Constraints = "{cx}";
      break;
    case llvm::Triple::thumb:
      Asm = "udf #251";
      Constraints = "{r0}";
      break;
    case llvm::Triple::aarch64:
      Asm = "brk #0xF003";
      Constraints = "{w0}";
      break;
    }
    llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
    llvm::InlineAsm *IA =
        llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
    llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
        getLLVMContext(), llvm::AttributeList::FunctionIndex,
        llvm::Attribute::NoReturn);
    llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
    CI->setAttributes(NoReturnAttr);
    return CI;
  }
  }
  llvm_unreachable("Incorrect MSVC intrinsic!");
}
// ARC cleanup for __builtin_os_log_format
struct CallObjCArcUse final : EHScopeStack::Cleanup {
  CallObjCArcUse(llvm::Value *object) : object(object) {}
  llvm::Value *object;

  void Emit(CodeGenFunction &CGF, Flags flags) override {
    CGF.EmitARCIntrinsicUse(object);
  }
};
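
// Emit the argument of a clz/ctz builtin and, when -fsanitize=builtin is
// enabled, check that the value passed in is not zero.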
Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
                                                 BuiltinCheckKind Kind) {
  assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
          && "Unsupported builtin check kind");

  Value *ArgValue = EmitScalarExpr(E);
  if (!SanOpts.has(SanitizerKind::Builtin))
    return ArgValue;

  SanitizerScope SanScope(this);
  Value *Cond = Builder.CreateICmpNE(
      ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
  EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
            SanitizerHandler::InvalidBuiltin,
            {EmitCheckSourceLocation(E->getExprLoc()),
             llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
            std::nullopt);
  return ArgValue;
}
/// Get the argument type for arguments to os_log_helper.
static CanQualType getOSLogArgType(ASTContext &C, int Size) {
  QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
  return C.getCanonicalType(UnsignedTy);
}
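
// Build (or reuse) the helper that serializes the os_log arguments into the
// caller-provided buffer. The helper's name encodes the buffer alignment and
// layout so that identical layouts share a single linkonce_odr definition.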
llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
    const analyze_os_log::OSLogBufferLayout &Layout,
    CharUnits BufferAlignment) {
  ASTContext &Ctx = getContext();

  llvm::SmallString<64> Name;
  {
    raw_svector_ostream OS(Name);
    OS << "__os_log_helper";
    OS << "_" << BufferAlignment.getQuantity();
    OS << "_" << int(Layout.getSummaryByte());
    OS << "_" << int(Layout.getNumArgsByte());
    for (const auto &Item : Layout.Items)
      OS << "_" << int(Item.getSizeByte()) << "_"
         << int(Item.getDescriptorByte());
  }

  if (llvm::Function *F = CGM.getModule().getFunction(Name))
    return F;

  llvm::SmallVector<QualType, 4> ArgTys;
  FunctionArgList Args;
  Args.push_back(ImplicitParamDecl::Create(
      Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
      ImplicitParamDecl::Other));
  ArgTys.emplace_back(Ctx.VoidPtrTy);

  for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
    char Size = Layout.Items[I].getSizeByte();
    if (!Size)
      continue;

    QualType ArgTy = getOSLogArgType(Ctx, Size);
    Args.push_back(ImplicitParamDecl::Create(
        Ctx, nullptr, SourceLocation(),
        &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
        ImplicitParamDecl::Other));
    ArgTys.emplace_back(ArgTy);
  }

  QualType ReturnTy = Ctx.VoidTy;

  // The helper function has linkonce_odr linkage to enable the linker to merge
  // identical functions. To ensure the merging always happens, 'noinline' is
  // attached to the function when compiling with -Oz.
  const CGFunctionInfo &FI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
  llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
  llvm::Function *Fn = llvm::Function::Create(
      FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
  Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
  CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
  CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
  Fn->setDoesNotThrow();

  // Attach 'noinline' at -Oz.
  if (CGM.getCodeGenOpts().OptimizeSize == 2)
    Fn->addFnAttr(llvm::Attribute::NoInline);

  auto NL = ApplyDebugLocation::CreateEmpty(*this);
  StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);

  // Create a scope with an artificial location for the body of this function.
  auto AL = ApplyDebugLocation::CreateArtificial(*this);

  CharUnits Offset;
  Address BufAddr =
      Address(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Int8Ty,
              BufferAlignment);
  Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
                      Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
  Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
                      Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));

  unsigned I = 1;
  for (const auto &Item : Layout.Items) {
    Builder.CreateStore(
        Builder.getInt8(Item.getDescriptorByte()),
        Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
    Builder.CreateStore(
        Builder.getInt8(Item.getSizeByte()),
        Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));

    CharUnits Size = Item.size();
    if (!Size.getQuantity())
      continue;

    Address Arg = GetAddrOfLocalVar(Args[I]);
    Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
    Addr = Builder.CreateElementBitCast(Addr, Arg.getElementType(),
                                        "argDataCast");
    Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
    Offset += Size;
    ++I;
  }

  FinishFunction();

  return Fn;
}
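
// Emit __builtin_os_log_format: compute the buffer layout, evaluate the
// arguments, and call the helper generated above to fill in the buffer.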
RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
  assert(E.getNumArgs() >= 2 &&
         "__builtin_os_log_format takes at least 2 arguments");
  ASTContext &Ctx = getContext();
  analyze_os_log::OSLogBufferLayout Layout;
  analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
  Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
  llvm::SmallVector<llvm::Value *, 4> RetainableOperands;

  // Ignore argument 1, the format string. It is not currently used.
  CallArgList Args;
  Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);

  for (const auto &Item : Layout.Items) {
    int Size = Item.getSizeByte();
    if (!Size)
      continue;

    llvm::Value *ArgVal;

    if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
      uint64_t Val = 0;
      for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
        Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
      ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
    } else if (const Expr *TheExpr = Item.getExpr()) {
      ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);

      // If a temporary object that requires destruction after the full
      // expression is passed, push a lifetime-extended cleanup to extend its
      // lifetime to the end of the enclosing block scope.
      auto LifetimeExtendObject = [&](const Expr *E) {
        E = E->IgnoreParenCasts();
        // Extend lifetimes of objects returned by function calls and message
        // sends.

        // FIXME: We should do this in other cases in which temporaries are
        //        created including arguments of non-ARC types (e.g., C++
        //        temporaries).
        if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
          return true;
        return false;
      };

      if (TheExpr->getType()->isObjCRetainableType() &&
          getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
        assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
               "Only scalar can be a ObjC retainable type");
        if (!isa<Constant>(ArgVal)) {
          CleanupKind Cleanup = getARCCleanupKind();
          QualType Ty = TheExpr->getType();
          Address Alloca = Address::invalid();
          Address Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
          ArgVal = EmitARCRetain(Ty, ArgVal);
          Builder.CreateStore(ArgVal, Addr);
          pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
                                      CodeGenFunction::destroyARCStrongPrecise,
                                      Cleanup & EHCleanup);

          // Push a clang.arc.use call to ensure ARC optimizer knows that the
          // argument has to be alive.
          if (CGM.getCodeGenOpts().OptimizationLevel != 0)
            pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
        }
      }
    } else {
      ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
    }

    unsigned ArgValSize =
        CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
    llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
                                                     ArgValSize);
    ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
    CanQualType ArgTy = getOSLogArgType(Ctx, Size);
    // If ArgVal has type x86_fp80, zero-extend ArgVal.
    ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
    Args.add(RValue::get(ArgVal), ArgTy);
  }

  const CGFunctionInfo &FI =
      CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
  llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
      Layout, BufAddr.getAlignment());
  EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
  return RValue::get(BufAddr.getPointer());
}
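
// Determine whether this is an unsigned multiply with a signed result of the
// same width, which can be lowered with umul.with.overflow plus an extra
// INT_MAX comparison instead of the generic mixed-sign path.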
static bool isSpecialUnsignedMultiplySignedResult(
    unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
    WidthAndSignedness ResultInfo) {
  return BuiltinID == Builtin::BI__builtin_mul_overflow &&
         Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
         !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
}
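
// Lower the unsigned*unsigned->signed special case of __builtin_mul_overflow:
// do an unsigned checked multiply and additionally report overflow when the
// product exceeds the signed maximum of the result type.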
static RValue EmitCheckedUnsignedMultiplySignedResult(
    CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
    const clang::Expr *Op2, WidthAndSignedness Op2Info,
    const clang::Expr *ResultArg, QualType ResultQTy,
    WidthAndSignedness ResultInfo) {
  assert(isSpecialUnsignedMultiplySignedResult(
             Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
         "Cannot specialize this multiply");

  llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
  llvm::Value *V2 = CGF.EmitScalarExpr(Op2);

  llvm::Value *HasOverflow;
  llvm::Value *Result = EmitOverflowIntrinsic(
      CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);

  // The intrinsic call will detect overflow when the value is > UINT_MAX,
  // however, since the original builtin had a signed result, we need to report
  // an overflow when the result is greater than INT_MAX.
  auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
  llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);

  llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
  HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);

  bool isVolatile =
      ResultArg->getType()->getPointeeType().isVolatileQualified();
  Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
  CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
                          isVolatile);
  return RValue::get(HasOverflow);
}
/// Determine if a binop is a checked mixed-sign multiply we can specialize.
static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
                                       WidthAndSignedness Op1Info,
                                       WidthAndSignedness Op2Info,
                                       WidthAndSignedness ResultInfo) {
  return BuiltinID == Builtin::BI__builtin_mul_overflow &&
         std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
         Op1Info.Signed != Op2Info.Signed;
}
/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
/// the generic checked-binop irgen.
static RValue
EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
                             WidthAndSignedness Op1Info, const clang::Expr *Op2,
                             WidthAndSignedness Op2Info,
                             const clang::Expr *ResultArg, QualType ResultQTy,
                             WidthAndSignedness ResultInfo) {
  assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
                                    Op2Info, ResultInfo) &&
         "Not a mixed-sign multiplication we can specialize");

  // Emit the signed and unsigned operands.
  const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
  const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
  llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
  llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
  unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
  unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;

  // One of the operands may be smaller than the other. If so, [s|z]ext it.
  if (SignedOpWidth < UnsignedOpWidth)
    Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
  if (UnsignedOpWidth < SignedOpWidth)
    Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");

  llvm::Type *OpTy = Signed->getType();
  llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
  Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
  llvm::Type *ResTy = ResultPtr.getElementType();
  unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);

  // Take the absolute value of the signed operand.
  llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
  llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
  llvm::Value *AbsSigned =
      CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);

  // Perform a checked unsigned multiplication.
  llvm::Value *UnsignedOverflow;
  llvm::Value *UnsignedResult =
      EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
                            Unsigned, UnsignedOverflow);

  llvm::Value *Overflow, *Result;
  if (ResultInfo.Signed) {
    // Signed overflow occurs if the result is greater than INT_MAX or lesser
    // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative).
    auto IntMax =
        llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
    llvm::Value *MaxResult =
        CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
                              CGF.Builder.CreateZExt(IsNegative, OpTy));
    llvm::Value *SignedOverflow =
        CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
    Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);

    // Prepare the signed result (possibly by negating it).
    llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
    llvm::Value *SignedResult =
        CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
    Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
  } else {
    // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
    llvm::Value *Underflow = CGF.Builder.CreateAnd(
        IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
    Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
    if (ResultInfo.Width < OpWidth) {
      auto IntMax =
          llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
      llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
          UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
      Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
    }

    // Negate the product if it would be negative in infinite precision.
    Result = CGF.Builder.CreateSelect(
        IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);

    Result = CGF.Builder.CreateTrunc(Result, ResTy);
  }
  assert(Overflow && Result && "Missing overflow or result");

  bool isVolatile =
      ResultArg->getType()->getPointeeType().isVolatileQualified();
  CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
                          isVolatile);
  return RValue::get(Overflow);
}
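
// Recursive worker for TypeRequiresBuiltinLaunder: strips array types and
// walks record fields, using Seen to avoid revisiting the same record.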
static bool
TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
                              llvm::SmallPtrSetImpl<const Decl *> &Seen) {
  if (const auto *Arr = Ctx.getAsArrayType(Ty))
    Ty = Ctx.getBaseElementType(Arr);

  const auto *Record = Ty->getAsCXXRecordDecl();
  if (!Record)
    return false;

  // We've already checked this type, or are in the process of checking it.
  if (!Seen.insert(Record).second)
    return false;

  assert(Record->hasDefinition() &&
         "Incomplete types should already be diagnosed");

  if (Record->isDynamicClass())
    return true;

  for (FieldDecl *F : Record->fields()) {
    if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
      return true;
  }
  return false;
}
/// Determine if the specified type requires laundering by checking if it is a
/// dynamic class type or contains a subobject which is a dynamic class type.
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
  if (!CGM.getCodeGenOpts().StrictVTablePointers)
    return false;
  llvm::SmallPtrSet<const Decl *, 16> Seen;
  return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
}
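
// Emit a rotate builtin as an llvm.fshl/llvm.fshr funnel shift whose two data
// operands are both the source value.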
RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
  llvm::Value *Src = EmitScalarExpr(E->getArg(0));
  llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));

  // The builtin's shift arg may have a different type than the source arg and
  // result, but the LLVM intrinsic uses the same type for all values.
  llvm::Type *Ty = Src->getType();
  ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);

  // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
  unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *F = CGM.getIntrinsic(IID, Ty);
  return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
}
// Map math builtins for long-double to f128 version.
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
  switch (BuiltinID) {
#define MUTATE_LDBL(func) \
  case Builtin::BI__builtin_##func##l: \
    return Builtin::BI__builtin_##func##f128;
  MUTATE_LDBL(nearbyint)
  MUTATE_LDBL(llround)
  MUTATE_LDBL(huge_val)
  MUTATE_LDBL(copysign)
  MUTATE_LDBL(nextafter)
  MUTATE_LDBL(nexttoward)
  MUTATE_LDBL(remainder)
  MUTATE_LDBL(scalbln)
#undef MUTATE_LDBL
  default:
    return BuiltinID;
  }
}
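
// Emit a call to a builtin: constant-fold it when possible, otherwise lower it
// to LLVM IR (usually an intrinsic) or fall back to the library function.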
RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                                        const CallExpr *E,
                                        ReturnValueSlot ReturnValue) {
  const FunctionDecl *FD = GD.getDecl()->getAsFunction();
  // See if we can constant fold this builtin. If so, don't emit it at all.
  // TODO: Extend this handling to all builtin calls that we can constant-fold.
  Expr::EvalResult Result;
  if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
      !Result.hasSideEffects()) {
    if (Result.Val.isInt())
      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
                                                Result.Val.getInt()));
    if (Result.Val.isFloat())
      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
                                               Result.Val.getFloat()));
  }

  // If current long-double semantics is IEEE 128-bit, replace math builtins
  // of long-double with f128 equivalent.
  // TODO: This mutation should also be applied to targets other than PPC,
  // after backend supports IEEE 128-bit style libcalls.
  if (getTarget().getTriple().isPPC64() &&
      &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
    BuiltinID = mutateLongDoubleBuiltin(BuiltinID);

  // If the builtin has been declared explicitly with an assembler label,
  // disable the specialized emitting below. Ideally we should communicate the
  // rename in IR, or at least avoid generating the intrinsic calls that are
  // likely to get lowered to the renamed library functions.
  const unsigned BuiltinIDIfNoAsmLabel =
      FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;

  // There are LLVM math intrinsics/instructions corresponding to math library
  // functions, except that the LLVM op will never set errno while the math
  // library might. Also, math builtins have the same semantics as their math
  // library twins. Thus, we can transform math library and builtin calls to
  // their LLVM counterparts if the call is marked 'const' (known to never set
  // errno). In case FP exceptions are enabled, the experimental versions of
  // the intrinsics model those.
  bool ConstWithoutErrnoAndExceptions =
      getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
  bool ConstWithoutExceptions =
      getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
  if (FD->hasAttr<ConstAttr>() ||
      ((ConstWithoutErrnoAndExceptions || ConstWithoutExceptions) &&
       (!ConstWithoutErrnoAndExceptions || (!getLangOpts().MathErrno)))) {
    switch (BuiltinIDIfNoAsmLabel) {
    case Builtin::BIceil:
    case Builtin::BIceilf:
    case Builtin::BIceill:
    case Builtin::BI__builtin_ceil:
    case Builtin::BI__builtin_ceilf:
    case Builtin::BI__builtin_ceilf16:
    case Builtin::BI__builtin_ceill:
    case Builtin::BI__builtin_ceilf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::ceil,
                                   Intrinsic::experimental_constrained_ceil));

    case Builtin::BIcopysign:
    case Builtin::BIcopysignf:
    case Builtin::BIcopysignl:
    case Builtin::BI__builtin_copysign:
    case Builtin::BI__builtin_copysignf:
    case Builtin::BI__builtin_copysignf16:
    case Builtin::BI__builtin_copysignl:
    case Builtin::BI__builtin_copysignf128:
      return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));

    case Builtin::BIcos:
    case Builtin::BIcosf:
    case Builtin::BIcosl:
    case Builtin::BI__builtin_cos:
    case Builtin::BI__builtin_cosf:
    case Builtin::BI__builtin_cosf16:
    case Builtin::BI__builtin_cosl:
    case Builtin::BI__builtin_cosf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::cos,
                                   Intrinsic::experimental_constrained_cos));

    case Builtin::BIexp:
    case Builtin::BIexpf:
    case Builtin::BIexpl:
    case Builtin::BI__builtin_exp:
    case Builtin::BI__builtin_expf:
    case Builtin::BI__builtin_expf16:
    case Builtin::BI__builtin_expl:
    case Builtin::BI__builtin_expf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::exp,
                                   Intrinsic::experimental_constrained_exp));

    case Builtin::BIexp2:
    case Builtin::BIexp2f:
    case Builtin::BIexp2l:
    case Builtin::BI__builtin_exp2:
    case Builtin::BI__builtin_exp2f:
    case Builtin::BI__builtin_exp2f16:
    case Builtin::BI__builtin_exp2l:
    case Builtin::BI__builtin_exp2f128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::exp2,
                                   Intrinsic::experimental_constrained_exp2));

    case Builtin::BIfabs:
    case Builtin::BIfabsf:
    case Builtin::BIfabsl:
    case Builtin::BI__builtin_fabs:
    case Builtin::BI__builtin_fabsf:
    case Builtin::BI__builtin_fabsf16:
    case Builtin::BI__builtin_fabsl:
    case Builtin::BI__builtin_fabsf128:
      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));

    case Builtin::BIfloor:
    case Builtin::BIfloorf:
    case Builtin::BIfloorl:
    case Builtin::BI__builtin_floor:
    case Builtin::BI__builtin_floorf:
    case Builtin::BI__builtin_floorf16:
    case Builtin::BI__builtin_floorl:
    case Builtin::BI__builtin_floorf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::floor,
                                   Intrinsic::experimental_constrained_floor));

    case Builtin::BIfma:
    case Builtin::BIfmaf:
    case Builtin::BIfmal:
    case Builtin::BI__builtin_fma:
    case Builtin::BI__builtin_fmaf:
    case Builtin::BI__builtin_fmaf16:
    case Builtin::BI__builtin_fmal:
    case Builtin::BI__builtin_fmaf128:
      return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::fma,
                                   Intrinsic::experimental_constrained_fma));

    case Builtin::BIfmax:
    case Builtin::BIfmaxf:
    case Builtin::BIfmaxl:
    case Builtin::BI__builtin_fmax:
    case Builtin::BI__builtin_fmaxf:
    case Builtin::BI__builtin_fmaxf16:
    case Builtin::BI__builtin_fmaxl:
    case Builtin::BI__builtin_fmaxf128:
      return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::maxnum,
                                   Intrinsic::experimental_constrained_maxnum));

    case Builtin::BIfmin:
    case Builtin::BIfminf:
    case Builtin::BIfminl:
    case Builtin::BI__builtin_fmin:
    case Builtin::BI__builtin_fminf:
    case Builtin::BI__builtin_fminf16:
    case Builtin::BI__builtin_fminl:
    case Builtin::BI__builtin_fminf128:
      return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::minnum,
                                   Intrinsic::experimental_constrained_minnum));

    // fmod() is a special-case. It maps to the frem instruction rather than an
    // LLVM intrinsic.
    case Builtin::BIfmod:
    case Builtin::BIfmodf:
    case Builtin::BIfmodl:
    case Builtin::BI__builtin_fmod:
    case Builtin::BI__builtin_fmodf:
    case Builtin::BI__builtin_fmodf16:
    case Builtin::BI__builtin_fmodl:
    case Builtin::BI__builtin_fmodf128: {
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
      Value *Arg1 = EmitScalarExpr(E->getArg(0));
      Value *Arg2 = EmitScalarExpr(E->getArg(1));
      return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
    }

    case Builtin::BIlog:
    case Builtin::BIlogf:
    case Builtin::BIlogl:
    case Builtin::BI__builtin_log:
    case Builtin::BI__builtin_logf:
    case Builtin::BI__builtin_logf16:
    case Builtin::BI__builtin_logl:
    case Builtin::BI__builtin_logf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::log,
                                   Intrinsic::experimental_constrained_log));

    case Builtin::BIlog10:
    case Builtin::BIlog10f:
    case Builtin::BIlog10l:
    case Builtin::BI__builtin_log10:
    case Builtin::BI__builtin_log10f:
    case Builtin::BI__builtin_log10f16:
    case Builtin::BI__builtin_log10l:
    case Builtin::BI__builtin_log10f128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::log10,
                                   Intrinsic::experimental_constrained_log10));

    case Builtin::BIlog2:
    case Builtin::BIlog2f:
    case Builtin::BIlog2l:
    case Builtin::BI__builtin_log2:
    case Builtin::BI__builtin_log2f:
    case Builtin::BI__builtin_log2f16:
    case Builtin::BI__builtin_log2l:
    case Builtin::BI__builtin_log2f128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::log2,
                                   Intrinsic::experimental_constrained_log2));

    case Builtin::BInearbyint:
    case Builtin::BInearbyintf:
    case Builtin::BInearbyintl:
    case Builtin::BI__builtin_nearbyint:
    case Builtin::BI__builtin_nearbyintf:
    case Builtin::BI__builtin_nearbyintl:
    case Builtin::BI__builtin_nearbyintf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                Intrinsic::nearbyint,
                                Intrinsic::experimental_constrained_nearbyint));

    case Builtin::BIpow:
    case Builtin::BIpowf:
    case Builtin::BIpowl:
    case Builtin::BI__builtin_pow:
    case Builtin::BI__builtin_powf:
    case Builtin::BI__builtin_powf16:
    case Builtin::BI__builtin_powl:
    case Builtin::BI__builtin_powf128:
      return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::pow,
                                   Intrinsic::experimental_constrained_pow));

    case Builtin::BIrint:
    case Builtin::BIrintf:
    case Builtin::BIrintl:
    case Builtin::BI__builtin_rint:
    case Builtin::BI__builtin_rintf:
    case Builtin::BI__builtin_rintf16:
    case Builtin::BI__builtin_rintl:
    case Builtin::BI__builtin_rintf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::rint,
                                   Intrinsic::experimental_constrained_rint));

    case Builtin::BIround:
    case Builtin::BIroundf:
    case Builtin::BIroundl:
    case Builtin::BI__builtin_round:
    case Builtin::BI__builtin_roundf:
    case Builtin::BI__builtin_roundf16:
    case Builtin::BI__builtin_roundl:
    case Builtin::BI__builtin_roundf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::round,
                                   Intrinsic::experimental_constrained_round));

    case Builtin::BIroundeven:
    case Builtin::BIroundevenf:
    case Builtin::BIroundevenl:
    case Builtin::BI__builtin_roundeven:
    case Builtin::BI__builtin_roundevenf:
    case Builtin::BI__builtin_roundevenf16:
    case Builtin::BI__builtin_roundevenl:
    case Builtin::BI__builtin_roundevenf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                Intrinsic::roundeven,
                                Intrinsic::experimental_constrained_roundeven));

    case Builtin::BIsin:
    case Builtin::BIsinf:
    case Builtin::BIsinl:
    case Builtin::BI__builtin_sin:
    case Builtin::BI__builtin_sinf:
    case Builtin::BI__builtin_sinf16:
    case Builtin::BI__builtin_sinl:
    case Builtin::BI__builtin_sinf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::sin,
                                   Intrinsic::experimental_constrained_sin));

    case Builtin::BIsqrt:
    case Builtin::BIsqrtf:
    case Builtin::BIsqrtl:
    case Builtin::BI__builtin_sqrt:
    case Builtin::BI__builtin_sqrtf:
    case Builtin::BI__builtin_sqrtf16:
    case Builtin::BI__builtin_sqrtl:
    case Builtin::BI__builtin_sqrtf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::sqrt,
                                   Intrinsic::experimental_constrained_sqrt));

    case Builtin::BItrunc:
    case Builtin::BItruncf:
    case Builtin::BItruncl:
    case Builtin::BI__builtin_trunc:
    case Builtin::BI__builtin_truncf:
    case Builtin::BI__builtin_truncf16:
    case Builtin::BI__builtin_truncl:
    case Builtin::BI__builtin_truncf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::trunc,
                                   Intrinsic::experimental_constrained_trunc));

    case Builtin::BIlround:
    case Builtin::BIlroundf:
    case Builtin::BIlroundl:
    case Builtin::BI__builtin_lround:
    case Builtin::BI__builtin_lroundf:
    case Builtin::BI__builtin_lroundl:
    case Builtin::BI__builtin_lroundf128:
      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
          *this, E, Intrinsic::lround,
          Intrinsic::experimental_constrained_lround));

    case Builtin::BIllround:
    case Builtin::BIllroundf:
    case Builtin::BIllroundl:
    case Builtin::BI__builtin_llround:
    case Builtin::BI__builtin_llroundf:
    case Builtin::BI__builtin_llroundl:
    case Builtin::BI__builtin_llroundf128:
      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
          *this, E, Intrinsic::llround,
          Intrinsic::experimental_constrained_llround));

    case Builtin::BIlrint:
    case Builtin::BIlrintf:
    case Builtin::BIlrintl:
    case Builtin::BI__builtin_lrint:
    case Builtin::BI__builtin_lrintf:
    case Builtin::BI__builtin_lrintl:
    case Builtin::BI__builtin_lrintf128:
      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
          *this, E, Intrinsic::lrint,
          Intrinsic::experimental_constrained_lrint));

    case Builtin::BIllrint:
    case Builtin::BIllrintf:
    case Builtin::BIllrintl:
    case Builtin::BI__builtin_llrint:
    case Builtin::BI__builtin_llrintf:
    case Builtin::BI__builtin_llrintl:
    case Builtin::BI__builtin_llrintf128:
      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
          *this, E, Intrinsic::llrint,
          Intrinsic::experimental_constrained_llrint));
    case Builtin::BI__builtin_ldexp:
    case Builtin::BI__builtin_ldexpf:
    case Builtin::BI__builtin_ldexpl:
    case Builtin::BI__builtin_ldexpf16:
    case Builtin::BI__builtin_ldexpf128: {
      return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
          *this, E, Intrinsic::ldexp,
          Intrinsic::experimental_constrained_ldexp));
    }
    default:
      break;
    }
  }
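
  // Builtins that are not 'const' math calls handled above are lowered
  // individually below.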
2603 switch (BuiltinIDIfNoAsmLabel
) {
2605 case Builtin::BI__builtin___CFStringMakeConstantString
:
2606 case Builtin::BI__builtin___NSStringMakeConstantString
:
2607 return RValue::get(ConstantEmitter(*this).emitAbstract(E
, E
->getType()));
2608 case Builtin::BI__builtin_stdarg_start
:
2609 case Builtin::BI__builtin_va_start
:
2610 case Builtin::BI__va_start
:
2611 case Builtin::BI__builtin_va_end
:
2612 EmitVAStartEnd(BuiltinID
== Builtin::BI__va_start
2613 ? EmitScalarExpr(E
->getArg(0))
2614 : EmitVAListRef(E
->getArg(0)).getPointer(),
2615 BuiltinID
!= Builtin::BI__builtin_va_end
);
2616 return RValue::get(nullptr);
2617 case Builtin::BI__builtin_va_copy
: {
2618 Value
*DstPtr
= EmitVAListRef(E
->getArg(0)).getPointer();
2619 Value
*SrcPtr
= EmitVAListRef(E
->getArg(1)).getPointer();
2621 llvm::Type
*Type
= Int8PtrTy
;
2623 DstPtr
= Builder
.CreateBitCast(DstPtr
, Type
);
2624 SrcPtr
= Builder
.CreateBitCast(SrcPtr
, Type
);
2625 Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::vacopy
), {DstPtr
, SrcPtr
});
2626 return RValue::get(nullptr);
2628 case Builtin::BI__builtin_abs
:
2629 case Builtin::BI__builtin_labs
:
2630 case Builtin::BI__builtin_llabs
: {
2632 // The negation has 'nsw' because abs of INT_MIN is undefined.
2633 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
2634 Value
*NegOp
= Builder
.CreateNSWNeg(ArgValue
, "neg");
2635 Constant
*Zero
= llvm::Constant::getNullValue(ArgValue
->getType());
2636 Value
*CmpResult
= Builder
.CreateICmpSLT(ArgValue
, Zero
, "abscond");
2637 Value
*Result
= Builder
.CreateSelect(CmpResult
, NegOp
, ArgValue
, "abs");
2638 return RValue::get(Result
);
2640 case Builtin::BI__builtin_complex
: {
2641 Value
*Real
= EmitScalarExpr(E
->getArg(0));
2642 Value
*Imag
= EmitScalarExpr(E
->getArg(1));
2643 return RValue::getComplex({Real
, Imag
});
2645 case Builtin::BI__builtin_conj
:
2646 case Builtin::BI__builtin_conjf
:
2647 case Builtin::BI__builtin_conjl
:
2648 case Builtin::BIconj
:
2649 case Builtin::BIconjf
:
2650 case Builtin::BIconjl
: {
2651 ComplexPairTy ComplexVal
= EmitComplexExpr(E
->getArg(0));
2652 Value
*Real
= ComplexVal
.first
;
2653 Value
*Imag
= ComplexVal
.second
;
2654 Imag
= Builder
.CreateFNeg(Imag
, "neg");
2655 return RValue::getComplex(std::make_pair(Real
, Imag
));
2657 case Builtin::BI__builtin_creal
:
2658 case Builtin::BI__builtin_crealf
:
2659 case Builtin::BI__builtin_creall
:
2660 case Builtin::BIcreal
:
2661 case Builtin::BIcrealf
:
2662 case Builtin::BIcreall
: {
2663 ComplexPairTy ComplexVal
= EmitComplexExpr(E
->getArg(0));
2664 return RValue::get(ComplexVal
.first
);
2667 case Builtin::BI__builtin_preserve_access_index
: {
2668 // Only enabled preserved access index region when debuginfo
2669 // is available as debuginfo is needed to preserve user-level
2671 if (!getDebugInfo()) {
2672 CGM
.Error(E
->getExprLoc(), "using builtin_preserve_access_index() without -g");
2673 return RValue::get(EmitScalarExpr(E
->getArg(0)));
2676 // Nested builtin_preserve_access_index() not supported
2677 if (IsInPreservedAIRegion
) {
2678 CGM
.Error(E
->getExprLoc(), "nested builtin_preserve_access_index() not supported");
2679 return RValue::get(EmitScalarExpr(E
->getArg(0)));
2682 IsInPreservedAIRegion
= true;
2683 Value
*Res
= EmitScalarExpr(E
->getArg(0));
2684 IsInPreservedAIRegion
= false;
2685 return RValue::get(Res
);
2688 case Builtin::BI__builtin_cimag
:
2689 case Builtin::BI__builtin_cimagf
:
2690 case Builtin::BI__builtin_cimagl
:
2691 case Builtin::BIcimag
:
2692 case Builtin::BIcimagf
:
2693 case Builtin::BIcimagl
: {
2694 ComplexPairTy ComplexVal
= EmitComplexExpr(E
->getArg(0));
2695 return RValue::get(ComplexVal
.second
);
2698 case Builtin::BI__builtin_clrsb
:
2699 case Builtin::BI__builtin_clrsbl
:
2700 case Builtin::BI__builtin_clrsbll
: {
2701 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1 or
2702 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
2704 llvm::Type
*ArgType
= ArgValue
->getType();
2705 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctlz
, ArgType
);
2707 llvm::Type
*ResultType
= ConvertType(E
->getType());
2708 Value
*Zero
= llvm::Constant::getNullValue(ArgType
);
2709 Value
*IsNeg
= Builder
.CreateICmpSLT(ArgValue
, Zero
, "isneg");
2710 Value
*Inverse
= Builder
.CreateNot(ArgValue
, "not");
2711 Value
*Tmp
= Builder
.CreateSelect(IsNeg
, Inverse
, ArgValue
);
2712 Value
*Ctlz
= Builder
.CreateCall(F
, {Tmp
, Builder
.getFalse()});
2713 Value
*Result
= Builder
.CreateSub(Ctlz
, llvm::ConstantInt::get(ArgType
, 1));
2714 Result
= Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/true,
2716 return RValue::get(Result
);
2718 case Builtin::BI__builtin_ctzs
:
2719 case Builtin::BI__builtin_ctz
:
2720 case Builtin::BI__builtin_ctzl
:
2721 case Builtin::BI__builtin_ctzll
: {
2722 Value
*ArgValue
= EmitCheckedArgForBuiltin(E
->getArg(0), BCK_CTZPassedZero
);
2724 llvm::Type
*ArgType
= ArgValue
->getType();
2725 Function
*F
= CGM
.getIntrinsic(Intrinsic::cttz
, ArgType
);
2727 llvm::Type
*ResultType
= ConvertType(E
->getType());
2728 Value
*ZeroUndef
= Builder
.getInt1(getTarget().isCLZForZeroUndef());
2729 Value
*Result
= Builder
.CreateCall(F
, {ArgValue
, ZeroUndef
});
2730 if (Result
->getType() != ResultType
)
2731 Result
= Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/true,
2733 return RValue::get(Result
);
2735 case Builtin::BI__builtin_clzs
:
2736 case Builtin::BI__builtin_clz
:
2737 case Builtin::BI__builtin_clzl
:
2738 case Builtin::BI__builtin_clzll
: {
2739 Value
*ArgValue
= EmitCheckedArgForBuiltin(E
->getArg(0), BCK_CLZPassedZero
);
2741 llvm::Type
*ArgType
= ArgValue
->getType();
2742 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctlz
, ArgType
);
2744 llvm::Type
*ResultType
= ConvertType(E
->getType());
2745 Value
*ZeroUndef
= Builder
.getInt1(getTarget().isCLZForZeroUndef());
2746 Value
*Result
= Builder
.CreateCall(F
, {ArgValue
, ZeroUndef
});
2747 if (Result
->getType() != ResultType
)
2748 Result
= Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/true,
2750 return RValue::get(Result
);
2752 case Builtin::BI__builtin_ffs
:
2753 case Builtin::BI__builtin_ffsl
:
2754 case Builtin::BI__builtin_ffsll
: {
2755 // ffs(x) -> x ? cttz(x) + 1 : 0
2756 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
2758 llvm::Type
*ArgType
= ArgValue
->getType();
2759 Function
*F
= CGM
.getIntrinsic(Intrinsic::cttz
, ArgType
);
2761 llvm::Type
*ResultType
= ConvertType(E
->getType());
2763 Builder
.CreateAdd(Builder
.CreateCall(F
, {ArgValue
, Builder
.getTrue()}),
2764 llvm::ConstantInt::get(ArgType
, 1));
2765 Value
*Zero
= llvm::Constant::getNullValue(ArgType
);
2766 Value
*IsZero
= Builder
.CreateICmpEQ(ArgValue
, Zero
, "iszero");
2767 Value
*Result
= Builder
.CreateSelect(IsZero
, Zero
, Tmp
, "ffs");
2768 if (Result
->getType() != ResultType
)
2769 Result
= Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/true,
2771 return RValue::get(Result
);
2773 case Builtin::BI__builtin_parity
:
2774 case Builtin::BI__builtin_parityl
:
2775 case Builtin::BI__builtin_parityll
: {
2776 // parity(x) -> ctpop(x) & 1
2777 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
2779 llvm::Type
*ArgType
= ArgValue
->getType();
2780 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctpop
, ArgType
);
2782 llvm::Type
*ResultType
= ConvertType(E
->getType());
2783 Value
*Tmp
= Builder
.CreateCall(F
, ArgValue
);
2784 Value
*Result
= Builder
.CreateAnd(Tmp
, llvm::ConstantInt::get(ArgType
, 1));
2785 if (Result
->getType() != ResultType
)
2786 Result
= Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/true,
2788 return RValue::get(Result
);
2790 case Builtin::BI__lzcnt16
:
2791 case Builtin::BI__lzcnt
:
2792 case Builtin::BI__lzcnt64
: {
2793 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
2795 llvm::Type
*ArgType
= ArgValue
->getType();
2796 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctlz
, ArgType
);
2798 llvm::Type
*ResultType
= ConvertType(E
->getType());
2799 Value
*Result
= Builder
.CreateCall(F
, {ArgValue
, Builder
.getFalse()});
2800 if (Result
->getType() != ResultType
)
2801 Result
= Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/true,
2803 return RValue::get(Result
);
2805 case Builtin::BI__popcnt16
:
2806 case Builtin::BI__popcnt
:
2807 case Builtin::BI__popcnt64
:
2808 case Builtin::BI__builtin_popcount
:
2809 case Builtin::BI__builtin_popcountl
:
2810 case Builtin::BI__builtin_popcountll
: {
2811 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
2813 llvm::Type
*ArgType
= ArgValue
->getType();
2814 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctpop
, ArgType
);
2816 llvm::Type
*ResultType
= ConvertType(E
->getType());
2817 Value
*Result
= Builder
.CreateCall(F
, ArgValue
);
2818 if (Result
->getType() != ResultType
)
2819 Result
= Builder
.CreateIntCast(Result
, ResultType
, /*isSigned*/true,
2821 return RValue::get(Result
);
2823 case Builtin::BI__builtin_unpredictable
: {
2824 // Always return the argument of __builtin_unpredictable. LLVM does not
2825 // handle this builtin. Metadata for this builtin should be added directly
2826 // to instructions such as branches or switches that use it.
2827 return RValue::get(EmitScalarExpr(E
->getArg(0)));
2829 case Builtin::BI__builtin_expect
: {
2830 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
2831 llvm::Type
*ArgType
= ArgValue
->getType();
2833 Value
*ExpectedValue
= EmitScalarExpr(E
->getArg(1));
2834 // Don't generate llvm.expect on -O0 as the backend won't use it for
2836 // Note, we still IRGen ExpectedValue because it could have side-effects.
2837 if (CGM
.getCodeGenOpts().OptimizationLevel
== 0)
2838 return RValue::get(ArgValue
);
2840 Function
*FnExpect
= CGM
.getIntrinsic(Intrinsic::expect
, ArgType
);
2842 Builder
.CreateCall(FnExpect
, {ArgValue
, ExpectedValue
}, "expval");
2843 return RValue::get(Result
);
2845 case Builtin::BI__builtin_expect_with_probability
: {
2846 Value
*ArgValue
= EmitScalarExpr(E
->getArg(0));
2847 llvm::Type
*ArgType
= ArgValue
->getType();
2849 Value
*ExpectedValue
= EmitScalarExpr(E
->getArg(1));
2850 llvm::APFloat
Probability(0.0);
2851 const Expr
*ProbArg
= E
->getArg(2);
2852 bool EvalSucceed
= ProbArg
->EvaluateAsFloat(Probability
, CGM
.getContext());
2853 assert(EvalSucceed
&& "probability should be able to evaluate as float");
2855 bool LoseInfo
= false;
2856 Probability
.convert(llvm::APFloat::IEEEdouble(),
2857 llvm::RoundingMode::Dynamic
, &LoseInfo
);
2858 llvm::Type
*Ty
= ConvertType(ProbArg
->getType());
2859 Constant
*Confidence
= ConstantFP::get(Ty
, Probability
);
2860 // Don't generate llvm.expect.with.probability on -O0 as the backend
2861 // won't use it for anything.
2862 // Note, we still IRGen ExpectedValue because it could have side-effects.
2863 if (CGM
.getCodeGenOpts().OptimizationLevel
== 0)
2864 return RValue::get(ArgValue
);
2866 Function
*FnExpect
=
2867 CGM
.getIntrinsic(Intrinsic::expect_with_probability
, ArgType
);
2868 Value
*Result
= Builder
.CreateCall(
2869 FnExpect
, {ArgValue
, ExpectedValue
, Confidence
}, "expval");
2870 return RValue::get(Result
);
  case Builtin::BI__builtin_assume_aligned: {
    const Expr *Ptr = E->getArg(0);
    Value *PtrValue = EmitScalarExpr(Ptr);
    Value *OffsetValue =
      (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;

    Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
    ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
    if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
      AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
                                     llvm::Value::MaximumAlignment);

    emitAlignmentAssumption(PtrValue, Ptr,
                            /*The expr loc is sufficient.*/ SourceLocation(),
                            AlignmentCI, OffsetValue);
    return RValue::get(PtrValue);
  }
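  // Note (illustrative): the alignment assumption is ultimately expressed as
  // an llvm.assume call carrying an "align" operand bundle, roughly
  //   call void @llvm.assume(i1 true) [ "align"(ptr %p, i64 32) ]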
  case Builtin::BI__assume:
  case Builtin::BI__builtin_assume: {
    if (E->getArg(0)->HasSideEffects(getContext()))
      return RValue::get(nullptr);

    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
    Builder.CreateCall(FnAssume, ArgValue);
    return RValue::get(nullptr);
  }
  case Builtin::BI__builtin_assume_separate_storage: {
    const Expr *Arg0 = E->getArg(0);
    const Expr *Arg1 = E->getArg(1);

    Value *Value0 = EmitScalarExpr(Arg0);
    Value *Value1 = EmitScalarExpr(Arg1);

    Value *Values[] = {Value0, Value1};
    OperandBundleDefT<Value *> OBD("separate_storage", Values);
    Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
    return RValue::get(nullptr);
  }
  case Builtin::BI__arithmetic_fence: {
    // Create the builtin call if FastMath is selected, and the target
    // supports the builtin, otherwise just return the argument.
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    llvm::FastMathFlags FMF = Builder.getFastMathFlags();
    bool isArithmeticFenceEnabled =
        FMF.allowReassoc() &&
        getContext().getTargetInfo().checkArithmeticFenceSupported();
    QualType ArgType = E->getArg(0)->getType();
    if (ArgType->isComplexType()) {
      if (isArithmeticFenceEnabled) {
        QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
        ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
        Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
                                                    ConvertType(ElementType));
        Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
                                                    ConvertType(ElementType));
        return RValue::getComplex(std::make_pair(Real, Imag));
      }
      ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
      Value *Real = ComplexVal.first;
      Value *Imag = ComplexVal.second;
      return RValue::getComplex(std::make_pair(Real, Imag));
    }
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    if (isArithmeticFenceEnabled)
      return RValue::get(
          Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
    return RValue::get(ArgValue);
  }
  case Builtin::BI__builtin_bswap16:
  case Builtin::BI__builtin_bswap32:
  case Builtin::BI__builtin_bswap64:
  case Builtin::BI_byteswap_ushort:
  case Builtin::BI_byteswap_ulong:
  case Builtin::BI_byteswap_uint64: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
  }
  case Builtin::BI__builtin_bitreverse8:
  case Builtin::BI__builtin_bitreverse16:
  case Builtin::BI__builtin_bitreverse32:
  case Builtin::BI__builtin_bitreverse64: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
  }
  case Builtin::BI__builtin_rotateleft8:
  case Builtin::BI__builtin_rotateleft16:
  case Builtin::BI__builtin_rotateleft32:
  case Builtin::BI__builtin_rotateleft64:
  case Builtin::BI_rotl8: // Microsoft variants of rotate left
  case Builtin::BI_rotl16:
  case Builtin::BI_rotl:
  case Builtin::BI_lrotl:
  case Builtin::BI_rotl64:
    return emitRotate(E, false);

  case Builtin::BI__builtin_rotateright8:
  case Builtin::BI__builtin_rotateright16:
  case Builtin::BI__builtin_rotateright32:
  case Builtin::BI__builtin_rotateright64:
  case Builtin::BI_rotr8: // Microsoft variants of rotate right
  case Builtin::BI_rotr16:
  case Builtin::BI_rotr:
  case Builtin::BI_lrotr:
  case Builtin::BI_rotr64:
    return emitRotate(E, true);
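  // Note (illustrative): emitRotate lowers these to the funnel-shift
  // intrinsics with both value operands equal, e.g. a 32-bit rotate-left of x
  // by n becomes roughly `call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %n)`
  // (llvm.fshr for rotate-right).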
  case Builtin::BI__builtin_constant_p: {
    llvm::Type *ResultType = ConvertType(E->getType());

    const Expr *Arg = E->getArg(0);
    QualType ArgType = Arg->getType();
    // FIXME: The allowance for Obj-C pointers and block pointers is historical
    // and likely a mistake.
    if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
        !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
      // Per the GCC documentation, only numeric constants are recognized after
      // inlining.
      return RValue::get(ConstantInt::get(ResultType, 0));

    if (Arg->HasSideEffects(getContext()))
      // The argument is unevaluated, so be conservative if it might have
      // side-effects.
      return RValue::get(ConstantInt::get(ResultType, 0));

    Value *ArgValue = EmitScalarExpr(Arg);
    if (ArgType->isObjCObjectPointerType()) {
      // Convert Objective-C objects to id because we cannot distinguish between
      // LLVM types for Obj-C classes as they are opaque.
      ArgType = CGM.getContext().getObjCIdType();
      ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
    }
    Function *F =
        CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_dynamic_object_size:
  case Builtin::BI__builtin_object_size: {
    unsigned Type =
        E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
    auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));

    // We pass this builtin onto the optimizer so that it can figure out the
    // object size in more complex cases.
    bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
    return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
                                             /*EmittedE=*/nullptr, IsDynamic));
  }
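  // Note (illustrative): emitBuiltinObjectSize emits llvm.objectsize, roughly
  //   call i64 @llvm.objectsize.i64.p0(ptr %p, i1 %min, i1 true, i1 %dynamic)
  // which the optimizer later folds once the pointed-to object is known.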
  case Builtin::BI__builtin_prefetch: {
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
      llvm::ConstantInt::get(Int32Ty, 0);
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
      llvm::ConstantInt::get(Int32Ty, 3);
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
    Builder.CreateCall(F, {Address, RW, Locality, Data});
    return RValue::get(nullptr);
  }
  case Builtin::BI__builtin_readcyclecounter: {
    Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin___clear_cache: {
    Value *Begin = EmitScalarExpr(E->getArg(0));
    Value *End = EmitScalarExpr(E->getArg(1));
    Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
    return RValue::get(Builder.CreateCall(F, {Begin, End}));
  }
  case Builtin::BI__builtin_trap:
    EmitTrapCall(Intrinsic::trap);
    return RValue::get(nullptr);
  case Builtin::BI__debugbreak:
    EmitTrapCall(Intrinsic::debugtrap);
    return RValue::get(nullptr);
  case Builtin::BI__builtin_unreachable: {
    EmitUnreachable(E->getExprLoc());

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("unreachable.cont"));

    return RValue::get(nullptr);
  }
  case Builtin::BI__builtin_powi:
  case Builtin::BI__builtin_powif:
  case Builtin::BI__builtin_powil: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));

    if (Builder.getIsFPConstrained()) {
      // FIXME: llvm.powi has 2 mangling types,
      // llvm.experimental.constrained.powi has one.
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
                                     Src0->getType());
      return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
    }

    Function *F = CGM.getIntrinsic(Intrinsic::powi,
                                   { Src0->getType(), Src1->getType() });
    return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
  }
  case Builtin::BI__builtin_isgreater:
  case Builtin::BI__builtin_isgreaterequal:
  case Builtin::BI__builtin_isless:
  case Builtin::BI__builtin_islessequal:
  case Builtin::BI__builtin_islessgreater:
  case Builtin::BI__builtin_isunordered: {
    // Ordered comparisons: we know the arguments to these are matching scalar
    // floating point values.
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));

    switch (BuiltinID) {
    default: llvm_unreachable("Unknown ordered comparison");
    case Builtin::BI__builtin_isgreater:
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isgreaterequal:
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isless:
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessequal:
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessgreater:
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isunordered:
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
      break;
    }
    // ZExt bool to int type.
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_isnan: {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    Value *V = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = V->getType();
    const llvm::fltSemantics &Semantics = Ty->getFltSemantics();
    if (!Builder.getIsFPConstrained() ||
        Builder.getDefaultConstrainedExcept() == fp::ebIgnore ||
        !Ty->isIEEE()) {
      V = Builder.CreateFCmpUNO(V, V, "cmp");
      return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
    }

    if (Value *Result = getTargetHooks().testFPKind(V, BuiltinID, Builder, CGM))
      return RValue::get(Result);

    // NaN has all exp bits set and a non zero significand. Therefore:
    // isnan(V) == ((exp mask - (abs(V) & exp mask)) < 0)
    unsigned bitsize = Ty->getScalarSizeInBits();
    llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize);
    Value *IntV = Builder.CreateBitCast(V, IntTy);
    APInt AndMask = APInt::getSignedMaxValue(bitsize);
    Value *AbsV =
        Builder.CreateAnd(IntV, llvm::ConstantInt::get(IntTy, AndMask));
    APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
    Value *Sub =
        Builder.CreateSub(llvm::ConstantInt::get(IntTy, ExpMask), AbsV);
    // V = sign bit (Sub) <=> V = (Sub < 0)
    V = Builder.CreateLShr(Sub, llvm::ConstantInt::get(IntTy, bitsize - 1));
    if (bitsize > 32)
      V = Builder.CreateTrunc(V, ConvertType(E->getType()));
    return RValue::get(V);
  }
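  // Worked example (IEEE float, illustrative): the exp mask is 0x7f800000 and
  // abs(V) is bits & 0x7fffffff, so 0x7f800000 - abs(V) is negative exactly
  // when abs(V) > 0x7f800000, i.e. when V is a NaN; the sign bit of that
  // subtraction is therefore the isnan result.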
  case Builtin::BI__builtin_nondeterministic_value: {
    llvm::Type *Ty = ConvertType(E->getArg(0)->getType());

    Value *Result = PoisonValue::get(Ty);
    Result = Builder.CreateFreeze(Result);

    return RValue::get(Result);
  }
  case Builtin::BI__builtin_elementwise_abs: {
    Value *Result;
    QualType QT = E->getArg(0)->getType();

    if (auto *VecTy = QT->getAs<VectorType>())
      QT = VecTy->getElementType();
    if (QT->isIntegerType())
      Result = Builder.CreateBinaryIntrinsic(
          llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
          Builder.getFalse(), nullptr, "elt.abs");
    else
      Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs");

    return RValue::get(Result);
  }
  case Builtin::BI__builtin_elementwise_ceil:
    return RValue::get(
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil"));
  case Builtin::BI__builtin_elementwise_exp:
    return RValue::get(
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp, "elt.exp"));
  case Builtin::BI__builtin_elementwise_exp2:
    return RValue::get(
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp2, "elt.exp2"));
  case Builtin::BI__builtin_elementwise_log:
    return RValue::get(
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::log, "elt.log"));
  case Builtin::BI__builtin_elementwise_log2:
    return RValue::get(
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::log2, "elt.log2"));
  case Builtin::BI__builtin_elementwise_log10:
    return RValue::get(
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::log10, "elt.log10"));
  case Builtin::BI__builtin_elementwise_cos:
    return RValue::get(
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos"));
  case Builtin::BI__builtin_elementwise_floor:
    return RValue::get(
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor"));
  case Builtin::BI__builtin_elementwise_roundeven:
    return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven,
                                        "elt.roundeven"));
  case Builtin::BI__builtin_elementwise_sin:
    return RValue::get(
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin"));

  case Builtin::BI__builtin_elementwise_trunc:
    return RValue::get(
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
  case Builtin::BI__builtin_elementwise_canonicalize:
    return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::canonicalize,
                                        "elt.canonicalize"));
  case Builtin::BI__builtin_elementwise_copysign:
    return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::copysign));
  case Builtin::BI__builtin_elementwise_fma:
    return RValue::get(emitTernaryBuiltin(*this, E, llvm::Intrinsic::fma));
  case Builtin::BI__builtin_elementwise_add_sat:
  case Builtin::BI__builtin_elementwise_sub_sat: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Result;
    assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
    QualType Ty = E->getArg(0)->getType();
    if (auto *VecTy = Ty->getAs<VectorType>())
      Ty = VecTy->getElementType();
    bool IsSigned = Ty->isSignedIntegerType();
    unsigned Opc;
    if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
      Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
    else
      Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
    Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_elementwise_max: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Result;
    if (Op0->getType()->isIntOrIntVectorTy()) {
      QualType Ty = E->getArg(0)->getType();
      if (auto *VecTy = Ty->getAs<VectorType>())
        Ty = VecTy->getElementType();
      Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
                                                 ? llvm::Intrinsic::smax
                                                 : llvm::Intrinsic::umax,
                                             Op0, Op1, nullptr, "elt.max");
    } else
      Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_elementwise_min: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Result;
    if (Op0->getType()->isIntOrIntVectorTy()) {
      QualType Ty = E->getArg(0)->getType();
      if (auto *VecTy = Ty->getAs<VectorType>())
        Ty = VecTy->getElementType();
      Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
                                                 ? llvm::Intrinsic::smin
                                                 : llvm::Intrinsic::umin,
                                             Op0, Op1, nullptr, "elt.min");
    } else
      Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_reduce_max: {
    auto GetIntrinsicID = [](QualType QT) {
      if (auto *VecTy = QT->getAs<VectorType>())
        QT = VecTy->getElementType();
      if (QT->isSignedIntegerType())
        return llvm::Intrinsic::vector_reduce_smax;
      if (QT->isUnsignedIntegerType())
        return llvm::Intrinsic::vector_reduce_umax;
      assert(QT->isFloatingType() && "must have a float here");
      return llvm::Intrinsic::vector_reduce_fmax;
    };
    return RValue::get(emitUnaryBuiltin(
        *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.max"));
  }

  case Builtin::BI__builtin_reduce_min: {
    auto GetIntrinsicID = [](QualType QT) {
      if (auto *VecTy = QT->getAs<VectorType>())
        QT = VecTy->getElementType();
      if (QT->isSignedIntegerType())
        return llvm::Intrinsic::vector_reduce_smin;
      if (QT->isUnsignedIntegerType())
        return llvm::Intrinsic::vector_reduce_umin;
      assert(QT->isFloatingType() && "must have a float here");
      return llvm::Intrinsic::vector_reduce_fmin;
    };
    return RValue::get(emitUnaryBuiltin(
        *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
  }
  case Builtin::BI__builtin_reduce_add:
    return RValue::get(emitUnaryBuiltin(
        *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
  case Builtin::BI__builtin_reduce_mul:
    return RValue::get(emitUnaryBuiltin(
        *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
  case Builtin::BI__builtin_reduce_xor:
    return RValue::get(emitUnaryBuiltin(
        *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
  case Builtin::BI__builtin_reduce_or:
    return RValue::get(emitUnaryBuiltin(
        *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
  case Builtin::BI__builtin_reduce_and:
    return RValue::get(emitUnaryBuiltin(
        *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
  case Builtin::BI__builtin_matrix_transpose: {
    auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
    Value *MatValue = EmitScalarExpr(E->getArg(0));
    MatrixBuilder MB(Builder);
    Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
                                             MatrixTy->getNumColumns());
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_matrix_column_major_load: {
    MatrixBuilder MB(Builder);
    // Emit everything that isn't dependent on the first parameter type
    Value *Stride = EmitScalarExpr(E->getArg(3));
    const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
    auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
    assert(PtrTy && "arg0 must be of pointer type");
    bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();

    Address Src = EmitPointerWithAlignment(E->getArg(0));
    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    Value *Result = MB.CreateColumnMajorLoad(
        Src.getElementType(), Src.getPointer(),
        Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
        ResultTy->getNumRows(), ResultTy->getNumColumns(),
        "matrix");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_matrix_column_major_store: {
    MatrixBuilder MB(Builder);
    Value *Matrix = EmitScalarExpr(E->getArg(0));
    Address Dst = EmitPointerWithAlignment(E->getArg(1));
    Value *Stride = EmitScalarExpr(E->getArg(2));

    const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
    auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
    assert(PtrTy && "arg1 must be of pointer type");
    bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();

    EmitNonNullArgCheck(RValue::get(Dst.getPointer()), E->getArg(1)->getType(),
                        E->getArg(1)->getExprLoc(), FD, 0);
    Value *Result = MB.CreateColumnMajorStore(
        Matrix, Dst.getPointer(), Align(Dst.getAlignment().getQuantity()),
        Stride, IsVolatile, MatrixTy->getNumRows(), MatrixTy->getNumColumns());
    return RValue::get(Result);
  }
  case Builtin::BIfinite:
  case Builtin::BI__finite:
  case Builtin::BIfinitef:
  case Builtin::BI__finitef:
  case Builtin::BIfinitel:
  case Builtin::BI__finitel:
  case Builtin::BI__builtin_isinf:
  case Builtin::BI__builtin_isfinite: {
    // isinf(x)    --> fabs(x) == infinity
    // isfinite(x) --> fabs(x) != infinity
    // x != NaN via the ordered compare in either case.
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    Value *V = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = V->getType();
    if (!Builder.getIsFPConstrained() ||
        Builder.getDefaultConstrainedExcept() == fp::ebIgnore ||
        !Ty->isIEEE()) {
      Value *Fabs = EmitFAbs(*this, V);
      Constant *Infinity = ConstantFP::getInfinity(V->getType());
      CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
                                    ? CmpInst::FCMP_OEQ
                                    : CmpInst::FCMP_ONE;
      Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
      return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
    }

    if (Value *Result = getTargetHooks().testFPKind(V, BuiltinID, Builder, CGM))
      return RValue::get(Result);

    // Inf values have all exp bits set and a zero significand. Therefore:
    // isinf(V) == ((V << 1) == ((exp mask) << 1))
    // isfinite(V) == ((V << 1) < ((exp mask) << 1)) using unsigned comparison
    unsigned bitsize = Ty->getScalarSizeInBits();
    llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize);
    Value *IntV = Builder.CreateBitCast(V, IntTy);
    Value *Shl1 = Builder.CreateShl(IntV, 1);
    const llvm::fltSemantics &Semantics = Ty->getFltSemantics();
    APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
    Value *ExpMaskShl1 = llvm::ConstantInt::get(IntTy, ExpMask.shl(1));
    if (BuiltinID == Builtin::BI__builtin_isinf)
      V = Builder.CreateICmpEQ(Shl1, ExpMaskShl1);
    else
      V = Builder.CreateICmpULT(Shl1, ExpMaskShl1);
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }
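  // Worked example (IEEE float, illustrative): shifting out the sign bit gives
  // (bits << 1); infinity becomes exactly 0xff000000, every finite value is
  // strictly below it, and every NaN is strictly above it, so a single integer
  // compare implements either predicate.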
  case Builtin::BI__builtin_isinf_sign: {
    // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
    Value *Arg = EmitScalarExpr(E->getArg(0));
    Value *AbsArg = EmitFAbs(*this, Arg);
    Value *IsInf = Builder.CreateFCmpOEQ(
        AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
    Value *IsNeg = EmitSignBit(*this, Arg);

    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Zero = Constant::getNullValue(IntTy);
    Value *One = ConstantInt::get(IntTy, 1);
    Value *NegativeOne = ConstantInt::get(IntTy, -1);
    Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
    Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_isnormal: {
    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V);
    Value *IsLessThanInf =
      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
    V = Builder.CreateAnd(V, IsNormal, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_flt_rounds: {
    Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }

  case Builtin::BI__builtin_set_flt_rounds: {
    Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);

    Value *V = EmitScalarExpr(E->getArg(0));
    Builder.CreateCall(F, V);
    return RValue::get(nullptr);
  }
  case Builtin::BI__builtin_fpclassify: {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
    Value *V = EmitScalarExpr(E->getArg(5));
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
                        "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V);
    Value *IsInf =
      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                            "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    Value *NormalResult =
      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                           EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }
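  // The generated CFG is a chain of tests feeding the single PHI in
  // fpclassify_end: zero selects arg 4, NaN selects arg 0, infinity selects
  // arg 1, and the remaining path selects between arg 2 (normal) and arg 3
  // (subnormal).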
  case Builtin::BIalloca:
  case Builtin::BI_alloca:
  case Builtin::BI__builtin_alloca_uninitialized:
  case Builtin::BI__builtin_alloca: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    const TargetInfo &TI = getContext().getTargetInfo();
    // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
    const Align SuitableAlignmentInBytes =
        CGM.getContext()
            .toCharUnitsFromBits(TI.getSuitableAlign())
            .getAsAlign();
    AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
    AI->setAlignment(SuitableAlignmentInBytes);
    if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
      initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
    return RValue::get(AI);
  }

  case Builtin::BI__builtin_alloca_with_align_uninitialized:
  case Builtin::BI__builtin_alloca_with_align: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
    auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
    unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
    const Align AlignmentInBytes =
        CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
    AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
    AI->setAlignment(AlignmentInBytes);
    if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
      initializeAlloca(*this, AI, Size, AlignmentInBytes);
    return RValue::get(AI);
  }
  case Builtin::BIbzero:
  case Builtin::BI__builtin_bzero: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
    return RValue::get(nullptr);
  }
  case Builtin::BImemcpy:
  case Builtin::BI__builtin_memcpy:
  case Builtin::BImempcpy:
  case Builtin::BI__builtin_mempcpy: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
                        E->getArg(1)->getExprLoc(), FD, 1);
    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
    if (BuiltinID == Builtin::BImempcpy ||
        BuiltinID == Builtin::BI__builtin_mempcpy)
      return RValue::get(Builder.CreateInBoundsGEP(Dest.getElementType(),
                                                   Dest.getPointer(), SizeVal));
    else
      return RValue::get(Dest.getPointer());
  }
  case Builtin::BI__builtin_memcpy_inline: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    uint64_t Size =
        E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
                        E->getArg(1)->getExprLoc(), FD, 1);
    Builder.CreateMemCpyInline(Dest, Src, Size);
    return RValue::get(nullptr);
  }

  case Builtin::BI__builtin_char_memchr:
    BuiltinID = Builtin::BI__builtin_memchr;
    break;
  case Builtin::BI__builtin___memcpy_chk: {
    // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
    Expr::EvalResult SizeResult, DstSizeResult;
    if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
      break;
    llvm::APSInt Size = SizeResult.Val.getInt();
    llvm::APSInt DstSize = DstSizeResult.Val.getInt();
    if (Size.ugt(DstSize))
      break;
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
  case Builtin::BI__builtin_objc_memmove_collectable: {
    Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
    Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
                                                  DestAddr, SrcAddr, SizeVal);
    return RValue::get(DestAddr.getPointer());
  }

  case Builtin::BI__builtin___memmove_chk: {
    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
    Expr::EvalResult SizeResult, DstSizeResult;
    if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
      break;
    llvm::APSInt Size = SizeResult.Val.getInt();
    llvm::APSInt DstSize = DstSizeResult.Val.getInt();
    if (Size.ugt(DstSize))
      break;
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
  case Builtin::BImemmove:
  case Builtin::BI__builtin_memmove: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
                        E->getArg(1)->getExprLoc(), FD, 1);
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
  case Builtin::BImemset:
  case Builtin::BI__builtin_memset: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
  case Builtin::BI__builtin_memset_inline: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal =
        Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
    uint64_t Size =
        E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    Builder.CreateMemSetInline(Dest, ByteVal, Size);
    return RValue::get(nullptr);
  }
  case Builtin::BI__builtin___memset_chk: {
    // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
    Expr::EvalResult SizeResult, DstSizeResult;
    if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
      break;
    llvm::APSInt Size = SizeResult.Val.getInt();
    llvm::APSInt DstSize = DstSizeResult.Val.getInt();
    if (Size.ugt(DstSize))
      break;
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
  case Builtin::BI__builtin_wmemchr: {
    // The MSVC runtime library does not provide a definition of wmemchr, so we
    // need an inline implementation.
    if (!getTarget().getTriple().isOSMSVCRT())
      break;

    llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
    Value *Str = EmitScalarExpr(E->getArg(0));
    Value *Chr = EmitScalarExpr(E->getArg(1));
    Value *Size = EmitScalarExpr(E->getArg(2));

    BasicBlock *Entry = Builder.GetInsertBlock();
    BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
    BasicBlock *Next = createBasicBlock("wmemchr.next");
    BasicBlock *Exit = createBasicBlock("wmemchr.exit");
    Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
    Builder.CreateCondBr(SizeEq0, Exit, CmpEq);

    EmitBlock(CmpEq);
    PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
    StrPhi->addIncoming(Str, Entry);
    PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
    SizePhi->addIncoming(Size, Entry);
    CharUnits WCharAlign =
        getContext().getTypeAlignInChars(getContext().WCharTy);
    Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
    Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
    Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
    Builder.CreateCondBr(StrEqChr, Exit, Next);

    EmitBlock(Next);
    Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
    Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
    Value *NextSizeEq0 =
        Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
    Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
    StrPhi->addIncoming(NextStr, Next);
    SizePhi->addIncoming(NextSize, Next);

    EmitBlock(Exit);
    PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
    Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
    Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
    Ret->addIncoming(FoundChr, CmpEq);
    return RValue::get(Ret);
  }
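  // The blocks above correspond roughly to this C loop (illustrative only):
  //   while (size) { if (*s == c) return (wchar_t *)s; ++s; --size; }
  //   return 0;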
  case Builtin::BI__builtin_wmemcmp: {
    // The MSVC runtime library does not provide a definition of wmemcmp, so we
    // need an inline implementation.
    if (!getTarget().getTriple().isOSMSVCRT())
      break;

    llvm::Type *WCharTy = ConvertType(getContext().WCharTy);

    Value *Dst = EmitScalarExpr(E->getArg(0));
    Value *Src = EmitScalarExpr(E->getArg(1));
    Value *Size = EmitScalarExpr(E->getArg(2));

    BasicBlock *Entry = Builder.GetInsertBlock();
    BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
    BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
    BasicBlock *Next = createBasicBlock("wmemcmp.next");
    BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
    Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
    Builder.CreateCondBr(SizeEq0, Exit, CmpGT);

    EmitBlock(CmpGT);
    PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
    DstPhi->addIncoming(Dst, Entry);
    PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
    SrcPhi->addIncoming(Src, Entry);
    PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
    SizePhi->addIncoming(Size, Entry);
    CharUnits WCharAlign =
        getContext().getTypeAlignInChars(getContext().WCharTy);
    Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
    Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
    Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
    Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);

    EmitBlock(CmpLT);
    Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
    Builder.CreateCondBr(DstLtSrc, Exit, Next);

    EmitBlock(Next);
    Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
    Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
    Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
    Value *NextSizeEq0 =
        Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
    Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
    DstPhi->addIncoming(NextDst, Next);
    SrcPhi->addIncoming(NextSrc, Next);
    SizePhi->addIncoming(NextSize, Next);

    EmitBlock(Exit);
    PHINode *Ret = Builder.CreatePHI(IntTy, 4);
    Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
    Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
    Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
    Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
    return RValue::get(Ret);
  }
  case Builtin::BI__builtin_dwarf_cfa: {
    // The offset in bytes from the first argument to the CFA.
    //
    // Why on earth is this in the frontend? Is there any reason at
    // all that the backend can't reasonably determine this while
    // lowering llvm.eh.dwarf.cfa()?
    //
    // TODO: If there's a satisfactory reason, add a target hook for
    // this instead of hard-coding 0, which is correct for most targets.
    int32_t Offset = 0;

    Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
    return RValue::get(Builder.CreateCall(F,
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
  }
  case Builtin::BI__builtin_return_address: {
    Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
                                                   getContext().UnsignedIntTy);
    Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI_ReturnAddress: {
    Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
    return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
  }
  case Builtin::BI__builtin_frame_address: {
    Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
                                                   getContext().UnsignedIntTy);
    Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_extract_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_frob_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_dwarf_sp_column: {
    llvm::IntegerType *Ty
      = cast<llvm::IntegerType>(ConvertType(E->getType()));
    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
    if (Column == -1) {
      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
      return RValue::get(llvm::UndefValue::get(Ty));
    }
    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
  }
  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_eh_return: {
    Value *Int = EmitScalarExpr(E->getArg(0));
    Value *Ptr = EmitScalarExpr(E->getArg(1));

    llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
    Function *F =
        CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
                                                    : Intrinsic::eh_return_i64);
    Builder.CreateCall(F, {Int, Ptr});
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("builtin_eh_return.cont"));

    return RValue::get(nullptr);
  }
  case Builtin::BI__builtin_unwind_init: {
    Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
    Builder.CreateCall(F);
    return RValue::get(nullptr);
  }
  case Builtin::BI__builtin_extend_pointer: {
    // Extends a pointer to the size of an _Unwind_Word, which is
    // uint64_t on all platforms. Generally this gets poked into a
    // register and eventually used as an address, so if the
    // addressing registers are wider than pointers and the platform
    // doesn't implicitly ignore high-order bits when doing
    // addressing, we need to make sure we zext / sext based on
    // the platform's expectations.
    //
    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html

    // Cast the pointer to intptr_t.
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");

    // If that's 64 bits, we're done.
    if (IntPtrTy->getBitWidth() == 64)
      return RValue::get(Result);

    // Otherwise, ask the codegen data what to do.
    if (getTargetHooks().extendPointerWithSExt())
      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
    else
      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
  }
  case Builtin::BI__builtin_setjmp: {
    // Buffer is a void**.
    Address Buf = EmitPointerWithAlignment(E->getArg(0));

    // Store the frame pointer to the setjmp buffer.
    Value *FrameAddr = Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
        ConstantInt::get(Int32Ty, 0));
    Builder.CreateStore(FrameAddr, Buf);

    // Store the stack pointer to the setjmp buffer.
    Value *StackAddr =
        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
    Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
    Builder.CreateStore(StackAddr, StackSaveSlot);

    // Call LLVM's EH setjmp, which is lightweight.
    Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
    Buf = Builder.CreateElementBitCast(Buf, Int8Ty);
    return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
  }
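  // Buffer layout assumed here (illustrative): slot 0 holds the frame address
  // and slot 2 the saved stack pointer; the slot in between is left for the
  // resume address, which the llvm.eh.sjlj.setjmp lowering fills in itself.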
  case Builtin::BI__builtin_longjmp: {
    Value *Buf = EmitScalarExpr(E->getArg(0));
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);

    // Call LLVM's EH longjmp, which is lightweight.
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);

    // longjmp doesn't return; mark this as unreachable.
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("longjmp.cont"));

    return RValue::get(nullptr);
  }
  case Builtin::BI__builtin_launder: {
    const Expr *Arg = E->getArg(0);
    QualType ArgTy = Arg->getType()->getPointeeType();
    Value *Ptr = EmitScalarExpr(Arg);
    if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
      Ptr = Builder.CreateLaunderInvariantGroup(Ptr);

    return RValue::get(Ptr);
  }
  case Builtin::BI__sync_fetch_and_add:
  case Builtin::BI__sync_fetch_and_sub:
  case Builtin::BI__sync_fetch_and_or:
  case Builtin::BI__sync_fetch_and_and:
  case Builtin::BI__sync_fetch_and_xor:
  case Builtin::BI__sync_fetch_and_nand:
  case Builtin::BI__sync_add_and_fetch:
  case Builtin::BI__sync_sub_and_fetch:
  case Builtin::BI__sync_and_and_fetch:
  case Builtin::BI__sync_or_and_fetch:
  case Builtin::BI__sync_xor_and_fetch:
  case Builtin::BI__sync_nand_and_fetch:
  case Builtin::BI__sync_val_compare_and_swap:
  case Builtin::BI__sync_bool_compare_and_swap:
  case Builtin::BI__sync_lock_test_and_set:
  case Builtin::BI__sync_lock_release:
  case Builtin::BI__sync_swap:
    llvm_unreachable("Shouldn't make it through sema");
  case Builtin::BI__sync_fetch_and_add_1:
  case Builtin::BI__sync_fetch_and_add_2:
  case Builtin::BI__sync_fetch_and_add_4:
  case Builtin::BI__sync_fetch_and_add_8:
  case Builtin::BI__sync_fetch_and_add_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
  case Builtin::BI__sync_fetch_and_sub_1:
  case Builtin::BI__sync_fetch_and_sub_2:
  case Builtin::BI__sync_fetch_and_sub_4:
  case Builtin::BI__sync_fetch_and_sub_8:
  case Builtin::BI__sync_fetch_and_sub_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
  case Builtin::BI__sync_fetch_and_or_1:
  case Builtin::BI__sync_fetch_and_or_2:
  case Builtin::BI__sync_fetch_and_or_4:
  case Builtin::BI__sync_fetch_and_or_8:
  case Builtin::BI__sync_fetch_and_or_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
  case Builtin::BI__sync_fetch_and_and_1:
  case Builtin::BI__sync_fetch_and_and_2:
  case Builtin::BI__sync_fetch_and_and_4:
  case Builtin::BI__sync_fetch_and_and_8:
  case Builtin::BI__sync_fetch_and_and_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
  case Builtin::BI__sync_fetch_and_xor_1:
  case Builtin::BI__sync_fetch_and_xor_2:
  case Builtin::BI__sync_fetch_and_xor_4:
  case Builtin::BI__sync_fetch_and_xor_8:
  case Builtin::BI__sync_fetch_and_xor_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
  case Builtin::BI__sync_fetch_and_nand_1:
  case Builtin::BI__sync_fetch_and_nand_2:
  case Builtin::BI__sync_fetch_and_nand_4:
  case Builtin::BI__sync_fetch_and_nand_8:
  case Builtin::BI__sync_fetch_and_nand_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);

  // Clang extensions: not overloaded yet.
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
                                llvm::Instruction::Xor);
  case Builtin::BI__sync_nand_and_fetch_1:
  case Builtin::BI__sync_nand_and_fetch_2:
  case Builtin::BI__sync_nand_and_fetch_4:
  case Builtin::BI__sync_nand_and_fetch_8:
  case Builtin::BI__sync_nand_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
                                llvm::Instruction::And, true);
  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16:
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));

  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16:
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));

  case Builtin::BI__sync_swap_1:
  case Builtin::BI__sync_swap_2:
  case Builtin::BI__sync_swap_4:
  case Builtin::BI__sync_swap_8:
  case Builtin::BI__sync_swap_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
  case Builtin::BI__sync_lock_release_1:
  case Builtin::BI__sync_lock_release_2:
  case Builtin::BI__sync_lock_release_4:
  case Builtin::BI__sync_lock_release_8:
  case Builtin::BI__sync_lock_release_16: {
    Value *Ptr = CheckAtomicAlignment(*this, E);
    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
    CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
    llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
                                             StoreSize.getQuantity() * 8);
    Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
    llvm::StoreInst *Store =
        Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
                                   StoreSize);
    Store->setAtomic(llvm::AtomicOrdering::Release);
    return RValue::get(nullptr);
  }
  case Builtin::BI__sync_synchronize: {
    // We assume this is supposed to correspond to a C++0x-style
    // sequentially-consistent fence (i.e. this is only usable for
    // synchronization, not device I/O or anything like that). This intrinsic
    // is really badly designed in the sense that in theory, there isn't
    // any way to safely use it... but in practice, it mostly works
    // to use it with non-atomic loads and stores to get acquire/release
    // semantics.
    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
    return RValue::get(nullptr);
  }
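  // Illustrative result: this case emits nothing more than `fence seq_cst`
  // in the generated IR.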
  case Builtin::BI__builtin_nontemporal_load:
    return RValue::get(EmitNontemporalLoad(*this, E));
  case Builtin::BI__builtin_nontemporal_store:
    return RValue::get(EmitNontemporalStore(*this, E));
  case Builtin::BI__c11_atomic_is_lock_free:
  case Builtin::BI__atomic_is_lock_free: {
    // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
    // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
    // _Atomic(T) is always properly-aligned.
    const char *LibCallName = "__atomic_is_lock_free";
    CallArgList Args;
    Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
             getContext().getSizeType());
    if (BuiltinID == Builtin::BI__atomic_is_lock_free)
      Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
               getContext().VoidPtrTy);
    else
      Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
               getContext().VoidPtrTy);
    const CGFunctionInfo &FuncInfo =
        CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
    llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
    return EmitCall(FuncInfo, CGCallee::forDirect(Func),
                    ReturnValueSlot(), Args);
  }
  case Builtin::BI__atomic_test_and_set: {
    // Look at the argument type to determine whether this is a volatile
    // operation. The parameter type is always volatile.
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
    bool Volatile =
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();

    Value *Ptr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
    Value *NewVal = Builder.getInt8(1);
    Value *Order = EmitScalarExpr(E->getArg(1));
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      AtomicRMWInst *Result = nullptr;
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
                                         llvm::AtomicOrdering::Monotonic);
        break;
      case 1: // memory_order_consume
      case 2: // memory_order_acquire
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
                                         llvm::AtomicOrdering::Acquire);
        break;
      case 3: // memory_order_release
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
                                         llvm::AtomicOrdering::Release);
        break;
      case 4: // memory_order_acq_rel
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
                                         llvm::AtomicOrdering::AcquireRelease);
        break;
      case 5: // memory_order_seq_cst
        Result = Builder.CreateAtomicRMW(
            llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
            llvm::AtomicOrdering::SequentiallyConsistent);
        break;
      }
      Result->setVolatile(Volatile);
      return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
    }

    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    llvm::BasicBlock *BBs[5] = {
      createBasicBlock("monotonic", CurFn),
      createBasicBlock("acquire", CurFn),
      createBasicBlock("release", CurFn),
      createBasicBlock("acqrel", CurFn),
      createBasicBlock("seqcst", CurFn)
    };
    llvm::AtomicOrdering Orders[5] = {
        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
        llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
        llvm::AtomicOrdering::SequentiallyConsistent};

    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);

    Builder.SetInsertPoint(ContBB);
    PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");

    for (unsigned i = 0; i < 5; ++i) {
      Builder.SetInsertPoint(BBs[i]);
      AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                                   Ptr, NewVal, Orders[i]);
      RMW->setVolatile(Volatile);
      Result->addIncoming(RMW, BBs[i]);
      Builder.CreateBr(ContBB);
    }

    SI->addCase(Builder.getInt32(0), BBs[0]);
    SI->addCase(Builder.getInt32(1), BBs[1]);
    SI->addCase(Builder.getInt32(2), BBs[1]);
    SI->addCase(Builder.getInt32(3), BBs[2]);
    SI->addCase(Builder.getInt32(4), BBs[3]);
    SI->addCase(Builder.getInt32(5), BBs[4]);

    Builder.SetInsertPoint(ContBB);
    return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
  }
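  // Illustrative example (not emitted verbatim; value names are made up): with
  // a constant order such as
  //   bool b = __atomic_test_and_set(&flag, __ATOMIC_SEQ_CST);
  // this lowers to roughly
  //   %old    = atomicrmw xchg ptr %flag, i8 1 seq_cst
  //   %tobool = icmp ne i8 %old, 0
  // while a runtime order goes through the switch/PHI emitted above.
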
  case Builtin::BI__atomic_clear: {
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
    bool Volatile =
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();

    Address Ptr = EmitPointerWithAlignment(E->getArg(0));
    Ptr = Builder.CreateElementBitCast(Ptr, Int8Ty);
    Value *NewVal = Builder.getInt8(0);
    Value *Order = EmitScalarExpr(E->getArg(1));
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        Store->setOrdering(llvm::AtomicOrdering::Monotonic);
        break;
      case 3: // memory_order_release
        Store->setOrdering(llvm::AtomicOrdering::Release);
        break;
      case 5: // memory_order_seq_cst
        Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
        break;
      }
      return RValue::get(nullptr);
    }

    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    llvm::BasicBlock *BBs[3] = {
      createBasicBlock("monotonic", CurFn),
      createBasicBlock("release", CurFn),
      createBasicBlock("seqcst", CurFn)
    };
    llvm::AtomicOrdering Orders[3] = {
        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
        llvm::AtomicOrdering::SequentiallyConsistent};

    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);

    for (unsigned i = 0; i < 3; ++i) {
      Builder.SetInsertPoint(BBs[i]);
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
      Store->setOrdering(Orders[i]);
      Builder.CreateBr(ContBB);
    }

    SI->addCase(Builder.getInt32(0), BBs[0]);
    SI->addCase(Builder.getInt32(3), BBs[1]);
    SI->addCase(Builder.getInt32(5), BBs[2]);

    Builder.SetInsertPoint(ContBB);
    return RValue::get(nullptr);
  }
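  // Illustrative example (not emitted verbatim):
  //   __atomic_clear(&flag, __ATOMIC_RELEASE);
  // with a constant order becomes a single atomic store of zero, roughly
  //   store atomic volatile i8 0, ptr %flag release   ; volatile only if the
  //                                                   ; pointee is volatile
  // and a runtime order selects the ordering through the switch above.
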
  case Builtin::BI__atomic_thread_fence:
  case Builtin::BI__atomic_signal_fence:
  case Builtin::BI__c11_atomic_thread_fence:
  case Builtin::BI__c11_atomic_signal_fence: {
    llvm::SyncScope::ID SSID;
    if (BuiltinID == Builtin::BI__atomic_signal_fence ||
        BuiltinID == Builtin::BI__c11_atomic_signal_fence)
      SSID = llvm::SyncScope::SingleThread;
    else
      SSID = llvm::SyncScope::System;
    Value *Order = EmitScalarExpr(E->getArg(0));
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        break;
      case 1: // memory_order_consume
      case 2: // memory_order_acquire
        Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
        break;
      case 3: // memory_order_release
        Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
        break;
      case 4: // memory_order_acq_rel
        Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
        break;
      case 5: // memory_order_seq_cst
        Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
        break;
      }
      return RValue::get(nullptr);
    }

    llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
    AcquireBB = createBasicBlock("acquire", CurFn);
    ReleaseBB = createBasicBlock("release", CurFn);
    AcqRelBB = createBasicBlock("acqrel", CurFn);
    SeqCstBB = createBasicBlock("seqcst", CurFn);
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);

    Builder.SetInsertPoint(AcquireBB);
    Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(1), AcquireBB);
    SI->addCase(Builder.getInt32(2), AcquireBB);

    Builder.SetInsertPoint(ReleaseBB);
    Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(3), ReleaseBB);

    Builder.SetInsertPoint(AcqRelBB);
    Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(4), AcqRelBB);

    Builder.SetInsertPoint(SeqCstBB);
    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(5), SeqCstBB);

    Builder.SetInsertPoint(ContBB);
    return RValue::get(nullptr);
  }
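  // Illustrative example (not emitted verbatim):
  //   __atomic_thread_fence(__ATOMIC_ACQ_REL);  // -> fence acq_rel
  //   __atomic_signal_fence(__ATOMIC_SEQ_CST);  // -> fence syncscope("singlethread") seq_cst
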
  case Builtin::BI__builtin_signbit:
  case Builtin::BI__builtin_signbitf:
  case Builtin::BI__builtin_signbitl: {
    return RValue::get(
        Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
                           ConvertType(E->getType())));
  }
  case Builtin::BI__warn_memset_zero_len:
    return RValue::getIgnored();
  case Builtin::BI__annotation: {
    // Re-encode each wide string to UTF8 and make an MDString.
    SmallVector<Metadata *, 1> Strings;
    for (const Expr *Arg : E->arguments()) {
      const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
      assert(Str->getCharByteWidth() == 2);
      StringRef WideBytes = Str->getBytes();
      std::string StrUtf8;
      if (!convertUTF16ToUTF8String(
              ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
        CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
        continue;
      }
      Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
    }

    // Build an MDTuple of MDStrings and emit the intrinsic call.
    llvm::Function *F =
        CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
    MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
    Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
    return RValue::getIgnored();
  }
  case Builtin::BI__builtin_annotation: {
    llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
    llvm::Function *F =
        CGM.getIntrinsic(llvm::Intrinsic::annotation,
                         {AnnVal->getType(), CGM.ConstGlobalsPtrTy});

    // Get the annotation string, go through casts. Sema requires this to be a
    // non-wide string literal, potentially casted, so the cast<> is safe.
    const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
    StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
    return RValue::get(
        EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
  }
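  // Illustrative example (not emitted verbatim; names are made up):
  //   int v = __builtin_annotation(x, "my.note");
  // emits a call to the llvm.annotation intrinsic with the string lowered to a
  // constant global, roughly
  //   %v = call i32 @llvm.annotation...(i32 %x, ptr @.str, ptr @.file, i32 <line>)
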
  case Builtin::BI__builtin_addcb:
  case Builtin::BI__builtin_addcs:
  case Builtin::BI__builtin_addc:
  case Builtin::BI__builtin_addcl:
  case Builtin::BI__builtin_addcll:
  case Builtin::BI__builtin_subcb:
  case Builtin::BI__builtin_subcs:
  case Builtin::BI__builtin_subc:
  case Builtin::BI__builtin_subcl:
  case Builtin::BI__builtin_subcll: {

    // We translate all of these builtins from expressions of the form:
    //   int x = ..., y = ..., carryin = ..., carryout, result;
    //   result = __builtin_addc(x, y, carryin, &carryout);
    //
    // to LLVM IR of the form:
    //
    //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
    //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
    //   %carry1 = extractvalue {i32, i1} %tmp1, 1
    //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
    //                                                       i32 %carryin)
    //   %result = extractvalue {i32, i1} %tmp2, 0
    //   %carry2 = extractvalue {i32, i1} %tmp2, 1
    //   %tmp3 = or i1 %carry1, %carry2
    //   %tmp4 = zext i1 %tmp3 to i32
    //   store i32 %tmp4, i32* %carryout

    // Scalarize our inputs.
    llvm::Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
    llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
    Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));

    // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
    llvm::Intrinsic::ID IntrinsicId;
    switch (BuiltinID) {
    default: llvm_unreachable("Unknown multiprecision builtin id.");
    case Builtin::BI__builtin_addcb:
    case Builtin::BI__builtin_addcs:
    case Builtin::BI__builtin_addc:
    case Builtin::BI__builtin_addcl:
    case Builtin::BI__builtin_addcll:
      IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
      break;
    case Builtin::BI__builtin_subcb:
    case Builtin::BI__builtin_subcs:
    case Builtin::BI__builtin_subc:
    case Builtin::BI__builtin_subcl:
    case Builtin::BI__builtin_subcll:
      IntrinsicId = llvm::Intrinsic::usub_with_overflow;
      break;
    }

    // Construct our resulting LLVM IR expression.
    llvm::Value *Carry1;
    llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
                                              X, Y, Carry1);
    llvm::Value *Carry2;
    llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
                                              Sum1, Carryin, Carry2);
    llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
                                               X->getType());
    Builder.CreateStore(CarryOut, CarryOutPtr);
    return RValue::get(Sum2);
  }
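  // Usage sketch (illustrative): chaining the carry builds a wider addition,
  // e.g. a 64-bit add from two 32-bit legs:
  //   unsigned lo, hi, c;
  //   lo = __builtin_addc(a0, b0, 0, &c);
  //   hi = __builtin_addc(a1, b1, c, &c);
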
  case Builtin::BI__builtin_add_overflow:
  case Builtin::BI__builtin_sub_overflow:
  case Builtin::BI__builtin_mul_overflow: {
    const clang::Expr *LeftArg = E->getArg(0);
    const clang::Expr *RightArg = E->getArg(1);
    const clang::Expr *ResultArg = E->getArg(2);

    clang::QualType ResultQTy =
        ResultArg->getType()->castAs<PointerType>()->getPointeeType();

    WidthAndSignedness LeftInfo =
        getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
    WidthAndSignedness RightInfo =
        getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
    WidthAndSignedness ResultInfo =
        getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);

    // Handle mixed-sign multiplication as a special case, because adding
    // runtime or backend support for our generic irgen would be too expensive.
    if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
      return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
                                          RightInfo, ResultArg, ResultQTy,
                                          ResultInfo);

    if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
                                              ResultInfo))
      return EmitCheckedUnsignedMultiplySignedResult(
          *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
          ResultInfo);

    WidthAndSignedness EncompassingInfo =
        EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});

    llvm::Type *EncompassingLLVMTy =
        llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);

    llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);

    llvm::Intrinsic::ID IntrinsicId;
    switch (BuiltinID) {
    default:
      llvm_unreachable("Unknown overflow builtin id.");
    case Builtin::BI__builtin_add_overflow:
      IntrinsicId = EncompassingInfo.Signed
                        ? llvm::Intrinsic::sadd_with_overflow
                        : llvm::Intrinsic::uadd_with_overflow;
      break;
    case Builtin::BI__builtin_sub_overflow:
      IntrinsicId = EncompassingInfo.Signed
                        ? llvm::Intrinsic::ssub_with_overflow
                        : llvm::Intrinsic::usub_with_overflow;
      break;
    case Builtin::BI__builtin_mul_overflow:
      IntrinsicId = EncompassingInfo.Signed
                        ? llvm::Intrinsic::smul_with_overflow
                        : llvm::Intrinsic::umul_with_overflow;
      break;
    }

    llvm::Value *Left = EmitScalarExpr(LeftArg);
    llvm::Value *Right = EmitScalarExpr(RightArg);
    Address ResultPtr = EmitPointerWithAlignment(ResultArg);

    // Extend each operand to the encompassing type.
    Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
    Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);

    // Perform the operation on the extended values.
    llvm::Value *Overflow, *Result;
    Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);

    if (EncompassingInfo.Width > ResultInfo.Width) {
      // The encompassing type is wider than the result type, so we need to
      // truncate it.
      llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);

      // To see if the truncation caused an overflow, we will extend
      // the result and then compare it to the original result.
      llvm::Value *ResultTruncExt = Builder.CreateIntCast(
          ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
      llvm::Value *TruncationOverflow =
          Builder.CreateICmpNE(Result, ResultTruncExt);

      Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
      Result = ResultTrunc;
    }

    // Finally, store the result using the pointer.
    bool isVolatile =
        ResultArg->getType()->getPointeeType().isVolatileQualified();
    Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);

    return RValue::get(Overflow);
  }
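  // Illustrative example (not emitted verbatim): with mixed widths such as
  //   int r; bool ovf = __builtin_add_overflow((long long)a, (long long)b, &r);
  // the operands are widened to the encompassing type, added with
  // llvm.sadd.with.overflow.i64, and the truncation check above ORs in any
  // additional overflow caused by narrowing the i64 result back to i32.
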
  case Builtin::BI__builtin_uadd_overflow:
  case Builtin::BI__builtin_uaddl_overflow:
  case Builtin::BI__builtin_uaddll_overflow:
  case Builtin::BI__builtin_usub_overflow:
  case Builtin::BI__builtin_usubl_overflow:
  case Builtin::BI__builtin_usubll_overflow:
  case Builtin::BI__builtin_umul_overflow:
  case Builtin::BI__builtin_umull_overflow:
  case Builtin::BI__builtin_umulll_overflow:
  case Builtin::BI__builtin_sadd_overflow:
  case Builtin::BI__builtin_saddl_overflow:
  case Builtin::BI__builtin_saddll_overflow:
  case Builtin::BI__builtin_ssub_overflow:
  case Builtin::BI__builtin_ssubl_overflow:
  case Builtin::BI__builtin_ssubll_overflow:
  case Builtin::BI__builtin_smul_overflow:
  case Builtin::BI__builtin_smull_overflow:
  case Builtin::BI__builtin_smulll_overflow: {

    // We translate all of these builtins directly to the relevant llvm IR node.

    // Scalarize our inputs.
    llvm::Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
    Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));

    // Decide which of the overflow intrinsics we are lowering to:
    llvm::Intrinsic::ID IntrinsicId;
    switch (BuiltinID) {
    default: llvm_unreachable("Unknown overflow builtin id.");
    case Builtin::BI__builtin_uadd_overflow:
    case Builtin::BI__builtin_uaddl_overflow:
    case Builtin::BI__builtin_uaddll_overflow:
      IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
      break;
    case Builtin::BI__builtin_usub_overflow:
    case Builtin::BI__builtin_usubl_overflow:
    case Builtin::BI__builtin_usubll_overflow:
      IntrinsicId = llvm::Intrinsic::usub_with_overflow;
      break;
    case Builtin::BI__builtin_umul_overflow:
    case Builtin::BI__builtin_umull_overflow:
    case Builtin::BI__builtin_umulll_overflow:
      IntrinsicId = llvm::Intrinsic::umul_with_overflow;
      break;
    case Builtin::BI__builtin_sadd_overflow:
    case Builtin::BI__builtin_saddl_overflow:
    case Builtin::BI__builtin_saddll_overflow:
      IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
      break;
    case Builtin::BI__builtin_ssub_overflow:
    case Builtin::BI__builtin_ssubl_overflow:
    case Builtin::BI__builtin_ssubll_overflow:
      IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
      break;
    case Builtin::BI__builtin_smul_overflow:
    case Builtin::BI__builtin_smull_overflow:
    case Builtin::BI__builtin_smulll_overflow:
      IntrinsicId = llvm::Intrinsic::smul_with_overflow;
      break;
    }

    llvm::Value *Carry;
    llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
    Builder.CreateStore(Sum, SumOutPtr);

    return RValue::get(Carry);
  }
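  // Illustrative example (not emitted verbatim):
  //   unsigned r; bool c = __builtin_uadd_overflow(x, y, &r);
  // lowers to roughly
  //   %pair = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
  //   %sum  = extractvalue { i32, i1 } %pair, 0   ; stored through &r
  //   %c    = extractvalue { i32, i1 } %pair, 1   ; returned
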
  case Builtin::BIaddressof:
  case Builtin::BI__addressof:
  case Builtin::BI__builtin_addressof:
    return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
  case Builtin::BI__builtin_function_start:
    return RValue::get(CGM.GetFunctionStart(
        E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
  case Builtin::BI__builtin_operator_new:
    return EmitBuiltinNewDeleteCall(
        E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
  case Builtin::BI__builtin_operator_delete:
    EmitBuiltinNewDeleteCall(
        E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
    return RValue::get(nullptr);

  case Builtin::BI__builtin_is_aligned:
    return EmitBuiltinIsAligned(E);
  case Builtin::BI__builtin_align_up:
    return EmitBuiltinAlignTo(E, true);
  case Builtin::BI__builtin_align_down:
    return EmitBuiltinAlignTo(E, false);
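  // Semantics sketch (illustrative): for a power-of-two alignment,
  //   __builtin_is_aligned(p, 16)  -> true iff (uintptr_t)p % 16 == 0
  //   __builtin_align_up(p, 16)    -> p rounded up to the next multiple of 16
  //   __builtin_align_down(p, 16)  -> p rounded down to a multiple of 16
  // The actual IR is produced by EmitBuiltinIsAligned / EmitBuiltinAlignTo.
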
  case Builtin::BI__noop:
    // __noop always evaluates to an integer literal zero.
    return RValue::get(ConstantInt::get(IntTy, 0));
  case Builtin::BI__builtin_call_with_static_chain: {
    const CallExpr *Call = cast<CallExpr>(E->getArg(0));
    const Expr *Chain = E->getArg(1);
    return EmitCall(Call->getCallee()->getType(),
                    EmitCallee(Call->getCallee()), Call, ReturnValue,
                    EmitScalarExpr(Chain));
  }
  case Builtin::BI_InterlockedExchange8:
  case Builtin::BI_InterlockedExchange16:
  case Builtin::BI_InterlockedExchange:
  case Builtin::BI_InterlockedExchangePointer:
    return RValue::get(
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
  case Builtin::BI_InterlockedCompareExchangePointer:
  case Builtin::BI_InterlockedCompareExchangePointer_nf: {
    llvm::Type *RTy;
    llvm::IntegerType *IntType =
      IntegerType::get(getLLVMContext(),
                       getContext().getTypeSize(E->getType()));
    llvm::Type *IntPtrType = IntType->getPointerTo();

    llvm::Value *Destination =
      Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);

    llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
    RTy = Exchange->getType();
    Exchange = Builder.CreatePtrToInt(Exchange, IntType);

    llvm::Value *Comparand =
      Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);

    auto Ordering =
      BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
      AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;

    auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
                                              Ordering, Ordering);
    Result->setVolatile(true);

    return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
                                                                         0),
                                              RTy));
  }
  case Builtin::BI_InterlockedCompareExchange8:
  case Builtin::BI_InterlockedCompareExchange16:
  case Builtin::BI_InterlockedCompareExchange:
  case Builtin::BI_InterlockedCompareExchange64:
    return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
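  // Illustrative example (not emitted verbatim; value names are made up):
  //   _InterlockedCompareExchange(&v, Exchange, Comparand);
  // lowers to a sequentially consistent compare-and-swap, roughly
  //   %pair = cmpxchg volatile ptr %v, i32 %comparand, i32 %exchange seq_cst seq_cst
  //   %old  = extractvalue { i32, i1 } %pair, 0
  // (the _nf pointer variant above uses monotonic ordering instead).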
  case Builtin::BI_InterlockedIncrement16:
  case Builtin::BI_InterlockedIncrement:
    return RValue::get(
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
  case Builtin::BI_InterlockedDecrement16:
  case Builtin::BI_InterlockedDecrement:
    return RValue::get(
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
  case Builtin::BI_InterlockedAnd8:
  case Builtin::BI_InterlockedAnd16:
  case Builtin::BI_InterlockedAnd:
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
  case Builtin::BI_InterlockedExchangeAdd8:
  case Builtin::BI_InterlockedExchangeAdd16:
  case Builtin::BI_InterlockedExchangeAdd:
    return RValue::get(
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
  case Builtin::BI_InterlockedExchangeSub8:
  case Builtin::BI_InterlockedExchangeSub16:
  case Builtin::BI_InterlockedExchangeSub:
    return RValue::get(
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
  case Builtin::BI_InterlockedOr8:
  case Builtin::BI_InterlockedOr16:
  case Builtin::BI_InterlockedOr:
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
  case Builtin::BI_InterlockedXor8:
  case Builtin::BI_InterlockedXor16:
  case Builtin::BI_InterlockedXor:
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));

  case Builtin::BI_bittest64:
  case Builtin::BI_bittest:
  case Builtin::BI_bittestandcomplement64:
  case Builtin::BI_bittestandcomplement:
  case Builtin::BI_bittestandreset64:
  case Builtin::BI_bittestandreset:
  case Builtin::BI_bittestandset64:
  case Builtin::BI_bittestandset:
  case Builtin::BI_interlockedbittestandreset:
  case Builtin::BI_interlockedbittestandreset64:
  case Builtin::BI_interlockedbittestandset64:
  case Builtin::BI_interlockedbittestandset:
  case Builtin::BI_interlockedbittestandset_acq:
  case Builtin::BI_interlockedbittestandset_rel:
  case Builtin::BI_interlockedbittestandset_nf:
  case Builtin::BI_interlockedbittestandreset_acq:
  case Builtin::BI_interlockedbittestandreset_rel:
  case Builtin::BI_interlockedbittestandreset_nf:
    return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
  // These builtins exist to emit regular volatile loads and stores not
  // affected by the -fms-volatile setting.
  case Builtin::BI__iso_volatile_load8:
  case Builtin::BI__iso_volatile_load16:
  case Builtin::BI__iso_volatile_load32:
  case Builtin::BI__iso_volatile_load64:
    return RValue::get(EmitISOVolatileLoad(*this, E));
  case Builtin::BI__iso_volatile_store8:
  case Builtin::BI__iso_volatile_store16:
  case Builtin::BI__iso_volatile_store32:
  case Builtin::BI__iso_volatile_store64:
    return RValue::get(EmitISOVolatileStore(*this, E));

  case Builtin::BI__exception_code:
  case Builtin::BI_exception_code:
    return RValue::get(EmitSEHExceptionCode());
  case Builtin::BI__exception_info:
  case Builtin::BI_exception_info:
    return RValue::get(EmitSEHExceptionInfo());
  case Builtin::BI__abnormal_termination:
  case Builtin::BI_abnormal_termination:
    return RValue::get(EmitSEHAbnormalTermination());
  case Builtin::BI_setjmpex:
    if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
        E->getArg(0)->getType()->isPointerType())
      return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
    break;
  case Builtin::BI_setjmp:
    if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
        E->getArg(0)->getType()->isPointerType()) {
      if (getTarget().getTriple().getArch() == llvm::Triple::x86)
        return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
      else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
        return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
      return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
    }
    break;
  // C++ std:: builtins.
  case Builtin::BImove:
  case Builtin::BImove_if_noexcept:
  case Builtin::BIforward:
  case Builtin::BIforward_like:
  case Builtin::BIas_const:
    return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
  case Builtin::BI__GetExceptionInfo: {
    if (llvm::GlobalVariable *GV =
            CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
      return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
    break;
  }

  case Builtin::BI__fastfail:
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
  case Builtin::BI__builtin_coro_id:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
  case Builtin::BI__builtin_coro_promise:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
  case Builtin::BI__builtin_coro_resume:
    EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
    return RValue::get(nullptr);
  case Builtin::BI__builtin_coro_frame:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
  case Builtin::BI__builtin_coro_noop:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
  case Builtin::BI__builtin_coro_free:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
  case Builtin::BI__builtin_coro_destroy:
    EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
    return RValue::get(nullptr);
  case Builtin::BI__builtin_coro_done:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
  case Builtin::BI__builtin_coro_alloc:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
  case Builtin::BI__builtin_coro_begin:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
  case Builtin::BI__builtin_coro_end:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
  case Builtin::BI__builtin_coro_suspend:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
  case Builtin::BI__builtin_coro_size:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
  case Builtin::BI__builtin_coro_align:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
4838 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
4839 case Builtin::BIread_pipe
:
4840 case Builtin::BIwrite_pipe
: {
4841 Value
*Arg0
= EmitScalarExpr(E
->getArg(0)),
4842 *Arg1
= EmitScalarExpr(E
->getArg(1));
4843 CGOpenCLRuntime
OpenCLRT(CGM
);
4844 Value
*PacketSize
= OpenCLRT
.getPipeElemSize(E
->getArg(0));
4845 Value
*PacketAlign
= OpenCLRT
.getPipeElemAlign(E
->getArg(0));
4847 // Type of the generic packet parameter.
4848 unsigned GenericAS
=
4849 getContext().getTargetAddressSpace(LangAS::opencl_generic
);
4850 llvm::Type
*I8PTy
= llvm::PointerType::get(
4851 llvm::Type::getInt8Ty(getLLVMContext()), GenericAS
);
4853 // Testing which overloaded version we should generate the call for.
4854 if (2U == E
->getNumArgs()) {
4855 const char *Name
= (BuiltinID
== Builtin::BIread_pipe
) ? "__read_pipe_2"
4857 // Creating a generic function type to be able to call with any builtin or
4858 // user defined type.
4859 llvm::Type
*ArgTys
[] = {Arg0
->getType(), I8PTy
, Int32Ty
, Int32Ty
};
4860 llvm::FunctionType
*FTy
= llvm::FunctionType::get(
4861 Int32Ty
, llvm::ArrayRef
<llvm::Type
*>(ArgTys
), false);
4862 Value
*BCast
= Builder
.CreatePointerCast(Arg1
, I8PTy
);
4864 EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
),
4865 {Arg0
, BCast
, PacketSize
, PacketAlign
}));
4867 assert(4 == E
->getNumArgs() &&
4868 "Illegal number of parameters to pipe function");
4869 const char *Name
= (BuiltinID
== Builtin::BIread_pipe
) ? "__read_pipe_4"
4872 llvm::Type
*ArgTys
[] = {Arg0
->getType(), Arg1
->getType(), Int32Ty
, I8PTy
,
4874 Value
*Arg2
= EmitScalarExpr(E
->getArg(2)),
4875 *Arg3
= EmitScalarExpr(E
->getArg(3));
4876 llvm::FunctionType
*FTy
= llvm::FunctionType::get(
4877 Int32Ty
, llvm::ArrayRef
<llvm::Type
*>(ArgTys
), false);
4878 Value
*BCast
= Builder
.CreatePointerCast(Arg3
, I8PTy
);
4879 // We know the third argument is an integer type, but we may need to cast
4881 if (Arg2
->getType() != Int32Ty
)
4882 Arg2
= Builder
.CreateZExtOrTrunc(Arg2
, Int32Ty
);
4884 EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
),
4885 {Arg0
, Arg1
, Arg2
, BCast
, PacketSize
, PacketAlign
}));
4888 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
4890 case Builtin::BIreserve_read_pipe
:
4891 case Builtin::BIreserve_write_pipe
:
4892 case Builtin::BIwork_group_reserve_read_pipe
:
4893 case Builtin::BIwork_group_reserve_write_pipe
:
4894 case Builtin::BIsub_group_reserve_read_pipe
:
4895 case Builtin::BIsub_group_reserve_write_pipe
: {
4896 // Composing the mangled name for the function.
4898 if (BuiltinID
== Builtin::BIreserve_read_pipe
)
4899 Name
= "__reserve_read_pipe";
4900 else if (BuiltinID
== Builtin::BIreserve_write_pipe
)
4901 Name
= "__reserve_write_pipe";
4902 else if (BuiltinID
== Builtin::BIwork_group_reserve_read_pipe
)
4903 Name
= "__work_group_reserve_read_pipe";
4904 else if (BuiltinID
== Builtin::BIwork_group_reserve_write_pipe
)
4905 Name
= "__work_group_reserve_write_pipe";
4906 else if (BuiltinID
== Builtin::BIsub_group_reserve_read_pipe
)
4907 Name
= "__sub_group_reserve_read_pipe";
4909 Name
= "__sub_group_reserve_write_pipe";
4911 Value
*Arg0
= EmitScalarExpr(E
->getArg(0)),
4912 *Arg1
= EmitScalarExpr(E
->getArg(1));
4913 llvm::Type
*ReservedIDTy
= ConvertType(getContext().OCLReserveIDTy
);
4914 CGOpenCLRuntime
OpenCLRT(CGM
);
4915 Value
*PacketSize
= OpenCLRT
.getPipeElemSize(E
->getArg(0));
4916 Value
*PacketAlign
= OpenCLRT
.getPipeElemAlign(E
->getArg(0));
4918 // Building the generic function prototype.
4919 llvm::Type
*ArgTys
[] = {Arg0
->getType(), Int32Ty
, Int32Ty
, Int32Ty
};
4920 llvm::FunctionType
*FTy
= llvm::FunctionType::get(
4921 ReservedIDTy
, llvm::ArrayRef
<llvm::Type
*>(ArgTys
), false);
4922 // We know the second argument is an integer type, but we may need to cast
4924 if (Arg1
->getType() != Int32Ty
)
4925 Arg1
= Builder
.CreateZExtOrTrunc(Arg1
, Int32Ty
);
4926 return RValue::get(EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
),
4927 {Arg0
, Arg1
, PacketSize
, PacketAlign
}));
4929 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
4931 case Builtin::BIcommit_read_pipe
:
4932 case Builtin::BIcommit_write_pipe
:
4933 case Builtin::BIwork_group_commit_read_pipe
:
4934 case Builtin::BIwork_group_commit_write_pipe
:
4935 case Builtin::BIsub_group_commit_read_pipe
:
4936 case Builtin::BIsub_group_commit_write_pipe
: {
4938 if (BuiltinID
== Builtin::BIcommit_read_pipe
)
4939 Name
= "__commit_read_pipe";
4940 else if (BuiltinID
== Builtin::BIcommit_write_pipe
)
4941 Name
= "__commit_write_pipe";
4942 else if (BuiltinID
== Builtin::BIwork_group_commit_read_pipe
)
4943 Name
= "__work_group_commit_read_pipe";
4944 else if (BuiltinID
== Builtin::BIwork_group_commit_write_pipe
)
4945 Name
= "__work_group_commit_write_pipe";
4946 else if (BuiltinID
== Builtin::BIsub_group_commit_read_pipe
)
4947 Name
= "__sub_group_commit_read_pipe";
4949 Name
= "__sub_group_commit_write_pipe";
4951 Value
*Arg0
= EmitScalarExpr(E
->getArg(0)),
4952 *Arg1
= EmitScalarExpr(E
->getArg(1));
4953 CGOpenCLRuntime
OpenCLRT(CGM
);
4954 Value
*PacketSize
= OpenCLRT
.getPipeElemSize(E
->getArg(0));
4955 Value
*PacketAlign
= OpenCLRT
.getPipeElemAlign(E
->getArg(0));
4957 // Building the generic function prototype.
4958 llvm::Type
*ArgTys
[] = {Arg0
->getType(), Arg1
->getType(), Int32Ty
, Int32Ty
};
4959 llvm::FunctionType
*FTy
=
4960 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
4961 llvm::ArrayRef
<llvm::Type
*>(ArgTys
), false);
4963 return RValue::get(EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
),
4964 {Arg0
, Arg1
, PacketSize
, PacketAlign
}));
4966 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
4967 case Builtin::BIget_pipe_num_packets
:
4968 case Builtin::BIget_pipe_max_packets
: {
4969 const char *BaseName
;
4970 const auto *PipeTy
= E
->getArg(0)->getType()->castAs
<PipeType
>();
4971 if (BuiltinID
== Builtin::BIget_pipe_num_packets
)
4972 BaseName
= "__get_pipe_num_packets";
4974 BaseName
= "__get_pipe_max_packets";
4975 std::string Name
= std::string(BaseName
) +
4976 std::string(PipeTy
->isReadOnly() ? "_ro" : "_wo");
4978 // Building the generic function prototype.
4979 Value
*Arg0
= EmitScalarExpr(E
->getArg(0));
4980 CGOpenCLRuntime
OpenCLRT(CGM
);
4981 Value
*PacketSize
= OpenCLRT
.getPipeElemSize(E
->getArg(0));
4982 Value
*PacketAlign
= OpenCLRT
.getPipeElemAlign(E
->getArg(0));
4983 llvm::Type
*ArgTys
[] = {Arg0
->getType(), Int32Ty
, Int32Ty
};
4984 llvm::FunctionType
*FTy
= llvm::FunctionType::get(
4985 Int32Ty
, llvm::ArrayRef
<llvm::Type
*>(ArgTys
), false);
4987 return RValue::get(EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
),
4988 {Arg0
, PacketSize
, PacketAlign
}));
4991 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
4992 case Builtin::BIto_global
:
4993 case Builtin::BIto_local
:
4994 case Builtin::BIto_private
: {
4995 auto Arg0
= EmitScalarExpr(E
->getArg(0));
4996 auto NewArgT
= llvm::PointerType::get(Int8Ty
,
4997 CGM
.getContext().getTargetAddressSpace(LangAS::opencl_generic
));
4998 auto NewRetT
= llvm::PointerType::get(Int8Ty
,
4999 CGM
.getContext().getTargetAddressSpace(
5000 E
->getType()->getPointeeType().getAddressSpace()));
5001 auto FTy
= llvm::FunctionType::get(NewRetT
, {NewArgT
}, false);
5002 llvm::Value
*NewArg
;
5003 if (Arg0
->getType()->getPointerAddressSpace() !=
5004 NewArgT
->getPointerAddressSpace())
5005 NewArg
= Builder
.CreateAddrSpaceCast(Arg0
, NewArgT
);
5007 NewArg
= Builder
.CreateBitOrPointerCast(Arg0
, NewArgT
);
5008 auto NewName
= std::string("__") + E
->getDirectCallee()->getName().str();
5010 EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, NewName
), {NewArg
});
5011 return RValue::get(Builder
.CreateBitOrPointerCast(NewCall
,
5012 ConvertType(E
->getType())));
5015 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
5016 // It contains four different overload formats specified in Table 6.13.17.1.
5017 case Builtin::BIenqueue_kernel
: {
5018 StringRef Name
; // Generated function call name
5019 unsigned NumArgs
= E
->getNumArgs();
5021 llvm::Type
*QueueTy
= ConvertType(getContext().OCLQueueTy
);
5022 llvm::Type
*GenericVoidPtrTy
= Builder
.getInt8PtrTy(
5023 getContext().getTargetAddressSpace(LangAS::opencl_generic
));
5025 llvm::Value
*Queue
= EmitScalarExpr(E
->getArg(0));
5026 llvm::Value
*Flags
= EmitScalarExpr(E
->getArg(1));
5027 LValue NDRangeL
= EmitAggExprToLValue(E
->getArg(2));
5028 llvm::Value
*Range
= NDRangeL
.getAddress(*this).getPointer();
5029 llvm::Type
*RangeTy
= NDRangeL
.getAddress(*this).getType();
5032 // The most basic form of the call with parameters:
5033 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
5034 Name
= "__enqueue_kernel_basic";
5035 llvm::Type
*ArgTys
[] = {QueueTy
, Int32Ty
, RangeTy
, GenericVoidPtrTy
,
5037 llvm::FunctionType
*FTy
= llvm::FunctionType::get(
5038 Int32Ty
, llvm::ArrayRef
<llvm::Type
*>(ArgTys
), false);
5041 CGM
.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E
->getArg(3));
5042 llvm::Value
*Kernel
=
5043 Builder
.CreatePointerCast(Info
.KernelHandle
, GenericVoidPtrTy
);
5044 llvm::Value
*Block
=
5045 Builder
.CreatePointerCast(Info
.BlockArg
, GenericVoidPtrTy
);
5047 AttrBuilder
B(Builder
.getContext());
5048 B
.addByValAttr(NDRangeL
.getAddress(*this).getElementType());
5049 llvm::AttributeList ByValAttrSet
=
5050 llvm::AttributeList::get(CGM
.getModule().getContext(), 3U, B
);
5053 EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
, ByValAttrSet
),
5054 {Queue
, Flags
, Range
, Kernel
, Block
});
5055 RTCall
->setAttributes(ByValAttrSet
);
5056 return RValue::get(RTCall
);
5058 assert(NumArgs
>= 5 && "Invalid enqueue_kernel signature");
5060 // Create a temporary array to hold the sizes of local pointer arguments
5061 // for the block. \p First is the position of the first size argument.
5062 auto CreateArrayForSizeVar
= [=](unsigned First
)
5063 -> std::tuple
<llvm::Value
*, llvm::Value
*, llvm::Value
*> {
5064 llvm::APInt
ArraySize(32, NumArgs
- First
);
5065 QualType SizeArrayTy
= getContext().getConstantArrayType(
5066 getContext().getSizeType(), ArraySize
, nullptr, ArrayType::Normal
,
5067 /*IndexTypeQuals=*/0);
5068 auto Tmp
= CreateMemTemp(SizeArrayTy
, "block_sizes");
5069 llvm::Value
*TmpPtr
= Tmp
.getPointer();
5070 llvm::Value
*TmpSize
= EmitLifetimeStart(
5071 CGM
.getDataLayout().getTypeAllocSize(Tmp
.getElementType()), TmpPtr
);
5072 llvm::Value
*ElemPtr
;
5073 // Each of the following arguments specifies the size of the corresponding
5074 // argument passed to the enqueued block.
5075 auto *Zero
= llvm::ConstantInt::get(IntTy
, 0);
5076 for (unsigned I
= First
; I
< NumArgs
; ++I
) {
5077 auto *Index
= llvm::ConstantInt::get(IntTy
, I
- First
);
5078 auto *GEP
= Builder
.CreateGEP(Tmp
.getElementType(), TmpPtr
,
5083 Builder
.CreateZExtOrTrunc(EmitScalarExpr(E
->getArg(I
)), SizeTy
);
5084 Builder
.CreateAlignedStore(
5085 V
, GEP
, CGM
.getDataLayout().getPrefTypeAlign(SizeTy
));
5087 return std::tie(ElemPtr
, TmpSize
, TmpPtr
);
5090 // Could have events and/or varargs.
5091 if (E
->getArg(3)->getType()->isBlockPointerType()) {
5092 // No events passed, but has variadic arguments.
5093 Name
= "__enqueue_kernel_varargs";
5095 CGM
.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E
->getArg(3));
5096 llvm::Value
*Kernel
=
5097 Builder
.CreatePointerCast(Info
.KernelHandle
, GenericVoidPtrTy
);
5098 auto *Block
= Builder
.CreatePointerCast(Info
.BlockArg
, GenericVoidPtrTy
);
5099 llvm::Value
*ElemPtr
, *TmpSize
, *TmpPtr
;
5100 std::tie(ElemPtr
, TmpSize
, TmpPtr
) = CreateArrayForSizeVar(4);
5102 // Create a vector of the arguments, as well as a constant value to
5103 // express to the runtime the number of variadic arguments.
5104 llvm::Value
*const Args
[] = {Queue
, Flags
,
5106 Block
, ConstantInt::get(IntTy
, NumArgs
- 4),
5108 llvm::Type
*const ArgTys
[] = {
5109 QueueTy
, IntTy
, RangeTy
, GenericVoidPtrTy
,
5110 GenericVoidPtrTy
, IntTy
, ElemPtr
->getType()};
5112 llvm::FunctionType
*FTy
= llvm::FunctionType::get(Int32Ty
, ArgTys
, false);
5113 auto Call
= RValue::get(
5114 EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
), Args
));
5116 EmitLifetimeEnd(TmpSize
, TmpPtr
);
5119 // Any calls now have event arguments passed.
5121 llvm::Type
*EventTy
= ConvertType(getContext().OCLClkEventTy
);
5122 llvm::PointerType
*EventPtrTy
= EventTy
->getPointerTo(
5123 CGM
.getContext().getTargetAddressSpace(LangAS::opencl_generic
));
5125 llvm::Value
*NumEvents
=
5126 Builder
.CreateZExtOrTrunc(EmitScalarExpr(E
->getArg(3)), Int32Ty
);
5128 // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments
5129 // to be a null pointer constant (including `0` literal), we can take it
5130 // into account and emit null pointer directly.
5131 llvm::Value
*EventWaitList
= nullptr;
5132 if (E
->getArg(4)->isNullPointerConstant(
5133 getContext(), Expr::NPC_ValueDependentIsNotNull
)) {
5134 EventWaitList
= llvm::ConstantPointerNull::get(EventPtrTy
);
5136 EventWaitList
= E
->getArg(4)->getType()->isArrayType()
5137 ? EmitArrayToPointerDecay(E
->getArg(4)).getPointer()
5138 : EmitScalarExpr(E
->getArg(4));
5139 // Convert to generic address space.
5140 EventWaitList
= Builder
.CreatePointerCast(EventWaitList
, EventPtrTy
);
5142 llvm::Value
*EventRet
= nullptr;
5143 if (E
->getArg(5)->isNullPointerConstant(
5144 getContext(), Expr::NPC_ValueDependentIsNotNull
)) {
5145 EventRet
= llvm::ConstantPointerNull::get(EventPtrTy
);
5148 Builder
.CreatePointerCast(EmitScalarExpr(E
->getArg(5)), EventPtrTy
);
5152 CGM
.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E
->getArg(6));
5153 llvm::Value
*Kernel
=
5154 Builder
.CreatePointerCast(Info
.KernelHandle
, GenericVoidPtrTy
);
5155 llvm::Value
*Block
=
5156 Builder
.CreatePointerCast(Info
.BlockArg
, GenericVoidPtrTy
);
5158 std::vector
<llvm::Type
*> ArgTys
= {
5159 QueueTy
, Int32Ty
, RangeTy
, Int32Ty
,
5160 EventPtrTy
, EventPtrTy
, GenericVoidPtrTy
, GenericVoidPtrTy
};
5162 std::vector
<llvm::Value
*> Args
= {Queue
, Flags
, Range
,
5163 NumEvents
, EventWaitList
, EventRet
,
5167 // Has events but no variadics.
5168 Name
= "__enqueue_kernel_basic_events";
5169 llvm::FunctionType
*FTy
= llvm::FunctionType::get(
5170 Int32Ty
, llvm::ArrayRef
<llvm::Type
*>(ArgTys
), false);
5172 EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
),
5173 llvm::ArrayRef
<llvm::Value
*>(Args
)));
5175 // Has event info and variadics
5176 // Pass the number of variadics to the runtime function too.
5177 Args
.push_back(ConstantInt::get(Int32Ty
, NumArgs
- 7));
5178 ArgTys
.push_back(Int32Ty
);
5179 Name
= "__enqueue_kernel_events_varargs";
5181 llvm::Value
*ElemPtr
, *TmpSize
, *TmpPtr
;
5182 std::tie(ElemPtr
, TmpSize
, TmpPtr
) = CreateArrayForSizeVar(7);
5183 Args
.push_back(ElemPtr
);
5184 ArgTys
.push_back(ElemPtr
->getType());
5186 llvm::FunctionType
*FTy
= llvm::FunctionType::get(
5187 Int32Ty
, llvm::ArrayRef
<llvm::Type
*>(ArgTys
), false);
5189 RValue::get(EmitRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
),
5190 llvm::ArrayRef
<llvm::Value
*>(Args
)));
5192 EmitLifetimeEnd(TmpSize
, TmpPtr
);
5197 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
5199 case Builtin::BIget_kernel_work_group_size
: {
5200 llvm::Type
*GenericVoidPtrTy
= Builder
.getInt8PtrTy(
5201 getContext().getTargetAddressSpace(LangAS::opencl_generic
));
5203 CGM
.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E
->getArg(0));
5205 Builder
.CreatePointerCast(Info
.KernelHandle
, GenericVoidPtrTy
);
5206 Value
*Arg
= Builder
.CreatePointerCast(Info
.BlockArg
, GenericVoidPtrTy
);
5207 return RValue::get(EmitRuntimeCall(
5208 CGM
.CreateRuntimeFunction(
5209 llvm::FunctionType::get(IntTy
, {GenericVoidPtrTy
, GenericVoidPtrTy
},
5211 "__get_kernel_work_group_size_impl"),
5214 case Builtin::BIget_kernel_preferred_work_group_size_multiple
: {
5215 llvm::Type
*GenericVoidPtrTy
= Builder
.getInt8PtrTy(
5216 getContext().getTargetAddressSpace(LangAS::opencl_generic
));
5218 CGM
.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E
->getArg(0));
5220 Builder
.CreatePointerCast(Info
.KernelHandle
, GenericVoidPtrTy
);
5221 Value
*Arg
= Builder
.CreatePointerCast(Info
.BlockArg
, GenericVoidPtrTy
);
5222 return RValue::get(EmitRuntimeCall(
5223 CGM
.CreateRuntimeFunction(
5224 llvm::FunctionType::get(IntTy
, {GenericVoidPtrTy
, GenericVoidPtrTy
},
5226 "__get_kernel_preferred_work_group_size_multiple_impl"),
5229 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange
:
5230 case Builtin::BIget_kernel_sub_group_count_for_ndrange
: {
5231 llvm::Type
*GenericVoidPtrTy
= Builder
.getInt8PtrTy(
5232 getContext().getTargetAddressSpace(LangAS::opencl_generic
));
5233 LValue NDRangeL
= EmitAggExprToLValue(E
->getArg(0));
5234 llvm::Value
*NDRange
= NDRangeL
.getAddress(*this).getPointer();
5236 CGM
.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E
->getArg(1));
5238 Builder
.CreatePointerCast(Info
.KernelHandle
, GenericVoidPtrTy
);
5239 Value
*Block
= Builder
.CreatePointerCast(Info
.BlockArg
, GenericVoidPtrTy
);
5241 BuiltinID
== Builtin::BIget_kernel_max_sub_group_size_for_ndrange
5242 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
5243 : "__get_kernel_sub_group_count_for_ndrange_impl";
5244 return RValue::get(EmitRuntimeCall(
5245 CGM
.CreateRuntimeFunction(
5246 llvm::FunctionType::get(
5247 IntTy
, {NDRange
->getType(), GenericVoidPtrTy
, GenericVoidPtrTy
},
5250 {NDRange
, Kernel
, Block
}));
  case Builtin::BI__builtin_store_half:
  case Builtin::BI__builtin_store_halff: {
    Value *Val = EmitScalarExpr(E->getArg(0));
    Address Address = EmitPointerWithAlignment(E->getArg(1));
    Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
    Builder.CreateStore(HalfVal, Address);
    return RValue::get(nullptr);
  }
  case Builtin::BI__builtin_load_half: {
    Address Address = EmitPointerWithAlignment(E->getArg(0));
    Value *HalfVal = Builder.CreateLoad(Address);
    return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
  }
  case Builtin::BI__builtin_load_halff: {
    Address Address = EmitPointerWithAlignment(E->getArg(0));
    Value *HalfVal = Builder.CreateLoad(Address);
    return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
  }
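  // Illustrative example (not emitted verbatim):
  //   __builtin_store_halff(f, p);         // fptrunc float %f to half, then store
  //   float g = __builtin_load_halff(p);   // load half, then fpext to float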
  case Builtin::BIprintf:
    if (getTarget().getTriple().isNVPTX() ||
        getTarget().getTriple().isAMDGCN()) {
      if (getLangOpts().OpenMPIsDevice)
        return EmitOpenMPDevicePrintfCallExpr(E);
      if (getTarget().getTriple().isNVPTX())
        return EmitNVPTXDevicePrintfCallExpr(E);
      if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
        return EmitAMDGPUDevicePrintfCallExpr(E);
    }

    break;
  case Builtin::BI__builtin_canonicalize:
  case Builtin::BI__builtin_canonicalizef:
  case Builtin::BI__builtin_canonicalizef16:
  case Builtin::BI__builtin_canonicalizel:
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
  case Builtin::BI__builtin_thread_pointer: {
    if (!getContext().getTargetInfo().isTLSSupported())
      CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
    // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
    break;
  }
  case Builtin::BI__builtin_os_log_format:
    return emitBuiltinOSLogFormat(*E);
5298 case Builtin::BI__xray_customevent
: {
5299 if (!ShouldXRayInstrumentFunction())
5300 return RValue::getIgnored();
5302 if (!CGM
.getCodeGenOpts().XRayInstrumentationBundle
.has(
5303 XRayInstrKind::Custom
))
5304 return RValue::getIgnored();
5306 if (const auto *XRayAttr
= CurFuncDecl
->getAttr
<XRayInstrumentAttr
>())
5307 if (XRayAttr
->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
5308 return RValue::getIgnored();
5310 Function
*F
= CGM
.getIntrinsic(Intrinsic::xray_customevent
);
5311 auto FTy
= F
->getFunctionType();
5312 auto Arg0
= E
->getArg(0);
5313 auto Arg0Val
= EmitScalarExpr(Arg0
);
5314 auto Arg0Ty
= Arg0
->getType();
5315 auto PTy0
= FTy
->getParamType(0);
5316 if (PTy0
!= Arg0Val
->getType()) {
5317 if (Arg0Ty
->isArrayType())
5318 Arg0Val
= EmitArrayToPointerDecay(Arg0
).getPointer();
5320 Arg0Val
= Builder
.CreatePointerCast(Arg0Val
, PTy0
);
5322 auto Arg1
= EmitScalarExpr(E
->getArg(1));
5323 auto PTy1
= FTy
->getParamType(1);
5324 if (PTy1
!= Arg1
->getType())
5325 Arg1
= Builder
.CreateTruncOrBitCast(Arg1
, PTy1
);
5326 return RValue::get(Builder
.CreateCall(F
, {Arg0Val
, Arg1
}));
5329 case Builtin::BI__xray_typedevent
: {
5330 // TODO: There should be a way to always emit events even if the current
5331 // function is not instrumented. Losing events in a stream can cripple
5333 if (!ShouldXRayInstrumentFunction())
5334 return RValue::getIgnored();
5336 if (!CGM
.getCodeGenOpts().XRayInstrumentationBundle
.has(
5337 XRayInstrKind::Typed
))
5338 return RValue::getIgnored();
5340 if (const auto *XRayAttr
= CurFuncDecl
->getAttr
<XRayInstrumentAttr
>())
5341 if (XRayAttr
->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
5342 return RValue::getIgnored();
5344 Function
*F
= CGM
.getIntrinsic(Intrinsic::xray_typedevent
);
5345 auto FTy
= F
->getFunctionType();
5346 auto Arg0
= EmitScalarExpr(E
->getArg(0));
5347 auto PTy0
= FTy
->getParamType(0);
5348 if (PTy0
!= Arg0
->getType())
5349 Arg0
= Builder
.CreateTruncOrBitCast(Arg0
, PTy0
);
5350 auto Arg1
= E
->getArg(1);
5351 auto Arg1Val
= EmitScalarExpr(Arg1
);
5352 auto Arg1Ty
= Arg1
->getType();
5353 auto PTy1
= FTy
->getParamType(1);
5354 if (PTy1
!= Arg1Val
->getType()) {
5355 if (Arg1Ty
->isArrayType())
5356 Arg1Val
= EmitArrayToPointerDecay(Arg1
).getPointer();
5358 Arg1Val
= Builder
.CreatePointerCast(Arg1Val
, PTy1
);
5360 auto Arg2
= EmitScalarExpr(E
->getArg(2));
5361 auto PTy2
= FTy
->getParamType(2);
5362 if (PTy2
!= Arg2
->getType())
5363 Arg2
= Builder
.CreateTruncOrBitCast(Arg2
, PTy2
);
5364 return RValue::get(Builder
.CreateCall(F
, {Arg0
, Arg1Val
, Arg2
}));
  case Builtin::BI__builtin_ms_va_start:
  case Builtin::BI__builtin_ms_va_end:
    return RValue::get(
        EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
                       BuiltinID == Builtin::BI__builtin_ms_va_start));
  case Builtin::BI__builtin_ms_va_copy: {
    // Lower this manually. We can't reliably determine whether or not any
    // given va_copy() is for a Win64 va_list from the calling convention
    // alone, because it's legal to do this from a System V ABI function.
    // With opaque pointer types, we won't have enough information in LLVM
    // IR to determine this from the argument types, either. Best to do it
    // now, while we have enough information.
    Address DestAddr = EmitMSVAListRef(E->getArg(0));
    Address SrcAddr = EmitMSVAListRef(E->getArg(1));

    llvm::Type *BPP = Int8PtrPtrTy;

    DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
                       Int8PtrTy, DestAddr.getAlignment());
    SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
                      Int8PtrTy, SrcAddr.getAlignment());

    Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
    return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
  }
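  // Illustrative example (not emitted verbatim): __builtin_ms_va_copy(dst, src)
  // simply copies the current argument pointer, roughly
  //   %ap.val = load ptr, ptr %src
  //   store ptr %ap.val, ptr %dst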
  case Builtin::BI__builtin_get_device_side_mangled_name: {
    auto Name = CGM.getCUDARuntime().getDeviceSideName(
        cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
    auto Str = CGM.GetAddrOfConstantCString(Name, "");
    llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
                               llvm::ConstantInt::get(SizeTy, 0)};
    auto *Ptr = llvm::ConstantExpr::getGetElementPtr(Str.getElementType(),
                                                     Str.getPointer(), Zeros);
    return RValue::get(Ptr);
  }
  }

  // If this is an alias for a lib function (e.g. __builtin_sin), emit
  // the call using the normal call path, but using the unmangled
  // version of the function name.
  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
    return emitLibraryCall(*this, FD, E,
                           CGM.getBuiltinLibFunction(FD, BuiltinID));

  // If this is a predefined lib function (e.g. malloc), emit the call
  // using exactly the normal call path.
  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
    return emitLibraryCall(*this, FD, E,
                           cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));

  // Check that a call to a target specific builtin has the correct target
  // features.
  // This is down here to avoid non-target specific builtins, however, if
  // generic builtins start to require generic target features then we
  // can move this up to the beginning of the function.
  checkTargetFeatures(E, FD);

  if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
    LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);

  // See if we have a target specific intrinsic.
  StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
  StringRef Prefix =
      llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
  if (!Prefix.empty()) {
    IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
    // NOTE we don't need to perform a compatibility flag check here since the
    // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
    // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
    if (IntrinsicID == Intrinsic::not_intrinsic)
      IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
  }
5443 if (IntrinsicID
!= Intrinsic::not_intrinsic
) {
5444 SmallVector
<Value
*, 16> Args
;
5446 // Find out if any arguments are required to be integer constant
5448 unsigned ICEArguments
= 0;
5449 ASTContext::GetBuiltinTypeError Error
;
5450 getContext().GetBuiltinType(BuiltinID
, Error
, &ICEArguments
);
5451 assert(Error
== ASTContext::GE_None
&& "Should not codegen an error");
5453 Function
*F
= CGM
.getIntrinsic(IntrinsicID
);
5454 llvm::FunctionType
*FTy
= F
->getFunctionType();
5456 for (unsigned i
= 0, e
= E
->getNumArgs(); i
!= e
; ++i
) {
5458 // If this is a normal argument, just emit it as a scalar.
5459 if ((ICEArguments
& (1 << i
)) == 0) {
5460 ArgValue
= EmitScalarExpr(E
->getArg(i
));
5462 // If this is required to be a constant, constant fold it so that we
5463 // know that the generated intrinsic gets a ConstantInt.
5464 ArgValue
= llvm::ConstantInt::get(
5466 *E
->getArg(i
)->getIntegerConstantExpr(getContext()));
5469 // If the intrinsic arg type is different from the builtin arg type
5470 // we need to do a bit cast.
5471 llvm::Type
*PTy
= FTy
->getParamType(i
);
5472 if (PTy
!= ArgValue
->getType()) {
5473 // XXX - vector of pointers?
5474 if (auto *PtrTy
= dyn_cast
<llvm::PointerType
>(PTy
)) {
5475 if (PtrTy
->getAddressSpace() !=
5476 ArgValue
->getType()->getPointerAddressSpace()) {
5477 ArgValue
= Builder
.CreateAddrSpaceCast(
5479 ArgValue
->getType()->getPointerTo(PtrTy
->getAddressSpace()));
5483 assert(PTy
->canLosslesslyBitCastTo(FTy
->getParamType(i
)) &&
5484 "Must be able to losslessly bit cast to param");
5485 // Cast vector type (e.g., v256i32) to x86_amx, this only happen
5486 // in amx intrinsics.
5487 if (PTy
->isX86_AMXTy())
5488 ArgValue
= Builder
.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile
,
5489 {ArgValue
->getType()}, {ArgValue
});
5491 ArgValue
= Builder
.CreateBitCast(ArgValue
, PTy
);
5494 Args
.push_back(ArgValue
);
5497 Value
*V
= Builder
.CreateCall(F
, Args
);
5498 QualType BuiltinRetType
= E
->getType();
5500 llvm::Type
*RetTy
= VoidTy
;
5501 if (!BuiltinRetType
->isVoidType())
5502 RetTy
= ConvertType(BuiltinRetType
);
5504 if (RetTy
!= V
->getType()) {
5505 // XXX - vector of pointers?
5506 if (auto *PtrTy
= dyn_cast
<llvm::PointerType
>(RetTy
)) {
5507 if (PtrTy
->getAddressSpace() != V
->getType()->getPointerAddressSpace()) {
5508 V
= Builder
.CreateAddrSpaceCast(
5509 V
, V
->getType()->getPointerTo(PtrTy
->getAddressSpace()));
5513 assert(V
->getType()->canLosslesslyBitCastTo(RetTy
) &&
5514 "Must be able to losslessly bit cast result type");
5515 // Cast x86_amx to vector type (e.g., v256i32), this only happen
5516 // in amx intrinsics.
5517 if (V
->getType()->isX86_AMXTy())
5518 V
= Builder
.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector
, {RetTy
},
5521 V
= Builder
.CreateBitCast(V
, RetTy
);
5524 if (RetTy
->isVoidTy())
5525 return RValue::get(nullptr);
5527 return RValue::get(V
);
5530 // Some target-specific builtins can have aggregate return values, e.g.
5531 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
5532 // ReturnValue to be non-null, so that the target-specific emission code can
5533 // always just emit into it.
5534 TypeEvaluationKind EvalKind
= getEvaluationKind(E
->getType());
5535 if (EvalKind
== TEK_Aggregate
&& ReturnValue
.isNull()) {
5536 Address DestPtr
= CreateMemTemp(E
->getType(), "agg.tmp");
5537 ReturnValue
= ReturnValueSlot(DestPtr
, false);
5540 // Now see if we can emit a target-specific builtin.
5541 if (Value
*V
= EmitTargetBuiltinExpr(BuiltinID
, E
, ReturnValue
)) {
5544 if (V
->getType()->isVoidTy())
5545 return RValue::get(nullptr);
5546 return RValue::get(V
);
5548 return RValue::getAggregate(ReturnValue
.getValue(),
5549 ReturnValue
.isVolatile());
5551 llvm_unreachable("No current target builtin returns complex");
5553 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
5556 ErrorUnsupported(E
, "builtin function");
5558 // Unknown builtin, for now just dump it out and return undef.
5559 return GetUndefRValue(E
->getType());
static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
                                        unsigned BuiltinID, const CallExpr *E,
                                        ReturnValueSlot ReturnValue,
                                        llvm::Triple::ArchType Arch) {
  switch (Arch) {
  case llvm::Triple::arm:
  case llvm::Triple::armeb:
  case llvm::Triple::thumb:
  case llvm::Triple::thumbeb:
    return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
  case llvm::Triple::aarch64:
  case llvm::Triple::aarch64_32:
  case llvm::Triple::aarch64_be:
    return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
  case llvm::Triple::bpfeb:
  case llvm::Triple::bpfel:
    return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
  case llvm::Triple::x86:
  case llvm::Triple::x86_64:
    return CGF->EmitX86BuiltinExpr(BuiltinID, E);
  case llvm::Triple::ppc:
  case llvm::Triple::ppcle:
  case llvm::Triple::ppc64:
  case llvm::Triple::ppc64le:
    return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
  case llvm::Triple::r600:
  case llvm::Triple::amdgcn:
    return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
  case llvm::Triple::systemz:
    return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
  case llvm::Triple::nvptx:
  case llvm::Triple::nvptx64:
    return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
  case llvm::Triple::wasm32:
  case llvm::Triple::wasm64:
    return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
  case llvm::Triple::hexagon:
    return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
  case llvm::Triple::riscv32:
  case llvm::Triple::riscv64:
    return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
  case llvm::Triple::loongarch32:
  case llvm::Triple::loongarch64:
    return CGF->EmitLoongArchBuiltinExpr(BuiltinID, E);
  default:
    return nullptr;
  }
}

Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E,
                                              ReturnValueSlot ReturnValue) {
  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
    assert(getContext().getAuxTargetInfo() && "Missing aux target info");
    return EmitTargetArchBuiltinExpr(
        this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
        ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
  }

  return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
                                   getTarget().getTriple().getArch());
}
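
// Note (illustrative): for an aux-target builtin, e.g. an x86 builtin seen
// while building offload code whose aux target is x86, the BuiltinID is
// translated back to the aux target's ID and dispatched using the aux triple's
// architecture, so the corresponding Emit*BuiltinExpr hook still handles it.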
static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags TypeFlags,
                                          bool HasLegalHalfType = true,
                                          bool V1Ty = false,
                                          bool AllowBFloatArgsAndRet = true) {
  int IsQuad = TypeFlags.isQuad();
  switch (TypeFlags.getEltType()) {
  case NeonTypeFlags::Int8:
  case NeonTypeFlags::Poly8:
    return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
  case NeonTypeFlags::Int16:
  case NeonTypeFlags::Poly16:
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::BFloat16:
    if (AllowBFloatArgsAndRet)
      return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
    else
      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Float16:
    if (HasLegalHalfType)
      return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
    else
      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Int64:
  case NeonTypeFlags::Poly64:
    return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
  case NeonTypeFlags::Poly128:
    // FIXME: i128 and f128 are not fully supported in Clang and LLVM yet, and
    // much of the i128/f128 API is missing, so represent poly128 as v16i8 and
    // rely on pattern matching.
    return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
  case NeonTypeFlags::Float32:
    return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Float64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
  }
  llvm_unreachable("Unknown vector element type!");
}

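// Worked example: flags with element type Int32 and the quad bit set map to a
// <4 x i32> (2 << 1 lanes); without the quad bit the result is <2 x i32>, and
// V1Ty forces a single-element vector regardless of the quad bit.
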
static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags IntTypeFlags) {
  int IsQuad = IntTypeFlags.isQuad();
  switch (IntTypeFlags.getEltType()) {
  case NeonTypeFlags::Int16:
    return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
  case NeonTypeFlags::Int64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
  default:
    llvm_unreachable("Type can't be converted to floating-point!");
  }
}

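// Worked example: the integer flags of a fixed-point conversion select the
// matching float vector, e.g. Int32 with the quad bit set yields <4 x float>,
// the counterpart of the <4 x i32> produced by GetNeonType above.
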
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
                                      const ElementCount &Count) {
  Value *SV = llvm::ConstantVector::getSplat(Count, C);
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}

Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
  return EmitNeonSplat(V, C, EC);
}

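// Illustrative example: splatting lane 1 of a <4 x i32> builds the constant
// mask <1, 1, 1, 1> from C and emits a shufflevector that repeats that lane
// across every element of the result.
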
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
                                     const char *name,
                                     unsigned shift, bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    if (F->isConstrainedFPIntrinsic())
      if (ai->getType()->isMetadataTy())
        continue;
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
  }

  if (F->isConstrainedFPIntrinsic())
    return Builder.CreateConstrainedFPCall(F, Ops, name);
  else
    return Builder.CreateCall(F, Ops, name);
}

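// Usage sketch (an inference from the loop above): callers lowering a
// shift-by-immediate builtin pass the index of the immediate operand as
// 'shift'; that operand is expanded into a constant shift vector by
// EmitNeonShiftVector, while every other operand is simply bitcast to the
// parameter type the intrinsic expects.
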
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
                                            bool neg) {
  int SV = cast<ConstantInt>(V)->getSExtValue();
  return ConstantInt::get(Ty, neg ? -SV : SV);
}

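// Illustrative example: with 'neg' set, a constant shift amount of 3 becomes a
// splat of -3 in the vector type Ty, matching the convention that the NEON
// shift intrinsics encode right shifts as negative per-lane amounts.
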
// Right-shift a vector by a constant.
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
                                          llvm::Type *Ty, bool usgn,
                                          const char *name) {
  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);

  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
  int EltSize = VTy->getScalarSizeInBits();

  Vec = Builder.CreateBitCast(Vec, Ty);

  // lshr/ashr are undefined when the shift amount is equal to the vector
  // element size.
  if (ShiftAmt == EltSize) {
    if (usgn) {
      // Right-shifting an unsigned value by its size yields 0.
      return llvm::ConstantAggregateZero::get(VTy);
    } else {
      // Right-shifting a signed value by its size is equivalent
      // to a shift of size-1.
      --ShiftAmt;
      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
    }
  }

  Shift = EmitNeonShiftVector(Shift, Ty, false);
  if (usgn)
    return Builder.CreateLShr(Vec, Shift, name);
  else
    return Builder.CreateAShr(Vec, Shift, name);
}

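// Worked example: an unsigned right shift by 16 on a <4 x i16> folds to the
// all-zero constant, while a signed one is clamped to a shift by 15 so the
// emitted ashr keeps replicating the sign bit instead of producing poison.
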
namespace {
enum {
  AddRetType = (1 << 0),
  Add1ArgType = (1 << 1),
  Add2ArgTypes = (1 << 2),

  VectorizeRetType = (1 << 3),
  VectorizeArgTypes = (1 << 4),

  InventFloatType = (1 << 5),
  UnsignedAlts = (1 << 6),

  Use64BitVectors = (1 << 7),
  Use128BitVectors = (1 << 8),

  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  VectorRet = AddRetType | VectorizeRetType,
  VectorRetGetArgs01 =
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
  FpCmpzModifiers =
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
};

struct ARMVectorIntrinsicInfo {
  const char *NameHint;
  unsigned BuiltinID;
  unsigned LLVMIntrinsic;
  unsigned AltLLVMIntrinsic;
  uint64_t TypeModifier;

  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }
  bool operator<(const ARMVectorIntrinsicInfo &TE) const {
    return BuiltinID < TE.BuiltinID;
  }
};
} // end anonymous namespace

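// Note (an assumption about how the tables below are consumed): the two
// operator< overloads allow the intrinsic tables, which are kept sorted by
// BuiltinID, to be binary-searched with either a bare builtin ID or another
// table entry as the key.
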
#define NEONMAP0(NameBase) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }

#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, 0, TypeModifier }

#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
    TypeModifier }

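// Illustrative expansion: NEONMAP1(vabs_v, arm_neon_vabs, 0) produces the
// ARMVectorIntrinsicInfo initializer
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 },
// i.e. no alternate intrinsic and no type-modifier flags, while NEONMAP0
// records a builtin with no directly mapped intrinsic (LLVMIntrinsic == 0).
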
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
  NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
  NEONMAP0(splat_lane_v),
  NEONMAP0(splat_laneq_v),
  NEONMAP0(splatq_lane_v),
  NEONMAP0(splatq_laneq_v),
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
  NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP0(vcvt_f16_s16),
  NEONMAP0(vcvt_f16_u16),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s16_f16),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u16_f16),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
  NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f16_s16),
  NEONMAP0(vcvtq_f16_u16),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s16_f16),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u16_f16),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP1(vdot_s32, arm_neon_sdot, 0),
  NEONMAP1(vdot_u32, arm_neon_udot, 0),
  NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
  NEONMAP1(vdotq_u32, arm_neon_udot, 0),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
  NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
  NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
  NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
  NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
};

static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap [] = {
  NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
  NEONMAP0(splat_lane_v),
  NEONMAP0(splat_laneq_v),
  NEONMAP0(splatq_lane_v),
  NEONMAP0(splatq_laneq_v),
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vaddq_p128),
  NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
  NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
  NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP0(vcvt_f16_s16),
  NEONMAP0(vcvt_f16_u16),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f16_s16),
  NEONMAP0(vcvtq_f16_u16),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
  NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
  NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
  NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
  NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
  NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
  NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
  NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
  NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
  NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
  NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
  NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
  NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
  NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
  NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
  NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
  NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
  NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
  NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
  NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
  NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
  NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
  NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
  NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
  NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
  NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
  NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
  NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
  NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
  NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
  NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
  NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
  NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
  NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
  NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
  NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
  NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
  NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
  NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
  NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
  NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
  NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
  NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
  NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
  NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
  NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
  NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
  NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
  NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
  NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
  NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
  NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
  NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
};

static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap [] = {
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
  // FP16 scalar intrinsics go here.
  NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
};

// Some intrinsics are equivalent for codegen.
static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap [] = {
  { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
  { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
  { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
  { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
  { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
  { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
  { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
  { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
  { NEON::BI__builtin_neon_vbsl_f16, NEON::BI__builtin_neon_vbsl_v, },
  { NEON::BI__builtin_neon_vbslq_f16, NEON::BI__builtin_neon_vbslq_v, },
  { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
  { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
  { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
  { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
  { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
  { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
  { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
  { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
  { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
  { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
  { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
  { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
  { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
  { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
  { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
  { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
  { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
  { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
  { NEON::BI__builtin_neon_vext_f16, NEON::BI__builtin_neon_vext_v, },
  { NEON::BI__builtin_neon_vextq_f16, NEON::BI__builtin_neon_vextq_v, },
  { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
  { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
  { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
  { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
  { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
  { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
  { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
  { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
  { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
  { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
  { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
  { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
  { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
  { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
  { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
  { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
  { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
  { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
  { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
  { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
  { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
  { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
  { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
  { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
  { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
  { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
  { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
  { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
  { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
  { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
  { NEON::BI__builtin_neon_vld4q_bf16
, NEON::BI__builtin_neon_vld4q_v
},
6693 { NEON::BI__builtin_neon_vld4q_dup_bf16
, NEON::BI__builtin_neon_vld4q_dup_v
},
6694 { NEON::BI__builtin_neon_vld4q_lane_bf16
, NEON::BI__builtin_neon_vld4q_lane_v
},
6695 { NEON::BI__builtin_neon_vmax_f16
, NEON::BI__builtin_neon_vmax_v
, },
6696 { NEON::BI__builtin_neon_vmaxnm_f16
, NEON::BI__builtin_neon_vmaxnm_v
, },
6697 { NEON::BI__builtin_neon_vmaxnmq_f16
, NEON::BI__builtin_neon_vmaxnmq_v
, },
6698 { NEON::BI__builtin_neon_vmaxq_f16
, NEON::BI__builtin_neon_vmaxq_v
, },
6699 { NEON::BI__builtin_neon_vmin_f16
, NEON::BI__builtin_neon_vmin_v
, },
6700 { NEON::BI__builtin_neon_vminnm_f16
, NEON::BI__builtin_neon_vminnm_v
, },
6701 { NEON::BI__builtin_neon_vminnmq_f16
, NEON::BI__builtin_neon_vminnmq_v
, },
6702 { NEON::BI__builtin_neon_vminq_f16
, NEON::BI__builtin_neon_vminq_v
, },
6703 { NEON::BI__builtin_neon_vmulx_f16
, NEON::BI__builtin_neon_vmulx_v
, },
6704 { NEON::BI__builtin_neon_vmulxq_f16
, NEON::BI__builtin_neon_vmulxq_v
, },
6705 { NEON::BI__builtin_neon_vpadd_f16
, NEON::BI__builtin_neon_vpadd_v
, },
6706 { NEON::BI__builtin_neon_vpaddq_f16
, NEON::BI__builtin_neon_vpaddq_v
, },
6707 { NEON::BI__builtin_neon_vpmax_f16
, NEON::BI__builtin_neon_vpmax_v
, },
6708 { NEON::BI__builtin_neon_vpmaxnm_f16
, NEON::BI__builtin_neon_vpmaxnm_v
, },
6709 { NEON::BI__builtin_neon_vpmaxnmq_f16
, NEON::BI__builtin_neon_vpmaxnmq_v
, },
6710 { NEON::BI__builtin_neon_vpmaxq_f16
, NEON::BI__builtin_neon_vpmaxq_v
, },
6711 { NEON::BI__builtin_neon_vpmin_f16
, NEON::BI__builtin_neon_vpmin_v
, },
6712 { NEON::BI__builtin_neon_vpminnm_f16
, NEON::BI__builtin_neon_vpminnm_v
, },
6713 { NEON::BI__builtin_neon_vpminnmq_f16
, NEON::BI__builtin_neon_vpminnmq_v
, },
6714 { NEON::BI__builtin_neon_vpminq_f16
, NEON::BI__builtin_neon_vpminq_v
, },
6715 { NEON::BI__builtin_neon_vrecpe_f16
, NEON::BI__builtin_neon_vrecpe_v
, },
6716 { NEON::BI__builtin_neon_vrecpeq_f16
, NEON::BI__builtin_neon_vrecpeq_v
, },
6717 { NEON::BI__builtin_neon_vrecps_f16
, NEON::BI__builtin_neon_vrecps_v
, },
6718 { NEON::BI__builtin_neon_vrecpsq_f16
, NEON::BI__builtin_neon_vrecpsq_v
, },
6719 { NEON::BI__builtin_neon_vrnd_f16
, NEON::BI__builtin_neon_vrnd_v
, },
6720 { NEON::BI__builtin_neon_vrnda_f16
, NEON::BI__builtin_neon_vrnda_v
, },
6721 { NEON::BI__builtin_neon_vrndaq_f16
, NEON::BI__builtin_neon_vrndaq_v
, },
6722 { NEON::BI__builtin_neon_vrndi_f16
, NEON::BI__builtin_neon_vrndi_v
, },
6723 { NEON::BI__builtin_neon_vrndiq_f16
, NEON::BI__builtin_neon_vrndiq_v
, },
6724 { NEON::BI__builtin_neon_vrndm_f16
, NEON::BI__builtin_neon_vrndm_v
, },
6725 { NEON::BI__builtin_neon_vrndmq_f16
, NEON::BI__builtin_neon_vrndmq_v
, },
6726 { NEON::BI__builtin_neon_vrndn_f16
, NEON::BI__builtin_neon_vrndn_v
, },
6727 { NEON::BI__builtin_neon_vrndnq_f16
, NEON::BI__builtin_neon_vrndnq_v
, },
6728 { NEON::BI__builtin_neon_vrndp_f16
, NEON::BI__builtin_neon_vrndp_v
, },
6729 { NEON::BI__builtin_neon_vrndpq_f16
, NEON::BI__builtin_neon_vrndpq_v
, },
6730 { NEON::BI__builtin_neon_vrndq_f16
, NEON::BI__builtin_neon_vrndq_v
, },
6731 { NEON::BI__builtin_neon_vrndx_f16
, NEON::BI__builtin_neon_vrndx_v
, },
6732 { NEON::BI__builtin_neon_vrndxq_f16
, NEON::BI__builtin_neon_vrndxq_v
, },
6733 { NEON::BI__builtin_neon_vrsqrte_f16
, NEON::BI__builtin_neon_vrsqrte_v
, },
6734 { NEON::BI__builtin_neon_vrsqrteq_f16
, NEON::BI__builtin_neon_vrsqrteq_v
, },
6735 { NEON::BI__builtin_neon_vrsqrts_f16
, NEON::BI__builtin_neon_vrsqrts_v
, },
6736 { NEON::BI__builtin_neon_vrsqrtsq_f16
, NEON::BI__builtin_neon_vrsqrtsq_v
, },
6737 { NEON::BI__builtin_neon_vsqrt_f16
, NEON::BI__builtin_neon_vsqrt_v
, },
6738 { NEON::BI__builtin_neon_vsqrtq_f16
, NEON::BI__builtin_neon_vsqrtq_v
, },
6739 { NEON::BI__builtin_neon_vst1_bf16_x2
, NEON::BI__builtin_neon_vst1_x2_v
},
6740 { NEON::BI__builtin_neon_vst1_bf16_x3
, NEON::BI__builtin_neon_vst1_x3_v
},
6741 { NEON::BI__builtin_neon_vst1_bf16_x4
, NEON::BI__builtin_neon_vst1_x4_v
},
6742 { NEON::BI__builtin_neon_vst1_bf16
, NEON::BI__builtin_neon_vst1_v
},
6743 { NEON::BI__builtin_neon_vst1_lane_bf16
, NEON::BI__builtin_neon_vst1_lane_v
},
6744 { NEON::BI__builtin_neon_vst1q_bf16_x2
, NEON::BI__builtin_neon_vst1q_x2_v
},
6745 { NEON::BI__builtin_neon_vst1q_bf16_x3
, NEON::BI__builtin_neon_vst1q_x3_v
},
6746 { NEON::BI__builtin_neon_vst1q_bf16_x4
, NEON::BI__builtin_neon_vst1q_x4_v
},
6747 { NEON::BI__builtin_neon_vst1q_bf16
, NEON::BI__builtin_neon_vst1q_v
},
6748 { NEON::BI__builtin_neon_vst1q_lane_bf16
, NEON::BI__builtin_neon_vst1q_lane_v
},
6749 { NEON::BI__builtin_neon_vst2_bf16
, NEON::BI__builtin_neon_vst2_v
},
6750 { NEON::BI__builtin_neon_vst2_lane_bf16
, NEON::BI__builtin_neon_vst2_lane_v
},
6751 { NEON::BI__builtin_neon_vst2q_bf16
, NEON::BI__builtin_neon_vst2q_v
},
6752 { NEON::BI__builtin_neon_vst2q_lane_bf16
, NEON::BI__builtin_neon_vst2q_lane_v
},
6753 { NEON::BI__builtin_neon_vst3_bf16
, NEON::BI__builtin_neon_vst3_v
},
6754 { NEON::BI__builtin_neon_vst3_lane_bf16
, NEON::BI__builtin_neon_vst3_lane_v
},
6755 { NEON::BI__builtin_neon_vst3q_bf16
, NEON::BI__builtin_neon_vst3q_v
},
6756 { NEON::BI__builtin_neon_vst3q_lane_bf16
, NEON::BI__builtin_neon_vst3q_lane_v
},
6757 { NEON::BI__builtin_neon_vst4_bf16
, NEON::BI__builtin_neon_vst4_v
},
6758 { NEON::BI__builtin_neon_vst4_lane_bf16
, NEON::BI__builtin_neon_vst4_lane_v
},
6759 { NEON::BI__builtin_neon_vst4q_bf16
, NEON::BI__builtin_neon_vst4q_v
},
6760 { NEON::BI__builtin_neon_vst4q_lane_bf16
, NEON::BI__builtin_neon_vst4q_lane_v
},
6761 { NEON::BI__builtin_neon_vtrn_f16
, NEON::BI__builtin_neon_vtrn_v
, },
6762 { NEON::BI__builtin_neon_vtrnq_f16
, NEON::BI__builtin_neon_vtrnq_v
, },
6763 { NEON::BI__builtin_neon_vuzp_f16
, NEON::BI__builtin_neon_vuzp_v
, },
6764 { NEON::BI__builtin_neon_vuzpq_f16
, NEON::BI__builtin_neon_vuzpq_v
, },
6765 { NEON::BI__builtin_neon_vzip_f16
, NEON::BI__builtin_neon_vzip_v
, },
6766 { NEON::BI__builtin_neon_vzipq_f16
, NEON::BI__builtin_neon_vzipq_v
, },
6773 #define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6775 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
6779 #define SVEMAP2(NameBase, TypeModifier) \
6780 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
6781 static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap
[] = {
6782 #define GET_SVE_LLVM_INTRINSIC_MAP
6783 #include "clang/Basic/arm_sve_builtin_cg.inc"
6784 #include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
6785 #undef GET_SVE_LLVM_INTRINSIC_MAP
6791 #define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6793 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
6797 #define SMEMAP2(NameBase, TypeModifier) \
6798 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
6799 static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap
[] = {
6800 #define GET_SME_LLVM_INTRINSIC_MAP
6801 #include "clang/Basic/arm_sme_builtin_cg.inc"
6802 #undef GET_SME_LLVM_INTRINSIC_MAP
6808 static bool NEONSIMDIntrinsicsProvenSorted
= false;
6810 static bool AArch64SIMDIntrinsicsProvenSorted
= false;
6811 static bool AArch64SISDIntrinsicsProvenSorted
= false;
6812 static bool AArch64SVEIntrinsicsProvenSorted
= false;
6813 static bool AArch64SMEIntrinsicsProvenSorted
= false;
6815 static const ARMVectorIntrinsicInfo
*
6816 findARMVectorIntrinsicInMap(ArrayRef
<ARMVectorIntrinsicInfo
> IntrinsicMap
,
6817 unsigned BuiltinID
, bool &MapProvenSorted
) {
6820 if (!MapProvenSorted
) {
6821 assert(llvm::is_sorted(IntrinsicMap
));
6822 MapProvenSorted
= true;
6826 const ARMVectorIntrinsicInfo
*Builtin
=
6827 llvm::lower_bound(IntrinsicMap
, BuiltinID
);
6829 if (Builtin
!= IntrinsicMap
.end() && Builtin
->BuiltinID
== BuiltinID
)
6835 Function
*CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID
,
6837 llvm::Type
*ArgType
,
6838 const CallExpr
*E
) {
6840 if (Modifier
& Use64BitVectors
)
6842 else if (Modifier
& Use128BitVectors
)
6846 SmallVector
<llvm::Type
*, 3> Tys
;
6847 if (Modifier
& AddRetType
) {
6848 llvm::Type
*Ty
= ConvertType(E
->getCallReturnType(getContext()));
6849 if (Modifier
& VectorizeRetType
)
6850 Ty
= llvm::FixedVectorType::get(
6851 Ty
, VectorSize
? VectorSize
/ Ty
->getPrimitiveSizeInBits() : 1);
6857 if (Modifier
& VectorizeArgTypes
) {
6858 int Elts
= VectorSize
? VectorSize
/ ArgType
->getPrimitiveSizeInBits() : 1;
6859 ArgType
= llvm::FixedVectorType::get(ArgType
, Elts
);
6862 if (Modifier
& (Add1ArgType
| Add2ArgTypes
))
6863 Tys
.push_back(ArgType
);
6865 if (Modifier
& Add2ArgTypes
)
6866 Tys
.push_back(ArgType
);
6868 if (Modifier
& InventFloatType
)
6869 Tys
.push_back(FloatTy
);
6871 return CGM
.getIntrinsic(IntrinsicID
, Tys
);
6874 static Value
*EmitCommonNeonSISDBuiltinExpr(
6875 CodeGenFunction
&CGF
, const ARMVectorIntrinsicInfo
&SISDInfo
,
6876 SmallVectorImpl
<Value
*> &Ops
, const CallExpr
*E
) {
6877 unsigned BuiltinID
= SISDInfo
.BuiltinID
;
6878 unsigned int Int
= SISDInfo
.LLVMIntrinsic
;
6879 unsigned Modifier
= SISDInfo
.TypeModifier
;
6880 const char *s
= SISDInfo
.NameHint
;
6882 switch (BuiltinID
) {
6883 case NEON::BI__builtin_neon_vcled_s64
:
6884 case NEON::BI__builtin_neon_vcled_u64
:
6885 case NEON::BI__builtin_neon_vcles_f32
:
6886 case NEON::BI__builtin_neon_vcled_f64
:
6887 case NEON::BI__builtin_neon_vcltd_s64
:
6888 case NEON::BI__builtin_neon_vcltd_u64
:
6889 case NEON::BI__builtin_neon_vclts_f32
:
6890 case NEON::BI__builtin_neon_vcltd_f64
:
6891 case NEON::BI__builtin_neon_vcales_f32
:
6892 case NEON::BI__builtin_neon_vcaled_f64
:
6893 case NEON::BI__builtin_neon_vcalts_f32
:
6894 case NEON::BI__builtin_neon_vcaltd_f64
:
6895 // Only one direction of comparisons actually exist, cmle is actually a cmge
6896 // with swapped operands. The table gives us the right intrinsic but we
6897 // still need to do the swap.
6898 std::swap(Ops
[0], Ops
[1]);
6902 assert(Int
&& "Generic code assumes a valid intrinsic");
6904 // Determine the type(s) of this overloaded AArch64 intrinsic.
6905 const Expr
*Arg
= E
->getArg(0);
6906 llvm::Type
*ArgTy
= CGF
.ConvertType(Arg
->getType());
6907 Function
*F
= CGF
.LookupNeonLLVMIntrinsic(Int
, Modifier
, ArgTy
, E
);
6910 ConstantInt
*C0
= ConstantInt::get(CGF
.SizeTy
, 0);
6911 for (Function::const_arg_iterator ai
= F
->arg_begin(), ae
= F
->arg_end();
6912 ai
!= ae
; ++ai
, ++j
) {
6913 llvm::Type
*ArgTy
= ai
->getType();
6914 if (Ops
[j
]->getType()->getPrimitiveSizeInBits() ==
6915 ArgTy
->getPrimitiveSizeInBits())
6918 assert(ArgTy
->isVectorTy() && !Ops
[j
]->getType()->isVectorTy());
6919 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
6920 // it before inserting.
6921 Ops
[j
] = CGF
.Builder
.CreateTruncOrBitCast(
6922 Ops
[j
], cast
<llvm::VectorType
>(ArgTy
)->getElementType());
6924 CGF
.Builder
.CreateInsertElement(PoisonValue::get(ArgTy
), Ops
[j
], C0
);
6927 Value
*Result
= CGF
.EmitNeonCall(F
, Ops
, s
);
6928 llvm::Type
*ResultType
= CGF
.ConvertType(E
->getType());
6929 if (ResultType
->getPrimitiveSizeInBits().getFixedValue() <
6930 Result
->getType()->getPrimitiveSizeInBits().getFixedValue())
6931 return CGF
.Builder
.CreateExtractElement(Result
, C0
);
6933 return CGF
.Builder
.CreateBitCast(Result
, ResultType
, s
);
6936 Value
*CodeGenFunction::EmitCommonNeonBuiltinExpr(
6937 unsigned BuiltinID
, unsigned LLVMIntrinsic
, unsigned AltLLVMIntrinsic
,
6938 const char *NameHint
, unsigned Modifier
, const CallExpr
*E
,
6939 SmallVectorImpl
<llvm::Value
*> &Ops
, Address PtrOp0
, Address PtrOp1
,
6940 llvm::Triple::ArchType Arch
) {
6941 // Get the last argument, which specifies the vector type.
6942 const Expr
*Arg
= E
->getArg(E
->getNumArgs() - 1);
6943 std::optional
<llvm::APSInt
> NeonTypeConst
=
6944 Arg
->getIntegerConstantExpr(getContext());
6948 // Determine the type of this overloaded NEON intrinsic.
6949 NeonTypeFlags
Type(NeonTypeConst
->getZExtValue());
6950 bool Usgn
= Type
.isUnsigned();
6951 bool Quad
= Type
.isQuad();
6952 const bool HasLegalHalfType
= getTarget().hasLegalHalfType();
6953 const bool AllowBFloatArgsAndRet
=
6954 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
6956 llvm::FixedVectorType
*VTy
=
6957 GetNeonType(this, Type
, HasLegalHalfType
, false, AllowBFloatArgsAndRet
);
6958 llvm::Type
*Ty
= VTy
;
6962 auto getAlignmentValue32
= [&](Address addr
) -> Value
* {
6963 return Builder
.getInt32(addr
.getAlignment().getQuantity());
6966 unsigned Int
= LLVMIntrinsic
;
6967 if ((Modifier
& UnsignedAlts
) && !Usgn
)
6968 Int
= AltLLVMIntrinsic
;
6970 switch (BuiltinID
) {
6972 case NEON::BI__builtin_neon_splat_lane_v
:
6973 case NEON::BI__builtin_neon_splat_laneq_v
:
6974 case NEON::BI__builtin_neon_splatq_lane_v
:
6975 case NEON::BI__builtin_neon_splatq_laneq_v
: {
6976 auto NumElements
= VTy
->getElementCount();
6977 if (BuiltinID
== NEON::BI__builtin_neon_splatq_lane_v
)
6978 NumElements
= NumElements
* 2;
6979 if (BuiltinID
== NEON::BI__builtin_neon_splat_laneq_v
)
6980 NumElements
= NumElements
.divideCoefficientBy(2);
6982 Ops
[0] = Builder
.CreateBitCast(Ops
[0], VTy
);
6983 return EmitNeonSplat(Ops
[0], cast
<ConstantInt
>(Ops
[1]), NumElements
);
6985 case NEON::BI__builtin_neon_vpadd_v
:
6986 case NEON::BI__builtin_neon_vpaddq_v
:
6987 // We don't allow fp/int overloading of intrinsics.
6988 if (VTy
->getElementType()->isFloatingPointTy() &&
6989 Int
== Intrinsic::aarch64_neon_addp
)
6990 Int
= Intrinsic::aarch64_neon_faddp
;
6992 case NEON::BI__builtin_neon_vabs_v
:
6993 case NEON::BI__builtin_neon_vabsq_v
:
6994 if (VTy
->getElementType()->isFloatingPointTy())
6995 return EmitNeonCall(CGM
.getIntrinsic(Intrinsic::fabs
, Ty
), Ops
, "vabs");
6996 return EmitNeonCall(CGM
.getIntrinsic(LLVMIntrinsic
, Ty
), Ops
, "vabs");
6997 case NEON::BI__builtin_neon_vadd_v
:
6998 case NEON::BI__builtin_neon_vaddq_v
: {
6999 llvm::Type
*VTy
= llvm::FixedVectorType::get(Int8Ty
, Quad
? 16 : 8);
7000 Ops
[0] = Builder
.CreateBitCast(Ops
[0], VTy
);
7001 Ops
[1] = Builder
.CreateBitCast(Ops
[1], VTy
);
7002 Ops
[0] = Builder
.CreateXor(Ops
[0], Ops
[1]);
7003 return Builder
.CreateBitCast(Ops
[0], Ty
);
7005 case NEON::BI__builtin_neon_vaddhn_v
: {
7006 llvm::FixedVectorType
*SrcTy
=
7007 llvm::FixedVectorType::getExtendedElementVectorType(VTy
);
7009 // %sum = add <4 x i32> %lhs, %rhs
7010 Ops
[0] = Builder
.CreateBitCast(Ops
[0], SrcTy
);
7011 Ops
[1] = Builder
.CreateBitCast(Ops
[1], SrcTy
);
7012 Ops
[0] = Builder
.CreateAdd(Ops
[0], Ops
[1], "vaddhn");
7014 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
7015 Constant
*ShiftAmt
=
7016 ConstantInt::get(SrcTy
, SrcTy
->getScalarSizeInBits() / 2);
7017 Ops
[0] = Builder
.CreateLShr(Ops
[0], ShiftAmt
, "vaddhn");
7019 // %res = trunc <4 x i32> %high to <4 x i16>
7020 return Builder
.CreateTrunc(Ops
[0], VTy
, "vaddhn");
7022 case NEON::BI__builtin_neon_vcale_v
:
7023 case NEON::BI__builtin_neon_vcaleq_v
:
7024 case NEON::BI__builtin_neon_vcalt_v
:
7025 case NEON::BI__builtin_neon_vcaltq_v
:
7026 std::swap(Ops
[0], Ops
[1]);
7028 case NEON::BI__builtin_neon_vcage_v
:
7029 case NEON::BI__builtin_neon_vcageq_v
:
7030 case NEON::BI__builtin_neon_vcagt_v
:
7031 case NEON::BI__builtin_neon_vcagtq_v
: {
7033 switch (VTy
->getScalarSizeInBits()) {
7034 default: llvm_unreachable("unexpected type");
7045 auto *VecFlt
= llvm::FixedVectorType::get(Ty
, VTy
->getNumElements());
7046 llvm::Type
*Tys
[] = { VTy
, VecFlt
};
7047 Function
*F
= CGM
.getIntrinsic(LLVMIntrinsic
, Tys
);
7048 return EmitNeonCall(F
, Ops
, NameHint
);
7050 case NEON::BI__builtin_neon_vceqz_v
:
7051 case NEON::BI__builtin_neon_vceqzq_v
:
7052 return EmitAArch64CompareBuiltinExpr(Ops
[0], Ty
, ICmpInst::FCMP_OEQ
,
7053 ICmpInst::ICMP_EQ
, "vceqz");
7054 case NEON::BI__builtin_neon_vcgez_v
:
7055 case NEON::BI__builtin_neon_vcgezq_v
:
7056 return EmitAArch64CompareBuiltinExpr(Ops
[0], Ty
, ICmpInst::FCMP_OGE
,
7057 ICmpInst::ICMP_SGE
, "vcgez");
7058 case NEON::BI__builtin_neon_vclez_v
:
7059 case NEON::BI__builtin_neon_vclezq_v
:
7060 return EmitAArch64CompareBuiltinExpr(Ops
[0], Ty
, ICmpInst::FCMP_OLE
,
7061 ICmpInst::ICMP_SLE
, "vclez");
7062 case NEON::BI__builtin_neon_vcgtz_v
:
7063 case NEON::BI__builtin_neon_vcgtzq_v
:
7064 return EmitAArch64CompareBuiltinExpr(Ops
[0], Ty
, ICmpInst::FCMP_OGT
,
7065 ICmpInst::ICMP_SGT
, "vcgtz");
7066 case NEON::BI__builtin_neon_vcltz_v
:
7067 case NEON::BI__builtin_neon_vcltzq_v
:
7068 return EmitAArch64CompareBuiltinExpr(Ops
[0], Ty
, ICmpInst::FCMP_OLT
,
7069 ICmpInst::ICMP_SLT
, "vcltz");
7070 case NEON::BI__builtin_neon_vclz_v
:
7071 case NEON::BI__builtin_neon_vclzq_v
:
7072 // We generate target-independent intrinsic, which needs a second argument
7073 // for whether or not clz of zero is undefined; on ARM it isn't.
7074 Ops
.push_back(Builder
.getInt1(getTarget().isCLZForZeroUndef()));
7076 case NEON::BI__builtin_neon_vcvt_f32_v
:
7077 case NEON::BI__builtin_neon_vcvtq_f32_v
:
7078 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
7079 Ty
= GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32
, false, Quad
),
7081 return Usgn
? Builder
.CreateUIToFP(Ops
[0], Ty
, "vcvt")
7082 : Builder
.CreateSIToFP(Ops
[0], Ty
, "vcvt");
7083 case NEON::BI__builtin_neon_vcvt_f16_s16
:
7084 case NEON::BI__builtin_neon_vcvt_f16_u16
:
7085 case NEON::BI__builtin_neon_vcvtq_f16_s16
:
7086 case NEON::BI__builtin_neon_vcvtq_f16_u16
:
7087 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
7088 Ty
= GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16
, false, Quad
),
7090 return Usgn
? Builder
.CreateUIToFP(Ops
[0], Ty
, "vcvt")
7091 : Builder
.CreateSIToFP(Ops
[0], Ty
, "vcvt");
7092 case NEON::BI__builtin_neon_vcvt_n_f16_s16
:
7093 case NEON::BI__builtin_neon_vcvt_n_f16_u16
:
7094 case NEON::BI__builtin_neon_vcvtq_n_f16_s16
:
7095 case NEON::BI__builtin_neon_vcvtq_n_f16_u16
: {
7096 llvm::Type
*Tys
[2] = { GetFloatNeonType(this, Type
), Ty
};
7097 Function
*F
= CGM
.getIntrinsic(Int
, Tys
);
7098 return EmitNeonCall(F
, Ops
, "vcvt_n");
7100 case NEON::BI__builtin_neon_vcvt_n_f32_v
:
7101 case NEON::BI__builtin_neon_vcvt_n_f64_v
:
7102 case NEON::BI__builtin_neon_vcvtq_n_f32_v
:
7103 case NEON::BI__builtin_neon_vcvtq_n_f64_v
: {
7104 llvm::Type
*Tys
[2] = { GetFloatNeonType(this, Type
), Ty
};
7105 Int
= Usgn
? LLVMIntrinsic
: AltLLVMIntrinsic
;
7106 Function
*F
= CGM
.getIntrinsic(Int
, Tys
);
7107 return EmitNeonCall(F
, Ops
, "vcvt_n");
7109 case NEON::BI__builtin_neon_vcvt_n_s16_f16
:
7110 case NEON::BI__builtin_neon_vcvt_n_s32_v
:
7111 case NEON::BI__builtin_neon_vcvt_n_u16_f16
:
7112 case NEON::BI__builtin_neon_vcvt_n_u32_v
:
7113 case NEON::BI__builtin_neon_vcvt_n_s64_v
:
7114 case NEON::BI__builtin_neon_vcvt_n_u64_v
:
7115 case NEON::BI__builtin_neon_vcvtq_n_s16_f16
:
7116 case NEON::BI__builtin_neon_vcvtq_n_s32_v
:
7117 case NEON::BI__builtin_neon_vcvtq_n_u16_f16
:
7118 case NEON::BI__builtin_neon_vcvtq_n_u32_v
:
7119 case NEON::BI__builtin_neon_vcvtq_n_s64_v
:
7120 case NEON::BI__builtin_neon_vcvtq_n_u64_v
: {
7121 llvm::Type
*Tys
[2] = { Ty
, GetFloatNeonType(this, Type
) };
7122 Function
*F
= CGM
.getIntrinsic(LLVMIntrinsic
, Tys
);
7123 return EmitNeonCall(F
, Ops
, "vcvt_n");
7125 case NEON::BI__builtin_neon_vcvt_s32_v
:
7126 case NEON::BI__builtin_neon_vcvt_u32_v
:
7127 case NEON::BI__builtin_neon_vcvt_s64_v
:
7128 case NEON::BI__builtin_neon_vcvt_u64_v
:
7129 case NEON::BI__builtin_neon_vcvt_s16_f16
:
7130 case NEON::BI__builtin_neon_vcvt_u16_f16
:
7131 case NEON::BI__builtin_neon_vcvtq_s32_v
:
7132 case NEON::BI__builtin_neon_vcvtq_u32_v
:
7133 case NEON::BI__builtin_neon_vcvtq_s64_v
:
7134 case NEON::BI__builtin_neon_vcvtq_u64_v
:
7135 case NEON::BI__builtin_neon_vcvtq_s16_f16
:
7136 case NEON::BI__builtin_neon_vcvtq_u16_f16
: {
7137 Ops
[0] = Builder
.CreateBitCast(Ops
[0], GetFloatNeonType(this, Type
));
7138 return Usgn
? Builder
.CreateFPToUI(Ops
[0], Ty
, "vcvt")
7139 : Builder
.CreateFPToSI(Ops
[0], Ty
, "vcvt");
7141 case NEON::BI__builtin_neon_vcvta_s16_f16
:
7142 case NEON::BI__builtin_neon_vcvta_s32_v
:
7143 case NEON::BI__builtin_neon_vcvta_s64_v
:
7144 case NEON::BI__builtin_neon_vcvta_u16_f16
:
7145 case NEON::BI__builtin_neon_vcvta_u32_v
:
7146 case NEON::BI__builtin_neon_vcvta_u64_v
:
7147 case NEON::BI__builtin_neon_vcvtaq_s16_f16
:
7148 case NEON::BI__builtin_neon_vcvtaq_s32_v
:
7149 case NEON::BI__builtin_neon_vcvtaq_s64_v
:
7150 case NEON::BI__builtin_neon_vcvtaq_u16_f16
:
7151 case NEON::BI__builtin_neon_vcvtaq_u32_v
:
7152 case NEON::BI__builtin_neon_vcvtaq_u64_v
:
7153 case NEON::BI__builtin_neon_vcvtn_s16_f16
:
7154 case NEON::BI__builtin_neon_vcvtn_s32_v
:
7155 case NEON::BI__builtin_neon_vcvtn_s64_v
:
7156 case NEON::BI__builtin_neon_vcvtn_u16_f16
:
7157 case NEON::BI__builtin_neon_vcvtn_u32_v
:
7158 case NEON::BI__builtin_neon_vcvtn_u64_v
:
7159 case NEON::BI__builtin_neon_vcvtnq_s16_f16
:
7160 case NEON::BI__builtin_neon_vcvtnq_s32_v
:
7161 case NEON::BI__builtin_neon_vcvtnq_s64_v
:
7162 case NEON::BI__builtin_neon_vcvtnq_u16_f16
:
7163 case NEON::BI__builtin_neon_vcvtnq_u32_v
:
7164 case NEON::BI__builtin_neon_vcvtnq_u64_v
:
7165 case NEON::BI__builtin_neon_vcvtp_s16_f16
:
7166 case NEON::BI__builtin_neon_vcvtp_s32_v
:
7167 case NEON::BI__builtin_neon_vcvtp_s64_v
:
7168 case NEON::BI__builtin_neon_vcvtp_u16_f16
:
7169 case NEON::BI__builtin_neon_vcvtp_u32_v
:
7170 case NEON::BI__builtin_neon_vcvtp_u64_v
:
7171 case NEON::BI__builtin_neon_vcvtpq_s16_f16
:
7172 case NEON::BI__builtin_neon_vcvtpq_s32_v
:
7173 case NEON::BI__builtin_neon_vcvtpq_s64_v
:
7174 case NEON::BI__builtin_neon_vcvtpq_u16_f16
:
7175 case NEON::BI__builtin_neon_vcvtpq_u32_v
:
7176 case NEON::BI__builtin_neon_vcvtpq_u64_v
:
7177 case NEON::BI__builtin_neon_vcvtm_s16_f16
:
7178 case NEON::BI__builtin_neon_vcvtm_s32_v
:
7179 case NEON::BI__builtin_neon_vcvtm_s64_v
:
7180 case NEON::BI__builtin_neon_vcvtm_u16_f16
:
7181 case NEON::BI__builtin_neon_vcvtm_u32_v
:
7182 case NEON::BI__builtin_neon_vcvtm_u64_v
:
7183 case NEON::BI__builtin_neon_vcvtmq_s16_f16
:
7184 case NEON::BI__builtin_neon_vcvtmq_s32_v
:
7185 case NEON::BI__builtin_neon_vcvtmq_s64_v
:
7186 case NEON::BI__builtin_neon_vcvtmq_u16_f16
:
7187 case NEON::BI__builtin_neon_vcvtmq_u32_v
:
7188 case NEON::BI__builtin_neon_vcvtmq_u64_v
: {
7189 llvm::Type
*Tys
[2] = { Ty
, GetFloatNeonType(this, Type
) };
7190 return EmitNeonCall(CGM
.getIntrinsic(LLVMIntrinsic
, Tys
), Ops
, NameHint
);
7192 case NEON::BI__builtin_neon_vcvtx_f32_v
: {
7193 llvm::Type
*Tys
[2] = { VTy
->getTruncatedElementVectorType(VTy
), Ty
};
7194 return EmitNeonCall(CGM
.getIntrinsic(LLVMIntrinsic
, Tys
), Ops
, NameHint
);
7197 case NEON::BI__builtin_neon_vext_v
:
7198 case NEON::BI__builtin_neon_vextq_v
: {
7199 int CV
= cast
<ConstantInt
>(Ops
[2])->getSExtValue();
7200 SmallVector
<int, 16> Indices
;
7201 for (unsigned i
= 0, e
= VTy
->getNumElements(); i
!= e
; ++i
)
7202 Indices
.push_back(i
+CV
);
7204 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
7205 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
7206 return Builder
.CreateShuffleVector(Ops
[0], Ops
[1], Indices
, "vext");
7208 case NEON::BI__builtin_neon_vfma_v
:
7209 case NEON::BI__builtin_neon_vfmaq_v
: {
7210 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
7211 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
7212 Ops
[2] = Builder
.CreateBitCast(Ops
[2], Ty
);
7214 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
7215 return emitCallMaybeConstrainedFPBuiltin(
7216 *this, Intrinsic::fma
, Intrinsic::experimental_constrained_fma
, Ty
,
7217 {Ops
[1], Ops
[2], Ops
[0]});
7219 case NEON::BI__builtin_neon_vld1_v
:
7220 case NEON::BI__builtin_neon_vld1q_v
: {
7221 llvm::Type
*Tys
[] = {Ty
, Int8PtrTy
};
7222 Ops
.push_back(getAlignmentValue32(PtrOp0
));
7223 return EmitNeonCall(CGM
.getIntrinsic(LLVMIntrinsic
, Tys
), Ops
, "vld1");
7225 case NEON::BI__builtin_neon_vld1_x2_v
:
7226 case NEON::BI__builtin_neon_vld1q_x2_v
:
7227 case NEON::BI__builtin_neon_vld1_x3_v
:
7228 case NEON::BI__builtin_neon_vld1q_x3_v
:
7229 case NEON::BI__builtin_neon_vld1_x4_v
:
7230 case NEON::BI__builtin_neon_vld1q_x4_v
: {
7231 llvm::Type
*PTy
= llvm::PointerType::getUnqual(VTy
->getElementType());
7232 Ops
[1] = Builder
.CreateBitCast(Ops
[1], PTy
);
7233 llvm::Type
*Tys
[2] = { VTy
, PTy
};
7234 Function
*F
= CGM
.getIntrinsic(LLVMIntrinsic
, Tys
);
7235 Ops
[1] = Builder
.CreateCall(F
, Ops
[1], "vld1xN");
7236 Ty
= llvm::PointerType::getUnqual(Ops
[1]->getType());
7237 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
7238 return Builder
.CreateDefaultAlignedStore(Ops
[1], Ops
[0]);
7240 case NEON::BI__builtin_neon_vld2_v
:
7241 case NEON::BI__builtin_neon_vld2q_v
:
7242 case NEON::BI__builtin_neon_vld3_v
:
7243 case NEON::BI__builtin_neon_vld3q_v
:
7244 case NEON::BI__builtin_neon_vld4_v
:
7245 case NEON::BI__builtin_neon_vld4q_v
:
7246 case NEON::BI__builtin_neon_vld2_dup_v
:
7247 case NEON::BI__builtin_neon_vld2q_dup_v
:
7248 case NEON::BI__builtin_neon_vld3_dup_v
:
7249 case NEON::BI__builtin_neon_vld3q_dup_v
:
7250 case NEON::BI__builtin_neon_vld4_dup_v
:
7251 case NEON::BI__builtin_neon_vld4q_dup_v
: {
7252 llvm::Type
*Tys
[] = {Ty
, Int8PtrTy
};
7253 Function
*F
= CGM
.getIntrinsic(LLVMIntrinsic
, Tys
);
7254 Value
*Align
= getAlignmentValue32(PtrOp1
);
7255 Ops
[1] = Builder
.CreateCall(F
, {Ops
[1], Align
}, NameHint
);
7256 Ty
= llvm::PointerType::getUnqual(Ops
[1]->getType());
7257 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
7258 return Builder
.CreateDefaultAlignedStore(Ops
[1], Ops
[0]);
7260 case NEON::BI__builtin_neon_vld1_dup_v
:
7261 case NEON::BI__builtin_neon_vld1q_dup_v
: {
7262 Value
*V
= PoisonValue::get(Ty
);
7263 PtrOp0
= Builder
.CreateElementBitCast(PtrOp0
, VTy
->getElementType());
7264 LoadInst
*Ld
= Builder
.CreateLoad(PtrOp0
);
7265 llvm::Constant
*CI
= ConstantInt::get(SizeTy
, 0);
7266 Ops
[0] = Builder
.CreateInsertElement(V
, Ld
, CI
);
7267 return EmitNeonSplat(Ops
[0], CI
);
7269 case NEON::BI__builtin_neon_vld2_lane_v
:
7270 case NEON::BI__builtin_neon_vld2q_lane_v
:
7271 case NEON::BI__builtin_neon_vld3_lane_v
:
7272 case NEON::BI__builtin_neon_vld3q_lane_v
:
7273 case NEON::BI__builtin_neon_vld4_lane_v
:
7274 case NEON::BI__builtin_neon_vld4q_lane_v
: {
7275 llvm::Type
*Tys
[] = {Ty
, Int8PtrTy
};
7276 Function
*F
= CGM
.getIntrinsic(LLVMIntrinsic
, Tys
);
7277 for (unsigned I
= 2; I
< Ops
.size() - 1; ++I
)
7278 Ops
[I
] = Builder
.CreateBitCast(Ops
[I
], Ty
);
7279 Ops
.push_back(getAlignmentValue32(PtrOp1
));
7280 Ops
[1] = Builder
.CreateCall(F
, ArrayRef(Ops
).slice(1), NameHint
);
7281 Ty
= llvm::PointerType::getUnqual(Ops
[1]->getType());
7282 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
7283 return Builder
.CreateDefaultAlignedStore(Ops
[1], Ops
[0]);
7285 case NEON::BI__builtin_neon_vmovl_v
: {
7286 llvm::FixedVectorType
*DTy
=
7287 llvm::FixedVectorType::getTruncatedElementVectorType(VTy
);
7288 Ops
[0] = Builder
.CreateBitCast(Ops
[0], DTy
);
7290 return Builder
.CreateZExt(Ops
[0], Ty
, "vmovl");
7291 return Builder
.CreateSExt(Ops
[0], Ty
, "vmovl");
7293 case NEON::BI__builtin_neon_vmovn_v
: {
7294 llvm::FixedVectorType
*QTy
=
7295 llvm::FixedVectorType::getExtendedElementVectorType(VTy
);
7296 Ops
[0] = Builder
.CreateBitCast(Ops
[0], QTy
);
7297 return Builder
.CreateTrunc(Ops
[0], Ty
, "vmovn");
7299 case NEON::BI__builtin_neon_vmull_v
:
7300 // FIXME: the integer vmull operations could be emitted in terms of pure
7301 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
7302 // hoisting the exts outside loops. Until global ISel comes along that can
7303 // see through such movement this leads to bad CodeGen. So we need an
7304 // intrinsic for now.
7305 Int
= Usgn
? Intrinsic::arm_neon_vmullu
: Intrinsic::arm_neon_vmulls
;
7306 Int
= Type
.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp
: Int
;
7307 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vmull");
7308 case NEON::BI__builtin_neon_vpadal_v
:
7309 case NEON::BI__builtin_neon_vpadalq_v
: {
7310 // The source operand type has twice as many elements of half the size.
7311 unsigned EltBits
= VTy
->getElementType()->getPrimitiveSizeInBits();
7313 llvm::IntegerType::get(getLLVMContext(), EltBits
/ 2);
7315 llvm::FixedVectorType::get(EltTy
, VTy
->getNumElements() * 2);
7316 llvm::Type
*Tys
[2] = { Ty
, NarrowTy
};
7317 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, NameHint
);
7319 case NEON::BI__builtin_neon_vpaddl_v
:
7320 case NEON::BI__builtin_neon_vpaddlq_v
: {
7321 // The source operand type has twice as many elements of half the size.
7322 unsigned EltBits
= VTy
->getElementType()->getPrimitiveSizeInBits();
7323 llvm::Type
*EltTy
= llvm::IntegerType::get(getLLVMContext(), EltBits
/ 2);
7325 llvm::FixedVectorType::get(EltTy
, VTy
->getNumElements() * 2);
7326 llvm::Type
*Tys
[2] = { Ty
, NarrowTy
};
7327 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vpaddl");
7329 case NEON::BI__builtin_neon_vqdmlal_v
:
7330 case NEON::BI__builtin_neon_vqdmlsl_v
: {
7331 SmallVector
<Value
*, 2> MulOps(Ops
.begin() + 1, Ops
.end());
7333 EmitNeonCall(CGM
.getIntrinsic(LLVMIntrinsic
, Ty
), MulOps
, "vqdmlal");
7335 return EmitNeonCall(CGM
.getIntrinsic(AltLLVMIntrinsic
, Ty
), Ops
, NameHint
);
7337 case NEON::BI__builtin_neon_vqdmulhq_lane_v
:
7338 case NEON::BI__builtin_neon_vqdmulh_lane_v
:
7339 case NEON::BI__builtin_neon_vqrdmulhq_lane_v
:
7340 case NEON::BI__builtin_neon_vqrdmulh_lane_v
: {
7341 auto *RTy
= cast
<llvm::FixedVectorType
>(Ty
);
7342 if (BuiltinID
== NEON::BI__builtin_neon_vqdmulhq_lane_v
||
7343 BuiltinID
== NEON::BI__builtin_neon_vqrdmulhq_lane_v
)
7344 RTy
= llvm::FixedVectorType::get(RTy
->getElementType(),
7345 RTy
->getNumElements() * 2);
7346 llvm::Type
*Tys
[2] = {
7347 RTy
, GetNeonType(this, NeonTypeFlags(Type
.getEltType(), false,
7348 /*isQuad*/ false))};
7349 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, NameHint
);
7351 case NEON::BI__builtin_neon_vqdmulhq_laneq_v
:
7352 case NEON::BI__builtin_neon_vqdmulh_laneq_v
:
7353 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v
:
7354 case NEON::BI__builtin_neon_vqrdmulh_laneq_v
: {
7355 llvm::Type
*Tys
[2] = {
7356 Ty
, GetNeonType(this, NeonTypeFlags(Type
.getEltType(), false,
7358 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, NameHint
);
7360 case NEON::BI__builtin_neon_vqshl_n_v
:
7361 case NEON::BI__builtin_neon_vqshlq_n_v
:
7362 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vqshl_n",
7364 case NEON::BI__builtin_neon_vqshlu_n_v
:
7365 case NEON::BI__builtin_neon_vqshluq_n_v
:
7366 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vqshlu_n",
7368 case NEON::BI__builtin_neon_vrecpe_v
:
7369 case NEON::BI__builtin_neon_vrecpeq_v
:
7370 case NEON::BI__builtin_neon_vrsqrte_v
:
7371 case NEON::BI__builtin_neon_vrsqrteq_v
:
7372 Int
= Ty
->isFPOrFPVectorTy() ? LLVMIntrinsic
: AltLLVMIntrinsic
;
7373 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, NameHint
);
7374 case NEON::BI__builtin_neon_vrndi_v
:
7375 case NEON::BI__builtin_neon_vrndiq_v
:
7376 Int
= Builder
.getIsFPConstrained()
7377 ? Intrinsic::experimental_constrained_nearbyint
7378 : Intrinsic::nearbyint
;
7379 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, NameHint
);
7380 case NEON::BI__builtin_neon_vrshr_n_v
:
7381 case NEON::BI__builtin_neon_vrshrq_n_v
:
7382 return EmitNeonCall(CGM
.getIntrinsic(Int
, Ty
), Ops
, "vrshr_n",
7384 case NEON::BI__builtin_neon_vsha512hq_u64
:
7385 case NEON::BI__builtin_neon_vsha512h2q_u64
:
7386 case NEON::BI__builtin_neon_vsha512su0q_u64
:
7387 case NEON::BI__builtin_neon_vsha512su1q_u64
: {
7388 Function
*F
= CGM
.getIntrinsic(Int
);
7389 return EmitNeonCall(F
, Ops
, "");
7391 case NEON::BI__builtin_neon_vshl_n_v
:
7392 case NEON::BI__builtin_neon_vshlq_n_v
:
7393 Ops
[1] = EmitNeonShiftVector(Ops
[1], Ty
, false);
7394 return Builder
.CreateShl(Builder
.CreateBitCast(Ops
[0],Ty
), Ops
[1],
7396 case NEON::BI__builtin_neon_vshll_n_v
: {
7397 llvm::FixedVectorType
*SrcTy
=
7398 llvm::FixedVectorType::getTruncatedElementVectorType(VTy
);
7399 Ops
[0] = Builder
.CreateBitCast(Ops
[0], SrcTy
);
7401 Ops
[0] = Builder
.CreateZExt(Ops
[0], VTy
);
7403 Ops
[0] = Builder
.CreateSExt(Ops
[0], VTy
);
7404 Ops
[1] = EmitNeonShiftVector(Ops
[1], VTy
, false);
7405 return Builder
.CreateShl(Ops
[0], Ops
[1], "vshll_n");
7407 case NEON::BI__builtin_neon_vshrn_n_v
: {
7408 llvm::FixedVectorType
*SrcTy
=
7409 llvm::FixedVectorType::getExtendedElementVectorType(VTy
);
7410 Ops
[0] = Builder
.CreateBitCast(Ops
[0], SrcTy
);
7411 Ops
[1] = EmitNeonShiftVector(Ops
[1], SrcTy
, false);
7413 Ops
[0] = Builder
.CreateLShr(Ops
[0], Ops
[1]);
7415 Ops
[0] = Builder
.CreateAShr(Ops
[0], Ops
[1]);
7416 return Builder
.CreateTrunc(Ops
[0], Ty
, "vshrn_n");
7418 case NEON::BI__builtin_neon_vshr_n_v
:
7419 case NEON::BI__builtin_neon_vshrq_n_v
:
7420 return EmitNeonRShiftImm(Ops
[0], Ops
[1], Ty
, Usgn
, "vshr_n");
7421 case NEON::BI__builtin_neon_vst1_v
:
7422 case NEON::BI__builtin_neon_vst1q_v
:
7423 case NEON::BI__builtin_neon_vst2_v
:
7424 case NEON::BI__builtin_neon_vst2q_v
:
7425 case NEON::BI__builtin_neon_vst3_v
:
7426 case NEON::BI__builtin_neon_vst3q_v
:
7427 case NEON::BI__builtin_neon_vst4_v
:
7428 case NEON::BI__builtin_neon_vst4q_v
:
7429 case NEON::BI__builtin_neon_vst2_lane_v
:
7430 case NEON::BI__builtin_neon_vst2q_lane_v
:
7431 case NEON::BI__builtin_neon_vst3_lane_v
:
7432 case NEON::BI__builtin_neon_vst3q_lane_v
:
7433 case NEON::BI__builtin_neon_vst4_lane_v
:
7434 case NEON::BI__builtin_neon_vst4q_lane_v
: {
7435 llvm::Type
*Tys
[] = {Int8PtrTy
, Ty
};
7436 Ops
.push_back(getAlignmentValue32(PtrOp0
));
7437 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "");
7439 case NEON::BI__builtin_neon_vsm3partw1q_u32
:
7440 case NEON::BI__builtin_neon_vsm3partw2q_u32
:
7441 case NEON::BI__builtin_neon_vsm3ss1q_u32
:
7442 case NEON::BI__builtin_neon_vsm4ekeyq_u32
:
7443 case NEON::BI__builtin_neon_vsm4eq_u32
: {
7444 Function
*F
= CGM
.getIntrinsic(Int
);
7445 return EmitNeonCall(F
, Ops
, "");
7447 case NEON::BI__builtin_neon_vsm3tt1aq_u32
:
7448 case NEON::BI__builtin_neon_vsm3tt1bq_u32
:
7449 case NEON::BI__builtin_neon_vsm3tt2aq_u32
:
7450 case NEON::BI__builtin_neon_vsm3tt2bq_u32
: {
7451 Function
*F
= CGM
.getIntrinsic(Int
);
7452 Ops
[3] = Builder
.CreateZExt(Ops
[3], Int64Ty
);
7453 return EmitNeonCall(F
, Ops
, "");
7455 case NEON::BI__builtin_neon_vst1_x2_v
:
7456 case NEON::BI__builtin_neon_vst1q_x2_v
:
7457 case NEON::BI__builtin_neon_vst1_x3_v
:
7458 case NEON::BI__builtin_neon_vst1q_x3_v
:
7459 case NEON::BI__builtin_neon_vst1_x4_v
:
7460 case NEON::BI__builtin_neon_vst1q_x4_v
: {
7461 llvm::Type
*PTy
= llvm::PointerType::getUnqual(VTy
->getElementType());
7462 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
7463 // in AArch64 it comes last. We may want to stick to one or another.
7464 if (Arch
== llvm::Triple::aarch64
|| Arch
== llvm::Triple::aarch64_be
||
7465 Arch
== llvm::Triple::aarch64_32
) {
7466 llvm::Type
*Tys
[2] = { VTy
, PTy
};
7467 std::rotate(Ops
.begin(), Ops
.begin() + 1, Ops
.end());
7468 return EmitNeonCall(CGM
.getIntrinsic(LLVMIntrinsic
, Tys
), Ops
, "");
7470 llvm::Type
*Tys
[2] = { PTy
, VTy
};
7471 return EmitNeonCall(CGM
.getIntrinsic(LLVMIntrinsic
, Tys
), Ops
, "");
7473 case NEON::BI__builtin_neon_vsubhn_v
: {
7474 llvm::FixedVectorType
*SrcTy
=
7475 llvm::FixedVectorType::getExtendedElementVectorType(VTy
);
7477 // %sum = add <4 x i32> %lhs, %rhs
7478 Ops
[0] = Builder
.CreateBitCast(Ops
[0], SrcTy
);
7479 Ops
[1] = Builder
.CreateBitCast(Ops
[1], SrcTy
);
7480 Ops
[0] = Builder
.CreateSub(Ops
[0], Ops
[1], "vsubhn");
7482 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
7483 Constant
*ShiftAmt
=
7484 ConstantInt::get(SrcTy
, SrcTy
->getScalarSizeInBits() / 2);
7485 Ops
[0] = Builder
.CreateLShr(Ops
[0], ShiftAmt
, "vsubhn");
7487 // %res = trunc <4 x i32> %high to <4 x i16>
7488 return Builder
.CreateTrunc(Ops
[0], VTy
, "vsubhn");
7490 case NEON::BI__builtin_neon_vtrn_v
:
7491 case NEON::BI__builtin_neon_vtrnq_v
: {
7492 Ops
[0] = Builder
.CreateBitCast(Ops
[0], llvm::PointerType::getUnqual(Ty
));
7493 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
7494 Ops
[2] = Builder
.CreateBitCast(Ops
[2], Ty
);
7495 Value
*SV
= nullptr;
7497 for (unsigned vi
= 0; vi
!= 2; ++vi
) {
7498 SmallVector
<int, 16> Indices
;
7499 for (unsigned i
= 0, e
= VTy
->getNumElements(); i
!= e
; i
+= 2) {
7500 Indices
.push_back(i
+vi
);
7501 Indices
.push_back(i
+e
+vi
);
7503 Value
*Addr
= Builder
.CreateConstInBoundsGEP1_32(Ty
, Ops
[0], vi
);
7504 SV
= Builder
.CreateShuffleVector(Ops
[1], Ops
[2], Indices
, "vtrn");
7505 SV
= Builder
.CreateDefaultAlignedStore(SV
, Addr
);
7509 case NEON::BI__builtin_neon_vtst_v
:
7510 case NEON::BI__builtin_neon_vtstq_v
: {
7511 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Ty
);
7512 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
7513 Ops
[0] = Builder
.CreateAnd(Ops
[0], Ops
[1]);
7514 Ops
[0] = Builder
.CreateICmp(ICmpInst::ICMP_NE
, Ops
[0],
7515 ConstantAggregateZero::get(Ty
));
7516 return Builder
.CreateSExt(Ops
[0], Ty
, "vtst");
7518 case NEON::BI__builtin_neon_vuzp_v
:
7519 case NEON::BI__builtin_neon_vuzpq_v
: {
7520 Ops
[0] = Builder
.CreateBitCast(Ops
[0], llvm::PointerType::getUnqual(Ty
));
7521 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
7522 Ops
[2] = Builder
.CreateBitCast(Ops
[2], Ty
);
7523 Value
*SV
= nullptr;
7525 for (unsigned vi
= 0; vi
!= 2; ++vi
) {
7526 SmallVector
<int, 16> Indices
;
7527 for (unsigned i
= 0, e
= VTy
->getNumElements(); i
!= e
; ++i
)
7528 Indices
.push_back(2*i
+vi
);
7530 Value
*Addr
= Builder
.CreateConstInBoundsGEP1_32(Ty
, Ops
[0], vi
);
7531 SV
= Builder
.CreateShuffleVector(Ops
[1], Ops
[2], Indices
, "vuzp");
7532 SV
= Builder
.CreateDefaultAlignedStore(SV
, Addr
);
7536 case NEON::BI__builtin_neon_vxarq_u64
: {
7537 Function
*F
= CGM
.getIntrinsic(Int
);
7538 Ops
[2] = Builder
.CreateZExt(Ops
[2], Int64Ty
);
7539 return EmitNeonCall(F
, Ops
, "");
7541 case NEON::BI__builtin_neon_vzip_v
:
7542 case NEON::BI__builtin_neon_vzipq_v
: {
7543 Ops
[0] = Builder
.CreateBitCast(Ops
[0], llvm::PointerType::getUnqual(Ty
));
7544 Ops
[1] = Builder
.CreateBitCast(Ops
[1], Ty
);
7545 Ops
[2] = Builder
.CreateBitCast(Ops
[2], Ty
);
7546 Value
*SV
= nullptr;
7548 for (unsigned vi
= 0; vi
!= 2; ++vi
) {
7549 SmallVector
<int, 16> Indices
;
7550 for (unsigned i
= 0, e
= VTy
->getNumElements(); i
!= e
; i
+= 2) {
7551 Indices
.push_back((i
+ vi
*e
) >> 1);
7552 Indices
.push_back(((i
+ vi
*e
) >> 1)+e
);
7554 Value
*Addr
= Builder
.CreateConstInBoundsGEP1_32(Ty
, Ops
[0], vi
);
7555 SV
= Builder
.CreateShuffleVector(Ops
[1], Ops
[2], Indices
, "vzip");
7556 SV
= Builder
.CreateDefaultAlignedStore(SV
, Addr
);
7560 case NEON::BI__builtin_neon_vdot_s32
:
7561 case NEON::BI__builtin_neon_vdot_u32
:
7562 case NEON::BI__builtin_neon_vdotq_s32
:
7563 case NEON::BI__builtin_neon_vdotq_u32
: {
7565 llvm::FixedVectorType::get(Int8Ty
, Ty
->getPrimitiveSizeInBits() / 8);
7566 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
7567 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vdot");
7569 case NEON::BI__builtin_neon_vfmlal_low_f16
:
7570 case NEON::BI__builtin_neon_vfmlalq_low_f16
: {
7572 llvm::FixedVectorType::get(HalfTy
, Ty
->getPrimitiveSizeInBits() / 16);
7573 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
7574 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vfmlal_low");
7576 case NEON::BI__builtin_neon_vfmlsl_low_f16
:
7577 case NEON::BI__builtin_neon_vfmlslq_low_f16
: {
7579 llvm::FixedVectorType::get(HalfTy
, Ty
->getPrimitiveSizeInBits() / 16);
7580 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
7581 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vfmlsl_low");
7583 case NEON::BI__builtin_neon_vfmlal_high_f16
:
7584 case NEON::BI__builtin_neon_vfmlalq_high_f16
: {
7586 llvm::FixedVectorType::get(HalfTy
, Ty
->getPrimitiveSizeInBits() / 16);
7587 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
7588 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vfmlal_high");
7590 case NEON::BI__builtin_neon_vfmlsl_high_f16
:
7591 case NEON::BI__builtin_neon_vfmlslq_high_f16
: {
7593 llvm::FixedVectorType::get(HalfTy
, Ty
->getPrimitiveSizeInBits() / 16);
7594 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
7595 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vfmlsl_high");
7597 case NEON::BI__builtin_neon_vmmlaq_s32
:
7598 case NEON::BI__builtin_neon_vmmlaq_u32
: {
7600 llvm::FixedVectorType::get(Int8Ty
, Ty
->getPrimitiveSizeInBits() / 8);
7601 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
7602 return EmitNeonCall(CGM
.getIntrinsic(LLVMIntrinsic
, Tys
), Ops
, "vmmla");
7604 case NEON::BI__builtin_neon_vusmmlaq_s32
: {
7606 llvm::FixedVectorType::get(Int8Ty
, Ty
->getPrimitiveSizeInBits() / 8);
7607 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
7608 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vusmmla");
7610 case NEON::BI__builtin_neon_vusdot_s32
:
7611 case NEON::BI__builtin_neon_vusdotq_s32
: {
7613 llvm::FixedVectorType::get(Int8Ty
, Ty
->getPrimitiveSizeInBits() / 8);
7614 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
7615 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vusdot");
7617 case NEON::BI__builtin_neon_vbfdot_f32
:
7618 case NEON::BI__builtin_neon_vbfdotq_f32
: {
7619 llvm::Type
*InputTy
=
7620 llvm::FixedVectorType::get(BFloatTy
, Ty
->getPrimitiveSizeInBits() / 16);
7621 llvm::Type
*Tys
[2] = { Ty
, InputTy
};
7622 return EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vbfdot");
7624 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32
: {
7625 llvm::Type
*Tys
[1] = { Ty
};
7626 Function
*F
= CGM
.getIntrinsic(Int
, Tys
);
7627 return EmitNeonCall(F
, Ops
, "vcvtfp2bf");
7632 assert(Int
&& "Expected valid intrinsic number");
7634 // Determine the type(s) of this overloaded AArch64 intrinsic.
7635 Function
*F
= LookupNeonLLVMIntrinsic(Int
, Modifier
, Ty
, E
);
7637 Value
*Result
= EmitNeonCall(F
, Ops
, NameHint
);
7638 llvm::Type
*ResultType
= ConvertType(E
->getType());
7639 // AArch64 intrinsic one-element vector type cast to
7640 // scalar type expected by the builtin
7641 return Builder
.CreateBitCast(Result
, ResultType
, NameHint
);
7644 Value
*CodeGenFunction::EmitAArch64CompareBuiltinExpr(
7645 Value
*Op
, llvm::Type
*Ty
, const CmpInst::Predicate Fp
,
7646 const CmpInst::Predicate Ip
, const Twine
&Name
) {
7647 llvm::Type
*OTy
= Op
->getType();
7649 // FIXME: this is utterly horrific. We should not be looking at previous
7650 // codegen context to find out what needs doing. Unfortunately TableGen
7651 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
7653 if (BitCastInst
*BI
= dyn_cast
<BitCastInst
>(Op
))
7654 OTy
= BI
->getOperand(0)->getType();
7656 Op
= Builder
.CreateBitCast(Op
, OTy
);
7657 if (OTy
->getScalarType()->isFloatingPointTy()) {
7658 if (Fp
== CmpInst::FCMP_OEQ
)
7659 Op
= Builder
.CreateFCmp(Fp
, Op
, Constant::getNullValue(OTy
));
7661 Op
= Builder
.CreateFCmpS(Fp
, Op
, Constant::getNullValue(OTy
));
7663 Op
= Builder
.CreateICmp(Ip
, Op
, Constant::getNullValue(OTy
));
7665 return Builder
.CreateSExt(Op
, Ty
, Name
);
7668 static Value
*packTBLDVectorList(CodeGenFunction
&CGF
, ArrayRef
<Value
*> Ops
,
7669 Value
*ExtOp
, Value
*IndexOp
,
7670 llvm::Type
*ResTy
, unsigned IntID
,
7672 SmallVector
<Value
*, 2> TblOps
;
7674 TblOps
.push_back(ExtOp
);
7676 // Build a vector containing sequential number like (0, 1, 2, ..., 15)
7677 SmallVector
<int, 16> Indices
;
7678 auto *TblTy
= cast
<llvm::FixedVectorType
>(Ops
[0]->getType());
7679 for (unsigned i
= 0, e
= TblTy
->getNumElements(); i
!= e
; ++i
) {
7680 Indices
.push_back(2*i
);
7681 Indices
.push_back(2*i
+1);
7684 int PairPos
= 0, End
= Ops
.size() - 1;
7685 while (PairPos
< End
) {
7686 TblOps
.push_back(CGF
.Builder
.CreateShuffleVector(Ops
[PairPos
],
7687 Ops
[PairPos
+1], Indices
,
7692 // If there's an odd number of 64-bit lookup table, fill the high 64-bit
7693 // of the 128-bit lookup table with zero.
7694 if (PairPos
== End
) {
7695 Value
*ZeroTbl
= ConstantAggregateZero::get(TblTy
);
7696 TblOps
.push_back(CGF
.Builder
.CreateShuffleVector(Ops
[PairPos
],
7697 ZeroTbl
, Indices
, Name
));
7701 TblOps
.push_back(IndexOp
);
7702 TblF
= CGF
.CGM
.getIntrinsic(IntID
, ResTy
);
7704 return CGF
.EmitNeonCall(TblF
, TblOps
, Name
);
7707 Value
*CodeGenFunction::GetValueForARMHint(unsigned BuiltinID
) {
7709 switch (BuiltinID
) {
7712 case clang::ARM::BI__builtin_arm_nop
:
7715 case clang::ARM::BI__builtin_arm_yield
:
7716 case clang::ARM::BI__yield
:
7719 case clang::ARM::BI__builtin_arm_wfe
:
7720 case clang::ARM::BI__wfe
:
7723 case clang::ARM::BI__builtin_arm_wfi
:
7724 case clang::ARM::BI__wfi
:
7727 case clang::ARM::BI__builtin_arm_sev
:
7728 case clang::ARM::BI__sev
:
7731 case clang::ARM::BI__builtin_arm_sevl
:
7732 case clang::ARM::BI__sevl
:
7737 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::arm_hint
),
7738 llvm::ConstantInt::get(Int32Ty
, Value
));
7741 enum SpecialRegisterAccessKind
{
7747 // Generates the IR for the read/write special register builtin,
7748 // ValueType is the type of the value that is to be written or read,
7749 // RegisterType is the type of the register being written to or read from.
7750 static Value
*EmitSpecialRegisterBuiltin(CodeGenFunction
&CGF
,
7752 llvm::Type
*RegisterType
,
7753 llvm::Type
*ValueType
,
7754 SpecialRegisterAccessKind AccessKind
,
7755 StringRef SysReg
= "") {
7756 // write and register intrinsics only support 32, 64 and 128 bit operations.
7757 assert((RegisterType
->isIntegerTy(32) || RegisterType
->isIntegerTy(64) ||
7758 RegisterType
->isIntegerTy(128)) &&
7759 "Unsupported size for register.");
7761 CodeGen::CGBuilderTy
&Builder
= CGF
.Builder
;
7762 CodeGen::CodeGenModule
&CGM
= CGF
.CGM
;
7763 LLVMContext
&Context
= CGM
.getLLVMContext();
7765 if (SysReg
.empty()) {
7766 const Expr
*SysRegStrExpr
= E
->getArg(0)->IgnoreParenCasts();
7767 SysReg
= cast
<clang::StringLiteral
>(SysRegStrExpr
)->getString();
7770 llvm::Metadata
*Ops
[] = { llvm::MDString::get(Context
, SysReg
) };
7771 llvm::MDNode
*RegName
= llvm::MDNode::get(Context
, Ops
);
7772 llvm::Value
*Metadata
= llvm::MetadataAsValue::get(Context
, RegName
);
7774 llvm::Type
*Types
[] = { RegisterType
};
7776 bool MixedTypes
= RegisterType
->isIntegerTy(64) && ValueType
->isIntegerTy(32);
7777 assert(!(RegisterType
->isIntegerTy(32) && ValueType
->isIntegerTy(64))
7778 && "Can't fit 64-bit value in 32-bit register");
7780 if (AccessKind
!= Write
) {
7781 assert(AccessKind
== NormalRead
|| AccessKind
== VolatileRead
);
7782 llvm::Function
*F
= CGM
.getIntrinsic(
7783 AccessKind
== VolatileRead
? llvm::Intrinsic::read_volatile_register
7784 : llvm::Intrinsic::read_register
,
7786 llvm::Value
*Call
= Builder
.CreateCall(F
, Metadata
);
7789 // Read into 64 bit register and then truncate result to 32 bit.
7790 return Builder
.CreateTrunc(Call
, ValueType
);
7792 if (ValueType
->isPointerTy())
7793 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
7794 return Builder
.CreateIntToPtr(Call
, ValueType
);
7799 llvm::Function
*F
= CGM
.getIntrinsic(llvm::Intrinsic::write_register
, Types
);
7800 llvm::Value
*ArgValue
= CGF
.EmitScalarExpr(E
->getArg(1));
7802 // Extend 32 bit write value to 64 bit to pass to write.
7803 ArgValue
= Builder
.CreateZExt(ArgValue
, RegisterType
);
7804 return Builder
.CreateCall(F
, { Metadata
, ArgValue
});
7807 if (ValueType
->isPointerTy()) {
7808 // Have VoidPtrTy ArgValue but want to return an i32/i64.
7809 ArgValue
= Builder
.CreatePtrToInt(ArgValue
, RegisterType
);
7810 return Builder
.CreateCall(F
, { Metadata
, ArgValue
});
7813 return Builder
.CreateCall(F
, { Metadata
, ArgValue
});
7816 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
7817 /// argument that specifies the vector type.
7818 static bool HasExtraNeonArgument(unsigned BuiltinID
) {
7819 switch (BuiltinID
) {
7821 case NEON::BI__builtin_neon_vget_lane_i8
:
7822 case NEON::BI__builtin_neon_vget_lane_i16
:
7823 case NEON::BI__builtin_neon_vget_lane_bf16
:
7824 case NEON::BI__builtin_neon_vget_lane_i32
:
7825 case NEON::BI__builtin_neon_vget_lane_i64
:
7826 case NEON::BI__builtin_neon_vget_lane_f32
:
7827 case NEON::BI__builtin_neon_vgetq_lane_i8
:
7828 case NEON::BI__builtin_neon_vgetq_lane_i16
:
7829 case NEON::BI__builtin_neon_vgetq_lane_bf16
:
7830 case NEON::BI__builtin_neon_vgetq_lane_i32
:
7831 case NEON::BI__builtin_neon_vgetq_lane_i64
:
7832 case NEON::BI__builtin_neon_vgetq_lane_f32
:
7833 case NEON::BI__builtin_neon_vduph_lane_bf16
:
7834 case NEON::BI__builtin_neon_vduph_laneq_bf16
:
7835 case NEON::BI__builtin_neon_vset_lane_i8
:
7836 case NEON::BI__builtin_neon_vset_lane_i16
:
7837 case NEON::BI__builtin_neon_vset_lane_bf16
:
7838 case NEON::BI__builtin_neon_vset_lane_i32
:
7839 case NEON::BI__builtin_neon_vset_lane_i64
:
7840 case NEON::BI__builtin_neon_vset_lane_f32
:
7841 case NEON::BI__builtin_neon_vsetq_lane_i8
:
7842 case NEON::BI__builtin_neon_vsetq_lane_i16
:
7843 case NEON::BI__builtin_neon_vsetq_lane_bf16
:
7844 case NEON::BI__builtin_neon_vsetq_lane_i32
:
7845 case NEON::BI__builtin_neon_vsetq_lane_i64
:
7846 case NEON::BI__builtin_neon_vsetq_lane_f32
:
7847 case NEON::BI__builtin_neon_vsha1h_u32
:
7848 case NEON::BI__builtin_neon_vsha1cq_u32
:
7849 case NEON::BI__builtin_neon_vsha1pq_u32
:
7850 case NEON::BI__builtin_neon_vsha1mq_u32
:
7851 case NEON::BI__builtin_neon_vcvth_bf16_f32
:
7852 case clang::ARM::BI_MoveToCoprocessor
:
7853 case clang::ARM::BI_MoveToCoprocessor2
:
7859 Value
*CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID
,
7861 ReturnValueSlot ReturnValue
,
7862 llvm::Triple::ArchType Arch
) {
7863 if (auto Hint
= GetValueForARMHint(BuiltinID
))
7866 if (BuiltinID
== clang::ARM::BI__emit
) {
7867 bool IsThumb
= getTarget().getTriple().getArch() == llvm::Triple::thumb
;
7868 llvm::FunctionType
*FTy
=
7869 llvm::FunctionType::get(VoidTy
, /*Variadic=*/false);
7871 Expr::EvalResult Result
;
7872 if (!E
->getArg(0)->EvaluateAsInt(Result
, CGM
.getContext()))
7873 llvm_unreachable("Sema will ensure that the parameter is constant");
7875 llvm::APSInt Value
= Result
.Val
.getInt();
7876 uint64_t ZExtValue
= Value
.zextOrTrunc(IsThumb
? 16 : 32).getZExtValue();
7878 llvm::InlineAsm
*Emit
=
7879 IsThumb
? InlineAsm::get(FTy
, ".inst.n 0x" + utohexstr(ZExtValue
), "",
7880 /*hasSideEffects=*/true)
7881 : InlineAsm::get(FTy
, ".inst 0x" + utohexstr(ZExtValue
), "",
7882 /*hasSideEffects=*/true);
7884 return Builder
.CreateCall(Emit
);
7887 if (BuiltinID
== clang::ARM::BI__builtin_arm_dbg
) {
7888 Value
*Option
= EmitScalarExpr(E
->getArg(0));
7889 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::arm_dbg
), Option
);
7892 if (BuiltinID
== clang::ARM::BI__builtin_arm_prefetch
) {
7893 Value
*Address
= EmitScalarExpr(E
->getArg(0));
7894 Value
*RW
= EmitScalarExpr(E
->getArg(1));
7895 Value
*IsData
= EmitScalarExpr(E
->getArg(2));
7897 // Locality is not supported on ARM target
7898 Value
*Locality
= llvm::ConstantInt::get(Int32Ty
, 3);
7900 Function
*F
= CGM
.getIntrinsic(Intrinsic::prefetch
, Address
->getType());
7901 return Builder
.CreateCall(F
, {Address
, RW
, Locality
, IsData
});
7904 if (BuiltinID
== clang::ARM::BI__builtin_arm_rbit
) {
7905 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
7906 return Builder
.CreateCall(
7907 CGM
.getIntrinsic(Intrinsic::bitreverse
, Arg
->getType()), Arg
, "rbit");
7910 if (BuiltinID
== clang::ARM::BI__builtin_arm_cls
) {
7911 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
7912 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::arm_cls
), Arg
, "cls");
7914 if (BuiltinID
== clang::ARM::BI__builtin_arm_cls64
) {
7915 llvm::Value
*Arg
= EmitScalarExpr(E
->getArg(0));
7916 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::arm_cls64
), Arg
,
7920 if (BuiltinID
== clang::ARM::BI__clear_cache
) {
7921 assert(E
->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
7922 const FunctionDecl
*FD
= E
->getDirectCallee();
7924 for (unsigned i
= 0; i
< 2; i
++)
7925 Ops
[i
] = EmitScalarExpr(E
->getArg(i
));
7926 llvm::Type
*Ty
= CGM
.getTypes().ConvertType(FD
->getType());
7927 llvm::FunctionType
*FTy
= cast
<llvm::FunctionType
>(Ty
);
7928 StringRef Name
= FD
->getName();
7929 return EmitNounwindRuntimeCall(CGM
.CreateRuntimeFunction(FTy
, Name
), Ops
);
  if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
      BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
    Function *F;

    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case clang::ARM::BI__builtin_arm_mcrr:
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
      break;
    case clang::ARM::BI__builtin_arm_mcrr2:
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
      break;
    }

    // MCRR{2} instruction has 5 operands but
    // the intrinsic has 4 because Rt and Rt2
    // are represented as a single unsigned 64
    // bit integer in the intrinsic definition
    // but internally it's represented as 2 32
    // bit integers.

    Value *Coproc = EmitScalarExpr(E->getArg(0));
    Value *Opc1 = EmitScalarExpr(E->getArg(1));
    Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
    Value *CRm = EmitScalarExpr(E->getArg(3));

    Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
    Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
    Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
    Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);

    return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
  }
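  // For example, a packed argument 0xAABBCCDD11223344 is split so that Rt
  // receives the low half (0x11223344) and Rt2 the high half (0xAABBCCDD),
  // matching the two 32-bit source registers of MCRR{2}.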
  if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
      BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
    Function *F;

    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case clang::ARM::BI__builtin_arm_mrrc:
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
      break;
    case clang::ARM::BI__builtin_arm_mrrc2:
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
      break;
    }

    Value *Coproc = EmitScalarExpr(E->getArg(0));
    Value *Opc1 = EmitScalarExpr(E->getArg(1));
    Value *CRm = EmitScalarExpr(E->getArg(2));
    Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});

    // Returns an unsigned 64 bit integer, represented
    // as two 32 bit integers.

    Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
    Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
    Rt = Builder.CreateZExt(Rt, Int64Ty);
    Rt1 = Builder.CreateZExt(Rt1, Int64Ty);

    Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
    RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
    RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);

    return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
  }
  if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
      ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
        BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
       getContext().getTypeSize(E->getType()) == 64) ||
      BuiltinID == clang::ARM::BI__ldrexd) {
    Function *F;

    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case clang::ARM::BI__builtin_arm_ldaex:
      F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
      break;
    case clang::ARM::BI__builtin_arm_ldrexd:
    case clang::ARM::BI__builtin_arm_ldrex:
    case clang::ARM::BI__ldrexd:
      F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
      break;
    }

    Value *LdPtr = EmitScalarExpr(E->getArg(0));
    Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
                                    "ldrexd");

    Value *Val0 = Builder.CreateExtractValue(Val, 1);
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
    Val0 = Builder.CreateZExt(Val0, Int64Ty);
    Val1 = Builder.CreateZExt(Val1, Int64Ty);

    Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
    Val = Builder.CreateOr(Val, Val1);
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  }
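  // In other words, the two 32-bit halves returned by ldrexd/ldaexd are
  // recombined as (Val0 << 32) | Val1 before being bitcast to the builtin's
  // 64-bit result type.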
  if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
      BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));

    QualType Ty = E->getType();
    llvm::Type *RealResTy = ConvertType(Ty);
    llvm::Type *IntTy =
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
    llvm::Type *PtrTy = IntTy->getPointerTo();
    LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);

    Function *F = CGM.getIntrinsic(
        BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
                                                       : Intrinsic::arm_ldrex,
        PtrTy);
    CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
    Val->addParamAttr(
        0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));

    if (RealResTy->isPointerTy())
      return Builder.CreateIntToPtr(Val, RealResTy);
    else {
      llvm::Type *IntResTy = llvm::IntegerType::get(
          getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
      return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
                                   RealResTy);
    }
  }
  if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
      ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
        BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
       getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
    Function *F = CGM.getIntrinsic(
        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
                                                       : Intrinsic::arm_strexd);
    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);

    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
    Value *Val = EmitScalarExpr(E->getArg(0));
    Builder.CreateStore(Val, Tmp);

    Address LdPtr = Builder.CreateElementBitCast(Tmp, STy);
    Val = Builder.CreateLoad(LdPtr);

    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
    Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
  }
  if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
      BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));

    QualType Ty = E->getArg(0)->getType();
    llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
                                                 getContext().getTypeSize(Ty));
    StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());

    if (StoreVal->getType()->isPointerTy())
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
    else {
      llvm::Type *IntTy = llvm::IntegerType::get(
          getLLVMContext(),
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
      StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
    }

    Function *F = CGM.getIntrinsic(
        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
                                                       : Intrinsic::arm_strex,
        StoreAddr->getType());

    CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
    CI->addParamAttr(
        1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
    return CI;
  }
  if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
    return Builder.CreateCall(F);
  }
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::ARM::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::arm_crc32b; break;
  case clang::ARM::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
  case clang::ARM::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::arm_crc32h; break;
  case clang::ARM::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
  case clang::ARM::BI__builtin_arm_crc32w:
  case clang::ARM::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::arm_crc32w; break;
  case clang::ARM::BI__builtin_arm_crc32cw:
  case clang::ARM::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
  }

  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));

    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
    // intrinsics, hence we need different codegen for these cases.
    if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
        BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
      Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
      Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
      Value *Arg1b = Builder.CreateLShr(Arg1, C1);
      Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);

      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
      Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
      return Builder.CreateCall(F, {Res, Arg1b});
    } else {
      Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);

      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
      return Builder.CreateCall(F, {Arg0, Arg1});
    }
  }
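  // For example, __builtin_arm_crc32d(crc, x) is emitted as two chained 32-bit
  // CRC steps: first over the low 32 bits of x, then over the high 32 bits,
  // since the underlying ARM intrinsic only accepts 32-bit data operands.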
  if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
      BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
      BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
      BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
      BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
      BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {

    SpecialRegisterAccessKind AccessKind = Write;
    if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
        BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
        BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
      AccessKind = VolatileRead;

    bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
                            BuiltinID == clang::ARM::BI__builtin_arm_wsrp;

    bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
                   BuiltinID == clang::ARM::BI__builtin_arm_wsr64;

    llvm::Type *ValueType;
    llvm::Type *RegisterType;
    if (IsPointerBuiltin) {
      ValueType = VoidPtrTy;
      RegisterType = Int32Ty;
    } else if (Is64Bit) {
      ValueType = RegisterType = Int64Ty;
    } else {
      ValueType = RegisterType = Int32Ty;
    }

    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
                                      AccessKind);
  }
  if (BuiltinID == ARM::BI__builtin_sponentry) {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }
  // Handle MSVC intrinsics before argument evaluation to prevent double
  // evaluation.
  if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
    return EmitMSVCBuiltinExpr(*MsvcIntId, E);

  // Deal with MVE builtins
  if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
    return Result;
  // Handle CDE builtins
  if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
    return Result;
  // Some intrinsics are equivalent - if they are use the base intrinsic ID.
  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
    return P.first == BuiltinID;
  });
  if (It != end(NEONEquivalentIntrinsicMap))
    BuiltinID = It->second;
  // Find out if any arguments are required to be integer constant
  // expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  Address PtrOp0 = Address::invalid();
  Address PtrOp1 = Address::invalid();
  SmallVector<Value*, 4> Ops;
  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
  for (unsigned i = 0, e = NumArgs; i != e; i++) {
    if (i == 0) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld1_v:
      case NEON::BI__builtin_neon_vld1q_v:
      case NEON::BI__builtin_neon_vld1q_lane_v:
      case NEON::BI__builtin_neon_vld1_lane_v:
      case NEON::BI__builtin_neon_vld1_dup_v:
      case NEON::BI__builtin_neon_vld1q_dup_v:
      case NEON::BI__builtin_neon_vst1_v:
      case NEON::BI__builtin_neon_vst1q_v:
      case NEON::BI__builtin_neon_vst1q_lane_v:
      case NEON::BI__builtin_neon_vst1_lane_v:
      case NEON::BI__builtin_neon_vst2_v:
      case NEON::BI__builtin_neon_vst2q_v:
      case NEON::BI__builtin_neon_vst2_lane_v:
      case NEON::BI__builtin_neon_vst2q_lane_v:
      case NEON::BI__builtin_neon_vst3_v:
      case NEON::BI__builtin_neon_vst3q_v:
      case NEON::BI__builtin_neon_vst3_lane_v:
      case NEON::BI__builtin_neon_vst3q_lane_v:
      case NEON::BI__builtin_neon_vst4_v:
      case NEON::BI__builtin_neon_vst4q_v:
      case NEON::BI__builtin_neon_vst4_lane_v:
      case NEON::BI__builtin_neon_vst4q_lane_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
        Ops.push_back(PtrOp0.getPointer());
        continue;
      }
    }
    if (i == 1) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld2_v:
      case NEON::BI__builtin_neon_vld2q_v:
      case NEON::BI__builtin_neon_vld3_v:
      case NEON::BI__builtin_neon_vld3q_v:
      case NEON::BI__builtin_neon_vld4_v:
      case NEON::BI__builtin_neon_vld4q_v:
      case NEON::BI__builtin_neon_vld2_lane_v:
      case NEON::BI__builtin_neon_vld2q_lane_v:
      case NEON::BI__builtin_neon_vld3_lane_v:
      case NEON::BI__builtin_neon_vld3q_lane_v:
      case NEON::BI__builtin_neon_vld4_lane_v:
      case NEON::BI__builtin_neon_vld4q_lane_v:
      case NEON::BI__builtin_neon_vld2_dup_v:
      case NEON::BI__builtin_neon_vld2q_dup_v:
      case NEON::BI__builtin_neon_vld3_dup_v:
      case NEON::BI__builtin_neon_vld3q_dup_v:
      case NEON::BI__builtin_neon_vld4_dup_v:
      case NEON::BI__builtin_neon_vld4q_dup_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
        Ops.push_back(PtrOp1.getPointer());
        continue;
      }
    }

    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    } else {
      // If this is required to be a constant, constant fold it so that we know
      // that the generated intrinsic gets a ConstantInt.
      Ops.push_back(llvm::ConstantInt::get(
          getLLVMContext(),
          *E->getArg(i)->getIntegerConstantExpr(getContext())));
    }
  }
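  // Note: a set bit i in ICEArguments means argument i of the builtin must be
  // an integer constant expression, so it is emitted above directly as a
  // ConstantInt rather than through EmitScalarExpr.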
  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vget_lane_bf16:
  case NEON::BI__builtin_neon_vget_lane_f32:
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
  case NEON::BI__builtin_neon_vgetq_lane_f32:
  case NEON::BI__builtin_neon_vduph_lane_bf16:
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");

  case NEON::BI__builtin_neon_vrndns_f32: {
    Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Tys[] = {Arg->getType()};
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
    return Builder.CreateCall(F, {Arg}, "vrndn"); }

  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_bf16:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");

  case NEON::BI__builtin_neon_vsha1h_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
                        "vsha1h");
  case NEON::BI__builtin_neon_vsha1cq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
                        "sha1c");
  case NEON::BI__builtin_neon_vsha1pq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
                        "sha1p");
  case NEON::BI__builtin_neon_vsha1mq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
                        "sha1m");

  case NEON::BI__builtin_neon_vcvth_bf16_f32: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
                        "vcvtbfp2bf");
  }

  // The ARM _MoveToCoprocessor builtins put the input register value as
  // the first argument, but the LLVM intrinsic expects it as the third one.
  case clang::ARM::BI_MoveToCoprocessor:
  case clang::ARM::BI_MoveToCoprocessor2: {
    Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
                                       ? Intrinsic::arm_mcr
                                       : Intrinsic::arm_mcr2);
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
                                  Ops[3], Ops[4], Ops[5]});
  }
  }
  // Get the last argument, which specifies the vector type.
  assert(HasExtraArg);
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
  std::optional<llvm::APSInt> Result =
      Arg->getIntegerConstantExpr(getContext());
  if (!Result)
    return nullptr;

  if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
      BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
    // Determine the overloaded type of this builtin.
    llvm::Type *Ty;
    if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
      Ty = FloatTy;
    else
      Ty = DoubleTy;

    // Determine whether this is an unsigned conversion or not.
    bool usgn = Result->getZExtValue() == 1;
    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;

    // Call the appropriate intrinsic.
    Function *F = CGM.getIntrinsic(Int, Ty);
    return Builder.CreateCall(F, Ops, "vcvtr");
  }

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type = Result->getZExtValue();
  bool usgn = Type.isUnsigned();
  bool rightShift = false;

  llvm::FixedVectorType *VTy =
      GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
                  getTarget().hasBFloat16Type());
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Many NEON builtins have identical semantics and uses in ARM and
  // AArch64. Emit these in a single function.
  auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
  const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
      IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
  if (Builtin)
    return EmitCommonNeonBuiltinExpr(
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
        Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
  unsigned Int;
  switch (BuiltinID) {
  default: return nullptr;
  case NEON::BI__builtin_neon_vld1q_lane_v:
    // Handle 64-bit integer elements as a special case.  Use shuffles of
    // one-element vectors to avoid poor code for i64 in the backend.
    if (VTy->getElementType()->isIntegerTy(64)) {
      // Extract the other lane.
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
      Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      // Load the value as a one-element vector.
      Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
      llvm::Type *Tys[] = {Ty, Int8PtrTy};
      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
      Value *Align = getAlignmentValue32(PtrOp0);
      Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
      // Combine them.
      int Indices[] = {1 - Lane, Lane};
      return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
    }
    [[fallthrough]];
  case NEON::BI__builtin_neon_vld1_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
    Value *Ld = Builder.CreateLoad(PtrOp0);
    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
  }
  case NEON::BI__builtin_neon_vqrshrn_n_v:
    Int =
        usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
                        1, true);
  case NEON::BI__builtin_neon_vqrshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
                        Ops, "vqrshrun_n", 1, true);
  case NEON::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
                        1, true);
  case NEON::BI__builtin_neon_vqshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
                        Ops, "vqshrun_n", 1, true);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
                        Ops, "vrecpe");
  case NEON::BI__builtin_neon_vrshrn_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
                        Ops, "vrshrn_n", 1, true);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v:
    rightShift = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v:
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
                        Ops, "vsli_n");
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vst1q_lane_v:
    // Handle 64-bit integer elements as a special case.  Use a shuffle to get
    // a one-element vector and avoid poor code for i64 in the backend.
    if (VTy->getElementType()->isIntegerTy(64)) {
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      Ops[2] = getAlignmentValue32(PtrOp0);
      llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
      return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
                                                 Tys), Ops);
    }
    [[fallthrough]];
  case NEON::BI__builtin_neon_vst1_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    auto St = Builder.CreateStore(
        Ops[1], Builder.CreateElementBitCast(PtrOp0, Ops[1]->getType()));
    return St;
  }
  case NEON::BI__builtin_neon_vtbl1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
                        Ops, "vtbl1");
  case NEON::BI__builtin_neon_vtbl2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
                        Ops, "vtbl2");
  case NEON::BI__builtin_neon_vtbl3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
                        Ops, "vtbl3");
  case NEON::BI__builtin_neon_vtbl4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
                        Ops, "vtbl4");
  case NEON::BI__builtin_neon_vtbx1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
                        Ops, "vtbx1");
  case NEON::BI__builtin_neon_vtbx2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
                        Ops, "vtbx2");
  case NEON::BI__builtin_neon_vtbx3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
                        Ops, "vtbx3");
  case NEON::BI__builtin_neon_vtbx4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
                        Ops, "vtbx4");
  }
}
template<typename Integer>
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
  return E->getIntegerConstantExpr(Context)->getExtValue();
}

static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
                                     llvm::Type *T, bool Unsigned) {
  // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
  // which finds it convenient to specify signed/unsigned as a boolean flag.
  return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
}

static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
                                    uint32_t Shift, bool Unsigned) {
  // MVE helper function for integer shift right. This must handle signed vs
  // unsigned, and also deal specially with the case where the shift count is
  // equal to the lane size. In LLVM IR, an LShr with that parameter would be
  // undefined behavior, but in MVE it's legal, so we must convert it to code
  // that is not undefined in IR.
  unsigned LaneBits = cast<llvm::VectorType>(V->getType())
                          ->getElementType()
                          ->getPrimitiveSizeInBits();
  if (Shift == LaneBits) {
    // An unsigned shift of the full lane size always generates zero, so we can
    // simply emit a zero vector. A signed shift of the full lane size does the
    // same thing as shifting by one bit fewer.
    if (Unsigned)
      return llvm::Constant::getNullValue(V->getType());
    else
      --Shift;
  }
  return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
}
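// For example, for a vector of 32-bit lanes a shift count of 32 produces a
// zero vector in the unsigned case and is emitted as an arithmetic shift by 31
// in the signed case.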
static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
  // MVE-specific helper function for a vector splat, which infers the element
  // count of the output vector by knowing that MVE vectors are all 128 bits
  // wide.
  unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
  return Builder.CreateVectorSplat(Elements, V);
}
static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
                                            CodeGenFunction *CGF,
                                            llvm::Value *V,
                                            llvm::Type *DestType) {
  // Convert one MVE vector type into another by reinterpreting its in-register
  // format.
  //
  // Little-endian, this is identical to a bitcast (which reinterprets the
  // memory format). But big-endian, they're not necessarily the same, because
  // the register and memory formats map to each other differently depending on
  // the lane size.
  //
  // We generate a bitcast whenever we can (if we're little-endian, or if the
  // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
  // that performs the different kind of reinterpretation.
  if (CGF->getTarget().isBigEndian() &&
      V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
    return Builder.CreateCall(
        CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
                              {DestType, V->getType()}),
        V);
  } else {
    return Builder.CreateBitCast(V, DestType);
  }
}
static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
  // Make a shufflevector that extracts every other element of a vector (evens
  // or odds, as desired).
  SmallVector<int, 16> Indices;
  unsigned InputElements =
      cast<llvm::FixedVectorType>(V->getType())->getNumElements();
  for (unsigned i = 0; i < InputElements; i += 2)
    Indices.push_back(i + Odd);
  return Builder.CreateShuffleVector(V, Indices);
}

static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
                              llvm::Value *V1) {
  // Make a shufflevector that interleaves two vectors element by element.
  assert(V0->getType() == V1->getType() && "Can't zip different vector types");
  SmallVector<int, 16> Indices;
  unsigned InputElements =
      cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
  for (unsigned i = 0; i < InputElements; i++) {
    Indices.push_back(i);
    Indices.push_back(i + InputElements);
  }
  return Builder.CreateShuffleVector(V0, V1, Indices);
}
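// For example, zipping two 4-element vectors produces the shuffle mask
// {0, 4, 1, 5, 2, 6, 3, 7}, i.e. V0[0], V1[0], V0[1], V1[1], ...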
template<unsigned HighBit, unsigned OtherBits>
static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
  // MVE-specific helper function to make a vector splat of a constant such as
  // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
  llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
  unsigned LaneBits = T->getPrimitiveSizeInBits();
  uint32_t Value = HighBit << (LaneBits - 1);
  if (OtherBits)
    Value |= (1UL << (LaneBits - 1)) - 1;
  llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
  return ARMMVEVectorSplat(Builder, Lane);
}
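// For example, with 32-bit lanes ARMMVEConstantSplat<1, 0> splats 0x80000000
// (INT32_MIN), <1, 1> splats 0xffffffff (UINT32_MAX), and <0, 1> splats
// 0x7fffffff (INT32_MAX).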
static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
                                               llvm::Value *V,
                                               unsigned ReverseWidth) {
  // MVE-specific helper function which reverses the elements of a
  // vector within every (ReverseWidth)-bit collection of lanes.
  SmallVector<int, 16> Indices;
  unsigned LaneSize = V->getType()->getScalarSizeInBits();
  unsigned Elements = 128 / LaneSize;
  unsigned Mask = ReverseWidth / LaneSize - 1;
  for (unsigned i = 0; i < Elements; i++)
    Indices.push_back(i ^ Mask);
  return Builder.CreateShuffleVector(V, Indices);
}
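// For example, with 8-bit lanes and ReverseWidth == 32, Mask is 3 and the
// shuffle indices are i ^ 3, i.e. {3, 2, 1, 0, 7, 6, 5, 4, ...}, reversing the
// bytes within each 32-bit group.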
Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E,
                                              ReturnValueSlot ReturnValue,
                                              llvm::Triple::ArchType Arch) {
  enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
  Intrinsic::ID IRIntr;
  unsigned NumVectors;

  // Code autogenerated by Tablegen will handle all the simple builtins.
  switch (BuiltinID) {
    #include "clang/Basic/arm_mve_builtin_cg.inc"

    // If we didn't match an MVE builtin id at all, go back to the
    // main EmitARMBuiltinExpr.
  default:
    return nullptr;
  }

  // Anything that breaks from that switch is an MVE builtin that
  // needs handwritten code to generate.

  switch (CustomCodeGenType) {

  case CustomCodeGen::VLD24: {
    llvm::SmallVector<Value *, 4> Ops;
    llvm::SmallVector<llvm::Type *, 4> Tys;

    auto MvecCType = E->getType();
    auto MvecLType = ConvertType(MvecCType);
    assert(MvecLType->isStructTy() &&
           "Return type for vld[24]q should be a struct");
    assert(MvecLType->getStructNumElements() == 1 &&
           "Return-type struct for vld[24]q should have one element");
    auto MvecLTypeInner = MvecLType->getStructElementType(0);
    assert(MvecLTypeInner->isArrayTy() &&
           "Return-type struct for vld[24]q should contain an array");
    assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
           "Array member of return-type struct vld[24]q has wrong length");
    auto VecLType = MvecLTypeInner->getArrayElementType();

    Tys.push_back(VecLType);

    auto Addr = E->getArg(0);
    Ops.push_back(EmitScalarExpr(Addr));
    Tys.push_back(ConvertType(Addr->getType()));

    Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
    Value *LoadResult = Builder.CreateCall(F, Ops);
    Value *MvecOut = PoisonValue::get(MvecLType);
    for (unsigned i = 0; i < NumVectors; ++i) {
      Value *Vec = Builder.CreateExtractValue(LoadResult, i);
      MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
    }

    if (ReturnValue.isNull())
      return MvecOut;

    return Builder.CreateStore(MvecOut, ReturnValue.getValue());
  }

  case CustomCodeGen::VST24: {
    llvm::SmallVector<Value *, 4> Ops;
    llvm::SmallVector<llvm::Type *, 4> Tys;

    auto Addr = E->getArg(0);
    Ops.push_back(EmitScalarExpr(Addr));
    Tys.push_back(ConvertType(Addr->getType()));

    auto MvecCType = E->getArg(1)->getType();
    auto MvecLType = ConvertType(MvecCType);
    assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
    assert(MvecLType->getStructNumElements() == 1 &&
           "Data-type struct for vst2q should have one element");
    auto MvecLTypeInner = MvecLType->getStructElementType(0);
    assert(MvecLTypeInner->isArrayTy() &&
           "Data-type struct for vst2q should contain an array");
    assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
           "Array member of return-type struct vld[24]q has wrong length");
    auto VecLType = MvecLTypeInner->getArrayElementType();

    Tys.push_back(VecLType);

    AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
    EmitAggExpr(E->getArg(1), MvecSlot);
    auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
    for (unsigned i = 0; i < NumVectors; i++)
      Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));

    Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
    Value *ToReturn = nullptr;
    for (unsigned i = 0; i < NumVectors; i++) {
      Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
      ToReturn = Builder.CreateCall(F, Ops);
      Ops.pop_back();
    }
    return ToReturn;
  }
  }
  llvm_unreachable("unknown custom codegen type.");
}
Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E,
                                              ReturnValueSlot ReturnValue,
                                              llvm::Triple::ArchType Arch) {
  switch (BuiltinID) {
  default:
    return nullptr;
#include "clang/Basic/arm_cde_builtin_cg.inc"
  }
}
static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
                                        const CallExpr *E,
                                        SmallVectorImpl<Value *> &Ops,
                                        llvm::Triple::ArchType Arch) {
  unsigned int Int = 0;
  const char *s = nullptr;

  switch (BuiltinID) {
  default:
    return nullptr;
  case NEON::BI__builtin_neon_vtbl1_v:
  case NEON::BI__builtin_neon_vqtbl1_v:
  case NEON::BI__builtin_neon_vqtbl1q_v:
  case NEON::BI__builtin_neon_vtbl2_v:
  case NEON::BI__builtin_neon_vqtbl2_v:
  case NEON::BI__builtin_neon_vqtbl2q_v:
  case NEON::BI__builtin_neon_vtbl3_v:
  case NEON::BI__builtin_neon_vqtbl3_v:
  case NEON::BI__builtin_neon_vqtbl3q_v:
  case NEON::BI__builtin_neon_vtbl4_v:
  case NEON::BI__builtin_neon_vqtbl4_v:
  case NEON::BI__builtin_neon_vqtbl4q_v:
    break;
  case NEON::BI__builtin_neon_vtbx1_v:
  case NEON::BI__builtin_neon_vqtbx1_v:
  case NEON::BI__builtin_neon_vqtbx1q_v:
  case NEON::BI__builtin_neon_vtbx2_v:
  case NEON::BI__builtin_neon_vqtbx2_v:
  case NEON::BI__builtin_neon_vqtbx2q_v:
  case NEON::BI__builtin_neon_vtbx3_v:
  case NEON::BI__builtin_neon_vqtbx3_v:
  case NEON::BI__builtin_neon_vqtbx3q_v:
  case NEON::BI__builtin_neon_vtbx4_v:
  case NEON::BI__builtin_neon_vqtbx4_v:
  case NEON::BI__builtin_neon_vqtbx4q_v:
    break;
  }

  assert(E->getNumArgs() >= 3);

  // Get the last argument, which specifies the vector type.
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  std::optional<llvm::APSInt> Result =
      Arg->getIntegerConstantExpr(CGF.getContext());
  if (!Result)
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type = Result->getZExtValue();
  llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
  if (!Ty)
    return nullptr;

  CodeGen::CGBuilderTy &Builder = CGF.Builder;

  // AArch64 scalar builtins are not overloaded, they do not have an extra
  // argument that specifies the vector type, need to handle each case.
  switch (BuiltinID) {
  case NEON::BI__builtin_neon_vtbl1_v: {
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
                              Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
  }
  case NEON::BI__builtin_neon_vtbl2_v: {
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
                              Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
  }
  case NEON::BI__builtin_neon_vtbl3_v: {
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
                              Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
  }
  case NEON::BI__builtin_neon_vtbl4_v: {
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
                              Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
  }
  case NEON::BI__builtin_neon_vtbx1_v: {
    Value *TblRes =
        packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
                           Intrinsic::aarch64_neon_tbl1, "vtbl1");

    llvm::Constant *EightV = ConstantInt::get(Ty, 8);
    Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
    CmpRes = Builder.CreateSExt(CmpRes, Ty);

    Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
    Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
    return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
  }
  case NEON::BI__builtin_neon_vtbx2_v: {
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
                              Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
  }
  case NEON::BI__builtin_neon_vtbx3_v: {
    Value *TblRes =
        packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
                           Intrinsic::aarch64_neon_tbl2, "vtbl2");

    llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
    Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
                                       TwentyFourV);
    CmpRes = Builder.CreateSExt(CmpRes, Ty);

    Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
    Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
    return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
  }
  case NEON::BI__builtin_neon_vtbx4_v: {
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
                              Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
  }
  case NEON::BI__builtin_neon_vqtbl1_v:
  case NEON::BI__builtin_neon_vqtbl1q_v:
    Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
  case NEON::BI__builtin_neon_vqtbl2_v:
  case NEON::BI__builtin_neon_vqtbl2q_v:
    Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
  case NEON::BI__builtin_neon_vqtbl3_v:
  case NEON::BI__builtin_neon_vqtbl3q_v:
    Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
  case NEON::BI__builtin_neon_vqtbl4_v:
  case NEON::BI__builtin_neon_vqtbl4q_v:
    Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
  case NEON::BI__builtin_neon_vqtbx1_v:
  case NEON::BI__builtin_neon_vqtbx1q_v:
    Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
  case NEON::BI__builtin_neon_vqtbx2_v:
  case NEON::BI__builtin_neon_vqtbx2q_v:
    Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
  case NEON::BI__builtin_neon_vqtbx3_v:
  case NEON::BI__builtin_neon_vqtbx3q_v:
    Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
  case NEON::BI__builtin_neon_vqtbx4_v:
  case NEON::BI__builtin_neon_vqtbx4q_v:
    Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
  }

  if (!Int)
    return nullptr;

  Function *F = CGF.CGM.getIntrinsic(Int, Ty);
  return CGF.EmitNeonCall(F, Ops, s);
}
Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
  auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
  Op = Builder.CreateBitCast(Op, Int16Ty);
  Value *V = PoisonValue::get(VTy);
  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
  Op = Builder.CreateInsertElement(V, Op, CI);
  return Op;
}
/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
/// access builtin. Only required if it can't be inferred from the base pointer
/// operand.
llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
  switch (TypeFlags.getMemEltType()) {
  case SVETypeFlags::MemEltTyDefault:
    return getEltType(TypeFlags);
  case SVETypeFlags::MemEltTyInt8:
    return Builder.getInt8Ty();
  case SVETypeFlags::MemEltTyInt16:
    return Builder.getInt16Ty();
  case SVETypeFlags::MemEltTyInt32:
    return Builder.getInt32Ty();
  case SVETypeFlags::MemEltTyInt64:
    return Builder.getInt64Ty();
  }
  llvm_unreachable("Unknown MemEltType");
}
llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
  switch (TypeFlags.getEltType()) {
  default:
    llvm_unreachable("Invalid SVETypeFlag!");

  case SVETypeFlags::EltTyInt8:
    return Builder.getInt8Ty();
  case SVETypeFlags::EltTyInt16:
    return Builder.getInt16Ty();
  case SVETypeFlags::EltTyInt32:
    return Builder.getInt32Ty();
  case SVETypeFlags::EltTyInt64:
    return Builder.getInt64Ty();
  case SVETypeFlags::EltTyInt128:
    return Builder.getInt128Ty();

  case SVETypeFlags::EltTyFloat16:
    return Builder.getHalfTy();
  case SVETypeFlags::EltTyFloat32:
    return Builder.getFloatTy();
  case SVETypeFlags::EltTyFloat64:
    return Builder.getDoubleTy();

  case SVETypeFlags::EltTyBFloat16:
    return Builder.getBFloatTy();

  case SVETypeFlags::EltTyBool8:
  case SVETypeFlags::EltTyBool16:
  case SVETypeFlags::EltTyBool32:
  case SVETypeFlags::EltTyBool64:
    return Builder.getInt1Ty();
  }
}
// Return the llvm predicate vector type corresponding to the specified element
// TypeFlags.
llvm::ScalableVectorType *
CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
  switch (TypeFlags.getEltType()) {
  default: llvm_unreachable("Unhandled SVETypeFlag!");

  case SVETypeFlags::EltTyInt8:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
  case SVETypeFlags::EltTyInt16:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  case SVETypeFlags::EltTyInt32:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
  case SVETypeFlags::EltTyInt64:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);

  case SVETypeFlags::EltTyBFloat16:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  case SVETypeFlags::EltTyFloat16:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  case SVETypeFlags::EltTyFloat32:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
  case SVETypeFlags::EltTyFloat64:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);

  case SVETypeFlags::EltTyBool8:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
  case SVETypeFlags::EltTyBool16:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  case SVETypeFlags::EltTyBool32:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
  case SVETypeFlags::EltTyBool64:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
  }
}
// Return the llvm vector type corresponding to the specified element TypeFlags.
llvm::ScalableVectorType *
CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
  switch (TypeFlags.getEltType()) {
  default:
    llvm_unreachable("Invalid SVETypeFlag!");

  case SVETypeFlags::EltTyInt8:
    return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
  case SVETypeFlags::EltTyInt16:
    return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
  case SVETypeFlags::EltTyInt32:
    return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
  case SVETypeFlags::EltTyInt64:
    return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);

  case SVETypeFlags::EltTyFloat16:
    return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
  case SVETypeFlags::EltTyBFloat16:
    return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
  case SVETypeFlags::EltTyFloat32:
    return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
  case SVETypeFlags::EltTyFloat64:
    return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);

  case SVETypeFlags::EltTyBool8:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
  case SVETypeFlags::EltTyBool16:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  case SVETypeFlags::EltTyBool32:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
  case SVETypeFlags::EltTyBool64:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
  }
}
llvm::Value *
CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
  Function *Ptrue =
      CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
  return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
}

constexpr unsigned SVEBitsPerBlock = 128;

static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
  unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
  return llvm::ScalableVectorType::get(EltTy, NumElts);
}
// Reinterpret the input predicate so that it can be used to correctly isolate
// the elements of the specified datatype.
Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
                                             llvm::ScalableVectorType *VTy) {
  auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
  if (Pred->getType() == RTy)
    return Pred;

  unsigned IntID;
  llvm::Type *IntrinsicTy;
  switch (VTy->getMinNumElements()) {
  default:
    llvm_unreachable("unsupported element count!");
  case 2:
  case 4:
  case 8:
    IntID = Intrinsic::aarch64_sve_convert_from_svbool;
    IntrinsicTy = RTy;
    break;
  case 16:
    IntID = Intrinsic::aarch64_sve_convert_to_svbool;
    IntrinsicTy = Pred->getType();
    break;
  }

  Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
  Value *C = Builder.CreateCall(F, Pred);
  assert(C->getType() == RTy && "Unexpected return type!");
  return C;
}

Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
                                          SmallVectorImpl<Value *> &Ops,
                                          unsigned IntID) {
  auto *ResultTy = getSVEType(TypeFlags);
  auto *OverloadedTy =
      llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);

  // At the ACLE level there's only one predicate type, svbool_t, which is
  // mapped to <n x 16 x i1>. However, this might be incompatible with the
  // actual type being loaded. For example, when loading doubles (i64) the
  // predicate should be <n x 2 x i1> instead. At the IR level the type of
  // the predicate and the data being loaded must match. Cast accordingly.
  Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);

  Function *F = nullptr;
  if (Ops[1]->getType()->isVectorTy())
    // This is the "vector base, scalar offset" case. In order to uniquely
    // map this built-in to an LLVM IR intrinsic, we need both the return type
    // and the type of the vector base.
    F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
  else
    // This is the "scalar base, vector offset case". The type of the offset
    // is encoded in the name of the intrinsic. We only need to specify the
    // return type in order to uniquely map this built-in to an LLVM IR
    // intrinsic.
    F = CGM.getIntrinsic(IntID, OverloadedTy);

  // Pass 0 when the offset is missing. This can only be applied when using
  // the "vector base" addressing mode for which ACLE allows no offset. The
  // corresponding LLVM IR always requires an offset.
  if (Ops.size() == 2) {
    assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
    Ops.push_back(ConstantInt::get(Int64Ty, 0));
  }

  // For "vector base, scalar index" scale the index so that it becomes a
  // "vector base, scalar offset".
  if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
    unsigned BytesPerElt =
        OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
    Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
  }

  Value *Call = Builder.CreateCall(F, Ops);

  // The following sext/zext is only needed when ResultTy != OverloadedTy. In
  // other cases it's folded into a nop.
  return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
                                  : Builder.CreateSExt(Call, ResultTy);
}
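// For example, when the memory element type is 16 bits wide, BytesPerElt is 2
// and the scalar index above is shifted left by 1 so that it becomes a byte
// offset before the gather intrinsic is called.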
Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
                                            SmallVectorImpl<Value *> &Ops,
                                            unsigned IntID) {
  auto *SrcDataTy = getSVEType(TypeFlags);
  auto *OverloadedTy =
      llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);

  // In ACLE the source data is passed in the last argument, whereas in LLVM IR
  // it's the first argument. Move it accordingly.
  Ops.insert(Ops.begin(), Ops.pop_back_val());

  Function *F = nullptr;
  if (Ops[2]->getType()->isVectorTy())
    // This is the "vector base, scalar offset" case. In order to uniquely
    // map this built-in to an LLVM IR intrinsic, we need both the return type
    // and the type of the vector base.
    F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
  else
    // This is the "scalar base, vector offset case". The type of the offset
    // is encoded in the name of the intrinsic. We only need to specify the
    // return type in order to uniquely map this built-in to an LLVM IR
    // intrinsic.
    F = CGM.getIntrinsic(IntID, OverloadedTy);

  // Pass 0 when the offset is missing. This can only be applied when using
  // the "vector base" addressing mode for which ACLE allows no offset. The
  // corresponding LLVM IR always requires an offset.
  if (Ops.size() == 3) {
    assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
    Ops.push_back(ConstantInt::get(Int64Ty, 0));
  }

  // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
  // folded into a nop.
  Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);

  // At the ACLE level there's only one predicate type, svbool_t, which is
  // mapped to <n x 16 x i1>. However, this might be incompatible with the
  // actual type being stored. For example, when storing doubles (i64) the
  // predicate should be <n x 2 x i1> instead. At the IR level the type of
  // the predicate and the data being stored must match. Cast accordingly.
  Ops[1] = EmitSVEPredicateCast(Ops[1], OverloadedTy);

  // For "vector base, scalar index" scale the index so that it becomes a
  // "vector base, scalar offset".
  if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
    unsigned BytesPerElt =
        OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
    Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
  }

  return Builder.CreateCall(F, Ops);
}

Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
                                              SmallVectorImpl<Value *> &Ops,
                                              unsigned IntID) {
  // The gather prefetches are overloaded on the vector input - this can either
  // be the vector of base addresses or vector of offsets.
  auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
  if (!OverloadedTy)
    OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());

  // Cast the predicate from svbool_t to the right number of elements.
  Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);

  // vector + imm addressing modes
  if (Ops[1]->getType()->isVectorTy()) {
    if (Ops.size() == 3) {
      // Pass 0 for 'vector+imm' when the index is omitted.
      Ops.push_back(ConstantInt::get(Int64Ty, 0));

      // The sv_prfop is the last operand in the builtin and IR intrinsic.
      std::swap(Ops[2], Ops[3]);
    } else {
      // Index needs to be passed as scaled offset.
      llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
      unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
      if (BytesPerElt > 1)
        Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
    }
  }

  Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
  return Builder.CreateCall(F, Ops);
}
Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
                                          SmallVectorImpl<Value*> &Ops,
                                          unsigned IntID) {
  llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
  auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
  auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());

  unsigned N;
  switch (IntID) {
  case Intrinsic::aarch64_sve_ld2_sret:
    N = 2;
    break;
  case Intrinsic::aarch64_sve_ld3_sret:
    N = 3;
    break;
  case Intrinsic::aarch64_sve_ld4_sret:
    N = 4;
    break;
  default:
    llvm_unreachable("unknown intrinsic!");
  }
  auto RetTy = llvm::VectorType::get(VTy->getElementType(),
                                     VTy->getElementCount() * N);

  Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
  Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);

  // Does the load have an offset?
  if (Ops.size() > 2)
    BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);

  BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
  Function *F = CGM.getIntrinsic(IntID, {VTy});
  Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
  unsigned MinElts = VTy->getMinNumElements();
  Value *Ret = llvm::PoisonValue::get(RetTy);
  for (unsigned I = 0; I < N; I++) {
    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
    Value *SRet = Builder.CreateExtractValue(Call, I);
    Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
  }
  return Ret;
}
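// For example, an ld2 of <vscale x 4 x i32> yields two part vectors that are
// inserted at element offsets 0 and 4 of a single <vscale x 8 x i32> result.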
Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
                                           SmallVectorImpl<Value*> &Ops,
                                           unsigned IntID) {
  llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
  auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
  auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());

  unsigned N;
  switch (IntID) {
  case Intrinsic::aarch64_sve_st2:
    N = 2;
    break;
  case Intrinsic::aarch64_sve_st3:
    N = 3;
    break;
  case Intrinsic::aarch64_sve_st4:
    N = 4;
    break;
  default:
    llvm_unreachable("unknown intrinsic!");
  }

  Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
  Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);

  // Does the store have an offset?
  if (Ops.size() > 3)
    BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);

  BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
  Value *Val = Ops.back();

  // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
  // need to break up the tuple vector.
  SmallVector<llvm::Value*, 5> Operands;
  unsigned MinElts = VTy->getElementCount().getKnownMinValue();
  for (unsigned I = 0; I < N; ++I) {
    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
    Operands.push_back(Builder.CreateExtractVector(VTy, Val, Idx));
  }
  Operands.append({Predicate, BasePtr});

  Function *F = CGM.getIntrinsic(IntID, { VTy });
  return Builder.CreateCall(F, Operands);
}
// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
// svpmullt_pair intrinsics, with the exception that their results are bitcast
// to a wider type.
Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
                                     SmallVectorImpl<Value *> &Ops,
                                     unsigned BuiltinID) {
  // Splat scalar operand to vector (intrinsics with _n infix)
  if (TypeFlags.hasSplatOperand()) {
    unsigned OpNo = TypeFlags.getSplatOperand();
    Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
  }

  // The pair-wise function has a narrower overloaded type.
  Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
  Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});

  // Now bitcast to the wider result type.
  llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
  return EmitSVEReinterpret(Call, Ty);
}

Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
                                    ArrayRef<Value *> Ops, unsigned BuiltinID) {
  llvm::Type *OverloadedTy = getSVEType(TypeFlags);
  Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
  return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
}
Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
                                            SmallVectorImpl<Value *> &Ops,
                                            unsigned BuiltinID) {
  auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
  auto *VectorTy = getSVEVectorForElementType(MemEltTy);
  auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);

  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
  Value *BasePtr = Ops[1];

  // Implement the index operand if not omitted.
  if (Ops.size() > 3) {
    BasePtr = Builder.CreateBitCast(BasePtr, MemoryTy->getPointerTo());
    BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
  }

  // Prefetch intrinsics always expect an i8*
  BasePtr = Builder.CreateBitCast(BasePtr, llvm::PointerType::getUnqual(Int8Ty));
  Value *PrfOp = Ops.back();

  Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
  return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
}
Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
                                          llvm::Type *ReturnTy,
                                          SmallVectorImpl<Value *> &Ops,
                                          unsigned BuiltinID,
                                          bool IsZExtReturn) {
  QualType LangPTy = E->getArg(1)->getType();
  llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
      LangPTy->castAs<PointerType>()->getPointeeType());

  // The vector type that is returned may be different from the
  // eventual type loaded from memory.
  auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
  auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);

  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
  Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());

  // Does the load have an offset?
  if (Ops.size() > 2)
    BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);

  BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
  Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
  auto *Load =
      cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
  auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
  CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);

  return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
                      : Builder.CreateSExt(Load, VectorTy);
}
Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
                                           SmallVectorImpl<Value *> &Ops,
                                           unsigned BuiltinID) {
  QualType LangPTy = E->getArg(1)->getType();
  llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
      LangPTy->castAs<PointerType>()->getPointeeType());

  // The vector type that is stored may be different from the
  // eventual type stored to memory.
  auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
  auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);

  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
  Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());

  // Does the store have an offset?
  if (Ops.size() == 4)
    BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);

  // Last value is always the data.
  llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy);

  BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
  Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
  auto *Store =
      cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
  auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
  CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
  return Store;
}
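// The SME ZA load/store builtins address a tile slice as base + offset;
// EmitTileslice materialises that sum and EmitSMELd1St1 rebuilds the operand
// list in the order the LLVM intrinsic expects (predicate, pointer, tile id,
// slice index).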
Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
  llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int32Ty, false);
  return Builder.CreateAdd(Base, CastOffset, "tileslice");
}

Value *CodeGenFunction::EmitSMELd1St1(SVETypeFlags TypeFlags,
                                      SmallVectorImpl<Value *> &Ops,
                                      unsigned IntID) {
  Ops[3] = EmitSVEPredicateCast(
      Ops[3], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));

  SmallVector<Value *> NewOps;
  NewOps.push_back(Ops[3]);

  llvm::Value *BasePtr = Ops[4];

  // If the intrinsic contains the vnum parameter, multiply it with the vector
  // size in bytes.
  if (Ops.size() == 6) {
    Function *StreamingVectorLength =
        CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
    llvm::Value *StreamingVectorLengthCall =
        Builder.CreateCall(StreamingVectorLength);
    llvm::Value *Mulvl =
        Builder.CreateMul(StreamingVectorLengthCall, Ops[5], "mulvl");
    // The type of the ptr parameter is void *, so use Int8Ty here.
    BasePtr = Builder.CreateGEP(Int8Ty, Ops[4], Mulvl);
  }
  NewOps.push_back(BasePtr);
  NewOps.push_back(Ops[0]);
  NewOps.push_back(EmitTileslice(Ops[2], Ops[1]));
  Function *F = CGM.getIntrinsic(IntID);
  return Builder.CreateCall(F, NewOps);
}
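// EmitSVEDupX splats a scalar across every lane of a scalable vector; the
// overload without an explicit type derives the vector type from the scalar's
// element type.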
// Limit the usage of scalable llvm IR generated by the ACLE by using the
// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
  return Builder.CreateVectorSplat(
      cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
}

Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
  return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
}

Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
  // FIXME: For big endian this needs an additional REV, or needs a separate
  // intrinsic that is code-generated as a no-op, because the LLVM bitcast
  // instruction is defined as 'bitwise' equivalent from memory point of
  // view (when storing/reloading), whereas the svreinterpret builtin
  // implements bitwise equivalent cast from register point of view.
  // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
  return Builder.CreateBitCast(Val, Ty);
}

static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
                                      SmallVectorImpl<Value *> &Ops) {
  auto *SplatZero = Constant::getNullValue(Ty);
  Ops.insert(Ops.begin(), SplatZero);
}

static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
                                       SmallVectorImpl<Value *> &Ops) {
  auto *SplatUndef = UndefValue::get(Ty);
  Ops.insert(Ops.begin(), SplatUndef);
}
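// getSVEOverloadTypes picks the types used to name the overloaded LLVM
// intrinsic: none at all, the default SVE type for the builtin, or the
// special pairs used by the while/while-RW and conversion flavours.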
SmallVector<llvm::Type *, 2>
CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
                                     llvm::Type *ResultType,
                                     ArrayRef<Value *> Ops) {
  if (TypeFlags.isOverloadNone())
    return {};

  llvm::Type *DefaultType = getSVEType(TypeFlags);

  if (TypeFlags.isOverloadWhile())
    return {DefaultType, Ops[1]->getType()};

  if (TypeFlags.isOverloadWhileRW())
    return {getSVEPredType(TypeFlags), Ops[0]->getType()};

  if (TypeFlags.isOverloadCvt())
    return {Ops[0]->getType(), Ops.back()->getType()};

  assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
  return {DefaultType};
}
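// SVE tuple types (svint32x2_t and friends) are represented as one wide
// scalable vector, so tuple get/set/create are emitted as extract/insert of a
// subvector at a multiple of the single-vector element count.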
Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
                                             llvm::Type *Ty,
                                             ArrayRef<Value *> Ops) {
  assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
         "Expects TypeFlags.isTupleSet() or TypeFlags.isTupleGet()");

  unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
  auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
      TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
  Value *Idx = ConstantInt::get(CGM.Int64Ty,
                                I * SingleVecTy->getMinNumElements());

  if (TypeFlags.isTupleSet())
    return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
  return Builder.CreateExtractVector(Ty, Ops[0], Idx);
}

Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
                                           llvm::Type *Ty,
                                           ArrayRef<Value *> Ops) {
  assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");

  auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
  unsigned MinElts = SrcTy->getMinNumElements();
  Value *Call = llvm::PoisonValue::get(Ty);
  for (unsigned I = 0; I < Ops.size(); I++) {
    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
    Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
  }
  return Call;
}
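// EmitAArch64SVEBuiltinExpr is table driven: most builtins are described by
// AArch64SVEIntrinsicMap and handled by the generic path below; builtins
// without a 1:1 LLVM intrinsic fall through to the switch at the end.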
Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
                                                  const CallExpr *E) {
  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  llvm::Type *Ty = ConvertType(E->getType());
  if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
      BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
    Value *Val = EmitScalarExpr(E->getArg(0));
    return EmitSVEReinterpret(Val, Ty);
  }

  llvm::SmallVector<Value *, 4> Ops;
  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    if ((ICEArguments & (1 << i)) == 0)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    else {
      // If this is required to be a constant, constant fold it so that we know
      // that the generated intrinsic gets a ConstantInt.
      std::optional<llvm::APSInt> Result =
          E->getArg(i)->getIntegerConstantExpr(getContext());
      assert(Result && "Expected argument to be a constant");

      // Immediates for SVE llvm intrinsics are always 32bit. We can safely
      // truncate because the immediate has been range checked and no valid
      // immediate requires more than a handful of bits.
      *Result = Result->extOrTrunc(32);
      Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
    }
  }

  auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
                                              AArch64SVEIntrinsicsProvenSorted);
  SVETypeFlags TypeFlags(Builtin->TypeModifier);
  if (TypeFlags.isLoad())
    return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
                             TypeFlags.isZExtReturn());
  else if (TypeFlags.isStore())
    return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isGatherLoad())
    return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isScatterStore())
    return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isPrefetch())
    return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isGatherPrefetch())
    return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isStructLoad())
    return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isStructStore())
    return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
    return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
  else if (TypeFlags.isTupleCreate())
    return EmitSVETupleCreate(TypeFlags, Ty, Ops);
  else if (TypeFlags.isUndef())
    return UndefValue::get(Ty);
  else if (Builtin->LLVMIntrinsic != 0) {
    if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
      InsertExplicitZeroOperand(Builder, Ty, Ops);

    if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
      InsertExplicitUndefOperand(Builder, Ty, Ops);

    // Some ACLE builtins leave out the argument to specify the predicate
    // pattern, which is expected to be expanded to an SV_ALL pattern.
    if (TypeFlags.isAppendSVALL())
      Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
    if (TypeFlags.isInsertOp1SVALL())
      Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));

    // Predicates must match the main datatype.
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
        if (PredTy->getElementType()->isIntegerTy(1))
          Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));

    // Splat scalar operand to vector (intrinsics with _n infix).
    if (TypeFlags.hasSplatOperand()) {
      unsigned OpNo = TypeFlags.getSplatOperand();
      Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
    }

    if (TypeFlags.isReverseCompare())
      std::swap(Ops[1], Ops[2]);
    else if (TypeFlags.isReverseUSDOT())
      std::swap(Ops[1], Ops[2]);
    else if (TypeFlags.isReverseMergeAnyBinOp() &&
             TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
      std::swap(Ops[1], Ops[2]);
    else if (TypeFlags.isReverseMergeAnyAccOp() &&
             TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
      std::swap(Ops[1], Ops[3]);

    // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
    if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
      llvm::Type *OpndTy = Ops[1]->getType();
      auto *SplatZero = Constant::getNullValue(OpndTy);
      Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
    }

    Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
                                   getSVEOverloadTypes(TypeFlags, Ty, Ops));
    Value *Call = Builder.CreateCall(F, Ops);

    // Predicate results must be converted to svbool_t.
    if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
      if (PredTy->getScalarType()->isIntegerTy(1))
        Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));

    return Call;
  }
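  // Builtins that do not map directly onto a single LLVM intrinsic are
  // expanded individually below.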
  switch (BuiltinID) {
  default:
    return nullptr;

  case SVE::BI__builtin_sve_svmov_b_z: {
    // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
    SVETypeFlags TypeFlags(Builtin->TypeModifier);
    llvm::Type *OverloadedTy = getSVEType(TypeFlags);
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
    return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
  }

  case SVE::BI__builtin_sve_svnot_b_z: {
    // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
    SVETypeFlags TypeFlags(Builtin->TypeModifier);
    llvm::Type *OverloadedTy = getSVEType(TypeFlags);
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
    return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
  }

  case SVE::BI__builtin_sve_svmovlb_u16:
  case SVE::BI__builtin_sve_svmovlb_u32:
  case SVE::BI__builtin_sve_svmovlb_u64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);

  case SVE::BI__builtin_sve_svmovlb_s16:
  case SVE::BI__builtin_sve_svmovlb_s32:
  case SVE::BI__builtin_sve_svmovlb_s64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);

  case SVE::BI__builtin_sve_svmovlt_u16:
  case SVE::BI__builtin_sve_svmovlt_u32:
  case SVE::BI__builtin_sve_svmovlt_u64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);

  case SVE::BI__builtin_sve_svmovlt_s16:
  case SVE::BI__builtin_sve_svmovlt_s32:
  case SVE::BI__builtin_sve_svmovlt_s64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);

  case SVE::BI__builtin_sve_svpmullt_u16:
  case SVE::BI__builtin_sve_svpmullt_u64:
  case SVE::BI__builtin_sve_svpmullt_n_u16:
  case SVE::BI__builtin_sve_svpmullt_n_u64:
    return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);

  case SVE::BI__builtin_sve_svpmullb_u16:
  case SVE::BI__builtin_sve_svpmullb_u64:
  case SVE::BI__builtin_sve_svpmullb_n_u16:
  case SVE::BI__builtin_sve_svpmullb_n_u64:
    return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);

  case SVE::BI__builtin_sve_svdup_n_b8:
  case SVE::BI__builtin_sve_svdup_n_b16:
  case SVE::BI__builtin_sve_svdup_n_b32:
  case SVE::BI__builtin_sve_svdup_n_b64: {
    Value *CmpNE =
        Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
    llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
    Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
    return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
  }

  case SVE::BI__builtin_sve_svdupq_n_b8:
  case SVE::BI__builtin_sve_svdupq_n_b16:
  case SVE::BI__builtin_sve_svdupq_n_b32:
  case SVE::BI__builtin_sve_svdupq_n_b64:
  case SVE::BI__builtin_sve_svdupq_n_u8:
  case SVE::BI__builtin_sve_svdupq_n_s8:
  case SVE::BI__builtin_sve_svdupq_n_u64:
  case SVE::BI__builtin_sve_svdupq_n_f64:
  case SVE::BI__builtin_sve_svdupq_n_s64:
  case SVE::BI__builtin_sve_svdupq_n_u16:
  case SVE::BI__builtin_sve_svdupq_n_f16:
  case SVE::BI__builtin_sve_svdupq_n_bf16:
  case SVE::BI__builtin_sve_svdupq_n_s16:
  case SVE::BI__builtin_sve_svdupq_n_u32:
  case SVE::BI__builtin_sve_svdupq_n_f32:
  case SVE::BI__builtin_sve_svdupq_n_s32: {
    // These builtins are implemented by storing each element to an array and
    // using ld1rq to materialize a vector.
    unsigned NumOpnds = Ops.size();

    bool IsBoolTy =
        cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);

    // For svdupq_n_b* the element type is an integer of 128/numelts bits,
    // so that the compare can use the width that is natural for the expected
    // number of predicate lanes.
    llvm::Type *EltTy = Ops[0]->getType();
    if (IsBoolTy)
      EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);

    SmallVector<llvm::Value *, 16> VecOps;
    for (unsigned I = 0; I < NumOpnds; ++I)
      VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
    Value *Vec = BuildVector(VecOps);

    llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
    Value *InsertSubVec = Builder.CreateInsertVector(
        OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));

    Function *F =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
    Value *DupQLane =
        Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});

    if (!IsBoolTy)
      return DupQLane;

    SVETypeFlags TypeFlags(Builtin->TypeModifier);
    Value *Pred = EmitSVEAllTruePred(TypeFlags);

    // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
    F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
                                       : Intrinsic::aarch64_sve_cmpne_wide,
                         OverloadedTy);
    Value *Call = Builder.CreateCall(
        F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
    return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
  }

  case SVE::BI__builtin_sve_svpfalse_b:
    return ConstantInt::getFalse(Ty);

  case SVE::BI__builtin_sve_svlen_bf16:
  case SVE::BI__builtin_sve_svlen_f16:
  case SVE::BI__builtin_sve_svlen_f32:
  case SVE::BI__builtin_sve_svlen_f64:
  case SVE::BI__builtin_sve_svlen_s8:
  case SVE::BI__builtin_sve_svlen_s16:
  case SVE::BI__builtin_sve_svlen_s32:
  case SVE::BI__builtin_sve_svlen_s64:
  case SVE::BI__builtin_sve_svlen_u8:
  case SVE::BI__builtin_sve_svlen_u16:
  case SVE::BI__builtin_sve_svlen_u32:
  case SVE::BI__builtin_sve_svlen_u64: {
    SVETypeFlags TF(Builtin->TypeModifier);
    auto VTy = cast<llvm::VectorType>(getSVEType(TF));
    Value *NumEls =
        llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());

    Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
    return Builder.CreateMul(NumEls, Builder.CreateCall(F));
  }

  case SVE::BI__builtin_sve_svtbl2_u8:
  case SVE::BI__builtin_sve_svtbl2_s8:
  case SVE::BI__builtin_sve_svtbl2_u16:
  case SVE::BI__builtin_sve_svtbl2_s16:
  case SVE::BI__builtin_sve_svtbl2_u32:
  case SVE::BI__builtin_sve_svtbl2_s32:
  case SVE::BI__builtin_sve_svtbl2_u64:
  case SVE::BI__builtin_sve_svtbl2_s64:
  case SVE::BI__builtin_sve_svtbl2_f16:
  case SVE::BI__builtin_sve_svtbl2_bf16:
  case SVE::BI__builtin_sve_svtbl2_f32:
  case SVE::BI__builtin_sve_svtbl2_f64: {
    SVETypeFlags TF(Builtin->TypeModifier);
    auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
    Value *V0 = Builder.CreateExtractVector(VTy, Ops[0],
                                            ConstantInt::get(CGM.Int64Ty, 0));
    unsigned MinElts = VTy->getMinNumElements();
    Value *V1 = Builder.CreateExtractVector(
        VTy, Ops[0], ConstantInt::get(CGM.Int64Ty, MinElts));
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
    return Builder.CreateCall(F, {V0, V1, Ops[1]});
  }

  case SVE::BI__builtin_sve_svset_neonq_s8:
  case SVE::BI__builtin_sve_svset_neonq_s16:
  case SVE::BI__builtin_sve_svset_neonq_s32:
  case SVE::BI__builtin_sve_svset_neonq_s64:
  case SVE::BI__builtin_sve_svset_neonq_u8:
  case SVE::BI__builtin_sve_svset_neonq_u16:
  case SVE::BI__builtin_sve_svset_neonq_u32:
  case SVE::BI__builtin_sve_svset_neonq_u64:
  case SVE::BI__builtin_sve_svset_neonq_f16:
  case SVE::BI__builtin_sve_svset_neonq_f32:
  case SVE::BI__builtin_sve_svset_neonq_f64:
  case SVE::BI__builtin_sve_svset_neonq_bf16: {
    return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
  }

  case SVE::BI__builtin_sve_svget_neonq_s8:
  case SVE::BI__builtin_sve_svget_neonq_s16:
  case SVE::BI__builtin_sve_svget_neonq_s32:
  case SVE::BI__builtin_sve_svget_neonq_s64:
  case SVE::BI__builtin_sve_svget_neonq_u8:
  case SVE::BI__builtin_sve_svget_neonq_u16:
  case SVE::BI__builtin_sve_svget_neonq_u32:
  case SVE::BI__builtin_sve_svget_neonq_u64:
  case SVE::BI__builtin_sve_svget_neonq_f16:
  case SVE::BI__builtin_sve_svget_neonq_f32:
  case SVE::BI__builtin_sve_svget_neonq_f64:
  case SVE::BI__builtin_sve_svget_neonq_bf16: {
    return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
  }

  case SVE::BI__builtin_sve_svdup_neonq_s8:
  case SVE::BI__builtin_sve_svdup_neonq_s16:
  case SVE::BI__builtin_sve_svdup_neonq_s32:
  case SVE::BI__builtin_sve_svdup_neonq_s64:
  case SVE::BI__builtin_sve_svdup_neonq_u8:
  case SVE::BI__builtin_sve_svdup_neonq_u16:
  case SVE::BI__builtin_sve_svdup_neonq_u32:
  case SVE::BI__builtin_sve_svdup_neonq_u64:
  case SVE::BI__builtin_sve_svdup_neonq_f16:
  case SVE::BI__builtin_sve_svdup_neonq_f32:
  case SVE::BI__builtin_sve_svdup_neonq_f64:
  case SVE::BI__builtin_sve_svdup_neonq_bf16: {
    Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
                                               Builder.getInt64(0));
    return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
                                   {Insert, Builder.getInt64(0)});
  }
  }

  /// Should not happen
  return nullptr;
}
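// The SME path mirrors the SVE one above but currently only dispatches the
// ZA load/store builtins through EmitSMELd1St1.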
Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
                                                  const CallExpr *E) {
  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  llvm::SmallVector<Value *, 4> Ops;
  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    if ((ICEArguments & (1 << i)) == 0)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    else {
      // If this is required to be a constant, constant fold it so that we know
      // that the generated intrinsic gets a ConstantInt.
      std::optional<llvm::APSInt> Result =
          E->getArg(i)->getIntegerConstantExpr(getContext());
      assert(Result && "Expected argument to be a constant");

      // Immediates for SVE llvm intrinsics are always 32bit. We can safely
      // truncate because the immediate has been range checked and no valid
      // immediate requires more than a handful of bits.
      *Result = Result->extOrTrunc(32);
      Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
    }
  }

  auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
                                              AArch64SMEIntrinsicsProvenSorted);
  SVETypeFlags TypeFlags(Builtin->TypeModifier);
  if (TypeFlags.isLoad() || TypeFlags.isStore())
    return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);

  /// Should not happen
  return nullptr;
}
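// EmitAArch64BuiltinExpr handles everything else: SVE/SME builtins are
// dispatched first, then a long series of special cases (hints, exclusive
// load/store, MTE, system registers, ...) before the shared NEON tables take
// over.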
Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E,
                                               llvm::Triple::ArchType Arch) {
  if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
      BuiltinID <= clang::AArch64::LastSVEBuiltin)
    return EmitAArch64SVEBuiltinExpr(BuiltinID, E);

  if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
      BuiltinID <= clang::AArch64::LastSMEBuiltin)
    return EmitAArch64SMEBuiltinExpr(BuiltinID, E);

  unsigned HintID = static_cast<unsigned>(-1);
  switch (BuiltinID) {
  default: break;
  case clang::AArch64::BI__builtin_arm_nop:
    HintID = 0;
    break;
  case clang::AArch64::BI__builtin_arm_yield:
  case clang::AArch64::BI__yield:
    HintID = 1;
    break;
  case clang::AArch64::BI__builtin_arm_wfe:
  case clang::AArch64::BI__wfe:
    HintID = 2;
    break;
  case clang::AArch64::BI__builtin_arm_wfi:
  case clang::AArch64::BI__wfi:
    HintID = 3;
    break;
  case clang::AArch64::BI__builtin_arm_sev:
  case clang::AArch64::BI__sev:
    HintID = 4;
    break;
  case clang::AArch64::BI__builtin_arm_sevl:
  case clang::AArch64::BI__sevl:
    HintID = 5;
    break;
  }

  if (HintID != static_cast<unsigned>(-1)) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
    return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
    assert((getContext().getTypeSize(E->getType()) == 32) &&
           "rbit of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
    assert((getContext().getTypeSize(E->getType()) == 64) &&
           "rbit of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
                              "cls");
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
                              "cls");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
                              Arg, "frint32z");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
                              Arg, "frint64z");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
                              Arg, "frint32x");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
                              Arg, "frint64x");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
    assert((getContext().getTypeSize(E->getType()) == 32) &&
           "__jcvt of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
    llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
    llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));

    if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
      // Load from the address via an LLVM intrinsic, receiving a
      // tuple of 8 i64 words, and store each one to ValPtr.
      Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
      llvm::Value *Val = Builder.CreateCall(F, MemAddr);
      llvm::Value *ToRet;
      for (size_t i = 0; i < 8; i++) {
        llvm::Value *ValOffsetPtr =
            Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
        Address Addr =
            Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
        ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
      }
      return ToRet;
    } else {
      // Load 8 i64 words from ValPtr, and store them to the address
      // via an LLVM intrinsic.
      SmallVector<llvm::Value *, 9> Args;
      Args.push_back(MemAddr);
      for (size_t i = 0; i < 8; i++) {
        llvm::Value *ValOffsetPtr =
            Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
        Address Addr =
            Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
        Args.push_back(Builder.CreateLoad(Addr));
      }

      auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
                       ? Intrinsic::aarch64_st64b
                   : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
                       ? Intrinsic::aarch64_st64bv
                       : Intrinsic::aarch64_st64bv0);
      Function *F = CGM.getIntrinsic(Intr);
      return Builder.CreateCall(F, Args);
    }
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {

    auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
                     ? Intrinsic::aarch64_rndr
                     : Intrinsic::aarch64_rndrrs);
    Function *F = CGM.getIntrinsic(Intr);
    llvm::Value *Val = Builder.CreateCall(F);
    Value *RandomValue = Builder.CreateExtractValue(Val, 0);
    Value *Status = Builder.CreateExtractValue(Val, 1);

    Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
    Builder.CreateStore(RandomValue, MemAddress);
    Status = Builder.CreateZExt(Status, Int32Ty);
    return Status;
  }

  if (BuiltinID == clang::AArch64::BI__clear_cache) {
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
    const FunctionDecl *FD = E->getDirectCallee();
    Value *Ops[2];
    for (unsigned i = 0; i < 2; i++)
      Ops[i] = EmitScalarExpr(E->getArg(i));
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  }
  if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
       BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
      getContext().getTypeSize(E->getType()) == 128) {
    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
                             ? Intrinsic::aarch64_ldaxp
                             : Intrinsic::aarch64_ldxp);

    Value *LdPtr = EmitScalarExpr(E->getArg(0));
    Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
                                    "ldxp");

    Value *Val0 = Builder.CreateExtractValue(Val, 1);
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
    Val0 = Builder.CreateZExt(Val0, Int128Ty);
    Val1 = Builder.CreateZExt(Val1, Int128Ty);

    Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
    Val = Builder.CreateOr(Val, Val1);
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
             BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));

    QualType Ty = E->getType();
    llvm::Type *RealResTy = ConvertType(Ty);
    llvm::Type *IntTy =
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
    llvm::Type *PtrTy = IntTy->getPointerTo();
    LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);

    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
                             ? Intrinsic::aarch64_ldaxr
                             : Intrinsic::aarch64_ldxr,
                         PtrTy);
    CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
    Val->addParamAttr(
        0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));

    if (RealResTy->isPointerTy())
      return Builder.CreateIntToPtr(Val, RealResTy);

    llvm::Type *IntResTy = llvm::IntegerType::get(
        getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
    return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
                                 RealResTy);
  }

  if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
       BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
      getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
                             ? Intrinsic::aarch64_stlxp
                             : Intrinsic::aarch64_stxp);
    llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);

    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
    EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);

    Tmp = Builder.CreateElementBitCast(Tmp, STy);
    llvm::Value *Val = Builder.CreateLoad(Tmp);

    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
    Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
                                         Int8PtrTy);
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
      BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));

    QualType Ty = E->getArg(0)->getType();
    llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
                                                 getContext().getTypeSize(Ty));
    StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());

    if (StoreVal->getType()->isPointerTy())
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
    else {
      llvm::Type *IntTy = llvm::IntegerType::get(
          getLLVMContext(),
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
      StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
    }

    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
                             ? Intrinsic::aarch64_stlxr
                             : Intrinsic::aarch64_stxr,
                         StoreAddr->getType());
    CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
    CI->addParamAttr(
        1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
    return CI;
  }
  if (BuiltinID == clang::AArch64::BI__getReg) {
    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::APSInt Value = Result.Val.getInt();
    LLVMContext &Context = CGM.getLLVMContext();
    std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);

    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

    llvm::Function *F =
        CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
    return Builder.CreateCall(F, Metadata);
  }

  if (BuiltinID == clang::AArch64::BI__break) {
    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
    return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
    return Builder.CreateCall(F);
  }

  if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::SingleThread);
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::AArch64::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
  case clang::AArch64::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
  case clang::AArch64::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
  case clang::AArch64::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
  case clang::AArch64::BI__builtin_arm_crc32w:
    CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
  case clang::AArch64::BI__builtin_arm_crc32cw:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
  case clang::AArch64::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
  case clang::AArch64::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
  }

  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
    Function *F = CGM.getIntrinsic(CRCIntrinsicID);

    llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
    Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);

    return Builder.CreateCall(F, {Arg0, Arg1});
  }
  // Memory Operations (MOPS)
  if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
    Value *Dst = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    Value *Size = EmitScalarExpr(E->getArg(2));
    Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
    Val = Builder.CreateTrunc(Val, Int8Ty);
    Size = Builder.CreateIntCast(Size, Int64Ty, false);
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
  }
  // Memory Tagging Extensions (MTE) Intrinsics
  Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::AArch64::BI__builtin_arm_irg:
    MTEIntrinsicID = Intrinsic::aarch64_irg; break;
  case clang::AArch64::BI__builtin_arm_addg:
    MTEIntrinsicID = Intrinsic::aarch64_addg; break;
  case clang::AArch64::BI__builtin_arm_gmi:
    MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
  case clang::AArch64::BI__builtin_arm_ldg:
    MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
  case clang::AArch64::BI__builtin_arm_stg:
    MTEIntrinsicID = Intrinsic::aarch64_stg; break;
  case clang::AArch64::BI__builtin_arm_subp:
    MTEIntrinsicID = Intrinsic::aarch64_subp; break;
  }

  if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
    llvm::Type *T = ConvertType(E->getType());

    if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *Mask = EmitScalarExpr(E->getArg(1));

      Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
      Mask = Builder.CreateZExt(Mask, Int64Ty);
      Value *RV = Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
      return Builder.CreatePointerCast(RV, T);
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *TagOffset = EmitScalarExpr(E->getArg(1));

      Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
      TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
      Value *RV = Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
      return Builder.CreatePointerCast(RV, T);
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *ExcludedMask = EmitScalarExpr(E->getArg(1));

      ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
      Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
      return Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
    }
    // Although it is possible to supply a different return
    // address (first arg) to this intrinsic, for now we set
    // return address same as input address.
    if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
      Value *TagAddress = EmitScalarExpr(E->getArg(0));
      TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
      Value *RV = Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
      return Builder.CreatePointerCast(RV, T);
    }
    // Although it is possible to supply a different tag (to set)
    // to this intrinsic (as first arg), for now we supply
    // the tag that is in input address arg (common use case).
    if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
      Value *TagAddress = EmitScalarExpr(E->getArg(0));
      TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
      return Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
      Value *PointerA = EmitScalarExpr(E->getArg(0));
      Value *PointerB = EmitScalarExpr(E->getArg(1));
      PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);
      PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);
      return Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
    }
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {

    SpecialRegisterAccessKind AccessKind = Write;
    if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
      AccessKind = VolatileRead;

    bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
                            BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;

    bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
                   BuiltinID == clang::AArch64::BI__builtin_arm_wsr;

    bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
                    BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;

    llvm::Type *ValueType;
    llvm::Type *RegisterType = Int64Ty;
    if (Is32Bit) {
      ValueType = Int32Ty;
    } else if (Is128Bit) {
      llvm::Type *Int128Ty =
          llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
      ValueType = Int128Ty;
      RegisterType = Int128Ty;
    } else if (IsPointerBuiltin) {
      ValueType = VoidPtrTy;
    } else {
      ValueType = Int64Ty;
    }

    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
                                      AccessKind);
  }
  if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
      BuiltinID == clang::AArch64::BI_WriteStatusReg) {
    LLVMContext &Context = CGM.getLLVMContext();

    unsigned SysReg =
        E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();

    std::string SysRegStr;
    llvm::raw_string_ostream(SysRegStr) <<
                       ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
                       ((SysReg >> 11) & 7)              << ":" <<
                       ((SysReg >> 7)  & 15)             << ":" <<
                       ((SysReg >> 3)  & 15)             << ":" <<
                       ( SysReg & 7);

    llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

    llvm::Type *RegisterType = Int64Ty;
    llvm::Type *Types[] = { RegisterType };

    if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
      llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);

      return Builder.CreateCall(F, Metadata);
    }

    llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
    llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));

    return Builder.CreateCall(F, { Metadata, ArgValue });
  }

  if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }

  if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }
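  // __mulh/__umulh return the high 64 bits of a 128-bit product: both
  // operands are widened to i128, multiplied, and the result shifted right
  // by 64 before being cast back to the result type.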
  if (BuiltinID == clang::AArch64::BI__mulh ||
      BuiltinID == clang::AArch64::BI__umulh) {
    llvm::Type *ResType = ConvertType(E->getType());
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);

    bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
    Value *LHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
    Value *RHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);

    Value *MulResult, *HigherBits;
    if (IsSigned) {
      MulResult = Builder.CreateNSWMul(LHS, RHS);
      HigherBits = Builder.CreateAShr(MulResult, 64);
    } else {
      MulResult = Builder.CreateNUWMul(LHS, RHS);
      HigherBits = Builder.CreateLShr(MulResult, 64);
    }
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);

    return HigherBits;
  }
  if (BuiltinID == AArch64::BI__writex18byte ||
      BuiltinID == AArch64::BI__writex18word ||
      BuiltinID == AArch64::BI__writex18dword ||
      BuiltinID == AArch64::BI__writex18qword) {
    llvm::Type *IntTy = ConvertType(E->getArg(1)->getType());

    LLVMContext &Context = CGM.getLLVMContext();
    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
    llvm::Function *F =
        CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
    llvm::Value *X18 = Builder.CreateCall(F, Metadata);
    X18 = Builder.CreateIntToPtr(X18, llvm::PointerType::get(Int8Ty, 0));

    // Store val at x18 + offset
    Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
    Ptr = Builder.CreatePointerCast(Ptr, llvm::PointerType::get(IntTy, 0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One());
    return Store;
  }

  if (BuiltinID == AArch64::BI__readx18byte ||
      BuiltinID == AArch64::BI__readx18word ||
      BuiltinID == AArch64::BI__readx18dword ||
      BuiltinID == AArch64::BI__readx18qword) {
    llvm::Type *IntTy = ConvertType(E->getType());

    LLVMContext &Context = CGM.getLLVMContext();
    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
    llvm::Function *F =
        CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
    llvm::Value *X18 = Builder.CreateCall(F, Metadata);
    X18 = Builder.CreateIntToPtr(X18, llvm::PointerType::get(Int8Ty, 0));

    // Load x18 + offset
    Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
    Ptr = Builder.CreatePointerCast(Ptr, llvm::PointerType::get(IntTy, 0));
    LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
    return Load;
  }
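  // From here on the lowering is mostly table driven: MSVC-style builtins are
  // translated to their MSVCIntrin equivalents, the remaining arguments are
  // collected, and the NEON type is decoded from the trailing immediate of
  // overloaded builtins.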
  // Handle MSVC intrinsics before argument evaluation to prevent double
  // evaluation.
  if (std::optional<MSVCIntrin> MsvcIntId =
          translateAarch64ToMsvcIntrin(BuiltinID))
    return EmitMSVCBuiltinExpr(*MsvcIntId, E);

  // Some intrinsics are equivalent - if they are use the base intrinsic ID.
  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
    return P.first == BuiltinID;
  });
  if (It != end(NEONEquivalentIntrinsicMap))
    BuiltinID = It->second;

  // Find out if any arguments are required to be integer constant
  // expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");
  llvm::SmallVector<Value *, 4> Ops;
  Address PtrOp0 = Address::invalid();
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
    if (i == 0) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld1_v:
      case NEON::BI__builtin_neon_vld1q_v:
      case NEON::BI__builtin_neon_vld1_dup_v:
      case NEON::BI__builtin_neon_vld1q_dup_v:
      case NEON::BI__builtin_neon_vld1_lane_v:
      case NEON::BI__builtin_neon_vld1q_lane_v:
      case NEON::BI__builtin_neon_vst1_v:
      case NEON::BI__builtin_neon_vst1q_v:
      case NEON::BI__builtin_neon_vst1_lane_v:
      case NEON::BI__builtin_neon_vst1q_lane_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
        Ops.push_back(PtrOp0.getPointer());
        continue;
      }
    }
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    } else {
      // If this is required to be a constant, constant fold it so that we know
      // that the generated intrinsic gets a ConstantInt.
      Ops.push_back(llvm::ConstantInt::get(
          getLLVMContext(),
          *E->getArg(i)->getIntegerConstantExpr(getContext())));
    }
  }

  auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
  const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
      SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);

  if (Builtin) {
    Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
    Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
    assert(Result && "SISD intrinsic should have been handled");
    return Result;
  }

  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  NeonTypeFlags Type(0);
  if (std::optional<llvm::APSInt> Result =
          Arg->getIntegerConstantExpr(getContext()))
    // Determine the type of this overloaded NEON intrinsic.
    Type = NeonTypeFlags(Result->getZExtValue());

  bool usgn = Type.isUnsigned();
  bool quad = Type.isQuad();

  // Handle non-overloaded intrinsics first.
  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabsh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddq_p128: {
    llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
    return Builder.CreateBitCast(Ops[0], Int128Ty);
  }
  case NEON::BI__builtin_neon_vldrq_p128: {
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
    llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
    Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
    return Builder.CreateAlignedLoad(Int128Ty, Ptr,
                                     CharUnits::fromQuantity(16));
  }
  case NEON::BI__builtin_neon_vstrq_p128: {
    llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
    Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
    return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
  }
  case NEON::BI__builtin_neon_vcvts_f32_u32:
  case NEON::BI__builtin_neon_vcvtd_f64_u64:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvts_f32_s32:
  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
    llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
    llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
    if (usgn)
      return Builder.CreateUIToFP(Ops[0], FTy);
    return Builder.CreateSIToFP(Ops[0], FTy);
  }
  case NEON::BI__builtin_neon_vcvth_f16_u16:
  case NEON::BI__builtin_neon_vcvth_f16_u32:
  case NEON::BI__builtin_neon_vcvth_f16_u64:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvth_f16_s16:
  case NEON::BI__builtin_neon_vcvth_f16_s32:
  case NEON::BI__builtin_neon_vcvth_f16_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    llvm::Type *FTy = HalfTy;
    llvm::Type *InTy;
    if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
      InTy = Int64Ty;
    else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
      InTy = Int32Ty;
    else
      InTy = Int16Ty;
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
    if (usgn)
      return Builder.CreateUIToFP(Ops[0], FTy);
    return Builder.CreateSIToFP(Ops[0], FTy);
  }
  case NEON::BI__builtin_neon_vcvtah_u16_f16:
  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
  case NEON::BI__builtin_neon_vcvtph_u16_f16:
  case NEON::BI__builtin_neon_vcvth_u16_f16:
  case NEON::BI__builtin_neon_vcvtah_s16_f16:
  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
  case NEON::BI__builtin_neon_vcvtph_s16_f16:
  case NEON::BI__builtin_neon_vcvth_s16_f16: {
    unsigned Int;
    llvm::Type *InTy = Int32Ty;
    llvm::Type *FTy = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvtah_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtau; break;
    case NEON::BI__builtin_neon_vcvtmh_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtmu; break;
    case NEON::BI__builtin_neon_vcvtnh_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtnu; break;
    case NEON::BI__builtin_neon_vcvtph_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtpu; break;
    case NEON::BI__builtin_neon_vcvth_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtzu; break;
    case NEON::BI__builtin_neon_vcvtah_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtas; break;
    case NEON::BI__builtin_neon_vcvtmh_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtms; break;
    case NEON::BI__builtin_neon_vcvtnh_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtns; break;
    case NEON::BI__builtin_neon_vcvtph_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtps; break;
    case NEON::BI__builtin_neon_vcvth_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtzs; break;
    }
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vcaleh_f16:
  case NEON::BI__builtin_neon_vcalth_f16:
  case NEON::BI__builtin_neon_vcageh_f16:
  case NEON::BI__builtin_neon_vcagth_f16: {
    unsigned Int;
    llvm::Type *InTy = Int32Ty;
    llvm::Type *FTy = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcageh_f16:
      Int = Intrinsic::aarch64_neon_facge; break;
    case NEON::BI__builtin_neon_vcagth_f16:
      Int = Intrinsic::aarch64_neon_facgt; break;
    case NEON::BI__builtin_neon_vcaleh_f16:
      Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
    case NEON::BI__builtin_neon_vcalth_f16:
      Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
    }
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vcvth_n_s16_f16:
  case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
    unsigned Int;
    llvm::Type *InTy = Int32Ty;
    llvm::Type *FTy = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvth_n_s16_f16:
      Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
    case NEON::BI__builtin_neon_vcvth_n_u16_f16:
      Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
    }
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vcvth_n_f16_s16:
  case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
    unsigned Int;
    llvm::Type *FTy = HalfTy;
    llvm::Type *InTy = Int32Ty;
    llvm::Type *Tys[2] = {FTy, InTy};
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvth_n_f16_s16:
      Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
      Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
      break;
    case NEON::BI__builtin_neon_vcvth_n_f16_u16:
      Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
      Ops[0] = Builder.CreateZExt(Ops[0], InTy);
      break;
    }
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
  }
  case NEON::BI__builtin_neon_vpaddd_s64: {
    auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2i64, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2i64 into a scalar i64.
    return Builder.CreateAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vpaddd_f64: {
    auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2f64, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2f64 into a scalar f64.
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vpadds_f32: {
    auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2f32, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2f32 into a scalar f32.
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  }
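  // The scalar vceqz/vcgez/vclez/vcgtz/vcltz builtins below compare the
  // operand against zero and produce an all-ones or all-zeros scalar mask,
  // matching the semantics of the corresponding vector compares.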
  case NEON::BI__builtin_neon_vceqzd_s64:
  case NEON::BI__builtin_neon_vceqzd_f64:
  case NEON::BI__builtin_neon_vceqzs_f32:
  case NEON::BI__builtin_neon_vceqzh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
  case NEON::BI__builtin_neon_vcgezd_s64:
  case NEON::BI__builtin_neon_vcgezd_f64:
  case NEON::BI__builtin_neon_vcgezs_f32:
  case NEON::BI__builtin_neon_vcgezh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
  case NEON::BI__builtin_neon_vclezd_s64:
  case NEON::BI__builtin_neon_vclezd_f64:
  case NEON::BI__builtin_neon_vclezs_f32:
  case NEON::BI__builtin_neon_vclezh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
  case NEON::BI__builtin_neon_vcgtzd_s64:
  case NEON::BI__builtin_neon_vcgtzd_f64:
  case NEON::BI__builtin_neon_vcgtzs_f32:
  case NEON::BI__builtin_neon_vcgtzh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
  case NEON::BI__builtin_neon_vcltzd_s64:
  case NEON::BI__builtin_neon_vcltzd_f64:
  case NEON::BI__builtin_neon_vcltzs_f32:
  case NEON::BI__builtin_neon_vcltzh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
10865 case NEON::BI__builtin_neon_vceqzd_u64
: {
10866 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
10867 Ops
[0] = Builder
.CreateBitCast(Ops
[0], Int64Ty
);
10869 Builder
.CreateICmpEQ(Ops
[0], llvm::Constant::getNullValue(Int64Ty
));
10870 return Builder
.CreateSExt(Ops
[0], Int64Ty
, "vceqzd");
  case NEON::BI__builtin_neon_vceqd_f64:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcged_f64:
  case NEON::BI__builtin_neon_vcgtd_f64: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
    if (P == llvm::FCmpInst::FCMP_OEQ)
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
  }
  case NEON::BI__builtin_neon_vceqs_f32:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcges_f32:
  case NEON::BI__builtin_neon_vcgts_f32: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
    if (P == llvm::FCmpInst::FCMP_OEQ)
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
  }
  case NEON::BI__builtin_neon_vceqh_f16:
  case NEON::BI__builtin_neon_vcleh_f16:
  case NEON::BI__builtin_neon_vclth_f16:
  case NEON::BI__builtin_neon_vcgeh_f16:
  case NEON::BI__builtin_neon_vcgth_f16: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
    if (P == llvm::FCmpInst::FCMP_OEQ)
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
  }
  case NEON::BI__builtin_neon_vceqd_s64:
  case NEON::BI__builtin_neon_vceqd_u64:
  case NEON::BI__builtin_neon_vcgtd_s64:
  case NEON::BI__builtin_neon_vcgtd_u64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vcged_u64:
  case NEON::BI__builtin_neon_vcged_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcled_s64: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqd_s64:
    case NEON::BI__builtin_neon_vceqd_u64: P = llvm::ICmpInst::ICMP_EQ; break;
    case NEON::BI__builtin_neon_vcgtd_s64: P = llvm::ICmpInst::ICMP_SGT; break;
    case NEON::BI__builtin_neon_vcgtd_u64: P = llvm::ICmpInst::ICMP_UGT; break;
    case NEON::BI__builtin_neon_vcltd_s64: P = llvm::ICmpInst::ICMP_SLT; break;
    case NEON::BI__builtin_neon_vcltd_u64: P = llvm::ICmpInst::ICMP_ULT; break;
    case NEON::BI__builtin_neon_vcged_u64: P = llvm::ICmpInst::ICMP_UGE; break;
    case NEON::BI__builtin_neon_vcged_s64: P = llvm::ICmpInst::ICMP_SGE; break;
    case NEON::BI__builtin_neon_vcled_u64: P = llvm::ICmpInst::ICMP_ULE; break;
    case NEON::BI__builtin_neon_vcled_s64: P = llvm::ICmpInst::ICMP_SLE; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
  }
  case NEON::BI__builtin_neon_vtstd_s64:
  case NEON::BI__builtin_neon_vtstd_u64: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                llvm::Constant::getNullValue(Int64Ty));
    return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
  }
  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_bf16:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vset_lane_f64:
    // The vector type needs a cast for the v1f64 variant.
    Ops[1] =
        Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vsetq_lane_f64:
    // The vector type needs a cast for the v2f64 variant.
    Ops[1] =
        Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vdupb_lane_i8:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vdupb_laneq_i8:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vduph_lane_i16:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vduph_laneq_i16:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vdups_lane_i32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vdups_lane_f32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vdups_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vdups_laneq_i32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vdupd_lane_i64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vdupd_lane_f64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vdupd_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vdupd_laneq_i64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_f32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vget_lane_f64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_f32:
  case NEON::BI__builtin_neon_vdups_laneq_f32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vgetq_lane_f64:
  case NEON::BI__builtin_neon_vdupd_laneq_f64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vaddh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
  case NEON::BI__builtin_neon_vsubh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
  case NEON::BI__builtin_neon_vmulh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
  case NEON::BI__builtin_neon_vdivh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
  case NEON::BI__builtin_neon_vfmah_f16:
    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
        {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
  case NEON::BI__builtin_neon_vfmsh_f16: {
    Value *Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
        {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
  }
  case NEON::BI__builtin_neon_vaddd_s64:
  case NEON::BI__builtin_neon_vaddd_u64:
    return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
  case NEON::BI__builtin_neon_vsubd_s64:
  case NEON::BI__builtin_neon_vsubd_u64:
    return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
  case NEON::BI__builtin_neon_vqdmlalh_s16:
  case NEON::BI__builtin_neon_vqdmlslh_s16: {
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
    ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
    auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
                          ProductOps, "vqdmlXl");
    Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");

    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
                            ? Intrinsic::aarch64_neon_sqadd
                            : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqshlud_n_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
                        Ops, "vqshlu_n");
  }
  case NEON::BI__builtin_neon_vqshld_n_u64:
  case NEON::BI__builtin_neon_vqshld_n_s64: {
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
                       ? Intrinsic::aarch64_neon_uqshl
                       : Intrinsic::aarch64_neon_sqshl;
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
  }
  case NEON::BI__builtin_neon_vrshrd_n_u64:
  case NEON::BI__builtin_neon_vrshrd_n_s64: {
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
                       ? Intrinsic::aarch64_neon_urshl
                       : Intrinsic::aarch64_neon_srshl;
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
    Ops[1] = ConstantInt::get(Int64Ty, -SV);
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
  }
  case NEON::BI__builtin_neon_vrsrad_n_u64:
  case NEON::BI__builtin_neon_vrsrad_n_s64: {
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
                       ? Intrinsic::aarch64_neon_urshl
                       : Intrinsic::aarch64_neon_srshl;
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
                                {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
    return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
  }
  case NEON::BI__builtin_neon_vshld_n_s64:
  case NEON::BI__builtin_neon_vshld_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateShl(
        Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
  }
  case NEON::BI__builtin_neon_vshrd_n_s64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateAShr(
        Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                   Amt->getZExtValue())),
        "shrd_n");
  }
  case NEON::BI__builtin_neon_vshrd_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    uint64_t ShiftAmt = Amt->getZExtValue();
    // Right-shifting an unsigned value by its size yields 0.
    if (ShiftAmt == 64)
      return ConstantInt::get(Int64Ty, 0);
    return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
                              "shrd_n");
  }
  case NEON::BI__builtin_neon_vsrad_n_s64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
    Ops[1] = Builder.CreateAShr(
        Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                   Amt->getZExtValue())),
        "ssra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  }
  case NEON::BI__builtin_neon_vsrad_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
    uint64_t ShiftAmt = Amt->getZExtValue();
    // Right-shifting an unsigned value by its size yields 0.
    // As Op + 0 = Op, return Ops[0] directly.
    if (ShiftAmt == 64)
      return Ops[0];
    Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
                                "usra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  }
  case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
  case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
    Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
                                          "lane");
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
    ProductOps.push_back(vectorWrapScalar16(Ops[2]));
    auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
                          ProductOps, "vqdmlXl");
    Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
    Ops.pop_back();

    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
                       BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
                          ? Intrinsic::aarch64_neon_sqadd
                          : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqdmlals_s32:
  case NEON::BI__builtin_neon_vqdmlsls_s32: {
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(Ops[1]);
    ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
                     ProductOps, "vqdmlXl");

    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
                            ? Intrinsic::aarch64_neon_sqadd
                            : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqdmlals_lane_s32:
  case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
  case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
    Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
                                          "lane");
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(Ops[1]);
    ProductOps.push_back(Ops[2]);
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
                     ProductOps, "vqdmlXl");
    Ops.pop_back();

    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
                       BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
                          ? Intrinsic::aarch64_neon_sqadd
                          : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vget_lane_bf16:
  case NEON::BI__builtin_neon_vduph_lane_bf16:
  case NEON::BI__builtin_neon_vduph_lane_f16: {
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  }
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
  case NEON::BI__builtin_neon_vduph_laneq_f16: {
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  }

  case clang::AArch64::BI_InterlockedAdd: {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
        AtomicRMWInst::Add, Arg0, Arg1,
        llvm::AtomicOrdering::SequentiallyConsistent);
    return Builder.CreateAdd(RMWI, Arg1);
  }
  }
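  // Everything below operates on the full NEON vector type of the builtin.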
  llvm::FixedVectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Not all intrinsics handled by the common case work for AArch64 yet, so only
  // defer to common code if it's been added to our special map.
  Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
                                        AArch64SIMDIntrinsicsProvenSorted);

  if (Builtin)
    return EmitCommonNeonBuiltinExpr(
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
        Builtin->NameHint, Builtin->TypeModifier, E, Ops,
        /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);

  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
    return V;

  unsigned Int;
  switch (BuiltinID) {
  default: return nullptr;
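  // vbsl has no single LLVM intrinsic here; it is open-coded on the integer
  // vector type as (mask & a) | (~mask & b).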
  case NEON::BI__builtin_neon_vbsl_v:
  case NEON::BI__builtin_neon_vbslq_v: {
    llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
    Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
    Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");

    Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
    Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
    Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
    return Builder.CreateBitCast(Ops[0], Ty);
  }
  case NEON::BI__builtin_neon_vfma_lane_v:
  case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
    // The ARM builtins (and instructions) have the addend as the first
    // operand, but the 'fma' intrinsics have it last. Swap it around here.
    Value *Addend = Ops[0];
    Value *Multiplicand = Ops[1];
    Value *LaneSource = Ops[2];
    Ops[0] = Multiplicand;
    Ops[1] = LaneSource;
    Ops[2] = Addend;

    // Now adjust things to handle the lane access.
    auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
                         ? llvm::FixedVectorType::get(VTy->getElementType(),
                                                      VTy->getNumElements() / 2)
                         : VTy;
    llvm::Constant *cst = cast<Constant>(Ops[3]);
    Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
    Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
    Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");

    Ops.pop_back();
    Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
                                       : Intrinsic::fma;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
  }
  case NEON::BI__builtin_neon_vfma_laneq_v: {
    auto *VTy = cast<llvm::FixedVectorType>(Ty);
    // v1f64 fma should be mapped to Neon scalar f64 fma
    if (VTy && VTy->getElementType() == DoubleTy) {
      Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
      Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
      llvm::FixedVectorType *VTy =
          GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
      Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
      Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
      Value *Result;
      Result = emitCallMaybeConstrainedFPBuiltin(
          *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
          DoubleTy, {Ops[1], Ops[2], Ops[0]});
      return Builder.CreateBitCast(Result, Ty);
    }
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);

    auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
                                           VTy->getNumElements() * 2);
    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
    Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
                                               cast<ConstantInt>(Ops[3]));
    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");

    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[2], Ops[1], Ops[0]});
  }
  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);

    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[2], Ops[1], Ops[0]});
  }
  case NEON::BI__builtin_neon_vfmah_lane_f16:
  case NEON::BI__builtin_neon_vfmas_lane_f32:
  case NEON::BI__builtin_neon_vfmah_laneq_f16:
  case NEON::BI__builtin_neon_vfmas_laneq_f32:
  case NEON::BI__builtin_neon_vfmad_lane_f64:
  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(3)));
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
    if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vmax_v:
  case NEON::BI__builtin_neon_vmaxq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
  case NEON::BI__builtin_neon_vmaxh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Int = Intrinsic::aarch64_neon_fmax;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
  }
  case NEON::BI__builtin_neon_vmin_v:
  case NEON::BI__builtin_neon_vminq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
  case NEON::BI__builtin_neon_vminh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Int = Intrinsic::aarch64_neon_fmin;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
  }
  case NEON::BI__builtin_neon_vabd_v:
  case NEON::BI__builtin_neon_vabdq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    unsigned ArgElts = VTy->getNumElements();
    llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
    unsigned BitWidth = EltTy->getBitWidth();
    auto *ArgTy = llvm::FixedVectorType::get(
        llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
    llvm::Type *Tys[2] = { VTy, ArgTy };
    Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
    SmallVector<llvm::Value *, 1> TmpOps;
    TmpOps.push_back(Ops[1]);
    Function *F = CGM.getIntrinsic(Int, Tys);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
    llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
    return Builder.CreateAdd(tmp, addend);
  }
  case NEON::BI__builtin_neon_vpmin_v:
  case NEON::BI__builtin_neon_vpminq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
  case NEON::BI__builtin_neon_vpmax_v:
  case NEON::BI__builtin_neon_vpmaxq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
  case NEON::BI__builtin_neon_vminnm_v:
  case NEON::BI__builtin_neon_vminnmq_v:
    Int = Intrinsic::aarch64_neon_fminnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
  case NEON::BI__builtin_neon_vminnmh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Int = Intrinsic::aarch64_neon_fminnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
  case NEON::BI__builtin_neon_vmaxnm_v:
  case NEON::BI__builtin_neon_vmaxnmq_v:
    Int = Intrinsic::aarch64_neon_fmaxnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
  case NEON::BI__builtin_neon_vmaxnmh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Int = Intrinsic::aarch64_neon_fmaxnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
  case NEON::BI__builtin_neon_vrecpss_f32: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
                        Ops, "vrecps");
  }
  case NEON::BI__builtin_neon_vrecpsd_f64:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
                        Ops, "vrecps");
  case NEON::BI__builtin_neon_vrecpsh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
                        Ops, "vrecps");
  case NEON::BI__builtin_neon_vqshrun_n_v:
    Int = Intrinsic::aarch64_neon_sqshrun;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
  case NEON::BI__builtin_neon_vqrshrun_n_v:
    Int = Intrinsic::aarch64_neon_sqrshrun;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
  case NEON::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
  case NEON::BI__builtin_neon_vrshrn_n_v:
    Int = Intrinsic::aarch64_neon_rshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
  case NEON::BI__builtin_neon_vqrshrn_n_v:
    Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
  case NEON::BI__builtin_neon_vrndah_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_round
              : Intrinsic::round;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
  }
  case NEON::BI__builtin_neon_vrnda_v:
  case NEON::BI__builtin_neon_vrndaq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_round
              : Intrinsic::round;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
  }
  case NEON::BI__builtin_neon_vrndih_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_nearbyint
              : Intrinsic::nearbyint;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
  }
  case NEON::BI__builtin_neon_vrndmh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_floor
              : Intrinsic::floor;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
  }
  case NEON::BI__builtin_neon_vrndm_v:
  case NEON::BI__builtin_neon_vrndmq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_floor
              : Intrinsic::floor;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
  }
  case NEON::BI__builtin_neon_vrndnh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_roundeven
              : Intrinsic::roundeven;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
  }
  case NEON::BI__builtin_neon_vrndn_v:
  case NEON::BI__builtin_neon_vrndnq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_roundeven
              : Intrinsic::roundeven;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
  }
  case NEON::BI__builtin_neon_vrndns_f32: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_roundeven
              : Intrinsic::roundeven;
    return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
  }
  case NEON::BI__builtin_neon_vrndph_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_ceil
              : Intrinsic::ceil;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
  }
  case NEON::BI__builtin_neon_vrndp_v:
  case NEON::BI__builtin_neon_vrndpq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_ceil
              : Intrinsic::ceil;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
  }
  case NEON::BI__builtin_neon_vrndxh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_rint
              : Intrinsic::rint;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
  }
  case NEON::BI__builtin_neon_vrndx_v:
  case NEON::BI__builtin_neon_vrndxq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_rint
              : Intrinsic::rint;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
  }
  case NEON::BI__builtin_neon_vrndh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_trunc
              : Intrinsic::trunc;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
  }
  case NEON::BI__builtin_neon_vrnd32x_f32:
  case NEON::BI__builtin_neon_vrnd32xq_f32: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::aarch64_neon_frint32x;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
  }
  case NEON::BI__builtin_neon_vrnd32z_f32:
  case NEON::BI__builtin_neon_vrnd32zq_f32: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::aarch64_neon_frint32z;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
  }
  case NEON::BI__builtin_neon_vrnd64x_f32:
  case NEON::BI__builtin_neon_vrnd64xq_f32: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::aarch64_neon_frint64x;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
  }
  case NEON::BI__builtin_neon_vrnd64z_f32:
  case NEON::BI__builtin_neon_vrnd64zq_f32: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::aarch64_neon_frint64z;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
  }
  case NEON::BI__builtin_neon_vrnd_v:
  case NEON::BI__builtin_neon_vrndq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_trunc
              : Intrinsic::trunc;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
  }
  case NEON::BI__builtin_neon_vcvt_f64_v:
  case NEON::BI__builtin_neon_vcvtq_f64_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_f64_f32: {
    assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
           "unexpected vcvt_f64_f32 builtin");
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));

    return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvt_f32_f64: {
    assert(Type.getEltType() == NeonTypeFlags::Float32 &&
           "unexpected vcvt_f32_f64 builtin");
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));

    return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvt_s16_f16:
  case NEON::BI__builtin_neon_vcvt_u16_f16:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s16_f16:
  case NEON::BI__builtin_neon_vcvtq_u16_f16: {
    Int =
        usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
    llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
  }
  case NEON::BI__builtin_neon_vcvta_s16_f16:
  case NEON::BI__builtin_neon_vcvta_u16_f16:
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
  }
  case NEON::BI__builtin_neon_vcvtm_s16_f16:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtm_u16_f16:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
  }
  case NEON::BI__builtin_neon_vcvtn_s16_f16:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtn_u16_f16:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
  }
  case NEON::BI__builtin_neon_vcvtp_s16_f16:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtp_u16_f16:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
  }
  case NEON::BI__builtin_neon_vmulx_v:
  case NEON::BI__builtin_neon_vmulxq_v: {
    Int = Intrinsic::aarch64_neon_fmulx;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
  }
  case NEON::BI__builtin_neon_vmulxh_lane_f16:
  case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
    // vmulx_lane should be mapped to Neon scalar mulx after
    // extracting the scalar element
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
    Ops.pop_back();
    Int = Intrinsic::aarch64_neon_fmulx;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
  }
  case NEON::BI__builtin_neon_vmul_lane_v:
  case NEON::BI__builtin_neon_vmul_laneq_v: {
    // v1f64 vmul_lane should be mapped to Neon scalar mul lane
    bool Quad = false;
    if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
      Quad = true;
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    llvm::FixedVectorType *VTy =
        GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
    Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
    return Builder.CreateBitCast(Result, Ty);
  }
  case NEON::BI__builtin_neon_vnegd_s64:
    return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
  case NEON::BI__builtin_neon_vnegh_f16:
    return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
  case NEON::BI__builtin_neon_vpmaxnm_v:
  case NEON::BI__builtin_neon_vpmaxnmq_v: {
    Int = Intrinsic::aarch64_neon_fmaxnmp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
  }
  case NEON::BI__builtin_neon_vpminnm_v:
  case NEON::BI__builtin_neon_vpminnmq_v: {
    Int = Intrinsic::aarch64_neon_fminnmp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
  }
  case NEON::BI__builtin_neon_vsqrth_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_sqrt
              : Intrinsic::sqrt;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
  }
  case NEON::BI__builtin_neon_vsqrt_v:
  case NEON::BI__builtin_neon_vsqrtq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_sqrt
              : Intrinsic::sqrt;
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
  }
  case NEON::BI__builtin_neon_vrbit_v:
  case NEON::BI__builtin_neon_vrbitq_v: {
    Int = Intrinsic::bitreverse;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
  }
  case NEON::BI__builtin_neon_vaddv_u8:
    // FIXME: These are handled by the AArch64 scalar code.
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vaddv_s8: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vaddv_u16:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vaddv_s16: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddvq_u8:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vaddvq_s8: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vaddvq_u16:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vaddvq_s16: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
11846 case NEON::BI__builtin_neon_vmaxv_u8
: {
11847 Int
= Intrinsic::aarch64_neon_umaxv
;
11849 VTy
= llvm::FixedVectorType::get(Int8Ty
, 8);
11850 llvm::Type
*Tys
[2] = { Ty
, VTy
};
11851 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
11852 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
11853 return Builder
.CreateTrunc(Ops
[0], Int8Ty
);
11855 case NEON::BI__builtin_neon_vmaxv_u16
: {
11856 Int
= Intrinsic::aarch64_neon_umaxv
;
11858 VTy
= llvm::FixedVectorType::get(Int16Ty
, 4);
11859 llvm::Type
*Tys
[2] = { Ty
, VTy
};
11860 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
11861 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
11862 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
11864 case NEON::BI__builtin_neon_vmaxvq_u8
: {
11865 Int
= Intrinsic::aarch64_neon_umaxv
;
11867 VTy
= llvm::FixedVectorType::get(Int8Ty
, 16);
11868 llvm::Type
*Tys
[2] = { Ty
, VTy
};
11869 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
11870 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
11871 return Builder
.CreateTrunc(Ops
[0], Int8Ty
);
11873 case NEON::BI__builtin_neon_vmaxvq_u16
: {
11874 Int
= Intrinsic::aarch64_neon_umaxv
;
11876 VTy
= llvm::FixedVectorType::get(Int16Ty
, 8);
11877 llvm::Type
*Tys
[2] = { Ty
, VTy
};
11878 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
11879 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
11880 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
11882 case NEON::BI__builtin_neon_vmaxv_s8
: {
11883 Int
= Intrinsic::aarch64_neon_smaxv
;
11885 VTy
= llvm::FixedVectorType::get(Int8Ty
, 8);
11886 llvm::Type
*Tys
[2] = { Ty
, VTy
};
11887 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
11888 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
11889 return Builder
.CreateTrunc(Ops
[0], Int8Ty
);
11891 case NEON::BI__builtin_neon_vmaxv_s16
: {
11892 Int
= Intrinsic::aarch64_neon_smaxv
;
11894 VTy
= llvm::FixedVectorType::get(Int16Ty
, 4);
11895 llvm::Type
*Tys
[2] = { Ty
, VTy
};
11896 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
11897 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
11898 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
11900 case NEON::BI__builtin_neon_vmaxvq_s8
: {
11901 Int
= Intrinsic::aarch64_neon_smaxv
;
11903 VTy
= llvm::FixedVectorType::get(Int8Ty
, 16);
11904 llvm::Type
*Tys
[2] = { Ty
, VTy
};
11905 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
11906 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
11907 return Builder
.CreateTrunc(Ops
[0], Int8Ty
);
11909 case NEON::BI__builtin_neon_vmaxvq_s16
: {
11910 Int
= Intrinsic::aarch64_neon_smaxv
;
11912 VTy
= llvm::FixedVectorType::get(Int16Ty
, 8);
11913 llvm::Type
*Tys
[2] = { Ty
, VTy
};
11914 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
11915 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
11916 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
11918 case NEON::BI__builtin_neon_vmaxv_f16
: {
11919 Int
= Intrinsic::aarch64_neon_fmaxv
;
11921 VTy
= llvm::FixedVectorType::get(HalfTy
, 4);
11922 llvm::Type
*Tys
[2] = { Ty
, VTy
};
11923 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
11924 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
11925 return Builder
.CreateTrunc(Ops
[0], HalfTy
);
11927 case NEON::BI__builtin_neon_vmaxvq_f16
: {
11928 Int
= Intrinsic::aarch64_neon_fmaxv
;
11930 VTy
= llvm::FixedVectorType::get(HalfTy
, 8);
11931 llvm::Type
*Tys
[2] = { Ty
, VTy
};
11932 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
11933 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vmaxv");
11934 return Builder
.CreateTrunc(Ops
[0], HalfTy
);
11936 case NEON::BI__builtin_neon_vminv_u8
: {
11937 Int
= Intrinsic::aarch64_neon_uminv
;
11939 VTy
= llvm::FixedVectorType::get(Int8Ty
, 8);
11940 llvm::Type
*Tys
[2] = { Ty
, VTy
};
11941 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
11942 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vminv");
11943 return Builder
.CreateTrunc(Ops
[0], Int8Ty
);
11945 case NEON::BI__builtin_neon_vminv_u16
: {
11946 Int
= Intrinsic::aarch64_neon_uminv
;
11948 VTy
= llvm::FixedVectorType::get(Int16Ty
, 4);
11949 llvm::Type
*Tys
[2] = { Ty
, VTy
};
11950 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
11951 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vminv");
11952 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
11954 case NEON::BI__builtin_neon_vminvq_u8
: {
11955 Int
= Intrinsic::aarch64_neon_uminv
;
11957 VTy
= llvm::FixedVectorType::get(Int8Ty
, 16);
11958 llvm::Type
*Tys
[2] = { Ty
, VTy
};
11959 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
11960 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vminv");
11961 return Builder
.CreateTrunc(Ops
[0], Int8Ty
);
11963 case NEON::BI__builtin_neon_vminvq_u16
: {
11964 Int
= Intrinsic::aarch64_neon_uminv
;
11966 VTy
= llvm::FixedVectorType::get(Int16Ty
, 8);
11967 llvm::Type
*Tys
[2] = { Ty
, VTy
};
11968 Ops
.push_back(EmitScalarExpr(E
->getArg(0)));
11969 Ops
[0] = EmitNeonCall(CGM
.getIntrinsic(Int
, Tys
), Ops
, "vminv");
11970 return Builder
.CreateTrunc(Ops
[0], Int16Ty
);
11972 case NEON::BI__builtin_neon_vminv_s8
: {
11973 Int
        = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminv_s16: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminvq_s8: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminvq_s16: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminv_f16: {
    Int = Intrinsic::aarch64_neon_fminv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vminvq_f16: {
    Int = Intrinsic::aarch64_neon_fminv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vmaxnmv_f16: {
    Int = Intrinsic::aarch64_neon_fmaxnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vmaxnmvq_f16: {
    Int = Intrinsic::aarch64_neon_fmaxnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vminnmv_f16: {
    Int = Intrinsic::aarch64_neon_fminnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vminnmvq_f16: {
    Int = Intrinsic::aarch64_neon_fminnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
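  // Note (illustrative): each across-vector reduction case above follows the
  // same shape: push the single vector argument, call the reduction intrinsic
  // overloaded on the {result, vector} type pair, then truncate the widened
  // result back to the element width, e.g. vminv_s16 becomes roughly
  // "trunc i32 @llvm.aarch64.neon.sminv.i32.v4i16(%v) to i16".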
  case NEON::BI__builtin_neon_vmul_n_f64: {
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
    return Builder.CreateFMul(Ops[0], RHS);
  }
  case NEON::BI__builtin_neon_vaddlv_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlv_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v: {
    Int = Intrinsic::aarch64_neon_vsri;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsri_n");
  }
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v: {
    Int = Intrinsic::aarch64_neon_vsli;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsli_n");
  }
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v: {
    Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
    SmallVector<llvm::Value*,2> TmpOps;
    TmpOps.push_back(Ops[1]);
    TmpOps.push_back(Ops[2]);
    Function* F = CGM.getIntrinsic(Int, Ty);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return Builder.CreateAdd(Ops[0], tmp);
  }
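  // Note (illustrative): vsra_n/vrsra_n have no dedicated accumulate intrinsic
  // here; the shifted value is produced first (EmitNeonRShiftImm, or
  // urshl/srshl with a negated splat shift amount for the rounding form) and
  // is then combined with the accumulator by a plain vector add.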
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
    return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
  }
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
  case NEON::BI__builtin_neon_vld1_lane_v:
  case NEON::BI__builtin_neon_vld1q_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
                                       PtrOp0.getAlignment());
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    Value *V = PoisonValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
                                       PtrOp0.getAlignment());
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vst1_lane_v:
  case NEON::BI__builtin_neon_vst1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    return Builder.CreateAlignedStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty),
                                      PtrOp0.getAlignment());
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    Ops[0] = Builder.CreateBitCast(
        Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    Ops[0] = Builder.CreateBitCast(
        Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    Ops[0] = Builder.CreateBitCast(
        Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld2q_dup_v: {
    llvm::Type *PTy =
        llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    Ops[0] = Builder.CreateBitCast(
        Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld3q_dup_v: {
    llvm::Type *PTy =
        llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    Ops[0] = Builder.CreateBitCast(
        Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_dup_v:
  case NEON::BI__builtin_neon_vld4q_dup_v: {
    llvm::Type *PTy =
        llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    Ops[0] = Builder.CreateBitCast(
        Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
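  // Note (illustrative): the vldN/vldN_dup/vldN_lane cases above all call an
  // aarch64.neon.ldN* intrinsic that returns a literal struct of N vectors,
  // then store that whole struct through Ops[0], the pointer to the result
  // tuple supplied by the NEON header.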
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
                        Ops, "");
  }
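  // Note (illustrative): for the stores the operand list is rotated so the
  // data vectors come first and the destination pointer last, matching the
  // aarch64.neon.stN / stNlane intrinsic signatures (with the lane index
  // zero-extended to i64 for the *_lane forms).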
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
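  // Note (illustrative): vtrn/vuzp/vzip produce a pair of vectors, so each
  // case runs its loop twice: build the shuffle mask for half "vi", shuffle
  // Ops[1]/Ops[2], and store the result into slot vi of the result tuple via
  // CreateConstInBoundsGEP1_32(Ty, Ops[0], vi).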
  case NEON::BI__builtin_neon_vqtbl1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
                        Ops, "vtbl1");
  }
  case NEON::BI__builtin_neon_vqtbl2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
                        Ops, "vtbl2");
  }
  case NEON::BI__builtin_neon_vqtbl3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
                        Ops, "vtbl3");
  }
  case NEON::BI__builtin_neon_vqtbl4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
                        Ops, "vtbl4");
  }
  case NEON::BI__builtin_neon_vqtbx1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
                        Ops, "vtbx1");
  }
  case NEON::BI__builtin_neon_vqtbx2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
                        Ops, "vtbx2");
  }
  case NEON::BI__builtin_neon_vqtbx3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
                        Ops, "vtbx3");
  }
  case NEON::BI__builtin_neon_vqtbx4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
                        Ops, "vtbx4");
  }
  case NEON::BI__builtin_neon_vsqadd_v:
  case NEON::BI__builtin_neon_vsqaddq_v: {
    Int = Intrinsic::aarch64_neon_usqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
  }
  case NEON::BI__builtin_neon_vuqadd_v:
  case NEON::BI__builtin_neon_vuqaddq_v: {
    Int = Intrinsic::aarch64_neon_suqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
  }
  }
}

Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
          BuiltinID == BPF::BI__builtin_btf_type_id ||
          BuiltinID == BPF::BI__builtin_preserve_type_info ||
          BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
         "unexpected BPF builtin");

  // A sequence number, injected into IR builtin functions, to
  // prevent CSE given the only difference of the function
  // may just be the debuginfo metadata.
  static uint32_t BuiltinSeqNum;

  switch (BuiltinID) {
  default:
    llvm_unreachable("Unexpected BPF builtin");
  case BPF::BI__builtin_preserve_field_info: {
    const Expr *Arg = E->getArg(0);
    bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;

    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(),
                "using __builtin_preserve_field_info() without -g");
      return IsBitField ? EmitLValue(Arg).getBitFieldPointer()
                        : EmitLValue(Arg).getPointer(*this);
    }

    // Enable underlying preserve_*_access_index() generation.
    bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
    IsInPreservedAIRegion = true;
    Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer()
                                  : EmitLValue(Arg).getPointer(*this);
    IsInPreservedAIRegion = OldIsInPreservedAIRegion;

    ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());

    // Built the IR for the preserve_field_info intrinsic.
    llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
        &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
        {FieldAddr->getType()});
    return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
  }
  case BPF::BI__builtin_btf_type_id:
  case BPF::BI__builtin_preserve_type_info: {
    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(), "using builtin function without -g");
      return nullptr;
    }

    const Expr *Arg0 = E->getArg(0);
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
        Arg0->getType(), Arg0->getExprLoc());

    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);

    llvm::Function *FnDecl;
    if (BuiltinID == BPF::BI__builtin_btf_type_id)
      FnDecl = llvm::Intrinsic::getDeclaration(
          &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
    else
      FnDecl = llvm::Intrinsic::getDeclaration(
          &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
    CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
    return Fn;
  }
  case BPF::BI__builtin_preserve_enum_value: {
    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(), "using builtin function without -g");
      return nullptr;
    }

    const Expr *Arg0 = E->getArg(0);
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
        Arg0->getType(), Arg0->getExprLoc());

    const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
    const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
    const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
    const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());

    auto &InitVal = Enumerator->getInitVal();
    std::string InitValStr;
    if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
      InitValStr = std::to_string(InitVal.getSExtValue());
    else
      InitValStr = std::to_string(InitVal.getZExtValue());
    std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
    Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);

    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);

    llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
        &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
    CallInst *Fn =
        Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
    return Fn;
  }
  }
}

llvm::Value *CodeGenFunction::
BuildVector(ArrayRef<llvm::Value*> Ops) {
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
         "Not a power-of-two sized vector!");
  bool AllConstants = true;
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
    AllConstants &= isa<Constant>(Ops[i]);

  // If this is a constant vector, create a ConstantVector.
  if (AllConstants) {
    SmallVector<llvm::Constant*, 16> CstOps;
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      CstOps.push_back(cast<Constant>(Ops[i]));
    return llvm::ConstantVector::get(CstOps);
  }

  // Otherwise, insertelement the values to build the vector.
  Value *Result = llvm::PoisonValue::get(
      llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));

  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));

  return Result;
}

// Convert the mask from an integer type to a vector of i1.
static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
                              unsigned NumElts) {

  auto *MaskTy = llvm::FixedVectorType::get(
      CGF.Builder.getInt1Ty(),
      cast<IntegerType>(Mask->getType())->getBitWidth());
  Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    int Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    MaskVec = CGF.Builder.CreateShuffleVector(
        MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
  }

  return MaskVec;
}

static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                                 Align Alignment) {
  // Cast the pointer to right type.
  Value *Ptr = CGF.Builder.CreateBitCast(
      Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType()));

  Value *MaskVec = getMaskVecValue(
      CGF, Ops[2],
      cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());

  return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
}

static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                                Align Alignment) {
  // Cast the pointer to right type.
  llvm::Type *Ty = Ops[1]->getType();
  Value *Ptr =
      CGF.Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));

  Value *MaskVec = getMaskVecValue(
      CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());

  return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
}

static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
                                ArrayRef<Value *> Ops) {
  auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
  llvm::Type *PtrTy = ResultTy->getElementType();

  // Cast the pointer to element type.
  Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
                                         llvm::PointerType::getUnqual(PtrTy));

  Value *MaskVec = getMaskVecValue(
      CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());

  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
                                           ResultTy);
  return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
}
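
// Note (illustrative): the expand-load builtins handled above map onto the
// generic llvm.masked.expandload intrinsic, roughly
//   call <16 x float> @llvm.masked.expandload.v16f32(ptr %p, <16 x i1> %m,
//                                                    <16 x float> %passthru)
// for a 512-bit float variant.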

static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
                                    ArrayRef<Value *> Ops,
                                    bool IsCompress) {
  auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());

  Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());

  Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
                                 : Intrinsic::x86_avx512_mask_expand;
  llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
  return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
}

static Value *EmitX86CompressStore(CodeGenFunction &CGF,
                                   ArrayRef<Value *> Ops) {
  auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
  llvm::Type *PtrTy = ResultTy->getElementType();

  // Cast the pointer to element type.
  Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
                                         llvm::PointerType::getUnqual(PtrTy));

  Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());

  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
                                           ResultTy);
  return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
}

static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
                               ArrayRef<Value *> Ops,
                               bool InvertLHS = false) {
  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
  Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
  Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);

  if (InvertLHS)
    LHS = CGF.Builder.CreateNot(LHS);

  return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
                                   Ops[0]->getType());
}

static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
                                 Value *Amt, bool IsRight) {
  llvm::Type *Ty = Op0->getType();

  // Amount may be scalar immediate, in which case create a splat vector.
  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
    Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
  }

  unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *F = CGF.CGM.getIntrinsic(IID, Ty);
  return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
}

static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                           bool IsSigned) {
  Value *Op0 = Ops[0];
  Value *Op1 = Ops[1];
  llvm::Type *Ty = Op0->getType();
  uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;

  CmpInst::Predicate Pred;
  switch (Imm) {
  case 0x0:
    Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
    break;
  case 0x1:
    Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
    break;
  case 0x2:
    Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
    break;
  case 0x3:
    Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
    break;
  case 0x4:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case 0x5:
    Pred = ICmpInst::ICMP_NE;
    break;
  case 0x6:
    return llvm::Constant::getNullValue(Ty); // FALSE
  case 0x7:
    return llvm::Constant::getAllOnesValue(Ty); // TRUE
  default:
    llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
  }

  Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
  Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
  return Res;
}
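
// Note (illustrative): the XOP vpcom/vpcomu builtins encode their predicate
// in the low 3 bits of the immediate (lt, le, gt, ge, eq, ne, false, true);
// the helper above lowers that to an ordinary icmp whose i1 result is
// sign-extended back to the vector type, or to an all-zeros/all-ones constant
// for the two trivial predicates.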

static Value *EmitX86Select(CodeGenFunction &CGF,
                            Value *Mask, Value *Op0, Value *Op1) {

  // If the mask is all ones just return first argument.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getMaskVecValue(
      CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());

  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
}

static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
                                  Value *Mask, Value *Op0, Value *Op1) {
  // If the mask is all ones just return first argument.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  auto *MaskTy = llvm::FixedVectorType::get(
      CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
  Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
  Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
}

static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
                                         unsigned NumElts, Value *MaskIn) {
  if (MaskIn) {
    const auto *C = dyn_cast<Constant>(MaskIn);
    if (!C || !C->isAllOnesValue())
      Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
  }

  if (NumElts < 8) {
    int Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = i % NumElts + NumElts;
    Cmp = CGF.Builder.CreateShuffleVector(
        Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
  }

  return CGF.Builder.CreateBitCast(Cmp,
                                   IntegerType::get(CGF.getLLVMContext(),
                                                    std::max(NumElts, 8U)));
}
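
// Note (illustrative): for compares narrower than 8 elements the <N x i1>
// result above is widened to <8 x i1> with the extra lanes taken from a zero
// vector, so the final bitcast always yields at least an i8 mask with the
// unused high bits cleared.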

static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
                                   bool Signed, ArrayRef<Value *> Ops) {
  assert((Ops.size() == 2 || Ops.size() == 4) &&
         "Unexpected number of arguments");
  unsigned NumElts =
      cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
  Value *Cmp;

  if (CC == 3) {
    Cmp = Constant::getNullValue(
        llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(
        llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ;  break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE;  break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
  }

  Value *MaskIn = nullptr;
  if (Ops.size() == 4)
    MaskIn = Ops[3];

  return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
}

static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
  Value *Zero = Constant::getNullValue(In->getType());
  return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
}

static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
                                    ArrayRef<Value *> Ops, bool IsSigned) {
  unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
  llvm::Type *Ty = Ops[1]->getType();

  Value *Res;
  if (Rnd != 4) {
    Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
                                 : Intrinsic::x86_avx512_uitofp_round;
    Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
    Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
  } else {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
                   : CGF.Builder.CreateUIToFP(Ops[0], Ty);
  }

  return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
}

// Lowers X86 FMA intrinsics to IR.
static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
                             ArrayRef<Value *> Ops, unsigned BuiltinID,
                             bool IsAddSub) {

  bool Subtract = false;
  Intrinsic::ID IID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  default: break;
  case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
    Subtract = true;
    [[fallthrough]];
  case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
    IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
    break;
  case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
    Subtract = true;
    [[fallthrough]];
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
    IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
    break;
  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
    Subtract = true;
    [[fallthrough]];
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
    IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
    Subtract = true;
    [[fallthrough]];
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
    IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
    Subtract = true;
    [[fallthrough]];
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
    IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
    break;
  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
    Subtract = true;
    [[fallthrough]];
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
    IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
    break;
  }

  Value *A = Ops[0];
  Value *B = Ops[1];
  Value *C = Ops[2];

  if (Subtract)
    C = CGF.Builder.CreateFNeg(C);

  Value *Res;

  // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
  if (IID != Intrinsic::not_intrinsic &&
      (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
       IsAddSub)) {
    Function *Intr = CGF.CGM.getIntrinsic(IID);
    Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
  } else {
    llvm::Type *Ty = A->getType();
    Function *FMA;
    if (CGF.Builder.getIsFPConstrained()) {
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
      FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
      Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
    } else {
      FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
      Res = CGF.Builder.CreateCall(FMA, {A, B, C});
    }
  }

  // Handle any required masking.
  Value *MaskFalseVal = nullptr;
  switch (BuiltinID) {
  case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
    MaskFalseVal = Ops[0];
    break;
  case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
    MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
    break;
  case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
  case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
  case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
    MaskFalseVal = Ops[2];
    break;
  }

  if (MaskFalseVal)
    return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);

  return Res;
}

static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
                                MutableArrayRef<Value *> Ops, Value *Upper,
                                bool ZeroMask = false, unsigned PTIdx = 0,
                                bool NegAcc = false) {
  unsigned Rnd = 4;
  if (Ops.size() > 4)
    Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();

  if (NegAcc)
    Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);

  Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
  Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
  Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
  Value *Res;
  if (Rnd != 4) {
    Intrinsic::ID IID;

    switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
    case 16:
      IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
      break;
    case 32:
      IID = Intrinsic::x86_avx512_vfmadd_f32;
      break;
    case 64:
      IID = Intrinsic::x86_avx512_vfmadd_f64;
      break;
    default:
      llvm_unreachable("Unexpected size");
    }
    Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
                                 {Ops[0], Ops[1], Ops[2], Ops[4]});
  } else if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *FMA = CGF.CGM.getIntrinsic(
        Intrinsic::experimental_constrained_fma, Ops[0]->getType());
    Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
  } else {
    Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
    Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
  }
  // If we have more than 3 arguments, we need to do masking.
  if (Ops.size() > 3) {
    Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
                               : Ops[PTIdx];

    // If we negated the accumulator and the its the PassThru value we need to
    // bypass the negate. Conveniently Upper should be the same thing in this
    // case.
    if (NegAcc && PTIdx == 2)
      PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);

    Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
  }
  return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
}

static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
                           ArrayRef<Value *> Ops) {
  llvm::Type *Ty = Ops[0]->getType();
  // Arguments have a vXi32 type so cast to vXi64.
  Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
                                  Ty->getPrimitiveSizeInBits() / 64);
  Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
  Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);

  if (IsSigned) {
    // Shift left then arithmetic shift right.
    Constant *ShiftAmt = ConstantInt::get(Ty, 32);
    LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
    LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
    RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
    RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
  } else {
    // Clear the upper bits.
    Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
    LHS = CGF.Builder.CreateAnd(LHS, Mask);
    RHS = CGF.Builder.CreateAnd(RHS, Mask);
  }

  return CGF.Builder.CreateMul(LHS, RHS);
}
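
// Note (illustrative): pmuldq/pmuludq multiply 32-bit elements into 64-bit
// lanes, so the helper above reinterprets the vXi32 operands as vXi64 and
// either sign-extends each lane (shl followed by ashr by 32) or zero-extends
// it (mask with 0xffffffff) before a plain 64-bit multiply.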

// Emit a masked pternlog intrinsic. This only exists because the header has to
// use a macro and we aren't able to pass the input argument to a pternlog
// builtin and a select builtin without evaluating it twice.
static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
                             ArrayRef<Value *> Ops) {
  llvm::Type *Ty = Ops[0]->getType();

  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32)
    IID = Intrinsic::x86_avx512_pternlog_d_128;
  else if (VecWidth == 256 && EltWidth == 32)
    IID = Intrinsic::x86_avx512_pternlog_d_256;
  else if (VecWidth == 512 && EltWidth == 32)
    IID = Intrinsic::x86_avx512_pternlog_d_512;
  else if (VecWidth == 128 && EltWidth == 64)
    IID = Intrinsic::x86_avx512_pternlog_q_128;
  else if (VecWidth == 256 && EltWidth == 64)
    IID = Intrinsic::x86_avx512_pternlog_q_256;
  else if (VecWidth == 512 && EltWidth == 64)
    IID = Intrinsic::x86_avx512_pternlog_q_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
                                          Ops.drop_back());
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
  return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
}
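
// Example (illustrative): a masked pternlog builtin call such as
//   __builtin_ia32_pternlogd512_mask(a, b, c, imm, mask)
// is emitted by the helper above as the bare pternlog intrinsic on (a, b, c,
// imm) followed by an element-wise select between that result and either the
// first operand (merge-masking) or a zero vector (zero-masking).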

static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
                              llvm::Type *DstTy) {
  unsigned NumberOfElements =
      cast<llvm::FixedVectorType>(DstTy)->getNumElements();
  Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
  return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
}

Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
  const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
  StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
  return EmitX86CpuIs(CPUStr);
}

// Convert F16 halfs to floats.
static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
                                       ArrayRef<Value *> Ops,
                                       llvm::Type *DstTy) {
  assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
         "Unknown cvtph2ps intrinsic");

  // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
  if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
    Function *F =
        CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
    return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
  }

  unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
  Value *Src = Ops[0];

  // Extract the subvector.
  if (NumDstElts !=
      cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
    assert(NumDstElts == 4 && "Unexpected vector size");
    Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
  }

  // Bitcast from vXi16 to vXf16.
  auto *HalfTy = llvm::FixedVectorType::get(
      llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
  Src = CGF.Builder.CreateBitCast(Src, HalfTy);

  // Perform the fp-extension.
  Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");

  if (Ops.size() >= 3)
    Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
  return Res;
}

Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {

  llvm::Type *Int32Ty = Builder.getInt32Ty();

  // Matching the struct layout from the compiler-rt/libgcc structure that is
  // filled in:
  // unsigned int __cpu_vendor;
  // unsigned int __cpu_type;
  // unsigned int __cpu_subtype;
  // unsigned int __cpu_features[1];
  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
                                          llvm::ArrayType::get(Int32Ty, 1));

  // Grab the global __cpu_model.
  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
  cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);

  // Calculate the index needed to access the correct field based on the
  // range. Also adjust the expected value.
  unsigned Index;
  unsigned Value;
  std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
#define X86_VENDOR(ENUM, STRING)                                               \
  .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)                                        \
  .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_TYPE(ENUM, STR)                                                \
  .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)                                     \
  .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_SUBTYPE(ENUM, STR)                                             \
  .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
#include "llvm/TargetParser/X86TargetParser.def"
                               .Default({0, 0});
  assert(Value != 0 && "Invalid CPUStr passed to CpuIs");

  // Grab the appropriate field from __cpu_model.
  llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
                         ConstantInt::get(Int32Ty, Index)};
  llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
  CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
                                       CharUnits::fromQuantity(4));

  // Check the value of the field against the requested value.
  return Builder.CreateICmpEQ(CpuValue,
                              llvm::ConstantInt::get(Int32Ty, Value));
}
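
// Example (illustrative): __builtin_cpu_is("intel") compares the vendor field
// (__cpu_model index 0) against the corresponding llvm::X86 enumerator, while
// a CPU name selects the __cpu_type or __cpu_subtype field through the
// StringSwitch table above.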

Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
  const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
  StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
  return EmitX86CpuSupports(FeatureStr);
}

Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
  return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
}

llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
  uint32_t Features1 = Lo_32(FeaturesMask);
  uint32_t Features2 = Hi_32(FeaturesMask);

  Value *Result = Builder.getTrue();

  if (Features1 != 0) {
    // Matching the struct layout from the compiler-rt/libgcc structure that is
    // filled in:
    // unsigned int __cpu_vendor;
    // unsigned int __cpu_type;
    // unsigned int __cpu_subtype;
    // unsigned int __cpu_features[1];
    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
                                            llvm::ArrayType::get(Int32Ty, 1));

    // Grab the global __cpu_model.
    llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
    cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);

    // Grab the first (0th) element from the field __cpu_features off of the
    // global in the struct STy.
    Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
                     Builder.getInt32(0)};
    Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
    Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
                                                CharUnits::fromQuantity(4));

    // Check the value of the bit corresponding to the feature requested.
    Value *Mask = Builder.getInt32(Features1);
    Value *Bitset = Builder.CreateAnd(Features, Mask);
    Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
    Result = Builder.CreateAnd(Result, Cmp);
  }

  if (Features2 != 0) {
    llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty,
                                                             "__cpu_features2");
    cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);

    Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures2,
                                                CharUnits::fromQuantity(4));

    // Check the value of the bit corresponding to the feature requested.
    Value *Mask = Builder.getInt32(Features2);
    Value *Bitset = Builder.CreateAnd(Features, Mask);
    Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
    Result = Builder.CreateAnd(Result, Cmp);
  }

  return Result;
}
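
// Example (illustrative): __builtin_cpu_supports("avx2") tests one bit of
// __cpu_model.__cpu_features[0] (features beyond the first 32 bits live in
// the separate __cpu_features2 variable) by and-ing with the mask and
// comparing for equality, so a multi-feature mask requires every requested
// bit to be set.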

Value *CodeGenFunction::EmitAArch64CpuInit() {
  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
  llvm::FunctionCallee Func =
      CGM.CreateRuntimeFunction(FTy, "init_cpu_features_resolver");
  cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
  cast<llvm::GlobalValue>(Func.getCallee())
      ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
  return Builder.CreateCall(Func);
}

Value *CodeGenFunction::EmitX86CpuInit() {
  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
                                                    /*Variadic*/ false);
  llvm::FunctionCallee Func =
      CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
  cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
  cast<llvm::GlobalValue>(Func.getCallee())
      ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
  return Builder.CreateCall(Func);
}

llvm::Value *
CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
  uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
  Value *Result = Builder.getTrue();
  if (FeaturesMask != 0) {
    // Get features from structure in runtime library
    // struct {
    //   unsigned long long features;
    // } __aarch64_cpu_features;
    llvm::Type *STy = llvm::StructType::get(Int64Ty);
    llvm::Constant *AArch64CPUFeatures =
        CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
    cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
    llvm::Value *CpuFeatures = Builder.CreateGEP(
        STy, AArch64CPUFeatures,
        {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
    Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
                                                CharUnits::fromQuantity(8));
    Value *Mask = Builder.getInt64(FeaturesMask);
    Value *Bitset = Builder.CreateAnd(Features, Mask);
    Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
    Result = Builder.CreateAnd(Result, Cmp);
  }
  return Result;
}
13317 Value
*CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID
,
13318 const CallExpr
*E
) {
13319 if (BuiltinID
== X86::BI__builtin_cpu_is
)
13320 return EmitX86CpuIs(E
);
13321 if (BuiltinID
== X86::BI__builtin_cpu_supports
)
13322 return EmitX86CpuSupports(E
);
13323 if (BuiltinID
== X86::BI__builtin_cpu_init
)
13324 return EmitX86CpuInit();
13326 // Handle MSVC intrinsics before argument evaluation to prevent double
13328 if (std::optional
<MSVCIntrin
> MsvcIntId
= translateX86ToMsvcIntrin(BuiltinID
))
13329 return EmitMSVCBuiltinExpr(*MsvcIntId
, E
);
13331 SmallVector
<Value
*, 4> Ops
;
13332 bool IsMaskFCmp
= false;
13333 bool IsConjFMA
= false;
13335 // Find out if any arguments are required to be integer constant expressions.
13336 unsigned ICEArguments
= 0;
13337 ASTContext::GetBuiltinTypeError Error
;
13338 getContext().GetBuiltinType(BuiltinID
, Error
, &ICEArguments
);
13339 assert(Error
== ASTContext::GE_None
&& "Should not codegen an error");
13341 for (unsigned i
= 0, e
= E
->getNumArgs(); i
!= e
; i
++) {
13342 // If this is a normal argument, just emit it as a scalar.
13343 if ((ICEArguments
& (1 << i
)) == 0) {
13344 Ops
.push_back(EmitScalarExpr(E
->getArg(i
)));
13348 // If this is required to be a constant, constant fold it so that we know
13349 // that the generated intrinsic gets a ConstantInt.
13350 Ops
.push_back(llvm::ConstantInt::get(
13351 getLLVMContext(), *E
->getArg(i
)->getIntegerConstantExpr(getContext())));
13354 // These exist so that the builtin that takes an immediate can be bounds
13355 // checked by clang to avoid passing bad immediates to the backend. Since
13356 // AVX has a larger immediate than SSE we would need separate builtins to
13357 // do the different bounds checking. Rather than create a clang specific
13358 // SSE only builtin, this implements eight separate builtins to match gcc
13360 auto getCmpIntrinsicCall
= [this, &Ops
](Intrinsic::ID ID
, unsigned Imm
) {
13361 Ops
.push_back(llvm::ConstantInt::get(Int8Ty
, Imm
));
13362 llvm::Function
*F
= CGM
.getIntrinsic(ID
);
13363 return Builder
.CreateCall(F
, Ops
);
13366 // For the vector forms of FP comparisons, translate the builtins directly to
13368 // TODO: The builtins could be removed if the SSE header files used vector
13369 // extension comparisons directly (vector ordered/unordered may need
13370 // additional support via __builtin_isnan()).
13371 auto getVectorFCmpIR
= [this, &Ops
, E
](CmpInst::Predicate Pred
,
13372 bool IsSignaling
) {
13373 CodeGenFunction::CGFPOptionsRAII
FPOptsRAII(*this, E
);
13376 Cmp
= Builder
.CreateFCmpS(Pred
, Ops
[0], Ops
[1]);
13378 Cmp
= Builder
.CreateFCmp(Pred
, Ops
[0], Ops
[1]);
13379 llvm::VectorType
*FPVecTy
= cast
<llvm::VectorType
>(Ops
[0]->getType());
13380 llvm::VectorType
*IntVecTy
= llvm::VectorType::getInteger(FPVecTy
);
13381 Value
*Sext
= Builder
.CreateSExt(Cmp
, IntVecTy
);
13382 return Builder
.CreateBitCast(Sext
, FPVecTy
);
13385 switch (BuiltinID
) {
13386 default: return nullptr;
13387 case X86::BI_mm_prefetch
: {
13388 Value
*Address
= Ops
[0];
13389 ConstantInt
*C
= cast
<ConstantInt
>(Ops
[1]);
13390 Value
*RW
= ConstantInt::get(Int32Ty
, (C
->getZExtValue() >> 2) & 0x1);
13391 Value
*Locality
= ConstantInt::get(Int32Ty
, C
->getZExtValue() & 0x3);
13392 Value
*Data
= ConstantInt::get(Int32Ty
, 1);
13393 Function
*F
= CGM
.getIntrinsic(Intrinsic::prefetch
, Address
->getType());
13394 return Builder
.CreateCall(F
, {Address
, RW
, Locality
, Data
});
13396 case X86::BI_mm_clflush
: {
13397 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::x86_sse2_clflush
),
13400 case X86::BI_mm_lfence
: {
13401 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::x86_sse2_lfence
));
13403 case X86::BI_mm_mfence
: {
13404 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::x86_sse2_mfence
));
13406 case X86::BI_mm_sfence
: {
13407 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::x86_sse_sfence
));
13409 case X86::BI_mm_pause
: {
13410 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::x86_sse2_pause
));
13412 case X86::BI__rdtsc
: {
13413 return Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::x86_rdtsc
));
13415 case X86::BI__builtin_ia32_rdtscp
: {
13416 Value
*Call
= Builder
.CreateCall(CGM
.getIntrinsic(Intrinsic::x86_rdtscp
));
13417 Builder
.CreateDefaultAlignedStore(Builder
.CreateExtractValue(Call
, 1),
13419 return Builder
.CreateExtractValue(Call
, 0);
13421 case X86::BI__builtin_ia32_lzcnt_u16
:
13422 case X86::BI__builtin_ia32_lzcnt_u32
:
13423 case X86::BI__builtin_ia32_lzcnt_u64
: {
13424 Function
*F
= CGM
.getIntrinsic(Intrinsic::ctlz
, Ops
[0]->getType());
13425 return Builder
.CreateCall(F
, {Ops
[0], Builder
.getInt1(false)});
13427 case X86::BI__builtin_ia32_tzcnt_u16
:
13428 case X86::BI__builtin_ia32_tzcnt_u32
:
13429 case X86::BI__builtin_ia32_tzcnt_u64
: {
13430 Function
*F
= CGM
.getIntrinsic(Intrinsic::cttz
, Ops
[0]->getType());
13431 return Builder
.CreateCall(F
, {Ops
[0], Builder
.getInt1(false)});
13433 case X86::BI__builtin_ia32_undef128
:
13434 case X86::BI__builtin_ia32_undef256
:
13435 case X86::BI__builtin_ia32_undef512
:
13436 // The x86 definition of "undef" is not the same as the LLVM definition
13437 // (PR32176). We leave optimizing away an unnecessary zero constant to the
13438 // IR optimizer and backend.
13439 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
13440 // value, we should use that here instead of a zero.
13441 return llvm::Constant::getNullValue(ConvertType(E
->getType()));
13442 case X86::BI__builtin_ia32_vec_init_v8qi
:
13443 case X86::BI__builtin_ia32_vec_init_v4hi
:
13444 case X86::BI__builtin_ia32_vec_init_v2si
:
13445 return Builder
.CreateBitCast(BuildVector(Ops
),
13446 llvm::Type::getX86_MMXTy(getLLVMContext()));
13447 case X86::BI__builtin_ia32_vec_ext_v2si
:
13448 case X86::BI__builtin_ia32_vec_ext_v16qi
:
13449 case X86::BI__builtin_ia32_vec_ext_v8hi
:
13450 case X86::BI__builtin_ia32_vec_ext_v4si
:
13451 case X86::BI__builtin_ia32_vec_ext_v4sf
:
13452 case X86::BI__builtin_ia32_vec_ext_v2di
:
13453 case X86::BI__builtin_ia32_vec_ext_v32qi
:
13454 case X86::BI__builtin_ia32_vec_ext_v16hi
:
13455 case X86::BI__builtin_ia32_vec_ext_v8si
:
13456 case X86::BI__builtin_ia32_vec_ext_v4di
: {
13458 cast
<llvm::FixedVectorType
>(Ops
[0]->getType())->getNumElements();
13459 uint64_t Index
= cast
<ConstantInt
>(Ops
[1])->getZExtValue();
13460 Index
&= NumElts
- 1;
13461 // These builtins exist so we can ensure the index is an ICE and in range.
13462 // Otherwise we could just do this in the header file.
13463 return Builder
.CreateExtractElement(Ops
[0], Index
);
  case X86::BI__builtin_ia32_vec_set_v16qi:
  case X86::BI__builtin_ia32_vec_set_v8hi:
  case X86::BI__builtin_ia32_vec_set_v4si:
  case X86::BI__builtin_ia32_vec_set_v2di:
  case X86::BI__builtin_ia32_vec_set_v32qi:
  case X86::BI__builtin_ia32_vec_set_v16hi:
  case X86::BI__builtin_ia32_vec_set_v8si:
  case X86::BI__builtin_ia32_vec_set_v4di: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
    Index &= NumElts - 1;
    // These builtins exist so we can ensure the index is an ICE and in range.
    // Otherwise we could just do this in the header file.
    return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
  }
  case X86::BI_mm_setcsr:
  case X86::BI__builtin_ia32_ldmxcsr: {
    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
    Builder.CreateStore(Ops[0], Tmp);
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
                              Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
  }
  case X86::BI_mm_getcsr:
  case X86::BI__builtin_ia32_stmxcsr: {
    Address Tmp = CreateMemTemp(E->getType());
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
                       Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
    return Builder.CreateLoad(Tmp, "stmxcsr");
  }
  case X86::BI__builtin_ia32_xsave:
  case X86::BI__builtin_ia32_xsave64:
  case X86::BI__builtin_ia32_xrstor:
  case X86::BI__builtin_ia32_xrstor64:
  case X86::BI__builtin_ia32_xsaveopt:
  case X86::BI__builtin_ia32_xsaveopt64:
  case X86::BI__builtin_ia32_xrstors:
  case X86::BI__builtin_ia32_xrstors64:
  case X86::BI__builtin_ia32_xsavec:
  case X86::BI__builtin_ia32_xsavec64:
  case X86::BI__builtin_ia32_xsaves:
  case X86::BI__builtin_ia32_xsaves64:
  case X86::BI__builtin_ia32_xsetbv:
  case X86::BI_xsetbv: {
    Intrinsic::ID ID;
#define INTRINSIC_X86_XSAVE_ID(NAME) \
  case X86::BI__builtin_ia32_##NAME: \
    ID = Intrinsic::x86_##NAME; \
    break
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    INTRINSIC_X86_XSAVE_ID(xsave);
    INTRINSIC_X86_XSAVE_ID(xsave64);
    INTRINSIC_X86_XSAVE_ID(xrstor);
    INTRINSIC_X86_XSAVE_ID(xrstor64);
    INTRINSIC_X86_XSAVE_ID(xsaveopt);
    INTRINSIC_X86_XSAVE_ID(xsaveopt64);
    INTRINSIC_X86_XSAVE_ID(xrstors);
    INTRINSIC_X86_XSAVE_ID(xrstors64);
    INTRINSIC_X86_XSAVE_ID(xsavec);
    INTRINSIC_X86_XSAVE_ID(xsavec64);
    INTRINSIC_X86_XSAVE_ID(xsaves);
    INTRINSIC_X86_XSAVE_ID(xsaves64);
    INTRINSIC_X86_XSAVE_ID(xsetbv);
    case X86::BI_xsetbv:
      ID = Intrinsic::x86_xsetbv;
      break;
    }
#undef INTRINSIC_X86_XSAVE_ID
    Value *Mhi = Builder.CreateTrunc(
      Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
    Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
    Ops[1] = Mhi;
    Ops.push_back(Mlo);
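    // The 64-bit feature mask is split into 32-bit halves: the high half
    // replaces Ops[1] and the low half is appended, so the intrinsic is
    // called as (first operand, mask-hi, mask-lo).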
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  }
  case X86::BI__builtin_ia32_xgetbv:
  case X86::BI_xgetbv:
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
  case X86::BI__builtin_ia32_storedqudi128_mask:
  case X86::BI__builtin_ia32_storedqusi128_mask:
  case X86::BI__builtin_ia32_storedquhi128_mask:
  case X86::BI__builtin_ia32_storedquqi128_mask:
  case X86::BI__builtin_ia32_storeupd128_mask:
  case X86::BI__builtin_ia32_storeups128_mask:
  case X86::BI__builtin_ia32_storedqudi256_mask:
  case X86::BI__builtin_ia32_storedqusi256_mask:
  case X86::BI__builtin_ia32_storedquhi256_mask:
  case X86::BI__builtin_ia32_storedquqi256_mask:
  case X86::BI__builtin_ia32_storeupd256_mask:
  case X86::BI__builtin_ia32_storeups256_mask:
  case X86::BI__builtin_ia32_storedqudi512_mask:
  case X86::BI__builtin_ia32_storedqusi512_mask:
  case X86::BI__builtin_ia32_storedquhi512_mask:
  case X86::BI__builtin_ia32_storedquqi512_mask:
  case X86::BI__builtin_ia32_storeupd512_mask:
  case X86::BI__builtin_ia32_storeups512_mask:
    return EmitX86MaskedStore(*this, Ops, Align(1));

  case X86::BI__builtin_ia32_storesh128_mask:
  case X86::BI__builtin_ia32_storess128_mask:
  case X86::BI__builtin_ia32_storesd128_mask:
    return EmitX86MaskedStore(*this, Ops, Align(1));
  case X86::BI__builtin_ia32_vpopcntb_128:
  case X86::BI__builtin_ia32_vpopcntd_128:
  case X86::BI__builtin_ia32_vpopcntq_128:
  case X86::BI__builtin_ia32_vpopcntw_128:
  case X86::BI__builtin_ia32_vpopcntb_256:
  case X86::BI__builtin_ia32_vpopcntd_256:
  case X86::BI__builtin_ia32_vpopcntq_256:
  case X86::BI__builtin_ia32_vpopcntw_256:
  case X86::BI__builtin_ia32_vpopcntb_512:
  case X86::BI__builtin_ia32_vpopcntd_512:
  case X86::BI__builtin_ia32_vpopcntq_512:
  case X86::BI__builtin_ia32_vpopcntw_512: {
    llvm::Type *ResultType = ConvertType(E->getType());
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
    return Builder.CreateCall(F, Ops);
  }
  case X86::BI__builtin_ia32_cvtmask2b128:
  case X86::BI__builtin_ia32_cvtmask2b256:
  case X86::BI__builtin_ia32_cvtmask2b512:
  case X86::BI__builtin_ia32_cvtmask2w128:
  case X86::BI__builtin_ia32_cvtmask2w256:
  case X86::BI__builtin_ia32_cvtmask2w512:
  case X86::BI__builtin_ia32_cvtmask2d128:
  case X86::BI__builtin_ia32_cvtmask2d256:
  case X86::BI__builtin_ia32_cvtmask2d512:
  case X86::BI__builtin_ia32_cvtmask2q128:
  case X86::BI__builtin_ia32_cvtmask2q256:
  case X86::BI__builtin_ia32_cvtmask2q512:
    return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));

  case X86::BI__builtin_ia32_cvtb2mask128:
  case X86::BI__builtin_ia32_cvtb2mask256:
  case X86::BI__builtin_ia32_cvtb2mask512:
  case X86::BI__builtin_ia32_cvtw2mask128:
  case X86::BI__builtin_ia32_cvtw2mask256:
  case X86::BI__builtin_ia32_cvtw2mask512:
  case X86::BI__builtin_ia32_cvtd2mask128:
  case X86::BI__builtin_ia32_cvtd2mask256:
  case X86::BI__builtin_ia32_cvtd2mask512:
  case X86::BI__builtin_ia32_cvtq2mask128:
  case X86::BI__builtin_ia32_cvtq2mask256:
  case X86::BI__builtin_ia32_cvtq2mask512:
    return EmitX86ConvertToMask(*this, Ops[0]);

  case X86::BI__builtin_ia32_cvtdq2ps512_mask:
  case X86::BI__builtin_ia32_cvtqq2ps512_mask:
  case X86::BI__builtin_ia32_cvtqq2pd512_mask:
  case X86::BI__builtin_ia32_vcvtw2ph512_mask:
  case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
  case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
  case X86::BI__builtin_ia32_cvtudq2ps512_mask:
  case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
  case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
  case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
  case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
  case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
  case X86::BI__builtin_ia32_vfmaddss3:
  case X86::BI__builtin_ia32_vfmaddsd3:
  case X86::BI__builtin_ia32_vfmaddsh3_mask:
  case X86::BI__builtin_ia32_vfmaddss3_mask:
  case X86::BI__builtin_ia32_vfmaddsd3_mask:
    return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
  case X86::BI__builtin_ia32_vfmaddss:
  case X86::BI__builtin_ia32_vfmaddsd:
    return EmitScalarFMAExpr(*this, E, Ops,
                             Constant::getNullValue(Ops[0]->getType()));
  case X86::BI__builtin_ia32_vfmaddsh3_maskz:
  case X86::BI__builtin_ia32_vfmaddss3_maskz:
  case X86::BI__builtin_ia32_vfmaddsd3_maskz:
    return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
  case X86::BI__builtin_ia32_vfmaddsh3_mask3:
  case X86::BI__builtin_ia32_vfmaddss3_mask3:
  case X86::BI__builtin_ia32_vfmaddsd3_mask3:
    return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
  case X86::BI__builtin_ia32_vfmsubsh3_mask3:
  case X86::BI__builtin_ia32_vfmsubss3_mask3:
  case X86::BI__builtin_ia32_vfmsubsd3_mask3:
    return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
                             /*NegAcc*/ true);
  case X86::BI__builtin_ia32_vfmaddph:
  case X86::BI__builtin_ia32_vfmaddps:
  case X86::BI__builtin_ia32_vfmaddpd:
  case X86::BI__builtin_ia32_vfmaddph256:
  case X86::BI__builtin_ia32_vfmaddps256:
  case X86::BI__builtin_ia32_vfmaddpd256:
  case X86::BI__builtin_ia32_vfmaddph512_mask:
  case X86::BI__builtin_ia32_vfmaddph512_maskz:
  case X86::BI__builtin_ia32_vfmaddph512_mask3:
  case X86::BI__builtin_ia32_vfmaddps512_mask:
  case X86::BI__builtin_ia32_vfmaddps512_maskz:
  case X86::BI__builtin_ia32_vfmaddps512_mask3:
  case X86::BI__builtin_ia32_vfmsubps512_mask3:
  case X86::BI__builtin_ia32_vfmaddpd512_mask:
  case X86::BI__builtin_ia32_vfmaddpd512_maskz:
  case X86::BI__builtin_ia32_vfmaddpd512_mask3:
  case X86::BI__builtin_ia32_vfmsubpd512_mask3:
  case X86::BI__builtin_ia32_vfmsubph512_mask3:
    return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
  case X86::BI__builtin_ia32_vfmaddsubph512_mask:
  case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
  case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
  case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
  case X86::BI__builtin_ia32_vfmaddsubps512_mask:
  case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
  case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
  case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
  case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
  case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
  case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
  case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
    return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
  case X86::BI__builtin_ia32_movdqa32store128_mask:
  case X86::BI__builtin_ia32_movdqa64store128_mask:
  case X86::BI__builtin_ia32_storeaps128_mask:
  case X86::BI__builtin_ia32_storeapd128_mask:
  case X86::BI__builtin_ia32_movdqa32store256_mask:
  case X86::BI__builtin_ia32_movdqa64store256_mask:
  case X86::BI__builtin_ia32_storeaps256_mask:
  case X86::BI__builtin_ia32_storeapd256_mask:
  case X86::BI__builtin_ia32_movdqa32store512_mask:
  case X86::BI__builtin_ia32_movdqa64store512_mask:
  case X86::BI__builtin_ia32_storeaps512_mask:
  case X86::BI__builtin_ia32_storeapd512_mask:
    return EmitX86MaskedStore(
        *this, Ops,
        getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
  case X86::BI__builtin_ia32_loadups128_mask:
  case X86::BI__builtin_ia32_loadups256_mask:
  case X86::BI__builtin_ia32_loadups512_mask:
  case X86::BI__builtin_ia32_loadupd128_mask:
  case X86::BI__builtin_ia32_loadupd256_mask:
  case X86::BI__builtin_ia32_loadupd512_mask:
  case X86::BI__builtin_ia32_loaddquqi128_mask:
  case X86::BI__builtin_ia32_loaddquqi256_mask:
  case X86::BI__builtin_ia32_loaddquqi512_mask:
  case X86::BI__builtin_ia32_loaddquhi128_mask:
  case X86::BI__builtin_ia32_loaddquhi256_mask:
  case X86::BI__builtin_ia32_loaddquhi512_mask:
  case X86::BI__builtin_ia32_loaddqusi128_mask:
  case X86::BI__builtin_ia32_loaddqusi256_mask:
  case X86::BI__builtin_ia32_loaddqusi512_mask:
  case X86::BI__builtin_ia32_loaddqudi128_mask:
  case X86::BI__builtin_ia32_loaddqudi256_mask:
  case X86::BI__builtin_ia32_loaddqudi512_mask:
    return EmitX86MaskedLoad(*this, Ops, Align(1));

  case X86::BI__builtin_ia32_loadsh128_mask:
  case X86::BI__builtin_ia32_loadss128_mask:
  case X86::BI__builtin_ia32_loadsd128_mask:
    return EmitX86MaskedLoad(*this, Ops, Align(1));
  case X86::BI__builtin_ia32_loadaps128_mask:
  case X86::BI__builtin_ia32_loadaps256_mask:
  case X86::BI__builtin_ia32_loadaps512_mask:
  case X86::BI__builtin_ia32_loadapd128_mask:
  case X86::BI__builtin_ia32_loadapd256_mask:
  case X86::BI__builtin_ia32_loadapd512_mask:
  case X86::BI__builtin_ia32_movdqa32load128_mask:
  case X86::BI__builtin_ia32_movdqa32load256_mask:
  case X86::BI__builtin_ia32_movdqa32load512_mask:
  case X86::BI__builtin_ia32_movdqa64load128_mask:
  case X86::BI__builtin_ia32_movdqa64load256_mask:
  case X86::BI__builtin_ia32_movdqa64load512_mask:
    return EmitX86MaskedLoad(
        *this, Ops,
        getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
  case X86::BI__builtin_ia32_expandloaddf128_mask:
  case X86::BI__builtin_ia32_expandloaddf256_mask:
  case X86::BI__builtin_ia32_expandloaddf512_mask:
  case X86::BI__builtin_ia32_expandloadsf128_mask:
  case X86::BI__builtin_ia32_expandloadsf256_mask:
  case X86::BI__builtin_ia32_expandloadsf512_mask:
  case X86::BI__builtin_ia32_expandloaddi128_mask:
  case X86::BI__builtin_ia32_expandloaddi256_mask:
  case X86::BI__builtin_ia32_expandloaddi512_mask:
  case X86::BI__builtin_ia32_expandloadsi128_mask:
  case X86::BI__builtin_ia32_expandloadsi256_mask:
  case X86::BI__builtin_ia32_expandloadsi512_mask:
  case X86::BI__builtin_ia32_expandloadhi128_mask:
  case X86::BI__builtin_ia32_expandloadhi256_mask:
  case X86::BI__builtin_ia32_expandloadhi512_mask:
  case X86::BI__builtin_ia32_expandloadqi128_mask:
  case X86::BI__builtin_ia32_expandloadqi256_mask:
  case X86::BI__builtin_ia32_expandloadqi512_mask:
    return EmitX86ExpandLoad(*this, Ops);

  case X86::BI__builtin_ia32_compressstoredf128_mask:
  case X86::BI__builtin_ia32_compressstoredf256_mask:
  case X86::BI__builtin_ia32_compressstoredf512_mask:
  case X86::BI__builtin_ia32_compressstoresf128_mask:
  case X86::BI__builtin_ia32_compressstoresf256_mask:
  case X86::BI__builtin_ia32_compressstoresf512_mask:
  case X86::BI__builtin_ia32_compressstoredi128_mask:
  case X86::BI__builtin_ia32_compressstoredi256_mask:
  case X86::BI__builtin_ia32_compressstoredi512_mask:
  case X86::BI__builtin_ia32_compressstoresi128_mask:
  case X86::BI__builtin_ia32_compressstoresi256_mask:
  case X86::BI__builtin_ia32_compressstoresi512_mask:
  case X86::BI__builtin_ia32_compressstorehi128_mask:
  case X86::BI__builtin_ia32_compressstorehi256_mask:
  case X86::BI__builtin_ia32_compressstorehi512_mask:
  case X86::BI__builtin_ia32_compressstoreqi128_mask:
  case X86::BI__builtin_ia32_compressstoreqi256_mask:
  case X86::BI__builtin_ia32_compressstoreqi512_mask:
    return EmitX86CompressStore(*this, Ops);

  case X86::BI__builtin_ia32_expanddf128_mask:
  case X86::BI__builtin_ia32_expanddf256_mask:
  case X86::BI__builtin_ia32_expanddf512_mask:
  case X86::BI__builtin_ia32_expandsf128_mask:
  case X86::BI__builtin_ia32_expandsf256_mask:
  case X86::BI__builtin_ia32_expandsf512_mask:
  case X86::BI__builtin_ia32_expanddi128_mask:
  case X86::BI__builtin_ia32_expanddi256_mask:
  case X86::BI__builtin_ia32_expanddi512_mask:
  case X86::BI__builtin_ia32_expandsi128_mask:
  case X86::BI__builtin_ia32_expandsi256_mask:
  case X86::BI__builtin_ia32_expandsi512_mask:
  case X86::BI__builtin_ia32_expandhi128_mask:
  case X86::BI__builtin_ia32_expandhi256_mask:
  case X86::BI__builtin_ia32_expandhi512_mask:
  case X86::BI__builtin_ia32_expandqi128_mask:
  case X86::BI__builtin_ia32_expandqi256_mask:
  case X86::BI__builtin_ia32_expandqi512_mask:
    return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);

  case X86::BI__builtin_ia32_compressdf128_mask:
  case X86::BI__builtin_ia32_compressdf256_mask:
  case X86::BI__builtin_ia32_compressdf512_mask:
  case X86::BI__builtin_ia32_compresssf128_mask:
  case X86::BI__builtin_ia32_compresssf256_mask:
  case X86::BI__builtin_ia32_compresssf512_mask:
  case X86::BI__builtin_ia32_compressdi128_mask:
  case X86::BI__builtin_ia32_compressdi256_mask:
  case X86::BI__builtin_ia32_compressdi512_mask:
  case X86::BI__builtin_ia32_compresssi128_mask:
  case X86::BI__builtin_ia32_compresssi256_mask:
  case X86::BI__builtin_ia32_compresssi512_mask:
  case X86::BI__builtin_ia32_compresshi128_mask:
  case X86::BI__builtin_ia32_compresshi256_mask:
  case X86::BI__builtin_ia32_compresshi512_mask:
  case X86::BI__builtin_ia32_compressqi128_mask:
  case X86::BI__builtin_ia32_compressqi256_mask:
  case X86::BI__builtin_ia32_compressqi512_mask:
    return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
  case X86::BI__builtin_ia32_gather3div2df:
  case X86::BI__builtin_ia32_gather3div2di:
  case X86::BI__builtin_ia32_gather3div4df:
  case X86::BI__builtin_ia32_gather3div4di:
  case X86::BI__builtin_ia32_gather3div4sf:
  case X86::BI__builtin_ia32_gather3div4si:
  case X86::BI__builtin_ia32_gather3div8sf:
  case X86::BI__builtin_ia32_gather3div8si:
  case X86::BI__builtin_ia32_gather3siv2df:
  case X86::BI__builtin_ia32_gather3siv2di:
  case X86::BI__builtin_ia32_gather3siv4df:
  case X86::BI__builtin_ia32_gather3siv4di:
  case X86::BI__builtin_ia32_gather3siv4sf:
  case X86::BI__builtin_ia32_gather3siv4si:
  case X86::BI__builtin_ia32_gather3siv8sf:
  case X86::BI__builtin_ia32_gather3siv8si:
  case X86::BI__builtin_ia32_gathersiv8df:
  case X86::BI__builtin_ia32_gathersiv16sf:
  case X86::BI__builtin_ia32_gatherdiv8df:
  case X86::BI__builtin_ia32_gatherdiv16sf:
  case X86::BI__builtin_ia32_gathersiv8di:
  case X86::BI__builtin_ia32_gathersiv16si:
  case X86::BI__builtin_ia32_gatherdiv8di:
  case X86::BI__builtin_ia32_gatherdiv16si: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unexpected builtin");
    case X86::BI__builtin_ia32_gather3div2df:
      IID = Intrinsic::x86_avx512_mask_gather3div2_df;
      break;
    case X86::BI__builtin_ia32_gather3div2di:
      IID = Intrinsic::x86_avx512_mask_gather3div2_di;
      break;
    case X86::BI__builtin_ia32_gather3div4df:
      IID = Intrinsic::x86_avx512_mask_gather3div4_df;
      break;
    case X86::BI__builtin_ia32_gather3div4di:
      IID = Intrinsic::x86_avx512_mask_gather3div4_di;
      break;
    case X86::BI__builtin_ia32_gather3div4sf:
      IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
      break;
    case X86::BI__builtin_ia32_gather3div4si:
      IID = Intrinsic::x86_avx512_mask_gather3div4_si;
      break;
    case X86::BI__builtin_ia32_gather3div8sf:
      IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
      break;
    case X86::BI__builtin_ia32_gather3div8si:
      IID = Intrinsic::x86_avx512_mask_gather3div8_si;
      break;
    case X86::BI__builtin_ia32_gather3siv2df:
      IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
      break;
    case X86::BI__builtin_ia32_gather3siv2di:
      IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
      break;
    case X86::BI__builtin_ia32_gather3siv4df:
      IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
      break;
    case X86::BI__builtin_ia32_gather3siv4di:
      IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
      break;
    case X86::BI__builtin_ia32_gather3siv4sf:
      IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
      break;
    case X86::BI__builtin_ia32_gather3siv4si:
      IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
      break;
    case X86::BI__builtin_ia32_gather3siv8sf:
      IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
      break;
    case X86::BI__builtin_ia32_gather3siv8si:
      IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
      break;
    case X86::BI__builtin_ia32_gathersiv8df:
      IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
      break;
    case X86::BI__builtin_ia32_gathersiv16sf:
      IID = Intrinsic::x86_avx512_mask_gather_dps_512;
      break;
    case X86::BI__builtin_ia32_gatherdiv8df:
      IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
      break;
    case X86::BI__builtin_ia32_gatherdiv16sf:
      IID = Intrinsic::x86_avx512_mask_gather_qps_512;
      break;
    case X86::BI__builtin_ia32_gathersiv8di:
      IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
      break;
    case X86::BI__builtin_ia32_gathersiv16si:
      IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
      break;
    case X86::BI__builtin_ia32_gatherdiv8di:
      IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
      break;
    case X86::BI__builtin_ia32_gatherdiv16si:
      IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
      break;
    }

    unsigned MinElts = std::min(
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
        cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
    Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
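    // The source/passthrough vector (Ops[0]) and the index vector (Ops[2])
    // can have different element counts, so the i1 mask is emitted at the
    // smaller of the two before calling the gather intrinsic.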
    Function *Intr = CGM.getIntrinsic(IID);
    return Builder.CreateCall(Intr, Ops);
  }
  case X86::BI__builtin_ia32_scattersiv8df:
  case X86::BI__builtin_ia32_scattersiv16sf:
  case X86::BI__builtin_ia32_scatterdiv8df:
  case X86::BI__builtin_ia32_scatterdiv16sf:
  case X86::BI__builtin_ia32_scattersiv8di:
  case X86::BI__builtin_ia32_scattersiv16si:
  case X86::BI__builtin_ia32_scatterdiv8di:
  case X86::BI__builtin_ia32_scatterdiv16si:
  case X86::BI__builtin_ia32_scatterdiv2df:
  case X86::BI__builtin_ia32_scatterdiv2di:
  case X86::BI__builtin_ia32_scatterdiv4df:
  case X86::BI__builtin_ia32_scatterdiv4di:
  case X86::BI__builtin_ia32_scatterdiv4sf:
  case X86::BI__builtin_ia32_scatterdiv4si:
  case X86::BI__builtin_ia32_scatterdiv8sf:
  case X86::BI__builtin_ia32_scatterdiv8si:
  case X86::BI__builtin_ia32_scattersiv2df:
  case X86::BI__builtin_ia32_scattersiv2di:
  case X86::BI__builtin_ia32_scattersiv4df:
  case X86::BI__builtin_ia32_scattersiv4di:
  case X86::BI__builtin_ia32_scattersiv4sf:
  case X86::BI__builtin_ia32_scattersiv4si:
  case X86::BI__builtin_ia32_scattersiv8sf:
  case X86::BI__builtin_ia32_scattersiv8si: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unexpected builtin");
    case X86::BI__builtin_ia32_scattersiv8df:
      IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
      break;
    case X86::BI__builtin_ia32_scattersiv16sf:
      IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
      break;
    case X86::BI__builtin_ia32_scatterdiv8df:
      IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
      break;
    case X86::BI__builtin_ia32_scatterdiv16sf:
      IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
      break;
    case X86::BI__builtin_ia32_scattersiv8di:
      IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
      break;
    case X86::BI__builtin_ia32_scattersiv16si:
      IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
      break;
    case X86::BI__builtin_ia32_scatterdiv8di:
      IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
      break;
    case X86::BI__builtin_ia32_scatterdiv16si:
      IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
      break;
    case X86::BI__builtin_ia32_scatterdiv2df:
      IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
      break;
    case X86::BI__builtin_ia32_scatterdiv2di:
      IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
      break;
    case X86::BI__builtin_ia32_scatterdiv4df:
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
      break;
    case X86::BI__builtin_ia32_scatterdiv4di:
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
      break;
    case X86::BI__builtin_ia32_scatterdiv4sf:
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
      break;
    case X86::BI__builtin_ia32_scatterdiv4si:
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
      break;
    case X86::BI__builtin_ia32_scatterdiv8sf:
      IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
      break;
    case X86::BI__builtin_ia32_scatterdiv8si:
      IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
      break;
    case X86::BI__builtin_ia32_scattersiv2df:
      IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
      break;
    case X86::BI__builtin_ia32_scattersiv2di:
      IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
      break;
    case X86::BI__builtin_ia32_scattersiv4df:
      IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
      break;
    case X86::BI__builtin_ia32_scattersiv4di:
      IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
      break;
    case X86::BI__builtin_ia32_scattersiv4sf:
      IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
      break;
    case X86::BI__builtin_ia32_scattersiv4si:
      IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
      break;
    case X86::BI__builtin_ia32_scattersiv8sf:
      IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
      break;
    case X86::BI__builtin_ia32_scattersiv8si:
      IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
      break;
    }

    unsigned MinElts = std::min(
        cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
        cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
    Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
    Function *Intr = CGM.getIntrinsic(IID);
    return Builder.CreateCall(Intr, Ops);
  }
  case X86::BI__builtin_ia32_vextractf128_pd256:
  case X86::BI__builtin_ia32_vextractf128_ps256:
  case X86::BI__builtin_ia32_vextractf128_si256:
  case X86::BI__builtin_ia32_extract128i256:
  case X86::BI__builtin_ia32_extractf64x4_mask:
  case X86::BI__builtin_ia32_extractf32x4_mask:
  case X86::BI__builtin_ia32_extracti64x4_mask:
  case X86::BI__builtin_ia32_extracti32x4_mask:
  case X86::BI__builtin_ia32_extractf32x8_mask:
  case X86::BI__builtin_ia32_extracti32x8_mask:
  case X86::BI__builtin_ia32_extractf32x4_256_mask:
  case X86::BI__builtin_ia32_extracti32x4_256_mask:
  case X86::BI__builtin_ia32_extractf64x2_256_mask:
  case X86::BI__builtin_ia32_extracti64x2_256_mask:
  case X86::BI__builtin_ia32_extractf64x2_512_mask:
  case X86::BI__builtin_ia32_extracti64x2_512_mask: {
    auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
    unsigned NumElts = DstTy->getNumElements();
    unsigned SrcNumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    unsigned SubVectors = SrcNumElts / NumElts;
    unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
    assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
    Index &= SubVectors - 1; // Remove any extra bits.
    Index *= NumElts;

    int Indices[16];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i + Index;
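    // e.g. extracting 128-bit subvector 1 from a v8f32 gives Index = 4 and a
    // shuffle mask of {4, 5, 6, 7}.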
    Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
                                             "extract");

    if (Ops.size() == 4)
      Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);

    return Res;
  }
  case X86::BI__builtin_ia32_vinsertf128_pd256:
  case X86::BI__builtin_ia32_vinsertf128_ps256:
  case X86::BI__builtin_ia32_vinsertf128_si256:
  case X86::BI__builtin_ia32_insert128i256:
  case X86::BI__builtin_ia32_insertf64x4:
  case X86::BI__builtin_ia32_insertf32x4:
  case X86::BI__builtin_ia32_inserti64x4:
  case X86::BI__builtin_ia32_inserti32x4:
  case X86::BI__builtin_ia32_insertf32x8:
  case X86::BI__builtin_ia32_inserti32x8:
  case X86::BI__builtin_ia32_insertf32x4_256:
  case X86::BI__builtin_ia32_inserti32x4_256:
  case X86::BI__builtin_ia32_insertf64x2_256:
  case X86::BI__builtin_ia32_inserti64x2_256:
  case X86::BI__builtin_ia32_insertf64x2_512:
  case X86::BI__builtin_ia32_inserti64x2_512: {
    unsigned DstNumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    unsigned SrcNumElts =
        cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
    unsigned SubVectors = DstNumElts / SrcNumElts;
    unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
    assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
    Index &= SubVectors - 1; // Remove any extra bits.
    Index *= SrcNumElts;

    int Indices[16];
    for (unsigned i = 0; i != DstNumElts; ++i)
      Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;

    Value *Op1 = Builder.CreateShuffleVector(
        Ops[1], ArrayRef(Indices, DstNumElts), "widen");

    for (unsigned i = 0; i != DstNumElts; ++i) {
      if (i >= Index && i < (Index + SrcNumElts))
        Indices[i] = (i - Index) + DstNumElts;
      else
        Indices[i] = i;
    }
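    // e.g. for insertf32x4 into a v8f32 at index 1, the source is first
    // widened to eight elements and the final mask becomes
    // {0, 1, 2, 3, 8, 9, 10, 11}, where 8..11 select the widened source.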
    return Builder.CreateShuffleVector(Ops[0], Op1,
                                       ArrayRef(Indices, DstNumElts), "insert");
  }
  case X86::BI__builtin_ia32_pmovqd512_mask:
  case X86::BI__builtin_ia32_pmovwb512_mask: {
    Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
    return EmitX86Select(*this, Ops[2], Res, Ops[1]);
  }
  case X86::BI__builtin_ia32_pmovdb512_mask:
  case X86::BI__builtin_ia32_pmovdw512_mask:
  case X86::BI__builtin_ia32_pmovqw512_mask: {
    if (const auto *C = dyn_cast<Constant>(Ops[2]))
      if (C->isAllOnesValue())
        return Builder.CreateTrunc(Ops[0], Ops[1]->getType());

    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_pmovdb512_mask:
      IID = Intrinsic::x86_avx512_mask_pmov_db_512;
      break;
    case X86::BI__builtin_ia32_pmovdw512_mask:
      IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
      break;
    case X86::BI__builtin_ia32_pmovqw512_mask:
      IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
      break;
    }

    Function *Intr = CGM.getIntrinsic(IID);
    return Builder.CreateCall(Intr, Ops);
  }
  case X86::BI__builtin_ia32_pblendw128:
  case X86::BI__builtin_ia32_blendpd:
  case X86::BI__builtin_ia32_blendps:
  case X86::BI__builtin_ia32_blendpd256:
  case X86::BI__builtin_ia32_blendps256:
  case X86::BI__builtin_ia32_pblendw256:
  case X86::BI__builtin_ia32_pblendd128:
  case X86::BI__builtin_ia32_pblendd256: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    int Indices[16];
    // If there are more than 8 elements, the immediate is used twice so make
    // sure we handle that.
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
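    // e.g. pblendd128 with Imm = 0b0110 produces the mask {0, 5, 6, 3}:
    // a set immediate bit selects the corresponding element of Ops[1].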
    return Builder.CreateShuffleVector(Ops[0], Ops[1],
                                       ArrayRef(Indices, NumElts), "blend");
  }
  case X86::BI__builtin_ia32_pshuflw:
  case X86::BI__builtin_ia32_pshuflw256:
  case X86::BI__builtin_ia32_pshuflw512: {
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();

    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
    Imm = (Imm & 0xff) * 0x01010101;

    int Indices[32];
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i) {
        Indices[l + i] = l + (Imm & 3);
        Imm >>= 2;
      }
      for (unsigned i = 4; i != 8; ++i)
        Indices[l + i] = l + i;
    }
    return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
                                       "pshuflw");
  }
  case X86::BI__builtin_ia32_pshufhw:
  case X86::BI__builtin_ia32_pshufhw256:
  case X86::BI__builtin_ia32_pshufhw512: {
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();

    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
    Imm = (Imm & 0xff) * 0x01010101;

    int Indices[32];
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Indices[l + i] = l + i;
      for (unsigned i = 4; i != 8; ++i) {
        Indices[l + i] = l + 4 + (Imm & 3);
        Imm >>= 2;
      }
    }

    return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
                                       "pshufhw");
  }
  case X86::BI__builtin_ia32_pshufd:
  case X86::BI__builtin_ia32_pshufd256:
  case X86::BI__builtin_ia32_pshufd512:
  case X86::BI__builtin_ia32_vpermilpd:
  case X86::BI__builtin_ia32_vpermilps:
  case X86::BI__builtin_ia32_vpermilpd256:
  case X86::BI__builtin_ia32_vpermilps256:
  case X86::BI__builtin_ia32_vpermilpd512:
  case X86::BI__builtin_ia32_vpermilps512: {
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();
    unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
    unsigned NumLaneElts = NumElts / NumLanes;

    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
    Imm = (Imm & 0xff) * 0x01010101;

    int Indices[16];
    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
      for (unsigned i = 0; i != NumLaneElts; ++i) {
        Indices[i + l] = (Imm % NumLaneElts) + l;
        Imm /= NumLaneElts;
      }
    }

    return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
                                       "permil");
  }
  case X86::BI__builtin_ia32_shufpd:
  case X86::BI__builtin_ia32_shufpd256:
  case X86::BI__builtin_ia32_shufpd512:
  case X86::BI__builtin_ia32_shufps:
  case X86::BI__builtin_ia32_shufps256:
  case X86::BI__builtin_ia32_shufps512: {
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();
    unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
    unsigned NumLaneElts = NumElts / NumLanes;

    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
    Imm = (Imm & 0xff) * 0x01010101;

    int Indices[16];
    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
      for (unsigned i = 0; i != NumLaneElts; ++i) {
        unsigned Index = Imm % NumLaneElts;
        Imm /= NumLaneElts;
        if (i >= (NumLaneElts / 2))
          Index += NumElts;
        Indices[l + i] = l + Index;
      }
    }

    return Builder.CreateShuffleVector(Ops[0], Ops[1],
                                       ArrayRef(Indices, NumElts), "shufp");
  }
  case X86::BI__builtin_ia32_permdi256:
  case X86::BI__builtin_ia32_permdf256:
  case X86::BI__builtin_ia32_permdi512:
  case X86::BI__builtin_ia32_permdf512: {
    unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();

    // These intrinsics operate on 256-bit lanes of four 64-bit elements.
    int Indices[8];
    for (unsigned l = 0; l != NumElts; l += 4)
      for (unsigned i = 0; i != 4; ++i)
        Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);

    return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
                                       "perm");
  }
  case X86::BI__builtin_ia32_palignr128:
  case X86::BI__builtin_ia32_palignr256:
  case X86::BI__builtin_ia32_palignr512: {
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;

    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    assert(NumElts % 16 == 0);

    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if (ShiftVal >= 32)
      return llvm::Constant::getNullValue(ConvertType(E->getType()));

    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    if (ShiftVal > 16) {
      ShiftVal -= 16;
      Ops[1] = Ops[0];
      Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
    }

    int Indices[64];
    // 256-bit palignr operates on 128-bit lanes so we need to handle that
    for (unsigned l = 0; l != NumElts; l += 16) {
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = ShiftVal + i;
        if (Idx >= 16)
          Idx += NumElts - 16; // End of lane, switch operand.
        Indices[l + i] = Idx + l;
      }
    }
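    // e.g. palignr128 with a 4-byte shift yields indices {4..19}: bytes 4..15
    // of Ops[1] followed by bytes 0..3 of Ops[0], matching a right shift of
    // the 32-byte concatenation.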
    return Builder.CreateShuffleVector(Ops[1], Ops[0],
                                       ArrayRef(Indices, NumElts), "palignr");
  }
  case X86::BI__builtin_ia32_alignd128:
  case X86::BI__builtin_ia32_alignd256:
  case X86::BI__builtin_ia32_alignd512:
  case X86::BI__builtin_ia32_alignq128:
  case X86::BI__builtin_ia32_alignq256:
  case X86::BI__builtin_ia32_alignq512: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;

    // Mask the shift amount to width of a vector.
    ShiftVal &= NumElts - 1;

    int Indices[16];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i + ShiftVal;

    return Builder.CreateShuffleVector(Ops[1], Ops[0],
                                       ArrayRef(Indices, NumElts), "valign");
  }
  case X86::BI__builtin_ia32_shuf_f32x4_256:
  case X86::BI__builtin_ia32_shuf_f64x2_256:
  case X86::BI__builtin_ia32_shuf_i32x4_256:
  case X86::BI__builtin_ia32_shuf_i64x2_256:
  case X86::BI__builtin_ia32_shuf_f32x4:
  case X86::BI__builtin_ia32_shuf_f64x2:
  case X86::BI__builtin_ia32_shuf_i32x4:
  case X86::BI__builtin_ia32_shuf_i64x2: {
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();
    unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
    unsigned NumLaneElts = NumElts / NumLanes;

    int Indices[16];
    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
      unsigned Index = (Imm % NumLanes) * NumLaneElts;
      Imm /= NumLanes; // Discard the bits we just used.
      if (l >= (NumElts / 2))
        Index += NumElts; // Switch to other source.
      for (unsigned i = 0; i != NumLaneElts; ++i) {
        Indices[l + i] = Index + i;
      }
    }

    return Builder.CreateShuffleVector(Ops[0], Ops[1],
                                       ArrayRef(Indices, NumElts), "shuf");
  }
  case X86::BI__builtin_ia32_vperm2f128_pd256:
  case X86::BI__builtin_ia32_vperm2f128_ps256:
  case X86::BI__builtin_ia32_vperm2f128_si256:
  case X86::BI__builtin_ia32_permti256: {
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();

    // This takes a very simple approach since there are two lanes and a
    // shuffle can have 2 inputs. So we reserve the first input for the first
    // lane and the second input for the second lane. This may result in
    // duplicate sources, but this can be dealt with in the backend.

    Value *OutOps[2];
    int Indices[8];
    for (unsigned l = 0; l != 2; ++l) {
      // Determine the source for this lane.
      if (Imm & (1 << ((l * 4) + 3)))
        OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
      else if (Imm & (1 << ((l * 4) + 1)))
        OutOps[l] = Ops[1];
      else
        OutOps[l] = Ops[0];

      for (unsigned i = 0; i != NumElts/2; ++i) {
        // Start with ith element of the source for this lane.
        unsigned Idx = (l * NumElts) + i;
        // If bit 0 of the immediate half is set, switch to the high half of
        // the source.
        if (Imm & (1 << (l * 4)))
          Idx += NumElts/2;
        Indices[(l * (NumElts/2)) + i] = Idx;
      }
    }
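    // Per result lane l, nibble l of the immediate is decoded as: bit 3
    // zeroes the lane, bit 1 selects the second source, and bit 0 picks the
    // high half of whichever source was chosen.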
    return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
                                       ArrayRef(Indices, NumElts), "vperm");
  }
  case X86::BI__builtin_ia32_pslldqi128_byteshift:
  case X86::BI__builtin_ia32_pslldqi256_byteshift:
  case X86::BI__builtin_ia32_pslldqi512_byteshift: {
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
    auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
    // Builtin type is vXi64 so multiply by 8 to get bytes.
    unsigned NumElts = ResultType->getNumElements() * 8;

    // If pslldq is shifting the vector more than 15 bytes, emit zero.
    if (ShiftVal >= 16)
      return llvm::Constant::getNullValue(ResultType);

    int Indices[64];
    // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
    for (unsigned l = 0; l != NumElts; l += 16) {
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - ShiftVal;
        if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
        Indices[l + i] = Idx + l;
      }
    }
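    // The shuffle below pairs a zero vector with the source: indices below
    // NumElts pull in zero bytes, so e.g. a 3-byte shift of a 128-bit vector
    // uses {13, 14, 15, 16, ..., 28} and the first three result bytes are 0.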
    auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
    Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
    Value *Zero = llvm::Constant::getNullValue(VecTy);
    Value *SV = Builder.CreateShuffleVector(
        Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
    return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
  }
  case X86::BI__builtin_ia32_psrldqi128_byteshift:
  case X86::BI__builtin_ia32_psrldqi256_byteshift:
  case X86::BI__builtin_ia32_psrldqi512_byteshift: {
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
    auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
    // Builtin type is vXi64 so multiply by 8 to get bytes.
    unsigned NumElts = ResultType->getNumElements() * 8;

    // If psrldq is shifting the vector more than 15 bytes, emit zero.
    if (ShiftVal >= 16)
      return llvm::Constant::getNullValue(ResultType);

    int Indices[64];
    // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
    for (unsigned l = 0; l != NumElts; l += 16) {
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + ShiftVal;
        if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
        Indices[l + i] = Idx + l;
      }
    }

    auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
    Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
    Value *Zero = llvm::Constant::getNullValue(VecTy);
    Value *SV = Builder.CreateShuffleVector(
        Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
    return Builder.CreateBitCast(SV, ResultType, "cast");
  }
  case X86::BI__builtin_ia32_kshiftliqi:
  case X86::BI__builtin_ia32_kshiftlihi:
  case X86::BI__builtin_ia32_kshiftlisi:
  case X86::BI__builtin_ia32_kshiftlidi: {
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();

    if (ShiftVal >= NumElts)
      return llvm::Constant::getNullValue(Ops[0]->getType());

    Value *In = getMaskVecValue(*this, Ops[0], NumElts);

    int Indices[64];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = NumElts + i - ShiftVal;

    Value *Zero = llvm::Constant::getNullValue(In->getType());
    Value *SV = Builder.CreateShuffleVector(
        Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
    return Builder.CreateBitCast(SV, Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_kshiftriqi:
  case X86::BI__builtin_ia32_kshiftrihi:
  case X86::BI__builtin_ia32_kshiftrisi:
  case X86::BI__builtin_ia32_kshiftridi: {
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();

    if (ShiftVal >= NumElts)
      return llvm::Constant::getNullValue(Ops[0]->getType());

    Value *In = getMaskVecValue(*this, Ops[0], NumElts);

    int Indices[64];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i + ShiftVal;

    Value *Zero = llvm::Constant::getNullValue(In->getType());
    Value *SV = Builder.CreateShuffleVector(
        In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
    return Builder.CreateBitCast(SV, Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_movnti:
  case X86::BI__builtin_ia32_movnti64:
  case X86::BI__builtin_ia32_movntsd:
  case X86::BI__builtin_ia32_movntss: {
    llvm::MDNode *Node = llvm::MDNode::get(
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));

    Value *Ptr = Ops[0];
    Value *Src = Ops[1];

    // Extract the 0'th element of the source vector.
    if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
        BuiltinID == X86::BI__builtin_ia32_movntss)
      Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");

    // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(
        Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");

    // Unaligned nontemporal store of the scalar value.
    StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
    SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
    SI->setAlignment(llvm::Align(1));
    return SI;
  }
  // Rotate is a special case of funnel shift - 1st 2 args are the same.
  case X86::BI__builtin_ia32_vprotb:
  case X86::BI__builtin_ia32_vprotw:
  case X86::BI__builtin_ia32_vprotd:
  case X86::BI__builtin_ia32_vprotq:
  case X86::BI__builtin_ia32_vprotbi:
  case X86::BI__builtin_ia32_vprotwi:
  case X86::BI__builtin_ia32_vprotdi:
  case X86::BI__builtin_ia32_vprotqi:
  case X86::BI__builtin_ia32_prold128:
  case X86::BI__builtin_ia32_prold256:
  case X86::BI__builtin_ia32_prold512:
  case X86::BI__builtin_ia32_prolq128:
  case X86::BI__builtin_ia32_prolq256:
  case X86::BI__builtin_ia32_prolq512:
  case X86::BI__builtin_ia32_prolvd128:
  case X86::BI__builtin_ia32_prolvd256:
  case X86::BI__builtin_ia32_prolvd512:
  case X86::BI__builtin_ia32_prolvq128:
  case X86::BI__builtin_ia32_prolvq256:
  case X86::BI__builtin_ia32_prolvq512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
  case X86::BI__builtin_ia32_prord128:
  case X86::BI__builtin_ia32_prord256:
  case X86::BI__builtin_ia32_prord512:
  case X86::BI__builtin_ia32_prorq128:
  case X86::BI__builtin_ia32_prorq256:
  case X86::BI__builtin_ia32_prorq512:
  case X86::BI__builtin_ia32_prorvd128:
  case X86::BI__builtin_ia32_prorvd256:
  case X86::BI__builtin_ia32_prorvd512:
  case X86::BI__builtin_ia32_prorvq128:
  case X86::BI__builtin_ia32_prorvq256:
  case X86::BI__builtin_ia32_prorvq512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
  case X86::BI__builtin_ia32_selectb_128:
  case X86::BI__builtin_ia32_selectb_256:
  case X86::BI__builtin_ia32_selectb_512:
  case X86::BI__builtin_ia32_selectw_128:
  case X86::BI__builtin_ia32_selectw_256:
  case X86::BI__builtin_ia32_selectw_512:
  case X86::BI__builtin_ia32_selectd_128:
  case X86::BI__builtin_ia32_selectd_256:
  case X86::BI__builtin_ia32_selectd_512:
  case X86::BI__builtin_ia32_selectq_128:
  case X86::BI__builtin_ia32_selectq_256:
  case X86::BI__builtin_ia32_selectq_512:
  case X86::BI__builtin_ia32_selectph_128:
  case X86::BI__builtin_ia32_selectph_256:
  case X86::BI__builtin_ia32_selectph_512:
  case X86::BI__builtin_ia32_selectpbf_128:
  case X86::BI__builtin_ia32_selectpbf_256:
  case X86::BI__builtin_ia32_selectpbf_512:
  case X86::BI__builtin_ia32_selectps_128:
  case X86::BI__builtin_ia32_selectps_256:
  case X86::BI__builtin_ia32_selectps_512:
  case X86::BI__builtin_ia32_selectpd_128:
  case X86::BI__builtin_ia32_selectpd_256:
  case X86::BI__builtin_ia32_selectpd_512:
    return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
  case X86::BI__builtin_ia32_selectsh_128:
  case X86::BI__builtin_ia32_selectsbf_128:
  case X86::BI__builtin_ia32_selectss_128:
  case X86::BI__builtin_ia32_selectsd_128: {
    Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
    Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    A = EmitX86ScalarSelect(*this, Ops[0], A, B);
    return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
  }
  case X86::BI__builtin_ia32_cmpb128_mask:
  case X86::BI__builtin_ia32_cmpb256_mask:
  case X86::BI__builtin_ia32_cmpb512_mask:
  case X86::BI__builtin_ia32_cmpw128_mask:
  case X86::BI__builtin_ia32_cmpw256_mask:
  case X86::BI__builtin_ia32_cmpw512_mask:
  case X86::BI__builtin_ia32_cmpd128_mask:
  case X86::BI__builtin_ia32_cmpd256_mask:
  case X86::BI__builtin_ia32_cmpd512_mask:
  case X86::BI__builtin_ia32_cmpq128_mask:
  case X86::BI__builtin_ia32_cmpq256_mask:
  case X86::BI__builtin_ia32_cmpq512_mask: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
    return EmitX86MaskedCompare(*this, CC, true, Ops);
  }
  case X86::BI__builtin_ia32_ucmpb128_mask:
  case X86::BI__builtin_ia32_ucmpb256_mask:
  case X86::BI__builtin_ia32_ucmpb512_mask:
  case X86::BI__builtin_ia32_ucmpw128_mask:
  case X86::BI__builtin_ia32_ucmpw256_mask:
  case X86::BI__builtin_ia32_ucmpw512_mask:
  case X86::BI__builtin_ia32_ucmpd128_mask:
  case X86::BI__builtin_ia32_ucmpd256_mask:
  case X86::BI__builtin_ia32_ucmpd512_mask:
  case X86::BI__builtin_ia32_ucmpq128_mask:
  case X86::BI__builtin_ia32_ucmpq256_mask:
  case X86::BI__builtin_ia32_ucmpq512_mask: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
    return EmitX86MaskedCompare(*this, CC, false, Ops);
  }
  case X86::BI__builtin_ia32_vpcomb:
  case X86::BI__builtin_ia32_vpcomw:
  case X86::BI__builtin_ia32_vpcomd:
  case X86::BI__builtin_ia32_vpcomq:
    return EmitX86vpcom(*this, Ops, true);
  case X86::BI__builtin_ia32_vpcomub:
  case X86::BI__builtin_ia32_vpcomuw:
  case X86::BI__builtin_ia32_vpcomud:
  case X86::BI__builtin_ia32_vpcomuq:
    return EmitX86vpcom(*this, Ops, false);
  case X86::BI__builtin_ia32_kortestcqi:
  case X86::BI__builtin_ia32_kortestchi:
  case X86::BI__builtin_ia32_kortestcsi:
  case X86::BI__builtin_ia32_kortestcdi: {
    Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
    Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
    Value *Cmp = Builder.CreateICmpEQ(Or, C);
    return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_kortestzqi:
  case X86::BI__builtin_ia32_kortestzhi:
  case X86::BI__builtin_ia32_kortestzsi:
  case X86::BI__builtin_ia32_kortestzdi: {
    Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
    Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
    Value *Cmp = Builder.CreateICmpEQ(Or, C);
    return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_ktestcqi:
  case X86::BI__builtin_ia32_ktestzqi:
  case X86::BI__builtin_ia32_ktestchi:
  case X86::BI__builtin_ia32_ktestzhi:
  case X86::BI__builtin_ia32_ktestcsi:
  case X86::BI__builtin_ia32_ktestzsi:
  case X86::BI__builtin_ia32_ktestcdi:
  case X86::BI__builtin_ia32_ktestzdi: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_ktestcqi:
      IID = Intrinsic::x86_avx512_ktestc_b;
      break;
    case X86::BI__builtin_ia32_ktestzqi:
      IID = Intrinsic::x86_avx512_ktestz_b;
      break;
    case X86::BI__builtin_ia32_ktestchi:
      IID = Intrinsic::x86_avx512_ktestc_w;
      break;
    case X86::BI__builtin_ia32_ktestzhi:
      IID = Intrinsic::x86_avx512_ktestz_w;
      break;
    case X86::BI__builtin_ia32_ktestcsi:
      IID = Intrinsic::x86_avx512_ktestc_d;
      break;
    case X86::BI__builtin_ia32_ktestzsi:
      IID = Intrinsic::x86_avx512_ktestz_d;
      break;
    case X86::BI__builtin_ia32_ktestcdi:
      IID = Intrinsic::x86_avx512_ktestc_q;
      break;
    case X86::BI__builtin_ia32_ktestzdi:
      IID = Intrinsic::x86_avx512_ktestz_q;
      break;
    }

    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
    Function *Intr = CGM.getIntrinsic(IID);
    return Builder.CreateCall(Intr, {LHS, RHS});
  }
  case X86::BI__builtin_ia32_kaddqi:
  case X86::BI__builtin_ia32_kaddhi:
  case X86::BI__builtin_ia32_kaddsi:
  case X86::BI__builtin_ia32_kadddi: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_kaddqi:
      IID = Intrinsic::x86_avx512_kadd_b;
      break;
    case X86::BI__builtin_ia32_kaddhi:
      IID = Intrinsic::x86_avx512_kadd_w;
      break;
    case X86::BI__builtin_ia32_kaddsi:
      IID = Intrinsic::x86_avx512_kadd_d;
      break;
    case X86::BI__builtin_ia32_kadddi:
      IID = Intrinsic::x86_avx512_kadd_q;
      break;
    }

    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
    Function *Intr = CGM.getIntrinsic(IID);
    Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
    return Builder.CreateBitCast(Res, Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_kandqi:
  case X86::BI__builtin_ia32_kandhi:
  case X86::BI__builtin_ia32_kandsi:
  case X86::BI__builtin_ia32_kanddi:
    return EmitX86MaskLogic(*this, Instruction::And, Ops);
  case X86::BI__builtin_ia32_kandnqi:
  case X86::BI__builtin_ia32_kandnhi:
  case X86::BI__builtin_ia32_kandnsi:
  case X86::BI__builtin_ia32_kandndi:
    return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
  case X86::BI__builtin_ia32_korqi:
  case X86::BI__builtin_ia32_korhi:
  case X86::BI__builtin_ia32_korsi:
  case X86::BI__builtin_ia32_kordi:
    return EmitX86MaskLogic(*this, Instruction::Or, Ops);
  case X86::BI__builtin_ia32_kxnorqi:
  case X86::BI__builtin_ia32_kxnorhi:
  case X86::BI__builtin_ia32_kxnorsi:
  case X86::BI__builtin_ia32_kxnordi:
    return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
  case X86::BI__builtin_ia32_kxorqi:
  case X86::BI__builtin_ia32_kxorhi:
  case X86::BI__builtin_ia32_kxorsi:
  case X86::BI__builtin_ia32_kxordi:
    return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
  case X86::BI__builtin_ia32_knotqi:
  case X86::BI__builtin_ia32_knothi:
  case X86::BI__builtin_ia32_knotsi:
  case X86::BI__builtin_ia32_knotdi: {
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
    return Builder.CreateBitCast(Builder.CreateNot(Res),
                                 Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_kmovb:
  case X86::BI__builtin_ia32_kmovw:
  case X86::BI__builtin_ia32_kmovd:
  case X86::BI__builtin_ia32_kmovq: {
    // Bitcast to vXi1 type and then back to integer. This gets the mask
    // register type into the IR, but might be optimized out depending on
    // what's around it.
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
    return Builder.CreateBitCast(Res, Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_kunpckdi:
  case X86::BI__builtin_ia32_kunpcksi:
  case X86::BI__builtin_ia32_kunpckhi: {
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
    int Indices[64];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;

    // First extract half of each vector. This gives better codegen than
    // doing it in a single shuffle.
    LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
    RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
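    // e.g. for kunpckhi (16-bit masks) this keeps bits 0..7 of each operand;
    // the concat below puts the second operand's low half in the low bits of
    // the result and the first operand's low half in the high bits.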
    // Concat the vectors.
    // NOTE: Operands are swapped to match the intrinsic definition.
    Value *Res =
        Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
    return Builder.CreateBitCast(Res, Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_vplzcntd_128:
  case X86::BI__builtin_ia32_vplzcntd_256:
  case X86::BI__builtin_ia32_vplzcntd_512:
  case X86::BI__builtin_ia32_vplzcntq_128:
  case X86::BI__builtin_ia32_vplzcntq_256:
  case X86::BI__builtin_ia32_vplzcntq_512: {
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
    return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
  }
  case X86::BI__builtin_ia32_sqrtss:
  case X86::BI__builtin_ia32_sqrtsd: {
    Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
    Function *F;
    if (Builder.getIsFPConstrained()) {
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
      F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                           A->getType());
      A = Builder.CreateConstrainedFPCall(F, {A});
    } else {
      F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
      A = Builder.CreateCall(F, {A});
    }
    return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
  }
  case X86::BI__builtin_ia32_sqrtsh_round_mask:
  case X86::BI__builtin_ia32_sqrtsd_round_mask:
  case X86::BI__builtin_ia32_sqrtss_round_mask: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
    // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
    // otherwise keep the intrinsic.
    if (CC != 4) {
      Intrinsic::ID IID;

      switch (BuiltinID) {
      default:
        llvm_unreachable("Unsupported intrinsic!");
      case X86::BI__builtin_ia32_sqrtsh_round_mask:
        IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
        break;
      case X86::BI__builtin_ia32_sqrtsd_round_mask:
        IID = Intrinsic::x86_avx512_mask_sqrt_sd;
        break;
      case X86::BI__builtin_ia32_sqrtss_round_mask:
        IID = Intrinsic::x86_avx512_mask_sqrt_ss;
        break;
      }
      return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
    }
    Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
    Function *F;
    if (Builder.getIsFPConstrained()) {
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
      F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                           A->getType());
      A = Builder.CreateConstrainedFPCall(F, A);
    } else {
      F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
      A = Builder.CreateCall(F, A);
    }
    Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
    return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
  }
  case X86::BI__builtin_ia32_sqrtpd256:
  case X86::BI__builtin_ia32_sqrtpd:
  case X86::BI__builtin_ia32_sqrtps256:
  case X86::BI__builtin_ia32_sqrtps:
  case X86::BI__builtin_ia32_sqrtph256:
  case X86::BI__builtin_ia32_sqrtph:
  case X86::BI__builtin_ia32_sqrtph512:
  case X86::BI__builtin_ia32_sqrtps512:
  case X86::BI__builtin_ia32_sqrtpd512: {
    if (Ops.size() == 2) {
      unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
      // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
      // otherwise keep the intrinsic.
      if (CC != 4) {
        Intrinsic::ID IID;

        switch (BuiltinID) {
        default:
          llvm_unreachable("Unsupported intrinsic!");
        case X86::BI__builtin_ia32_sqrtph512:
          IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
          break;
        case X86::BI__builtin_ia32_sqrtps512:
          IID = Intrinsic::x86_avx512_sqrt_ps_512;
          break;
        case X86::BI__builtin_ia32_sqrtpd512:
          IID = Intrinsic::x86_avx512_sqrt_pd_512;
          break;
        }
        return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
      }
    }
    if (Builder.getIsFPConstrained()) {
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                                     Ops[0]->getType());
      return Builder.CreateConstrainedFPCall(F, Ops[0]);
    }
    Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
    return Builder.CreateCall(F, Ops[0]);
  }
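  // When FP exceptions or rounding may be observed (constrained FP mode), the
  // sqrt builtins above are emitted through
  // llvm.experimental.constrained.sqrt under a CGFPOptionsRAII scope so the
  // call carries the current rounding/exception metadata, roughly:
  //   call double @llvm.experimental.constrained.sqrt.f64(double %x,
  //       metadata !"round.dynamic", metadata !"fpexcept.strict")
  // Outside that mode a plain llvm.sqrt call suffices.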
  case X86::BI__builtin_ia32_pmuludq128:
  case X86::BI__builtin_ia32_pmuludq256:
  case X86::BI__builtin_ia32_pmuludq512:
    return EmitX86Muldq(*this, /*IsSigned*/false, Ops);

  case X86::BI__builtin_ia32_pmuldq128:
  case X86::BI__builtin_ia32_pmuldq256:
  case X86::BI__builtin_ia32_pmuldq512:
    return EmitX86Muldq(*this, /*IsSigned*/true, Ops);

  case X86::BI__builtin_ia32_pternlogd512_mask:
  case X86::BI__builtin_ia32_pternlogq512_mask:
  case X86::BI__builtin_ia32_pternlogd128_mask:
  case X86::BI__builtin_ia32_pternlogd256_mask:
  case X86::BI__builtin_ia32_pternlogq128_mask:
  case X86::BI__builtin_ia32_pternlogq256_mask:
    return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);

  case X86::BI__builtin_ia32_pternlogd512_maskz:
  case X86::BI__builtin_ia32_pternlogq512_maskz:
  case X86::BI__builtin_ia32_pternlogd128_maskz:
  case X86::BI__builtin_ia32_pternlogd256_maskz:
  case X86::BI__builtin_ia32_pternlogq128_maskz:
  case X86::BI__builtin_ia32_pternlogq256_maskz:
    return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);

  case X86::BI__builtin_ia32_vpshldd128:
  case X86::BI__builtin_ia32_vpshldd256:
  case X86::BI__builtin_ia32_vpshldd512:
  case X86::BI__builtin_ia32_vpshldq128:
  case X86::BI__builtin_ia32_vpshldq256:
  case X86::BI__builtin_ia32_vpshldq512:
  case X86::BI__builtin_ia32_vpshldw128:
  case X86::BI__builtin_ia32_vpshldw256:
  case X86::BI__builtin_ia32_vpshldw512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);

  case X86::BI__builtin_ia32_vpshrdd128:
  case X86::BI__builtin_ia32_vpshrdd256:
  case X86::BI__builtin_ia32_vpshrdd512:
  case X86::BI__builtin_ia32_vpshrdq128:
  case X86::BI__builtin_ia32_vpshrdq256:
  case X86::BI__builtin_ia32_vpshrdq512:
  case X86::BI__builtin_ia32_vpshrdw128:
  case X86::BI__builtin_ia32_vpshrdw256:
  case X86::BI__builtin_ia32_vpshrdw512:
    // Ops 0 and 1 are swapped.
    return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);

  case X86::BI__builtin_ia32_vpshldvd128:
  case X86::BI__builtin_ia32_vpshldvd256:
  case X86::BI__builtin_ia32_vpshldvd512:
  case X86::BI__builtin_ia32_vpshldvq128:
  case X86::BI__builtin_ia32_vpshldvq256:
  case X86::BI__builtin_ia32_vpshldvq512:
  case X86::BI__builtin_ia32_vpshldvw128:
  case X86::BI__builtin_ia32_vpshldvw256:
  case X86::BI__builtin_ia32_vpshldvw512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);

  case X86::BI__builtin_ia32_vpshrdvd128:
  case X86::BI__builtin_ia32_vpshrdvd256:
  case X86::BI__builtin_ia32_vpshrdvd512:
  case X86::BI__builtin_ia32_vpshrdvq128:
  case X86::BI__builtin_ia32_vpshrdvq256:
  case X86::BI__builtin_ia32_vpshrdvq512:
  case X86::BI__builtin_ia32_vpshrdvw128:
  case X86::BI__builtin_ia32_vpshrdvw256:
  case X86::BI__builtin_ia32_vpshrdvw512:
    // Ops 0 and 1 are swapped.
    return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
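  // The concat-shift (VBMI2) builtins map onto the generic funnel-shift
  // intrinsics: vpshld becomes llvm.fshl and vpshrd becomes llvm.fshr with
  // its first two operands swapped, mirroring how the instruction
  // concatenates its sources. Conceptually, for a 32-bit lane,
  //   fshl(a, b, i) == top 32 bits of ((a:b) << (i % 32)).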
  case X86::BI__builtin_ia32_reduce_fadd_pd512:
  case X86::BI__builtin_ia32_reduce_fadd_ps512:
  case X86::BI__builtin_ia32_reduce_fadd_ph512:
  case X86::BI__builtin_ia32_reduce_fadd_ph256:
  case X86::BI__builtin_ia32_reduce_fadd_ph128: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
    Builder.getFastMathFlags().setAllowReassoc();
    return Builder.CreateCall(F, {Ops[0], Ops[1]});
  }
  case X86::BI__builtin_ia32_reduce_fmul_pd512:
  case X86::BI__builtin_ia32_reduce_fmul_ps512:
  case X86::BI__builtin_ia32_reduce_fmul_ph512:
  case X86::BI__builtin_ia32_reduce_fmul_ph256:
  case X86::BI__builtin_ia32_reduce_fmul_ph128: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
    Builder.getFastMathFlags().setAllowReassoc();
    return Builder.CreateCall(F, {Ops[0], Ops[1]});
  }
  case X86::BI__builtin_ia32_reduce_fmax_pd512:
  case X86::BI__builtin_ia32_reduce_fmax_ps512:
  case X86::BI__builtin_ia32_reduce_fmax_ph512:
  case X86::BI__builtin_ia32_reduce_fmax_ph256:
  case X86::BI__builtin_ia32_reduce_fmax_ph128: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
    Builder.getFastMathFlags().setNoNaNs();
    return Builder.CreateCall(F, {Ops[0]});
  }
  case X86::BI__builtin_ia32_reduce_fmin_pd512:
  case X86::BI__builtin_ia32_reduce_fmin_ps512:
  case X86::BI__builtin_ia32_reduce_fmin_ph512:
  case X86::BI__builtin_ia32_reduce_fmin_ph256:
  case X86::BI__builtin_ia32_reduce_fmin_ph128: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
    Builder.getFastMathFlags().setNoNaNs();
    return Builder.CreateCall(F, {Ops[0]});
  }
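  // llvm.vector.reduce.fadd/fmul are strictly ordered unless reassociation is
  // allowed, so the reduce_fadd/fmul builtins above set the reassoc flag to
  // permit the tree reduction they are expected to lower to; reduce_fmax/fmin
  // similarly set nnan to allow the simpler NaN-free lowering. The
  // FastMathFlagGuard restores the builder's flags afterwards.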
  case X86::BI__builtin_ia32_pswapdsf:
  case X86::BI__builtin_ia32_pswapdsi: {
    llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
    Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
    return Builder.CreateCall(F, Ops, "pswapd");
  }
  case X86::BI__builtin_ia32_rdrand16_step:
  case X86::BI__builtin_ia32_rdrand32_step:
  case X86::BI__builtin_ia32_rdrand64_step:
  case X86::BI__builtin_ia32_rdseed16_step:
  case X86::BI__builtin_ia32_rdseed32_step:
  case X86::BI__builtin_ia32_rdseed64_step: {
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_rdrand16_step:
      ID = Intrinsic::x86_rdrand_16;
      break;
    case X86::BI__builtin_ia32_rdrand32_step:
      ID = Intrinsic::x86_rdrand_32;
      break;
    case X86::BI__builtin_ia32_rdrand64_step:
      ID = Intrinsic::x86_rdrand_64;
      break;
    case X86::BI__builtin_ia32_rdseed16_step:
      ID = Intrinsic::x86_rdseed_16;
      break;
    case X86::BI__builtin_ia32_rdseed32_step:
      ID = Intrinsic::x86_rdseed_32;
      break;
    case X86::BI__builtin_ia32_rdseed64_step:
      ID = Intrinsic::x86_rdseed_64;
      break;
    }

    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
                                      Ops[0]);
    return Builder.CreateExtractValue(Call, 1);
  }
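  // The rdrand/rdseed intrinsics return a {random value, success flag} pair:
  // element 0 is stored through the user's pointer and element 1 (nonzero on
  // success) becomes the result of the *_step builtin.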
  case X86::BI__builtin_ia32_addcarryx_u32:
  case X86::BI__builtin_ia32_addcarryx_u64:
  case X86::BI__builtin_ia32_subborrow_u32:
  case X86::BI__builtin_ia32_subborrow_u64: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_addcarryx_u32:
      IID = Intrinsic::x86_addcarry_32;
      break;
    case X86::BI__builtin_ia32_addcarryx_u64:
      IID = Intrinsic::x86_addcarry_64;
      break;
    case X86::BI__builtin_ia32_subborrow_u32:
      IID = Intrinsic::x86_subborrow_32;
      break;
    case X86::BI__builtin_ia32_subborrow_u64:
      IID = Intrinsic::x86_subborrow_64;
      break;
    }

    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
                                     { Ops[0], Ops[1], Ops[2] });
    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
                                      Ops[3]);
    return Builder.CreateExtractValue(Call, 0);
  }
  case X86::BI__builtin_ia32_fpclassps128_mask:
  case X86::BI__builtin_ia32_fpclassps256_mask:
  case X86::BI__builtin_ia32_fpclassps512_mask:
  case X86::BI__builtin_ia32_fpclassph128_mask:
  case X86::BI__builtin_ia32_fpclassph256_mask:
  case X86::BI__builtin_ia32_fpclassph512_mask:
  case X86::BI__builtin_ia32_fpclasspd128_mask:
  case X86::BI__builtin_ia32_fpclasspd256_mask:
  case X86::BI__builtin_ia32_fpclasspd512_mask: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    Value *MaskIn = Ops[2];
    Ops.erase(&Ops[2]);

    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_fpclassph128_mask:
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
      break;
    case X86::BI__builtin_ia32_fpclassph256_mask:
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
      break;
    case X86::BI__builtin_ia32_fpclassph512_mask:
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
      break;
    case X86::BI__builtin_ia32_fpclassps128_mask:
      ID = Intrinsic::x86_avx512_fpclass_ps_128;
      break;
    case X86::BI__builtin_ia32_fpclassps256_mask:
      ID = Intrinsic::x86_avx512_fpclass_ps_256;
      break;
    case X86::BI__builtin_ia32_fpclassps512_mask:
      ID = Intrinsic::x86_avx512_fpclass_ps_512;
      break;
    case X86::BI__builtin_ia32_fpclasspd128_mask:
      ID = Intrinsic::x86_avx512_fpclass_pd_128;
      break;
    case X86::BI__builtin_ia32_fpclasspd256_mask:
      ID = Intrinsic::x86_avx512_fpclass_pd_256;
      break;
    case X86::BI__builtin_ia32_fpclasspd512_mask:
      ID = Intrinsic::x86_avx512_fpclass_pd_512;
      break;
    }

    Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
    return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
  }
  case X86::BI__builtin_ia32_vp2intersect_q_512:
  case X86::BI__builtin_ia32_vp2intersect_q_256:
  case X86::BI__builtin_ia32_vp2intersect_q_128:
  case X86::BI__builtin_ia32_vp2intersect_d_512:
  case X86::BI__builtin_ia32_vp2intersect_d_256:
  case X86::BI__builtin_ia32_vp2intersect_d_128: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    Intrinsic::ID ID;

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_vp2intersect_q_512:
      ID = Intrinsic::x86_avx512_vp2intersect_q_512;
      break;
    case X86::BI__builtin_ia32_vp2intersect_q_256:
      ID = Intrinsic::x86_avx512_vp2intersect_q_256;
      break;
    case X86::BI__builtin_ia32_vp2intersect_q_128:
      ID = Intrinsic::x86_avx512_vp2intersect_q_128;
      break;
    case X86::BI__builtin_ia32_vp2intersect_d_512:
      ID = Intrinsic::x86_avx512_vp2intersect_d_512;
      break;
    case X86::BI__builtin_ia32_vp2intersect_d_256:
      ID = Intrinsic::x86_avx512_vp2intersect_d_256;
      break;
    case X86::BI__builtin_ia32_vp2intersect_d_128:
      ID = Intrinsic::x86_avx512_vp2intersect_d_128;
      break;
    }

    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
    Value *Result = Builder.CreateExtractValue(Call, 0);
    Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
    Builder.CreateDefaultAlignedStore(Result, Ops[2]);

    Result = Builder.CreateExtractValue(Call, 1);
    Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
    return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
  }
  case X86::BI__builtin_ia32_vpmultishiftqb128:
  case X86::BI__builtin_ia32_vpmultishiftqb256:
  case X86::BI__builtin_ia32_vpmultishiftqb512: {
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_vpmultishiftqb128:
      ID = Intrinsic::x86_avx512_pmultishift_qb_128;
      break;
    case X86::BI__builtin_ia32_vpmultishiftqb256:
      ID = Intrinsic::x86_avx512_pmultishift_qb_256;
      break;
    case X86::BI__builtin_ia32_vpmultishiftqb512:
      ID = Intrinsic::x86_avx512_pmultishift_qb_512;
      break;
    }

    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  }

  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
  case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    Value *MaskIn = Ops[2];
    Ops.erase(&Ops[2]);

    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
      ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
      break;
    case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
      ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
      break;
    case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
      ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
      break;
    }

    Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
    return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
  }
  // packed comparison intrinsics
  case X86::BI__builtin_ia32_cmpeqps:
  case X86::BI__builtin_ia32_cmpeqpd:
    return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
  case X86::BI__builtin_ia32_cmpltps:
  case X86::BI__builtin_ia32_cmpltpd:
    return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
  case X86::BI__builtin_ia32_cmpleps:
  case X86::BI__builtin_ia32_cmplepd:
    return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
  case X86::BI__builtin_ia32_cmpunordps:
  case X86::BI__builtin_ia32_cmpunordpd:
    return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
  case X86::BI__builtin_ia32_cmpneqps:
  case X86::BI__builtin_ia32_cmpneqpd:
    return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
  case X86::BI__builtin_ia32_cmpnltps:
  case X86::BI__builtin_ia32_cmpnltpd:
    return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
  case X86::BI__builtin_ia32_cmpnleps:
  case X86::BI__builtin_ia32_cmpnlepd:
    return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
  case X86::BI__builtin_ia32_cmpordps:
  case X86::BI__builtin_ia32_cmpordpd:
    return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
  case X86::BI__builtin_ia32_cmpph128_mask:
  case X86::BI__builtin_ia32_cmpph256_mask:
  case X86::BI__builtin_ia32_cmpph512_mask:
  case X86::BI__builtin_ia32_cmpps128_mask:
  case X86::BI__builtin_ia32_cmpps256_mask:
  case X86::BI__builtin_ia32_cmpps512_mask:
  case X86::BI__builtin_ia32_cmppd128_mask:
  case X86::BI__builtin_ia32_cmppd256_mask:
  case X86::BI__builtin_ia32_cmppd512_mask:
    IsMaskFCmp = true;
    [[fallthrough]];
  case X86::BI__builtin_ia32_cmpps:
  case X86::BI__builtin_ia32_cmpps256:
  case X86::BI__builtin_ia32_cmppd:
  case X86::BI__builtin_ia32_cmppd256: {
    // Lowering vector comparisons to fcmp instructions, while
    // ignoring the requested signaling behaviour and rounding mode.
    // This is only possible if fp-model is not strict and FENV_ACCESS is off.

    // The third argument is the comparison condition, an integer in the
    // range [0, 31].
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;

    // Lowering to IR fcmp instruction.
    // Ignoring requested signaling behaviour,
    // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
    FCmpInst::Predicate Pred;
    bool IsSignaling;
    // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
    // behavior is inverted. We'll handle that after the switch.
    switch (CC & 0xf) {
    case 0x00: Pred = FCmpInst::FCMP_OEQ;   IsSignaling = false; break;
    case 0x01: Pred = FCmpInst::FCMP_OLT;   IsSignaling = true;  break;
    case 0x02: Pred = FCmpInst::FCMP_OLE;   IsSignaling = true;  break;
    case 0x03: Pred = FCmpInst::FCMP_UNO;   IsSignaling = false; break;
    case 0x04: Pred = FCmpInst::FCMP_UNE;   IsSignaling = false; break;
    case 0x05: Pred = FCmpInst::FCMP_UGE;   IsSignaling = true;  break;
    case 0x06: Pred = FCmpInst::FCMP_UGT;   IsSignaling = true;  break;
    case 0x07: Pred = FCmpInst::FCMP_ORD;   IsSignaling = false; break;
    case 0x08: Pred = FCmpInst::FCMP_UEQ;   IsSignaling = false; break;
    case 0x09: Pred = FCmpInst::FCMP_ULT;   IsSignaling = true;  break;
    case 0x0a: Pred = FCmpInst::FCMP_ULE;   IsSignaling = true;  break;
    case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
    case 0x0c: Pred = FCmpInst::FCMP_ONE;   IsSignaling = false; break;
    case 0x0d: Pred = FCmpInst::FCMP_OGE;   IsSignaling = true;  break;
    case 0x0e: Pred = FCmpInst::FCMP_OGT;   IsSignaling = true;  break;
    case 0x0f: Pred = FCmpInst::FCMP_TRUE;  IsSignaling = false; break;
    default: llvm_unreachable("Unhandled CC");
    }

    // Invert the signalling behavior for 16-31.
    if (CC & 0x10)
      IsSignaling = !IsSignaling;

    // If the predicate is true or false and we're using constrained intrinsics,
    // we don't have a compare intrinsic we can use. Just use the legacy X86
    // specific intrinsic.
    // If the intrinsic is mask enabled and we're using constrained intrinsics,
    // use the legacy X86 specific intrinsic.
    if (Builder.getIsFPConstrained() &&
        (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
         IsMaskFCmp)) {

      Intrinsic::ID IID;
      switch (BuiltinID) {
      default: llvm_unreachable("Unexpected builtin");
      case X86::BI__builtin_ia32_cmpps:
        IID = Intrinsic::x86_sse_cmp_ps;
        break;
      case X86::BI__builtin_ia32_cmpps256:
        IID = Intrinsic::x86_avx_cmp_ps_256;
        break;
      case X86::BI__builtin_ia32_cmppd:
        IID = Intrinsic::x86_sse2_cmp_pd;
        break;
      case X86::BI__builtin_ia32_cmppd256:
        IID = Intrinsic::x86_avx_cmp_pd_256;
        break;
      case X86::BI__builtin_ia32_cmpps512_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
        break;
      case X86::BI__builtin_ia32_cmppd512_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
        break;
      case X86::BI__builtin_ia32_cmpps128_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
        break;
      case X86::BI__builtin_ia32_cmpps256_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
        break;
      case X86::BI__builtin_ia32_cmppd128_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
        break;
      case X86::BI__builtin_ia32_cmppd256_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
        break;
      }

      Function *Intr = CGM.getIntrinsic(IID);
      if (IsMaskFCmp) {
        unsigned NumElts =
            cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
        Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
        Value *Cmp = Builder.CreateCall(Intr, Ops);
        return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
      }

      return Builder.CreateCall(Intr, Ops);
    }

    // Builtins without the _mask suffix return a vector of integers
    // of the same width as the input vectors.
    if (IsMaskFCmp) {
      // We ignore SAE if strict FP is disabled. We only keep precise
      // exception behavior under strict FP.
      // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
      // object will be required.
      unsigned NumElts =
          cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
      Value *Cmp;
      if (IsSignaling)
        Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
      else
        Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
      return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
    }

    return getVectorFCmpIR(Pred, IsSignaling);
  }
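  // The 5-bit immediate above follows the _CMP_* encoding from the intrinsics
  // headers: bits 0-3 select the predicate (e.g. 0x01 _CMP_LT_OS -> FCMP_OLT,
  // 0x0e _CMP_GT_OS -> FCMP_OGT) and bit 4 only flips the quiet/signaling
  // property, which is why codes 16-31 reuse the same FCmp predicate with
  // IsSignaling inverted.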
  // SSE scalar comparison intrinsics
  case X86::BI__builtin_ia32_cmpeqss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
  case X86::BI__builtin_ia32_cmpltss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
  case X86::BI__builtin_ia32_cmpless:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
  case X86::BI__builtin_ia32_cmpunordss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
  case X86::BI__builtin_ia32_cmpneqss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
  case X86::BI__builtin_ia32_cmpnltss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
  case X86::BI__builtin_ia32_cmpnless:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
  case X86::BI__builtin_ia32_cmpordss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
  case X86::BI__builtin_ia32_cmpeqsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
  case X86::BI__builtin_ia32_cmpltsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
  case X86::BI__builtin_ia32_cmplesd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
  case X86::BI__builtin_ia32_cmpunordsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
  case X86::BI__builtin_ia32_cmpneqsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
  case X86::BI__builtin_ia32_cmpnltsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
  case X86::BI__builtin_ia32_cmpnlesd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
  case X86::BI__builtin_ia32_cmpordsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
  // f16c half2float intrinsics
  case X86::BI__builtin_ia32_vcvtph2ps:
  case X86::BI__builtin_ia32_vcvtph2ps256:
  case X86::BI__builtin_ia32_vcvtph2ps_mask:
  case X86::BI__builtin_ia32_vcvtph2ps256_mask:
  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
  }

  // AVX512 bf16 intrinsics
  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
    Ops[2] = getMaskVecValue(
        *this, Ops[2],
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
    Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
    return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
  }
  case X86::BI__builtin_ia32_cvtsbf162ss_32:
    return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());

  case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
      IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
      break;
    case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
      IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
      break;
    }
    Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
    return EmitX86Select(*this, Ops[2], Res, Ops[1]);
  }
  case X86::BI__cpuid:
  case X86::BI__cpuidex: {
    Value *FuncId = EmitScalarExpr(E->getArg(1));
    Value *SubFuncId = BuiltinID == X86::BI__cpuidex
                           ? EmitScalarExpr(E->getArg(2))
                           : llvm::ConstantInt::get(Int32Ty, 0);

    llvm::StructType *CpuidRetTy =
        llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
    llvm::FunctionType *FTy =
        llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);

    StringRef Asm, Constraints;
    if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
      Asm = "cpuid";
      Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
    } else {
      // x86-64 uses %rbx as the base register, so preserve it.
      Asm = "xchgq %rbx, ${1:q}\n"
            "cpuid\n"
            "xchgq %rbx, ${1:q}";
      Constraints = "={ax},=r,={cx},={dx},0,2";
    }

    llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
                                               /*hasSideEffects=*/false);
    Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
    Value *BasePtr = EmitScalarExpr(E->getArg(0));
    Value *Store = nullptr;
    for (unsigned i = 0; i < 4; i++) {
      Value *Extracted = Builder.CreateExtractValue(IACall, i);
      Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
      Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
    }

    // Return the last store instruction to signal that we have emitted the
    // intrinsic.
    return Store;
  }
:
15508 case X86::BI__emulu
: {
15509 llvm::Type
*Int64Ty
= llvm::IntegerType::get(getLLVMContext(), 64);
15510 bool isSigned
= (BuiltinID
== X86::BI__emul
);
15511 Value
*LHS
= Builder
.CreateIntCast(Ops
[0], Int64Ty
, isSigned
);
15512 Value
*RHS
= Builder
.CreateIntCast(Ops
[1], Int64Ty
, isSigned
);
15513 return Builder
.CreateMul(LHS
, RHS
, "", !isSigned
, isSigned
);
15515 case X86::BI__mulh
:
15516 case X86::BI__umulh
:
15517 case X86::BI_mul128
:
15518 case X86::BI_umul128
: {
15519 llvm::Type
*ResType
= ConvertType(E
->getType());
15520 llvm::Type
*Int128Ty
= llvm::IntegerType::get(getLLVMContext(), 128);
15522 bool IsSigned
= (BuiltinID
== X86::BI__mulh
|| BuiltinID
== X86::BI_mul128
);
15523 Value
*LHS
= Builder
.CreateIntCast(Ops
[0], Int128Ty
, IsSigned
);
15524 Value
*RHS
= Builder
.CreateIntCast(Ops
[1], Int128Ty
, IsSigned
);
15526 Value
*MulResult
, *HigherBits
;
15528 MulResult
= Builder
.CreateNSWMul(LHS
, RHS
);
15529 HigherBits
= Builder
.CreateAShr(MulResult
, 64);
15531 MulResult
= Builder
.CreateNUWMul(LHS
, RHS
);
15532 HigherBits
= Builder
.CreateLShr(MulResult
, 64);
15534 HigherBits
= Builder
.CreateIntCast(HigherBits
, ResType
, IsSigned
);
15536 if (BuiltinID
== X86::BI__mulh
|| BuiltinID
== X86::BI__umulh
)
15539 Address HighBitsAddress
= EmitPointerWithAlignment(E
->getArg(2));
15540 Builder
.CreateStore(HigherBits
, HighBitsAddress
);
15541 return Builder
.CreateIntCast(MulResult
, ResType
, IsSigned
);
  case X86::BI__faststorefence: {
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::System);
  }
  case X86::BI__shiftleft128:
  case X86::BI__shiftright128: {
    llvm::Function *F = CGM.getIntrinsic(
        BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
        Int64Ty);
    // Flip low/high ops and zero-extend amount to matching type.
    // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
    // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
    std::swap(Ops[0], Ops[1]);
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    return Builder.CreateCall(F, Ops);
  }
  case X86::BI_ReadWriteBarrier:
  case X86::BI_ReadBarrier:
  case X86::BI_WriteBarrier: {
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::SingleThread);
  }

  case X86::BI_AddressOfReturnAddress: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }
  case X86::BI__stosb: {
    // We treat __stosb as a volatile memset - it may not generate "rep stosb"
    // instruction, but it will create a memset that won't be optimized away.
    return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
  }
  case X86::BI__ud2:
    // llvm.trap makes a ud2a instruction on x86.
    return EmitTrapCall(Intrinsic::trap);
  case X86::BI__int2c: {
    // This syscall signals a driver assertion failure in x86 NT kernels.
    llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
    llvm::InlineAsm *IA =
        llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
    llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
        getLLVMContext(), llvm::AttributeList::FunctionIndex,
        llvm::Attribute::NoReturn);
    llvm::CallInst *CI = Builder.CreateCall(IA);
    CI->setAttributes(NoReturnAttr);
    return CI;
  }
  case X86::BI__readfsbyte:
  case X86::BI__readfsword:
  case X86::BI__readfsdword:
  case X86::BI__readfsqword: {
    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Ptr =
        Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 257));
    LoadInst *Load = Builder.CreateAlignedLoad(
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
    Load->setVolatile(true);
    return Load;
  }
  case X86::BI__readgsbyte:
  case X86::BI__readgsword:
  case X86::BI__readgsdword:
  case X86::BI__readgsqword: {
    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Ptr =
        Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 256));
    LoadInst *Load = Builder.CreateAlignedLoad(
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
    Load->setVolatile(true);
    return Load;
  }
  case X86::BI__builtin_ia32_encodekey128_u32: {
    Intrinsic::ID IID = Intrinsic::x86_encodekey128;

    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});

    for (int i = 0; i < 3; ++i) {
      Value *Extract = Builder.CreateExtractValue(Call, i + 1);
      Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
      Ptr = Builder.CreateBitCast(
          Ptr, llvm::PointerType::getUnqual(Extract->getType()));
      Builder.CreateAlignedStore(Extract, Ptr, Align(1));
    }

    return Builder.CreateExtractValue(Call, 0);
  }
  case X86::BI__builtin_ia32_encodekey256_u32: {
    Intrinsic::ID IID = Intrinsic::x86_encodekey256;

    Value *Call =
        Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});

    for (int i = 0; i < 4; ++i) {
      Value *Extract = Builder.CreateExtractValue(Call, i + 1);
      Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
      Ptr = Builder.CreateBitCast(
          Ptr, llvm::PointerType::getUnqual(Extract->getType()));
      Builder.CreateAlignedStore(Extract, Ptr, Align(1));
    }

    return Builder.CreateExtractValue(Call, 0);
  }
  case X86::BI__builtin_ia32_aesenc128kl_u8:
  case X86::BI__builtin_ia32_aesdec128kl_u8:
  case X86::BI__builtin_ia32_aesenc256kl_u8:
  case X86::BI__builtin_ia32_aesdec256kl_u8: {
    Intrinsic::ID IID;
    StringRef BlockName;
    switch (BuiltinID) {
    default:
      llvm_unreachable("Unexpected builtin");
    case X86::BI__builtin_ia32_aesenc128kl_u8:
      IID = Intrinsic::x86_aesenc128kl;
      BlockName = "aesenc128kl";
      break;
    case X86::BI__builtin_ia32_aesdec128kl_u8:
      IID = Intrinsic::x86_aesdec128kl;
      BlockName = "aesdec128kl";
      break;
    case X86::BI__builtin_ia32_aesenc256kl_u8:
      IID = Intrinsic::x86_aesenc256kl;
      BlockName = "aesenc256kl";
      break;
    case X86::BI__builtin_ia32_aesdec256kl_u8:
      IID = Intrinsic::x86_aesdec256kl;
      BlockName = "aesdec256kl";
      break;
    }

    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});

    BasicBlock *NoError =
        createBasicBlock(BlockName + "_no_error", this->CurFn);
    BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
    BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);

    Value *Ret = Builder.CreateExtractValue(Call, 0);
    Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
    Value *Out = Builder.CreateExtractValue(Call, 1);
    Builder.CreateCondBr(Succ, NoError, Error);

    Builder.SetInsertPoint(NoError);
    Builder.CreateDefaultAlignedStore(Out, Ops[0]);
    Builder.CreateBr(End);

    Builder.SetInsertPoint(Error);
    Constant *Zero = llvm::Constant::getNullValue(Out->getType());
    Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
    Builder.CreateBr(End);

    Builder.SetInsertPoint(End);
    return Builder.CreateExtractValue(Call, 0);
  }
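  // The Key Locker intrinsics above return their status in element 0 of the
  // result aggregate. The generated control flow stores the processed block
  // to the output only on success and stores zero otherwise, so callers only
  // need to check the returned status byte.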
  case X86::BI__builtin_ia32_aesencwide128kl_u8:
  case X86::BI__builtin_ia32_aesdecwide128kl_u8:
  case X86::BI__builtin_ia32_aesencwide256kl_u8:
  case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
    Intrinsic::ID IID;
    StringRef BlockName;
    switch (BuiltinID) {
    case X86::BI__builtin_ia32_aesencwide128kl_u8:
      IID = Intrinsic::x86_aesencwide128kl;
      BlockName = "aesencwide128kl";
      break;
    case X86::BI__builtin_ia32_aesdecwide128kl_u8:
      IID = Intrinsic::x86_aesdecwide128kl;
      BlockName = "aesdecwide128kl";
      break;
    case X86::BI__builtin_ia32_aesencwide256kl_u8:
      IID = Intrinsic::x86_aesencwide256kl;
      BlockName = "aesencwide256kl";
      break;
    case X86::BI__builtin_ia32_aesdecwide256kl_u8:
      IID = Intrinsic::x86_aesdecwide256kl;
      BlockName = "aesdecwide256kl";
      break;
    }

    llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
    Value *InOps[9];
    InOps[0] = Ops[2];
    for (int i = 0; i != 8; ++i) {
      Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
      InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
    }

    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);

    BasicBlock *NoError =
        createBasicBlock(BlockName + "_no_error", this->CurFn);
    BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
    BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);

    Value *Ret = Builder.CreateExtractValue(Call, 0);
    Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
    Builder.CreateCondBr(Succ, NoError, Error);

    Builder.SetInsertPoint(NoError);
    for (int i = 0; i != 8; ++i) {
      Value *Extract = Builder.CreateExtractValue(Call, i + 1);
      Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
      Builder.CreateAlignedStore(Extract, Ptr, Align(16));
    }
    Builder.CreateBr(End);

    Builder.SetInsertPoint(Error);
    for (int i = 0; i != 8; ++i) {
      Value *Out = Builder.CreateExtractValue(Call, i + 1);
      Constant *Zero = llvm::Constant::getNullValue(Out->getType());
      Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
      Builder.CreateAlignedStore(Zero, Ptr, Align(16));
    }
    Builder.CreateBr(End);

    Builder.SetInsertPoint(End);
    return Builder.CreateExtractValue(Call, 0);
  }
  case X86::BI__builtin_ia32_vfcmaddcph512_mask:
    IsConjFMA = true;
    [[fallthrough]];
  case X86::BI__builtin_ia32_vfmaddcph512_mask: {
    Intrinsic::ID IID = IsConjFMA
                            ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
                            : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
    return EmitX86Select(*this, Ops[3], Call, Ops[0]);
  }
  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
    IsConjFMA = true;
    [[fallthrough]];
  case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
    Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
                                  : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
    Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
    return EmitX86Select(*this, And, Call, Ops[0]);
  }
  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
    IsConjFMA = true;
    [[fallthrough]];
  case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
    Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
                                  : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
    static constexpr int Mask[] = {0, 5, 6, 7};
    return Builder.CreateShuffleVector(Call, Ops[2], Mask);
  }
  case X86::BI__builtin_ia32_prefetchi:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
        {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
         llvm::ConstantInt::get(Int32Ty, 0)});
  }
}
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  // Do not emit the builtin arguments in the arguments of a function call,
  // because the evaluation order of function arguments is not specified in
  // C++. This is important when testing to ensure the arguments are emitted
  // in the same order every time. Eg:
  // Instead of:
  //    return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
  //                              EmitScalarExpr(E->getArg(1)), "swdiv");
  // Use:
  //    Value *Op0 = EmitScalarExpr(E->getArg(0));
  //    Value *Op1 = EmitScalarExpr(E->getArg(1));
  //    return Builder.CreateFDiv(Op0, Op1, "swdiv")

  Intrinsic::ID ID = Intrinsic::not_intrinsic;

  switch (BuiltinID) {
  default: return nullptr;

  // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
  // call __builtin_readcyclecounter.
  case PPC::BI__builtin_ppc_get_timebase:
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
  // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
  case PPC::BI__builtin_altivec_lvx:
  case PPC::BI__builtin_altivec_lvxl:
  case PPC::BI__builtin_altivec_lvebx:
  case PPC::BI__builtin_altivec_lvehx:
  case PPC::BI__builtin_altivec_lvewx:
  case PPC::BI__builtin_altivec_lvsl:
  case PPC::BI__builtin_altivec_lvsr:
  case PPC::BI__builtin_vsx_lxvd2x:
  case PPC::BI__builtin_vsx_lxvw4x:
  case PPC::BI__builtin_vsx_lxvd2x_be:
  case PPC::BI__builtin_vsx_lxvw4x_be:
  case PPC::BI__builtin_vsx_lxvl:
  case PPC::BI__builtin_vsx_lxvll:
  {
    SmallVector<Value *, 2> Ops;
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    if (BuiltinID == PPC::BI__builtin_vsx_lxvl ||
        BuiltinID == PPC::BI__builtin_vsx_lxvll) {
      Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
    } else {
      Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
      Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
      Ops.pop_back();
    }

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
    case PPC::BI__builtin_altivec_lvx:
      ID = Intrinsic::ppc_altivec_lvx;
      break;
    case PPC::BI__builtin_altivec_lvxl:
      ID = Intrinsic::ppc_altivec_lvxl;
      break;
    case PPC::BI__builtin_altivec_lvebx:
      ID = Intrinsic::ppc_altivec_lvebx;
      break;
    case PPC::BI__builtin_altivec_lvehx:
      ID = Intrinsic::ppc_altivec_lvehx;
      break;
    case PPC::BI__builtin_altivec_lvewx:
      ID = Intrinsic::ppc_altivec_lvewx;
      break;
    case PPC::BI__builtin_altivec_lvsl:
      ID = Intrinsic::ppc_altivec_lvsl;
      break;
    case PPC::BI__builtin_altivec_lvsr:
      ID = Intrinsic::ppc_altivec_lvsr;
      break;
    case PPC::BI__builtin_vsx_lxvd2x:
      ID = Intrinsic::ppc_vsx_lxvd2x;
      break;
    case PPC::BI__builtin_vsx_lxvw4x:
      ID = Intrinsic::ppc_vsx_lxvw4x;
      break;
    case PPC::BI__builtin_vsx_lxvd2x_be:
      ID = Intrinsic::ppc_vsx_lxvd2x_be;
      break;
    case PPC::BI__builtin_vsx_lxvw4x_be:
      ID = Intrinsic::ppc_vsx_lxvw4x_be;
      break;
    case PPC::BI__builtin_vsx_lxvl:
      ID = Intrinsic::ppc_vsx_lxvl;
      break;
    case PPC::BI__builtin_vsx_lxvll:
      ID = Intrinsic::ppc_vsx_lxvll;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }
  // vec_st, vec_xst_be
  case PPC::BI__builtin_altivec_stvx:
  case PPC::BI__builtin_altivec_stvxl:
  case PPC::BI__builtin_altivec_stvebx:
  case PPC::BI__builtin_altivec_stvehx:
  case PPC::BI__builtin_altivec_stvewx:
  case PPC::BI__builtin_vsx_stxvd2x:
  case PPC::BI__builtin_vsx_stxvw4x:
  case PPC::BI__builtin_vsx_stxvd2x_be:
  case PPC::BI__builtin_vsx_stxvw4x_be:
  case PPC::BI__builtin_vsx_stxvl:
  case PPC::BI__builtin_vsx_stxvll:
  {
    SmallVector<Value *, 3> Ops;
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    if (BuiltinID == PPC::BI__builtin_vsx_stxvl ||
        BuiltinID == PPC::BI__builtin_vsx_stxvll) {
      Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
    } else {
      Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
      Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
      Ops.pop_back();
    }

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported st intrinsic!");
    case PPC::BI__builtin_altivec_stvx:
      ID = Intrinsic::ppc_altivec_stvx;
      break;
    case PPC::BI__builtin_altivec_stvxl:
      ID = Intrinsic::ppc_altivec_stvxl;
      break;
    case PPC::BI__builtin_altivec_stvebx:
      ID = Intrinsic::ppc_altivec_stvebx;
      break;
    case PPC::BI__builtin_altivec_stvehx:
      ID = Intrinsic::ppc_altivec_stvehx;
      break;
    case PPC::BI__builtin_altivec_stvewx:
      ID = Intrinsic::ppc_altivec_stvewx;
      break;
    case PPC::BI__builtin_vsx_stxvd2x:
      ID = Intrinsic::ppc_vsx_stxvd2x;
      break;
    case PPC::BI__builtin_vsx_stxvw4x:
      ID = Intrinsic::ppc_vsx_stxvw4x;
      break;
    case PPC::BI__builtin_vsx_stxvd2x_be:
      ID = Intrinsic::ppc_vsx_stxvd2x_be;
      break;
    case PPC::BI__builtin_vsx_stxvw4x_be:
      ID = Intrinsic::ppc_vsx_stxvw4x_be;
      break;
    case PPC::BI__builtin_vsx_stxvl:
      ID = Intrinsic::ppc_vsx_stxvl;
      break;
    case PPC::BI__builtin_vsx_stxvll:
      ID = Intrinsic::ppc_vsx_stxvll;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }
  case PPC::BI__builtin_vsx_ldrmb: {
    // Essentially boils down to performing an unaligned VMX load sequence so
    // as to avoid crossing a page boundary and then shuffling the elements
    // into the right side of the vector register.
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
    llvm::Type *ResTy = ConvertType(E->getType());
    bool IsLE = getTarget().isLittleEndian();

    // If the user wants the entire vector, just load the entire vector.
    if (NumBytes == 16) {
      Value *BC = Builder.CreateBitCast(Op0, ResTy->getPointerTo());
      Value *LD =
          Builder.CreateLoad(Address(BC, ResTy, CharUnits::fromQuantity(1)));
      if (!IsLE)
        return LD;

      // Reverse the bytes on LE.
      SmallVector<int, 16> RevMask;
      for (int Idx = 0; Idx < 16; Idx++)
        RevMask.push_back(15 - Idx);
      return Builder.CreateShuffleVector(LD, LD, RevMask);
    }

    llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
    llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
                                                : Intrinsic::ppc_altivec_lvsl);
    llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
    Value *HiMem = Builder.CreateGEP(
        Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
    Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
    Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
    Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");

    Op0 = IsLE ? HiLd : LoLd;
    Op1 = IsLE ? LoLd : HiLd;
    Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
    Constant *Zero =
        llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());

    if (IsLE) {
      SmallVector<int, 16> Consts;
      for (int Idx = 0; Idx < 16; Idx++) {
        int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
                                            : 16 - (NumBytes - Idx);
        Consts.push_back(Val);
      }
      return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
                                         Zero, Consts);
    }
    SmallVector<Constant *, 16> Consts;
    for (int Idx = 0; Idx < 16; Idx++)
      Consts.push_back(Builder.getInt8(NumBytes + Idx));
    Value *Mask2 = ConstantVector::get(Consts);
    return Builder.CreateBitCast(
        Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
  }
  case PPC::BI__builtin_vsx_strmb: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
    bool IsLE = getTarget().isLittleEndian();
    auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
      // Storing the whole vector, simply store it on BE and reverse bytes and
      // store on LE.
      if (Width == 16) {
        Value *BC = Builder.CreateBitCast(Op0, Op2->getType()->getPointerTo());
        Value *StVec = Op2;
        if (IsLE) {
          SmallVector<int, 16> RevMask;
          for (int Idx = 0; Idx < 16; Idx++)
            RevMask.push_back(15 - Idx);
          StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
        }
        return Builder.CreateStore(
            StVec, Address(BC, Op2->getType(), CharUnits::fromQuantity(1)));
      }
      auto *ConvTy = Int64Ty;
      unsigned NumElts = 0;
      switch (Width) {
      default:
        llvm_unreachable("width for stores must be a power of 2");
      case 8:
        ConvTy = Int64Ty;
        NumElts = 2;
        break;
      case 4:
        ConvTy = Int32Ty;
        NumElts = 4;
        break;
      case 2:
        ConvTy = Int16Ty;
        NumElts = 8;
        break;
      case 1:
        ConvTy = Int8Ty;
        NumElts = 16;
        break;
      }
      Value *Vec = Builder.CreateBitCast(
          Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
      Value *Ptr =
          Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
      Value *PtrBC = Builder.CreateBitCast(Ptr, ConvTy->getPointerTo());
      Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
      if (IsLE && Width > 1) {
        Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
        Elt = Builder.CreateCall(F, Elt);
      }
      return Builder.CreateStore(
          Elt, Address(PtrBC, ConvTy, CharUnits::fromQuantity(1)));
    };
    unsigned Stored = 0;
    unsigned RemainingBytes = NumBytes;
    Value *Result;
    if (NumBytes == 16)
      return StoreSubVec(16, 0, 0);
    if (NumBytes >= 8) {
      Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
      RemainingBytes -= 8;
      Stored += 8;
    }
    if (RemainingBytes >= 4) {
      Result = StoreSubVec(4, NumBytes - Stored - 4,
                           IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
      RemainingBytes -= 4;
      Stored += 4;
    }
    if (RemainingBytes >= 2) {
      Result = StoreSubVec(2, NumBytes - Stored - 2,
                           IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
      RemainingBytes -= 2;
      Stored += 2;
    }
    if (RemainingBytes)
      Result =
          StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
    return Result;
  }
  case PPC::BI__builtin_vsx_xvsqrtsp:
  case PPC::BI__builtin_vsx_xvsqrtdp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    if (Builder.getIsFPConstrained()) {
      llvm::Function *F = CGM.getIntrinsic(
          Intrinsic::experimental_constrained_sqrt, ResultType);
      return Builder.CreateConstrainedFPCall(F, X);
    } else {
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
      return Builder.CreateCall(F, X);
    }
  }
  // Count leading zeros
  case PPC::BI__builtin_altivec_vclzb:
  case PPC::BI__builtin_altivec_vclzh:
  case PPC::BI__builtin_altivec_vclzw:
  case PPC::BI__builtin_altivec_vclzd: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }
  case PPC::BI__builtin_altivec_vctzb:
  case PPC::BI__builtin_altivec_vctzh:
  case PPC::BI__builtin_altivec_vctzw:
  case PPC::BI__builtin_altivec_vctzd: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }
  case PPC::BI__builtin_altivec_vinsd:
  case PPC::BI__builtin_altivec_vinsw:
  case PPC::BI__builtin_altivec_vinsd_elt:
  case PPC::BI__builtin_altivec_vinsw_elt: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));

    bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
                        BuiltinID == PPC::BI__builtin_altivec_vinsd);

    bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
                    BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);

    // The third argument must be a compile time constant.
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
    assert(ArgCI &&
           "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");

    // The valid value for the third argument depends on the input type and
    // the builtin called.
    int ValidMaxValue = 0;
    if (IsUnaligned)
      ValidMaxValue = (Is32bit) ? 12 : 8;
    else
      ValidMaxValue = (Is32bit) ? 3 : 1;

    // Get value of third argument.
    int64_t ConstArg = ArgCI->getSExtValue();

    // Compose range checking error message.
    std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
    RangeErrMsg += " number " + llvm::to_string(ConstArg);
    RangeErrMsg += " is outside of the valid range [0, ";
    RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";

    // Issue error if third argument is not within the valid range.
    if (ConstArg < 0 || ConstArg > ValidMaxValue)
      CGM.Error(E->getExprLoc(), RangeErrMsg);

    // Input to vec_replace_elt is an element index, convert to byte index.
    if (!IsUnaligned) {
      ConstArg *= Is32bit ? 4 : 8;
      // Fix the constant according to endianness.
      if (getTarget().isLittleEndian())
        ConstArg = (Is32bit ? 12 : 8) - ConstArg;
    }

    ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
    Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
    // Casting input to vector int as per intrinsic definition.
    Op0 =
        Is32bit
            ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
            : Builder.CreateBitCast(Op0,
                                    llvm::FixedVectorType::get(Int64Ty, 2));
    return Builder.CreateBitCast(
        Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
  }
  case PPC::BI__builtin_altivec_vpopcntb:
  case PPC::BI__builtin_altivec_vpopcnth:
  case PPC::BI__builtin_altivec_vpopcntw:
  case PPC::BI__builtin_altivec_vpopcntd: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
    return Builder.CreateCall(F, X);
  }
  case PPC::BI__builtin_altivec_vadduqm:
  case PPC::BI__builtin_altivec_vsubuqm: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
    Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
    if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
      return Builder.CreateAdd(Op0, Op1, "vadduqm");
    else
      return Builder.CreateSub(Op0, Op1, "vsubuqm");
  }
  case PPC::BI__builtin_altivec_vaddcuq_c:
  case PPC::BI__builtin_altivec_vsubcuq_c: {
    SmallVector<Value *, 2> Ops;
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
        llvm::IntegerType::get(getLLVMContext(), 128), 1);
    Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
    Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
    ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
             ? Intrinsic::ppc_altivec_vaddcuq
             : Intrinsic::ppc_altivec_vsubcuq;
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
  }
  case PPC::BI__builtin_altivec_vaddeuqm_c:
  case PPC::BI__builtin_altivec_vaddecuq_c:
  case PPC::BI__builtin_altivec_vsubeuqm_c:
  case PPC::BI__builtin_altivec_vsubecuq_c: {
    SmallVector<Value *, 3> Ops;
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
        llvm::IntegerType::get(getLLVMContext(), 128), 1);
    Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
    Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
    Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
    switch (BuiltinID) {
    default:
      llvm_unreachable("Unsupported intrinsic!");
    case PPC::BI__builtin_altivec_vaddeuqm_c:
      ID = Intrinsic::ppc_altivec_vaddeuqm;
      break;
    case PPC::BI__builtin_altivec_vaddecuq_c:
      ID = Intrinsic::ppc_altivec_vaddecuq;
      break;
    case PPC::BI__builtin_altivec_vsubeuqm_c:
      ID = Intrinsic::ppc_altivec_vsubeuqm;
      break;
    case PPC::BI__builtin_altivec_vsubecuq_c:
      ID = Intrinsic::ppc_altivec_vsubecuq;
      break;
    }
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
  }
  // Rotate and insert under mask operation.
  // __rldimi(rs, is, shift, mask)
  //    (rotl64(rs, shift) & mask) | (is & ~mask)
  // __rlwimi(rs, is, shift, mask)
  //    (rotl(rs, shift) & mask) | (is & ~mask)
  case PPC::BI__builtin_ppc_rldimi:
  case PPC::BI__builtin_ppc_rlwimi: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    Value *Op3 = EmitScalarExpr(E->getArg(3));
    llvm::Type *Ty = Op0->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
    if (BuiltinID == PPC::BI__builtin_ppc_rldimi)
      Op2 = Builder.CreateZExt(Op2, Int64Ty);
    Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
    Value *X = Builder.CreateAnd(Shift, Op3);
    Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3));
    return Builder.CreateOr(X, Y);
  }
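  // There is no generic rotate intrinsic, so the rotate above is expressed as
  // a funnel shift with both inputs equal: rotl(rs, shift) == fshl(rs, rs,
  // shift); the insert-under-mask is then plain and/or arithmetic.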
  // Rotate and insert under mask operation.
  // __rlwnm(rs, shift, mask)
  //    rotl(rs, shift) & mask
  case PPC::BI__builtin_ppc_rlwnm: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    llvm::Type *Ty = Op0->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
    Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1});
    return Builder.CreateAnd(Shift, Op2);
  }
  case PPC::BI__builtin_ppc_poppar4:
  case PPC::BI__builtin_ppc_poppar8: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = Op0->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
    Value *Tmp = Builder.CreateCall(F, Op0);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return Result;
  }
  case PPC::BI__builtin_ppc_cmpb: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    if (getTarget().getTriple().isPPC64()) {
      Function *F =
          CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
      return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
    }
    // For 32 bit, emit the code as below:
    // %conv = trunc i64 %a to i32
    // %conv1 = trunc i64 %b to i32
    // %shr = lshr i64 %a, 32
    // %conv2 = trunc i64 %shr to i32
    // %shr3 = lshr i64 %b, 32
    // %conv4 = trunc i64 %shr3 to i32
    // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
    // %conv5 = zext i32 %0 to i64
    // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
    // %conv614 = zext i32 %1 to i64
    // %shl = shl nuw i64 %conv614, 32
    // %or = or i64 %shl, %conv5
    Function *F =
        CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
    Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
    Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
    Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
    Value *ArgOneHi =
        Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
    Value *ArgTwoHi =
        Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
    Value *ResLo = Builder.CreateZExt(
        Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
    Value *ResHiShift = Builder.CreateZExt(
        Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
    Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
    return Builder.CreateOr(ResLo, ResHi);
  }
  case PPC::BI__builtin_vsx_xvcpsgnsp:
  case PPC::BI__builtin_vsx_xvcpsgndp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    ID = Intrinsic::copysign;
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
    return Builder.CreateCall(F, {X, Y});
  }
  // Rounding/truncation
  case PPC::BI__builtin_vsx_xvrspip:
  case PPC::BI__builtin_vsx_xvrdpip:
  case PPC::BI__builtin_vsx_xvrdpim:
  case PPC::BI__builtin_vsx_xvrspim:
  case PPC::BI__builtin_vsx_xvrdpi:
  case PPC::BI__builtin_vsx_xvrspi:
  case PPC::BI__builtin_vsx_xvrdpic:
  case PPC::BI__builtin_vsx_xvrspic:
  case PPC::BI__builtin_vsx_xvrdpiz:
  case PPC::BI__builtin_vsx_xvrspiz: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
        BuiltinID == PPC::BI__builtin_vsx_xvrspim)
      ID = Builder.getIsFPConstrained()
               ? Intrinsic::experimental_constrained_floor
               : Intrinsic::floor;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspi)
      ID = Builder.getIsFPConstrained()
               ? Intrinsic::experimental_constrained_round
               : Intrinsic::round;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspic)
      ID = Builder.getIsFPConstrained()
               ? Intrinsic::experimental_constrained_rint
               : Intrinsic::rint;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspip)
      ID = Builder.getIsFPConstrained()
               ? Intrinsic::experimental_constrained_ceil
               : Intrinsic::ceil;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
      ID = Builder.getIsFPConstrained()
               ? Intrinsic::experimental_constrained_trunc
               : Intrinsic::trunc;
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
    return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
                                        : Builder.CreateCall(F, X);
  }
  case PPC::BI__builtin_vsx_xvabsdp:
  case PPC::BI__builtin_vsx_xvabssp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
    return Builder.CreateCall(F, X);
  }
  // Fastmath by default
  case PPC::BI__builtin_ppc_recipdivf:
  case PPC::BI__builtin_ppc_recipdivd:
  case PPC::BI__builtin_ppc_rsqrtf:
  case PPC::BI__builtin_ppc_rsqrtd: {
    FastMathFlags FMF = Builder.getFastMathFlags();
    Builder.getFastMathFlags().setFast();
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));

    if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
        BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
      Value *Y = EmitScalarExpr(E->getArg(1));
      Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
      Builder.getFastMathFlags() &= (FMF);
      return FDiv;
    }
    auto *One = ConstantFP::get(ResultType, 1.0);
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
    Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
    Builder.getFastMathFlags() &= (FMF);
    return FDiv;
  }
  case PPC::BI__builtin_ppc_alignx: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
    if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
      AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
                                     llvm::Value::MaximumAlignment);

    emitAlignmentAssumption(Op1, E->getArg(1),
                            /*The expr loc is sufficient.*/ SourceLocation(),
                            AlignmentCI, nullptr);
    return Op1;
  }
  case PPC::BI__builtin_ppc_rdlam: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    llvm::Type *Ty = Op0->getType();
    Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
    Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
    Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
    return Builder.CreateAnd(Rotate, Op2);
  }
  case PPC::BI__builtin_ppc_load2r: {
    Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
    Value *Op0 = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
    Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
    return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
  }
  case PPC::BI__builtin_ppc_fnmsub:
  case PPC::BI__builtin_ppc_fnmsubs:
  case PPC::BI__builtin_vsx_xvmaddadp:
  case PPC::BI__builtin_vsx_xvmaddasp:
  case PPC::BI__builtin_vsx_xvnmaddadp:
  case PPC::BI__builtin_vsx_xvnmaddasp:
  case PPC::BI__builtin_vsx_xvmsubadp:
  case PPC::BI__builtin_vsx_xvmsubasp:
  case PPC::BI__builtin_vsx_xvnmsubadp:
  case PPC::BI__builtin_vsx_xvnmsubasp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    llvm::Function *F;
    if (Builder.getIsFPConstrained())
      F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
    else
      F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
    switch (BuiltinID) {
    case PPC::BI__builtin_vsx_xvmaddadp:
    case PPC::BI__builtin_vsx_xvmaddasp:
      if (Builder.getIsFPConstrained())
        return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
      else
        return Builder.CreateCall(F, {X, Y, Z});
    case PPC::BI__builtin_vsx_xvnmaddadp:
    case PPC::BI__builtin_vsx_xvnmaddasp:
      if (Builder.getIsFPConstrained())
        return Builder.CreateFNeg(
            Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
      else
        return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
    case PPC::BI__builtin_vsx_xvmsubadp:
    case PPC::BI__builtin_vsx_xvmsubasp:
      if (Builder.getIsFPConstrained())
        return Builder.CreateConstrainedFPCall(
            F, {X, Y, Builder.CreateFNeg(Z, "neg")});
      else
        return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
    case PPC::BI__builtin_ppc_fnmsub:
    case PPC::BI__builtin_ppc_fnmsubs:
    case PPC::BI__builtin_vsx_xvnmsubadp:
    case PPC::BI__builtin_vsx_xvnmsubasp:
      if (Builder.getIsFPConstrained())
        return Builder.CreateFNeg(
            Builder.CreateConstrainedFPCall(
                F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
            "neg");
      else
        return Builder.CreateCall(
            CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
    }
    llvm_unreachable("Unknown FMA operation");
    return nullptr; // Suppress no-return warning
  }
  case PPC::BI__builtin_vsx_insertword: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);

    // Third argument is a compile time constant int. It must be clamped to
    // the range [0, 12].
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
    assert(ArgCI &&
           "Third arg to xxinsertw intrinsic must be constant integer");
    const int64_t MaxIndex = 12;
    int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);

    // The builtin semantics don't exactly match the xxinsertw instruction's
    // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
    // word from the first argument, and inserts it in the second argument. The
    // instruction extracts the word from its second input register and inserts
    // it into its first input register, so swap the first and second arguments.
    std::swap(Op0, Op1);

    // Need to cast the second argument from a vector of unsigned int to a
    // vector of long long.
    Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));

    if (getTarget().isLittleEndian()) {
      // Reverse the double words in the vector we will extract from.
      Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
      Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});

      // Reverse the index.
      Index = MaxIndex - Index;
    }

    // Intrinsic expects the first arg to be a vector of int.
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
    Op2 = ConstantInt::getSigned(Int32Ty, Index);
    return Builder.CreateCall(F, {Op0, Op1, Op2});
  }
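  // NOTE (editor's illustrative note, not part of the original source): on a
  // little-endian target the byte index is mirrored around MaxIndex, so a
  // hypothetical __builtin_vsx_insertword(v, w, 4) is emitted with index
  // 12 - 4 = 8, after the doublewords of the source vector have been swapped.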
  case PPC::BI__builtin_vsx_extractuword: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);

    // Intrinsic expects the first argument to be a vector of doublewords.
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));

    // The second argument is a compile time constant int that needs to
    // be clamped to the range [0, 12].
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
    assert(ArgCI &&
           "Second Arg to xxextractuw intrinsic must be a constant integer!");
    const int64_t MaxIndex = 12;
    int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);

    if (getTarget().isLittleEndian()) {
      // Reverse the index.
      Index = MaxIndex - Index;
      Op1 = ConstantInt::getSigned(Int32Ty, Index);

      // Emit the call, then reverse the double words of the results vector.
      Value *Call = Builder.CreateCall(F, {Op0, Op1});

      Value *ShuffleCall =
          Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
      return ShuffleCall;
    } else {
      Op1 = ConstantInt::getSigned(Int32Ty, Index);
      return Builder.CreateCall(F, {Op0, Op1});
    }
  }
  case PPC::BI__builtin_vsx_xxpermdi: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
    assert(ArgCI && "Third arg must be constant integer!");

    unsigned Index = ArgCI->getZExtValue();
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
    Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));

    // Account for endianness by treating this as just a shuffle. So we use the
    // same indices for both LE and BE in order to produce expected results in
    // both cases.
    int ElemIdx0 = (Index & 2) >> 1;
    int ElemIdx1 = 2 + (Index & 1);

    int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
    Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
    QualType BIRetType = E->getType();
    auto RetTy = ConvertType(BIRetType);
    return Builder.CreateBitCast(ShuffleCall, RetTy);
  }
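  // NOTE (editor's illustrative note, not part of the original source): for
  // Index = 3 the computation above gives ElemIdx0 = (3 & 2) >> 1 = 1 and
  // ElemIdx1 = 2 + (3 & 1) = 3, i.e. the shuffle selects doubleword 1 of Op0
  // and doubleword 1 of Op1 over the concatenated input vectors.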
  case PPC::BI__builtin_vsx_xxsldwi: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
    assert(ArgCI && "Third argument must be a compile time constant");
    unsigned Index = ArgCI->getZExtValue() & 0x3;
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
    Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));

    // Create a shuffle mask
    int ElemIdx0;
    int ElemIdx1;
    int ElemIdx2;
    int ElemIdx3;
    if (getTarget().isLittleEndian()) {
      // Little endian element N comes from element 8+N-Index of the
      // concatenated wide vector (of course, using modulo arithmetic on
      // the total number of elements).
      ElemIdx0 = (8 - Index) % 8;
      ElemIdx1 = (9 - Index) % 8;
      ElemIdx2 = (10 - Index) % 8;
      ElemIdx3 = (11 - Index) % 8;
    } else {
      // Big endian ElemIdx<N> = Index + N
      ElemIdx0 = Index;
      ElemIdx1 = Index + 1;
      ElemIdx2 = Index + 2;
      ElemIdx3 = Index + 3;
    }

    int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
    Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
    QualType BIRetType = E->getType();
    auto RetTy = ConvertType(BIRetType);
    return Builder.CreateBitCast(ShuffleCall, RetTy);
  }
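  // NOTE (editor's illustrative note, not part of the original source): on a
  // little-endian target with Index = 1 the mask computed above is
  // {7, 0, 1, 2}, while on big-endian it would be {1, 2, 3, 4}.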
  case PPC::BI__builtin_pack_vector_int128: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    bool isLittleEndian = getTarget().isLittleEndian();
    Value *PoisonValue =
        llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
    Value *Res = Builder.CreateInsertElement(
        PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
    Res = Builder.CreateInsertElement(Res, Op1,
                                      (uint64_t)(isLittleEndian ? 0 : 1));
    return Builder.CreateBitCast(Res, ConvertType(E->getType()));
  }
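  // NOTE (editor's illustrative note, not part of the original source): the
  // insert positions are swapped on little-endian targets so that the first
  // builtin operand always ends up as the most-significant doubleword of the
  // packed 128-bit result, regardless of endianness.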
  case PPC::BI__builtin_unpack_vector_int128: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    ConstantInt *Index = cast<ConstantInt>(Op1);
    Value *Unpacked = Builder.CreateBitCast(
        Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));

    if (getTarget().isLittleEndian())
      Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());

    return Builder.CreateExtractElement(Unpacked, Index);
  }
  case PPC::BI__builtin_ppc_sthcx: {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
    Value *Op0 = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
    Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
    return Builder.CreateCall(F, {Op0, Op1});
  }
  // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
  // Some of the MMA instructions accumulate their result into an existing
  // accumulator whereas the others generate a new accumulator. So we need to
  // use custom code generation to expand a builtin call with a pointer to a
  // load (if the corresponding instruction accumulates its result) followed by
  // the call to the intrinsic and a store of the result.
#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
  case PPC::BI__builtin_##Name:
#include "clang/Basic/BuiltinsPPC.def"
  {
    SmallVector<Value *, 4> Ops;
    for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
      if (E->getArg(i)->getType()->isArrayType())
        Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer());
      else
        Ops.push_back(EmitScalarExpr(E->getArg(i)));
    // The first argument of these two builtins is a pointer used to store their
    // result. However, the llvm intrinsics return their result in multiple
    // return values. So, here we emit code extracting these values from the
    // intrinsic results and storing them using that pointer.
    if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
        BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
        BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
      unsigned NumVecs = 2;
      auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
      if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
        NumVecs = 4;
        Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
      }
      llvm::Function *F = CGM.getIntrinsic(Intrinsic);
      Address Addr = EmitPointerWithAlignment(E->getArg(1));
      Value *Vec = Builder.CreateLoad(Addr);
      Value *Call = Builder.CreateCall(F, {Vec});
      llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
      Value *Ptr = Builder.CreateBitCast(Ops[0], VTy->getPointerTo());
      for (unsigned i = 0; i < NumVecs; i++) {
        Value *Vec = Builder.CreateExtractValue(Call, i);
        llvm::ConstantInt *Index = llvm::ConstantInt::get(IntTy, i);
        Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
        Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
      }
      return Call;
    }
    if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
        BuiltinID == PPC::BI__builtin_mma_build_acc) {
      // Reverse the order of the operands for LE, so the
      // same builtin call can be used on both LE and BE
      // without the need for the programmer to swap operands.
      // The operands are reversed starting from the second argument,
      // the first operand is the pointer to the pair/accumulator
      // that is being built.
      if (getTarget().isLittleEndian())
        std::reverse(Ops.begin() + 1, Ops.end());
    }
    bool Accumulate;
    switch (BuiltinID) {
#define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
    case PPC::BI__builtin_##Name: \
      ID = Intrinsic::ppc_##Intr; \
      Accumulate = Acc; \
      break;
#include "clang/Basic/BuiltinsPPC.def"
    }
    if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
        BuiltinID == PPC::BI__builtin_vsx_stxvp ||
        BuiltinID == PPC::BI__builtin_mma_lxvp ||
        BuiltinID == PPC::BI__builtin_mma_stxvp) {
      if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
          BuiltinID == PPC::BI__builtin_mma_lxvp) {
        Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
        Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
      } else {
        Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
        Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
      }
      Ops.pop_back();
      llvm::Function *F = CGM.getIntrinsic(ID);
      return Builder.CreateCall(F, Ops, "");
    }
    SmallVector<Value *, 4> CallOps;
    if (Accumulate) {
      Address Addr = EmitPointerWithAlignment(E->getArg(0));
      Value *Acc = Builder.CreateLoad(Addr);
      CallOps.push_back(Acc);
    }
    for (unsigned i = 1; i < Ops.size(); i++)
      CallOps.push_back(Ops[i]);
    llvm::Function *F = CGM.getIntrinsic(ID);
    Value *Call = Builder.CreateCall(F, CallOps);
    return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
  }
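  // NOTE (editor's illustrative sketch, assumption, not part of the original
  // source): an accumulating MMA builtin such as
  //   __builtin_mma_xvf32gerpp(&acc, a, b);
  // is therefore expanded to roughly
  //   %acc0 = load <512 x i1>, ptr %acc
  //   %acc1 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %acc0,
  //                                                    <16 x i8> %a, <16 x i8> %b)
  //   store <512 x i1> %acc1, ptr %acc
  // whereas a non-accumulating variant skips the initial load.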
  case PPC::BI__builtin_ppc_compare_and_swap:
  case PPC::BI__builtin_ppc_compare_and_swaplp: {
    Address Addr = EmitPointerWithAlignment(E->getArg(0));
    Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
    Value *OldVal = Builder.CreateLoad(OldValAddr);
    QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
    LValue LV = MakeAddrLValue(Addr, AtomicTy);
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    auto Pair = EmitAtomicCompareExchange(
        LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
    // Unlike c11's atomic_compare_exchange, according to
    // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
    // > In either case, the contents of the memory location specified by addr
    // > are copied into the memory location specified by old_val_addr.
    // But it does not specify whether the store to OldValAddr is atomic or
    // which ordering to use. Following XL's codegen, treat it as a normal
    // store.
    Value *LoadedVal = Pair.first.getScalarVal();
    Builder.CreateStore(LoadedVal, OldValAddr);
    return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
  }
  case PPC::BI__builtin_ppc_fetch_and_add:
  case PPC::BI__builtin_ppc_fetch_and_addlp: {
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
                                 llvm::AtomicOrdering::Monotonic);
  }
  case PPC::BI__builtin_ppc_fetch_and_and:
  case PPC::BI__builtin_ppc_fetch_and_andlp: {
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
                                 llvm::AtomicOrdering::Monotonic);
  }
  case PPC::BI__builtin_ppc_fetch_and_or:
  case PPC::BI__builtin_ppc_fetch_and_orlp: {
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
                                 llvm::AtomicOrdering::Monotonic);
  }
  case PPC::BI__builtin_ppc_fetch_and_swap:
  case PPC::BI__builtin_ppc_fetch_and_swaplp: {
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
                                 llvm::AtomicOrdering::Monotonic);
  }
  case PPC::BI__builtin_ppc_ldarx:
  case PPC::BI__builtin_ppc_lwarx:
  case PPC::BI__builtin_ppc_lharx:
  case PPC::BI__builtin_ppc_lbarx:
    return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
  case PPC::BI__builtin_ppc_mfspr: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
                              ? Int32Ty
                              : Int64Ty;
    Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
    return Builder.CreateCall(F, {Op0});
  }
  case PPC::BI__builtin_ppc_mtspr: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
                              ? Int32Ty
                              : Int64Ty;
    Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
    return Builder.CreateCall(F, {Op0, Op1});
  }
  case PPC::BI__builtin_ppc_popcntb: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
    return Builder.CreateCall(F, {ArgValue}, "popcntb");
  }
  case PPC::BI__builtin_ppc_mtfsf: {
    // The builtin takes a uint32 that needs to be cast to an
    // f64 to be passed to the intrinsic.
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
    return Builder.CreateCall(F, {Op0, Cast}, "");
  }
  case PPC::BI__builtin_ppc_swdiv_nochk:
  case PPC::BI__builtin_ppc_swdivs_nochk: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    FastMathFlags FMF = Builder.getFastMathFlags();
    Builder.getFastMathFlags().setFast();
    Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
    Builder.getFastMathFlags() &= (FMF);
    return FDiv;
  }
  case PPC::BI__builtin_ppc_fric:
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
                           *this, E, Intrinsic::rint,
                           Intrinsic::experimental_constrained_rint))
        .getScalarVal();
  case PPC::BI__builtin_ppc_frim:
  case PPC::BI__builtin_ppc_frims:
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
                           *this, E, Intrinsic::floor,
                           Intrinsic::experimental_constrained_floor))
        .getScalarVal();
  case PPC::BI__builtin_ppc_frin:
  case PPC::BI__builtin_ppc_frins:
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
                           *this, E, Intrinsic::round,
                           Intrinsic::experimental_constrained_round))
        .getScalarVal();
  case PPC::BI__builtin_ppc_frip:
  case PPC::BI__builtin_ppc_frips:
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
                           *this, E, Intrinsic::ceil,
                           Intrinsic::experimental_constrained_ceil))
        .getScalarVal();
  case PPC::BI__builtin_ppc_friz:
  case PPC::BI__builtin_ppc_frizs:
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
                           *this, E, Intrinsic::trunc,
                           Intrinsic::experimental_constrained_trunc))
        .getScalarVal();
  case PPC::BI__builtin_ppc_fsqrt:
  case PPC::BI__builtin_ppc_fsqrts:
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
                           *this, E, Intrinsic::sqrt,
                           Intrinsic::experimental_constrained_sqrt))
        .getScalarVal();
  case PPC::BI__builtin_ppc_test_data_class: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
        {Op0, Op1}, "test_data_class");
  }
  case PPC::BI__builtin_ppc_maxfe: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    Value *Op3 = EmitScalarExpr(E->getArg(3));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
                              {Op0, Op1, Op2, Op3});
  }
  case PPC::BI__builtin_ppc_maxfl: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    Value *Op3 = EmitScalarExpr(E->getArg(3));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
                              {Op0, Op1, Op2, Op3});
  }
  case PPC::BI__builtin_ppc_maxfs: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    Value *Op3 = EmitScalarExpr(E->getArg(3));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
                              {Op0, Op1, Op2, Op3});
  }
  case PPC::BI__builtin_ppc_minfe: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    Value *Op3 = EmitScalarExpr(E->getArg(3));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
                              {Op0, Op1, Op2, Op3});
  }
  case PPC::BI__builtin_ppc_minfl: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    Value *Op3 = EmitScalarExpr(E->getArg(3));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
                              {Op0, Op1, Op2, Op3});
  }
  case PPC::BI__builtin_ppc_minfs: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    Value *Op3 = EmitScalarExpr(E->getArg(3));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
                              {Op0, Op1, Op2, Op3});
  }
  case PPC::BI__builtin_ppc_swdiv:
  case PPC::BI__builtin_ppc_swdivs: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    return Builder.CreateFDiv(Op0, Op1, "swdiv");
  }
  }
}

namespace {
// If \p E is not null pointer, insert address space cast to match return
// type of \p E if necessary.
Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
                             const CallExpr *E = nullptr) {
  auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
  auto *Call = CGF.Builder.CreateCall(F);
  Call->addRetAttr(
      Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
  Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
  if (!E)
    return Call;
  QualType BuiltinRetType = E->getType();
  auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
  if (RetTy == Call->getType())
    return Call;
  return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
}

Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
  auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
  auto *Call = CGF.Builder.CreateCall(F);
  Call->addRetAttr(
      Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
  Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
  return Call;
}
// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
  bool IsCOV_5 = CGF.getTarget().getTargetOpts().CodeObjectVersion ==
                 clang::TargetOptions::COV_5;
  Constant *Offset;
  Value *DP;
  if (IsCOV_5) {
    // Indexing the implicit kernarg segment.
    Offset = llvm::ConstantInt::get(CGF.Int32Ty, 12 + Index * 2);
    DP = EmitAMDGPUImplicitArgPtr(CGF);
  } else {
    // Indexing the HSA kernel_dispatch_packet struct.
    Offset = llvm::ConstantInt::get(CGF.Int32Ty, 4 + Index * 2);
    DP = EmitAMDGPUDispatchPtr(CGF);
  }

  auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
  auto *DstTy =
      CGF.Int16Ty->getPointerTo(GEP->getType()->getPointerAddressSpace());
  auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy);
  auto *LD = CGF.Builder.CreateLoad(
      Address(Cast, CGF.Int16Ty, CharUnits::fromQuantity(2)));
  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
  llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
      APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
  LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
  LD->setMetadata(llvm::LLVMContext::MD_noundef,
                  llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
  LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
                  llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
  return LD;
}
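// NOTE (editor's illustrative note, assumption, not part of the original
// source): with code object v5 the x/y/z sizes are i16 loads at implicitarg
// offsets 12/14/16; the dispatch-packet path instead reads offsets 4/6/8 of
// the HSA kernel_dispatch_packet (its workgroup_size_x/y/z fields).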
// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
  const unsigned XOffset = 12;
  auto *DP = EmitAMDGPUDispatchPtr(CGF);
  // Indexing the HSA kernel_dispatch_packet struct.
  auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
  auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
  auto *DstTy =
      CGF.Int32Ty->getPointerTo(GEP->getType()->getPointerAddressSpace());
  auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy);
  auto *LD = CGF.Builder.CreateLoad(
      Address(Cast, CGF.Int32Ty, CharUnits::fromQuantity(4)));
  LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
                  llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
  return LD;
}
} // namespace
// For processing memory ordering and memory scope arguments of various
// amdgcn builtins.
// \p Order takes a C++11 compatible memory-ordering specifier and converts
// it into LLVM's memory ordering specifier using atomic C ABI, and writes
// to \p AO. \p Scope takes a const char * and converts it into an AMDGCN
// specific SyncScopeID and writes it to \p SSID.
void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
                                              llvm::AtomicOrdering &AO,
                                              llvm::SyncScope::ID &SSID) {
  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();

  // Map C11/C++11 memory ordering to LLVM memory ordering
  assert(llvm::isValidAtomicOrderingCABI(ord));
  switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
  case llvm::AtomicOrderingCABI::acquire:
  case llvm::AtomicOrderingCABI::consume:
    AO = llvm::AtomicOrdering::Acquire;
    break;
  case llvm::AtomicOrderingCABI::release:
    AO = llvm::AtomicOrdering::Release;
    break;
  case llvm::AtomicOrderingCABI::acq_rel:
    AO = llvm::AtomicOrdering::AcquireRelease;
    break;
  case llvm::AtomicOrderingCABI::seq_cst:
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    break;
  case llvm::AtomicOrderingCABI::relaxed:
    AO = llvm::AtomicOrdering::Monotonic;
    break;
  }

  StringRef scp;
  llvm::getConstantStringInfo(Scope, scp);
  SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
}
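// NOTE (editor's illustrative note, not part of the original source): for
// example, __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup") resolves
// here to AO = AtomicOrdering::Acquire and SSID = the "workgroup" sync scope,
// which the fence case below emits as `fence syncscope("workgroup") acquire`.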
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
  llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
  llvm::SyncScope::ID SSID;
  switch (BuiltinID) {
  case AMDGPU::BI__builtin_amdgcn_div_scale:
  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
    // Translate from the intrinsic's struct return to the builtin's out
    // argument.
    Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));

    llvm::Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
    llvm::Value *Z = EmitScalarExpr(E->getArg(2));

    llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
                                              X->getType());

    llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});

    llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
    llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);

    llvm::Type *RealFlagType = FlagOutPtr.getElementType();

    llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
    Builder.CreateStore(FlagExt, FlagOutPtr);
    return Result;
  }
  case AMDGPU::BI__builtin_amdgcn_div_fmas:
  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
    llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));

    llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
                                         Src0->getType());
    llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
    return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
  }
  case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
    return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
  case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
    return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8);
  case AMDGPU::BI__builtin_amdgcn_mov_dpp:
  case AMDGPU::BI__builtin_amdgcn_update_dpp: {
    llvm::SmallVector<llvm::Value *, 6> Args;
    for (unsigned I = 0; I != E->getNumArgs(); ++I)
      Args.push_back(EmitScalarExpr(E->getArg(I)));
    assert(Args.size() == 5 || Args.size() == 6);
    if (Args.size() == 5)
      Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
    Function *F =
        CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
    return Builder.CreateCall(F, Args);
  }
  case AMDGPU::BI__builtin_amdgcn_div_fixup:
  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
  case AMDGPU::BI__builtin_amdgcn_div_fixuph:
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
  case AMDGPU::BI__builtin_amdgcn_trig_preop:
  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
  case AMDGPU::BI__builtin_amdgcn_rcp:
  case AMDGPU::BI__builtin_amdgcn_rcpf:
  case AMDGPU::BI__builtin_amdgcn_rcph:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
  case AMDGPU::BI__builtin_amdgcn_sqrt:
  case AMDGPU::BI__builtin_amdgcn_sqrtf:
  case AMDGPU::BI__builtin_amdgcn_sqrth:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt);
  case AMDGPU::BI__builtin_amdgcn_rsq:
  case AMDGPU::BI__builtin_amdgcn_rsqf:
  case AMDGPU::BI__builtin_amdgcn_rsqh:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
  case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
  case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
  case AMDGPU::BI__builtin_amdgcn_sinf:
  case AMDGPU::BI__builtin_amdgcn_sinh:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
  case AMDGPU::BI__builtin_amdgcn_cosf:
  case AMDGPU::BI__builtin_amdgcn_cosh:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
  case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
    return EmitAMDGPUDispatchPtr(*this, E);
  case AMDGPU::BI__builtin_amdgcn_logf:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log);
  case AMDGPU::BI__builtin_amdgcn_log_clampf:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
  case AMDGPU::BI__builtin_amdgcn_ldexp:
  case AMDGPU::BI__builtin_amdgcn_ldexpf:
  case AMDGPU::BI__builtin_amdgcn_ldexph: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
    return Builder.CreateCall(F, {Src0, Src1});
  }
  case AMDGPU::BI__builtin_amdgcn_frexp_mant:
  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
  case AMDGPU::BI__builtin_amdgcn_frexp_manth:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
  case AMDGPU::BI__builtin_amdgcn_frexp_exp:
  case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
                                   { Builder.getInt32Ty(), Src0->getType() });
    return Builder.CreateCall(F, Src0);
  }
  case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
                                   { Builder.getInt16Ty(), Src0->getType() });
    return Builder.CreateCall(F, Src0);
  }
  case AMDGPU::BI__builtin_amdgcn_fract:
  case AMDGPU::BI__builtin_amdgcn_fractf:
  case AMDGPU::BI__builtin_amdgcn_fracth:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
  case AMDGPU::BI__builtin_amdgcn_lerp:
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
  case AMDGPU::BI__builtin_amdgcn_ubfe:
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
  case AMDGPU::BI__builtin_amdgcn_sbfe:
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
  case AMDGPU::BI__builtin_amdgcn_ballot_w32:
  case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
    llvm::Type *ResultType = ConvertType(E->getType());
    llvm::Value *Src = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
    return Builder.CreateCall(F, { Src });
  }
  case AMDGPU::BI__builtin_amdgcn_uicmp:
  case AMDGPU::BI__builtin_amdgcn_uicmpl:
  case AMDGPU::BI__builtin_amdgcn_sicmp:
  case AMDGPU::BI__builtin_amdgcn_sicmpl: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));

    // FIXME-GFX10: How should 32 bit mask be handled?
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
                                   { Builder.getInt64Ty(), Src0->getType() });
    return Builder.CreateCall(F, { Src0, Src1, Src2 });
  }
  case AMDGPU::BI__builtin_amdgcn_fcmp:
  case AMDGPU::BI__builtin_amdgcn_fcmpf: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));

    // FIXME-GFX10: How should 32 bit mask be handled?
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
                                   { Builder.getInt64Ty(), Src0->getType() });
    return Builder.CreateCall(F, { Src0, Src1, Src2 });
  }
  case AMDGPU::BI__builtin_amdgcn_class:
  case AMDGPU::BI__builtin_amdgcn_classf:
  case AMDGPU::BI__builtin_amdgcn_classh:
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
  case AMDGPU::BI__builtin_amdgcn_fmed3f:
  case AMDGPU::BI__builtin_amdgcn_fmed3h:
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
  case AMDGPU::BI__builtin_amdgcn_ds_append:
  case AMDGPU::BI__builtin_amdgcn_ds_consume: {
    Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
      Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
    return Builder.CreateCall(F, { Src0, Builder.getFalse() });
  }
  case AMDGPU::BI__builtin_amdgcn_ds_faddf:
  case AMDGPU::BI__builtin_amdgcn_ds_fminf:
  case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
    Intrinsic::ID Intrin;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_ds_faddf:
      Intrin = Intrinsic::amdgcn_ds_fadd;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_fminf:
      Intrin = Intrinsic::amdgcn_ds_fmin;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
      Intrin = Intrinsic::amdgcn_ds_fmax;
      break;
    }
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
    llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
    llvm::Value *Src4 = EmitScalarExpr(E->getArg(4));
    llvm::Function *F = CGM.getIntrinsic(Intrin, { Src1->getType() });
    llvm::FunctionType *FTy = F->getFunctionType();
    llvm::Type *PTy = FTy->getParamType(0);
    Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
    return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
  }
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
    Intrinsic::ID IID;
    llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
      ArgTy = llvm::Type::getFloatTy(getLLVMContext());
      IID = Intrinsic::amdgcn_global_atomic_fadd;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
      ArgTy = llvm::FixedVectorType::get(
          llvm::Type::getHalfTy(getLLVMContext()), 2);
      IID = Intrinsic::amdgcn_global_atomic_fadd;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
      IID = Intrinsic::amdgcn_global_atomic_fadd;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
      IID = Intrinsic::amdgcn_global_atomic_fmin;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
      IID = Intrinsic::amdgcn_global_atomic_fmax;
      break;
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
      IID = Intrinsic::amdgcn_flat_atomic_fadd;
      break;
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
      IID = Intrinsic::amdgcn_flat_atomic_fmin;
      break;
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
      IID = Intrinsic::amdgcn_flat_atomic_fmax;
      break;
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
      ArgTy = llvm::Type::getFloatTy(getLLVMContext());
      IID = Intrinsic::amdgcn_flat_atomic_fadd;
      break;
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
      ArgTy = llvm::FixedVectorType::get(
          llvm::Type::getHalfTy(getLLVMContext()), 2);
      IID = Intrinsic::amdgcn_flat_atomic_fadd;
      break;
    }
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
    llvm::Function *F =
        CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
    return Builder.CreateCall(F, {Addr, Val});
  }
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
      IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
      break;
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
      IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
      break;
    }
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
    llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
    return Builder.CreateCall(F, {Addr, Val});
  }
  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: {
    Intrinsic::ID IID;
    llvm::Type *ArgTy;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
      ArgTy = llvm::Type::getFloatTy(getLLVMContext());
      IID = Intrinsic::amdgcn_ds_fadd;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
      ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
      IID = Intrinsic::amdgcn_ds_fadd;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
      ArgTy = llvm::FixedVectorType::get(
          llvm::Type::getHalfTy(getLLVMContext()), 2);
      IID = Intrinsic::amdgcn_ds_fadd;
      break;
    }
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
    llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
        llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true));
    llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
        llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
    llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
    return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
  }
  case AMDGPU::BI__builtin_amdgcn_read_exec: {
    CallInst *CI = cast<CallInst>(
      EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, NormalRead, "exec"));
    CI->setConvergent();
    return CI;
  }
  case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
  case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
    StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
      "exec_lo" : "exec_hi";
    CallInst *CI = cast<CallInst>(
      EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, NormalRead, RegName));
    CI->setConvergent();
    return CI;
  }
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
    llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
    llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
    llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
    llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
    llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
    llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));

    // The builtins take these arguments as vec4 where the last element is
    // ignored. The intrinsic takes them as vec3.
    RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
                                            ArrayRef<int>{0, 1, 2});
    RayDir =
        Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
    RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
                                                ArrayRef<int>{0, 1, 2});

    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
                                   {NodePtr->getType(), RayDir->getType()});
    return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
                                  RayInverseDir, TextureDescr});
  }
  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
    SmallVector<Value *, 4> Args;
    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
      Args.push_back(EmitScalarExpr(E->getArg(i)));

    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
    Value *Call = Builder.CreateCall(F, Args);
    Value *Rtn = Builder.CreateExtractValue(Call, 0);
    Value *A = Builder.CreateExtractValue(Call, 1);
    llvm::Type *RetTy = ConvertType(E->getType());
    Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
                                            (uint64_t)0);
    return Builder.CreateInsertElement(I0, A, 1);
  }
  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: {

    // These operations perform a matrix multiplication and accumulation of
    // the form:
    //             D = A * B + C
    // The return type always matches the type of matrix C.
    unsigned ArgForMatchingRetType;
    unsigned BuiltinWMMAOp;

    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
      ArgForMatchingRetType = 2;
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
      ArgForMatchingRetType = 2;
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
      ArgForMatchingRetType = 2;
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
      ArgForMatchingRetType = 2;
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
      ArgForMatchingRetType = 4;
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
      ArgForMatchingRetType = 4;
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
      break;
    }

    SmallVector<Value *, 6> Args;
    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
      Args.push_back(EmitScalarExpr(E->getArg(i)));

    Function *F = CGM.getIntrinsic(BuiltinWMMAOp,
                                   {Args[ArgForMatchingRetType]->getType()});

    return Builder.CreateCall(F, Args);
  }
  case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
  case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
  case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);

  // amdgcn workgroup size
  case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
    return EmitAMDGPUWorkGroupSize(*this, 0);
  case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
    return EmitAMDGPUWorkGroupSize(*this, 1);
  case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
    return EmitAMDGPUWorkGroupSize(*this, 2);

  // amdgcn grid size
  case AMDGPU::BI__builtin_amdgcn_grid_size_x:
    return EmitAMDGPUGridSize(*this, 0);
  case AMDGPU::BI__builtin_amdgcn_grid_size_y:
    return EmitAMDGPUGridSize(*this, 1);
  case AMDGPU::BI__builtin_amdgcn_grid_size_z:
    return EmitAMDGPUGridSize(*this, 2);

  case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
  case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
    return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
  case AMDGPU::BI__builtin_r600_read_tidig_x:
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
  case AMDGPU::BI__builtin_r600_read_tidig_y:
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
  case AMDGPU::BI__builtin_r600_read_tidig_z:
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
  case AMDGPU::BI__builtin_amdgcn_alignbit: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
    Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
    return Builder.CreateCall(F, { Src0, Src1, Src2 });
  }
  case AMDGPU::BI__builtin_amdgcn_fence: {
    ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
                            EmitScalarExpr(E->getArg(1)), AO, SSID);
    return Builder.CreateFence(AO, SSID);
  }
  case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
  case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
  case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
  case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
    unsigned BuiltinAtomicOp;
    llvm::Type *ResultType = ConvertType(E->getType());

    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
    case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
      BuiltinAtomicOp = Intrinsic::amdgcn_atomic_inc;
      break;
    case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
    case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
      BuiltinAtomicOp = Intrinsic::amdgcn_atomic_dec;
      break;
    }

    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));

    llvm::Function *F =
        CGM.getIntrinsic(BuiltinAtomicOp, {ResultType, Ptr->getType()});

    ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
                            EmitScalarExpr(E->getArg(3)), AO, SSID);

    // llvm.amdgcn.atomic.inc and llvm.amdgcn.atomic.dec expect ordering and
    // scope as unsigned values
    Value *MemOrder = Builder.getInt32(static_cast<int>(AO));
    Value *MemScope = Builder.getInt32(static_cast<int>(SSID));

    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
    bool Volatile =
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
    Value *IsVolatile = Builder.getInt1(static_cast<bool>(Volatile));

    return Builder.CreateCall(F, {Ptr, Val, MemOrder, MemScope, IsVolatile});
  }
  case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
  case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ResultType = ConvertType(E->getType());
    // s_sendmsg_rtn is mangled using return type only.
    Function *F =
        CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
    return Builder.CreateCall(F, {Arg});
  }
  default:
    return nullptr;
  }
}
/// Handle a SystemZ function in which the final argument is a pointer
/// to an int that receives the post-instruction CC value. At the LLVM level
/// this is represented as a function that returns a {result, cc} pair.
static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
                                         unsigned IntrinsicID,
                                         const CallExpr *E) {
  unsigned NumArgs = E->getNumArgs() - 1;
  SmallVector<Value *, 8> Args(NumArgs);
  for (unsigned I = 0; I < NumArgs; ++I)
    Args[I] = CGF.EmitScalarExpr(E->getArg(I));
  Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
  Value *Call = CGF.Builder.CreateCall(F, Args);
  Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
  CGF.Builder.CreateStore(CC, CCPtr);
  return CGF.Builder.CreateExtractValue(Call, 0);
}
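// NOTE (editor's illustrative sketch, assumption, not part of the original
// source): for a CC-setting vector builtin the wrapper above emits roughly
//   %pair = call { <result type>, i32 } @llvm.s390.<intrinsic>(...)
//   %cc   = extractvalue %pair, 1
//   store i32 %cc, ptr %ccptr        ; the builtin's final pointer argument
// and returns extractvalue %pair, 0 as the builtin's result value.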

Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
  switch (BuiltinID) {
  case SystemZ::BI__builtin_tbegin: {
    Value *TDB = EmitScalarExpr(E->getArg(0));
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
    Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tbegin_nofloat: {
    Value *TDB = EmitScalarExpr(E->getArg(0));
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
    Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tbeginc: {
    Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
    Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tabort: {
    Value *Data = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
    return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
  }
  case SystemZ::BI__builtin_non_tx_store: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Data = EmitScalarExpr(E->getArg(1));
    Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
    return Builder.CreateCall(F, {Data, Address});
  }

  // Vector builtins.  Note that most vector builtins are mapped automatically
  // to target-specific LLVM intrinsics.  The ones handled specially here can
  // be represented via standard LLVM IR, which is preferable to enable common
  // LLVM optimizations.

  case SystemZ::BI__builtin_s390_vpopctb:
  case SystemZ::BI__builtin_s390_vpopcth:
  case SystemZ::BI__builtin_s390_vpopctf:
  case SystemZ::BI__builtin_s390_vpopctg: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
    return Builder.CreateCall(F, X);
  }
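  // Illustrative sketch (assumed mapping, not checked-in test output): for
  //   vector unsigned char x; ... __builtin_s390_vpopctb(x);
  // the call above should produce roughly
  //   %r = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %x)
  // so generic popcount optimizations apply before isel selects VPOPCT.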
  case SystemZ::BI__builtin_s390_vclzb:
  case SystemZ::BI__builtin_s390_vclzh:
  case SystemZ::BI__builtin_s390_vclzf:
  case SystemZ::BI__builtin_s390_vclzg: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }
  case SystemZ::BI__builtin_s390_vctzb:
  case SystemZ::BI__builtin_s390_vctzh:
  case SystemZ::BI__builtin_s390_vctzf:
  case SystemZ::BI__builtin_s390_vctzg: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }
  case SystemZ::BI__builtin_s390_vfsqsb:
  case SystemZ::BI__builtin_s390_vfsqdb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    if (Builder.getIsFPConstrained()) {
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                                     ResultType);
      return Builder.CreateConstrainedFPCall(F, {X});
    } else {
      Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
      return Builder.CreateCall(F, X);
    }
  }
  case SystemZ::BI__builtin_s390_vfmasb:
  case SystemZ::BI__builtin_s390_vfmadb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    if (Builder.getIsFPConstrained()) {
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma,
                                     ResultType);
      return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
    } else {
      Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
      return Builder.CreateCall(F, {X, Y, Z});
    }
  }
  case SystemZ::BI__builtin_s390_vfmssb:
  case SystemZ::BI__builtin_s390_vfmsdb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    if (Builder.getIsFPConstrained()) {
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma,
                                     ResultType);
      return Builder.CreateConstrainedFPCall(
          F, {X, Y, Builder.CreateFNeg(Z, "neg")});
    } else {
      Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
      return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
    }
  }
  case SystemZ::BI__builtin_s390_vfnmasb:
  case SystemZ::BI__builtin_s390_vfnmadb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    if (Builder.getIsFPConstrained()) {
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma,
                                     ResultType);
      return Builder.CreateFNeg(
          Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
    } else {
      Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
      return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
    }
  }
  case SystemZ::BI__builtin_s390_vfnmssb:
  case SystemZ::BI__builtin_s390_vfnmsdb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    if (Builder.getIsFPConstrained()) {
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma,
                                     ResultType);
      Value *NegZ = Builder.CreateFNeg(Z, "sub");
      return Builder.CreateFNeg(
          Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
    } else {
      Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
      Value *NegZ = Builder.CreateFNeg(Z, "neg");
      return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
    }
  }
  case SystemZ::BI__builtin_s390_vflpsb:
  case SystemZ::BI__builtin_s390_vflpdb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
    return Builder.CreateCall(F, X);
  }
  case SystemZ::BI__builtin_s390_vflnsb:
  case SystemZ::BI__builtin_s390_vflndb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
    return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
  }
  case SystemZ::BI__builtin_s390_vfisb:
  case SystemZ::BI__builtin_s390_vfidb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    // Constant-fold the M4 and M5 mask arguments.
    llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
    llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
    // Check whether this instance can be represented via an LLVM standard
    // intrinsic.  We only support some combinations of M4 and M5.
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    Intrinsic::ID CI;
    switch (M4.getZExtValue()) {
    default: break;
    case 0:  // IEEE-inexact exception allowed
      switch (M5.getZExtValue()) {
      default: break;
      case 0: ID = Intrinsic::rint;
              CI = Intrinsic::experimental_constrained_rint; break;
      }
      break;
    case 4:  // IEEE-inexact exception suppressed
      switch (M5.getZExtValue()) {
      default: break;
      case 0: ID = Intrinsic::nearbyint;
              CI = Intrinsic::experimental_constrained_nearbyint; break;
      case 1: ID = Intrinsic::round;
              CI = Intrinsic::experimental_constrained_round; break;
      case 5: ID = Intrinsic::trunc;
              CI = Intrinsic::experimental_constrained_trunc; break;
      case 6: ID = Intrinsic::ceil;
              CI = Intrinsic::experimental_constrained_ceil; break;
      case 7: ID = Intrinsic::floor;
              CI = Intrinsic::experimental_constrained_floor; break;
      }
      break;
    }
    if (ID != Intrinsic::not_intrinsic) {
      if (Builder.getIsFPConstrained()) {
        Function *F = CGM.getIntrinsic(CI, ResultType);
        return Builder.CreateConstrainedFPCall(F, X);
      } else {
        Function *F = CGM.getIntrinsic(ID, ResultType);
        return Builder.CreateCall(F, X);
      }
    }
    switch (BuiltinID) { // FIXME: constrained version?
      case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
      case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
      default: llvm_unreachable("Unknown BuiltinID");
    }
    Function *F = CGM.getIntrinsic(ID);
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
    Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
    return Builder.CreateCall(F, {X, M4Value, M5Value});
  }
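  // For orientation (a sketch, not exhaustive): with the mapping above,
  //   __builtin_s390_vfidb(v, 4, 5)  -> @llvm.trunc.*
  //   __builtin_s390_vfidb(v, 4, 7)  -> @llvm.floor.*
  //   __builtin_s390_vfidb(v, 0, 0)  -> @llvm.rint.*
  // Any other (M4, M5) combination falls through to @llvm.s390.vfidb above.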
  case SystemZ::BI__builtin_s390_vfmaxsb:
  case SystemZ::BI__builtin_s390_vfmaxdb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    // Constant-fold the M4 mask argument.
    llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
    // Check whether this instance can be represented via an LLVM standard
    // intrinsic.  We only support some values of M4.
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    Intrinsic::ID CI;
    switch (M4.getZExtValue()) {
    default: break;
    case 4: ID = Intrinsic::maxnum;
            CI = Intrinsic::experimental_constrained_maxnum; break;
    }
    if (ID != Intrinsic::not_intrinsic) {
      if (Builder.getIsFPConstrained()) {
        Function *F = CGM.getIntrinsic(CI, ResultType);
        return Builder.CreateConstrainedFPCall(F, {X, Y});
      } else {
        Function *F = CGM.getIntrinsic(ID, ResultType);
        return Builder.CreateCall(F, {X, Y});
      }
    }
    switch (BuiltinID) {
      case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
      case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
      default: llvm_unreachable("Unknown BuiltinID");
    }
    Function *F = CGM.getIntrinsic(ID);
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
    return Builder.CreateCall(F, {X, Y, M4Value});
  }
  case SystemZ::BI__builtin_s390_vfminsb:
  case SystemZ::BI__builtin_s390_vfmindb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    // Constant-fold the M4 mask argument.
    llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
    // Check whether this instance can be represented via an LLVM standard
    // intrinsic.  We only support some values of M4.
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    Intrinsic::ID CI;
    switch (M4.getZExtValue()) {
    default: break;
    case 4: ID = Intrinsic::minnum;
            CI = Intrinsic::experimental_constrained_minnum; break;
    }
    if (ID != Intrinsic::not_intrinsic) {
      if (Builder.getIsFPConstrained()) {
        Function *F = CGM.getIntrinsic(CI, ResultType);
        return Builder.CreateConstrainedFPCall(F, {X, Y});
      } else {
        Function *F = CGM.getIntrinsic(ID, ResultType);
        return Builder.CreateCall(F, {X, Y});
      }
    }
    switch (BuiltinID) {
      case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
      case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
      default: llvm_unreachable("Unknown BuiltinID");
    }
    Function *F = CGM.getIntrinsic(ID);
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
    return Builder.CreateCall(F, {X, Y, M4Value});
  }
  case SystemZ::BI__builtin_s390_vlbrh:
  case SystemZ::BI__builtin_s390_vlbrf:
  case SystemZ::BI__builtin_s390_vlbrg: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
    return Builder.CreateCall(F, X);
  }

  // Vector intrinsics that output the post-instruction CC value.

#define INTRINSIC_WITH_CC(NAME) \
  case SystemZ::BI__builtin_##NAME: \
    return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)

  INTRINSIC_WITH_CC(s390_vpkshs);
  INTRINSIC_WITH_CC(s390_vpksfs);
  INTRINSIC_WITH_CC(s390_vpksgs);

  INTRINSIC_WITH_CC(s390_vpklshs);
  INTRINSIC_WITH_CC(s390_vpklsfs);
  INTRINSIC_WITH_CC(s390_vpklsgs);

  INTRINSIC_WITH_CC(s390_vceqbs);
  INTRINSIC_WITH_CC(s390_vceqhs);
  INTRINSIC_WITH_CC(s390_vceqfs);
  INTRINSIC_WITH_CC(s390_vceqgs);

  INTRINSIC_WITH_CC(s390_vchbs);
  INTRINSIC_WITH_CC(s390_vchhs);
  INTRINSIC_WITH_CC(s390_vchfs);
  INTRINSIC_WITH_CC(s390_vchgs);

  INTRINSIC_WITH_CC(s390_vchlbs);
  INTRINSIC_WITH_CC(s390_vchlhs);
  INTRINSIC_WITH_CC(s390_vchlfs);
  INTRINSIC_WITH_CC(s390_vchlgs);

  INTRINSIC_WITH_CC(s390_vfaebs);
  INTRINSIC_WITH_CC(s390_vfaehs);
  INTRINSIC_WITH_CC(s390_vfaefs);

  INTRINSIC_WITH_CC(s390_vfaezbs);
  INTRINSIC_WITH_CC(s390_vfaezhs);
  INTRINSIC_WITH_CC(s390_vfaezfs);

  INTRINSIC_WITH_CC(s390_vfeebs);
  INTRINSIC_WITH_CC(s390_vfeehs);
  INTRINSIC_WITH_CC(s390_vfeefs);

  INTRINSIC_WITH_CC(s390_vfeezbs);
  INTRINSIC_WITH_CC(s390_vfeezhs);
  INTRINSIC_WITH_CC(s390_vfeezfs);

  INTRINSIC_WITH_CC(s390_vfenebs);
  INTRINSIC_WITH_CC(s390_vfenehs);
  INTRINSIC_WITH_CC(s390_vfenefs);

  INTRINSIC_WITH_CC(s390_vfenezbs);
  INTRINSIC_WITH_CC(s390_vfenezhs);
  INTRINSIC_WITH_CC(s390_vfenezfs);

  INTRINSIC_WITH_CC(s390_vistrbs);
  INTRINSIC_WITH_CC(s390_vistrhs);
  INTRINSIC_WITH_CC(s390_vistrfs);

  INTRINSIC_WITH_CC(s390_vstrcbs);
  INTRINSIC_WITH_CC(s390_vstrchs);
  INTRINSIC_WITH_CC(s390_vstrcfs);

  INTRINSIC_WITH_CC(s390_vstrczbs);
  INTRINSIC_WITH_CC(s390_vstrczhs);
  INTRINSIC_WITH_CC(s390_vstrczfs);

  INTRINSIC_WITH_CC(s390_vfcesbs);
  INTRINSIC_WITH_CC(s390_vfcedbs);
  INTRINSIC_WITH_CC(s390_vfchsbs);
  INTRINSIC_WITH_CC(s390_vfchdbs);
  INTRINSIC_WITH_CC(s390_vfchesbs);
  INTRINSIC_WITH_CC(s390_vfchedbs);

  INTRINSIC_WITH_CC(s390_vftcisb);
  INTRINSIC_WITH_CC(s390_vftcidb);

  INTRINSIC_WITH_CC(s390_vstrsb);
  INTRINSIC_WITH_CC(s390_vstrsh);
  INTRINSIC_WITH_CC(s390_vstrsf);

  INTRINSIC_WITH_CC(s390_vstrszb);
  INTRINSIC_WITH_CC(s390_vstrszh);
  INTRINSIC_WITH_CC(s390_vstrszf);

#undef INTRINSIC_WITH_CC

  default:
    return nullptr;
  }
}

namespace {
// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant.
struct NVPTXMmaLdstInfo {
  unsigned NumResults;  // Number of elements to load/store
  // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported.
  unsigned IID_col;
  unsigned IID_row;
};

#define MMA_INTR(geom_op_type, layout) \
  Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
#define MMA_LDST(n, geom_op_type)                                              \
  { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
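
// As a concrete illustration of the macros above (expansion sketched by hand):
//   MMA_LDST(8, m16n16k16_load_a_f16)
// expands to
//   { 8, Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
//        Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride }
// i.e. 8 result elements plus the col- and row-major intrinsic IDs.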

static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
  switch (BuiltinID) {
  // FP MMA loads
  case NVPTX::BI__hmma_m16n16k16_ld_a:
    return MMA_LDST(8, m16n16k16_load_a_f16);
  case NVPTX::BI__hmma_m16n16k16_ld_b:
    return MMA_LDST(8, m16n16k16_load_b_f16);
  case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
    return MMA_LDST(4, m16n16k16_load_c_f16);
  case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
    return MMA_LDST(8, m16n16k16_load_c_f32);
  case NVPTX::BI__hmma_m32n8k16_ld_a:
    return MMA_LDST(8, m32n8k16_load_a_f16);
  case NVPTX::BI__hmma_m32n8k16_ld_b:
    return MMA_LDST(8, m32n8k16_load_b_f16);
  case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
    return MMA_LDST(4, m32n8k16_load_c_f16);
  case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
    return MMA_LDST(8, m32n8k16_load_c_f32);
  case NVPTX::BI__hmma_m8n32k16_ld_a:
    return MMA_LDST(8, m8n32k16_load_a_f16);
  case NVPTX::BI__hmma_m8n32k16_ld_b:
    return MMA_LDST(8, m8n32k16_load_b_f16);
  case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
    return MMA_LDST(4, m8n32k16_load_c_f16);
  case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
    return MMA_LDST(8, m8n32k16_load_c_f32);

  // Integer MMA loads
  case NVPTX::BI__imma_m16n16k16_ld_a_s8:
    return MMA_LDST(2, m16n16k16_load_a_s8);
  case NVPTX::BI__imma_m16n16k16_ld_a_u8:
    return MMA_LDST(2, m16n16k16_load_a_u8);
  case NVPTX::BI__imma_m16n16k16_ld_b_s8:
    return MMA_LDST(2, m16n16k16_load_b_s8);
  case NVPTX::BI__imma_m16n16k16_ld_b_u8:
    return MMA_LDST(2, m16n16k16_load_b_u8);
  case NVPTX::BI__imma_m16n16k16_ld_c:
    return MMA_LDST(8, m16n16k16_load_c_s32);
  case NVPTX::BI__imma_m32n8k16_ld_a_s8:
    return MMA_LDST(4, m32n8k16_load_a_s8);
  case NVPTX::BI__imma_m32n8k16_ld_a_u8:
    return MMA_LDST(4, m32n8k16_load_a_u8);
  case NVPTX::BI__imma_m32n8k16_ld_b_s8:
    return MMA_LDST(1, m32n8k16_load_b_s8);
  case NVPTX::BI__imma_m32n8k16_ld_b_u8:
    return MMA_LDST(1, m32n8k16_load_b_u8);
  case NVPTX::BI__imma_m32n8k16_ld_c:
    return MMA_LDST(8, m32n8k16_load_c_s32);
  case NVPTX::BI__imma_m8n32k16_ld_a_s8:
    return MMA_LDST(1, m8n32k16_load_a_s8);
  case NVPTX::BI__imma_m8n32k16_ld_a_u8:
    return MMA_LDST(1, m8n32k16_load_a_u8);
  case NVPTX::BI__imma_m8n32k16_ld_b_s8:
    return MMA_LDST(4, m8n32k16_load_b_s8);
  case NVPTX::BI__imma_m8n32k16_ld_b_u8:
    return MMA_LDST(4, m8n32k16_load_b_u8);
  case NVPTX::BI__imma_m8n32k16_ld_c:
    return MMA_LDST(8, m8n32k16_load_c_s32);

  // Sub-integer MMA loads.
  // Only row/col layout is supported by A/B fragments.
  case NVPTX::BI__imma_m8n8k32_ld_a_s4:
    return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
  case NVPTX::BI__imma_m8n8k32_ld_a_u4:
    return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
  case NVPTX::BI__imma_m8n8k32_ld_b_s4:
    return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
  case NVPTX::BI__imma_m8n8k32_ld_b_u4:
    return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
  case NVPTX::BI__imma_m8n8k32_ld_c:
    return MMA_LDST(2, m8n8k32_load_c_s32);
  case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
    return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
  case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
    return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
  case NVPTX::BI__bmma_m8n8k128_ld_c:
    return MMA_LDST(2, m8n8k128_load_c_s32);

  // Double MMA loads
  case NVPTX::BI__dmma_m8n8k4_ld_a:
    return MMA_LDST(1, m8n8k4_load_a_f64);
  case NVPTX::BI__dmma_m8n8k4_ld_b:
    return MMA_LDST(1, m8n8k4_load_b_f64);
  case NVPTX::BI__dmma_m8n8k4_ld_c:
    return MMA_LDST(2, m8n8k4_load_c_f64);

  // Alternate float MMA loads
  case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
    return MMA_LDST(4, m16n16k16_load_a_bf16);
  case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
    return MMA_LDST(4, m16n16k16_load_b_bf16);
  case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
    return MMA_LDST(2, m8n32k16_load_a_bf16);
  case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
    return MMA_LDST(8, m8n32k16_load_b_bf16);
  case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
    return MMA_LDST(8, m32n8k16_load_a_bf16);
  case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
    return MMA_LDST(2, m32n8k16_load_b_bf16);
  case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
    return MMA_LDST(4, m16n16k8_load_a_tf32);
  case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
    return MMA_LDST(4, m16n16k8_load_b_tf32);
  case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
    return MMA_LDST(8, m16n16k8_load_c_f32);

  // NOTE: We need to follow the inconsistent naming scheme used by NVCC.
  // Unlike PTX and LLVM IR, where stores always use fragment D, NVCC builtins
  // always use fragment C for both loads and stores.
  // HMMA stores.
  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
    return MMA_LDST(4, m16n16k16_store_d_f16);
  case NVPTX::BI__hmma_m16n16k16_st_c_f32:
    return MMA_LDST(8, m16n16k16_store_d_f32);
  case NVPTX::BI__hmma_m32n8k16_st_c_f16:
    return MMA_LDST(4, m32n8k16_store_d_f16);
  case NVPTX::BI__hmma_m32n8k16_st_c_f32:
    return MMA_LDST(8, m32n8k16_store_d_f32);
  case NVPTX::BI__hmma_m8n32k16_st_c_f16:
    return MMA_LDST(4, m8n32k16_store_d_f16);
  case NVPTX::BI__hmma_m8n32k16_st_c_f32:
    return MMA_LDST(8, m8n32k16_store_d_f32);

  // Integer and sub-integer MMA stores.
  // Another naming quirk: unlike other MMA builtins that use PTX types in the
  // name, integer loads/stores use LLVM's i32.
  case NVPTX::BI__imma_m16n16k16_st_c_i32:
    return MMA_LDST(8, m16n16k16_store_d_s32);
  case NVPTX::BI__imma_m32n8k16_st_c_i32:
    return MMA_LDST(8, m32n8k16_store_d_s32);
  case NVPTX::BI__imma_m8n32k16_st_c_i32:
    return MMA_LDST(8, m8n32k16_store_d_s32);
  case NVPTX::BI__imma_m8n8k32_st_c_i32:
    return MMA_LDST(2, m8n8k32_store_d_s32);
  case NVPTX::BI__bmma_m8n8k128_st_c_i32:
    return MMA_LDST(2, m8n8k128_store_d_s32);

  // Double MMA store
  case NVPTX::BI__dmma_m8n8k4_st_c_f64:
    return MMA_LDST(2, m8n8k4_store_d_f64);

  // Alternate float MMA store
  case NVPTX::BI__mma_m16n16k8_st_c_f32:
    return MMA_LDST(8, m16n16k8_store_d_f32);

  default:
    llvm_unreachable("Unknown MMA builtin");
  }
}
#undef MMA_LDST
#undef MMA_INTR

struct NVPTXMmaInfo {
  unsigned NumEltsA;
  unsigned NumEltsB;
  unsigned NumEltsC;
  unsigned NumEltsD;

  // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
  // over 'col' for layout.  The index of non-satf variants is expected to
  // match the undocumented layout constants used by CUDA's mma.hpp.
  std::array<unsigned, 8> Variants;

  unsigned getMMAIntrinsic(int Layout, bool Satf) {
    unsigned Index = Layout + 4 * Satf;
    if (Index >= Variants.size())
      return 0;
    return Variants[Index];
  }
};
} // namespace
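
// Worked example of the Variants indexing above (a sketch, using the layout
// constants assumed from CUDA's mma.hpp: row_row=0, row_col=1, col_row=2,
// col_col=3):
//   getMMAIntrinsic(/*Layout=*/1, /*Satf=*/true) reads Variants[1 + 4*1],
// i.e. slot 5, the row_col ".satfinite" variant; unsupported slots hold 0 and
// make the caller bail out.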

// Returns an intrinsic that matches Layout and Satf for valid combinations of
// Layout and Satf, 0 otherwise.
static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
  // clang-format off
#define MMA_VARIANTS(geom, type)                                    \
      Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type,             \
      Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type,             \
      Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type,             \
      Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
#define MMA_SATF_VARIANTS(geom, type)                               \
      MMA_VARIANTS(geom, type),                                     \
      Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
      Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
      Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
      Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
// Sub-integer MMA only supports row.col layout.
#define MMA_VARIANTS_I4(geom, type) \
      0, \
      Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
      0, \
      0, \
      0, \
      Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
      0, \
      0
// b1 MMA does not support .satfinite.
#define MMA_VARIANTS_B1_XOR(geom, type) \
      0, \
      Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
      0, \
      0, \
      0, \
      0, \
      0, \
      0
#define MMA_VARIANTS_B1_AND(geom, type) \
      0, \
      Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
      0, \
      0, \
      0, \
      0, \
      0, \
      0
  // clang-format on
  switch (BuiltinID) {
  // FP MMA
  // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation,
  // while the NumEltsN fields of the return value are ordered as A,B,C,D.
  case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
    return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
  case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
    return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
  case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
    return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
  case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
    return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
  case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
    return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
  case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
    return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
  case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
    return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
  case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
    return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
  case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
    return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
  case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
    return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
  case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
    return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
  case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
    return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};

  // Integer MMA
  case NVPTX::BI__imma_m16n16k16_mma_s8:
    return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
  case NVPTX::BI__imma_m16n16k16_mma_u8:
    return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
  case NVPTX::BI__imma_m32n8k16_mma_s8:
    return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
  case NVPTX::BI__imma_m32n8k16_mma_u8:
    return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
  case NVPTX::BI__imma_m8n32k16_mma_s8:
    return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
  case NVPTX::BI__imma_m8n32k16_mma_u8:
    return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};

  // Sub-integer MMA
  case NVPTX::BI__imma_m8n8k32_mma_s4:
    return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
  case NVPTX::BI__imma_m8n8k32_mma_u4:
    return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
  case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
    return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
  case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
    return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};

  // Double MMA
  case NVPTX::BI__dmma_m8n8k4_mma_f64:
    return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};

  // Alternate FP MMA
  case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
    return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
  case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
    return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
  case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
    return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
  case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
    return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
  default:
    llvm_unreachable("Unexpected builtin ID.");
  }
#undef MMA_VARIANTS
#undef MMA_SATF_VARIANTS
#undef MMA_VARIANTS_I4
#undef MMA_VARIANTS_B1_AND
#undef MMA_VARIANTS_B1_XOR
}

static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
                         const CallExpr *E) {
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
  QualType ArgType = E->getArg(0)->getType();
  clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
  llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
  return CGF.Builder.CreateCall(
      CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
      {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
}
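
// Rough shape of the call this helper builds (illustrative, for a plain int
// and the overloaded ldg intrinsic):
//   %v = call i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr %p, i32 4)
// The trailing i32 is the natural alignment of the pointee type, which the
// backend consults when forming the ld.global.nc / ldu instruction.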

static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
                               const CallExpr *E) {
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Type *ElemTy =
      CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
  return CGF.Builder.CreateCall(
      CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
      {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
}

static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
                          CodeGenFunction &CGF, const CallExpr *E,
                          int SrcSize) {
  return E->getNumArgs() == 3
             ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
                                      {CGF.EmitScalarExpr(E->getArg(0)),
                                       CGF.EmitScalarExpr(E->getArg(1)),
                                       CGF.EmitScalarExpr(E->getArg(2))})
             : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
                                      {CGF.EmitScalarExpr(E->getArg(0)),
                                       CGF.EmitScalarExpr(E->getArg(1))});
}
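
// In other words (a sketch of the dispatch, not additional behavior): the
// two-argument form of __nvvm_cp_async_* selects the plain intrinsic, while
// the three-argument form (with an explicit ignore-src operand) selects the
// "_s" variant that carries the extra operand through to PTX cp.async.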

static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
                           const CallExpr *E, CodeGenFunction &CGF) {
  auto &C = CGF.CGM.getContext();
  if (!(C.getLangOpts().NativeHalfType ||
        !C.getTargetInfo().useFP16ConversionIntrinsics())) {
    CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
                                       " requires native half type support.");
    return nullptr;
  }

  if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
      IntrinsicID == Intrinsic::nvvm_ldu_global_f)
    return MakeLdgLdu(IntrinsicID, CGF, E);

  SmallVector<Value *, 16> Args;
  auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
  auto *FTy = F->getFunctionType();
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");
  for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
    assert((ICEArguments & (1 << i)) == 0);
    auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
    auto *PTy = FTy->getParamType(i);
    if (PTy != ArgValue->getType())
      ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
    Args.push_back(ArgValue);
  }

  return CGF.Builder.CreateCall(F, Args);
}

Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
                                             const CallExpr *E) {
  switch (BuiltinID) {
  case NVPTX::BI__nvvm_atom_add_gen_i:
  case NVPTX::BI__nvvm_atom_add_gen_l:
  case NVPTX::BI__nvvm_atom_add_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);

  case NVPTX::BI__nvvm_atom_sub_gen_i:
  case NVPTX::BI__nvvm_atom_sub_gen_l:
  case NVPTX::BI__nvvm_atom_sub_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);

  case NVPTX::BI__nvvm_atom_and_gen_i:
  case NVPTX::BI__nvvm_atom_and_gen_l:
  case NVPTX::BI__nvvm_atom_and_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);

  case NVPTX::BI__nvvm_atom_or_gen_i:
  case NVPTX::BI__nvvm_atom_or_gen_l:
  case NVPTX::BI__nvvm_atom_or_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);

  case NVPTX::BI__nvvm_atom_xor_gen_i:
  case NVPTX::BI__nvvm_atom_xor_gen_l:
  case NVPTX::BI__nvvm_atom_xor_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);

  case NVPTX::BI__nvvm_atom_xchg_gen_i:
  case NVPTX::BI__nvvm_atom_xchg_gen_l:
  case NVPTX::BI__nvvm_atom_xchg_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);

  case NVPTX::BI__nvvm_atom_max_gen_i:
  case NVPTX::BI__nvvm_atom_max_gen_l:
  case NVPTX::BI__nvvm_atom_max_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);

  case NVPTX::BI__nvvm_atom_max_gen_ui:
  case NVPTX::BI__nvvm_atom_max_gen_ul:
  case NVPTX::BI__nvvm_atom_max_gen_ull:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);

  case NVPTX::BI__nvvm_atom_min_gen_i:
  case NVPTX::BI__nvvm_atom_min_gen_l:
  case NVPTX::BI__nvvm_atom_min_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);

  case NVPTX::BI__nvvm_atom_min_gen_ui:
  case NVPTX::BI__nvvm_atom_min_gen_ul:
  case NVPTX::BI__nvvm_atom_min_gen_ull:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);

  case NVPTX::BI__nvvm_atom_cas_gen_i:
  case NVPTX::BI__nvvm_atom_cas_gen_l:
  case NVPTX::BI__nvvm_atom_cas_gen_ll:
    // __nvvm_atom_cas_gen_* should return the old value rather than the
    // success flag.
    return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);

  case NVPTX::BI__nvvm_atom_add_gen_f:
  case NVPTX::BI__nvvm_atom_add_gen_d: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, Ptr, Val,
                                   AtomicOrdering::SequentiallyConsistent);
  }
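  // The float/double forms lower straight to an LLVM atomicrmw; roughly
  // (illustrative IR only):
  //   %old = atomicrmw fadd ptr %p, float %v seq_cst
  // which the NVPTX backend can then select into a PTX atomic add.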
  case NVPTX::BI__nvvm_atom_inc_gen_ui: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    Function *FnALI32 =
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
    return Builder.CreateCall(FnALI32, {Ptr, Val});
  }

  case NVPTX::BI__nvvm_atom_dec_gen_ui: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    Function *FnALD32 =
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
    return Builder.CreateCall(FnALD32, {Ptr, Val});
  }

  case NVPTX::BI__nvvm_ldg_c:
  case NVPTX::BI__nvvm_ldg_sc:
  case NVPTX::BI__nvvm_ldg_c2:
  case NVPTX::BI__nvvm_ldg_sc2:
  case NVPTX::BI__nvvm_ldg_c4:
  case NVPTX::BI__nvvm_ldg_sc4:
  case NVPTX::BI__nvvm_ldg_s:
  case NVPTX::BI__nvvm_ldg_s2:
  case NVPTX::BI__nvvm_ldg_s4:
  case NVPTX::BI__nvvm_ldg_i:
  case NVPTX::BI__nvvm_ldg_i2:
  case NVPTX::BI__nvvm_ldg_i4:
  case NVPTX::BI__nvvm_ldg_l:
  case NVPTX::BI__nvvm_ldg_l2:
  case NVPTX::BI__nvvm_ldg_ll:
  case NVPTX::BI__nvvm_ldg_ll2:
  case NVPTX::BI__nvvm_ldg_uc:
  case NVPTX::BI__nvvm_ldg_uc2:
  case NVPTX::BI__nvvm_ldg_uc4:
  case NVPTX::BI__nvvm_ldg_us:
  case NVPTX::BI__nvvm_ldg_us2:
  case NVPTX::BI__nvvm_ldg_us4:
  case NVPTX::BI__nvvm_ldg_ui:
  case NVPTX::BI__nvvm_ldg_ui2:
  case NVPTX::BI__nvvm_ldg_ui4:
  case NVPTX::BI__nvvm_ldg_ul:
  case NVPTX::BI__nvvm_ldg_ul2:
  case NVPTX::BI__nvvm_ldg_ull:
  case NVPTX::BI__nvvm_ldg_ull2:
    // PTX Interoperability section 2.2: "For a vector with an even number of
    // elements, its alignment is set to number of elements times the alignment
    // of its member: n*alignof(t)."
    return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
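  // As an example of the alignment rule quoted above (a sketch, assuming a
  // generic address-space pointer): __nvvm_ldg_i4 on an int4 would load via
  //   call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0(ptr %p, i32 16)
  // since 4 * alignof(int) == 16 is the natural pointee alignment used here.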
  case NVPTX::BI__nvvm_ldg_f:
  case NVPTX::BI__nvvm_ldg_f2:
  case NVPTX::BI__nvvm_ldg_f4:
  case NVPTX::BI__nvvm_ldg_d:
  case NVPTX::BI__nvvm_ldg_d2:
    return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);

  case NVPTX::BI__nvvm_ldu_c:
  case NVPTX::BI__nvvm_ldu_sc:
  case NVPTX::BI__nvvm_ldu_c2:
  case NVPTX::BI__nvvm_ldu_sc2:
  case NVPTX::BI__nvvm_ldu_c4:
  case NVPTX::BI__nvvm_ldu_sc4:
  case NVPTX::BI__nvvm_ldu_s:
  case NVPTX::BI__nvvm_ldu_s2:
  case NVPTX::BI__nvvm_ldu_s4:
  case NVPTX::BI__nvvm_ldu_i:
  case NVPTX::BI__nvvm_ldu_i2:
  case NVPTX::BI__nvvm_ldu_i4:
  case NVPTX::BI__nvvm_ldu_l:
  case NVPTX::BI__nvvm_ldu_l2:
  case NVPTX::BI__nvvm_ldu_ll:
  case NVPTX::BI__nvvm_ldu_ll2:
  case NVPTX::BI__nvvm_ldu_uc:
  case NVPTX::BI__nvvm_ldu_uc2:
  case NVPTX::BI__nvvm_ldu_uc4:
  case NVPTX::BI__nvvm_ldu_us:
  case NVPTX::BI__nvvm_ldu_us2:
  case NVPTX::BI__nvvm_ldu_us4:
  case NVPTX::BI__nvvm_ldu_ui:
  case NVPTX::BI__nvvm_ldu_ui2:
  case NVPTX::BI__nvvm_ldu_ui4:
  case NVPTX::BI__nvvm_ldu_ul:
  case NVPTX::BI__nvvm_ldu_ul2:
  case NVPTX::BI__nvvm_ldu_ull:
  case NVPTX::BI__nvvm_ldu_ull2:
    return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
  case NVPTX::BI__nvvm_ldu_f:
  case NVPTX::BI__nvvm_ldu_f2:
  case NVPTX::BI__nvvm_ldu_f4:
  case NVPTX::BI__nvvm_ldu_d:
  case NVPTX::BI__nvvm_ldu_d2:
    return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);

  case NVPTX::BI__nvvm_atom_cta_add_gen_i:
  case NVPTX::BI__nvvm_atom_cta_add_gen_l:
  case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
  case NVPTX::BI__nvvm_atom_sys_add_gen_i:
  case NVPTX::BI__nvvm_atom_sys_add_gen_l:
  case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
  case NVPTX::BI__nvvm_atom_cta_add_gen_f:
  case NVPTX::BI__nvvm_atom_cta_add_gen_d:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
  case NVPTX::BI__nvvm_atom_sys_add_gen_f:
  case NVPTX::BI__nvvm_atom_sys_add_gen_d:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
  case NVPTX::BI__nvvm_atom_cta_max_gen_i:
  case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
  case NVPTX::BI__nvvm_atom_cta_max_gen_l:
  case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
  case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
  case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
  case NVPTX::BI__nvvm_atom_sys_max_gen_i:
  case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
  case NVPTX::BI__nvvm_atom_sys_max_gen_l:
  case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
  case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
  case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
  case NVPTX::BI__nvvm_atom_cta_min_gen_i:
  case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
  case NVPTX::BI__nvvm_atom_cta_min_gen_l:
  case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
  case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
  case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
  case NVPTX::BI__nvvm_atom_sys_min_gen_i:
  case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
  case NVPTX::BI__nvvm_atom_sys_min_gen_l:
  case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
  case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
  case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
  case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
  case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
  case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
  case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
  case NVPTX::BI__nvvm_atom_cta_and_gen_i:
  case NVPTX::BI__nvvm_atom_cta_and_gen_l:
  case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
  case NVPTX::BI__nvvm_atom_sys_and_gen_i:
  case NVPTX::BI__nvvm_atom_sys_and_gen_l:
  case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
  case NVPTX::BI__nvvm_atom_cta_or_gen_i:
  case NVPTX::BI__nvvm_atom_cta_or_gen_l:
  case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
  case NVPTX::BI__nvvm_atom_sys_or_gen_i:
  case NVPTX::BI__nvvm_atom_sys_or_gen_l:
  case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
  case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
  case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
  case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
  case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
  case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
  case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
  case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
  case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
  case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    llvm::Type *ElemTy =
        ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
    return Builder.CreateCall(
        CGM.getIntrinsic(
            Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
        {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
  }
  case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
  case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
  case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    llvm::Type *ElemTy =
        ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
    return Builder.CreateCall(
        CGM.getIntrinsic(
            Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
        {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
  }
  case NVPTX::BI__nvvm_match_all_sync_i32p:
  case NVPTX::BI__nvvm_match_all_sync_i64p: {
    Value *Mask = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
    Value *ResultPair = Builder.CreateCall(
        CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
                             ? Intrinsic::nvvm_match_all_sync_i32p
                             : Intrinsic::nvvm_match_all_sync_i64p),
        {Mask, Val});
    Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
                                     PredOutPtr.getElementType());
    Builder.CreateStore(Pred, PredOutPtr);
    return Builder.CreateExtractValue(ResultPair, 0);
  }
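  // Shape of the lowering above (a sketch): the intrinsic returns a
  // {match-mask, i1 pred} pair; the i1 is widened and stored through the
  // third builtin argument, and only the mask is returned to the caller.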

  // FP MMA loads
  case NVPTX::BI__hmma_m16n16k16_ld_a:
  case NVPTX::BI__hmma_m16n16k16_ld_b:
  case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
  case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
  case NVPTX::BI__hmma_m32n8k16_ld_a:
  case NVPTX::BI__hmma_m32n8k16_ld_b:
  case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
  case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
  case NVPTX::BI__hmma_m8n32k16_ld_a:
  case NVPTX::BI__hmma_m8n32k16_ld_b:
  case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
  case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
  // Integer MMA loads.
  case NVPTX::BI__imma_m16n16k16_ld_a_s8:
  case NVPTX::BI__imma_m16n16k16_ld_a_u8:
  case NVPTX::BI__imma_m16n16k16_ld_b_s8:
  case NVPTX::BI__imma_m16n16k16_ld_b_u8:
  case NVPTX::BI__imma_m16n16k16_ld_c:
  case NVPTX::BI__imma_m32n8k16_ld_a_s8:
  case NVPTX::BI__imma_m32n8k16_ld_a_u8:
  case NVPTX::BI__imma_m32n8k16_ld_b_s8:
  case NVPTX::BI__imma_m32n8k16_ld_b_u8:
  case NVPTX::BI__imma_m32n8k16_ld_c:
  case NVPTX::BI__imma_m8n32k16_ld_a_s8:
  case NVPTX::BI__imma_m8n32k16_ld_a_u8:
  case NVPTX::BI__imma_m8n32k16_ld_b_s8:
  case NVPTX::BI__imma_m8n32k16_ld_b_u8:
  case NVPTX::BI__imma_m8n32k16_ld_c:
  // Sub-integer MMA loads.
  case NVPTX::BI__imma_m8n8k32_ld_a_s4:
  case NVPTX::BI__imma_m8n8k32_ld_a_u4:
  case NVPTX::BI__imma_m8n8k32_ld_b_s4:
  case NVPTX::BI__imma_m8n8k32_ld_b_u4:
  case NVPTX::BI__imma_m8n8k32_ld_c:
  case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
  case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
  case NVPTX::BI__bmma_m8n8k128_ld_c:
  // Double MMA loads.
  case NVPTX::BI__dmma_m8n8k4_ld_a:
  case NVPTX::BI__dmma_m8n8k4_ld_b:
  case NVPTX::BI__dmma_m8n8k4_ld_c:
  // Alternate float MMA loads.
  case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
  case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
  case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
  case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
  case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
  case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
  case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
  case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
  case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
    Address Dst = EmitPointerWithAlignment(E->getArg(0));
    Value *Src = EmitScalarExpr(E->getArg(1));
    Value *Ldm = EmitScalarExpr(E->getArg(2));
    std::optional<llvm::APSInt> isColMajorArg =
        E->getArg(3)->getIntegerConstantExpr(getContext());
    if (!isColMajorArg)
      return nullptr;
    bool isColMajor = isColMajorArg->getSExtValue();
    NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
    unsigned IID = isColMajor ? II.IID_col : II.IID_row;
    if (IID == 0)
      return nullptr;

    Value *Result =
        Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});

    // Save returned values.
    assert(II.NumResults);
    if (II.NumResults == 1) {
      Builder.CreateAlignedStore(Result, Dst.getPointer(),
                                 CharUnits::fromQuantity(4));
    } else {
      for (unsigned i = 0; i < II.NumResults; ++i) {
        Builder.CreateAlignedStore(
            Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
                                  Dst.getElementType()),
            Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
                              llvm::ConstantInt::get(IntTy, i)),
            CharUnits::fromQuantity(4));
      }
    }
    return Result;
  }
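  // Usage sketch (CUDA-side view, assumptions noted): a call such as
  //   __hmma_m16n16k16_ld_a(dst, src, ldm, /*isColMajor=*/0)
  // emits one wmma.load intrinsic call and then spreads the returned
  // aggregate into dst element by element with 4-byte-aligned stores.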

  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
  case NVPTX::BI__hmma_m16n16k16_st_c_f32:
  case NVPTX::BI__hmma_m32n8k16_st_c_f16:
  case NVPTX::BI__hmma_m32n8k16_st_c_f32:
  case NVPTX::BI__hmma_m8n32k16_st_c_f16:
  case NVPTX::BI__hmma_m8n32k16_st_c_f32:
  case NVPTX::BI__imma_m16n16k16_st_c_i32:
  case NVPTX::BI__imma_m32n8k16_st_c_i32:
  case NVPTX::BI__imma_m8n32k16_st_c_i32:
  case NVPTX::BI__imma_m8n8k32_st_c_i32:
  case NVPTX::BI__bmma_m8n8k128_st_c_i32:
  case NVPTX::BI__dmma_m8n8k4_st_c_f64:
  case NVPTX::BI__mma_m16n16k8_st_c_f32: {
    Value *Dst = EmitScalarExpr(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *Ldm = EmitScalarExpr(E->getArg(2));
    std::optional<llvm::APSInt> isColMajorArg =
        E->getArg(3)->getIntegerConstantExpr(getContext());
    if (!isColMajorArg)
      return nullptr;
    bool isColMajor = isColMajorArg->getSExtValue();
    NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
    unsigned IID = isColMajor ? II.IID_col : II.IID_row;
    if (IID == 0)
      return nullptr;

    Function *Intrinsic =
        CGM.getIntrinsic(IID, Dst->getType());
    llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
    SmallVector<Value *, 10> Values = {Dst};
    for (unsigned i = 0; i < II.NumResults; ++i) {
      Value *V = Builder.CreateAlignedLoad(
          Src.getElementType(),
          Builder.CreateGEP(Src.getElementType(), Src.getPointer(),
                            llvm::ConstantInt::get(IntTy, i)),
          CharUnits::fromQuantity(4));
      Values.push_back(Builder.CreateBitCast(V, ParamType));
    }
    Values.push_back(Ldm);
    Value *Result = Builder.CreateCall(Intrinsic, Values);
    return Result;
  }

  // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
  // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
  case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
  case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
  case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
  case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
  case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
  case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
  case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
  case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
  case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
  case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
  case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
  case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
  case NVPTX::BI__imma_m16n16k16_mma_s8:
  case NVPTX::BI__imma_m16n16k16_mma_u8:
  case NVPTX::BI__imma_m32n8k16_mma_s8:
  case NVPTX::BI__imma_m32n8k16_mma_u8:
  case NVPTX::BI__imma_m8n32k16_mma_s8:
  case NVPTX::BI__imma_m8n32k16_mma_u8:
  case NVPTX::BI__imma_m8n8k32_mma_s4:
  case NVPTX::BI__imma_m8n8k32_mma_u4:
  case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
  case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
  case NVPTX::BI__dmma_m8n8k4_mma_f64:
  case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
  case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
  case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
  case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
    Address Dst = EmitPointerWithAlignment(E->getArg(0));
    Address SrcA = EmitPointerWithAlignment(E->getArg(1));
    Address SrcB = EmitPointerWithAlignment(E->getArg(2));
    Address SrcC = EmitPointerWithAlignment(E->getArg(3));
    std::optional<llvm::APSInt> LayoutArg =
        E->getArg(4)->getIntegerConstantExpr(getContext());
    if (!LayoutArg)
      return nullptr;
    int Layout = LayoutArg->getSExtValue();
    if (Layout < 0 || Layout > 3)
      return nullptr;
    llvm::APSInt SatfArg;
    if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
        BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
      SatfArg = 0;  // .b1 does not have satf argument.
    else if (std::optional<llvm::APSInt> OptSatfArg =
                 E->getArg(5)->getIntegerConstantExpr(getContext()))
      SatfArg = *OptSatfArg;
    else
      return nullptr;
    bool Satf = SatfArg.getSExtValue();
    NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
    unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
    if (IID == 0)  // Unsupported combination of Layout/Satf.
      return nullptr;

    SmallVector<Value *, 24> Values;
    Function *Intrinsic = CGM.getIntrinsic(IID);
    llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
    // Load A
    for (unsigned i = 0; i < MI.NumEltsA; ++i) {
      Value *V = Builder.CreateAlignedLoad(
          SrcA.getElementType(),
          Builder.CreateGEP(SrcA.getElementType(), SrcA.getPointer(),
                            llvm::ConstantInt::get(IntTy, i)),
          CharUnits::fromQuantity(4));
      Values.push_back(Builder.CreateBitCast(V, AType));
    }
    // Load B
    llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
    for (unsigned i = 0; i < MI.NumEltsB; ++i) {
      Value *V = Builder.CreateAlignedLoad(
          SrcB.getElementType(),
          Builder.CreateGEP(SrcB.getElementType(), SrcB.getPointer(),
                            llvm::ConstantInt::get(IntTy, i)),
          CharUnits::fromQuantity(4));
      Values.push_back(Builder.CreateBitCast(V, BType));
    }
    // Load C
    llvm::Type *CType =
        Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
    for (unsigned i = 0; i < MI.NumEltsC; ++i) {
      Value *V = Builder.CreateAlignedLoad(
          SrcC.getElementType(),
          Builder.CreateGEP(SrcC.getElementType(), SrcC.getPointer(),
                            llvm::ConstantInt::get(IntTy, i)),
          CharUnits::fromQuantity(4));
      Values.push_back(Builder.CreateBitCast(V, CType));
    }
    Value *Result = Builder.CreateCall(Intrinsic, Values);
    llvm::Type *DType = Dst.getElementType();
    for (unsigned i = 0; i < MI.NumEltsD; ++i)
      Builder.CreateAlignedStore(
          Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
          Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
                            llvm::ConstantInt::get(IntTy, i)),
          CharUnits::fromQuantity(4));
    return Result;
  }
18883 // The following builtins require half type support
18884 case NVPTX::BI__nvvm_ex2_approx_f16
:
18885 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16
, BuiltinID
, E
, *this);
18886 case NVPTX::BI__nvvm_ex2_approx_f16x2
:
18887 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2
, BuiltinID
, E
, *this);
18888 case NVPTX::BI__nvvm_ff2f16x2_rn
:
18889 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn
, BuiltinID
, E
, *this);
18890 case NVPTX::BI__nvvm_ff2f16x2_rn_relu
:
18891 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu
, BuiltinID
, E
, *this);
18892 case NVPTX::BI__nvvm_ff2f16x2_rz
:
18893 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz
, BuiltinID
, E
, *this);
18894 case NVPTX::BI__nvvm_ff2f16x2_rz_relu
:
18895 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu
, BuiltinID
, E
, *this);
18896 case NVPTX::BI__nvvm_fma_rn_f16
:
18897 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16
, BuiltinID
, E
, *this);
18898 case NVPTX::BI__nvvm_fma_rn_f16x2
:
18899 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2
, BuiltinID
, E
, *this);
18900 case NVPTX::BI__nvvm_fma_rn_ftz_f16
:
18901 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16
, BuiltinID
, E
, *this);
18902 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2
:
18903 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2
, BuiltinID
, E
, *this);
18904 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16
:
18905 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16
, BuiltinID
, E
,
18907 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2
:
18908 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2
, BuiltinID
, E
,
18910 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16
:
18911 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16
, BuiltinID
, E
,
18913 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2
:
18914 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2
, BuiltinID
, E
,
18916 case NVPTX::BI__nvvm_fma_rn_relu_f16
:
18917 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16
, BuiltinID
, E
, *this);
18918 case NVPTX::BI__nvvm_fma_rn_relu_f16x2
:
18919 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2
, BuiltinID
, E
, *this);
  case NVPTX::BI__nvvm_fma_rn_sat_f16:
    return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fmax_f16:
    return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fmax_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fmax_ftz_f16:
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fmax_ftz_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
                        *this);
  case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
                        E, *this);
  case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
                        BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
                        *this);
  case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
                        E, *this);
  case NVPTX::BI__nvvm_fmax_nan_f16:
    return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fmax_nan_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
    return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
                        *this);
  case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
                        E, *this);
  case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
    return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
                        *this);
  case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
                        *this);
  case NVPTX::BI__nvvm_fmin_f16:
    return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fmin_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fmin_ftz_f16:
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fmin_ftz_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
                        *this);
  case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
                        E, *this);
  case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
                        BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
                        *this);
  case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
                        E, *this);
  case NVPTX::BI__nvvm_fmin_nan_f16:
    return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fmin_nan_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
    return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
                        *this);
  case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
                        E, *this);
  case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
    return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
                        *this);
  case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
                        *this);
  case NVPTX::BI__nvvm_ldg_h:
    return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_ldg_h2:
    return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_ldu_h:
    return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_ldu_h2: {
    return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
  }
  case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
    return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
                       Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
                       4);
  case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
    return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
                       Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
                       8);
  case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
    return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
                       Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
                       16);
  case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
    return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
                       Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
                       16);
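  // Note (editor's sketch of the helper used above): MakeCpAsync, defined
  // earlier in this file, is assumed to pick the plain cp.async intrinsic when
  // the builtin is called with just the two pointer arguments and the "..._s"
  // variant when the optional source-size argument is also supplied; the
  // trailing constant is the copy size in bytes implied by the builtin's name.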
  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
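  // Each cluster special-register builtin above lowers to a plain,
  // argument-less call of the identically named llvm.nvvm.read.ptx.sreg.*
  // intrinsic; e.g. __nvvm_read_ptx_sreg_clusterid_x() becomes
  //   call i32 @llvm.nvvm.read.ptx.sreg.clusterid.x()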
  case NVPTX::BI__nvvm_is_explicit_cluster:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
  case NVPTX::BI__nvvm_isspacep_shared_cluster:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
        EmitScalarExpr(E->getArg(0)));
  case NVPTX::BI__nvvm_mapa:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_mapa),
        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
  case NVPTX::BI__nvvm_mapa_shared_cluster:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
  case NVPTX::BI__nvvm_getctarank:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
        EmitScalarExpr(E->getArg(0)));
  case NVPTX::BI__nvvm_getctarank_shared_cluster:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
        EmitScalarExpr(E->getArg(0)));
  case NVPTX::BI__nvvm_barrier_cluster_arrive:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
  case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
  case NVPTX::BI__nvvm_barrier_cluster_wait:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
  case NVPTX::BI__nvvm_fence_sc_cluster:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
  default:
    return nullptr;
  }
}
struct BuiltinAlignArgs {
  llvm::Value *Src = nullptr;
  llvm::Type *SrcType = nullptr;
  llvm::Value *Alignment = nullptr;
  llvm::Value *Mask = nullptr;
  llvm::IntegerType *IntType = nullptr;

  BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
    QualType AstType = E->getArg(0)->getType();
    if (AstType->isArrayType())
      Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).getPointer();
    else
      Src = CGF.EmitScalarExpr(E->getArg(0));
    SrcType = Src->getType();
    if (SrcType->isPointerTy()) {
      IntType = IntegerType::get(
          CGF.getLLVMContext(),
          CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
    } else {
      assert(SrcType->isIntegerTy());
      IntType = cast<llvm::IntegerType>(SrcType);
    }
    Alignment = CGF.EmitScalarExpr(E->getArg(1));
    Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
    auto *One = llvm::ConstantInt::get(IntType, 1);
    Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
  }
};
/// Generate (x & (y-1)) == 0.
RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
  BuiltinAlignArgs Args(E, *this);
  llvm::Value *SrcAddress = Args.Src;
  if (Args.SrcType->isPointerTy())
    SrcAddress =
        Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
  return RValue::get(Builder.CreateICmpEQ(
      Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
      llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
}
/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
/// TODO: actually use ptrmask once most optimization passes know about it.
RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
  BuiltinAlignArgs Args(E, *this);
  llvm::Value *SrcAddr = Args.Src;
  if (Args.Src->getType()->isPointerTy())
    SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr");
  llvm::Value *SrcForMask = SrcAddr;
  if (AlignUp) {
    // When aligning up we have to first add the mask to ensure we go over the
    // next alignment value and then align down to the next valid multiple.
    // By adding the mask, we ensure that align_up on an already aligned
    // value will not change the value.
    SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
  }
  // Invert the mask to only clear the lower bits.
  llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
  llvm::Value *Result =
      Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
  if (Args.Src->getType()->isPointerTy()) {
    /// TODO: Use ptrmask instead of ptrtoint+gep once it is optimized well.
    // Result = Builder.CreateIntrinsic(
    //   Intrinsic::ptrmask, {Args.SrcType, SrcForMask->getType(), Args.IntType},
    //   {SrcForMask, NegatedMask}, nullptr, "aligned_result");
    Result->setName("aligned_intptr");
    llvm::Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff");
    // The result must point to the same underlying allocation. This means we
    // can use an inbounds GEP to enable better optimization.
    Value *Base = EmitCastToVoidPtr(Args.Src);
    if (getLangOpts().isSignedOverflowDefined())
      Result = Builder.CreateGEP(Int8Ty, Base, Difference, "aligned_result");
    else
      Result = EmitCheckedInBoundsGEP(Int8Ty, Base, Difference,
                                      /*SignedIndices=*/true,
                                      /*isSubtraction=*/!AlignUp,
                                      E->getExprLoc(), "aligned_result");
    Result = Builder.CreatePointerCast(Result, Args.SrcType);
    // Emit an alignment assumption to ensure that the new alignment is
    // propagated to loads/stores, etc.
    emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
  }
  assert(Result->getType() == Args.SrcType);
  return RValue::get(Result);
}
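// A concrete example of the arithmetic above: with an address of 0x1003 and an
// alignment of 16 the mask is 0xF, so __builtin_align_down yields
// 0x1003 & ~0xF == 0x1000 and __builtin_align_up yields
// (0x1003 + 0xF) & ~0xF == 0x1010; an already aligned address is unchanged.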
Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
                                                   const CallExpr *E) {
  switch (BuiltinID) {
  case WebAssembly::BI__builtin_wasm_memory_size: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *I = EmitScalarExpr(E->getArg(0));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
    return Builder.CreateCall(Callee, I);
  }
  case WebAssembly::BI__builtin_wasm_memory_grow: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Args[] = {EmitScalarExpr(E->getArg(0)),
                     EmitScalarExpr(E->getArg(1))};
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
    return Builder.CreateCall(Callee, Args);
  }
  case WebAssembly::BI__builtin_wasm_tls_size: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
    return Builder.CreateCall(Callee);
  }
  case WebAssembly::BI__builtin_wasm_tls_align: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
    return Builder.CreateCall(Callee);
  }
  case WebAssembly::BI__builtin_wasm_tls_base: {
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
    return Builder.CreateCall(Callee);
  }
  case WebAssembly::BI__builtin_wasm_throw: {
    Value *Tag = EmitScalarExpr(E->getArg(0));
    Value *Obj = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
    return Builder.CreateCall(Callee, {Tag, Obj});
  }
  case WebAssembly::BI__builtin_wasm_rethrow: {
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
    return Builder.CreateCall(Callee);
  }
  case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
    Value *Addr = EmitScalarExpr(E->getArg(0));
    Value *Expected = EmitScalarExpr(E->getArg(1));
    Value *Timeout = EmitScalarExpr(E->getArg(2));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
    return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
  }
  case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
    Value *Addr = EmitScalarExpr(E->getArg(0));
    Value *Expected = EmitScalarExpr(E->getArg(1));
    Value *Timeout = EmitScalarExpr(E->getArg(2));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
    return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
  }
  case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
    Value *Addr = EmitScalarExpr(E->getArg(0));
    Value *Count = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
    return Builder.CreateCall(Callee, {Addr, Count});
  }
  case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
  case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
  case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
  case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
    Value *Src = EmitScalarExpr(E->getArg(0));
    llvm::Type *ResT = ConvertType(E->getType());
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
    return Builder.CreateCall(Callee, {Src});
  }
  case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
  case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
  case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
  case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
    Value *Src = EmitScalarExpr(E->getArg(0));
    llvm::Type *ResT = ConvertType(E->getType());
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
                                        {ResT, Src->getType()});
    return Builder.CreateCall(Callee, {Src});
  }
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
    Value *Src = EmitScalarExpr(E->getArg(0));
    llvm::Type *ResT = ConvertType(E->getType());
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
    return Builder.CreateCall(Callee, {Src});
  }
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
    Value *Src = EmitScalarExpr(E->getArg(0));
    llvm::Type *ResT = ConvertType(E->getType());
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
    return Builder.CreateCall(Callee, {Src});
  }
  case WebAssembly::BI__builtin_wasm_min_f32:
  case WebAssembly::BI__builtin_wasm_min_f64:
  case WebAssembly::BI__builtin_wasm_min_f32x4:
  case WebAssembly::BI__builtin_wasm_min_f64x2: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_max_f32:
  case WebAssembly::BI__builtin_wasm_max_f64:
  case WebAssembly::BI__builtin_wasm_max_f32x4:
  case WebAssembly::BI__builtin_wasm_max_f64x2: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_pmin_f32x4:
  case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_pmax_f32x4:
  case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_ceil_f32x4:
  case WebAssembly::BI__builtin_wasm_floor_f32x4:
  case WebAssembly::BI__builtin_wasm_trunc_f32x4:
  case WebAssembly::BI__builtin_wasm_nearest_f32x4:
  case WebAssembly::BI__builtin_wasm_ceil_f64x2:
  case WebAssembly::BI__builtin_wasm_floor_f64x2:
  case WebAssembly::BI__builtin_wasm_trunc_f64x2:
  case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_ceil_f32x4:
    case WebAssembly::BI__builtin_wasm_ceil_f64x2:
      IntNo = Intrinsic::ceil;
      break;
    case WebAssembly::BI__builtin_wasm_floor_f32x4:
    case WebAssembly::BI__builtin_wasm_floor_f64x2:
      IntNo = Intrinsic::floor;
      break;
    case WebAssembly::BI__builtin_wasm_trunc_f32x4:
    case WebAssembly::BI__builtin_wasm_trunc_f64x2:
      IntNo = Intrinsic::trunc;
      break;
    case WebAssembly::BI__builtin_wasm_nearest_f32x4:
    case WebAssembly::BI__builtin_wasm_nearest_f64x2:
      IntNo = Intrinsic::nearbyint;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    Value *Value = EmitScalarExpr(E->getArg(0));
    Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
    return Builder.CreateCall(Callee, Value);
  }
  case WebAssembly::BI__builtin_wasm_ref_null_extern: {
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
    return Builder.CreateCall(Callee);
  }
  case WebAssembly::BI__builtin_wasm_ref_null_func: {
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
    return Builder.CreateCall(Callee);
  }
  case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
    Value *Src = EmitScalarExpr(E->getArg(0));
    Value *Indices = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
    return Builder.CreateCall(Callee, {Src, Indices});
  }
  case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
  case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
  case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
  case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
  case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
  case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
  case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
  case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
    case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
      IntNo = Intrinsic::sadd_sat;
      break;
    case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
    case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
      IntNo = Intrinsic::uadd_sat;
      break;
    case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
    case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
      IntNo = Intrinsic::wasm_sub_sat_signed;
      break;
    case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
    case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:
      IntNo = Intrinsic::wasm_sub_sat_unsigned;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_abs_i8x16:
  case WebAssembly::BI__builtin_wasm_abs_i16x8:
  case WebAssembly::BI__builtin_wasm_abs_i32x4:
  case WebAssembly::BI__builtin_wasm_abs_i64x2: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Value *Neg = Builder.CreateNeg(Vec, "neg");
    Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
    Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
    return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
  }
  case WebAssembly::BI__builtin_wasm_min_s_i8x16:
  case WebAssembly::BI__builtin_wasm_min_u_i8x16:
  case WebAssembly::BI__builtin_wasm_max_s_i8x16:
  case WebAssembly::BI__builtin_wasm_max_u_i8x16:
  case WebAssembly::BI__builtin_wasm_min_s_i16x8:
  case WebAssembly::BI__builtin_wasm_min_u_i16x8:
  case WebAssembly::BI__builtin_wasm_max_s_i16x8:
  case WebAssembly::BI__builtin_wasm_max_u_i16x8:
  case WebAssembly::BI__builtin_wasm_min_s_i32x4:
  case WebAssembly::BI__builtin_wasm_min_u_i32x4:
  case WebAssembly::BI__builtin_wasm_max_s_i32x4:
  case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Value *ICmp;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_min_s_i8x16:
    case WebAssembly::BI__builtin_wasm_min_s_i16x8:
    case WebAssembly::BI__builtin_wasm_min_s_i32x4:
      ICmp = Builder.CreateICmpSLT(LHS, RHS);
      break;
    case WebAssembly::BI__builtin_wasm_min_u_i8x16:
    case WebAssembly::BI__builtin_wasm_min_u_i16x8:
    case WebAssembly::BI__builtin_wasm_min_u_i32x4:
      ICmp = Builder.CreateICmpULT(LHS, RHS);
      break;
    case WebAssembly::BI__builtin_wasm_max_s_i8x16:
    case WebAssembly::BI__builtin_wasm_max_s_i16x8:
    case WebAssembly::BI__builtin_wasm_max_s_i32x4:
      ICmp = Builder.CreateICmpSGT(LHS, RHS);
      break;
    case WebAssembly::BI__builtin_wasm_max_u_i8x16:
    case WebAssembly::BI__builtin_wasm_max_u_i16x8:
    case WebAssembly::BI__builtin_wasm_max_u_i32x4:
      ICmp = Builder.CreateICmpUGT(LHS, RHS);
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    return Builder.CreateSelect(ICmp, LHS, RHS);
  }
  case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
  case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
                                        ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
    case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
      IntNo = Intrinsic::wasm_extadd_pairwise_signed;
      break;
    case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
    case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
      IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }

    Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
    return Builder.CreateCall(Callee, Vec);
  }
  case WebAssembly::BI__builtin_wasm_bitselect: {
    Value *V1 = EmitScalarExpr(E->getArg(0));
    Value *V2 = EmitScalarExpr(E->getArg(1));
    Value *C = EmitScalarExpr(E->getArg(2));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {V1, V2, C});
  }
  case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {Vec});
  }
  case WebAssembly::BI__builtin_wasm_any_true_v128:
  case WebAssembly::BI__builtin_wasm_all_true_i8x16:
  case WebAssembly::BI__builtin_wasm_all_true_i16x8:
  case WebAssembly::BI__builtin_wasm_all_true_i32x4:
  case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_any_true_v128:
      IntNo = Intrinsic::wasm_anytrue;
      break;
    case WebAssembly::BI__builtin_wasm_all_true_i8x16:
    case WebAssembly::BI__builtin_wasm_all_true_i16x8:
    case WebAssembly::BI__builtin_wasm_all_true_i32x4:
    case WebAssembly::BI__builtin_wasm_all_true_i64x2:
      IntNo = Intrinsic::wasm_alltrue;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
    return Builder.CreateCall(Callee, {Vec});
  }
  case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
  case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
  case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
  case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
    return Builder.CreateCall(Callee, {Vec});
  }
  case WebAssembly::BI__builtin_wasm_abs_f32x4:
  case WebAssembly::BI__builtin_wasm_abs_f64x2: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
    return Builder.CreateCall(Callee, {Vec});
  }
  case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
  case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
    return Builder.CreateCall(Callee, {Vec});
  }
  case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
  case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
  case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
  case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
    Value *Low = EmitScalarExpr(E->getArg(0));
    Value *High = EmitScalarExpr(E->getArg(1));
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
    case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
      IntNo = Intrinsic::wasm_narrow_signed;
      break;
    case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
    case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
      IntNo = Intrinsic::wasm_narrow_unsigned;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    Function *Callee =
        CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
    return Builder.CreateCall(Callee, {Low, High});
  }
  case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
  case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
      IntNo = Intrinsic::fptosi_sat;
      break;
    case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
      IntNo = Intrinsic::fptoui_sat;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    llvm::Type *SrcT = Vec->getType();
    llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
    Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
    Value *Trunc = Builder.CreateCall(Callee, Vec);
    Value *Splat = Constant::getNullValue(TruncT);
    return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
  }
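  // The saturating f64x2 -> i32 conversion above only produces two lanes, so
  // the shuffle concatenates them with a zero vector to form the full i32x4
  // result: lanes 0-1 hold the converted values and lanes 2-3 are zero,
  // matching the "..._zero" semantics of the builtin.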
  case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
    Value *Ops[18];
    size_t OpIdx = 0;
    Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
    Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
    while (OpIdx < 18) {
      std::optional<llvm::APSInt> LaneConst =
          E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
      assert(LaneConst && "Constant arg isn't actually constant?");
      Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
    }
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
    return Builder.CreateCall(Callee, Ops);
  }
  case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
  case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
  case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
  case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
    Value *A = EmitScalarExpr(E->getArg(0));
    Value *B = EmitScalarExpr(E->getArg(1));
    Value *C = EmitScalarExpr(E->getArg(2));
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
    case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
      IntNo = Intrinsic::wasm_relaxed_madd;
      break;
    case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
    case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
      IntNo = Intrinsic::wasm_relaxed_nmadd;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
    return Builder.CreateCall(Callee, {A, B, C});
  }
  case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
  case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
  case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
  case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
    Value *A = EmitScalarExpr(E->getArg(0));
    Value *B = EmitScalarExpr(E->getArg(1));
    Value *C = EmitScalarExpr(E->getArg(2));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
    return Builder.CreateCall(Callee, {A, B, C});
  }
  case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
    Value *Src = EmitScalarExpr(E->getArg(0));
    Value *Indices = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
    return Builder.CreateCall(Callee, {Src, Indices});
  }
  case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
  case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
  case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
  case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
    case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
      IntNo = Intrinsic::wasm_relaxed_min;
      break;
    case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
    case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
      IntNo = Intrinsic::wasm_relaxed_max;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
  case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
  case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
  case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
      IntNo = Intrinsic::wasm_relaxed_trunc_signed;
      break;
    case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
      IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
      break;
    case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
      IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
      break;
    case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
      IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    Function *Callee = CGM.getIntrinsic(IntNo);
    return Builder.CreateCall(Callee, {Vec});
  }
  case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Value *Acc = EmitScalarExpr(E->getArg(2));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
    return Builder.CreateCall(Callee, {LHS, RHS, Acc});
  }
  case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Value *Acc = EmitScalarExpr(E->getArg(2));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
    return Builder.CreateCall(Callee, {LHS, RHS, Acc});
  }
  case WebAssembly::BI__builtin_wasm_table_get: {
    assert(E->getArg(0)->getType()->isArrayType());
    Value *Table =
        EmitCastToVoidPtr(EmitArrayToPointerDecay(E->getArg(0)).getPointer());
    Value *Index = EmitScalarExpr(E->getArg(1));
    Function *Callee;
    if (E->getType().isWebAssemblyExternrefType())
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
    else if (E->getType().isWebAssemblyFuncrefType())
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
    else
      llvm_unreachable(
          "Unexpected reference type for __builtin_wasm_table_get");
    return Builder.CreateCall(Callee, {Table, Index});
  }
  case WebAssembly::BI__builtin_wasm_table_set: {
    assert(E->getArg(0)->getType()->isArrayType());
    Value *Table =
        EmitCastToVoidPtr(EmitArrayToPointerDecay(E->getArg(0)).getPointer());
    Value *Index = EmitScalarExpr(E->getArg(1));
    Value *Val = EmitScalarExpr(E->getArg(2));
    Function *Callee;
    if (E->getArg(2)->getType().isWebAssemblyExternrefType())
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
    else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
    else
      llvm_unreachable(
          "Unexpected reference type for __builtin_wasm_table_set");
    return Builder.CreateCall(Callee, {Table, Index, Val});
  }
  case WebAssembly::BI__builtin_wasm_table_size: {
    assert(E->getArg(0)->getType()->isArrayType());
    Value *Value =
        EmitCastToVoidPtr(EmitArrayToPointerDecay(E->getArg(0)).getPointer());
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
    return Builder.CreateCall(Callee, Value);
  }
  case WebAssembly::BI__builtin_wasm_table_grow: {
    assert(E->getArg(0)->getType()->isArrayType());
    Value *Table =
        EmitCastToVoidPtr(EmitArrayToPointerDecay(E->getArg(0)).getPointer());
    Value *Val = EmitScalarExpr(E->getArg(1));
    Value *NElems = EmitScalarExpr(E->getArg(2));

    Function *Callee;
    if (E->getArg(1)->getType().isWebAssemblyExternrefType())
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
    else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
    else
      llvm_unreachable(
          "Unexpected reference type for __builtin_wasm_table_grow");

    return Builder.CreateCall(Callee, {Table, Val, NElems});
  }
  case WebAssembly::BI__builtin_wasm_table_fill: {
    assert(E->getArg(0)->getType()->isArrayType());
    Value *Table =
        EmitCastToVoidPtr(EmitArrayToPointerDecay(E->getArg(0)).getPointer());
    Value *Index = EmitScalarExpr(E->getArg(1));
    Value *Val = EmitScalarExpr(E->getArg(2));
    Value *NElems = EmitScalarExpr(E->getArg(3));

    Function *Callee;
    if (E->getArg(2)->getType().isWebAssemblyExternrefType())
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
    else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
    else
      llvm_unreachable(
          "Unexpected reference type for __builtin_wasm_table_fill");

    return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
  }
  case WebAssembly::BI__builtin_wasm_table_copy: {
    assert(E->getArg(0)->getType()->isArrayType());
    Value *TableX =
        EmitCastToVoidPtr(EmitArrayToPointerDecay(E->getArg(0)).getPointer());
    Value *TableY =
        EmitCastToVoidPtr(EmitArrayToPointerDecay(E->getArg(1)).getPointer());
    Value *DstIdx = EmitScalarExpr(E->getArg(2));
    Value *SrcIdx = EmitScalarExpr(E->getArg(3));
    Value *NElems = EmitScalarExpr(E->getArg(4));

    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);

    return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
  }
  default:
    return nullptr;
  }
}
static std::pair<Intrinsic::ID, unsigned>
getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
  struct Info {
    unsigned BuiltinID;
    Intrinsic::ID IntrinsicID;
    unsigned VecLen;
  };
  static Info Infos[] = {
#define CUSTOM_BUILTIN_MAPPING(x,s) \
  { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
    CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
    // Legacy builtins that take a vector in place of a vector predicate.
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
#undef CUSTOM_BUILTIN_MAPPING
  };

  auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
  static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
  (void)SortOnce;

  const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
  if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
    return {Intrinsic::not_intrinsic, 0};

  return {F->IntrinsicID, F->VecLen};
}
Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
  Intrinsic::ID ID;
  unsigned VecLen;
  std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);

  auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
    // The base pointer is passed by address, so it needs to be loaded.
    Address A = EmitPointerWithAlignment(E->getArg(0));
    Address BP = Address(Builder.CreateBitCast(
        A.getPointer(), Int8PtrPtrTy), Int8PtrTy, A.getAlignment());
    llvm::Value *Base = Builder.CreateLoad(BP);
    // The treatment of both loads and stores is the same: the arguments for
    // the builtin are the same as the arguments for the intrinsic.
    // Load:
    //   builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
    //   builtin(Base, Mod, Start)      -> intr(Base, Mod, Start)
    // Store:
    //   builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
    //   builtin(Base, Mod, Val, Start)      -> intr(Base, Mod, Val, Start)
    SmallVector<llvm::Value*,5> Ops = { Base };
    for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));

    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
    // The load intrinsics generate two results (Value, NewBase), stores
    // generate one (NewBase). The new base address needs to be stored.
    llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
                                  : Result;
    llvm::Value *LV = Builder.CreateBitCast(
        EmitScalarExpr(E->getArg(0)), NewBase->getType()->getPointerTo());
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    llvm::Value *RetVal =
        Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
    if (IsLoad)
      RetVal = Builder.CreateExtractValue(Result, 0);
    return RetVal;
  };
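  // In other words, a circular load builtin returns the loaded element
  // (result 0 of the intrinsic) and writes the post-incremented base pointer
  // (result 1) back through its first argument, while a circular store only
  // returns and writes back the new base pointer.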
  // Handle the conversion of bit-reverse load intrinsics to bit code.
  // The intrinsic call after this function only reads from memory and the
  // write to memory is dealt by the store instruction.
  auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
    // The intrinsic generates one result, which is the new value for the base
    // pointer. It needs to be returned. The result of the load instruction is
    // passed to intrinsic by address, so the value needs to be stored.
    llvm::Value *BaseAddress =
        Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);

    // Expressions like &(*pt++) will be incremented per evaluation.
    // EmitPointerWithAlignment and EmitScalarExpr evaluates the expression
    // per call.
    Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
    DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy),
                       Int8Ty, DestAddr.getAlignment());
    llvm::Value *DestAddress = DestAddr.getPointer();

    // Operands are Base, Dest, Modifier.
    // The intrinsic format in LLVM IR is defined as
    // { ValueType, i8* } (i8*, i32).
    llvm::Value *Result = Builder.CreateCall(
        CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});

    // The value needs to be stored as the variable is passed by reference.
    llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);

    // The store needs to be truncated to fit the destination type.
    // While i32 and i64 are natively supported on Hexagon, i8 and i16 needs
    // to be handled with stores of respective destination type.
    DestVal = Builder.CreateTrunc(DestVal, DestTy);

    llvm::Value *DestForStore =
        Builder.CreateBitCast(DestAddress, DestVal->getType()->getPointerTo());
    Builder.CreateAlignedStore(DestVal, DestForStore, DestAddr.getAlignment());
    // The updated value of the base pointer is returned.
    return Builder.CreateExtractValue(Result, 1);
  };

  auto V2Q = [this, VecLen] (llvm::Value *Vec) {
    Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
                                     : Intrinsic::hexagon_V6_vandvrt;
    return Builder.CreateCall(CGM.getIntrinsic(ID),
                              {Vec, Builder.getInt32(-1)});
  };
  auto Q2V = [this, VecLen] (llvm::Value *Pred) {
    Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
                                     : Intrinsic::hexagon_V6_vandqrt;
    return Builder.CreateCall(CGM.getIntrinsic(ID),
                              {Pred, Builder.getInt32(-1)});
  };
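  // V2Q and Q2V bridge between HVX vector values and HVX predicate values:
  // vandvrt with an all-ones scalar turns a vector into a predicate and
  // vandqrt turns a predicate back into a vector, with the _128B intrinsic
  // variants chosen when the builtin operates on 128-byte vectors.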
  switch (BuiltinID) {
  // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
  // and the corresponding C/C++ builtins use loads/stores to update
  // the predicate.
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
    // Get the type from the 0-th argument.
    llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
    Address PredAddr = Builder.CreateElementBitCast(
        EmitPointerWithAlignment(E->getArg(2)), VecType);
    llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});

    llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
    Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
        PredAddr.getAlignment());
    return Builder.CreateExtractValue(Result, 0);
  }
  // These are identical to the builtins above, except they don't consume
  // input carry, only generate carry-out. Since they still produce two
  // outputs, generate the store of the predicate, but no load.
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
    // Get the type from the 0-th argument.
    llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
    Address PredAddr = Builder.CreateElementBitCast(
        EmitPointerWithAlignment(E->getArg(2)), VecType);
    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});

    llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
    Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
        PredAddr.getAlignment());
    return Builder.CreateExtractValue(Result, 0);
  }
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
    SmallVector<llvm::Value*,4> Ops;
    const Expr *PredOp = E->getArg(0);
    // There will be an implicit cast to a boolean vector. Strip it.
    if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
      if (Cast->getCastKind() == CK_BitCast)
        PredOp = Cast->getSubExpr();
      Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
    }
    for (int i = 1, e = E->getNumArgs(); i != e; ++i)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  }

  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
    return MakeCircOp(ID, /*IsLoad=*/true);
  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
    return MakeCircOp(ID, /*IsLoad=*/false);
  case Hexagon::BI__builtin_brev_ldub:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
  case Hexagon::BI__builtin_brev_ldb:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
  case Hexagon::BI__builtin_brev_lduh:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
  case Hexagon::BI__builtin_brev_ldh:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
  case Hexagon::BI__builtin_brev_ldw:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
  case Hexagon::BI__builtin_brev_ldd:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
  } // switch

  return nullptr;
}
Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
                                             const CallExpr *E,
                                             ReturnValueSlot ReturnValue) {
  SmallVector<Value *, 4> Ops;
  llvm::Type *ResultType = ConvertType(E->getType());

  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  if (Error == ASTContext::GE_Missing_type) {
    // Vector intrinsics don't have a type string.
    assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
           BuiltinID <= clang::RISCV::LastRVVBuiltin);
    ICEArguments = 0;
    if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
        BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
      ICEArguments = 1 << 1;
  } else {
    assert(Error == ASTContext::GE_None && "Unexpected error");
  }

  if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
    ICEArguments |= (1 << 1);
  if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
    ICEArguments |= (1 << 2);

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    // Handle aggregate argument, namely RVV tuple types in segment load/store
    if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
      LValue L = EmitAggExprToLValue(E->getArg(i));
      llvm::Value *AggValue = Builder.CreateLoad(L.getAddress(*this));
      Ops.push_back(AggValue);
      continue;
    }

    // If this is a normal argument, just emit it as a scalar.
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
      continue;
    }

    // If this is required to be a constant, constant fold it so that we know
    // that the generated intrinsic gets a ConstantInt.
    Ops.push_back(llvm::ConstantInt::get(
        getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
  }
  Intrinsic::ID ID = Intrinsic::not_intrinsic;
  unsigned NF = 1;
  // The 0th bit simulates the `vta` of RVV
  // The 1st bit simulates the `vma` of RVV
  constexpr unsigned RVV_VTA = 0x1;
  constexpr unsigned RVV_VMA = 0x2;
  int PolicyAttrs = 0;
  bool IsMasked = false;

  // Required for overloaded intrinsics.
  llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
  switch (BuiltinID) {
  default: llvm_unreachable("unexpected builtin ID");
  case RISCV::BI__builtin_riscv_orc_b_32:
  case RISCV::BI__builtin_riscv_orc_b_64:
  case RISCV::BI__builtin_riscv_clz_32:
  case RISCV::BI__builtin_riscv_clz_64:
  case RISCV::BI__builtin_riscv_ctz_32:
  case RISCV::BI__builtin_riscv_ctz_64:
  case RISCV::BI__builtin_riscv_clmul:
  case RISCV::BI__builtin_riscv_clmulh:
  case RISCV::BI__builtin_riscv_clmulr:
  case RISCV::BI__builtin_riscv_xperm4:
  case RISCV::BI__builtin_riscv_xperm8:
  case RISCV::BI__builtin_riscv_brev8:
  case RISCV::BI__builtin_riscv_zip_32:
  case RISCV::BI__builtin_riscv_unzip_32: {
    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin ID");
    case RISCV::BI__builtin_riscv_orc_b_32:
    case RISCV::BI__builtin_riscv_orc_b_64:
      ID = Intrinsic::riscv_orc_b;
      break;
    case RISCV::BI__builtin_riscv_clz_32:
    case RISCV::BI__builtin_riscv_clz_64: {
      Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
      return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
    }
    case RISCV::BI__builtin_riscv_ctz_32:
    case RISCV::BI__builtin_riscv_ctz_64: {
      Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
      return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
    }
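    // The clz/ctz builtins above do not map to a RISC-V specific intrinsic;
    // they return immediately with a call to the generic llvm.ctlz/llvm.cttz
    // intrinsics, passing `false` for the is-zero-poison flag so that a zero
    // input yields the bit width (e.g. __builtin_riscv_clz_32(0) == 32).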
    case RISCV::BI__builtin_riscv_clmul:
      ID = Intrinsic::riscv_clmul;
      break;
    case RISCV::BI__builtin_riscv_clmulh:
      ID = Intrinsic::riscv_clmulh;
      break;
    case RISCV::BI__builtin_riscv_clmulr:
      ID = Intrinsic::riscv_clmulr;
      break;
    case RISCV::BI__builtin_riscv_xperm8:
      ID = Intrinsic::riscv_xperm8;
      break;
    case RISCV::BI__builtin_riscv_xperm4:
      ID = Intrinsic::riscv_xperm4;
      break;
    case RISCV::BI__builtin_riscv_brev8:
      ID = Intrinsic::riscv_brev8;
      break;
    case RISCV::BI__builtin_riscv_zip_32:
      ID = Intrinsic::riscv_zip;
      break;
    case RISCV::BI__builtin_riscv_unzip_32:
      ID = Intrinsic::riscv_unzip;
      break;
    }

    IntrinsicTypes = {ResultType};
    break;
  }

  // Zknd
  case RISCV::BI__builtin_riscv_aes32dsi_32:
    ID = Intrinsic::riscv_aes32dsi;
    break;
  case RISCV::BI__builtin_riscv_aes32dsmi_32:
    ID = Intrinsic::riscv_aes32dsmi;
    break;
  case RISCV::BI__builtin_riscv_aes64ds_64:
    ID = Intrinsic::riscv_aes64ds;
    break;
  case RISCV::BI__builtin_riscv_aes64dsm_64:
    ID = Intrinsic::riscv_aes64dsm;
    break;
  case RISCV::BI__builtin_riscv_aes64im_64:
    ID = Intrinsic::riscv_aes64im;
    break;

  // Zkne
  case RISCV::BI__builtin_riscv_aes32esi_32:
    ID = Intrinsic::riscv_aes32esi;
    break;
  case RISCV::BI__builtin_riscv_aes32esmi_32:
    ID = Intrinsic::riscv_aes32esmi;
    break;
  case RISCV::BI__builtin_riscv_aes64es_64:
    ID = Intrinsic::riscv_aes64es;
    break;
  case RISCV::BI__builtin_riscv_aes64esm_64:
    ID = Intrinsic::riscv_aes64esm;
    break;

  // Zknd & Zkne
  case RISCV::BI__builtin_riscv_aes64ks1i_64:
    ID = Intrinsic::riscv_aes64ks1i;
    break;
  case RISCV::BI__builtin_riscv_aes64ks2_64:
    ID = Intrinsic::riscv_aes64ks2;
    break;

  // Zknh
  case RISCV::BI__builtin_riscv_sha256sig0:
    ID = Intrinsic::riscv_sha256sig0;
    IntrinsicTypes = {ResultType};
    break;
  case RISCV::BI__builtin_riscv_sha256sig1:
    ID = Intrinsic::riscv_sha256sig1;
    IntrinsicTypes = {ResultType};
    break;
  case RISCV::BI__builtin_riscv_sha256sum0:
    ID = Intrinsic::riscv_sha256sum0;
    IntrinsicTypes = {ResultType};
    break;
  case RISCV::BI__builtin_riscv_sha256sum1:
    ID = Intrinsic::riscv_sha256sum1;
    IntrinsicTypes = {ResultType};
    break;
  case RISCV::BI__builtin_riscv_sha512sig0_64:
    ID = Intrinsic::riscv_sha512sig0;
    break;
  case RISCV::BI__builtin_riscv_sha512sig0h_32:
    ID = Intrinsic::riscv_sha512sig0h;
    break;
  case RISCV::BI__builtin_riscv_sha512sig0l_32:
    ID = Intrinsic::riscv_sha512sig0l;
    break;
  case RISCV::BI__builtin_riscv_sha512sig1_64:
    ID = Intrinsic::riscv_sha512sig1;
    break;
  case RISCV::BI__builtin_riscv_sha512sig1h_32:
    ID = Intrinsic::riscv_sha512sig1h;
    break;
  case RISCV::BI__builtin_riscv_sha512sig1l_32:
    ID = Intrinsic::riscv_sha512sig1l;
    break;
  case RISCV::BI__builtin_riscv_sha512sum0_64:
    ID = Intrinsic::riscv_sha512sum0;
    break;
  case RISCV::BI__builtin_riscv_sha512sum0r_32:
    ID = Intrinsic::riscv_sha512sum0r;
    break;
  case RISCV::BI__builtin_riscv_sha512sum1_64:
    ID = Intrinsic::riscv_sha512sum1;
    break;
  case RISCV::BI__builtin_riscv_sha512sum1r_32:
    ID = Intrinsic::riscv_sha512sum1r;
    break;

  // Zksed
  case RISCV::BI__builtin_riscv_sm4ks:
    ID = Intrinsic::riscv_sm4ks;
    IntrinsicTypes = {ResultType};
    break;
  case RISCV::BI__builtin_riscv_sm4ed:
    ID = Intrinsic::riscv_sm4ed;
    IntrinsicTypes = {ResultType};
    break;

  // Zksh
  case RISCV::BI__builtin_riscv_sm3p0:
    ID = Intrinsic::riscv_sm3p0;
    IntrinsicTypes = {ResultType};
    break;
  case RISCV::BI__builtin_riscv_sm3p1:
    ID = Intrinsic::riscv_sm3p1;
    IntrinsicTypes = {ResultType};
    break;
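
  // Zihintntl: the nontemporal load/store builtins are emitted as ordinary
  // memory operations tagged with !nontemporal metadata plus a
  // "riscv-nontemporal-domain" node carrying the domain operand.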
  case RISCV::BI__builtin_riscv_ntl_load: {
    llvm::Type *ResTy = ConvertType(E->getType());
    ConstantInt *Mode = cast<ConstantInt>(Ops[1]);

    llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
        getLLVMContext(),
        llvm::ConstantAsMetadata::get(Builder.getInt32(Mode->getZExtValue())));
    llvm::MDNode *NontemporalNode = llvm::MDNode::get(
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));

    // Width of the loaded value in bits; for scalable vectors, use the known
    // minimum size.
    int Width;
    if (ResTy->isScalableTy()) {
      const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
      llvm::Type *ScalarTy = ResTy->getScalarType();
      Width = ScalarTy->getPrimitiveSizeInBits() *
              SVTy->getElementCount().getKnownMinValue();
    } else
      Width = ResTy->getPrimitiveSizeInBits();
    LoadInst *Load = Builder.CreateLoad(
        Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));

    Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
    Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
                      RISCVDomainNode);

    return Load;
  }
  case RISCV::BI__builtin_riscv_ntl_store: {
    ConstantInt *Mode = cast<ConstantInt>(Ops[2]);

    llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
        getLLVMContext(),
        llvm::ConstantAsMetadata::get(Builder.getInt32(Mode->getZExtValue())));
    llvm::MDNode *NontemporalNode = llvm::MDNode::get(
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));

    Value *BC = Builder.CreateBitCast(
        Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType()), "cast");

    StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], BC);
    Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
    Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
                       RISCVDomainNode);

    return Store;
  }
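
  // The two .inc files below are TableGen-generated from the RVV builtin
  // definitions and expand to additional case labels of this switch.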
  // Vector builtins are handled from here.
#include "clang/Basic/riscv_vector_builtin_cg.inc"
  // SiFive Vector builtins are handled from here.
#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
  }

  assert(ID != Intrinsic::not_intrinsic);

  llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
  return Builder.CreateCall(F, Ops, "");
}
Value *CodeGenFunction::EmitLoongArchBuiltinExpr(unsigned BuiltinID,
                                                 const CallExpr *E) {
  SmallVector<Value *, 4> Ops;

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  Intrinsic::ID ID = Intrinsic::not_intrinsic;

  switch (BuiltinID) {
  default:
    llvm_unreachable("unexpected builtin ID.");
  case LoongArch::BI__builtin_loongarch_cacop_d:
    ID = Intrinsic::loongarch_cacop_d;
    break;
  case LoongArch::BI__builtin_loongarch_cacop_w:
    ID = Intrinsic::loongarch_cacop_w;
    break;
  case LoongArch::BI__builtin_loongarch_dbar:
    ID = Intrinsic::loongarch_dbar;
    break;
  case LoongArch::BI__builtin_loongarch_break:
    ID = Intrinsic::loongarch_break;
    break;
  case LoongArch::BI__builtin_loongarch_ibar:
    ID = Intrinsic::loongarch_ibar;
    break;
  case LoongArch::BI__builtin_loongarch_movfcsr2gr:
    ID = Intrinsic::loongarch_movfcsr2gr;
    break;
  case LoongArch::BI__builtin_loongarch_movgr2fcsr:
    ID = Intrinsic::loongarch_movgr2fcsr;
    break;
  case LoongArch::BI__builtin_loongarch_syscall:
    ID = Intrinsic::loongarch_syscall;
    break;
  case LoongArch::BI__builtin_loongarch_crc_w_b_w:
    ID = Intrinsic::loongarch_crc_w_b_w;
    break;
  case LoongArch::BI__builtin_loongarch_crc_w_h_w:
    ID = Intrinsic::loongarch_crc_w_h_w;
    break;
  case LoongArch::BI__builtin_loongarch_crc_w_w_w:
    ID = Intrinsic::loongarch_crc_w_w_w;
    break;
  case LoongArch::BI__builtin_loongarch_crc_w_d_w:
    ID = Intrinsic::loongarch_crc_w_d_w;
    break;
  case LoongArch::BI__builtin_loongarch_crcc_w_b_w:
    ID = Intrinsic::loongarch_crcc_w_b_w;
    break;
  case LoongArch::BI__builtin_loongarch_crcc_w_h_w:
    ID = Intrinsic::loongarch_crcc_w_h_w;
    break;
  case LoongArch::BI__builtin_loongarch_crcc_w_w_w:
    ID = Intrinsic::loongarch_crcc_w_w_w;
    break;
  case LoongArch::BI__builtin_loongarch_crcc_w_d_w:
    ID = Intrinsic::loongarch_crcc_w_d_w;
    break;
  case LoongArch::BI__builtin_loongarch_csrrd_w:
    ID = Intrinsic::loongarch_csrrd_w;
    break;
  case LoongArch::BI__builtin_loongarch_csrwr_w:
    ID = Intrinsic::loongarch_csrwr_w;
    break;
  case LoongArch::BI__builtin_loongarch_csrxchg_w:
    ID = Intrinsic::loongarch_csrxchg_w;
    break;
  case LoongArch::BI__builtin_loongarch_csrrd_d:
    ID = Intrinsic::loongarch_csrrd_d;
    break;
  case LoongArch::BI__builtin_loongarch_csrwr_d:
    ID = Intrinsic::loongarch_csrwr_d;
    break;
  case LoongArch::BI__builtin_loongarch_csrxchg_d:
    ID = Intrinsic::loongarch_csrxchg_d;
    break;
  case LoongArch::BI__builtin_loongarch_iocsrrd_b:
    ID = Intrinsic::loongarch_iocsrrd_b;
    break;
  case LoongArch::BI__builtin_loongarch_iocsrrd_h:
    ID = Intrinsic::loongarch_iocsrrd_h;
    break;
  case LoongArch::BI__builtin_loongarch_iocsrrd_w:
    ID = Intrinsic::loongarch_iocsrrd_w;
    break;
  case LoongArch::BI__builtin_loongarch_iocsrrd_d:
    ID = Intrinsic::loongarch_iocsrrd_d;
    break;
  case LoongArch::BI__builtin_loongarch_iocsrwr_b:
    ID = Intrinsic::loongarch_iocsrwr_b;
    break;
  case LoongArch::BI__builtin_loongarch_iocsrwr_h:
    ID = Intrinsic::loongarch_iocsrwr_h;
    break;
  case LoongArch::BI__builtin_loongarch_iocsrwr_w:
    ID = Intrinsic::loongarch_iocsrwr_w;
    break;
  case LoongArch::BI__builtin_loongarch_iocsrwr_d:
    ID = Intrinsic::loongarch_iocsrwr_d;
    break;
  case LoongArch::BI__builtin_loongarch_cpucfg:
    ID = Intrinsic::loongarch_cpucfg;
    break;
  case LoongArch::BI__builtin_loongarch_asrtle_d:
    ID = Intrinsic::loongarch_asrtle_d;
    break;
  case LoongArch::BI__builtin_loongarch_asrtgt_d:
    ID = Intrinsic::loongarch_asrtgt_d;
    break;
  case LoongArch::BI__builtin_loongarch_lddir_d:
    ID = Intrinsic::loongarch_lddir_d;
    break;
  case LoongArch::BI__builtin_loongarch_ldpte_d:
    ID = Intrinsic::loongarch_ldpte_d;
    break;
  // TODO: Support more Intrinsics.
  }

  assert(ID != Intrinsic::not_intrinsic);

  llvm::Function *F = CGM.getIntrinsic(ID);
  return Builder.CreateCall(F, Ops);