//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that VE uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "VEISelLowering.h"
#include "MCTargetDesc/VEMCExpr.h"
#include "VEInstrBuilder.h"
#include "VEMachineFunctionInfo.h"
#include "VERegisterInfo.h"
#include "VETargetMachine.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

#define DEBUG_TYPE "ve-lower"
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "VEGenCallingConv.inc"
CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
  switch (CallConv) {
  default:
    return RetCC_VE_C;
  case CallingConv::Fast:
    return RetCC_VE_Fast;
  }
}

CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
  if (IsVarArg)
    return CC_VE2;
  switch (CallConv) {
  default:
    return CC_VE_C;
  case CallingConv::Fast:
    return CC_VE_Fast;
  }
}
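// Note: the RetCC_VE_* / CC_VE_* assignment functions used above are
// TableGen-generated from the calling-convention description and pulled in
// through the VEGenCallingConv.inc include at the top of this section.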
bool VETargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  CCAssignFn *RetCC = getReturnCC(CallConv);
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC);
}
static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
                                   MVT::v256f32, MVT::v512f32, MVT::v256f64};

static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
void VETargetLowering::initRegisterClasses() {
  // Set up the register classes.
  addRegisterClass(MVT::i32, &VE::I32RegClass);
  addRegisterClass(MVT::i64, &VE::I64RegClass);
  addRegisterClass(MVT::f32, &VE::F32RegClass);
  addRegisterClass(MVT::f64, &VE::I64RegClass);
  addRegisterClass(MVT::f128, &VE::F128RegClass);

  if (Subtarget->enableVPU()) {
    for (MVT VecVT : AllVectorVTs)
      addRegisterClass(VecVT, &VE::V64RegClass);
    addRegisterClass(MVT::v256i1, &VE::VMRegClass);
    addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
  }
}
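// Note on the scalar classes above: f64 intentionally shares VE::I64RegClass
// with i64 because VE keeps both in the 64-bit scalar %s registers, while f32
// uses the separate F32RegClass view of those registers (assumption based on
// the register definitions in VERegisterInfo.td).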
void VETargetLowering::initSPUActions() {
  const auto &TM = getTargetMachine();

  // VE doesn't have i1 sign extending load.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }
  // VE doesn't have floating point extload/truncstore, so expand them.
  for (MVT FPVT : MVT::fp_valuetypes()) {
    for (MVT OtherFPVT : MVT::fp_valuetypes()) {
      setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
      setTruncStoreAction(FPVT, OtherFPVT, Expand);
    }
  }

  // VE doesn't have fp128 load/store, so expand them in custom lower.
  setOperationAction(ISD::LOAD, MVT::f128, Custom);
  setOperationAction(ISD::STORE, MVT::f128, Custom);
  // Custom legalize address nodes into LO/HI parts.
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::ConstantPool, PtrVT, Custom);
  setOperationAction(ISD::JumpTable, PtrVT, Custom);
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  // VAARG needs to be lowered to access with 8 bytes alignment.
  setOperationAction(ISD::VAARG, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);

  // Use the default implementation.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  // VE doesn't have BRCOND
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // BR_JT is not implemented yet.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  for (MVT IntVT : {MVT::i32, MVT::i64}) {
    // VE has no REM or DIVREM operations.
    setOperationAction(ISD::UREM, IntVT, Expand);
    setOperationAction(ISD::SREM, IntVT, Expand);
    setOperationAction(ISD::SDIVREM, IntVT, Expand);
    setOperationAction(ISD::UDIVREM, IntVT, Expand);

    // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
    setOperationAction(ISD::SHL_PARTS, IntVT, Expand);
    setOperationAction(ISD::SRA_PARTS, IntVT, Expand);
    setOperationAction(ISD::SRL_PARTS, IntVT, Expand);

    // VE has no MULHU/S or U/SMUL_LOHI operations.
    // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
    setOperationAction(ISD::MULHU, IntVT, Expand);
    setOperationAction(ISD::MULHS, IntVT, Expand);
    setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);
    setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);

    // VE has no CTTZ, ROTL, ROTR operations.
    setOperationAction(ISD::CTTZ, IntVT, Expand);
    setOperationAction(ISD::ROTL, IntVT, Expand);
    setOperationAction(ISD::ROTR, IntVT, Expand);

    // VE has a 64-bit instruction which works as an i64 BSWAP operation. The
    // same instruction works fine as an i32 BSWAP operation with an additional
    // parameter. Use isel patterns to lower BSWAP.
    setOperationAction(ISD::BSWAP, IntVT, Legal);

    // VE has only 64-bit instructions which work as i64 BITREVERSE/CTLZ/CTPOP
    // operations. Use isel patterns for i64, promote for i32.
    LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
    setOperationAction(ISD::BITREVERSE, IntVT, Act);
    setOperationAction(ISD::CTLZ, IntVT, Act);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
    setOperationAction(ISD::CTPOP, IntVT, Act);

    // VE has only 64-bit instructions which work as i64 AND/OR/XOR operations.
    // Use isel patterns for i64, promote for i32.
    setOperationAction(ISD::AND, IntVT, Act);
    setOperationAction(ISD::OR, IntVT, Act);
    setOperationAction(ISD::XOR, IntVT, Act);
  }
  // VE doesn't have instructions for fp<->uint, so expand them by llvm
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);

  // fp16 not supported
  for (MVT FPVT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
    setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
  }
  /// Floating-point Ops {
  /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
  ///       and fcmp.

  // VE doesn't have the following floating point operations.
  for (MVT VT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FNEG, VT, Expand);
    setOperationAction(ISD::FREM, VT, Expand);
  }

  // VE doesn't have fdiv of f128.
  setOperationAction(ISD::FDIV, MVT::f128, Expand);

  for (MVT FPVT : {MVT::f32, MVT::f64}) {
    // f32 and f64 use ConstantFP.  f128 uses ConstantPool.
    setOperationAction(ISD::ConstantFP, FPVT, Legal);
  }
  /// } Floating-point Ops
  /// Floating-point math functions {

  // VE doesn't have the following floating point math functions.
  for (MVT VT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FABS, VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, VT, Expand);
    setOperationAction(ISD::FCOS, VT, Expand);
    setOperationAction(ISD::FSIN, VT, Expand);
    setOperationAction(ISD::FSQRT, VT, Expand);
  }

  /// } Floating-point math functions
  /// Atomic instructions {

  setMaxAtomicSizeInBitsSupported(64);
  setMinCmpXchgSizeInBits(32);
  setSupportsUnalignedAtomics(false);

  // Use custom inserter for ATOMIC_FENCE.
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Other atomic instructions.
  for (MVT VT : MVT::integer_valuetypes()) {
    // Support i8/i16 atomic swap.
    setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);

    // FIXME: Support "atmam" instructions.
    setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);

    // VE doesn't have the following instructions.
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
  }

  /// } Atomic instructions
  /// SJLJ instructions {
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
  if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
  /// } SJLJ instructions

  // Intrinsic instructions
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
}
void VETargetLowering::initVPUActions() {
  for (MVT LegalVecVT : AllVectorVTs) {
    setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
    // Translate all vector instructions with legal element types to VVP_*
    // nodes.
    // TODO We will custom-widen into VVP_* nodes in the future. While we are
    // building the infrastructure for this, we only do this for legal vector
    // VTs here.
#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME)                                     \
  setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
#define ADD_VVP_OP(VVP_NAME, ISD_NAME)                                         \
  setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
#include "VVPNodes.def"
  }

  for (MVT LegalPackedVT : AllPackedVTs) {
    setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
  }
}
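// The HANDLE_VP_TO_VVP/ADD_VVP_OP entries expanded above come from
// VVPNodes.def, so every listed VP_* and standard vector opcode is routed
// through the Custom hook and ends up in lowerToVVP() via LowerOperation().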
SDValue
VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                              bool IsVarArg,
                              const SmallVectorImpl<ISD::OutputArg> &Outs,
                              const SmallVectorImpl<SDValue> &OutVals,
                              const SDLoc &DL, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    SDValue OutVal = OutVals[i];

    // Integer return values must be sign or zero extended by the callee.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::ZExt:
      OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::AExt:
      OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::BCvt: {
      // Convert a float return value to i64 with padding.
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Undef = SDValue(
          DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                          MVT::i64, Undef, OutVal, Sub_f32),
                       0);
      break;
    }
    default:
      llvm_unreachable("Unknown loc info!");
    }

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);

    // Guarantee that all emitted copies are stuck together with flags.
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
}
SDValue VETargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Get the base offset of the incoming arguments stack space.
  unsigned ArgsBaseOffset = Subtarget->getRsaSize();
  // Get the size of the preserved arguments area
  unsigned ArgsPreserved = 64;

  // Analyze arguments according to CC_VE.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offset computed
  // by CC_VE would be correct now.
  CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    if (VA.isRegLoc()) {
      // This argument is passed in a register.
      // All integer register arguments are promoted by the caller to i64.

      // Create a virtual register for the promoted live-in value.
      Register VReg =
          MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
      SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());

      // The caller promoted the argument, so insert an Assert?ext SDNode so we
      // won't promote the value again in this function.
      switch (VA.getLocInfo()) {
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      case CCValAssign::BCvt: {
        // Extract a float argument from i64 with padding.
        assert(VA.getLocVT() == MVT::i64);
        assert(VA.getValVT() == MVT::f32);
        SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
        Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                         MVT::f32, Arg, Sub_f32),
                      0);
        break;
      }
      default:
        break;
      }

      // Truncate the register down to the argument type.
      if (VA.isExtInLoc())
        Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);

      InVals.push_back(Arg);
      continue;
    }

    // The registers are exhausted. This argument was passed on the stack.
    assert(VA.isMemLoc());
    // The CC_VE_Full/Half functions compute stack offsets relative to the
    // beginning of the arguments area at %fp + the size of reserved area.
    unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
    unsigned ValSize = VA.getValVT().getSizeInBits() / 8;

    // Adjust offset for a float argument by adding 4 since the argument is
    // stored in an 8-byte buffer with an offset like below.  LLVM generates
    // a 4-byte load instruction, so we need to adjust the offset here.  This
    // adjustment is required only in LowerFormalArguments.  In LowerCall,
    // a float argument is converted to i64 first and stored as 8 bytes of
    // data, which is required by the ABI, so no adjustment is needed there.
    //    0      4
    //    +------+------+
    //    | empty| float|
    //    +------+------+
    if (VA.getValVT() == MVT::f32)
      Offset += 4;

    int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
    InVals.push_back(
        DAG.getLoad(VA.getValVT(), DL, Chain,
                    DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
                    MachinePointerInfo::getFixedStack(MF, FI)));
  }

  if (!IsVarArg)
    return Chain;

  // This function takes variable arguments, some of which may have been passed
  // in registers %s0-%s8.
  //
  // The va_start intrinsic needs to know the offset to the first variable
  // argument.
  // TODO: need to calculate offset correctly once we support f128.
  unsigned ArgOffset = ArgLocs.size() * 8;
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  // Skip the reserved area at the top of stack.
  FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);

  return Chain;
}
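// Note: both LowerFormalArguments() above and LowerCall() later in this file
// use Subtarget->getRsaSize() as the byte offset of the first stack-passed
// argument; the VE ABI reserves a Register Save Area at the top of each frame,
// and the argument area begins immediately after it.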
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                             const MachineFunction &MF) const {
  Register Reg = StringSwitch<Register>(RegName)
                     .Case("sp", VE::SX11)    // Stack pointer
                     .Case("fp", VE::SX9)     // Frame pointer
                     .Case("sl", VE::SX8)     // Stack limit
                     .Case("lr", VE::SX10)    // Link register
                     .Case("tp", VE::SX14)    // Thread pointer
                     .Case("outer", VE::SX12) // Outer register
                     .Case("info", VE::SX17)  // Info area register
                     .Case("got", VE::SX15)   // Global offset table register
                     .Case("plt", VE::SX16)   // Procedure linkage table register
                     .Default(0);

  if (Reg)
    return Reg;

  report_fatal_error("Invalid register name global variable");
}
//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                    SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // VE target does not yet support tail call optimization.
  CLI.IsTailCall = false;

  // Get the base offset of the outgoing arguments stack space.
  unsigned ArgsBaseOffset = Subtarget->getRsaSize();
  // Get the size of the preserved arguments area
  unsigned ArgsPreserved = 8 * 8u;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offset computed
  // by CC_VE would be correct now.
  CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));

  // VE requires the use of both register and stack for varargs or
  // non-prototyped functions.
  bool UseBoth = CLI.IsVarArg;

  // Analyze operands again if it is required to store BOTH.
  SmallVector<CCValAssign, 16> ArgLocs2;
  CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
                  ArgLocs2, *DAG.getContext());
  if (UseBoth)
    CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));

  // Get the size of the outgoing arguments stack space requirement.
  unsigned ArgsSize = CCInfo.getNextStackOffset();

  // Keep stack frames 16-byte aligned.
  ArgsSize = alignTo(ArgsSize, 16);

  // Adjust the stack pointer to make room for the arguments.
  // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
  // with more than 6 arguments.
  Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);

  // Collect the set of registers to pass to the function and their values.
  // This will be emitted as a sequence of CopyToReg nodes glued to the call
  // instruction.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

  // Collect chains from all the memory operations that copy arguments to the
  // stack. They must follow the stack pointer adjustment above and precede the
  // call instruction itself.
  SmallVector<SDValue, 8> MemOpChains;

  // VE needs to get the address of the callee function in a register,
  // so prepare to copy it to SX12 here.

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  // Likewise ExternalSymbol -> TargetExternalSymbol.
  SDValue Callee = CLI.Callee;

  bool IsPICCall = isPositionIndependent();

  // PC-relative references to external symbols should go through $stub.
  // If so, we need to prepare GlobalBaseReg first.
  const TargetMachine &TM = DAG.getTarget();
  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
  const GlobalValue *GV = nullptr;
  auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
  if (CalleeG)
    GV = CalleeG->getGlobal();
  bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
  bool UsePlt = !Local;
  MachineFunction &MF = DAG.getMachineFunction();

  // Turn GlobalAddress/ExternalSymbol node into a value node
  // containing the address of them here.
  if (CalleeG) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee =
          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
    }
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee =
          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
    }
  }

  RegsToPass.push_back(std::make_pair(VE::SX12, Callee));

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = CLI.OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown location info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt: {
      // Convert a float argument to i64 with padding.
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Undef = SDValue(
          DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                       MVT::i64, Undef, Arg, Sub_f32),
                    0);
      break;
    }
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      if (!UseBoth)
        continue;
      VA = ArgLocs2[i];
    }

    assert(VA.isMemLoc());

    // Create a store off the stack pointer for this argument.
    SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
    // The argument area starts at %fp/%sp + the size of reserved area.
    SDValue PtrOff =
        DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
    PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
    MemOpChains.push_back(
        DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
  }

  // Emit all stores, make sure they occur before the call.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Build a sequence of CopyToReg nodes glued together with token chain and
  // glue operands which copy the outgoing args into registers. The InGlue is
  // necessary since all emitted instructions must be stuck together in order
  // to pass the live physical registers.
  SDValue InGlue;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
                             RegsToPass[i].second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // Build the operands for the call instruction itself.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Make sure the CopyToReg nodes are glued to the call instruction which
  // consumes the registers.
  if (InGlue.getNode())
    Ops.push_back(InGlue);

  // Now the call itself.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  // Revert the stack pointer immediately after the call.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
                             DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
  InGlue = Chain.getValue(1);

  // Now extract the return values. This is more or less the same as
  // LowerFormalArguments.

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Set inreg flag manually for codegen generated library calls that
  // return float.
  if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
    CLI.Ins[0].Flags.setInReg();

  RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    unsigned Reg = VA.getLocReg();

    // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
    // reside in the same register in the high and low bits. Reuse the
    // CopyFromReg previous node to avoid duplicate copies.
    SDValue RV;
    if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
      if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
        RV = Chain.getValue(0);

    // But usually we'll create a new CopyFromReg for a different register.
    if (!RV.getNode()) {
      RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
      Chain = RV.getValue(1);
      InGlue = Chain.getValue(2);
    }

    // The callee promoted the return value, so insert an Assert?ext SDNode so
    // we won't promote the value again in this function.
    switch (VA.getLocInfo()) {
    case CCValAssign::SExt:
      RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    case CCValAssign::ZExt:
      RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    case CCValAssign::BCvt: {
      // Extract a float return value from i64 with padding.
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                      MVT::f32, RV, Sub_f32),
                   0);
      break;
    }
    default:
      break;
    }

    // Truncate the register down to the return value type.
    if (VA.isExtInLoc())
      RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);

    InVals.push_back(RV);
  }

  return Chain;
}
bool VETargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // VE uses 64-bit addressing, so we need multiple instructions to generate
  // an address.  Folding an address with an offset increases the number of
  // instructions, so we disable it here.  Offsets will be folded in
  // the DAG combine later if it is worth doing so.
  return false;
}
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                    bool ForCodeSize) const {
  return VT == MVT::f32 || VT == MVT::f64;
}
/// Determine if the target supports unaligned memory accesses.
///
/// This function returns true if the target allows unaligned memory accesses
/// of the specified type in the given address space. If true, it also returns
/// whether the unaligned memory access is "fast" in the last argument by
/// reference. This is used, for example, in situations where an array
/// copy/move/set is converted to a sequence of store operations. Its use
/// helps to ensure that such replacements don't generate code that causes an
/// alignment error (trap) on the target machine.
bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                      unsigned AddrSpace,
                                                      Align A,
                                                      MachineMemOperand::Flags,
                                                      bool *Fast) const {
  if (Fast) {
    // It's fast anytime on VE
    *Fast = true;
  }
  return true;
}
VETargetLowering::VETargetLowering(const TargetMachine &TM,
                                   const VESubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Instructions which use registers as conditionals examine all the
  // bits (as does the pseudo SELECT_CC expansion). I don't think it
  // matters much whether it's ZeroOrOneBooleanContent, or
  // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
  // former.
  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent);

  initRegisterClasses();
  initSPUActions();
  initVPUActions();

  setStackPointerRegisterToSaveRestore(VE::SX11);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::TRUNCATE);

  // Set function alignment to 16 bytes
  setMinFunctionAlignment(Align(16));

  // VE stores all arguments with 8-byte alignment
  setMinStackArgumentAlignment(Align(8));

  computeRegisterProperties(Subtarget->getRegisterInfo());
}
const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
#define TARGET_NODE_CASE(NAME)                                                 \
  case VEISD::NAME:                                                            \
    return "VEISD::" #NAME;
  switch ((VEISD::NodeType)Opcode) {
  case VEISD::FIRST_NUMBER:
    break;
    TARGET_NODE_CASE(CALL)
    TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
    TARGET_NODE_CASE(EH_SJLJ_SETJMP)
    TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
    TARGET_NODE_CASE(GETFUNPLT)
    TARGET_NODE_CASE(GETSTACKTOP)
    TARGET_NODE_CASE(GETTLSADDR)
    TARGET_NODE_CASE(GLOBAL_BASE_REG)
    TARGET_NODE_CASE(Hi)
    TARGET_NODE_CASE(Lo)
    TARGET_NODE_CASE(MEMBARRIER)
    TARGET_NODE_CASE(RET_FLAG)
    TARGET_NODE_CASE(TS1AM)
    TARGET_NODE_CASE(VEC_BROADCAST)

    // Register the VVP_* SDNodes.
#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
#include "VVPNodes.def"
  }
#undef TARGET_NODE_CASE
  return nullptr;
}
EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                         EVT VT) const {
  return MVT::i32;
}
// Convert to a target node and set target flags.
SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
                                          SelectionDAG &DAG) const {
  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
    return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
                                      GA->getValueType(0), GA->getOffset(),
                                      TF);

  if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
    return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
                                     0, TF);

  if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
    return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
                                     CP->getAlign(), CP->getOffset(), TF);

  if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
    return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
                                       TF);

  if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
    return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);

  llvm_unreachable("Unhandled address SDNode");
}
// Split Op into high and low parts according to HiTF and LoTF.
// Return an ADD node combining the parts.
SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
                                       SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
  SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
  return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
}
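// A VEISD::Hi/VEISD::Lo pair like the one built above typically materializes
// as the three-instruction absolute-address sequence
//   lea    %reg, symbol@lo
//   and    %reg, %reg, (32)0
//   lea.sl %reg, symbol@hi(, %reg)
// (the PIC variants are spelled out in makeAddress() below).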
// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
// or ExternalSymbol SDNode.
SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = Op.getValueType();

  // Handle PIC mode first. VE needs a got load for every variable!
  if (isPositionIndependent()) {
    auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);

    if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
        (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
      // Create the following instructions for local-linkage PIC code.
      //     lea %reg, label@gotoff_lo
      //     and %reg, %reg, (32)0
      //     lea.sl %reg, label@gotoff_hi(%reg, %got)
      SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
                                  VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
      SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
      return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    }
    // Create the following instructions for non-local-linkage PIC code.
    //     lea %reg, label@got_lo
    //     and %reg, %reg, (32)0
    //     lea.sl %reg, label@got_hi(%reg)
    //     ld %reg, (%reg, %got)
    SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
                                VEMCExpr::VK_VE_GOT_LO32, DAG);
    SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
    SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
                       MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // This is one of the absolute code models.
  switch (getTargetMachine().getCodeModel()) {
  default:
    llvm_unreachable("Unsupported absolute code model");
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Large:
    return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
  }
}
// The mappings for emitLeading/TrailingFence for VE are designed following
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
                                                Instruction *Inst,
                                                AtomicOrdering Ord) const {
  switch (Ord) {
  case AtomicOrdering::NotAtomic:
  case AtomicOrdering::Unordered:
    llvm_unreachable("Invalid fence: unordered/non-atomic");
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Acquire:
    return nullptr; // Nothing to do
  case AtomicOrdering::Release:
  case AtomicOrdering::AcquireRelease:
    return Builder.CreateFence(AtomicOrdering::Release);
  case AtomicOrdering::SequentiallyConsistent:
    if (!Inst->hasAtomicStore())
      return nullptr; // Nothing to do
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  }
  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
}
Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
                                                 Instruction *Inst,
                                                 AtomicOrdering Ord) const {
  switch (Ord) {
  case AtomicOrdering::NotAtomic:
  case AtomicOrdering::Unordered:
    llvm_unreachable("Invalid fence: unordered/not-atomic");
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Release:
    return nullptr; // Nothing to do
  case AtomicOrdering::Acquire:
  case AtomicOrdering::AcquireRelease:
    return Builder.CreateFence(AtomicOrdering::Acquire);
  case AtomicOrdering::SequentiallyConsistent:
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  }
  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
}
SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
      cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
      cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());

  // VE uses Release consistency, so we need a fence instruction if it is a
  // cross-thread fence.
  if (FenceSSID == SyncScope::System) {
    switch (FenceOrdering) {
    case AtomicOrdering::NotAtomic:
    case AtomicOrdering::Unordered:
    case AtomicOrdering::Monotonic:
      // No need to generate fencem instruction here.
      break;
    case AtomicOrdering::Acquire:
      // Generate "fencem 2" as acquire fence.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(2, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    case AtomicOrdering::Release:
      // Generate "fencem 1" as release fence.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(1, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      // Generate "fencem 3" as acq_rel and seq_cst fence.
      // FIXME: "fencem 3" doesn't wait for PCIe device accesses,
      //        so seq_cst may require more instructions for them.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(3, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    }
  }

  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
  return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}
TargetLowering::AtomicExpansionKind
VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have TS1AM implementation for i8/i16/i32/i64, so use it.
  if (AI->getOperation() == AtomicRMWInst::Xchg) {
    return AtomicExpansionKind::None;
  }
  // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.

  // Otherwise, expand it using compare and exchange instruction to not call
  // __sync_fetch_and_* functions.
  return AtomicExpansionKind::CmpXChg;
}
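// For example, an "atomicrmw add i32" is rewritten by AtomicExpandPass into a
// compare-and-swap loop because of the CmpXChg result above, while an
// "atomicrmw xchg" of any width is kept and reaches lowerATOMIC_SWAP() below.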
static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
                            SDValue &Bits) {
  SDLoc DL(Op);
  AtomicSDNode *N = cast<AtomicSDNode>(Op);
  SDValue Ptr = N->getOperand(1);
  SDValue Val = N->getOperand(2);
  EVT PtrVT = Ptr.getValueType();
  bool Byte = N->getMemoryVT() == MVT::i8;
  //   Remainder = AND Ptr, 3
  //   Flag = 1 << Remainder  ; If Byte is true (1 byte swap flag)
  //   Flag = 3 << Remainder  ; If Byte is false (2 bytes swap flag)
  //   Bits = Remainder << 3
  //   NewVal = Val << Bits
  SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
  SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
  SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
                      : DAG.getConstant(3, DL, MVT::i32);
  Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
  Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
  return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
}
static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
                             SDValue Bits) {
  SDLoc DL(Op);
  EVT VT = Data.getValueType();
  bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
  //   NewData = Data >> Bits
  //   Result = NewData & 0xff   ; If Byte is true (1 byte)
  //   Result = NewData & 0xffff ; If Byte is false (2 bytes)
  SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
  return DAG.getNode(ISD::AND, DL, VT,
                     {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
}
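// Worked example for a 1-byte swap at an address with (Ptr & 3) == 2:
//   Remainder = 2, Flag = 1 << 2 = 0x4, Bits = 2 << 3 = 16, so prepareTS1AM()
//   shifts Val left by 16, and finalizeTS1AM() shifts the TS1AM result right
//   by 16 and masks it with 0xff to recover the swapped byte.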
SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicSDNode *N = cast<AtomicSDNode>(Op);

  if (N->getMemoryVT() == MVT::i8) {
    // For i8, use "ts1am"
    //   Input:
    //     ATOMIC_SWAP Ptr, Val, Order
    //
    //   Expanded:
    //     Remainder = AND Ptr, 3
    //     Flag = 1 << Remainder   ; 1 byte swap flag for TS1AM inst.
    //     Bits = Remainder << 3
    //     NewVal = Val << Bits
    //
    //     Aligned = AND Ptr, -4
    //     Data = TS1AM Aligned, Flag, NewVal
    //
    //     NewData = Data >> Bits
    //     Result = NewData & 0xff ; 1 byte result
    SDValue Flag;
    SDValue Bits;
    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);

    SDValue Ptr = N->getOperand(1);
    SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
                                  {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
                                  DAG.getVTList(Op.getNode()->getValueType(0),
                                                Op.getNode()->getValueType(1)),
                                  {N->getChain(), Aligned, Flag, NewVal},
                                  N->getMemOperand());

    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
    SDValue Chain = TS1AM.getValue(1);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  if (N->getMemoryVT() == MVT::i16) {
    // For i16, use "ts1am"
    SDValue Flag;
    SDValue Bits;
    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);

    SDValue Ptr = N->getOperand(1);
    SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
                                  {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
                                  DAG.getVTList(Op.getNode()->getValueType(0),
                                                Op.getNode()->getValueType(1)),
                                  {N->getChain(), Aligned, Flag, NewVal},
                                  N->getMemOperand());

    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
    SDValue Chain = TS1AM.getValue(1);
    return DAG.getMergeValues({Result, Chain}, DL);
  }

  // Otherwise, let llvm legalize it.
  return Op;
}
SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::lowerConstantPool(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}
SDValue
VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Generate the following code:
  //   t1: ch,glue = callseq_start t0, 0, 0
  //   t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
  //   t3: ch,glue = callseq_end t2, 0, 0, t2:2
  //   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
  SDValue Label = withTargetFlags(Op, 0, DAG);
  EVT PtrVT = Op.getValueType();

  // Lowering the machine ISD will make sure everything is in the right
  // location.
  SDValue Chain = DAG.getEntryNode();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
      DAG.getMachineFunction(), CallingConv::C);
  Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
  SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
  Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, DL, true),
                             DAG.getIntPtrConstant(0, DL, true),
                             Chain.getValue(1), DL);
  Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));

  // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setHasCalls(true);

  // Also generate code to prepare a GOT register if it is PIC.
  if (isPositionIndependent()) {
    MachineFunction &MF = DAG.getMachineFunction();
    Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
  }

  return Chain;
}
SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The current implementation of nld (2.26) doesn't allow local exec model
  // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
  // generate the general dynamic model code sequence.
  //
  // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
  return lowerToTLSGeneralDynamicModel(Op, DAG);
}
SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}
// Lower a f128 load into two f64 loads.
static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
  assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
  unsigned Alignment = LdNode->getAlign().value();
  if (Alignment > 8)
    Alignment = 8;

  SDValue Lo64 =
      DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
                  LdNode->getPointerInfo(), Alignment,
                  LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                       : MachineMemOperand::MONone);
  EVT AddrVT = LdNode->getBasePtr().getValueType();
  SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
                              DAG.getConstant(8, DL, AddrVT));
  SDValue Hi64 =
      DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
                  LdNode->getPointerInfo(), Alignment,
                  LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                       : MachineMemOperand::MONone);

  SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
  SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);

  // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
  SDNode *InFP128 =
      DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
  InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
                               SDValue(InFP128, 0), Hi64, SubRegEven);
  InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
                               SDValue(InFP128, 0), Lo64, SubRegOdd);
  SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
                          SDValue(Hi64.getNode(), 1)};
  SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
  return DAG.getMergeValues(Ops, DL);
}
SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());

  SDValue BasePtr = LdNode->getBasePtr();
  if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
    // Do not expand a load instruction with a frame index here because of
    // dependency problems. We expand it later in eliminateFrameIndex().
    return Op;
  }

  EVT MemVT = LdNode->getMemoryVT();
  if (MemVT == MVT::f128)
    return lowerLoadF128(Op, DAG);

  return Op;
}
// Lower a f128 store into two f64 stores.
static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
  assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");

  SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
  SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);

  SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
                                    StNode->getValue(), SubRegEven);
  SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
                                    StNode->getValue(), SubRegOdd);

  unsigned Alignment = StNode->getAlign().value();
  if (Alignment > 8)
    Alignment = 8;

  // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
  SDValue OutChains[2];
  OutChains[0] =
      DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0),
                   StNode->getBasePtr(), MachinePointerInfo(), Alignment,
                   StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                        : MachineMemOperand::MONone);
  EVT AddrVT = StNode->getBasePtr().getValueType();
  SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(),
                              DAG.getConstant(8, DL, AddrVT));
  OutChains[1] =
      DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr,
                   MachinePointerInfo(), Alignment,
                   StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                        : MachineMemOperand::MONone);
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
}
SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
  assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");

  SDValue BasePtr = StNode->getBasePtr();
  if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
    // Do not expand store instruction with frame index here because of
    // dependency problems. We expand it later in eliminateFrameIndex().
    return Op;
  }

  EVT MemVT = StNode->getMemoryVT();
  if (MemVT == MVT::f128)
    return lowerStoreF128(Op, DAG);

  // Otherwise, ask llvm to expand it.
  return SDValue();
}
SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Need frame address to find the address of VarArgsFrameIndex.
  MF.getFrameInfo().setFrameAddressIsTaken(true);

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDLoc DL(Op);
  SDValue Offset =
      DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
                  DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
                      MachinePointerInfo(SV));
}
SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  EVT PtrVT = VAListPtr.getValueType();
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc DL(Node);
  SDValue VAList =
      DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
  SDValue Chain = VAList.getValue(1);
  SDValue NextPtr;

  if (VT == MVT::f128) {
    // VE f128 values must be stored with 16-byte alignment.  We don't
    // know the actual alignment of VAList, so we align it manually here.
    int Align = 16;
    VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                         DAG.getConstant(Align - 1, DL, PtrVT));
    VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
                         DAG.getConstant(-Align, DL, PtrVT));
    // Increment the pointer, VAList, by 16 to the next vaarg.
    NextPtr =
        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL));
  } else if (VT == MVT::f32) {
    // float --> need special handling like below.
    // Increment the pointer, VAList, by 8 to the next vaarg.
    NextPtr =
        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
    // Then, adjust VAList.
    unsigned InternalOffset = 4;
    VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                         DAG.getConstant(InternalOffset, DL, PtrVT));
  } else {
    // Increment the pointer, VAList, by 8 to the next vaarg.
    NextPtr =
        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
  }

  // Store the incremented VAList to the legalized pointer.
  InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));

  // Load the actual argument out of the pointer VAList.
  // We can't count on greater alignment than the word size.
  return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(),
                     std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
}
SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Generate following code.
  //   (void)__llvm_grow_stack(size);
  //   ret = GETSTACKTOP;        // pseudo instruction
  SDLoc DL(Op);

  // Get the inputs.
  SDNode *Node = Op.getNode();
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  MaybeAlign Alignment(Op.getConstantOperandVal(2));
  EVT VT = Node->getValueType(0);

  // Chain the dynamic stack allocation so that it doesn't modify the stack
  // pointer when other instructions are using the stack.
  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);

  const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
  Align StackAlign = TFI.getStackAlign();
  bool NeedsAlign = Alignment.valueOrOne() > StackAlign;

  // Prepare arguments
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Node = Size;
  Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
  Args.push_back(Entry);
  if (NeedsAlign) {
    Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Args.push_back(Entry);
  }
  Type *RetTy = Type::getVoidTy(*DAG.getContext());

  EVT PtrVT = Op.getValueType();
  SDValue Callee;
  if (NeedsAlign) {
    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
  } else {
    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
  }

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
      .setDiscardResult(true);
  std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
  Chain = pair.second;
  SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
  if (NeedsAlign) {
    Result = DAG.getNode(ISD::ADD, DL, VT, Result,
                         DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
    Result = DAG.getNode(ISD::AND, DL, VT, Result,
                         DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
  }
  //  Chain = Result.getValue(1);
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
                             DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);

  SDValue Ops[2] = {Result, Chain};
  return DAG.getMergeValues(Ops, DL);
}
SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
                     Op.getOperand(1));
}

SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL,
                     DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
                     Op.getOperand(1));
}

SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
                     Op.getOperand(0));
}
static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
                              const VETargetLowering &TLI,
                              const VESubtarget *Subtarget) {
  SDLoc DL(Op);
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = TLI.getPointerTy(MF.getDataLayout());

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  unsigned Depth = Op.getConstantOperandVal(0);
  const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  unsigned FrameReg = RegInfo->getFrameRegister(MF);
  SDValue FrameAddr =
      DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo());
  return FrameAddr;
}
static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
                               const VETargetLowering &TLI,
                               const VESubtarget *Subtarget) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Offset = DAG.getConstant(8, DL, VT);
  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                     MachinePointerInfo());
}
SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  switch (IntNo) {
  default: // Don't custom lower most intrinsics.
    return SDValue();
  case Intrinsic::eh_sjlj_lsda: {
    MachineFunction &MF = DAG.getMachineFunction();
    MVT VT = Op.getSimpleValueType();
    const VETargetMachine *TM =
        static_cast<const VETargetMachine *>(&DAG.getTarget());

    // Create GCC_except_tableXX string. The real symbol for that will be
    // generated in EHStreamer::emitExceptionTable() later. So, we just
    // borrow its name here.
    TM->getStrList()->push_back(std::string(
        (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
    SDValue Addr =
        DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0);
    if (isPositionIndependent()) {
      Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32,
                          VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
      SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT);
      return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr);
    }
    return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
  }
  }
}

static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
  if (!isa<BuildVectorSDNode>(N))
    return false;
  const auto *BVN = cast<BuildVectorSDNode>(N);

  // Find first non-undef insertion.
  unsigned Idx;
  for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
    auto ElemV = BVN->getOperand(Idx);
    if (!ElemV->isUndef())
      break;
  }
  // Catch the (hypothetical) all-undef case.
  if (Idx == BVN->getNumOperands())
    return false;
  // Remember insertion.
  UniqueIdx = Idx++;
  // Verify that all other insertions are undef.
  for (; Idx < BVN->getNumOperands(); ++Idx) {
    auto ElemV = BVN->getOperand(Idx);
    if (!ElemV->isUndef())
      return false;
  }
  return true;
}

static SDValue getSplatValue(SDNode *N) {
  if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
    return BuildVec->getSplatValue();
  }
  return SDValue();
}

SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  unsigned NumEls = Op.getValueType().getVectorNumElements();
  MVT ElemVT = Op.getSimpleValueType().getVectorElementType();

  // If there is just one element, expand to INSERT_VECTOR_ELT.
  unsigned UniqueIdx;
  if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
    SDValue AccuV = DAG.getUNDEF(Op.getValueType());
    auto ElemV = Op->getOperand(UniqueIdx);
    SDValue IdxV = DAG.getConstant(UniqueIdx, DL, MVT::i64);
    return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), AccuV,
                       ElemV, IdxV);
  }

  // Else emit a broadcast.
  if (SDValue ScalarV = getSplatValue(Op.getNode())) {
    // lower to VEC_BROADCAST
    MVT LegalResVT = MVT::getVectorVT(ElemVT, 256);

    auto AVL = DAG.getConstant(NumEls, DL, MVT::i32);
    return DAG.getNode(VEISD::VEC_BROADCAST, DL, LegalResVT, Op.getOperand(0),
                       AVL);
  }

  // Expand
  return SDValue();
}

SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opcode = Op.getOpcode();
  if (ISD::isVPOpcode(Opcode))
    return lowerToVVP(Op, DAG);

  switch (Opcode) {
  default:
    llvm_unreachable("Should not custom lower this!");
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::ATOMIC_SWAP:
    return lowerATOMIC_SWAP(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:
    return lowerEH_SJLJ_LONGJMP(Op, DAG);
  case ISD::EH_SJLJ_SETJMP:
    return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_SETUP_DISPATCH:
    return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG, *this, Subtarget);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::LOAD:
    return lowerLOAD(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG, *this, Subtarget);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::STORE:
    return lowerSTORE(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::VAARG:
    return lowerVAARG(Op, DAG);

  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);

#define ADD_BINARY_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
#include "VVPNodes.def"
    return lowerToVVP(Op, DAG);
  }
}

void VETargetLowering::ReplaceNodeResults(SDNode *N,
                                          SmallVectorImpl<SDValue> &Results,
                                          SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  case ISD::ATOMIC_SWAP:
    // Let LLVM expand atomic swap instruction through LowerOperation.
    return;
  default:
    LLVM_DEBUG(N->dumpr(&DAG));
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  }
}

/// JumpTable for VE.
///
///   VE cannot generate relocatable symbol in jump table.  VE cannot
///   generate expressions using symbols in both text segment and data
///   segment like below.
///             .4byte  .LBB0_2-.LJTI0_0
///   So, we generate offset from the top of function like below as
///   a custom label.
///             .4byte  .LBB0_2-<function name>

unsigned VETargetLowering::getJumpTableEncoding() const {
  // Use custom label for PIC.
  if (isPositionIndependent())
    return MachineJumpTableInfo::EK_Custom32;

  // Otherwise, use the normal jump table encoding heuristics.
  return TargetLowering::getJumpTableEncoding();
}
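
// Emit one EK_Custom32 jump table entry as the difference between the target
// block's symbol and the enclosing function's symbol, i.e. the
// `.4byte .LBB0_2-<function name>` form described above.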
const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
    const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
    unsigned Uid, MCContext &Ctx) const {
  assert(isPositionIndependent());

  // Generate custom label for PIC like below.
  //    .4bytes  .LBB0_2-<function name>
  const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
  MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data());
  const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);
  return MCBinaryExpr::createSub(Value, Base, Ctx);
}

SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                   SelectionDAG &DAG) const {
  assert(isPositionIndependent());
  SDLoc DL(Table);
  Function *Function = &DAG.getMachineFunction().getFunction();
  assert(Function != nullptr);
  auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace());

  // In the jump table, we have the following values in PIC mode.
  //    .4bytes  .LBB0_2-<function name>
  // We need to add this value and the address of this function to generate
  // the .LBB0_2 label correctly under PIC mode.  So, we want to generate the
  // following instructions:
  //    lea %reg, fun@gotoff_lo
  //    and %reg, %reg, (32)0
  //    lea.sl %reg, fun@gotoff_hi(%reg, %got)
  // In order to do so, we need to generate correctly marked DAG nodes using
  // makeHiLoPair.
  SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy);
  SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
                              VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
  SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy);
  return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo);
}

Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock *TargetBB,
                                      const DebugLoc &DL) const {
  MachineFunction *MF = MBB.getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const VEInstrInfo *TII = Subtarget->getInstrInfo();

  const TargetRegisterClass *RC = &VE::I64RegClass;
  Register Tmp1 = MRI.createVirtualRegister(RC);
  Register Tmp2 = MRI.createVirtualRegister(RC);
  Register Result = MRI.createVirtualRegister(RC);

  if (isPositionIndependent()) {
    // Create following instructions for local linkage PIC code.
    //     lea %Tmp1, TargetBB@gotoff_lo
    //     and %Tmp2, %Tmp1, (32)0
    //     lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
    BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
        .addImm(0)
        .addImm(0)
        .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_LO32);
    BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
        .addReg(Tmp1, getKillRegState(true))
        .addImm(M0(32));
    BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
        .addReg(VE::SX15)
        .addReg(Tmp2, getKillRegState(true))
        .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_HI32);
  } else {
    // Create following instructions for non-PIC code.
    //     lea     %Tmp1, TargetBB@lo
    //     and     %Tmp2, %Tmp1, (32)0
    //     lea.sl  %Result, TargetBB@hi(%Tmp2)
    BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
        .addImm(0)
        .addImm(0)
        .addMBB(TargetBB, VEMCExpr::VK_VE_LO32);
    BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
        .addReg(Tmp1, getKillRegState(true))
        .addImm(M0(32));
    BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
        .addReg(Tmp2, getKillRegState(true))
        .addImm(0)
        .addMBB(TargetBB, VEMCExpr::VK_VE_HI32);
  }
  return Result;
}

Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator I,
                                         StringRef Symbol, const DebugLoc &DL,
                                         bool IsLocal = false,
                                         bool IsCall = false) const {
  MachineFunction *MF = MBB.getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const VEInstrInfo *TII = Subtarget->getInstrInfo();

  const TargetRegisterClass *RC = &VE::I64RegClass;
  Register Result = MRI.createVirtualRegister(RC);

  if (isPositionIndependent()) {
    if (IsCall && !IsLocal) {
      // Create following instructions for non-local linkage PIC code function
      // calls.  These instructions use IC and the magic number -24, so we
      // expand them in VEAsmPrinter.cpp from the GETFUNPLT pseudo instruction.
      //     lea %Reg, Symbol@plt_lo(-24)
      //     and %Reg, %Reg, (32)0
      //     sic %s16
      //     lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
      BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)
          .addExternalSymbol("abort");
    } else if (IsLocal) {
      Register Tmp1 = MRI.createVirtualRegister(RC);
      Register Tmp2 = MRI.createVirtualRegister(RC);
      // Create following instructions for local linkage PIC code.
      //     lea %Tmp1, Symbol@gotoff_lo
      //     and %Tmp2, %Tmp1, (32)0
      //     lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
      BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
          .addImm(0)
          .addImm(0)
          .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_LO32);
      BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
          .addReg(Tmp1, getKillRegState(true))
          .addImm(M0(32));
      BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
          .addReg(VE::SX15)
          .addReg(Tmp2, getKillRegState(true))
          .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_HI32);
    } else {
      Register Tmp1 = MRI.createVirtualRegister(RC);
      Register Tmp2 = MRI.createVirtualRegister(RC);
      // Create following instructions for non-local linkage PIC code.
      //     lea %Tmp1, Symbol@got_lo
      //     and %Tmp2, %Tmp1, (32)0
      //     lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
      //     ld %Result, 0(%Tmp3)
      Register Tmp3 = MRI.createVirtualRegister(RC);
      BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
          .addImm(0)
          .addImm(0)
          .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_LO32);
      BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
          .addReg(Tmp1, getKillRegState(true))
          .addImm(M0(32));
      BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)
          .addReg(VE::SX15)
          .addReg(Tmp2, getKillRegState(true))
          .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_HI32);
      BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)
          .addReg(Tmp3, getKillRegState(true))
          .addImm(0)
          .addImm(0);
    }
  } else {
    Register Tmp1 = MRI.createVirtualRegister(RC);
    Register Tmp2 = MRI.createVirtualRegister(RC);
    // Create following instructions for non-PIC code.
    //     lea %Tmp1, Symbol@lo
    //     and %Tmp2, %Tmp1, (32)0
    //     lea.sl %Result, Symbol@hi(%Tmp2)
    BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
        .addImm(0)
        .addImm(0)
        .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32);
    BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
        .addReg(Tmp1, getKillRegState(true))
        .addImm(M0(32));
    BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
        .addReg(Tmp2, getKillRegState(true))
        .addImm(0)
        .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32);
  }
  return Result;
}

void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
                                              MachineBasicBlock *MBB,
                                              MachineBasicBlock *DispatchBB,
                                              int FI, int Offset) const {
  DebugLoc DL = MI.getDebugLoc();
  const VEInstrInfo *TII = Subtarget->getInstrInfo();

  Register LabelReg =
      prepareMBB(*MBB, MachineBasicBlock::iterator(MI), DispatchBB, DL);

  // Store the address of DispatchBB into the given jmpbuf[1], which holds the
  // next IC referenced by longjmp (throw) later.
  MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
  addFrameReference(MIB, FI, Offset); // jmpbuf[1]
  MIB.addReg(LabelReg, getKillRegState(true));
}

MachineBasicBlock *
VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                   MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  // Memory Reference.
  SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
                                           MI.memoperands_end());
  Register BufReg = MI.getOperand(1).getReg();

  Register DstReg;

  DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");

  Register MainDestReg = MRI.createVirtualRegister(RC);
  Register RestoreDestReg = MRI.createVirtualRegister(RC);

  // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate following
  // instructions.  SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
  //
  // ThisMBB:
  //   buf[3] = %s17 iff %s17 is used as BP
  //   buf[1] = RestoreMBB as IC after longjmp
  //   # SjLjSetup RestoreMBB
  //
  // MainMBB:
  //   v_main = 0
  //
  // SinkMBB:
  //   v = phi(v_main, MainMBB, v_restore, RestoreMBB)
  //
  // RestoreMBB:
  //   %s17 = buf[3] iff %s17 is used as BP
  //   v_restore = 1
  //   goto SinkMBB

  MachineBasicBlock *ThisMBB = MBB;
  MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, MainMBB);
  MF->insert(I, SinkMBB);
  MF->push_back(RestoreMBB);
  RestoreMBB->setHasAddressTaken();

  // Transfer the remainder of BB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // ThisMBB:
  Register LabelReg =
      prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL);

  // Store BP in buf[3] iff this function is using BP.
  const VEFrameLowering *TFI = Subtarget->getFrameLowering();
  if (TFI->hasBP(*MF)) {
    MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
    MIB.addReg(BufReg);
    MIB.addImm(0);
    MIB.addImm(24);
    MIB.addReg(VE::SX17);
    MIB.setMemRefs(MMOs);
  }

  // Store IP in buf[1].
  MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
  MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.
  MIB.addImm(0);
  MIB.addImm(8);
  MIB.addReg(LabelReg, getKillRegState(true));
  MIB.setMemRefs(MMOs);

  // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.

  // Insert setup.
  MIB =
      BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);

  const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  MIB.addRegMask(RegInfo->getNoPreservedMask());
  ThisMBB->addSuccessor(MainMBB);
  ThisMBB->addSuccessor(RestoreMBB);

  // MainMBB:
  BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
      .addImm(0)
      .addImm(0)
      .addImm(0);
  MainMBB->addSuccessor(SinkMBB);

  // SinkMBB:
  BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
      .addReg(MainDestReg)
      .addMBB(MainMBB)
      .addReg(RestoreDestReg)
      .addMBB(RestoreMBB);

  // RestoreMBB:
  // Restore BP from buf[3] iff this function is using BP.  The address of
  // buf is in SX10.
  // FIXME: Better to not use SX10 here
  if (TFI->hasBP(*MF)) {
    MachineInstrBuilder MIB =
        BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
    MIB.addReg(VE::SX10);
    MIB.addImm(0);
    MIB.addImm(24);
    MIB.setMemRefs(MMOs);
  }
  BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
      .addImm(0)
      .addImm(0)
      .addImm(1);
  BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
  RestoreMBB->addSuccessor(SinkMBB);

  MI.eraseFromParent();
  return SinkMBB;
}

MachineBasicBlock *
VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  // Memory Reference.
  SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
                                           MI.memoperands_end());
  Register BufReg = MI.getOperand(0).getReg();

  Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  Register FP = VE::SX9;
  Register SP = VE::SX11;

  MachineInstrBuilder MIB;

  MachineBasicBlock *ThisMBB = MBB;

  // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate following instructions.
  //
  // ThisMBB:
  //   %fp = load buf[0]
  //   %jmp = load buf[1]
  //   %s10 = buf        ; Store an address of buf to SX10 for RestoreMBB
  //   %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
  //   jmp %jmp

  // Reload FP.
  MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);
  MIB.add(MI.getOperand(0));
  MIB.addImm(0);
  MIB.addImm(0);
  MIB.setMemRefs(MMOs);

  // Reload IP.
  MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);
  MIB.add(MI.getOperand(0));
  MIB.addImm(0);
  MIB.addImm(8);
  MIB.setMemRefs(MMOs);

  // Copy BufReg to SX10 for later use in setjmp.
  // FIXME: Better to not use SX10 here
  BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)
      .addReg(BufReg)
      .addImm(0);

  // Reload SP.
  MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);
  MIB.add(MI.getOperand(0)); // we can preserve the kill flags here.
  MIB.addImm(0);
  MIB.addImm(16);
  MIB.setMemRefs(MMOs);

  // Jump.
  BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))
      .addReg(Tmp, getKillRegState(true))
      .addImm(0);

  MI.eraseFromParent();
  return ThisMBB;
}

MachineBasicBlock *
VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
                                        MachineBasicBlock *BB) const {
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = BB->getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const VEInstrInfo *TII = Subtarget->getInstrInfo();
  int FI = MFI.getFunctionContextIndex();

  // Get a mapping of the call site numbers to all of the landing pads they're
  // associated with.
  DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
  unsigned MaxCSNum = 0;
  for (auto &MBB : *MF) {
    if (!MBB.isEHPad())
      continue;

    MCSymbol *Sym = nullptr;
    for (const auto &MI : MBB) {
      if (MI.isDebugInstr())
        continue;

      assert(MI.isEHLabel() && "expected EH_LABEL");
      Sym = MI.getOperand(0).getMCSymbol();
      break;
    }

    if (!MF->hasCallSiteLandingPad(Sym))
      continue;

    for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
      CallSiteNumToLPad[CSI].push_back(&MBB);
      MaxCSNum = std::max(MaxCSNum, CSI);
    }
  }

  // Get an ordered list of the machine basic blocks for the jump table.
  std::vector<MachineBasicBlock *> LPadList;
  SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
  LPadList.reserve(CallSiteNumToLPad.size());

  for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
    for (auto &LP : CallSiteNumToLPad[CSI]) {
      LPadList.push_back(LP);
      InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
    }
  }

  assert(!LPadList.empty() &&
         "No landing pad destinations for the dispatch jump table!");

  // The %fn_context is allocated like below (from --print-after=sjljehprepare):
  //   %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
  //
  // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
  // First `i64` is callsite, so callsite is FI+8.
  static const int OffsetIC = 72;
  static const int OffsetCS = 8;
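  // Offset arithmetic for the layout above (all fields are 8 bytes on VE):
  // i8* + i64 + [4 x i64] + i8* + i8* = 8 + 8 + 32 + 8 + 8 = 64 bytes before
  // jmpbuf, so jmpbuf[1] is at 64 + 8 = 72.  The callsite field is the i64
  // right after the leading i8*, i.e. at offset 8.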

  // Create the MBBs for the dispatch code like the following:
  //
  // ThisMBB:
  //   Prepare DispatchBB address and store it to buf[1].
  //   ...
  //
  // DispatchBB:
  //   %s15 = GETGOT iff isPositionIndependent
  //   %callsite = load callsite
  //   brgt.l.t #size of callsites, %callsite, DispContBB
  //
  // TrapBB:
  //   Call abort.
  //
  // DispContBB:
  //   %breg = address of jump table
  //   %pc = load and calculate next pc from %breg and %callsite
  //   jmp %pc

  // Shove the dispatch's address into the return slot in the function context.
  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
  DispatchBB->setIsEHPad(true);

  // TrapBB will cause a trap like `assert(0)`.
  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
  DispatchBB->addSuccessor(TrapBB);

  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
  DispatchBB->addSuccessor(DispContBB);

  // Insert MBBs.
  MF->push_back(DispatchBB);
  MF->push_back(DispContBB);
  MF->push_back(TrapBB);

  // Insert code to call abort in the TrapBB.
  Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL,
                                 /* Local */ false, /* Call */ true);
  BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)
      .addReg(Abort, getKillRegState(true))
      .addImm(0)
      .addImm(0);

  // Insert code into the entry block that creates and registers the function
  // context.
  setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC);

  // Create the jump table and associated information
  unsigned JTE = getJumpTableEncoding();
  MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);
  unsigned MJTI = JTI->createJumpTableIndex(LPadList);

  const VERegisterInfo &RI = TII->getRegisterInfo();
  // Add a register mask with no preserved registers.  This results in all
  // registers being marked as clobbered.
  BuildMI(DispatchBB, DL, TII->get(VE::NOP))
      .addRegMask(RI.getNoPreservedMask());

  if (isPositionIndependent()) {
    // Force to generate GETGOT, since current implementation doesn't store GOT
    // register.
    BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);
  }

  // IReg is used as an index in a memory operand and therefore can't be SP
  const TargetRegisterClass *RC = &VE::I64RegClass;
  Register IReg = MRI.createVirtualRegister(RC);
  addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,
                    OffsetCS);
  if (LPadList.size() < 64) {
    BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))
        .addImm(VECC::CC_ILE)
        .addImm(LPadList.size())
        .addReg(IReg)
        .addMBB(TrapBB);
  } else {
    assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)
        .addImm(0)
        .addImm(0)
        .addImm(LPadList.size());
    BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t))
        .addImm(VECC::CC_ILE)
        .addReg(TmpReg, getKillRegState(true))
        .addReg(IReg)
        .addMBB(TrapBB);
  }

  Register BReg = MRI.createVirtualRegister(RC);
  Register Tmp1 = MRI.createVirtualRegister(RC);
  Register Tmp2 = MRI.createVirtualRegister(RC);

  if (isPositionIndependent()) {
    // Create following instructions for local linkage PIC code.
    //     lea %Tmp1, .LJTI0_0@gotoff_lo
    //     and %Tmp2, %Tmp1, (32)0
    //     lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
    BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
        .addImm(0)
        .addImm(0)
        .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32);
    BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
        .addReg(Tmp1, getKillRegState(true))
        .addImm(M0(32));
    BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)
        .addReg(VE::SX15)
        .addReg(Tmp2, getKillRegState(true))
        .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32);
  } else {
    // Create following instructions for non-PIC code.
    //     lea     %Tmp1, .LJTI0_0@lo
    //     and     %Tmp2, %Tmp1, (32)0
    //     lea.sl  %BReg, .LJTI0_0@hi(%Tmp2)
    BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
        .addImm(0)
        .addImm(0)
        .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32);
    BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
        .addReg(Tmp1, getKillRegState(true))
        .addImm(M0(32));
    BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)
        .addReg(Tmp2, getKillRegState(true))
        .addImm(0)
        .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32);
  }

  switch (JTE) {
  case MachineJumpTableInfo::EK_BlockAddress: {
    // Generate simple block address code for no-PIC model.
    //     sll %Tmp1, %IReg, 3
    //     lds %TReg, 0(%Tmp1, %BReg)
    //     bcfla %TReg

    Register TReg = MRI.createVirtualRegister(RC);
    Register Tmp1 = MRI.createVirtualRegister(RC);

    BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
        .addReg(IReg, getKillRegState(true))
        .addImm(3);
    BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg)
        .addReg(BReg, getKillRegState(true))
        .addReg(Tmp1, getKillRegState(true))
        .addImm(0);
    BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
        .addReg(TReg, getKillRegState(true))
        .addImm(0);
    break;
  }
  case MachineJumpTableInfo::EK_Custom32: {
    // Generate block address code using differences from the function pointer
    // itself.
    //     sll %Tmp1, %IReg, 2
    //     ldl.zx %OReg, 0(%Tmp1, %BReg)
    //     Prepare function address in BReg2.
    //     adds.l %TReg, %BReg2, %OReg
    //     bcfla %TReg

    assert(isPositionIndependent());
    Register OReg = MRI.createVirtualRegister(RC);
    Register TReg = MRI.createVirtualRegister(RC);
    Register Tmp1 = MRI.createVirtualRegister(RC);

    BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
        .addReg(IReg, getKillRegState(true))
        .addImm(2);
    BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)
        .addReg(BReg, getKillRegState(true))
        .addReg(Tmp1, getKillRegState(true))
        .addImm(0);
    Register BReg2 =
        prepareSymbol(*DispContBB, DispContBB->end(),
                      DispContBB->getParent()->getName(), DL, /* Local */ true);
    BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)
        .addReg(OReg, getKillRegState(true))
        .addReg(BReg2, getKillRegState(true));
    BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
        .addReg(TReg, getKillRegState(true))
        .addImm(0);
    break;
  }
  default:
    llvm_unreachable("Unexpected jump table encoding");
  }

  // Add the jump table entries as successors to the MBB.
  SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
  for (auto &LP : LPadList)
    if (SeenMBBs.insert(LP).second)
      DispContBB->addSuccessor(LP);

  // N.B. the order the invoke BBs are processed in doesn't matter here.
  SmallVector<MachineBasicBlock *, 64> MBBLPads;
  const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
  for (MachineBasicBlock *MBB : InvokeBBs) {
    // Remove the landing pad successor from the invoke block and replace it
    // with the new dispatch block.
    // Keep a copy of Successors since it's modified inside the loop.
    SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
                                                   MBB->succ_rend());
    // FIXME: Avoid quadratic complexity.
    for (auto MBBS : Successors) {
      if (MBBS->isEHPad()) {
        MBB->removeSuccessor(MBBS);
        MBBLPads.push_back(MBBS);
      }
    }

    MBB->addSuccessor(DispatchBB);

    // Find the invoke call and mark all of the callee-saved registers as
    // 'implicit defined' so that they're spilled.  This prevents code from
    // moving instructions to before the EH block, where they will never be
    // executed.
    for (auto &II : reverse(*MBB)) {
      if (!II.isCall())
        continue;

      DenseMap<Register, bool> DefRegs;
      for (auto &MOp : II.operands())
        if (MOp.isReg())
          DefRegs[MOp.getReg()] = true;

      MachineInstrBuilder MIB(*MF, &II);
      for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
        Register Reg = SavedRegs[RI];
        if (!DefRegs[Reg])
          MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
      }

      break;
    }
  }

  // Mark all former landing pads as non-landing pads.  The dispatch is the
  // only landing pad now.
  for (auto &LP : MBBLPads)
    LP->setIsEHPad(false);

  // The instruction is gone now.
  MI.eraseFromParent();
  return BB;
}

MachineBasicBlock *
VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                              MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown Custom Instruction!");
  case VE::EH_SjLj_LongJmp:
    return emitEHSjLjLongJmp(MI, BB);
  case VE::EH_SjLj_SetJmp:
    return emitEHSjLjSetJmp(MI, BB);
  case VE::EH_SjLj_Setup_Dispatch:
    return emitSjLjDispatchBlock(MI, BB);
  }
}

static bool isI32Insn(const SDNode *User, const SDNode *N) {
  switch (User->getOpcode()) {
  default:
    return false;
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SETCC:
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::SHL:
  case ISD::SRA:
  case ISD::BSWAP:
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::BR_CC:
  case ISD::BITCAST:
  case ISD::ATOMIC_CMP_SWAP:
  case ISD::ATOMIC_SWAP:
    return true;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() != ISD::SRL)
      return true;
    // (srl (trunc (srl ...))) may be optimized by combining srl, so
    // don't optimize the trunc now.
    return false;
  case ISD::SELECT_CC:
    if (User->getOperand(2).getNode() != N &&
        User->getOperand(3).getNode() != N)
      return true;
    LLVM_FALLTHROUGH;
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
  case ISD::SELECT:
  case ISD::CopyToReg:
    // Check all uses of selections, bit operations, and copies.  If all of
    // them are safe, optimize truncate to extract_subreg.
    for (SDNode::use_iterator UI = User->use_begin(), UE = User->use_end();
         UI != UE; ++UI) {
      switch ((*UI)->getOpcode()) {
      default:
        // If the use is an instruction which treats the source operand as i32,
        // it is safe to avoid truncate here.
        if (isI32Insn(*UI, N))
          continue;
        break;
      case ISD::ANY_EXTEND:
      case ISD::SIGN_EXTEND:
      case ISD::ZERO_EXTEND: {
        // Special optimizations to the combination of ext and trunc.
        // (ext ... (select ... (trunc ...))) is safe to avoid truncate here
        // since this truncate instruction clears higher 32 bits which are
        // filled by one of the ext instructions later.
        assert(N->getValueType(0) == MVT::i32 &&
               "find truncate to not i32 integer");
        if (User->getOpcode() == ISD::SELECT_CC ||
            User->getOpcode() == ISD::SELECT)
          continue;
        break;
      }
      }
      return false;
    }
    return true;
  }
}

// Optimize TRUNCATE in DAG combining.  Optimizing it in CUSTOM lower is
// sometimes too early.  Optimizing it in DAG pattern matching in
// VEInstrInfo.td is sometimes too late.  So, we do it here.
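// The transformation performed below is effectively
//   (i32 (truncate X:i64)) -> (EXTRACT_SUBREG X, sub_i32)
// and is applied only when every user of the truncate treats its operand as
// an i32 value (see isI32Insn above).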
SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::TRUNCATE &&
         "Should be called with a TRUNCATE node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // We prefer to do this when all types are legal.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // Skip combine TRUNCATE atm if the operand of TRUNCATE might be a constant.
  if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC &&
      isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) &&
      isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
    return SDValue();

  // Check all uses of this TRUNCATE.
  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
       ++UI) {
    SDNode *User = *UI;

    // Make sure that we're not going to replace TRUNCATE for non-i32
    // instructions.
    //
    // FIXME: Although we could sometimes handle this, and it does occur in
    // practice that one of the condition inputs to the select is also one of
    // the outputs, we currently can't deal with this.
    if (isI32Insn(User, N))
      continue;

    return SDValue();
  }

  SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
  return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT,
                                    N->getOperand(0), SubI32),
                 0);
}

SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
                                            DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default:
    break;
  case ISD::TRUNCATE:
    return combineTRUNCATE(N, DCI);
  }

  return SDValue();
}

//===----------------------------------------------------------------------===//
// VE Inline Assembly Support
//===----------------------------------------------------------------------===//

VETargetLowering::ConstraintType
VETargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'v': // vector registers
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                               StringRef Constraint,
                                               MVT VT) const {
  const TargetRegisterClass *RC = nullptr;
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
    case 'r':
      RC = &VE::I64RegClass;
      break;
    case 'v':
      RC = &VE::V64RegClass;
      break;
    }
    return std::make_pair(0U, RC);
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

//===----------------------------------------------------------------------===//
// VE Target Optimization Support
//===----------------------------------------------------------------------===//

unsigned VETargetLowering::getMinimumJumpTableEntries() const {
  // Specify 8 for PIC model to relieve the impact of PIC load instructions.
  if (isJumpTableRelative())
    return 8;

  return TargetLowering::getMinimumJumpTableEntries();
}

bool VETargetLowering::hasAndNot(SDValue Y) const {
  EVT VT = Y.getValueType();

  // VE doesn't have a vector and-not instruction.
  if (VT.isVector())
    return false;

  // VE allows different immediate values for X and Y where ~X & Y.
  // Only simm7 works for X, and only mimm works for Y on VE.  However, this
  // function is used to check whether an immediate value is OK for an and-not
  // instruction as both X and Y.  Generating an additional instruction to
  // retrieve an immediate value is no good since the purpose of this
  // function is to convert a series of 3 instructions to another series of
  // 3 instructions with better parallelism.  Therefore, we return false
  // for all immediate values now.
  // FIXME: Change hasAndNot function to have two operands to make it work
  //        correctly with Aurora VE.
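  // In short: the and-not pattern is only reported as cheap when Y is a plain
  // register operand; for any constant operand we conservatively answer false.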
  if (isa<ConstantSDNode>(Y))
    return false;

  // It's ok for generic registers.
  return true;
}

/// \returns the VVP_* SDNode opcode corresponding to \p OC.
static Optional<unsigned> getVVPOpcode(unsigned Opcode) {
  switch (Opcode) {
#define HANDLE_VP_TO_VVP(VPOPC, VVPNAME)                                       \
  case ISD::VPOPC:                                                             \
    return VEISD::VVPNAME;
#define ADD_VVP_OP(VVPNAME, SDNAME)                                            \
  case VEISD::VVPNAME:                                                         \
  case ISD::SDNAME:                                                            \
    return VEISD::VVPNAME;
#include "VVPNodes.def"
  }
  return None;
}

SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
  // Can we represent this as a VVP node.
  const unsigned Opcode = Op->getOpcode();
  auto VVPOpcodeOpt = getVVPOpcode(Opcode);
  if (!VVPOpcodeOpt.hasValue())
    return SDValue();
  unsigned VVPOpcode = VVPOpcodeOpt.getValue();
  const bool FromVP = ISD::isVPOpcode(Opcode);

  // The representative and legalized vector type of this operation.
  SDLoc DL(Op);
  MVT MaskVT = MVT::v256i1; // TODO: packed mode.
  EVT OpVecVT = Op.getValueType();
  EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);

  SDValue AVL;
  SDValue Mask;

  if (FromVP) {
    // All upstream VP SDNodes always have a mask and avl.
    auto MaskIdx = ISD::getVPMaskIdx(Opcode).getValue();
    auto AVLIdx = ISD::getVPExplicitVectorLengthIdx(Opcode).getValue();
    Mask = Op->getOperand(MaskIdx);
    AVL = Op->getOperand(AVLIdx);

  } else {
    // Materialize the VL parameter.
    AVL = DAG.getConstant(OpVecVT.getVectorNumElements(), DL, MVT::i32);
    SDValue ConstTrue = DAG.getConstant(1, DL, MVT::i32);
    Mask = DAG.getNode(VEISD::VEC_BROADCAST, DL, MaskVT,
                       ConstTrue); // emit a VEISD::VEC_BROADCAST here.
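    // Broadcasting the scalar 1 into v256i1 yields an all-true mask, so a
    // non-VP node is lowered as an unmasked, full-length VVP operation.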
  }

  // Categories we are interested in.
  bool IsBinaryOp = false;

  switch (VVPOpcode) {
#define ADD_BINARY_VVP_OP(VVPNAME, ...)                                        \
  case VEISD::VVPNAME:                                                         \
    IsBinaryOp = true;                                                         \
    break;
#include "VVPNodes.def"
  }

  if (IsBinaryOp) {
    assert(LegalVecVT.isSimple());
    return DAG.getNode(VVPOpcode, DL, LegalVecVT, Op->getOperand(0),
                       Op->getOperand(1), Mask, AVL);
  }
  llvm_unreachable("lowerToVVP called for unexpected SDNode.");
}

SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
  MVT VT = Op.getOperand(0).getSimpleValueType();

  // Special treatment for packed V64 types.
  assert(VT == MVT::v512i32 || VT == MVT::v512f32);

  // Example of generated code:
  //   %packed_v = extractelt %vr, %idx / 2
  //   %v = %packed_v >> (%idx % 2 * 32)
  //   %res = %v & 0xffffffff
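  // Note: the shift actually computed below is ((%idx & 1) ^ 1) << 5, i.e. 32
  // for even indices (the element lives in the upper half of the packed word)
  // and 0 for odd indices, matching the packed layout described in
  // lowerINSERT_VECTOR_ELT.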

  SDValue Vec = Op.getOperand(0);
  SDValue Idx = Op.getOperand(1);
  SDLoc DL(Op);
  SDValue Result = Op;
  if (0 /* Idx->isConstant() */) {
    // TODO: optimized implementation using constant values
  } else {
    SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
    SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
    SDValue PackedElt =
        SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
    SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
    SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
    SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
    Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
    PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
    SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
    PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
    SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
    Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                        MVT::i32, PackedElt, SubI32),
                     0);

    if (Op.getSimpleValueType() == MVT::f32) {
      Result = DAG.getBitcast(MVT::f32, Result);
    } else {
      assert(Op.getSimpleValueType() == MVT::i32);
    }
  }
  return Result;
}

SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
  MVT VT = Op.getOperand(0).getSimpleValueType();

  // Special treatment for packed V64 types.
  assert(VT == MVT::v512i32 || VT == MVT::v512f32);

  // The v512i32 and v512f32 types start from the upper bits (0..31).  Filling
  // these "upper bits" requires `val << 32` from a C implementation's point
  // of view.
  //
  // Example of generated code:
  //   %packed_elt = extractelt %vr, (%idx >> 1)
  //   %shift = ((%idx & 1) ^ 1) << 5
  //   %packed_elt &= 0xffffffff00000000 >> shift
  //   %packed_elt |= (zext %val) << shift
  //   %vr = insertelt %vr, %packed_elt, (%idx >> 1)
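  // For example, %idx == 2 selects packed word 1 (2 >> 1); %shift is then
  // ((2 & 1) ^ 1) << 5 == 32, so the old upper 32 bits are cleared and
  // (zext %val) << 32 is merged in, leaving the odd neighbor untouched.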

  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);
  if (Idx.getSimpleValueType() == MVT::i32)
    Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
  if (Val.getSimpleValueType() == MVT::f32)
    Val = DAG.getBitcast(MVT::i32, Val);
  assert(Val.getSimpleValueType() == MVT::i32);
  Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);

  SDValue Result = Op;
  if (0 /* Idx->isConstant() */) {
    // TODO: optimized implementation using constant values
  } else {
    SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
    SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
    SDValue PackedElt =
        SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
    SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
    SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
    SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
    Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
    SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
    Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
    PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
    Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
    PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
    Result =
        SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
                                   {HalfIdx, PackedElt, Vec}),