//===-- VEISelLowering.cpp - VE DAG Lowering Implementation --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that VE uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "VEISelLowering.h"
#include "MCTargetDesc/VEMCExpr.h"
#include "VECustomDAG.h"
#include "VEInstrBuilder.h"
#include "VEMachineFunctionInfo.h"
#include "VERegisterInfo.h"
#include "VETargetMachine.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

#define DEBUG_TYPE "ve-lower"

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "VEGenCallingConv.inc"

CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
  switch (CallConv) {
  default:
    return RetCC_VE_C;
  case CallingConv::Fast:
    return RetCC_VE_Fast;
  }
}

CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
  if (IsVarArg)
    return CC_VE2;
  switch (CallConv) {
  default:
    return CC_VE_C;
  case CallingConv::Fast:
    return CC_VE_Fast;
  }
}
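
// Note on the two helpers above: a varargs call is analyzed twice in
// LowerCall below -- once with getParamCC(CallConv, false) to assign
// registers, and once with getParamCC(CallConv, true) so that every operand
// also receives a stack slot, because VE passes such arguments in both
// places.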

bool VETargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  CCAssignFn *RetCC = getReturnCC(CallConv);
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC);
}

static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
                                   MVT::v256f32, MVT::v512f32, MVT::v256f64};

static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};

static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};

void VETargetLowering::initRegisterClasses() {
  // Set up the register classes.
  addRegisterClass(MVT::i32, &VE::I32RegClass);
  addRegisterClass(MVT::i64, &VE::I64RegClass);
  addRegisterClass(MVT::f32, &VE::F32RegClass);
  addRegisterClass(MVT::f64, &VE::I64RegClass);
  addRegisterClass(MVT::f128, &VE::F128RegClass);

  if (Subtarget->enableVPU()) {
    for (MVT VecVT : AllVectorVTs)
      addRegisterClass(VecVT, &VE::V64RegClass);
    addRegisterClass(MVT::v256i1, &VE::VMRegClass);
    addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
  }
}
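
// Note: f64 is deliberately registered in VE::I64RegClass rather than a
// dedicated FP class above; VE keeps double-precision values in the same
// 64-bit scalar registers as i64, so only f32 and f128 need classes of
// their own.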

void VETargetLowering::initSPUActions() {
  const auto &TM = getTargetMachine();

  // VE doesn't have i1 sign extending load.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }

  // VE doesn't have floating point extload/truncstore, so expand them.
  for (MVT FPVT : MVT::fp_valuetypes()) {
    for (MVT OtherFPVT : MVT::fp_valuetypes()) {
      setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
      setTruncStoreAction(FPVT, OtherFPVT, Expand);
    }
  }

  // VE doesn't have fp128 load/store, so expand them in custom lower.
  setOperationAction(ISD::LOAD, MVT::f128, Custom);
  setOperationAction(ISD::STORE, MVT::f128, Custom);

  // Custom legalize address nodes into LO/HI parts.
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::ConstantPool, PtrVT, Custom);
  setOperationAction(ISD::JumpTable, PtrVT, Custom);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  // VAARG needs to be lowered to access with 8 bytes alignment.
  setOperationAction(ISD::VAARG, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);

  // Use the default implementation.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  // VE doesn't have BRCOND
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // BR_JT is not implemented yet.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  for (MVT IntVT : {MVT::i32, MVT::i64}) {
    // VE has no REM or DIVREM operations.
    setOperationAction(ISD::UREM, IntVT, Expand);
    setOperationAction(ISD::SREM, IntVT, Expand);
    setOperationAction(ISD::SDIVREM, IntVT, Expand);
    setOperationAction(ISD::UDIVREM, IntVT, Expand);

    // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
    setOperationAction(ISD::SHL_PARTS, IntVT, Expand);
    setOperationAction(ISD::SRA_PARTS, IntVT, Expand);
    setOperationAction(ISD::SRL_PARTS, IntVT, Expand);

    // VE has no MULHU/S or U/SMUL_LOHI operations.
    // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
    setOperationAction(ISD::MULHU, IntVT, Expand);
    setOperationAction(ISD::MULHS, IntVT, Expand);
    setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);
    setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);

    // VE has no CTTZ, ROTL, ROTR operations.
    setOperationAction(ISD::CTTZ, IntVT, Expand);
    setOperationAction(ISD::ROTL, IntVT, Expand);
    setOperationAction(ISD::ROTR, IntVT, Expand);

    // VE has a 64-bit instruction which works as an i64 BSWAP operation. The
    // same instruction works fine as an i32 BSWAP operation with an
    // additional parameter. Use isel patterns to lower BSWAP.
    setOperationAction(ISD::BSWAP, IntVT, Legal);

    // VE has only 64-bit instructions which work as i64 BITREVERSE/CTLZ/CTPOP
    // operations. Use isel patterns for i64, promote for i32.
    LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
    setOperationAction(ISD::BITREVERSE, IntVT, Act);
    setOperationAction(ISD::CTLZ, IntVT, Act);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
    setOperationAction(ISD::CTPOP, IntVT, Act);

    // VE has only 64-bit instructions which work as i64 AND/OR/XOR operations.
    // Use isel patterns for i64, promote for i32.
    setOperationAction(ISD::AND, IntVT, Act);
    setOperationAction(ISD::OR, IntVT, Act);
    setOperationAction(ISD::XOR, IntVT, Act);

    // Legal smax and smin
    setOperationAction(ISD::SMAX, IntVT, Legal);
    setOperationAction(ISD::SMIN, IntVT, Legal);
  }
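
  // With Act set to Promote for i32 above, an i32 CTPOP/CTLZ/BITREVERSE (and
  // AND/OR/XOR) is widened to i64 by the legalizer and then matched by the
  // 64-bit isel pattern, so no separate 32-bit patterns are needed.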

  // VE doesn't have instructions for fp<->uint, so expand them by llvm
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);

  // fp16 not supported
  for (MVT FPVT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
    setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
  }

  /// Floating-point Ops {
  /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, and
  /// frem.

  // VE doesn't have the following floating point operations.
  for (MVT VT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FNEG, VT, Expand);
    setOperationAction(ISD::FREM, VT, Expand);
  }

  // VE doesn't have fdiv of f128.
  setOperationAction(ISD::FDIV, MVT::f128, Expand);

  for (MVT FPVT : {MVT::f32, MVT::f64}) {
    // f32 and f64 use ConstantFP.  f128 uses ConstantPool.
    setOperationAction(ISD::ConstantFP, FPVT, Legal);
  }
  /// } Floating-point Ops

  /// Floating-point math functions {

  // VE doesn't have the following floating point math functions.
  for (MVT VT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FABS, VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, VT, Expand);
    setOperationAction(ISD::FCOS, VT, Expand);
    setOperationAction(ISD::FMA, VT, Expand);
    setOperationAction(ISD::FPOW, VT, Expand);
    setOperationAction(ISD::FSIN, VT, Expand);
    setOperationAction(ISD::FSQRT, VT, Expand);
  }

  // VE has single and double FMINNUM and FMAXNUM
  for (MVT VT : {MVT::f32, MVT::f64}) {
    setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, VT, Legal);
  }

  /// } Floating-point math functions

  /// Atomic instructions {

  setMaxAtomicSizeInBitsSupported(64);
  setMinCmpXchgSizeInBits(32);
  setSupportsUnalignedAtomics(false);

  // Use custom inserter for ATOMIC_FENCE.
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Other atomic instructions.
  for (MVT VT : MVT::integer_valuetypes()) {
    // Support i8/i16 atomic swap.
    setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);

    // FIXME: Support "atmam" instructions.
    setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);

    // VE doesn't have the following instructions.
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
  }

  /// } Atomic instructions
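
  // The i8/i16 ATOMIC_SWAP marked Custom above is handled by
  // lowerATOMIC_SWAP further below, which rewrites the access into a
  // 4-byte-aligned TS1AM operation; the remaining RMW operations are
  // expanded into compare-and-swap loops (see shouldExpandAtomicRMWInIR).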

  /// SJLJ instructions {
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
  if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
  /// } SJLJ instructions

  // Intrinsic instructions
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
}

void VETargetLowering::initVPUActions() {
  for (MVT LegalMaskVT : AllMaskVTs)
    setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);

  for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
    setOperationAction(Opc, MVT::v512i1, Custom);

  for (MVT LegalVecVT : AllVectorVTs) {
    setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
    // Translate all vector instructions with legal element types to VVP_*
    // nodes.
    // TODO We will custom-widen into VVP_* nodes in the future. While we are
    // building the infrastructure for this, we only do this for legal vector
    // VTs.
#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME)                                     \
  setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
#define ADD_VVP_OP(VVP_NAME, ISD_NAME)                                         \
  setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
    setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, LegalVecVT, Custom);
    setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_STORE, LegalVecVT, Custom);
#include "VVPNodes.def"
  }

  for (MVT LegalPackedVT : AllPackedVTs) {
    setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
  }

  // vNt32, vNt64 ops (legal element types)
  for (MVT VT : MVT::vector_valuetypes()) {
    MVT ElemVT = VT.getVectorElementType();
    unsigned ElemBits = ElemVT.getScalarSizeInBits();
    if (ElemBits != 32 && ElemBits != 64)
      continue;

    for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
      setOperationAction(MemOpc, VT, Custom);

    const ISD::NodeType IntReductionOCs[] = {
        ISD::VECREDUCE_ADD,  ISD::VECREDUCE_MUL,  ISD::VECREDUCE_AND,
        ISD::VECREDUCE_OR,   ISD::VECREDUCE_XOR,  ISD::VECREDUCE_SMIN,
        ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};

    for (unsigned IntRedOpc : IntReductionOCs)
      setOperationAction(IntRedOpc, VT, Custom);
  }

  // v256i1 and v512i1 ops
  for (MVT MaskVT : AllMaskVTs) {
    // Custom lower mask ops
    setOperationAction(ISD::STORE, MaskVT, Custom);
    setOperationAction(ISD::LOAD, MaskVT, Custom);
  }
}

SDValue
VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                              bool IsVarArg,
                              const SmallVectorImpl<ISD::OutputArg> &Outs,
                              const SmallVectorImpl<SDValue> &OutVals,
                              const SDLoc &DL, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    SDValue OutVal = OutVals[i];

    // Integer return values must be sign or zero extended by the callee.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::ZExt:
      OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::AExt:
      OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::BCvt: {
      // Convert a float return value to i64 with padding.
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Undef = SDValue(
          DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                          MVT::i64, Undef, OutVal, Sub_f32),
                       0);
      break;
    }
    default:
      llvm_unreachable("Unknown loc info!");
    }

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Glue);

    // Guarantee that all emitted copies are stuck together with flags.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(VEISD::RET_GLUE, DL, MVT::Other, RetOps);
}

SDValue VETargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Get the base offset of the incoming arguments stack space.
  unsigned ArgsBaseOffset = Subtarget->getRsaSize();
  // Get the size of the preserved arguments area
  unsigned ArgsPreserved = 64;

  // Analyze arguments according to CC_VE.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offset computed
  // by CC_VE would be correct now.
  CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    if (VA.isRegLoc()) {
      // This argument is passed in a register.
      // All integer register arguments are promoted by the caller to i64.

      // Create a virtual register for the promoted live-in value.
      Register VReg =
          MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
      SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());

      // The caller promoted the argument, so insert an Assert?ext SDNode so
      // we won't promote the value again in this function.
      switch (VA.getLocInfo()) {
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      case CCValAssign::BCvt: {
        // Extract a float argument from i64 with padding.
        assert(VA.getLocVT() == MVT::i64);
        assert(VA.getValVT() == MVT::f32);
        SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
        Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                         MVT::f32, Arg, Sub_f32),
                      0);
        break;
      }
      default:
        break;
      }

      // Truncate the register down to the argument type.
      if (VA.isExtInLoc())
        Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);

      InVals.push_back(Arg);
      continue;
    }

    // The registers are exhausted. This argument was passed on the stack.
    assert(VA.isMemLoc());
    // The CC_VE_Full/Half functions compute stack offsets relative to the
    // beginning of the arguments area at %fp + the size of reserved area.
    unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
    unsigned ValSize = VA.getValVT().getSizeInBits() / 8;

    // Adjust the offset for a float argument by adding 4 since the argument
    // is stored in an 8-byte buffer with an offset as shown below.  LLVM
    // generates a 4-byte load instruction, so the offset must be adjusted
    // here.  This adjustment is required only in LowerFormalArguments.  In
    // LowerCall, a float argument is converted to i64 first and stored as 8
    // bytes of data, which is required by the ABI, so no adjustment is
    // needed there.
    //    0      4
    //    +------+------+
    //    | empty| float|
    //    +------+------+
    if (VA.getValVT() == MVT::f32)
      Offset += 4;

    int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
    InVals.push_back(
        DAG.getLoad(VA.getValVT(), DL, Chain,
                    DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
                    MachinePointerInfo::getFixedStack(MF, FI)));
  }

  if (!IsVarArg)
    return Chain;

  // This function takes variable arguments, some of which may have been passed
  // in registers %s0-%s8.
  //
  // The va_start intrinsic needs to know the offset to the first variable
  // argument.
  // TODO: need to calculate offset correctly once we support f128.
  unsigned ArgOffset = ArgLocs.size() * 8;
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  // Skip the reserved area at the top of stack.
  FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);

  return Chain;
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                             const MachineFunction &MF) const {
  Register Reg = StringSwitch<Register>(RegName)
                     .Case("sp", VE::SX11)    // Stack pointer
                     .Case("fp", VE::SX9)     // Frame pointer
                     .Case("sl", VE::SX8)     // Stack limit
                     .Case("lr", VE::SX10)    // Link register
                     .Case("tp", VE::SX14)    // Thread pointer
                     .Case("outer", VE::SX12) // Outer register
                     .Case("info", VE::SX17)  // Info area register
                     .Case("got", VE::SX15)   // Global offset table register
                     .Case("plt", VE::SX16)   // Procedure linkage table register
                     .Default(0);

  if (Reg)
    return Reg;

  report_fatal_error("Invalid register name global variable");
}
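
// For example, a named-register read via the llvm.read_register intrinsic
// with the name "sp" resolves to VE::SX11 through the table above, and an
// unknown name reaches the report_fatal_error call.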

//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//

SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                    SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // VE target does not yet support tail call optimization.
  CLI.IsTailCall = false;

  // Get the base offset of the outgoing arguments stack space.
  unsigned ArgsBaseOffset = Subtarget->getRsaSize();
  // Get the size of the preserved arguments area
  unsigned ArgsPreserved = 8 * 8u;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offset computed
  // by CC_VE would be correct now.
  CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));

  // VE requires to use both register and stack for varargs or no-prototyped
  // functions.
  bool UseBoth = CLI.IsVarArg;

  // Analyze operands again if it is required to store BOTH.
  SmallVector<CCValAssign, 16> ArgLocs2;
  CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
                  ArgLocs2, *DAG.getContext());
  if (UseBoth)
    CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));

  // Get the size of the outgoing arguments stack space requirement.
  unsigned ArgsSize = CCInfo.getStackSize();

  // Keep stack frames 16-byte aligned.
  ArgsSize = alignTo(ArgsSize, 16);

  // Adjust the stack pointer to make room for the arguments.
  // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
  // with more than 6 arguments.
  Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);

  // Collect the set of registers to pass to the function and their values.
  // This will be emitted as a sequence of CopyToReg nodes glued to the call
  // instruction.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

  // Collect chains from all the memory operations that copy arguments to the
  // stack. They must follow the stack pointer adjustment above and precede the
  // call instruction itself.
  SmallVector<SDValue, 8> MemOpChains;

  // VE needs to get the address of the callee function in a register.
  // So, prepare to copy it to SX12 here.

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  // Likewise ExternalSymbol -> TargetExternalSymbol.
  SDValue Callee = CLI.Callee;

  bool IsPICCall = isPositionIndependent();

  // PC-relative references to external symbols should go through $stub.
  // If so, we need to prepare GlobalBaseReg first.
  const TargetMachine &TM = DAG.getTarget();
  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
  const GlobalValue *GV = nullptr;
  auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
  if (CalleeG)
    GV = CalleeG->getGlobal();
  bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
  bool UsePlt = !Local;
  MachineFunction &MF = DAG.getMachineFunction();

  // Turn GlobalAddress/ExternalSymbol node into a value node
  // containing the address of them here.
  if (CalleeG) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee =
          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
    }
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee =
          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
    }
  }

  RegsToPass.push_back(std::make_pair(VE::SX12, Callee));

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = CLI.OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown location info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt: {
      // Convert a float argument to i64 with padding.
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Undef = SDValue(
          DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                       MVT::i64, Undef, Arg, Sub_f32),
                    0);
      break;
    }
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      if (!UseBoth)
        continue;
      VA = ArgLocs2[i];
    }

    assert(VA.isMemLoc());

    // Create a store off the stack pointer for this argument.
    SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
    // The argument area starts at %fp/%sp + the size of reserved area.
    SDValue PtrOff =
        DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
    PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
    MemOpChains.push_back(
        DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
  }

  // Emit all stores, make sure they occur before the call.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Build a sequence of CopyToReg nodes glued together with token chain and
  // glue operands which copy the outgoing args into registers. The InGlue is
  // necessary since all emitted instructions must be stuck together in order
  // to pass the live physical registers.
  SDValue InGlue;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
                             RegsToPass[i].second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // Build the operands for the call instruction itself.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Make sure the CopyToReg nodes are glued to the call instruction which
  // consumes the registers.
  if (InGlue.getNode())
    Ops.push_back(InGlue);

  // Now the call itself.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  // Revert the stack pointer immediately after the call.
  Chain = DAG.getCALLSEQ_END(Chain, ArgsSize, 0, InGlue, DL);
  InGlue = Chain.getValue(1);

  // Now extract the return values. This is more or less the same as
  // LowerFormalArguments.

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Set inreg flag manually for codegen generated library calls that
  // return float.
  if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
    CLI.Ins[0].Flags.setInReg();

  RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    Register Reg = VA.getLocReg();

    // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
    // reside in the same register in the high and low bits. Reuse the
    // CopyFromReg previous node to avoid duplicate copies.
    SDValue RV;
    if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
      if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
        RV = Chain.getValue(0);

    // But usually we'll create a new CopyFromReg for a different register.
    if (!RV.getNode()) {
      RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
      Chain = RV.getValue(1);
      InGlue = Chain.getValue(2);
    }

    // The callee promoted the return value, so insert an Assert?ext SDNode so
    // we won't promote the value again in this function.
    switch (VA.getLocInfo()) {
    case CCValAssign::SExt:
      RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    case CCValAssign::ZExt:
      RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    case CCValAssign::BCvt: {
      // Extract a float return value from i64 with padding.
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                      MVT::f32, RV, Sub_f32),
                   0);
      break;
    }
    default:
      break;
    }

    // Truncate the register down to the return value type.
    if (VA.isExtInLoc())
      RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);

    InVals.push_back(RV);
  }

  return Chain;
}

bool VETargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // VE uses 64-bit addressing, so we need multiple instructions to generate
  // an address.  Folding an address with an offset increases the number of
  // instructions, so we disable it here.  Offsets will be folded in the DAG
  // combine later if it is worth doing so.
  return false;
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                    bool ForCodeSize) const {
  return VT == MVT::f32 || VT == MVT::f64;
}

/// Determine if the target supports unaligned memory accesses.
///
/// This function returns true if the target allows unaligned memory accesses
/// of the specified type in the given address space. If true, it also returns
/// whether the unaligned memory access is "fast" in the last argument by
/// reference. This is used, for example, in situations where an array
/// copy/move/set is converted to a sequence of store operations. Its use
/// helps to ensure that such replacements don't generate code that causes an
/// alignment error (trap) on the target machine.
bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                      unsigned AddrSpace,
                                                      Align A,
                                                      MachineMemOperand::Flags,
                                                      unsigned *Fast) const {
  if (Fast) {
    // It's fast anytime on VE
    *Fast = 1;
  }
  return true; // VE allows unaligned memory accesses
}

VETargetLowering::VETargetLowering(const TargetMachine &TM,
                                   const VESubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Instructions which use registers as conditionals examine all the
  // bits (as does the pseudo SELECT_CC expansion). I don't think it
  // matters much whether it's ZeroOrOneBooleanContent, or
  // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
  // former.
  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent);

  initRegisterClasses();
  initSPUActions();
  initVPUActions();

  setStackPointerRegisterToSaveRestore(VE::SX11);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::TRUNCATE);
  setTargetDAGCombine(ISD::SELECT);
  setTargetDAGCombine(ISD::SELECT_CC);

  // Set function alignment to 16 bytes
  setMinFunctionAlignment(Align(16));

  // VE stores all arguments with 8-byte alignment
  setMinStackArgumentAlignment(Align(8));

  computeRegisterProperties(Subtarget->getRegisterInfo());
}

const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
#define TARGET_NODE_CASE(NAME)                                                 \
  case VEISD::NAME:                                                            \
    return "VEISD::" #NAME;
  switch ((VEISD::NodeType)Opcode) {
  case VEISD::FIRST_NUMBER:
    break;
    TARGET_NODE_CASE(CMPI)
    TARGET_NODE_CASE(CMPU)
    TARGET_NODE_CASE(CMPF)
    TARGET_NODE_CASE(CMPQ)
    TARGET_NODE_CASE(CMOV)
    TARGET_NODE_CASE(CALL)
    TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
    TARGET_NODE_CASE(EH_SJLJ_SETJMP)
    TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
    TARGET_NODE_CASE(GETFUNPLT)
    TARGET_NODE_CASE(GETSTACKTOP)
    TARGET_NODE_CASE(GETTLSADDR)
    TARGET_NODE_CASE(GLOBAL_BASE_REG)
    TARGET_NODE_CASE(Hi)
    TARGET_NODE_CASE(Lo)
    TARGET_NODE_CASE(RET_GLUE)
    TARGET_NODE_CASE(TS1AM)
    TARGET_NODE_CASE(VEC_UNPACK_LO)
    TARGET_NODE_CASE(VEC_UNPACK_HI)
    TARGET_NODE_CASE(VEC_PACK)
    TARGET_NODE_CASE(VEC_BROADCAST)
    TARGET_NODE_CASE(REPL_I32)
    TARGET_NODE_CASE(REPL_F32)

    TARGET_NODE_CASE(LEGALAVL)

    // Register the VVP_* SDNodes.
#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
#include "VVPNodes.def"
  }
#undef TARGET_NODE_CASE
  return nullptr;
}

EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                         EVT VT) const {
  return MVT::i32;
}

// Convert to a target node and set target flags.
SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
                                          SelectionDAG &DAG) const {
  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
    return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
                                      GA->getValueType(0), GA->getOffset(), TF);

  if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
    return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
                                     0, TF);

  if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
    return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
                                     CP->getAlign(), CP->getOffset(), TF);

  if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
    return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
                                       TF);

  if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
    return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);

  llvm_unreachable("Unhandled address SDNode");
}

// Split Op into high and low parts according to HiTF and LoTF.
// Return an ADD node combining the parts.
SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
                                       SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
  SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
  return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
}
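
// The ADD above reassembles the full 64-bit symbol address from its upper and
// lower 32-bit relocations, roughly (hi32 << 32) + lo32, which corresponds to
// the lea/and/lea.sl instruction sequences documented in makeAddress below.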

// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
// or ExternalSymbol SDNode.
SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = Op.getValueType();

  // Handle PIC mode first. VE needs a got load for every variable!
  if (isPositionIndependent()) {
    auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);

    if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
        (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
      // Create following instructions for local linkage PIC code.
      //     lea %reg, label@gotoff_lo
      //     and %reg, %reg, (32)0
      //     lea.sl %reg, label@gotoff_hi(%reg, %got)
      SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
                                  VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
      SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
      return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    }
    // Create following instructions for not local linkage PIC code.
    //     lea %reg, label@got_lo
    //     and %reg, %reg, (32)0
    //     lea.sl %reg, label@got_hi(%reg)
    //     ld %reg, (%reg, %got)
    SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
                                VEMCExpr::VK_VE_GOT_LO32, DAG);
    SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
    SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
                       MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // This is one of the absolute code models.
  switch (getTargetMachine().getCodeModel()) {
  default:
    llvm_unreachable("Unsupported absolute code model");
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Large:
    return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
  }
}

// The mappings for emitLeading/TrailingFence for VE are designed by following
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
                                                Instruction *Inst,
                                                AtomicOrdering Ord) const {
  switch (Ord) {
  case AtomicOrdering::NotAtomic:
  case AtomicOrdering::Unordered:
    llvm_unreachable("Invalid fence: unordered/non-atomic");
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Acquire:
    return nullptr; // Nothing to do
  case AtomicOrdering::Release:
  case AtomicOrdering::AcquireRelease:
    return Builder.CreateFence(AtomicOrdering::Release);
  case AtomicOrdering::SequentiallyConsistent:
    if (!Inst->hasAtomicStore())
      return nullptr; // Nothing to do
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  }
  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
}

Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
                                                 Instruction *Inst,
                                                 AtomicOrdering Ord) const {
  switch (Ord) {
  case AtomicOrdering::NotAtomic:
  case AtomicOrdering::Unordered:
    llvm_unreachable("Invalid fence: unordered/not-atomic");
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Release:
    return nullptr; // Nothing to do
  case AtomicOrdering::Acquire:
  case AtomicOrdering::AcquireRelease:
    return Builder.CreateFence(AtomicOrdering::Acquire);
  case AtomicOrdering::SequentiallyConsistent:
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  }
  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
}

SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicOrdering FenceOrdering =
      static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  // VE uses Release consistency, so need a fence instruction if it is a
  // cross-thread fence.
  if (FenceSSID == SyncScope::System) {
    switch (FenceOrdering) {
    case AtomicOrdering::NotAtomic:
    case AtomicOrdering::Unordered:
    case AtomicOrdering::Monotonic:
      // No need to generate fencem instruction here.
      break;
    case AtomicOrdering::Acquire:
      // Generate "fencem 2" as acquire fence.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(2, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    case AtomicOrdering::Release:
      // Generate "fencem 1" as release fence.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(1, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      // Generate "fencem 3" as acq_rel and seq_cst fence.
      // FIXME: "fencem 3" doesn't wait for PCIe device accesses,
      //        so seq_cst may require more instructions for them.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(3, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    }
  }

  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
  return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}
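
// In short, the system-scope mapping above is: acquire -> "fencem 2",
// release -> "fencem 1", acq_rel and seq_cst -> "fencem 3"; weaker orderings
// and single-thread fences emit no instruction.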

TargetLowering::AtomicExpansionKind
VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have TS1AM implementation for i8/i16/i32/i64, so use it.
  if (AI->getOperation() == AtomicRMWInst::Xchg) {
    return AtomicExpansionKind::None;
  }
  // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.

  // Otherwise, expand it using compare and exchange instruction to not call
  // __sync_fetch_and_* functions.
  return AtomicExpansionKind::CmpXChg;
}

static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
                            SDValue &Bits) {
  SDLoc DL(Op);
  AtomicSDNode *N = cast<AtomicSDNode>(Op);
  SDValue Ptr = N->getOperand(1);
  SDValue Val = N->getOperand(2);
  EVT PtrVT = Ptr.getValueType();
  bool Byte = N->getMemoryVT() == MVT::i8;
  //   Remainder = AND Ptr, 3
  //   Flag = 1 << Remainder  ; If Byte is true (1 byte swap flag)
  //   Flag = 3 << Remainder  ; If Byte is false (2 bytes swap flag)
  //   Bits = Remainder << 3
  //   NewVal = Val << Bits
  SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
  SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
  SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
                      : DAG.getConstant(3, DL, MVT::i32);
  Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
  Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
  return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
}
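
// Worked example: for an i8 swap whose pointer has low bits 0b10, the code
// above produces Remainder = 2, Flag = 1 << 2 = 4 (the byte-enable mask for
// TS1AM), Bits = 2 << 3 = 16, and the returned NewVal is Val shifted left by
// 16 so the byte sits in the lane TS1AM will actually exchange.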

static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
                             SDValue Bits) {
  SDLoc DL(Op);
  EVT VT = Data.getValueType();
  bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
  //   NewData = Data >> Bits
  //   Result = NewData & 0xff   ; If Byte is true (1 byte)
  //   Result = NewData & 0xffff ; If Byte is false (2 bytes)
  SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
  return DAG.getNode(ISD::AND, DL, VT,
                     {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
}
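
// Continuing the example above: with Bits = 16 the old memory word returned
// by TS1AM is shifted right by 16 and masked with 0xff, recovering the
// original byte as the result of the atomic swap.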

SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicSDNode *N = cast<AtomicSDNode>(Op);

  if (N->getMemoryVT() == MVT::i8) {
    // For i8, use "ts1am"
    //   ATOMIC_SWAP Ptr, Val, Order
    //
    //   Remainder = AND Ptr, 3
    //   Flag = 1 << Remainder        ; 1 byte swap flag for TS1AM inst.
    //   Bits = Remainder << 3
    //   NewVal = Val << Bits
    //
    //   Aligned = AND Ptr, -4
    //   Data = TS1AM Aligned, Flag, NewVal
    //
    //   NewData = Data >> Bits
    //   Result = NewData & 0xff      ; 1 byte result
    SDValue Flag;
    SDValue Bits;
    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);

    SDValue Ptr = N->getOperand(1);
    SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
                                  {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
                                  DAG.getVTList(Op.getNode()->getValueType(0),
                                                Op.getNode()->getValueType(1)),
                                  {N->getChain(), Aligned, Flag, NewVal},
                                  N->getMemOperand());

    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
    SDValue Chain = TS1AM.getValue(1);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  if (N->getMemoryVT() == MVT::i16) {
    // For i16, use "ts1am"
    SDValue Flag;
    SDValue Bits;
    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);

    SDValue Ptr = N->getOperand(1);
    SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
                                  {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
                                  DAG.getVTList(Op.getNode()->getValueType(0),
                                                Op.getNode()->getValueType(1)),
                                  {N->getChain(), Aligned, Flag, NewVal},
                                  N->getMemOperand());

    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
    SDValue Chain = TS1AM.getValue(1);
    return DAG.getMergeValues({Result, Chain}, DL);
  }

  // Otherwise, let llvm legalize it.
  return Op;
}

SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::lowerConstantPool(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue
VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Generate the following code:
  //   t1: ch,glue = callseq_start t0, 0, 0
  //   t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
  //   t3: ch,glue = callseq_end t2, 0, 0, t2:2
  //   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
  SDValue Label = withTargetFlags(Op, 0, DAG);
  EVT PtrVT = Op.getValueType();

  // Lowering the machine isd will make sure everything is in the right
  // place.
  SDValue Chain = DAG.getEntryNode();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
      DAG.getMachineFunction(), CallingConv::C);
  Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
  SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
  Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
  Chain = DAG.getCALLSEQ_END(Chain, 64, 0, Chain.getValue(1), DL);
  Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));

  // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setHasCalls(true);

  // Also generate code to prepare a GOT register if it is PIC.
  if (isPositionIndependent()) {
    MachineFunction &MF = DAG.getMachineFunction();
    Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
  }

  return Chain;
}

SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The current implementation of nld (2.26) doesn't allow local exec model
  // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
  // generate the general dynamic model code sequence.
  //
  // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
  return lowerToTLSGeneralDynamicModel(Op, DAG);
}

SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

// Lower a f128 load into two f64 loads.
static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
  assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
  Align Alignment = LdNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  SDValue Lo64 =
      DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
                  LdNode->getPointerInfo(), Alignment,
                  LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                       : MachineMemOperand::MONone);
  EVT AddrVT = LdNode->getBasePtr().getValueType();
  SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
                              DAG.getConstant(8, DL, AddrVT));
  SDValue Hi64 =
      DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
                  LdNode->getPointerInfo(), Alignment,
                  LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                       : MachineMemOperand::MONone);

  SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
  SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);

  // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
  SDNode *InFP128 =
      DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
  InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
                               SDValue(InFP128, 0), Hi64, SubRegEven);
  InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
                               SDValue(InFP128, 0), Lo64, SubRegOdd);
  SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
                          SDValue(Hi64.getNode(), 1)};
  SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
  return DAG.getMergeValues(Ops, DL);
}
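
// Register-pair layout used above: the even subregister of the f128 pair
// receives the value loaded from addr+8 (Hi64) and the odd subregister the
// value from addr (Lo64), matching the "VE stores Hi64 to 8(addr) and Lo64
// to 0(addr)" convention.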

// Lower a vXi1 load into following instructions
//   LDrii %1, (,%addr)
//   LVMxir  %vm, 0, %1
//   LDrii %2, 8(,%addr)
//   LVMxir  %vm, 0, %2
//   ...
static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
  assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");

  SDValue BasePtr = LdNode->getBasePtr();
  Align Alignment = LdNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  EVT AddrVT = BasePtr.getValueType();
  EVT MemVT = LdNode->getMemoryVT();
  if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
    SDValue OutChains[4];
    SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
    for (int i = 0; i < 4; ++i) {
      // Generate load dag and prepare chains.
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      SDValue Val =
          DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
                      LdNode->getPointerInfo(), Alignment,
                      LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                           : MachineMemOperand::MONone);
      OutChains[i] = SDValue(Val.getNode(), 1);

      VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64,
                              DAG.getTargetConstant(i, DL, MVT::i64), Val,
                              SDValue(VM, 0));
    }
    SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
    SDValue Ops[2] = {SDValue(VM, 0), OutChain};
    return DAG.getMergeValues(Ops, DL);
  } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
    SDValue OutChains[8];
    SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
    for (int i = 0; i < 8; ++i) {
      // Generate load dag and prepare chains.
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      SDValue Val =
          DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
                      LdNode->getPointerInfo(), Alignment,
                      LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                           : MachineMemOperand::MONone);
      OutChains[i] = SDValue(Val.getNode(), 1);

      VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64,
                              DAG.getTargetConstant(i, DL, MVT::i64), Val,
                              SDValue(VM, 0));
    }
    SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
    SDValue Ops[2] = {SDValue(VM, 0), OutChain};
    return DAG.getMergeValues(Ops, DL);
  }
  // Otherwise, ask llvm to expand it.
  return SDValue();
}

SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
  EVT MemVT = LdNode->getMemoryVT();

  // If VPU is enabled, always expand non-mask vector loads to VVP
  if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
    return lowerToVVP(Op, DAG);

  SDValue BasePtr = LdNode->getBasePtr();
  if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
    // Do not expand load instruction with frame index here because of
    // dependency problems.  We expand it later in eliminateFrameIndex().
    return Op;
  }

  if (MemVT == MVT::f128)
    return lowerLoadF128(Op, DAG);
  if (isMaskType(MemVT))
    return lowerLoadI1(Op, DAG);

  return Op;
}

// Lower a f128 store into two f64 stores.
static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
  assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");

  SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
  SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);

  SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
                                    StNode->getValue(), SubRegEven);
  SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
                                    StNode->getValue(), SubRegOdd);

  Align Alignment = StNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
  SDValue OutChains[2];
  OutChains[0] =
      DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0),
                   StNode->getBasePtr(), MachinePointerInfo(), Alignment,
                   StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                        : MachineMemOperand::MONone);
  EVT AddrVT = StNode->getBasePtr().getValueType();
  SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(),
                              DAG.getConstant(8, DL, AddrVT));
  OutChains[1] =
      DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr,
                   MachinePointerInfo(), Alignment,
                   StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                        : MachineMemOperand::MONone);
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
}

// Lower a vXi1 store into following instructions
//   SVMi  %1, %vm, 0
//   STrii %1, (,%addr)
//   SVMi  %2, %vm, 1
//   STrii %2, 8(,%addr)
//   ...
static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
  assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");

  SDValue BasePtr = StNode->getBasePtr();
  Align Alignment = StNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);
  EVT AddrVT = BasePtr.getValueType();
  EVT MemVT = StNode->getMemoryVT();
  if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
    SDValue OutChains[4];
    for (int i = 0; i < 4; ++i) {
      SDNode *V =
          DAG.getMachineNode(VE::SVMmi, DL, MVT::i64, StNode->getValue(),
                             DAG.getTargetConstant(i, DL, MVT::i64));
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      OutChains[i] =
          DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
                       MachinePointerInfo(), Alignment,
                       StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                            : MachineMemOperand::MONone);
    }
    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
    SDValue OutChains[8];
    for (int i = 0; i < 8; ++i) {
      SDNode *V =
          DAG.getMachineNode(VE::SVMyi, DL, MVT::i64, StNode->getValue(),
                             DAG.getTargetConstant(i, DL, MVT::i64));
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      OutChains[i] =
          DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
                       MachinePointerInfo(), Alignment,
                       StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                            : MachineMemOperand::MONone);
    }
    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  // Otherwise, ask llvm to expand it.
  return SDValue();
}

SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
  assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
  EVT MemVT = StNode->getMemoryVT();

  // If VPU is enabled, always expand non-mask vector stores to VVP
  if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
    return lowerToVVP(Op, DAG);

  SDValue BasePtr = StNode->getBasePtr();
  if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
    // Do not expand store instruction with frame index here because of
    // dependency problems.  We expand it later in eliminateFrameIndex().
    return Op;
  }

  if (MemVT == MVT::f128)
    return lowerStoreF128(Op, DAG);
  if (isMaskType(MemVT))
    return lowerStoreI1(Op, DAG);

  // Otherwise, ask llvm to expand it.
  return SDValue();
}

SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Need frame address to find the address of VarArgsFrameIndex.
  MF.getFrameInfo().setFrameAddressIsTaken(true);

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDLoc DL(Op);
  SDValue Offset =
      DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
                  DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
                      MachinePointerInfo(SV));
}
1585 SDValue
VETargetLowering::lowerVAARG(SDValue Op
, SelectionDAG
&DAG
) const {
1586 SDNode
*Node
= Op
.getNode();
1587 EVT VT
= Node
->getValueType(0);
1588 SDValue InChain
= Node
->getOperand(0);
1589 SDValue VAListPtr
= Node
->getOperand(1);
1590 EVT PtrVT
= VAListPtr
.getValueType();
1591 const Value
*SV
= cast
<SrcValueSDNode
>(Node
->getOperand(2))->getValue();
1594 DAG
.getLoad(PtrVT
, DL
, InChain
, VAListPtr
, MachinePointerInfo(SV
));
1595 SDValue Chain
= VAList
.getValue(1);
1598 if (VT
== MVT::f128
) {
1599 // VE f128 values must be stored with 16 bytes alignment. We don't
1600 // know the actual alignment of VAList, so we take alignment of it
1603 VAList
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, VAList
,
1604 DAG
.getConstant(Align
- 1, DL
, PtrVT
));
1605 VAList
= DAG
.getNode(ISD::AND
, DL
, PtrVT
, VAList
,
1606 DAG
.getConstant(-Align
, DL
, PtrVT
));
1607 // Increment the pointer, VAList, by 16 to the next vaarg.
1609 DAG
.getNode(ISD::ADD
, DL
, PtrVT
, VAList
, DAG
.getIntPtrConstant(16, DL
));
1610 } else if (VT
== MVT::f32
) {
1611 // float --> need special handling like below.
1616 // Increment the pointer, VAList, by 8 to the next vaarg.
1618 DAG
.getNode(ISD::ADD
, DL
, PtrVT
, VAList
, DAG
.getIntPtrConstant(8, DL
));
1619 // Then, adjust VAList.
1620 unsigned InternalOffset
= 4;
1621 VAList
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, VAList
,
1622 DAG
.getConstant(InternalOffset
, DL
, PtrVT
));
1624 // Increment the pointer, VAList, by 8 to the next vaarg.
1626 DAG
.getNode(ISD::ADD
, DL
, PtrVT
, VAList
, DAG
.getIntPtrConstant(8, DL
));
1629 // Store the incremented VAList to the legalized pointer.
1630 InChain
= DAG
.getStore(Chain
, DL
, NextPtr
, VAListPtr
, MachinePointerInfo(SV
));
1632 // Load the actual argument out of the pointer VAList.
1633 // We can't count on greater alignment than the word size.
1635 VT
, DL
, InChain
, VAList
, MachinePointerInfo(),
1636 Align(std::min(PtrVT
.getSizeInBits(), VT
.getSizeInBits()) / 8));
1639 SDValue
VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op
,
1640 SelectionDAG
&DAG
) const {
1641 // Generate following code.
1642 // (void)__llvm_grow_stack(size);
1643 // ret = GETSTACKTOP; // pseudo instruction
1647 SDNode
*Node
= Op
.getNode();
1648 SDValue Chain
= Op
.getOperand(0);
1649 SDValue Size
= Op
.getOperand(1);
1650 MaybeAlign
Alignment(Op
.getConstantOperandVal(2));
1651 EVT VT
= Node
->getValueType(0);
1653 // Chain the dynamic stack allocation so that it doesn't modify the stack
1654 // pointer when other instructions are using the stack.
1655 Chain
= DAG
.getCALLSEQ_START(Chain
, 0, 0, DL
);
1657 const TargetFrameLowering
&TFI
= *Subtarget
->getFrameLowering();
1658 Align StackAlign
= TFI
.getStackAlign();
1659 bool NeedsAlign
= Alignment
.valueOrOne() > StackAlign
;
1661 // Prepare arguments
1662 TargetLowering::ArgListTy Args
;
1663 TargetLowering::ArgListEntry Entry
;
1665 Entry
.Ty
= Entry
.Node
.getValueType().getTypeForEVT(*DAG
.getContext());
1666 Args
.push_back(Entry
);
1668 Entry
.Node
= DAG
.getConstant(~(Alignment
->value() - 1ULL), DL
, VT
);
1669 Entry
.Ty
= Entry
.Node
.getValueType().getTypeForEVT(*DAG
.getContext());
1670 Args
.push_back(Entry
);
1672 Type
*RetTy
= Type::getVoidTy(*DAG
.getContext());
1674 EVT PtrVT
= Op
.getValueType();
1677 Callee
= DAG
.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT
, 0);
1679 Callee
= DAG
.getTargetExternalSymbol("__ve_grow_stack", PtrVT
, 0);
1682 TargetLowering::CallLoweringInfo
CLI(DAG
);
1685 .setCallee(CallingConv::PreserveAll
, RetTy
, Callee
, std::move(Args
))
1686 .setDiscardResult(true);
1687 std::pair
<SDValue
, SDValue
> pair
= LowerCallTo(CLI
);
1688 Chain
= pair
.second
;
1689 SDValue Result
= DAG
.getNode(VEISD::GETSTACKTOP
, DL
, VT
, Chain
);
1691 Result
= DAG
.getNode(ISD::ADD
, DL
, VT
, Result
,
1692 DAG
.getConstant((Alignment
->value() - 1ULL), DL
, VT
));
1693 Result
= DAG
.getNode(ISD::AND
, DL
, VT
, Result
,
1694 DAG
.getConstant(~(Alignment
->value() - 1ULL), DL
, VT
));
1696 // Chain = Result.getValue(1);
1697 Chain
= DAG
.getCALLSEQ_END(Chain
, 0, 0, SDValue(), DL
);
1699 SDValue Ops
[2] = {Result
, Chain
};
1700 return DAG
.getMergeValues(Ops
, DL
);
1703 SDValue
VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op
,
1704 SelectionDAG
&DAG
) const {
1706 return DAG
.getNode(VEISD::EH_SJLJ_LONGJMP
, DL
, MVT::Other
, Op
.getOperand(0),
1710 SDValue
VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op
,
1711 SelectionDAG
&DAG
) const {
1713 return DAG
.getNode(VEISD::EH_SJLJ_SETJMP
, DL
,
1714 DAG
.getVTList(MVT::i32
, MVT::Other
), Op
.getOperand(0),
1718 SDValue
VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op
,
1719 SelectionDAG
&DAG
) const {
1721 return DAG
.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH
, DL
, MVT::Other
,
1725 static SDValue
lowerFRAMEADDR(SDValue Op
, SelectionDAG
&DAG
,
1726 const VETargetLowering
&TLI
,
1727 const VESubtarget
*Subtarget
) {
1729 MachineFunction
&MF
= DAG
.getMachineFunction();
1730 EVT PtrVT
= TLI
.getPointerTy(MF
.getDataLayout());
1732 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
1733 MFI
.setFrameAddressIsTaken(true);
1735 unsigned Depth
= Op
.getConstantOperandVal(0);
1736 const VERegisterInfo
*RegInfo
= Subtarget
->getRegisterInfo();
1737 Register FrameReg
= RegInfo
->getFrameRegister(MF
);
1739 DAG
.getCopyFromReg(DAG
.getEntryNode(), DL
, FrameReg
, PtrVT
);
1741 FrameAddr
= DAG
.getLoad(Op
.getValueType(), DL
, DAG
.getEntryNode(),
1742 FrameAddr
, MachinePointerInfo());
1746 static SDValue
lowerRETURNADDR(SDValue Op
, SelectionDAG
&DAG
,
1747 const VETargetLowering
&TLI
,
1748 const VESubtarget
*Subtarget
) {
1749 MachineFunction
&MF
= DAG
.getMachineFunction();
1750 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
1751 MFI
.setReturnAddressIsTaken(true);
1753 if (TLI
.verifyReturnAddressArgumentIsConstant(Op
, DAG
))
1756 SDValue FrameAddr
= lowerFRAMEADDR(Op
, DAG
, TLI
, Subtarget
);
1759 EVT VT
= Op
.getValueType();
1760 SDValue Offset
= DAG
.getConstant(8, DL
, VT
);
1761 return DAG
.getLoad(VT
, DL
, DAG
.getEntryNode(),
1762 DAG
.getNode(ISD::ADD
, DL
, VT
, FrameAddr
, Offset
),
1763 MachinePointerInfo());
1766 SDValue
VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op
,
1767 SelectionDAG
&DAG
) const {
1769 unsigned IntNo
= Op
.getConstantOperandVal(0);
1771 default: // Don't custom lower most intrinsics.
1773 case Intrinsic::eh_sjlj_lsda
: {
1774 MachineFunction
&MF
= DAG
.getMachineFunction();
1775 MVT VT
= Op
.getSimpleValueType();
1776 const VETargetMachine
*TM
=
1777 static_cast<const VETargetMachine
*>(&DAG
.getTarget());
1779 // Create GCC_except_tableXX string. The real symbol for that will be
1780 // generated in EHStreamer::emitExceptionTable() later. So, we just
1781 // borrow it's name here.
1782 TM
->getStrList()->push_back(std::string(
1783 (Twine("GCC_except_table") + Twine(MF
.getFunctionNumber())).str()));
1785 DAG
.getTargetExternalSymbol(TM
->getStrList()->back().c_str(), VT
, 0);
1786 if (isPositionIndependent()) {
1787 Addr
= makeHiLoPair(Addr
, VEMCExpr::VK_VE_GOTOFF_HI32
,
1788 VEMCExpr::VK_VE_GOTOFF_LO32
, DAG
);
1789 SDValue GlobalBase
= DAG
.getNode(VEISD::GLOBAL_BASE_REG
, DL
, VT
);
1790 return DAG
.getNode(ISD::ADD
, DL
, VT
, GlobalBase
, Addr
);
1792 return makeHiLoPair(Addr
, VEMCExpr::VK_VE_HI32
, VEMCExpr::VK_VE_LO32
, DAG
);
1797 static bool getUniqueInsertion(SDNode
*N
, unsigned &UniqueIdx
) {
1798 if (!isa
<BuildVectorSDNode
>(N
))
1800 const auto *BVN
= cast
<BuildVectorSDNode
>(N
);
1802 // Find first non-undef insertion.
1804 for (Idx
= 0; Idx
< BVN
->getNumOperands(); ++Idx
) {
1805 auto ElemV
= BVN
->getOperand(Idx
);
1806 if (!ElemV
->isUndef())
1809 // Catch the (hypothetical) all-undef case.
1810 if (Idx
== BVN
->getNumOperands())
1812 // Remember insertion.
1814 // Verify that all other insertions are undef.
1815 for (; Idx
< BVN
->getNumOperands(); ++Idx
) {
1816 auto ElemV
= BVN
->getOperand(Idx
);
1817 if (!ElemV
->isUndef())
1823 static SDValue
getSplatValue(SDNode
*N
) {
1824 if (auto *BuildVec
= dyn_cast
<BuildVectorSDNode
>(N
)) {
1825 return BuildVec
->getSplatValue();
1830 SDValue
VETargetLowering::lowerBUILD_VECTOR(SDValue Op
,
1831 SelectionDAG
&DAG
) const {
1832 VECustomDAG
CDAG(DAG
, Op
);
1833 MVT ResultVT
= Op
.getSimpleValueType();
1835 // If there is just one element, expand to INSERT_VECTOR_ELT.
1837 if (getUniqueInsertion(Op
.getNode(), UniqueIdx
)) {
1838 SDValue AccuV
= CDAG
.getUNDEF(Op
.getValueType());
1839 auto ElemV
= Op
->getOperand(UniqueIdx
);
1840 SDValue IdxV
= CDAG
.getConstant(UniqueIdx
, MVT::i64
);
1841 return CDAG
.getNode(ISD::INSERT_VECTOR_ELT
, ResultVT
, {AccuV
, ElemV
, IdxV
});
1844 // Else emit a broadcast.
1845 if (SDValue ScalarV
= getSplatValue(Op
.getNode())) {
1846 unsigned NumEls
= ResultVT
.getVectorNumElements();
1847 auto AVL
= CDAG
.getConstant(NumEls
, MVT::i32
);
1848 return CDAG
.getBroadcast(ResultVT
, ScalarV
, AVL
);
1855 TargetLowering::LegalizeAction
1856 VETargetLowering::getCustomOperationAction(SDNode
&Op
) const {
1857 // Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize
1858 // these operations (transform nodes such that their AVL parameter refers to
1859 // packs of 64bit, instead of number of elements.
1861 // Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
1863 if (isPackingSupportOpcode(Op
.getOpcode()))
1866 // Custom lower to legalize AVL for packed mode.
1867 if (isVVPOrVEC(Op
.getOpcode()))
1872 SDValue
VETargetLowering::LowerOperation(SDValue Op
, SelectionDAG
&DAG
) const {
1873 LLVM_DEBUG(dbgs() << "::LowerOperation "; Op
.dump(&DAG
));
1874 unsigned Opcode
= Op
.getOpcode();
1878 case ISD::ATOMIC_FENCE
:
1879 return lowerATOMIC_FENCE(Op
, DAG
);
1880 case ISD::ATOMIC_SWAP
:
1881 return lowerATOMIC_SWAP(Op
, DAG
);
1882 case ISD::BlockAddress
:
1883 return lowerBlockAddress(Op
, DAG
);
1884 case ISD::ConstantPool
:
1885 return lowerConstantPool(Op
, DAG
);
1886 case ISD::DYNAMIC_STACKALLOC
:
1887 return lowerDYNAMIC_STACKALLOC(Op
, DAG
);
1888 case ISD::EH_SJLJ_LONGJMP
:
1889 return lowerEH_SJLJ_LONGJMP(Op
, DAG
);
1890 case ISD::EH_SJLJ_SETJMP
:
1891 return lowerEH_SJLJ_SETJMP(Op
, DAG
);
1892 case ISD::EH_SJLJ_SETUP_DISPATCH
:
1893 return lowerEH_SJLJ_SETUP_DISPATCH(Op
, DAG
);
1894 case ISD::FRAMEADDR
:
1895 return lowerFRAMEADDR(Op
, DAG
, *this, Subtarget
);
1896 case ISD::GlobalAddress
:
1897 return lowerGlobalAddress(Op
, DAG
);
1898 case ISD::GlobalTLSAddress
:
1899 return lowerGlobalTLSAddress(Op
, DAG
);
1900 case ISD::INTRINSIC_WO_CHAIN
:
1901 return lowerINTRINSIC_WO_CHAIN(Op
, DAG
);
1902 case ISD::JumpTable
:
1903 return lowerJumpTable(Op
, DAG
);
1905 return lowerLOAD(Op
, DAG
);
1906 case ISD::RETURNADDR
:
1907 return lowerRETURNADDR(Op
, DAG
, *this, Subtarget
);
1908 case ISD::BUILD_VECTOR
:
1909 return lowerBUILD_VECTOR(Op
, DAG
);
1911 return lowerSTORE(Op
, DAG
);
1913 return lowerVASTART(Op
, DAG
);
1915 return lowerVAARG(Op
, DAG
);
1917 case ISD::INSERT_VECTOR_ELT
:
1918 return lowerINSERT_VECTOR_ELT(Op
, DAG
);
1919 case ISD::EXTRACT_VECTOR_ELT
:
1920 return lowerEXTRACT_VECTOR_ELT(Op
, DAG
);
1924 if (ISD::isVPOpcode(Opcode
))
1925 return lowerToVVP(Op
, DAG
);
1929 llvm_unreachable("Should not custom lower this!");
1931 // Legalize the AVL of this internal node.
1932 case VEISD::VEC_BROADCAST
:
1933 #define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
1934 #include "VVPNodes.def"
1935 // AVL already legalized.
1936 if (getAnnotatedNodeAVL(Op
).second
)
1938 return legalizeInternalVectorOp(Op
, DAG
);
1940 // Translate into a VEC_*/VVP_* layer operation.
1943 #define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
1944 #include "VVPNodes.def"
1945 if (isMaskArithmetic(Op
) && isPackedVectorType(Op
.getValueType()))
1946 return splitMaskArithmetic(Op
, DAG
);
1947 return lowerToVVP(Op
, DAG
);
1952 void VETargetLowering::ReplaceNodeResults(SDNode
*N
,
1953 SmallVectorImpl
<SDValue
> &Results
,
1954 SelectionDAG
&DAG
) const {
1955 switch (N
->getOpcode()) {
1956 case ISD::ATOMIC_SWAP
:
1957 // Let LLVM expand atomic swap instruction through LowerOperation.
1960 LLVM_DEBUG(N
->dumpr(&DAG
));
1961 llvm_unreachable("Do not know how to custom type legalize this operation!");
1965 /// JumpTable for VE.
1967 /// VE cannot generate relocatable symbol in jump table. VE cannot
1968 /// generate expressions using symbols in both text segment and data
1969 /// segment like below.
1970 /// .4byte .LBB0_2-.LJTI0_0
1971 /// So, we generate offset from the top of function like below as
1973 /// .4byte .LBB0_2-<function name>
1975 unsigned VETargetLowering::getJumpTableEncoding() const {
1976 // Use custom label for PIC.
1977 if (isPositionIndependent())
1978 return MachineJumpTableInfo::EK_Custom32
;
1980 // Otherwise, use the normal jump table encoding heuristics.
1981 return TargetLowering::getJumpTableEncoding();
1984 const MCExpr
*VETargetLowering::LowerCustomJumpTableEntry(
1985 const MachineJumpTableInfo
*MJTI
, const MachineBasicBlock
*MBB
,
1986 unsigned Uid
, MCContext
&Ctx
) const {
1987 assert(isPositionIndependent());
1989 // Generate custom label for PIC like below.
1990 // .4bytes .LBB0_2-<function name>
1991 const auto *Value
= MCSymbolRefExpr::create(MBB
->getSymbol(), Ctx
);
1992 MCSymbol
*Sym
= Ctx
.getOrCreateSymbol(MBB
->getParent()->getName().data());
1993 const auto *Base
= MCSymbolRefExpr::create(Sym
, Ctx
);
1994 return MCBinaryExpr::createSub(Value
, Base
, Ctx
);
1997 SDValue
VETargetLowering::getPICJumpTableRelocBase(SDValue Table
,
1998 SelectionDAG
&DAG
) const {
1999 assert(isPositionIndependent());
2001 Function
*Function
= &DAG
.getMachineFunction().getFunction();
2002 assert(Function
!= nullptr);
2003 auto PtrTy
= getPointerTy(DAG
.getDataLayout(), Function
->getAddressSpace());
2005 // In the jump table, we have following values in PIC mode.
2006 // .4bytes .LBB0_2-<function name>
2007 // We need to add this value and the address of this function to generate
2008 // .LBB0_2 label correctly under PIC mode. So, we want to generate following
2010 // lea %reg, fun@gotoff_lo
2011 // and %reg, %reg, (32)0
2012 // lea.sl %reg, fun@gotoff_hi(%reg, %got)
2013 // In order to do so, we need to genarate correctly marked DAG node using
2015 SDValue Op
= DAG
.getGlobalAddress(Function
, DL
, PtrTy
);
2016 SDValue HiLo
= makeHiLoPair(Op
, VEMCExpr::VK_VE_GOTOFF_HI32
,
2017 VEMCExpr::VK_VE_GOTOFF_LO32
, DAG
);
2018 SDValue GlobalBase
= DAG
.getNode(VEISD::GLOBAL_BASE_REG
, DL
, PtrTy
);
2019 return DAG
.getNode(ISD::ADD
, DL
, PtrTy
, GlobalBase
, HiLo
);
2022 Register
VETargetLowering::prepareMBB(MachineBasicBlock
&MBB
,
2023 MachineBasicBlock::iterator I
,
2024 MachineBasicBlock
*TargetBB
,
2025 const DebugLoc
&DL
) const {
2026 MachineFunction
*MF
= MBB
.getParent();
2027 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
2028 const VEInstrInfo
*TII
= Subtarget
->getInstrInfo();
2030 const TargetRegisterClass
*RC
= &VE::I64RegClass
;
2031 Register Tmp1
= MRI
.createVirtualRegister(RC
);
2032 Register Tmp2
= MRI
.createVirtualRegister(RC
);
2033 Register Result
= MRI
.createVirtualRegister(RC
);
2035 if (isPositionIndependent()) {
2036 // Create following instructions for local linkage PIC code.
2037 // lea %Tmp1, TargetBB@gotoff_lo
2038 // and %Tmp2, %Tmp1, (32)0
2039 // lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2040 BuildMI(MBB
, I
, DL
, TII
->get(VE::LEAzii
), Tmp1
)
2043 .addMBB(TargetBB
, VEMCExpr::VK_VE_GOTOFF_LO32
);
2044 BuildMI(MBB
, I
, DL
, TII
->get(VE::ANDrm
), Tmp2
)
2045 .addReg(Tmp1
, getKillRegState(true))
2047 BuildMI(MBB
, I
, DL
, TII
->get(VE::LEASLrri
), Result
)
2049 .addReg(Tmp2
, getKillRegState(true))
2050 .addMBB(TargetBB
, VEMCExpr::VK_VE_GOTOFF_HI32
);
2052 // Create following instructions for non-PIC code.
2053 // lea %Tmp1, TargetBB@lo
2054 // and %Tmp2, %Tmp1, (32)0
2055 // lea.sl %Result, TargetBB@hi(%Tmp2)
2056 BuildMI(MBB
, I
, DL
, TII
->get(VE::LEAzii
), Tmp1
)
2059 .addMBB(TargetBB
, VEMCExpr::VK_VE_LO32
);
2060 BuildMI(MBB
, I
, DL
, TII
->get(VE::ANDrm
), Tmp2
)
2061 .addReg(Tmp1
, getKillRegState(true))
2063 BuildMI(MBB
, I
, DL
, TII
->get(VE::LEASLrii
), Result
)
2064 .addReg(Tmp2
, getKillRegState(true))
2066 .addMBB(TargetBB
, VEMCExpr::VK_VE_HI32
);
2071 Register
VETargetLowering::prepareSymbol(MachineBasicBlock
&MBB
,
2072 MachineBasicBlock::iterator I
,
2073 StringRef Symbol
, const DebugLoc
&DL
,
2074 bool IsLocal
= false,
2075 bool IsCall
= false) const {
2076 MachineFunction
*MF
= MBB
.getParent();
2077 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
2078 const VEInstrInfo
*TII
= Subtarget
->getInstrInfo();
2080 const TargetRegisterClass
*RC
= &VE::I64RegClass
;
2081 Register Result
= MRI
.createVirtualRegister(RC
);
2083 if (isPositionIndependent()) {
2084 if (IsCall
&& !IsLocal
) {
2085 // Create following instructions for non-local linkage PIC code function
2086 // calls. These instructions uses IC and magic number -24, so we expand
2087 // them in VEAsmPrinter.cpp from GETFUNPLT pseudo instruction.
2088 // lea %Reg, Symbol@plt_lo(-24)
2089 // and %Reg, %Reg, (32)0
2091 // lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
2092 BuildMI(MBB
, I
, DL
, TII
->get(VE::GETFUNPLT
), Result
)
2093 .addExternalSymbol("abort");
2094 } else if (IsLocal
) {
2095 Register Tmp1
= MRI
.createVirtualRegister(RC
);
2096 Register Tmp2
= MRI
.createVirtualRegister(RC
);
2097 // Create following instructions for local linkage PIC code.
2098 // lea %Tmp1, Symbol@gotoff_lo
2099 // and %Tmp2, %Tmp1, (32)0
2100 // lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2101 BuildMI(MBB
, I
, DL
, TII
->get(VE::LEAzii
), Tmp1
)
2104 .addExternalSymbol(Symbol
.data(), VEMCExpr::VK_VE_GOTOFF_LO32
);
2105 BuildMI(MBB
, I
, DL
, TII
->get(VE::ANDrm
), Tmp2
)
2106 .addReg(Tmp1
, getKillRegState(true))
2108 BuildMI(MBB
, I
, DL
, TII
->get(VE::LEASLrri
), Result
)
2110 .addReg(Tmp2
, getKillRegState(true))
2111 .addExternalSymbol(Symbol
.data(), VEMCExpr::VK_VE_GOTOFF_HI32
);
2113 Register Tmp1
= MRI
.createVirtualRegister(RC
);
2114 Register Tmp2
= MRI
.createVirtualRegister(RC
);
2115 // Create following instructions for not local linkage PIC code.
2116 // lea %Tmp1, Symbol@got_lo
2117 // and %Tmp2, %Tmp1, (32)0
2118 // lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2119 // ld %Result, 0(%Tmp3)
2120 Register Tmp3
= MRI
.createVirtualRegister(RC
);
2121 BuildMI(MBB
, I
, DL
, TII
->get(VE::LEAzii
), Tmp1
)
2124 .addExternalSymbol(Symbol
.data(), VEMCExpr::VK_VE_GOT_LO32
);
2125 BuildMI(MBB
, I
, DL
, TII
->get(VE::ANDrm
), Tmp2
)
2126 .addReg(Tmp1
, getKillRegState(true))
2128 BuildMI(MBB
, I
, DL
, TII
->get(VE::LEASLrri
), Tmp3
)
2130 .addReg(Tmp2
, getKillRegState(true))
2131 .addExternalSymbol(Symbol
.data(), VEMCExpr::VK_VE_GOT_HI32
);
2132 BuildMI(MBB
, I
, DL
, TII
->get(VE::LDrii
), Result
)
2133 .addReg(Tmp3
, getKillRegState(true))
2138 Register Tmp1
= MRI
.createVirtualRegister(RC
);
2139 Register Tmp2
= MRI
.createVirtualRegister(RC
);
2140 // Create following instructions for non-PIC code.
2141 // lea %Tmp1, Symbol@lo
2142 // and %Tmp2, %Tmp1, (32)0
2143 // lea.sl %Result, Symbol@hi(%Tmp2)
2144 BuildMI(MBB
, I
, DL
, TII
->get(VE::LEAzii
), Tmp1
)
2147 .addExternalSymbol(Symbol
.data(), VEMCExpr::VK_VE_LO32
);
2148 BuildMI(MBB
, I
, DL
, TII
->get(VE::ANDrm
), Tmp2
)
2149 .addReg(Tmp1
, getKillRegState(true))
2151 BuildMI(MBB
, I
, DL
, TII
->get(VE::LEASLrii
), Result
)
2152 .addReg(Tmp2
, getKillRegState(true))
2154 .addExternalSymbol(Symbol
.data(), VEMCExpr::VK_VE_HI32
);
2159 void VETargetLowering::setupEntryBlockForSjLj(MachineInstr
&MI
,
2160 MachineBasicBlock
*MBB
,
2161 MachineBasicBlock
*DispatchBB
,
2162 int FI
, int Offset
) const {
2163 DebugLoc DL
= MI
.getDebugLoc();
2164 const VEInstrInfo
*TII
= Subtarget
->getInstrInfo();
2167 prepareMBB(*MBB
, MachineBasicBlock::iterator(MI
), DispatchBB
, DL
);
2169 // Store an address of DispatchBB to a given jmpbuf[1] where has next IC
2170 // referenced by longjmp (throw) later.
2171 MachineInstrBuilder MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(VE::STrii
));
2172 addFrameReference(MIB
, FI
, Offset
); // jmpbuf[1]
2173 MIB
.addReg(LabelReg
, getKillRegState(true));
2177 VETargetLowering::emitEHSjLjSetJmp(MachineInstr
&MI
,
2178 MachineBasicBlock
*MBB
) const {
2179 DebugLoc DL
= MI
.getDebugLoc();
2180 MachineFunction
*MF
= MBB
->getParent();
2181 const TargetInstrInfo
*TII
= Subtarget
->getInstrInfo();
2182 const TargetRegisterInfo
*TRI
= Subtarget
->getRegisterInfo();
2183 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
2185 const BasicBlock
*BB
= MBB
->getBasicBlock();
2186 MachineFunction::iterator I
= ++MBB
->getIterator();
2188 // Memory Reference.
2189 SmallVector
<MachineMemOperand
*, 2> MMOs(MI
.memoperands_begin(),
2190 MI
.memoperands_end());
2191 Register BufReg
= MI
.getOperand(1).getReg();
2195 DstReg
= MI
.getOperand(0).getReg();
2196 const TargetRegisterClass
*RC
= MRI
.getRegClass(DstReg
);
2197 assert(TRI
->isTypeLegalForClass(*RC
, MVT::i32
) && "Invalid destination!");
2199 Register MainDestReg
= MRI
.createVirtualRegister(RC
);
2200 Register RestoreDestReg
= MRI
.createVirtualRegister(RC
);
2202 // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate following
2203 // instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
2206 // buf[3] = %s17 iff %s17 is used as BP
2207 // buf[1] = RestoreMBB as IC after longjmp
2208 // # SjLjSetup RestoreMBB
2214 // v = phi(v_main, MainMBB, v_restore, RestoreMBB)
2218 // %s17 = buf[3] = iff %s17 is used as BP
2222 MachineBasicBlock
*ThisMBB
= MBB
;
2223 MachineBasicBlock
*MainMBB
= MF
->CreateMachineBasicBlock(BB
);
2224 MachineBasicBlock
*SinkMBB
= MF
->CreateMachineBasicBlock(BB
);
2225 MachineBasicBlock
*RestoreMBB
= MF
->CreateMachineBasicBlock(BB
);
2226 MF
->insert(I
, MainMBB
);
2227 MF
->insert(I
, SinkMBB
);
2228 MF
->push_back(RestoreMBB
);
2229 RestoreMBB
->setMachineBlockAddressTaken();
2231 // Transfer the remainder of BB and its successor edges to SinkMBB.
2232 SinkMBB
->splice(SinkMBB
->begin(), MBB
,
2233 std::next(MachineBasicBlock::iterator(MI
)), MBB
->end());
2234 SinkMBB
->transferSuccessorsAndUpdatePHIs(MBB
);
2238 prepareMBB(*MBB
, MachineBasicBlock::iterator(MI
), RestoreMBB
, DL
);
2240 // Store BP in buf[3] iff this function is using BP.
2241 const VEFrameLowering
*TFI
= Subtarget
->getFrameLowering();
2242 if (TFI
->hasBP(*MF
)) {
2243 MachineInstrBuilder MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(VE::STrii
));
2247 MIB
.addReg(VE::SX17
);
2248 MIB
.setMemRefs(MMOs
);
2251 // Store IP in buf[1].
2252 MachineInstrBuilder MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(VE::STrii
));
2253 MIB
.add(MI
.getOperand(1)); // we can preserve the kill flags here.
2256 MIB
.addReg(LabelReg
, getKillRegState(true));
2257 MIB
.setMemRefs(MMOs
);
2259 // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2263 BuildMI(*ThisMBB
, MI
, DL
, TII
->get(VE::EH_SjLj_Setup
)).addMBB(RestoreMBB
);
2265 const VERegisterInfo
*RegInfo
= Subtarget
->getRegisterInfo();
2266 MIB
.addRegMask(RegInfo
->getNoPreservedMask());
2267 ThisMBB
->addSuccessor(MainMBB
);
2268 ThisMBB
->addSuccessor(RestoreMBB
);
2271 BuildMI(MainMBB
, DL
, TII
->get(VE::LEAzii
), MainDestReg
)
2275 MainMBB
->addSuccessor(SinkMBB
);
2278 BuildMI(*SinkMBB
, SinkMBB
->begin(), DL
, TII
->get(VE::PHI
), DstReg
)
2279 .addReg(MainDestReg
)
2281 .addReg(RestoreDestReg
)
2282 .addMBB(RestoreMBB
);
2285 // Restore BP from buf[3] iff this function is using BP. The address of
2287 // FIXME: Better to not use SX10 here
2288 if (TFI
->hasBP(*MF
)) {
2289 MachineInstrBuilder MIB
=
2290 BuildMI(RestoreMBB
, DL
, TII
->get(VE::LDrii
), VE::SX17
);
2291 MIB
.addReg(VE::SX10
);
2294 MIB
.setMemRefs(MMOs
);
2296 BuildMI(RestoreMBB
, DL
, TII
->get(VE::LEAzii
), RestoreDestReg
)
2300 BuildMI(RestoreMBB
, DL
, TII
->get(VE::BRCFLa_t
)).addMBB(SinkMBB
);
2301 RestoreMBB
->addSuccessor(SinkMBB
);
2303 MI
.eraseFromParent();
2308 VETargetLowering::emitEHSjLjLongJmp(MachineInstr
&MI
,
2309 MachineBasicBlock
*MBB
) const {
2310 DebugLoc DL
= MI
.getDebugLoc();
2311 MachineFunction
*MF
= MBB
->getParent();
2312 const TargetInstrInfo
*TII
= Subtarget
->getInstrInfo();
2313 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
2315 // Memory Reference.
2316 SmallVector
<MachineMemOperand
*, 2> MMOs(MI
.memoperands_begin(),
2317 MI
.memoperands_end());
2318 Register BufReg
= MI
.getOperand(0).getReg();
2320 Register Tmp
= MRI
.createVirtualRegister(&VE::I64RegClass
);
2321 // Since FP is only updated here but NOT referenced, it's treated as GPR.
2322 Register FP
= VE::SX9
;
2323 Register SP
= VE::SX11
;
2325 MachineInstrBuilder MIB
;
2327 MachineBasicBlock
*ThisMBB
= MBB
;
2329 // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate following instructions.
2332 // %fp = load buf[0]
2333 // %jmp = load buf[1]
2334 // %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
2335 // %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2339 MIB
= BuildMI(*ThisMBB
, MI
, DL
, TII
->get(VE::LDrii
), FP
);
2343 MIB
.setMemRefs(MMOs
);
2346 MIB
= BuildMI(*ThisMBB
, MI
, DL
, TII
->get(VE::LDrii
), Tmp
);
2350 MIB
.setMemRefs(MMOs
);
2352 // Copy BufReg to SX10 for later use in setjmp.
2353 // FIXME: Better to not use SX10 here
2354 BuildMI(*ThisMBB
, MI
, DL
, TII
->get(VE::ORri
), VE::SX10
)
2359 MIB
= BuildMI(*ThisMBB
, MI
, DL
, TII
->get(VE::LDrii
), SP
);
2360 MIB
.add(MI
.getOperand(0)); // we can preserve the kill flags here.
2363 MIB
.setMemRefs(MMOs
);
2366 BuildMI(*ThisMBB
, MI
, DL
, TII
->get(VE::BCFLari_t
))
2367 .addReg(Tmp
, getKillRegState(true))
2370 MI
.eraseFromParent();
2375 VETargetLowering::emitSjLjDispatchBlock(MachineInstr
&MI
,
2376 MachineBasicBlock
*BB
) const {
2377 DebugLoc DL
= MI
.getDebugLoc();
2378 MachineFunction
*MF
= BB
->getParent();
2379 MachineFrameInfo
&MFI
= MF
->getFrameInfo();
2380 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
2381 const VEInstrInfo
*TII
= Subtarget
->getInstrInfo();
2382 int FI
= MFI
.getFunctionContextIndex();
2384 // Get a mapping of the call site numbers to all of the landing pads they're
2386 DenseMap
<unsigned, SmallVector
<MachineBasicBlock
*, 2>> CallSiteNumToLPad
;
2387 unsigned MaxCSNum
= 0;
2388 for (auto &MBB
: *MF
) {
2392 MCSymbol
*Sym
= nullptr;
2393 for (const auto &MI
: MBB
) {
2394 if (MI
.isDebugInstr())
2397 assert(MI
.isEHLabel() && "expected EH_LABEL");
2398 Sym
= MI
.getOperand(0).getMCSymbol();
2402 if (!MF
->hasCallSiteLandingPad(Sym
))
2405 for (unsigned CSI
: MF
->getCallSiteLandingPad(Sym
)) {
2406 CallSiteNumToLPad
[CSI
].push_back(&MBB
);
2407 MaxCSNum
= std::max(MaxCSNum
, CSI
);
2411 // Get an ordered list of the machine basic blocks for the jump table.
2412 std::vector
<MachineBasicBlock
*> LPadList
;
2413 SmallPtrSet
<MachineBasicBlock
*, 32> InvokeBBs
;
2414 LPadList
.reserve(CallSiteNumToLPad
.size());
2416 for (unsigned CSI
= 1; CSI
<= MaxCSNum
; ++CSI
) {
2417 for (auto &LP
: CallSiteNumToLPad
[CSI
]) {
2418 LPadList
.push_back(LP
);
2419 InvokeBBs
.insert(LP
->pred_begin(), LP
->pred_end());
2423 assert(!LPadList
.empty() &&
2424 "No landing pad destinations for the dispatch jump table!");
2426 // The %fn_context is allocated like below (from --print-after=sjljehprepare):
2427 // %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
2429 // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
2430 // First `i64` is callsite, so callsite is FI+8.
2431 static const int OffsetIC
= 72;
2432 static const int OffsetCS
= 8;
2434 // Create the MBBs for the dispatch code like following:
2437 // Prepare DispatchBB address and store it to buf[1].
2441 // %s15 = GETGOT iff isPositionIndependent
2442 // %callsite = load callsite
2443 // brgt.l.t #size of callsites, %callsite, DispContBB
2449 // %breg = address of jump table
2450 // %pc = load and calculate next pc from %breg and %callsite
2453 // Shove the dispatch's address into the return slot in the function context.
2454 MachineBasicBlock
*DispatchBB
= MF
->CreateMachineBasicBlock();
2455 DispatchBB
->setIsEHPad(true);
2457 // Trap BB will causes trap like `assert(0)`.
2458 MachineBasicBlock
*TrapBB
= MF
->CreateMachineBasicBlock();
2459 DispatchBB
->addSuccessor(TrapBB
);
2461 MachineBasicBlock
*DispContBB
= MF
->CreateMachineBasicBlock();
2462 DispatchBB
->addSuccessor(DispContBB
);
2465 MF
->push_back(DispatchBB
);
2466 MF
->push_back(DispContBB
);
2467 MF
->push_back(TrapBB
);
2469 // Insert code to call abort in the TrapBB.
2470 Register Abort
= prepareSymbol(*TrapBB
, TrapBB
->end(), "abort", DL
,
2471 /* Local */ false, /* Call */ true);
2472 BuildMI(TrapBB
, DL
, TII
->get(VE::BSICrii
), VE::SX10
)
2473 .addReg(Abort
, getKillRegState(true))
2477 // Insert code into the entry block that creates and registers the function
2479 setupEntryBlockForSjLj(MI
, BB
, DispatchBB
, FI
, OffsetIC
);
2481 // Create the jump table and associated information
2482 unsigned JTE
= getJumpTableEncoding();
2483 MachineJumpTableInfo
*JTI
= MF
->getOrCreateJumpTableInfo(JTE
);
2484 unsigned MJTI
= JTI
->createJumpTableIndex(LPadList
);
2486 const VERegisterInfo
&RI
= TII
->getRegisterInfo();
2487 // Add a register mask with no preserved registers. This results in all
2488 // registers being marked as clobbered.
2489 BuildMI(DispatchBB
, DL
, TII
->get(VE::NOP
))
2490 .addRegMask(RI
.getNoPreservedMask());
2492 if (isPositionIndependent()) {
2493 // Force to generate GETGOT, since current implementation doesn't store GOT
2495 BuildMI(DispatchBB
, DL
, TII
->get(VE::GETGOT
), VE::SX15
);
2498 // IReg is used as an index in a memory operand and therefore can't be SP
2499 const TargetRegisterClass
*RC
= &VE::I64RegClass
;
2500 Register IReg
= MRI
.createVirtualRegister(RC
);
2501 addFrameReference(BuildMI(DispatchBB
, DL
, TII
->get(VE::LDLZXrii
), IReg
), FI
,
2503 if (LPadList
.size() < 64) {
2504 BuildMI(DispatchBB
, DL
, TII
->get(VE::BRCFLir_t
))
2505 .addImm(VECC::CC_ILE
)
2506 .addImm(LPadList
.size())
2510 assert(LPadList
.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
2511 Register TmpReg
= MRI
.createVirtualRegister(RC
);
2512 BuildMI(DispatchBB
, DL
, TII
->get(VE::LEAzii
), TmpReg
)
2515 .addImm(LPadList
.size());
2516 BuildMI(DispatchBB
, DL
, TII
->get(VE::BRCFLrr_t
))
2517 .addImm(VECC::CC_ILE
)
2518 .addReg(TmpReg
, getKillRegState(true))
2523 Register BReg
= MRI
.createVirtualRegister(RC
);
2524 Register Tmp1
= MRI
.createVirtualRegister(RC
);
2525 Register Tmp2
= MRI
.createVirtualRegister(RC
);
2527 if (isPositionIndependent()) {
2528 // Create following instructions for local linkage PIC code.
2529 // lea %Tmp1, .LJTI0_0@gotoff_lo
2530 // and %Tmp2, %Tmp1, (32)0
2531 // lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2532 BuildMI(DispContBB
, DL
, TII
->get(VE::LEAzii
), Tmp1
)
2535 .addJumpTableIndex(MJTI
, VEMCExpr::VK_VE_GOTOFF_LO32
);
2536 BuildMI(DispContBB
, DL
, TII
->get(VE::ANDrm
), Tmp2
)
2537 .addReg(Tmp1
, getKillRegState(true))
2539 BuildMI(DispContBB
, DL
, TII
->get(VE::LEASLrri
), BReg
)
2541 .addReg(Tmp2
, getKillRegState(true))
2542 .addJumpTableIndex(MJTI
, VEMCExpr::VK_VE_GOTOFF_HI32
);
2544 // Create following instructions for non-PIC code.
2545 // lea %Tmp1, .LJTI0_0@lo
2546 // and %Tmp2, %Tmp1, (32)0
2547 // lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
2548 BuildMI(DispContBB
, DL
, TII
->get(VE::LEAzii
), Tmp1
)
2551 .addJumpTableIndex(MJTI
, VEMCExpr::VK_VE_LO32
);
2552 BuildMI(DispContBB
, DL
, TII
->get(VE::ANDrm
), Tmp2
)
2553 .addReg(Tmp1
, getKillRegState(true))
2555 BuildMI(DispContBB
, DL
, TII
->get(VE::LEASLrii
), BReg
)
2556 .addReg(Tmp2
, getKillRegState(true))
2558 .addJumpTableIndex(MJTI
, VEMCExpr::VK_VE_HI32
);
2562 case MachineJumpTableInfo::EK_BlockAddress
: {
2563 // Generate simple block address code for no-PIC model.
2564 // sll %Tmp1, %IReg, 3
2565 // lds %TReg, 0(%Tmp1, %BReg)
2568 Register TReg
= MRI
.createVirtualRegister(RC
);
2569 Register Tmp1
= MRI
.createVirtualRegister(RC
);
2571 BuildMI(DispContBB
, DL
, TII
->get(VE::SLLri
), Tmp1
)
2572 .addReg(IReg
, getKillRegState(true))
2574 BuildMI(DispContBB
, DL
, TII
->get(VE::LDrri
), TReg
)
2575 .addReg(BReg
, getKillRegState(true))
2576 .addReg(Tmp1
, getKillRegState(true))
2578 BuildMI(DispContBB
, DL
, TII
->get(VE::BCFLari_t
))
2579 .addReg(TReg
, getKillRegState(true))
2583 case MachineJumpTableInfo::EK_Custom32
: {
2584 // Generate block address code using differences from the function pointer
2586 // sll %Tmp1, %IReg, 2
2587 // ldl.zx %OReg, 0(%Tmp1, %BReg)
2588 // Prepare function address in BReg2.
2589 // adds.l %TReg, %BReg2, %OReg
2592 assert(isPositionIndependent());
2593 Register OReg
= MRI
.createVirtualRegister(RC
);
2594 Register TReg
= MRI
.createVirtualRegister(RC
);
2595 Register Tmp1
= MRI
.createVirtualRegister(RC
);
2597 BuildMI(DispContBB
, DL
, TII
->get(VE::SLLri
), Tmp1
)
2598 .addReg(IReg
, getKillRegState(true))
2600 BuildMI(DispContBB
, DL
, TII
->get(VE::LDLZXrri
), OReg
)
2601 .addReg(BReg
, getKillRegState(true))
2602 .addReg(Tmp1
, getKillRegState(true))
2605 prepareSymbol(*DispContBB
, DispContBB
->end(),
2606 DispContBB
->getParent()->getName(), DL
, /* Local */ true);
2607 BuildMI(DispContBB
, DL
, TII
->get(VE::ADDSLrr
), TReg
)
2608 .addReg(OReg
, getKillRegState(true))
2609 .addReg(BReg2
, getKillRegState(true));
2610 BuildMI(DispContBB
, DL
, TII
->get(VE::BCFLari_t
))
2611 .addReg(TReg
, getKillRegState(true))
2616 llvm_unreachable("Unexpected jump table encoding");
2619 // Add the jump table entries as successors to the MBB.
2620 SmallPtrSet
<MachineBasicBlock
*, 8> SeenMBBs
;
2621 for (auto &LP
: LPadList
)
2622 if (SeenMBBs
.insert(LP
).second
)
2623 DispContBB
->addSuccessor(LP
);
2625 // N.B. the order the invoke BBs are processed in doesn't matter here.
2626 SmallVector
<MachineBasicBlock
*, 64> MBBLPads
;
2627 const MCPhysReg
*SavedRegs
= MF
->getRegInfo().getCalleeSavedRegs();
2628 for (MachineBasicBlock
*MBB
: InvokeBBs
) {
2629 // Remove the landing pad successor from the invoke block and replace it
2630 // with the new dispatch block.
2631 // Keep a copy of Successors since it's modified inside the loop.
2632 SmallVector
<MachineBasicBlock
*, 8> Successors(MBB
->succ_rbegin(),
2634 // FIXME: Avoid quadratic complexity.
2635 for (auto *MBBS
: Successors
) {
2636 if (MBBS
->isEHPad()) {
2637 MBB
->removeSuccessor(MBBS
);
2638 MBBLPads
.push_back(MBBS
);
2642 MBB
->addSuccessor(DispatchBB
);
2644 // Find the invoke call and mark all of the callee-saved registers as
2645 // 'implicit defined' so that they're spilled. This prevents code from
2646 // moving instructions to before the EH block, where they will never be
2648 for (auto &II
: reverse(*MBB
)) {
2652 DenseMap
<Register
, bool> DefRegs
;
2653 for (auto &MOp
: II
.operands())
2655 DefRegs
[MOp
.getReg()] = true;
2657 MachineInstrBuilder
MIB(*MF
, &II
);
2658 for (unsigned RI
= 0; SavedRegs
[RI
]; ++RI
) {
2659 Register Reg
= SavedRegs
[RI
];
2661 MIB
.addReg(Reg
, RegState::ImplicitDefine
| RegState::Dead
);
2668 // Mark all former landing pads as non-landing pads. The dispatch is the only
2670 for (auto &LP
: MBBLPads
)
2671 LP
->setIsEHPad(false);
2673 // The instruction is gone now.
2674 MI
.eraseFromParent();
2679 VETargetLowering::EmitInstrWithCustomInserter(MachineInstr
&MI
,
2680 MachineBasicBlock
*BB
) const {
2681 switch (MI
.getOpcode()) {
2683 llvm_unreachable("Unknown Custom Instruction!");
2684 case VE::EH_SjLj_LongJmp
:
2685 return emitEHSjLjLongJmp(MI
, BB
);
2686 case VE::EH_SjLj_SetJmp
:
2687 return emitEHSjLjSetJmp(MI
, BB
);
2688 case VE::EH_SjLj_Setup_Dispatch
:
2689 return emitSjLjDispatchBlock(MI
, BB
);
2693 static bool isSimm7(SDValue V
) {
2694 EVT VT
= V
.getValueType();
2698 if (VT
.isInteger()) {
2699 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(V
))
2700 return isInt
<7>(C
->getSExtValue());
2701 } else if (VT
.isFloatingPoint()) {
2702 if (ConstantFPSDNode
*C
= dyn_cast
<ConstantFPSDNode
>(V
)) {
2703 if (VT
== MVT::f32
|| VT
== MVT::f64
) {
2704 const APInt
&Imm
= C
->getValueAPF().bitcastToAPInt();
2705 uint64_t Val
= Imm
.getSExtValue();
2706 if (Imm
.getBitWidth() == 32)
2707 Val
<<= 32; // Immediate value of float place at higher bits on VE.
2708 return isInt
<7>(Val
);
2715 static bool isMImm(SDValue V
) {
2716 EVT VT
= V
.getValueType();
2720 if (VT
.isInteger()) {
2721 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(V
))
2722 return isMImmVal(getImmVal(C
));
2723 } else if (VT
.isFloatingPoint()) {
2724 if (ConstantFPSDNode
*C
= dyn_cast
<ConstantFPSDNode
>(V
)) {
2725 if (VT
== MVT::f32
) {
2726 // Float value places at higher bits, so ignore lower 32 bits.
2727 return isMImm32Val(getFpImmVal(C
) >> 32);
2728 } else if (VT
== MVT::f64
) {
2729 return isMImmVal(getFpImmVal(C
));
2736 static unsigned decideComp(EVT SrcVT
, ISD::CondCode CC
) {
2737 if (SrcVT
.isFloatingPoint()) {
2738 if (SrcVT
== MVT::f128
)
2742 return isSignedIntSetCC(CC
) ? VEISD::CMPI
: VEISD::CMPU
;
2745 static EVT
decideCompType(EVT SrcVT
) {
2746 if (SrcVT
== MVT::f128
)
2751 static bool safeWithoutCompWithNull(EVT SrcVT
, ISD::CondCode CC
,
2753 if (SrcVT
.isFloatingPoint()) {
2754 // For the case of floating point setcc, only unordered comparison
2755 // or general comparison with -enable-no-nans-fp-math option reach
2756 // here, so it is safe even if values are NaN. Only f128 doesn't
2757 // safe since VE uses f64 result of f128 comparison.
2758 return SrcVT
!= MVT::f128
;
2760 if (isIntEqualitySetCC(CC
)) {
2761 // For the case of equal or not equal, it is safe without comparison with 0.
2765 // For the case of integer setcc with cmov, all signed comparison with 0
2767 return isSignedIntSetCC(CC
);
2769 // For the case of integer setcc, only signed 64 bits comparison is safe.
2770 // For unsigned, "CMPU 0x80000000, 0" has to be greater than 0, but it becomes
2771 // less than 0 witout CMPU. For 32 bits, other half of 32 bits are
2772 // uncoditional, so it is not safe too without CMPI..
2773 return isSignedIntSetCC(CC
) && SrcVT
== MVT::i64
;
2776 static SDValue
generateComparison(EVT VT
, SDValue LHS
, SDValue RHS
,
2777 ISD::CondCode CC
, bool WithCMov
,
2778 const SDLoc
&DL
, SelectionDAG
&DAG
) {
2779 // Compare values. If RHS is 0 and it is safe to calculate without
2780 // comparison, we don't generate an instruction for comparison.
2781 EVT CompVT
= decideCompType(VT
);
2782 if (CompVT
== VT
&& safeWithoutCompWithNull(VT
, CC
, WithCMov
) &&
2783 (isNullConstant(RHS
) || isNullFPConstant(RHS
))) {
2786 return DAG
.getNode(decideComp(VT
, CC
), DL
, CompVT
, LHS
, RHS
);
2789 SDValue
VETargetLowering::combineSelect(SDNode
*N
,
2790 DAGCombinerInfo
&DCI
) const {
2791 assert(N
->getOpcode() == ISD::SELECT
&&
2792 "Should be called with a SELECT node");
2793 ISD::CondCode CC
= ISD::CondCode::SETNE
;
2794 SDValue Cond
= N
->getOperand(0);
2795 SDValue True
= N
->getOperand(1);
2796 SDValue False
= N
->getOperand(2);
2798 // We handle only scalar SELECT.
2799 EVT VT
= N
->getValueType(0);
2803 // Peform combineSelect after leagalize DAG.
2804 if (!DCI
.isAfterLegalizeDAG())
2807 EVT VT0
= Cond
.getValueType();
2809 // VE's condition move can handle MImm in True clause, so nothing to do.
2810 } else if (isMImm(False
)) {
2811 // VE's condition move can handle MImm in True clause, so swap True and
2812 // False clauses if False has MImm value. And, update condition code.
2813 std::swap(True
, False
);
2814 CC
= getSetCCInverse(CC
, VT0
);
2818 SelectionDAG
&DAG
= DCI
.DAG
;
2819 VECC::CondCode VECCVal
;
2820 if (VT0
.isFloatingPoint()) {
2821 VECCVal
= fpCondCode2Fcc(CC
);
2823 VECCVal
= intCondCode2Icc(CC
);
2825 SDValue Ops
[] = {Cond
, True
, False
,
2826 DAG
.getConstant(VECCVal
, DL
, MVT::i32
)};
2827 return DAG
.getNode(VEISD::CMOV
, DL
, VT
, Ops
);
2830 SDValue
VETargetLowering::combineSelectCC(SDNode
*N
,
2831 DAGCombinerInfo
&DCI
) const {
2832 assert(N
->getOpcode() == ISD::SELECT_CC
&&
2833 "Should be called with a SELECT_CC node");
2834 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(4))->get();
2835 SDValue LHS
= N
->getOperand(0);
2836 SDValue RHS
= N
->getOperand(1);
2837 SDValue True
= N
->getOperand(2);
2838 SDValue False
= N
->getOperand(3);
2840 // We handle only scalar SELECT_CC.
2841 EVT VT
= N
->getValueType(0);
2845 // Peform combineSelectCC after leagalize DAG.
2846 if (!DCI
.isAfterLegalizeDAG())
2849 // We handle only i32/i64/f32/f64/f128 comparisons.
2850 EVT LHSVT
= LHS
.getValueType();
2851 assert(LHSVT
== RHS
.getValueType());
2852 switch (LHSVT
.getSimpleVT().SimpleTy
) {
2860 // Return SDValue to let llvm handle other types.
2865 // VE's comparison can handle MImm in RHS, so nothing to do.
2866 } else if (isSimm7(RHS
)) {
2867 // VE's comparison can handle Simm7 in LHS, so swap LHS and RHS, and
2868 // update condition code.
2869 std::swap(LHS
, RHS
);
2870 CC
= getSetCCSwappedOperands(CC
);
2873 // VE's condition move can handle MImm in True clause, so nothing to do.
2874 } else if (isMImm(False
)) {
2875 // VE's condition move can handle MImm in True clause, so swap True and
2876 // False clauses if False has MImm value. And, update condition code.
2877 std::swap(True
, False
);
2878 CC
= getSetCCInverse(CC
, LHSVT
);
2882 SelectionDAG
&DAG
= DCI
.DAG
;
2884 bool WithCMov
= true;
2885 SDValue CompNode
= generateComparison(LHSVT
, LHS
, RHS
, CC
, WithCMov
, DL
, DAG
);
2887 VECC::CondCode VECCVal
;
2888 if (LHSVT
.isFloatingPoint()) {
2889 VECCVal
= fpCondCode2Fcc(CC
);
2891 VECCVal
= intCondCode2Icc(CC
);
2893 SDValue Ops
[] = {CompNode
, True
, False
,
2894 DAG
.getConstant(VECCVal
, DL
, MVT::i32
)};
2895 return DAG
.getNode(VEISD::CMOV
, DL
, VT
, Ops
);
2898 static bool isI32InsnAllUses(const SDNode
*User
, const SDNode
*N
);
2899 static bool isI32Insn(const SDNode
*User
, const SDNode
*N
) {
2900 switch (User
->getOpcode()) {
2914 case ISD::SINT_TO_FP
:
2915 case ISD::UINT_TO_FP
:
2918 case ISD::ATOMIC_CMP_SWAP
:
2919 case ISD::ATOMIC_SWAP
:
2924 if (N
->getOperand(0).getOpcode() != ISD::SRL
)
2926 // (srl (trunc (srl ...))) may be optimized by combining srl, so
2927 // doesn't optimize trunc now.
2929 case ISD::SELECT_CC
:
2930 if (User
->getOperand(2).getNode() != N
&&
2931 User
->getOperand(3).getNode() != N
)
2933 return isI32InsnAllUses(User
, N
);
2935 // CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
2936 // However, trunc in true or false clauses is not safe.
2937 if (User
->getOperand(1).getNode() != N
&&
2938 User
->getOperand(2).getNode() != N
&&
2939 isa
<ConstantSDNode
>(User
->getOperand(3))) {
2940 VECC::CondCode VECCVal
=
2941 static_cast<VECC::CondCode
>(User
->getConstantOperandVal(3));
2942 return isIntVECondCode(VECCVal
);
2949 case ISD::CopyToReg
:
2950 // Check all use of selections, bit operations, and copies. If all of them
2951 // are safe, optimize truncate to extract_subreg.
2952 return isI32InsnAllUses(User
, N
);
2956 static bool isI32InsnAllUses(const SDNode
*User
, const SDNode
*N
) {
2957 // Check all use of User node. If all of them are safe, optimize
2958 // truncate to extract_subreg.
2959 for (const SDNode
*U
: User
->uses()) {
2960 switch (U
->getOpcode()) {
2962 // If the use is an instruction which treats the source operand as i32,
2963 // it is safe to avoid truncate here.
2964 if (isI32Insn(U
, N
))
2967 case ISD::ANY_EXTEND
:
2968 case ISD::SIGN_EXTEND
:
2969 case ISD::ZERO_EXTEND
: {
2970 // Special optimizations to the combination of ext and trunc.
2971 // (ext ... (select ... (trunc ...))) is safe to avoid truncate here
2972 // since this truncate instruction clears higher 32 bits which is filled
2973 // by one of ext instructions later.
2974 assert(N
->getValueType(0) == MVT::i32
&&
2975 "find truncate to not i32 integer");
2976 if (User
->getOpcode() == ISD::SELECT_CC
||
2977 User
->getOpcode() == ISD::SELECT
|| User
->getOpcode() == VEISD::CMOV
)
2987 // Optimize TRUNCATE in DAG combining. Optimizing it in CUSTOM lower is
2988 // sometime too early. Optimizing it in DAG pattern matching in VEInstrInfo.td
2989 // is sometime too late. So, doing it at here.
2990 SDValue
VETargetLowering::combineTRUNCATE(SDNode
*N
,
2991 DAGCombinerInfo
&DCI
) const {
2992 assert(N
->getOpcode() == ISD::TRUNCATE
&&
2993 "Should be called with a TRUNCATE node");
2995 SelectionDAG
&DAG
= DCI
.DAG
;
2997 EVT VT
= N
->getValueType(0);
2999 // We prefer to do this when all types are legal.
3000 if (!DCI
.isAfterLegalizeDAG())
3003 // Skip combine TRUNCATE atm if the operand of TRUNCATE might be a constant.
3004 if (N
->getOperand(0)->getOpcode() == ISD::SELECT_CC
&&
3005 isa
<ConstantSDNode
>(N
->getOperand(0)->getOperand(0)) &&
3006 isa
<ConstantSDNode
>(N
->getOperand(0)->getOperand(1)))
3009 // Check all use of this TRUNCATE.
3010 for (const SDNode
*User
: N
->uses()) {
3011 // Make sure that we're not going to replace TRUNCATE for non i32
3014 // FIXME: Although we could sometimes handle this, and it does occur in
3015 // practice that one of the condition inputs to the select is also one of
3016 // the outputs, we currently can't deal with this.
3017 if (isI32Insn(User
, N
))
3023 SDValue SubI32
= DAG
.getTargetConstant(VE::sub_i32
, DL
, MVT::i32
);
3024 return SDValue(DAG
.getMachineNode(TargetOpcode::EXTRACT_SUBREG
, DL
, VT
,
3025 N
->getOperand(0), SubI32
),
3029 SDValue
VETargetLowering::PerformDAGCombine(SDNode
*N
,
3030 DAGCombinerInfo
&DCI
) const {
3031 switch (N
->getOpcode()) {
3035 return combineSelect(N
, DCI
);
3036 case ISD::SELECT_CC
:
3037 return combineSelectCC(N
, DCI
);
3039 return combineTRUNCATE(N
, DCI
);
3045 //===----------------------------------------------------------------------===//
3046 // VE Inline Assembly Support
3047 //===----------------------------------------------------------------------===//
3049 VETargetLowering::ConstraintType
3050 VETargetLowering::getConstraintType(StringRef Constraint
) const {
3051 if (Constraint
.size() == 1) {
3052 switch (Constraint
[0]) {
3055 case 'v': // vector registers
3056 return C_RegisterClass
;
3059 return TargetLowering::getConstraintType(Constraint
);
3062 std::pair
<unsigned, const TargetRegisterClass
*>
3063 VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo
*TRI
,
3064 StringRef Constraint
,
3066 const TargetRegisterClass
*RC
= nullptr;
3067 if (Constraint
.size() == 1) {
3068 switch (Constraint
[0]) {
3070 return TargetLowering::getRegForInlineAsmConstraint(TRI
, Constraint
, VT
);
3072 RC
= &VE::I64RegClass
;
3075 RC
= &VE::V64RegClass
;
3078 return std::make_pair(0U, RC
);
3081 return TargetLowering::getRegForInlineAsmConstraint(TRI
, Constraint
, VT
);
3084 //===----------------------------------------------------------------------===//
3085 // VE Target Optimization Support
3086 //===----------------------------------------------------------------------===//
3088 unsigned VETargetLowering::getMinimumJumpTableEntries() const {
3089 // Specify 8 for PIC model to relieve the impact of PIC load instructions.
3090 if (isJumpTableRelative())
3093 return TargetLowering::getMinimumJumpTableEntries();
3096 bool VETargetLowering::hasAndNot(SDValue Y
) const {
3097 EVT VT
= Y
.getValueType();
3099 // VE doesn't have vector and not instruction.
3103 // VE allows different immediate values for X and Y where ~X & Y.
3104 // Only simm7 works for X, and only mimm works for Y on VE. However, this
3105 // function is used to check whether an immediate value is OK for and-not
3106 // instruction as both X and Y. Generating additional instruction to
3107 // retrieve an immediate value is no good since the purpose of this
3108 // function is to convert a series of 3 instructions to another series of
3109 // 3 instructions with better parallelism. Therefore, we return false
3110 // for all immediate values now.
3111 // FIXME: Change hasAndNot function to have two operands to make it work
3112 // correctly with Aurora VE.
3113 if (isa
<ConstantSDNode
>(Y
))
3116 // It's ok for generic registers.
3120 SDValue
VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op
,
3121 SelectionDAG
&DAG
) const {
3122 assert(Op
.getOpcode() == ISD::EXTRACT_VECTOR_ELT
&& "Unknown opcode!");
3123 MVT VT
= Op
.getOperand(0).getSimpleValueType();
3125 // Special treatment for packed V64 types.
3126 assert(VT
== MVT::v512i32
|| VT
== MVT::v512f32
);
3128 // Example of codes:
3129 // %packed_v = extractelt %vr, %idx / 2
3130 // %v = %packed_v >> (%idx % 2 * 32)
3131 // %res = %v & 0xffffffff
3133 SDValue Vec
= Op
.getOperand(0);
3134 SDValue Idx
= Op
.getOperand(1);
3136 SDValue Result
= Op
;
3137 if (false /* Idx->isConstant() */) {
3138 // TODO: optimized implementation using constant values
3140 SDValue Const1
= DAG
.getConstant(1, DL
, MVT::i64
);
3141 SDValue HalfIdx
= DAG
.getNode(ISD::SRL
, DL
, MVT::i64
, {Idx
, Const1
});
3143 SDValue(DAG
.getMachineNode(VE::LVSvr
, DL
, MVT::i64
, {Vec
, HalfIdx
}), 0);
3144 SDValue AndIdx
= DAG
.getNode(ISD::AND
, DL
, MVT::i64
, {Idx
, Const1
});
3145 SDValue Shift
= DAG
.getNode(ISD::XOR
, DL
, MVT::i64
, {AndIdx
, Const1
});
3146 SDValue Const5
= DAG
.getConstant(5, DL
, MVT::i64
);
3147 Shift
= DAG
.getNode(ISD::SHL
, DL
, MVT::i64
, {Shift
, Const5
});
3148 PackedElt
= DAG
.getNode(ISD::SRL
, DL
, MVT::i64
, {PackedElt
, Shift
});
3149 SDValue Mask
= DAG
.getConstant(0xFFFFFFFFL
, DL
, MVT::i64
);
3150 PackedElt
= DAG
.getNode(ISD::AND
, DL
, MVT::i64
, {PackedElt
, Mask
});
3151 SDValue SubI32
= DAG
.getTargetConstant(VE::sub_i32
, DL
, MVT::i32
);
3152 Result
= SDValue(DAG
.getMachineNode(TargetOpcode::EXTRACT_SUBREG
, DL
,
3153 MVT::i32
, PackedElt
, SubI32
),
3156 if (Op
.getSimpleValueType() == MVT::f32
) {
3157 Result
= DAG
.getBitcast(MVT::f32
, Result
);
3159 assert(Op
.getSimpleValueType() == MVT::i32
);
3165 SDValue
VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op
,
3166 SelectionDAG
&DAG
) const {
3167 assert(Op
.getOpcode() == ISD::INSERT_VECTOR_ELT
&& "Unknown opcode!");
3168 MVT VT
= Op
.getOperand(0).getSimpleValueType();
3170 // Special treatment for packed V64 types.
3171 assert(VT
== MVT::v512i32
|| VT
== MVT::v512f32
);
3173 // The v512i32 and v512f32 starts from upper bits (0..31). This "upper
3174 // bits" required `val << 32` from C implementation's point of view.
3176 // Example of codes:
3177 // %packed_elt = extractelt %vr, (%idx >> 1)
3178 // %shift = ((%idx & 1) ^ 1) << 5
3179 // %packed_elt &= 0xffffffff00000000 >> shift
3180 // %packed_elt |= (zext %val) << shift
3181 // %vr = insertelt %vr, %packed_elt, (%idx >> 1)
3184 SDValue Vec
= Op
.getOperand(0);
3185 SDValue Val
= Op
.getOperand(1);
3186 SDValue Idx
= Op
.getOperand(2);
3187 if (Idx
.getSimpleValueType() == MVT::i32
)
3188 Idx
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, MVT::i64
, Idx
);
3189 if (Val
.getSimpleValueType() == MVT::f32
)
3190 Val
= DAG
.getBitcast(MVT::i32
, Val
);
3191 assert(Val
.getSimpleValueType() == MVT::i32
);
3192 Val
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, MVT::i64
, Val
);
3194 SDValue Result
= Op
;
3195 if (false /* Idx->isConstant()*/) {
3196 // TODO: optimized implementation using constant values
3198 SDValue Const1
= DAG
.getConstant(1, DL
, MVT::i64
);
3199 SDValue HalfIdx
= DAG
.getNode(ISD::SRL
, DL
, MVT::i64
, {Idx
, Const1
});
3201 SDValue(DAG
.getMachineNode(VE::LVSvr
, DL
, MVT::i64
, {Vec
, HalfIdx
}), 0);
3202 SDValue AndIdx
= DAG
.getNode(ISD::AND
, DL
, MVT::i64
, {Idx
, Const1
});
3203 SDValue Shift
= DAG
.getNode(ISD::XOR
, DL
, MVT::i64
, {AndIdx
, Const1
});
3204 SDValue Const5
= DAG
.getConstant(5, DL
, MVT::i64
);
3205 Shift
= DAG
.getNode(ISD::SHL
, DL
, MVT::i64
, {Shift
, Const5
});
3206 SDValue Mask
= DAG
.getConstant(0xFFFFFFFF00000000L
, DL
, MVT::i64
);
3207 Mask
= DAG
.getNode(ISD::SRL
, DL
, MVT::i64
, {Mask
, Shift
});
3208 PackedElt
= DAG
.getNode(ISD::AND
, DL
, MVT::i64
, {PackedElt
, Mask
});
3209 Val
= DAG
.getNode(ISD::SHL
, DL
, MVT::i64
, {Val
, Shift
});
3210 PackedElt
= DAG
.getNode(ISD::OR
, DL
, MVT::i64
, {PackedElt
, Val
});
3212 SDValue(DAG
.getMachineNode(VE::LSVrr_v
, DL
, Vec
.getSimpleValueType(),
3213 {HalfIdx
, PackedElt
, Vec
}),