//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that VE uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "VEISelLowering.h"
#include "MCTargetDesc/VEMCExpr.h"
#include "VECustomDAG.h"
#include "VEInstrBuilder.h"
#include "VEMachineFunctionInfo.h"
#include "VERegisterInfo.h"
#include "VETargetMachine.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

#define DEBUG_TYPE "ve-lower"
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "VEGenCallingConv.inc"
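
// Helpers that pick the CCAssignFn (generated from VEGenCallingConv.inc) used
// for return values and for parameters of a given calling convention.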
CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
  switch (CallConv) {
  default:
    return RetCC_VE_C;
  case CallingConv::Fast:
    return RetCC_VE_Fast;
  }
}

CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
  if (IsVarArg)
    return CC_VE2;
  switch (CallConv) {
  default:
    return CC_VE_C;
  case CallingConv::Fast:
    return CC_VE_Fast;
  }
}

bool VETargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  CCAssignFn *RetCC = getReturnCC(CallConv);
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC);
}
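
// Value type tables used below: the vector, mask, and packed vector types
// that become legal when the VPU subtarget feature is enabled.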
static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
                                   MVT::v256f32, MVT::v512f32, MVT::v256f64};

static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};

static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};

void VETargetLowering::initRegisterClasses() {
  // Set up the register classes.
  addRegisterClass(MVT::i32, &VE::I32RegClass);
  addRegisterClass(MVT::i64, &VE::I64RegClass);
  addRegisterClass(MVT::f32, &VE::F32RegClass);
  addRegisterClass(MVT::f64, &VE::I64RegClass);
  addRegisterClass(MVT::f128, &VE::F128RegClass);

  if (Subtarget->enableVPU()) {
    for (MVT VecVT : AllVectorVTs)
      addRegisterClass(VecVT, &VE::V64RegClass);
    addRegisterClass(MVT::v256i1, &VE::VMRegClass);
    addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
  }
}

void VETargetLowering::initSPUActions() {
  const auto &TM = getTargetMachine();
  /// Load & Store {

  // VE doesn't have i1 sign extending load.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }

  // VE doesn't have floating point extload/truncstore, so expand them.
  for (MVT FPVT : MVT::fp_valuetypes()) {
    for (MVT OtherFPVT : MVT::fp_valuetypes()) {
      setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
      setTruncStoreAction(FPVT, OtherFPVT, Expand);
    }
  }

  // VE doesn't have fp128 load/store, so expand them in custom lower.
  setOperationAction(ISD::LOAD, MVT::f128, Custom);
  setOperationAction(ISD::STORE, MVT::f128, Custom);

  /// } Load & Store

  // Custom legalize address nodes into LO/HI parts.
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::ConstantPool, PtrVT, Custom);
  setOperationAction(ISD::JumpTable, PtrVT, Custom);

  /// VAARG handling {
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  // VAARG needs to be lowered to access memory with 8-byte alignment.
  setOperationAction(ISD::VAARG, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  /// } VAARG handling

  /// Stack {
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);

  // Use the default implementation.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  /// } Stack

  /// Branch {

  // VE doesn't have BRCOND.
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // BR_JT is not implemented yet.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  /// } Branch

  /// Int Ops {
  for (MVT IntVT : {MVT::i32, MVT::i64}) {
    // VE has no REM or DIVREM operations.
    setOperationAction(ISD::UREM, IntVT, Expand);
    setOperationAction(ISD::SREM, IntVT, Expand);
    setOperationAction(ISD::SDIVREM, IntVT, Expand);
    setOperationAction(ISD::UDIVREM, IntVT, Expand);

    // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
    setOperationAction(ISD::SHL_PARTS, IntVT, Expand);
    setOperationAction(ISD::SRA_PARTS, IntVT, Expand);
    setOperationAction(ISD::SRL_PARTS, IntVT, Expand);

    // VE has no MULHU/S or U/SMUL_LOHI operations.
    // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
    setOperationAction(ISD::MULHU, IntVT, Expand);
    setOperationAction(ISD::MULHS, IntVT, Expand);
    setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);
    setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);

    // VE has no CTTZ, ROTL, ROTR operations.
    setOperationAction(ISD::CTTZ, IntVT, Expand);
    setOperationAction(ISD::ROTL, IntVT, Expand);
    setOperationAction(ISD::ROTR, IntVT, Expand);

    // VE has a 64-bit instruction which works as an i64 BSWAP operation. The
    // same instruction works as an i32 BSWAP operation with an additional
    // parameter. Use isel patterns to lower BSWAP.
    setOperationAction(ISD::BSWAP, IntVT, Legal);

    // VE has only 64-bit instructions which work as i64 BITREVERSE/CTLZ/CTPOP
    // operations. Use isel patterns for i64, promote for i32.
    LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
    setOperationAction(ISD::BITREVERSE, IntVT, Act);
    setOperationAction(ISD::CTLZ, IntVT, Act);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
    setOperationAction(ISD::CTPOP, IntVT, Act);

    // VE has only 64-bit instructions which work as i64 AND/OR/XOR operations.
    // Use isel patterns for i64, promote for i32.
    setOperationAction(ISD::AND, IntVT, Act);
    setOperationAction(ISD::OR, IntVT, Act);
    setOperationAction(ISD::XOR, IntVT, Act);

    // Legal smax and smin
    setOperationAction(ISD::SMAX, IntVT, Legal);
    setOperationAction(ISD::SMIN, IntVT, Legal);
  }
  /// } Int Ops

  /// Conversion {
  // VE doesn't have instructions for fp<->uint, so let llvm expand them.
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);

  // fp16 not supported
  for (MVT FPVT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
    setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
  }
  /// } Conversion

  /// Floating-point Ops {
  /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
  ///       and fcmp.

  // VE doesn't have the following floating point operations.
  for (MVT VT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FNEG, VT, Expand);
    setOperationAction(ISD::FREM, VT, Expand);
  }

  // VE doesn't have fdiv of f128.
  setOperationAction(ISD::FDIV, MVT::f128, Expand);

  for (MVT FPVT : {MVT::f32, MVT::f64}) {
    // f32 and f64 use ConstantFP.  f128 uses ConstantPool.
    setOperationAction(ISD::ConstantFP, FPVT, Legal);
  }
  /// } Floating-point Ops

  /// Floating-point math functions {

  // VE doesn't have the following floating point math functions.
  for (MVT VT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FABS, VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, VT, Expand);
    setOperationAction(ISD::FCOS, VT, Expand);
    setOperationAction(ISD::FMA, VT, Expand);
    setOperationAction(ISD::FPOW, VT, Expand);
    setOperationAction(ISD::FSIN, VT, Expand);
    setOperationAction(ISD::FSQRT, VT, Expand);
  }

  // VE has single and double FMINNUM and FMAXNUM.
  for (MVT VT : {MVT::f32, MVT::f64}) {
    setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, VT, Legal);
  }

  /// } Floating-point math functions

  /// Atomic instructions {

  setMaxAtomicSizeInBitsSupported(64);
  setMinCmpXchgSizeInBits(32);
  setSupportsUnalignedAtomics(false);

  // Use custom inserter for ATOMIC_FENCE.
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Other atomic instructions.
  for (MVT VT : MVT::integer_valuetypes()) {
    // Support i8/i16 atomic swap.
    setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);

    // FIXME: Support "atmam" instructions.
    setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);

    // VE doesn't have the following instructions.
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
  }

  /// } Atomic instructions

  /// SJLJ instructions {
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
  if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
  /// } SJLJ instructions

  // Intrinsic instructions
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
}
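
// Set up actions for vector (VPU) types: mask-register ops, translation of
// legal vector types to VVP_* nodes, packed-type element access, vector
// memory operations, and integer vector reductions.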
void VETargetLowering::initVPUActions() {
  for (MVT LegalMaskVT : AllMaskVTs)
    setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);

  for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
    setOperationAction(Opc, MVT::v512i1, Custom);

  for (MVT LegalVecVT : AllVectorVTs) {
    setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
    // Translate all vector instructions with legal element types to VVP_*
    // nodes.
    // TODO We will custom-widen into VVP_* nodes in the future. While we are
    // building the infrastructure for this, we only do this for legal vector
    // VTs.
#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME)                                     \
  setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
#define ADD_VVP_OP(VVP_NAME, ISD_NAME)                                         \
  setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
    setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, LegalVecVT, Custom);
    setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_STORE, LegalVecVT, Custom);
#include "VVPNodes.def"
  }

  for (MVT LegalPackedVT : AllPackedVTs) {
    setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
  }

  // vNt32, vNt64 ops (legal element types)
  for (MVT VT : MVT::vector_valuetypes()) {
    MVT ElemVT = VT.getVectorElementType();
    unsigned ElemBits = ElemVT.getScalarSizeInBits();
    if (ElemBits != 32 && ElemBits != 64)
      continue;

    for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
      setOperationAction(MemOpc, VT, Custom);

    const ISD::NodeType IntReductionOCs[] = {
        ISD::VECREDUCE_ADD,  ISD::VECREDUCE_MUL,  ISD::VECREDUCE_AND,
        ISD::VECREDUCE_OR,   ISD::VECREDUCE_XOR,  ISD::VECREDUCE_SMIN,
        ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};

    for (unsigned IntRedOpc : IntReductionOCs)
      setOperationAction(IntRedOpc, VT, Custom);
  }

  // v256i1 and v512i1 ops
  for (MVT MaskVT : AllMaskVTs) {
    // Custom lower mask ops
    setOperationAction(ISD::STORE, MaskVT, Custom);
    setOperationAction(ISD::LOAD, MaskVT, Custom);
  }
}
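
// Lower outgoing return values: assign each value to its return register per
// the return calling convention, applying any extension or the f32-in-i64
// bitcast packing the ABI requires, and glue the copies to the RET_GLUE node.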
SDValue
VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                              bool IsVarArg,
                              const SmallVectorImpl<ISD::OutputArg> &Outs,
                              const SmallVectorImpl<SDValue> &OutVals,
                              const SDLoc &DL, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    SDValue OutVal = OutVals[i];

    // Integer return values must be sign or zero extended by the callee.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::ZExt:
      OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::AExt:
      OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::BCvt: {
      // Convert a float return value to i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Undef = SDValue(
          DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                          MVT::i64, Undef, OutVal, Sub_f32),
                       0);
      break;
    }
    default:
      llvm_unreachable("Unknown loc info!");
    }

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Glue);

    // Guarantee that all emitted copies are stuck together with flags.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(VEISD::RET_GLUE, DL, MVT::Other, RetOps);
}

SDValue VETargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Get the base offset of the incoming arguments stack space.
  unsigned ArgsBaseOffset = Subtarget->getRsaSize();
  // Get the size of the preserved arguments area.
  unsigned ArgsPreserved = 64;

  // Analyze arguments according to CC_VE.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offset computed
  // by CC_VE would be correct now.
  CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));

  for (const CCValAssign &VA : ArgLocs) {
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    if (VA.isRegLoc()) {
      // This argument is passed in a register.
      // All integer register arguments are promoted by the caller to i64.

      // Create a virtual register for the promoted live-in value.
      Register VReg =
          MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
      SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());

      // The caller promoted the argument, so insert an Assert?ext SDNode so we
      // won't promote the value again in this function.
      switch (VA.getLocInfo()) {
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      case CCValAssign::BCvt: {
        // Extract a float argument from i64 with padding.
        //     63     31   0
        //    +------+------+
        //    | float|   0  |
        //    +------+------+
        assert(VA.getLocVT() == MVT::i64);
        assert(VA.getValVT() == MVT::f32);
        SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
        Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                         MVT::f32, Arg, Sub_f32),
                      0);
        break;
      }
      default:
        break;
      }

      // Truncate the register down to the argument type.
      if (VA.isExtInLoc())
        Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);

      InVals.push_back(Arg);
      continue;
    }

    // The registers are exhausted. This argument was passed on the stack.
    assert(VA.isMemLoc());
    // The CC_VE_Full/Half functions compute stack offsets relative to the
    // beginning of the arguments area at %fp + the size of reserved area.
    unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
    unsigned ValSize = VA.getValVT().getSizeInBits() / 8;

    // Adjust the offset for a float argument by adding 4, since the argument
    // is stored in an 8-byte slot with the layout below.  LLVM generates a
    // 4-byte load instruction, so the offset needs adjusting here.  This
    // adjustment is required only in LowerFormalArguments.  In LowerCall,
    // a float argument is converted to i64 first, and stored as 8 bytes of
    // data, which is required by the ABI, so no adjustment is needed.
    //    0      4
    //    +------+------+
    //    | empty| float|
    //    +------+------+
    if (VA.getValVT() == MVT::f32)
      Offset += 4;

    int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
    InVals.push_back(
        DAG.getLoad(VA.getValVT(), DL, Chain,
                    DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
                    MachinePointerInfo::getFixedStack(MF, FI)));
  }

  if (!IsVarArg)
    return Chain;

  // This function takes variable arguments, some of which may have been passed
  // in registers %s0-%s8.
  //
  // The va_start intrinsic needs to know the offset to the first variable
  // argument.
  // TODO: need to calculate offset correctly once we support f128.
  unsigned ArgOffset = ArgLocs.size() * 8;
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  // Skip the reserved area at the top of stack.
  FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);

  return Chain;
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                             const MachineFunction &MF) const {
  Register Reg = StringSwitch<Register>(RegName)
                     .Case("sp", VE::SX11)    // Stack pointer
                     .Case("fp", VE::SX9)     // Frame pointer
                     .Case("sl", VE::SX8)     // Stack limit
                     .Case("lr", VE::SX10)    // Link register
                     .Case("tp", VE::SX14)    // Thread pointer
                     .Case("outer", VE::SX12) // Outer register
                     .Case("info", VE::SX17)  // Info area register
                     .Case("got", VE::SX15)   // Global offset table register
                     .Case("plt", VE::SX16)   // Procedure linkage table register
                     .Default(0);

  if (Reg)
    return Reg;

  report_fatal_error("Invalid register name global variable");
}

//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
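
// Lower a call: materialize the callee address into SX12, copy register and
// stack arguments inside a CALLSEQ_START/CALLSEQ_END bracket, emit the
// VEISD::CALL node, and then copy the results back out of their physregs.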
SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                    SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // VE target does not yet support tail call optimization.
  CLI.IsTailCall = false;

  // Get the base offset of the outgoing arguments stack space.
  unsigned ArgsBaseOffset = Subtarget->getRsaSize();
  // Get the size of the preserved arguments area.
  unsigned ArgsPreserved = 8 * 8u;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offset computed
  // by CC_VE would be correct now.
  CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));

  // VE requires arguments to be passed in both registers and on the stack for
  // varargs or unprototyped functions.
  bool UseBoth = CLI.IsVarArg;

  // Analyze operands again if it is required to store BOTH.
  SmallVector<CCValAssign, 16> ArgLocs2;
  CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
                  ArgLocs2, *DAG.getContext());
  if (UseBoth)
    CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));

  // Get the size of the outgoing arguments stack space requirement.
  unsigned ArgsSize = CCInfo.getStackSize();

  // Keep stack frames 16-byte aligned.
  ArgsSize = alignTo(ArgsSize, 16);

  // Adjust the stack pointer to make room for the arguments.
  // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
  // with more than 6 arguments.
  Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);

  // Collect the set of registers to pass to the function and their values.
  // This will be emitted as a sequence of CopyToReg nodes glued to the call
  // instruction.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

  // Collect chains from all the memory operations that copy arguments to the
  // stack. They must follow the stack pointer adjustment above and precede the
  // call instruction itself.
  SmallVector<SDValue, 8> MemOpChains;

  // VE needs the address of the callee function in a register,
  // so prepare to copy it to SX12 here.

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  // Likewise ExternalSymbol -> TargetExternalSymbol.
  SDValue Callee = CLI.Callee;

  bool IsPICCall = isPositionIndependent();

  // PC-relative references to external symbols should go through $stub.
  // If so, we need to prepare GlobalBaseReg first.
  const TargetMachine &TM = DAG.getTarget();
  const GlobalValue *GV = nullptr;
  auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
  if (CalleeG)
    GV = CalleeG->getGlobal();
  bool Local = TM.shouldAssumeDSOLocal(GV);
  bool UsePlt = !Local;
  MachineFunction &MF = DAG.getMachineFunction();

  // Turn the GlobalAddress/ExternalSymbol node into a value node
  // containing its address here.
  if (CalleeG) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee =
          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
    }
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee =
          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
    }
  }

  RegsToPass.push_back(std::make_pair(VE::SX12, Callee));

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = CLI.OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown location info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt: {
      // Convert a float argument to i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Undef = SDValue(
          DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                       MVT::i64, Undef, Arg, Sub_f32),
                    0);
      break;
    }
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      if (!UseBoth)
        continue;
      VA = ArgLocs2[i];
    }

    assert(VA.isMemLoc());

    // Create a store off the stack pointer for this argument.
    SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
    // The argument area starts at %fp/%sp + the size of reserved area.
    SDValue PtrOff =
        DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
    PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
    MemOpChains.push_back(
        DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
  }

  // Emit all stores, make sure they occur before the call.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Build a sequence of CopyToReg nodes glued together with token chain and
  // glue operands which copy the outgoing args into registers. The InGlue is
  // necessary since all emitted instructions must be stuck together in order
  // to pass the live physical registers.
  SDValue InGlue;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
                             RegsToPass[i].second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // Build the operands for the call instruction itself.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Make sure the CopyToReg nodes are glued to the call instruction which
  // consumes the registers.
  if (InGlue.getNode())
    Ops.push_back(InGlue);

  // Now the call itself.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  // Revert the stack pointer immediately after the call.
  Chain = DAG.getCALLSEQ_END(Chain, ArgsSize, 0, InGlue, DL);
  InGlue = Chain.getValue(1);

  // Now extract the return values. This is more or less the same as
  // LowerFormalArguments.

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Set inreg flag manually for codegen generated library calls that
  // return float.
  if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
    CLI.Ins[0].Flags.setInReg();

  RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    Register Reg = VA.getLocReg();

    // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
    // reside in the same register in the high and low bits. Reuse the
    // CopyFromReg previous node to avoid duplicate copies.
    SDValue RV;
    if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
      if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
        RV = Chain.getValue(0);

    // But usually we'll create a new CopyFromReg for a different register.
    if (!RV.getNode()) {
      RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
      Chain = RV.getValue(1);
      InGlue = Chain.getValue(2);
    }

    // The callee promoted the return value, so insert an Assert?ext SDNode so
    // we won't promote the value again in this function.
    switch (VA.getLocInfo()) {
    case CCValAssign::SExt:
      RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    case CCValAssign::ZExt:
      RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    case CCValAssign::BCvt: {
      // Extract a float return value from i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                      MVT::f32, RV, Sub_f32),
                   0);
      break;
    }
    default:
      break;
    }

    // Truncate the register down to the return value type.
    if (VA.isExtInLoc())
      RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);

    InVals.push_back(RV);
  }

  return Chain;
}

bool VETargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // VE uses 64-bit addressing, so we need multiple instructions to generate
  // an address.  Folding an address with an offset increases the number of
  // instructions, so we disable it here.  Offsets will be folded in the DAG
  // combine later if it is worth doing so.
  return false;
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                    bool ForCodeSize) const {
  return VT == MVT::f32 || VT == MVT::f64;
}

/// Determine if the target supports unaligned memory accesses.
///
/// This function returns true if the target allows unaligned memory accesses
/// of the specified type in the given address space. If true, it also returns
/// whether the unaligned memory access is "fast" in the last argument by
/// reference. This is used, for example, in situations where an array
/// copy/move/set is converted to a sequence of store operations. Its use
/// helps to ensure that such replacements don't generate code that causes an
/// alignment error (trap) on the target machine.
bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                      unsigned AddrSpace,
                                                      Align A,
                                                      MachineMemOperand::Flags,
                                                      unsigned *Fast) const {
  if (Fast) {
    // It's fast anytime on VE
    *Fast = 1;
  }
  return true;
}

VETargetLowering::VETargetLowering(const TargetMachine &TM,
                                   const VESubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Instructions which use registers as conditionals examine all the
  // bits (as does the pseudo SELECT_CC expansion). I don't think it
  // matters much whether it's ZeroOrOneBooleanContent, or
  // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
  // former.
  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent);

  initRegisterClasses();
  initSPUActions();
  initVPUActions();

  setStackPointerRegisterToSaveRestore(VE::SX11);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::TRUNCATE);
  setTargetDAGCombine(ISD::SELECT);
  setTargetDAGCombine(ISD::SELECT_CC);

  // Set function alignment to 16 bytes.
  setMinFunctionAlignment(Align(16));

  // VE stores all arguments with 8-byte alignment.
  setMinStackArgumentAlignment(Align(8));

  computeRegisterProperties(Subtarget->getRegisterInfo());
}

const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
#define TARGET_NODE_CASE(NAME)                                                 \
  case VEISD::NAME:                                                            \
    return "VEISD::" #NAME;
  switch ((VEISD::NodeType)Opcode) {
  case VEISD::FIRST_NUMBER:
    break;
    TARGET_NODE_CASE(CMPI)
    TARGET_NODE_CASE(CMPU)
    TARGET_NODE_CASE(CMPF)
    TARGET_NODE_CASE(CMPQ)
    TARGET_NODE_CASE(CMOV)
    TARGET_NODE_CASE(CALL)
    TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
    TARGET_NODE_CASE(EH_SJLJ_SETJMP)
    TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
    TARGET_NODE_CASE(GETFUNPLT)
    TARGET_NODE_CASE(GETSTACKTOP)
    TARGET_NODE_CASE(GETTLSADDR)
    TARGET_NODE_CASE(GLOBAL_BASE_REG)
    TARGET_NODE_CASE(Hi)
    TARGET_NODE_CASE(Lo)
    TARGET_NODE_CASE(RET_GLUE)
    TARGET_NODE_CASE(TS1AM)
    TARGET_NODE_CASE(VEC_UNPACK_LO)
    TARGET_NODE_CASE(VEC_UNPACK_HI)
    TARGET_NODE_CASE(VEC_PACK)
    TARGET_NODE_CASE(VEC_BROADCAST)
    TARGET_NODE_CASE(REPL_I32)
    TARGET_NODE_CASE(REPL_F32)

    TARGET_NODE_CASE(LEGALAVL)

    // Register the VVP_* SDNodes.
#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
#include "VVPNodes.def"
  }
#undef TARGET_NODE_CASE
  return nullptr;
}
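
// SETCC results are produced as i32 on VE.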
EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                         EVT VT) const {
  return MVT::i32;
}

// Convert to a target node and set target flags.
SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
                                          SelectionDAG &DAG) const {
  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
    return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
                                      GA->getValueType(0), GA->getOffset(), TF);

  if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
    return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
                                     0, TF);

  if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
    return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
                                     CP->getAlign(), CP->getOffset(), TF);

  if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
    return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
                                       TF);

  if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
    return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);

  llvm_unreachable("Unhandled address SDNode");
}

// Split Op into high and low parts according to HiTF and LoTF.
// Return an ADD node combining the parts.
SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
                                       SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
  SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
  return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
}

// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
// or ExternalSymbol SDNode.
SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = Op.getValueType();

  // Handle PIC mode first. VE needs a got load for every variable!
  if (isPositionIndependent()) {
    auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);

    if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
        (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
      // Create the following instructions for local-linkage PIC code.
      //   lea %reg, label@gotoff_lo
      //   and %reg, %reg, (32)0
      //   lea.sl %reg, label@gotoff_hi(%reg, %got)
      SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
                                  VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
      SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
      return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    }
    // Create the following instructions for non-local-linkage PIC code.
    //   lea %reg, label@got_lo
    //   and %reg, %reg, (32)0
    //   lea.sl %reg, label@got_hi(%reg)
    //   ld %reg, (%reg, %got)
    SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
                                VEMCExpr::VK_VE_GOT_LO32, DAG);
    SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
    SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
                       MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // This is one of the absolute code models.
  switch (getTargetMachine().getCodeModel()) {
  default:
    llvm_unreachable("Unsupported absolute code model");
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Large:
    // abs64.
    return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
  }
}

/// Custom Lower {

// The mappings for emitLeadingFence/emitTrailingFence for VE are designed
// following http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
                                                Instruction *Inst,
                                                AtomicOrdering Ord) const {
  switch (Ord) {
  case AtomicOrdering::NotAtomic:
  case AtomicOrdering::Unordered:
    llvm_unreachable("Invalid fence: unordered/non-atomic");
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Acquire:
    return nullptr; // Nothing to do
  case AtomicOrdering::Release:
  case AtomicOrdering::AcquireRelease:
    return Builder.CreateFence(AtomicOrdering::Release);
  case AtomicOrdering::SequentiallyConsistent:
    if (!Inst->hasAtomicStore())
      return nullptr; // Nothing to do
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  }
  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
}

Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
                                                 Instruction *Inst,
                                                 AtomicOrdering Ord) const {
  switch (Ord) {
  case AtomicOrdering::NotAtomic:
  case AtomicOrdering::Unordered:
    llvm_unreachable("Invalid fence: unordered/not-atomic");
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Release:
    return nullptr; // Nothing to do
  case AtomicOrdering::Acquire:
  case AtomicOrdering::AcquireRelease:
    return Builder.CreateFence(AtomicOrdering::Acquire);
  case AtomicOrdering::SequentiallyConsistent:
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  }
  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
}

SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicOrdering FenceOrdering =
      static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  // VE uses Release consistency, so we need a fence instruction if it is a
  // cross-thread fence.
  if (FenceSSID == SyncScope::System) {
    switch (FenceOrdering) {
    case AtomicOrdering::NotAtomic:
    case AtomicOrdering::Unordered:
    case AtomicOrdering::Monotonic:
      // No need to generate a fencem instruction here.
      break;
    case AtomicOrdering::Acquire:
      // Generate "fencem 2" as acquire fence.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(2, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    case AtomicOrdering::Release:
      // Generate "fencem 1" as release fence.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(1, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      // Generate "fencem 3" as acq_rel and seq_cst fence.
      // FIXME: "fencem 3" doesn't wait for accesses by PCIe devices,
      // so seq_cst may require more instructions for them.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(3, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    }
  }

  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
  return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}

TargetLowering::AtomicExpansionKind
VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have a TS1AM implementation for i8/i16/i32/i64, so use it.
  if (AI->getOperation() == AtomicRMWInst::Xchg) {
    return AtomicExpansionKind::None;
  }
  // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.

  // Otherwise, expand it using compare and exchange instruction to not call
  // __sync_fetch_and_* functions.
  return AtomicExpansionKind::CmpXChg;
}
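
// Build the operands for a TS1AM-based sub-word atomic swap: derive the byte
// lane flag and the bit shift amount from the low two bits of the pointer,
// then shift the new value into position (see the formulas in the comments
// below).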
static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
                            SDValue &Bits) {
  SDLoc DL(Op);
  AtomicSDNode *N = cast<AtomicSDNode>(Op);
  SDValue Ptr = N->getOperand(1);
  SDValue Val = N->getOperand(2);
  EVT PtrVT = Ptr.getValueType();
  bool Byte = N->getMemoryVT() == MVT::i8;
  //   Remainder = AND Ptr, 3
  //   Flag = 1 << Remainder  ; If Byte is true (1 byte swap flag)
  //   Flag = 3 << Remainder  ; If Byte is false (2 bytes swap flag)
  //   Bits = Remainder << 3
  //   NewVal = Val << Bits
  SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
  SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
  SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
                      : DAG.getConstant(3, DL, MVT::i32);
  Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
  Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
  return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
}

static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
                             SDValue Bits) {
  SDLoc DL(Op);
  EVT VT = Data.getValueType();
  bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
  //   NewData = Data >> Bits
  //   Result = NewData & 0xff   ; If Byte is true (1 byte)
  //   Result = NewData & 0xffff ; If Byte is false (2 bytes)

  SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
  return DAG.getNode(ISD::AND, DL, VT,
                     {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
}

SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicSDNode *N = cast<AtomicSDNode>(Op);

  if (N->getMemoryVT() == MVT::i8) {
    // For i8, use "ts1am".
    //   Input:
    //     ATOMIC_SWAP Ptr, Val, Order
    //
    //   Output:
    //     Remainder = AND Ptr, 3
    //     Flag = 1 << Remainder   ; 1 byte swap flag for TS1AM inst.
    //     Bits = Remainder << 3
    //     NewVal = Val << Bits
    //
    //     Aligned = AND Ptr, -4
    //     Data = TS1AM Aligned, Flag, NewVal
    //
    //     NewData = Data >> Bits
    //     Result = NewData & 0xff ; 1 byte result
    SDValue Flag;
    SDValue Bits;
    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);

    SDValue Ptr = N->getOperand(1);
    SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
                                  {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
                                  DAG.getVTList(Op.getNode()->getValueType(0),
                                                Op.getNode()->getValueType(1)),
                                  {N->getChain(), Aligned, Flag, NewVal},
                                  N->getMemOperand());

    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
    SDValue Chain = TS1AM.getValue(1);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  if (N->getMemoryVT() == MVT::i16) {
    // For i16, use "ts1am".
    SDValue Flag;
    SDValue Bits;
    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);

    SDValue Ptr = N->getOperand(1);
    SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
                                  {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
                                  DAG.getVTList(Op.getNode()->getValueType(0),
                                                Op.getNode()->getValueType(1)),
                                  {N->getChain(), Aligned, Flag, NewVal},
                                  N->getMemOperand());

    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
    SDValue Chain = TS1AM.getValue(1);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  // Otherwise, let llvm legalize it.
  return Op;
}

SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::lowerConstantPool(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue
VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Generate the following code:
  //   t1: ch,glue = callseq_start t0, 0, 0
  //   t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
  //   t3: ch,glue = callseq_end t2, 0, 0, t2:2
  //   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
  SDValue Label = withTargetFlags(Op, 0, DAG);
  EVT PtrVT = Op.getValueType();

  // Lowering the machine isd will make sure everything is in the right
  // location.
  SDValue Chain = DAG.getEntryNode();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
      DAG.getMachineFunction(), CallingConv::C);
  Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
  SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
  Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
  Chain = DAG.getCALLSEQ_END(Chain, 64, 0, Chain.getValue(1), DL);
  Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));

  // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setHasCalls(true);

  // Also generate code to prepare a GOT register if it is PIC.
  if (isPositionIndependent()) {
    MachineFunction &MF = DAG.getMachineFunction();
    Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
  }

  return Chain;
}

SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The current implementation of nld (2.26) doesn't allow local exec model
  // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
  // generate the general dynamic model code sequence.
  //
  // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
  return lowerToTLSGeneralDynamicModel(Op, DAG);
}

SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

// Lower a f128 load into two f64 loads.
static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
  assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
  Align Alignment = LdNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  SDValue Lo64 =
      DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
                  LdNode->getPointerInfo(), Alignment,
                  LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                       : MachineMemOperand::MONone);
  EVT AddrVT = LdNode->getBasePtr().getValueType();
  SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
                              DAG.getConstant(8, DL, AddrVT));
  SDValue Hi64 =
      DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
                  LdNode->getPointerInfo(), Alignment,
                  LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                       : MachineMemOperand::MONone);

  SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
  SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);

  // VE stores Hi64 to 8(addr) and Lo64 to 0(addr).
  SDNode *InFP128 =
      DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
  InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
                               SDValue(InFP128, 0), Hi64, SubRegEven);
  InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
                               SDValue(InFP128, 0), Lo64, SubRegOdd);
  SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
                          SDValue(Hi64.getNode(), 1)};
  SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
  return DAG.getMergeValues(Ops, DL);
}

// Lower a vXi1 load into the following instructions:
//   LDrii %1, (,%addr)
//   LVMxir %vm, 0, %1
//   LDrii %2, 8(,%addr)
//   LVMxir %vm, 0, %2
//   ...
static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
  assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");

  SDValue BasePtr = LdNode->getBasePtr();
  Align Alignment = LdNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  EVT AddrVT = BasePtr.getValueType();
  EVT MemVT = LdNode->getMemoryVT();
  if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
    SDValue OutChains[4];
    SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
    for (int i = 0; i < 4; ++i) {
      // Generate load dag and prepare chains.
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      SDValue Val =
          DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
                      LdNode->getPointerInfo(), Alignment,
                      LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                           : MachineMemOperand::MONone);
      OutChains[i] = SDValue(Val.getNode(), 1);

      VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64,
                              DAG.getTargetConstant(i, DL, MVT::i64), Val,
                              SDValue(VM, 0));
    }
    SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
    SDValue Ops[2] = {SDValue(VM, 0), OutChain};
    return DAG.getMergeValues(Ops, DL);
  } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
    SDValue OutChains[8];
    SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
    for (int i = 0; i < 8; ++i) {
      // Generate load dag and prepare chains.
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      SDValue Val =
          DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
                      LdNode->getPointerInfo(), Alignment,
                      LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                           : MachineMemOperand::MONone);
      OutChains[i] = SDValue(Val.getNode(), 1);

      VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64,
                              DAG.getTargetConstant(i, DL, MVT::i64), Val,
                              SDValue(VM, 0));
    }
    SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
    SDValue Ops[2] = {SDValue(VM, 0), OutChain};
    return DAG.getMergeValues(Ops, DL);
  } else {
    // Otherwise, ask llvm to expand it.
    return SDValue();
  }
}

SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
  EVT MemVT = LdNode->getMemoryVT();

  // If VPU is enabled, always expand non-mask vector loads to VVP.
  if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
    return lowerToVVP(Op, DAG);

  SDValue BasePtr = LdNode->getBasePtr();
  if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
    // Do not expand a load instruction with a frame index here because of
    // dependency problems.  We expand it later in eliminateFrameIndex().
    return Op;
  }

  if (MemVT == MVT::f128)
    return lowerLoadF128(Op, DAG);
  if (isMaskType(MemVT))
    return lowerLoadI1(Op, DAG);

  return Op;
}

// Lower a f128 store into two f64 stores.
static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
  assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");

  SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
  SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);

  SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
                                    StNode->getValue(), SubRegEven);
  SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
                                    StNode->getValue(), SubRegOdd);

  Align Alignment = StNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  // VE stores Hi64 to 8(addr) and Lo64 to 0(addr).
  SDValue OutChains[2];
  OutChains[0] =
      DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0),
                   StNode->getBasePtr(), MachinePointerInfo(), Alignment,
                   StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                        : MachineMemOperand::MONone);
  EVT AddrVT = StNode->getBasePtr().getValueType();
  SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(),
                              DAG.getConstant(8, DL, AddrVT));
  OutChains[1] =
      DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr,
                   MachinePointerInfo(), Alignment,
                   StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                        : MachineMemOperand::MONone);
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
}

// Lower a vXi1 store into the following instructions:
//   SVMi  %1, %vm, 0
//   STrii %1, (,%addr)
//   SVMi  %2, %vm, 1
//   STrii %2, 8(,%addr)
//   ...
static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
  assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");

  SDValue BasePtr = StNode->getBasePtr();
  Align Alignment = StNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);
  EVT AddrVT = BasePtr.getValueType();
  EVT MemVT = StNode->getMemoryVT();
  if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
    SDValue OutChains[4];
    for (int i = 0; i < 4; ++i) {
      SDNode *V =
          DAG.getMachineNode(VE::SVMmi, DL, MVT::i64, StNode->getValue(),
                             DAG.getTargetConstant(i, DL, MVT::i64));
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      OutChains[i] =
          DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
                       MachinePointerInfo(), Alignment,
                       StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                            : MachineMemOperand::MONone);
    }
    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
    SDValue OutChains[8];
    for (int i = 0; i < 8; ++i) {
      SDNode *V =
          DAG.getMachineNode(VE::SVMyi, DL, MVT::i64, StNode->getValue(),
                             DAG.getTargetConstant(i, DL, MVT::i64));
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      OutChains[i] =
          DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
                       MachinePointerInfo(), Alignment,
                       StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                            : MachineMemOperand::MONone);
    }
    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  } else {
    // Otherwise, ask llvm to expand it.
    return SDValue();
  }
}
1538 SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1539 StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
1540 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1541 EVT MemVT = StNode->getMemoryVT();
1543 // If VPU is enabled, always expand non-mask vector stores to VVP
1544 if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
1545 return lowerToVVP(Op, DAG);
1547 SDValue BasePtr = StNode->getBasePtr();
1548 if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1549 // Do not expand store instruction with frame index here because of
1550 // dependency problems. We expand it later in eliminateFrameIndex().
1551 return Op;
1554 if (MemVT == MVT::f128)
1555 return lowerStoreF128(Op, DAG);
1556 if (isMaskType(MemVT))
1557 return lowerStoreI1(Op, DAG);
1559 // Otherwise, ask llvm to expand it.
1560 return SDValue();
1563 SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1564 MachineFunction &MF = DAG.getMachineFunction();
1565 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
1566 auto PtrVT = getPointerTy(DAG.getDataLayout());
1568 // Need frame address to find the address of VarArgsFrameIndex.
1569 MF.getFrameInfo().setFrameAddressIsTaken(true);
1571 // vastart just stores the address of the VarArgsFrameIndex slot into the
1572 // memory location argument.
1573 SDLoc DL(Op);
1574 SDValue Offset =
1575 DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
1576 DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
1577 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1578 return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
1579 MachinePointerInfo(SV));
1582 SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
1583 SDNode *Node = Op.getNode();
1584 EVT VT = Node->getValueType(0);
1585 SDValue InChain = Node->getOperand(0);
1586 SDValue VAListPtr = Node->getOperand(1);
1587 EVT PtrVT = VAListPtr.getValueType();
1588 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
1589 SDLoc DL(Node);
1590 SDValue VAList =
1591 DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
1592 SDValue Chain = VAList.getValue(1);
1593 SDValue NextPtr;
1595 if (VT == MVT::f128) {
1596 // VE f128 values must be stored with 16-byte alignment. We don't
1597 // know the actual alignment of VAList, so we align it
1598 // dynamically.
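// Illustrative sketch of the rounding below: with the 16-byte alignment used
// here it is simply VAList = (VAList + 15) & -16.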
1599 int Align = 16;
1600 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1601 DAG.getConstant(Align - 1, DL, PtrVT));
1602 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
1603 DAG.getConstant(-Align, DL, PtrVT));
1604 // Increment the pointer, VAList, by 16 to the next vaarg.
1605 NextPtr =
1606 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL));
1607 } else if (VT == MVT::f32) {
1608 // float --> need special handling like below.
1609 // 0 4
1610 // +------+------+
1611 // | empty| float|
1612 // +------+------+
1613 // Increment the pointer, VAList, by 8 to the next vaarg.
1614 NextPtr =
1615 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1616 // Then, adjust VAList.
1617 unsigned InternalOffset = 4;
1618 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1619 DAG.getConstant(InternalOffset, DL, PtrVT));
1620 } else {
1621 // Increment the pointer, VAList, by 8 to the next vaarg.
1622 NextPtr =
1623 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1626 // Store the incremented VAList to the legalized pointer.
1627 InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));
1629 // Load the actual argument out of the pointer VAList.
1630 // We can't count on greater alignment than the word size.
1631 return DAG.getLoad(
1632 VT, DL, InChain, VAList, MachinePointerInfo(),
1633 Align(std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8));
1636 SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
1637 SelectionDAG &DAG) const {
1638 // Generate the following code.
1639 // (void)__ve_grow_stack(size);
1640 // ret = GETSTACKTOP; // pseudo instruction
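// For an over-aligned request, an illustrative sketch of the generated code
// (assuming Align is a power of two) is:
//   (void)__ve_grow_stack_align(size, ~(Align - 1));
//   ret = (GETSTACKTOP + Align - 1) & ~(Align - 1);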
1641 SDLoc DL(Op);
1643 // Get the inputs.
1644 SDNode *Node = Op.getNode();
1645 SDValue Chain = Op.getOperand(0);
1646 SDValue Size = Op.getOperand(1);
1647 MaybeAlign Alignment(Op.getConstantOperandVal(2));
1648 EVT VT = Node->getValueType(0);
1650 // Chain the dynamic stack allocation so that it doesn't modify the stack
1651 // pointer when other instructions are using the stack.
1652 Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
1654 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1655 Align StackAlign = TFI.getStackAlign();
1656 bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1658 // Prepare arguments
1659 TargetLowering::ArgListTy Args;
1660 TargetLowering::ArgListEntry Entry;
1661 Entry.Node = Size;
1662 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1663 Args.push_back(Entry);
1664 if (NeedsAlign) {
1665 Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
1666 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1667 Args.push_back(Entry);
1669 Type *RetTy = Type::getVoidTy(*DAG.getContext());
1671 EVT PtrVT = Op.getValueType();
1672 SDValue Callee;
1673 if (NeedsAlign) {
1674 Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
1675 } else {
1676 Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
1679 TargetLowering::CallLoweringInfo CLI(DAG);
1680 CLI.setDebugLoc(DL)
1681 .setChain(Chain)
1682 .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
1683 .setDiscardResult(true);
1684 std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1685 Chain = pair.second;
1686 SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
1687 if (NeedsAlign) {
1688 Result = DAG.getNode(ISD::ADD, DL, VT, Result,
1689 DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
1690 Result = DAG.getNode(ISD::AND, DL, VT, Result,
1691 DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
1693 // Chain = Result.getValue(1);
1694 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), DL);
1696 SDValue Ops[2] = {Result, Chain};
1697 return DAG.getMergeValues(Ops, DL);
1700 SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
1701 SelectionDAG &DAG) const {
1702 SDLoc DL(Op);
1703 return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
1704 Op.getOperand(1));
1707 SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
1708 SelectionDAG &DAG) const {
1709 SDLoc DL(Op);
1710 return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL,
1711 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
1712 Op.getOperand(1));
1715 SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
1716 SelectionDAG &DAG) const {
1717 SDLoc DL(Op);
1718 return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
1719 Op.getOperand(0));
1722 static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
1723 const VETargetLowering &TLI,
1724 const VESubtarget *Subtarget) {
1725 SDLoc DL(Op);
1726 MachineFunction &MF = DAG.getMachineFunction();
1727 EVT PtrVT = TLI.getPointerTy(MF.getDataLayout());
1729 MachineFrameInfo &MFI = MF.getFrameInfo();
1730 MFI.setFrameAddressIsTaken(true);
1732 unsigned Depth = Op.getConstantOperandVal(0);
1733 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1734 Register FrameReg = RegInfo->getFrameRegister(MF);
1735 SDValue FrameAddr =
1736 DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT);
1737 while (Depth--)
1738 FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(),
1739 FrameAddr, MachinePointerInfo());
1740 return FrameAddr;
1743 static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
1744 const VETargetLowering &TLI,
1745 const VESubtarget *Subtarget) {
1746 MachineFunction &MF = DAG.getMachineFunction();
1747 MachineFrameInfo &MFI = MF.getFrameInfo();
1748 MFI.setReturnAddressIsTaken(true);
1750 if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
1751 return SDValue();
1753 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1755 SDLoc DL(Op);
1756 EVT VT = Op.getValueType();
1757 SDValue Offset = DAG.getConstant(8, DL, VT);
1758 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1759 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1760 MachinePointerInfo());
1763 SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1764 SelectionDAG &DAG) const {
1765 SDLoc DL(Op);
1766 unsigned IntNo = Op.getConstantOperandVal(0);
1767 switch (IntNo) {
1768 default: // Don't custom lower most intrinsics.
1769 return SDValue();
1770 case Intrinsic::eh_sjlj_lsda: {
1771 MachineFunction &MF = DAG.getMachineFunction();
1772 MVT VT = Op.getSimpleValueType();
1773 const VETargetMachine *TM =
1774 static_cast<const VETargetMachine *>(&DAG.getTarget());
1776 // Create GCC_except_tableXX string. The real symbol for that will be
1777 // generated in EHStreamer::emitExceptionTable() later. So, we just
1778 // borrow its name here.
1779 TM->getStrList()->push_back(std::string(
1780 (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
1781 SDValue Addr =
1782 DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0);
1783 if (isPositionIndependent()) {
1784 Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32,
1785 VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1786 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT);
1787 return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr);
1789 return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
1794 static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
1795 if (!isa<BuildVectorSDNode>(N))
1796 return false;
1797 const auto *BVN = cast<BuildVectorSDNode>(N);
1799 // Find first non-undef insertion.
1800 unsigned Idx;
1801 for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
1802 auto ElemV = BVN->getOperand(Idx);
1803 if (!ElemV->isUndef())
1804 break;
1806 // Catch the (hypothetical) all-undef case.
1807 if (Idx == BVN->getNumOperands())
1808 return false;
1809 // Remember insertion.
1810 UniqueIdx = Idx++;
1811 // Verify that all other insertions are undef.
1812 for (; Idx < BVN->getNumOperands(); ++Idx) {
1813 auto ElemV = BVN->getOperand(Idx);
1814 if (!ElemV->isUndef())
1815 return false;
1817 return true;
1820 static SDValue getSplatValue(SDNode *N) {
1821 if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
1822 return BuildVec->getSplatValue();
1824 return SDValue();
1827 SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
1828 SelectionDAG &DAG) const {
1829 VECustomDAG CDAG(DAG, Op);
1830 MVT ResultVT = Op.getSimpleValueType();
1832 // If there is just one element, expand to INSERT_VECTOR_ELT.
1833 unsigned UniqueIdx;
1834 if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
1835 SDValue AccuV = CDAG.getUNDEF(Op.getValueType());
1836 auto ElemV = Op->getOperand(UniqueIdx);
1837 SDValue IdxV = CDAG.getConstant(UniqueIdx, MVT::i64);
1838 return CDAG.getNode(ISD::INSERT_VECTOR_ELT, ResultVT, {AccuV, ElemV, IdxV});
1841 // Else emit a broadcast.
1842 if (SDValue ScalarV = getSplatValue(Op.getNode())) {
1843 unsigned NumEls = ResultVT.getVectorNumElements();
1844 auto AVL = CDAG.getConstant(NumEls, MVT::i32);
1845 return CDAG.getBroadcast(ResultVT, ScalarV, AVL);
1848 // Expand
1849 return SDValue();
1852 TargetLowering::LegalizeAction
1853 VETargetLowering::getCustomOperationAction(SDNode &Op) const {
1854 // Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize
1855 // these operations (transform nodes such that their AVL parameter refers to
1856 // packs of 64 bits, instead of the number of elements).
1858 // Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
1859 // re-visit them.
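// Illustrative example (assuming packed-mode semantics): a VVP operation on
// v512i32 whose AVL counts 512 elements is rewritten so that its AVL counts
// 256 64-bit packs instead, wrapped in a LEGALAVL node.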
1860 if (isPackingSupportOpcode(Op.getOpcode()))
1861 return Legal;
1863 // Custom lower to legalize AVL for packed mode.
1864 if (isVVPOrVEC(Op.getOpcode()))
1865 return Custom;
1866 return Legal;
1869 SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1870 LLVM_DEBUG(dbgs() << "::LowerOperation "; Op.dump(&DAG));
1871 unsigned Opcode = Op.getOpcode();
1873 /// Scalar isel.
1874 switch (Opcode) {
1875 case ISD::ATOMIC_FENCE:
1876 return lowerATOMIC_FENCE(Op, DAG);
1877 case ISD::ATOMIC_SWAP:
1878 return lowerATOMIC_SWAP(Op, DAG);
1879 case ISD::BlockAddress:
1880 return lowerBlockAddress(Op, DAG);
1881 case ISD::ConstantPool:
1882 return lowerConstantPool(Op, DAG);
1883 case ISD::DYNAMIC_STACKALLOC:
1884 return lowerDYNAMIC_STACKALLOC(Op, DAG);
1885 case ISD::EH_SJLJ_LONGJMP:
1886 return lowerEH_SJLJ_LONGJMP(Op, DAG);
1887 case ISD::EH_SJLJ_SETJMP:
1888 return lowerEH_SJLJ_SETJMP(Op, DAG);
1889 case ISD::EH_SJLJ_SETUP_DISPATCH:
1890 return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
1891 case ISD::FRAMEADDR:
1892 return lowerFRAMEADDR(Op, DAG, *this, Subtarget);
1893 case ISD::GlobalAddress:
1894 return lowerGlobalAddress(Op, DAG);
1895 case ISD::GlobalTLSAddress:
1896 return lowerGlobalTLSAddress(Op, DAG);
1897 case ISD::INTRINSIC_WO_CHAIN:
1898 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
1899 case ISD::JumpTable:
1900 return lowerJumpTable(Op, DAG);
1901 case ISD::LOAD:
1902 return lowerLOAD(Op, DAG);
1903 case ISD::RETURNADDR:
1904 return lowerRETURNADDR(Op, DAG, *this, Subtarget);
1905 case ISD::BUILD_VECTOR:
1906 return lowerBUILD_VECTOR(Op, DAG);
1907 case ISD::STORE:
1908 return lowerSTORE(Op, DAG);
1909 case ISD::VASTART:
1910 return lowerVASTART(Op, DAG);
1911 case ISD::VAARG:
1912 return lowerVAARG(Op, DAG);
1914 case ISD::INSERT_VECTOR_ELT:
1915 return lowerINSERT_VECTOR_ELT(Op, DAG);
1916 case ISD::EXTRACT_VECTOR_ELT:
1917 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1920 /// Vector isel.
1921 if (ISD::isVPOpcode(Opcode))
1922 return lowerToVVP(Op, DAG);
1924 switch (Opcode) {
1925 default:
1926 llvm_unreachable("Should not custom lower this!");
1928 // Legalize the AVL of this internal node.
1929 case VEISD::VEC_BROADCAST:
1930 #define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
1931 #include "VVPNodes.def"
1932 // AVL already legalized.
1933 if (getAnnotatedNodeAVL(Op).second)
1934 return Op;
1935 return legalizeInternalVectorOp(Op, DAG);
1937 // Translate into a VEC_*/VVP_* layer operation.
1938 case ISD::MLOAD:
1939 case ISD::MSTORE:
1940 #define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
1941 #include "VVPNodes.def"
1942 if (isMaskArithmetic(Op) && isPackedVectorType(Op.getValueType()))
1943 return splitMaskArithmetic(Op, DAG);
1944 return lowerToVVP(Op, DAG);
1947 /// } Custom Lower
1949 void VETargetLowering::ReplaceNodeResults(SDNode *N,
1950 SmallVectorImpl<SDValue> &Results,
1951 SelectionDAG &DAG) const {
1952 switch (N->getOpcode()) {
1953 case ISD::ATOMIC_SWAP:
1954 // Let LLVM expand atomic swap instruction through LowerOperation.
1955 return;
1956 default:
1957 LLVM_DEBUG(N->dumpr(&DAG));
1958 llvm_unreachable("Do not know how to custom type legalize this operation!");
1962 /// JumpTable for VE.
1964 /// VE cannot generate relocatable symbols in a jump table. VE cannot
1965 /// generate expressions using symbols in both the text segment and the data
1966 /// segment like below.
1967 /// .4byte .LBB0_2-.LJTI0_0
1968 /// So, we generate an offset from the top of the function like below as
1969 /// a custom label.
1970 /// .4byte .LBB0_2-<function name>
1972 unsigned VETargetLowering::getJumpTableEncoding() const {
1973 // Use custom label for PIC.
1974 if (isPositionIndependent())
1975 return MachineJumpTableInfo::EK_Custom32;
1977 // Otherwise, use the normal jump table encoding heuristics.
1978 return TargetLowering::getJumpTableEncoding();
1981 const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
1982 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
1983 unsigned Uid, MCContext &Ctx) const {
1984 assert(isPositionIndependent());
1986 // Generate custom label for PIC like below.
1987 // .4bytes .LBB0_2-<function name>
1988 const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
1989 MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data());
1990 const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);
1991 return MCBinaryExpr::createSub(Value, Base, Ctx);
1994 SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
1995 SelectionDAG &DAG) const {
1996 assert(isPositionIndependent());
1997 SDLoc DL(Table);
1998 Function *Function = &DAG.getMachineFunction().getFunction();
1999 assert(Function != nullptr);
2000 auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace());
2002 // In the jump table, we have the following values in PIC mode.
2003 // .4bytes .LBB0_2-<function name>
2004 // We need to add this value and the address of this function to generate
2005 // the .LBB0_2 label correctly under PIC mode. So, we want to generate the
2006 // following instructions:
2007 // lea %reg, fun@gotoff_lo
2008 // and %reg, %reg, (32)0
2009 // lea.sl %reg, fun@gotoff_hi(%reg, %got)
2010 // In order to do so, we need to generate a correctly marked DAG node using
2011 // makeHiLoPair.
2012 SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy);
2013 SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
2014 VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
2015 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy);
2016 return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo);
2019 Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
2020 MachineBasicBlock::iterator I,
2021 MachineBasicBlock *TargetBB,
2022 const DebugLoc &DL) const {
2023 MachineFunction *MF = MBB.getParent();
2024 MachineRegisterInfo &MRI = MF->getRegInfo();
2025 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2027 const TargetRegisterClass *RC = &VE::I64RegClass;
2028 Register Tmp1 = MRI.createVirtualRegister(RC);
2029 Register Tmp2 = MRI.createVirtualRegister(RC);
2030 Register Result = MRI.createVirtualRegister(RC);
2032 if (isPositionIndependent()) {
2033 // Create following instructions for local linkage PIC code.
2034 // lea %Tmp1, TargetBB@gotoff_lo
2035 // and %Tmp2, %Tmp1, (32)0
2036 // lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2037 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2038 .addImm(0)
2039 .addImm(0)
2040 .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_LO32);
2041 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2042 .addReg(Tmp1, getKillRegState(true))
2043 .addImm(M0(32));
2044 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2045 .addReg(VE::SX15)
2046 .addReg(Tmp2, getKillRegState(true))
2047 .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_HI32);
2048 } else {
2049 // Create following instructions for non-PIC code.
2050 // lea %Tmp1, TargetBB@lo
2051 // and %Tmp2, %Tmp1, (32)0
2052 // lea.sl %Result, TargetBB@hi(%Tmp2)
2053 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2054 .addImm(0)
2055 .addImm(0)
2056 .addMBB(TargetBB, VEMCExpr::VK_VE_LO32);
2057 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2058 .addReg(Tmp1, getKillRegState(true))
2059 .addImm(M0(32));
2060 BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2061 .addReg(Tmp2, getKillRegState(true))
2062 .addImm(0)
2063 .addMBB(TargetBB, VEMCExpr::VK_VE_HI32);
2065 return Result;
2068 Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
2069 MachineBasicBlock::iterator I,
2070 StringRef Symbol, const DebugLoc &DL,
2071 bool IsLocal = false,
2072 bool IsCall = false) const {
2073 MachineFunction *MF = MBB.getParent();
2074 MachineRegisterInfo &MRI = MF->getRegInfo();
2075 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2077 const TargetRegisterClass *RC = &VE::I64RegClass;
2078 Register Result = MRI.createVirtualRegister(RC);
2080 if (isPositionIndependent()) {
2081 if (IsCall && !IsLocal) {
2082 // Create following instructions for non-local linkage PIC code function
2083 // calls. These instructions use IC and the magic number -24, so we expand
2084 // them in VEAsmPrinter.cpp from GETFUNPLT pseudo instruction.
2085 // lea %Reg, Symbol@plt_lo(-24)
2086 // and %Reg, %Reg, (32)0
2087 // sic %s16
2088 // lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
2089 BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)
2090 .addExternalSymbol("abort");
2091 } else if (IsLocal) {
2092 Register Tmp1 = MRI.createVirtualRegister(RC);
2093 Register Tmp2 = MRI.createVirtualRegister(RC);
2094 // Create following instructions for local linkage PIC code.
2095 // lea %Tmp1, Symbol@gotoff_lo
2096 // and %Tmp2, %Tmp1, (32)0
2097 // lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2098 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2099 .addImm(0)
2100 .addImm(0)
2101 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_LO32);
2102 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2103 .addReg(Tmp1, getKillRegState(true))
2104 .addImm(M0(32));
2105 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2106 .addReg(VE::SX15)
2107 .addReg(Tmp2, getKillRegState(true))
2108 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_HI32);
2109 } else {
2110 Register Tmp1 = MRI.createVirtualRegister(RC);
2111 Register Tmp2 = MRI.createVirtualRegister(RC);
2112 // Create following instructions for non-local linkage PIC code.
2113 // lea %Tmp1, Symbol@got_lo
2114 // and %Tmp2, %Tmp1, (32)0
2115 // lea.sl %Tmp3, Symbol@got_hi(%Tmp2, %s15) ; %s15 is GOT
2116 // ld %Result, 0(%Tmp3)
2117 Register Tmp3 = MRI.createVirtualRegister(RC);
2118 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2119 .addImm(0)
2120 .addImm(0)
2121 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_LO32);
2122 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2123 .addReg(Tmp1, getKillRegState(true))
2124 .addImm(M0(32));
2125 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)
2126 .addReg(VE::SX15)
2127 .addReg(Tmp2, getKillRegState(true))
2128 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_HI32);
2129 BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)
2130 .addReg(Tmp3, getKillRegState(true))
2131 .addImm(0)
2132 .addImm(0);
2134 } else {
2135 Register Tmp1 = MRI.createVirtualRegister(RC);
2136 Register Tmp2 = MRI.createVirtualRegister(RC);
2137 // Create following instructions for non-PIC code.
2138 // lea %Tmp1, Symbol@lo
2139 // and %Tmp2, %Tmp1, (32)0
2140 // lea.sl %Result, Symbol@hi(%Tmp2)
2141 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2142 .addImm(0)
2143 .addImm(0)
2144 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32);
2145 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2146 .addReg(Tmp1, getKillRegState(true))
2147 .addImm(M0(32));
2148 BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2149 .addReg(Tmp2, getKillRegState(true))
2150 .addImm(0)
2151 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32);
2153 return Result;
2156 void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
2157 MachineBasicBlock *MBB,
2158 MachineBasicBlock *DispatchBB,
2159 int FI, int Offset) const {
2160 DebugLoc DL = MI.getDebugLoc();
2161 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2163 Register LabelReg =
2164 prepareMBB(*MBB, MachineBasicBlock::iterator(MI), DispatchBB, DL);
2166 // Store the address of DispatchBB into the given jmpbuf[1], which holds the
2167 // next IC referenced by longjmp (throw) later.
2168 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2169 addFrameReference(MIB, FI, Offset); // jmpbuf[1]
2170 MIB.addReg(LabelReg, getKillRegState(true));
2173 MachineBasicBlock *
2174 VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
2175 MachineBasicBlock *MBB) const {
2176 DebugLoc DL = MI.getDebugLoc();
2177 MachineFunction *MF = MBB->getParent();
2178 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2179 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
2180 MachineRegisterInfo &MRI = MF->getRegInfo();
2182 const BasicBlock *BB = MBB->getBasicBlock();
2183 MachineFunction::iterator I = ++MBB->getIterator();
2185 // Memory Reference.
2186 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
2187 Register BufReg = MI.getOperand(1).getReg();
2189 Register DstReg;
2191 DstReg = MI.getOperand(0).getReg();
2192 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
2193 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
2194 (void)TRI;
2195 Register MainDestReg = MRI.createVirtualRegister(RC);
2196 Register RestoreDestReg = MRI.createVirtualRegister(RC);
2198 // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate following
2199 // instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
2201 // ThisMBB:
2202 // buf[3] = %s17 iff %s17 is used as BP
2203 // buf[1] = RestoreMBB as IC after longjmp
2204 // # SjLjSetup RestoreMBB
2206 // MainMBB:
2207 // v_main = 0
2209 // SinkMBB:
2210 // v = phi(v_main, MainMBB, v_restore, RestoreMBB)
2211 // ...
2213 // RestoreMBB:
2214 // %s17 = buf[3] iff %s17 is used as BP
2215 // v_restore = 1
2216 // goto SinkMBB
2218 MachineBasicBlock *ThisMBB = MBB;
2219 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
2220 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
2221 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
2222 MF->insert(I, MainMBB);
2223 MF->insert(I, SinkMBB);
2224 MF->push_back(RestoreMBB);
2225 RestoreMBB->setMachineBlockAddressTaken();
2227 // Transfer the remainder of BB and its successor edges to SinkMBB.
2228 SinkMBB->splice(SinkMBB->begin(), MBB,
2229 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
2230 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
2232 // ThisMBB:
2233 Register LabelReg =
2234 prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL);
2236 // Store BP in buf[3] iff this function is using BP.
2237 const VEFrameLowering *TFI = Subtarget->getFrameLowering();
2238 if (TFI->hasBP(*MF)) {
2239 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2240 MIB.addReg(BufReg);
2241 MIB.addImm(0);
2242 MIB.addImm(24);
2243 MIB.addReg(VE::SX17);
2244 MIB.setMemRefs(MMOs);
2247 // Store IP in buf[1].
2248 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2249 MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.
2250 MIB.addImm(0);
2251 MIB.addImm(8);
2252 MIB.addReg(LabelReg, getKillRegState(true));
2253 MIB.setMemRefs(MMOs);
2255 // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2257 // Insert setup.
2258 MIB =
2259 BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);
2261 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2262 MIB.addRegMask(RegInfo->getNoPreservedMask());
2263 ThisMBB->addSuccessor(MainMBB);
2264 ThisMBB->addSuccessor(RestoreMBB);
2266 // MainMBB:
2267 BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
2268 .addImm(0)
2269 .addImm(0)
2270 .addImm(0);
2271 MainMBB->addSuccessor(SinkMBB);
2273 // SinkMBB:
2274 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
2275 .addReg(MainDestReg)
2276 .addMBB(MainMBB)
2277 .addReg(RestoreDestReg)
2278 .addMBB(RestoreMBB);
2280 // RestoreMBB:
2281 // Restore BP from buf[3] iff this function is using BP. The address of
2282 // buf is in SX10.
2283 // FIXME: Better to not use SX10 here
2284 if (TFI->hasBP(*MF)) {
2285 MachineInstrBuilder MIB =
2286 BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
2287 MIB.addReg(VE::SX10);
2288 MIB.addImm(0);
2289 MIB.addImm(24);
2290 MIB.setMemRefs(MMOs);
2292 BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
2293 .addImm(0)
2294 .addImm(0)
2295 .addImm(1);
2296 BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
2297 RestoreMBB->addSuccessor(SinkMBB);
2299 MI.eraseFromParent();
2300 return SinkMBB;
2303 MachineBasicBlock *
2304 VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
2305 MachineBasicBlock *MBB) const {
2306 DebugLoc DL = MI.getDebugLoc();
2307 MachineFunction *MF = MBB->getParent();
2308 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2309 MachineRegisterInfo &MRI = MF->getRegInfo();
2311 // Memory Reference.
2312 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
2313 Register BufReg = MI.getOperand(0).getReg();
2315 Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);
2316 // Since FP is only updated here but NOT referenced, it's treated as GPR.
2317 Register FP = VE::SX9;
2318 Register SP = VE::SX11;
2320 MachineInstrBuilder MIB;
2322 MachineBasicBlock *ThisMBB = MBB;
2324 // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate following instructions.
2326 // ThisMBB:
2327 // %fp = load buf[0]
2328 // %jmp = load buf[1]
2329 // %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
2330 // %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2331 // jmp %jmp
2333 // Reload FP.
2334 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);
2335 MIB.addReg(BufReg);
2336 MIB.addImm(0);
2337 MIB.addImm(0);
2338 MIB.setMemRefs(MMOs);
2340 // Reload IP.
2341 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);
2342 MIB.addReg(BufReg);
2343 MIB.addImm(0);
2344 MIB.addImm(8);
2345 MIB.setMemRefs(MMOs);
2347 // Copy BufReg to SX10 for later use in setjmp.
2348 // FIXME: Better to not use SX10 here
2349 BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)
2350 .addReg(BufReg)
2351 .addImm(0);
2353 // Reload SP.
2354 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);
2355 MIB.add(MI.getOperand(0)); // we can preserve the kill flags here.
2356 MIB.addImm(0);
2357 MIB.addImm(16);
2358 MIB.setMemRefs(MMOs);
2360 // Jump.
2361 BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))
2362 .addReg(Tmp, getKillRegState(true))
2363 .addImm(0);
2365 MI.eraseFromParent();
2366 return ThisMBB;
2369 MachineBasicBlock *
2370 VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
2371 MachineBasicBlock *BB) const {
2372 DebugLoc DL = MI.getDebugLoc();
2373 MachineFunction *MF = BB->getParent();
2374 MachineFrameInfo &MFI = MF->getFrameInfo();
2375 MachineRegisterInfo &MRI = MF->getRegInfo();
2376 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2377 int FI = MFI.getFunctionContextIndex();
2379 // Get a mapping of the call site numbers to all of the landing pads they're
2380 // associated with.
2381 DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
2382 unsigned MaxCSNum = 0;
2383 for (auto &MBB : *MF) {
2384 if (!MBB.isEHPad())
2385 continue;
2387 MCSymbol *Sym = nullptr;
2388 for (const auto &MI : MBB) {
2389 if (MI.isDebugInstr())
2390 continue;
2392 assert(MI.isEHLabel() && "expected EH_LABEL");
2393 Sym = MI.getOperand(0).getMCSymbol();
2394 break;
2397 if (!MF->hasCallSiteLandingPad(Sym))
2398 continue;
2400 for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
2401 CallSiteNumToLPad[CSI].push_back(&MBB);
2402 MaxCSNum = std::max(MaxCSNum, CSI);
2406 // Get an ordered list of the machine basic blocks for the jump table.
2407 std::vector<MachineBasicBlock *> LPadList;
2408 SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
2409 LPadList.reserve(CallSiteNumToLPad.size());
2411 for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
2412 for (auto &LP : CallSiteNumToLPad[CSI]) {
2413 LPadList.push_back(LP);
2414 InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
2418 assert(!LPadList.empty() &&
2419 "No landing pad destinations for the dispatch jump table!");
2421 // The %fn_context is allocated like below (from --print-after=sjljehprepare):
2422 // %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
2424 // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
2425 // First `i64` is callsite, so callsite is FI+8.
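// Illustrative derivation of the offsets used below:
//   i8* (8) + i64 (8) + [4 x i64] (32) + i8* (8) + i8* (8) = 64 bytes,
//   so jmpbuf[0] is FI+64 and jmpbuf[1] is FI+72; the callsite i64 is FI+8.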
2426 static const int OffsetIC = 72;
2427 static const int OffsetCS = 8;
2429 // Create the MBBs for the dispatch code like the following:
2431 // ThisMBB:
2432 // Prepare DispatchBB address and store it to buf[1].
2433 // ...
2435 // DispatchBB:
2436 // %s15 = GETGOT iff isPositionIndependent
2437 // %callsite = load callsite
2438 // brgt.l.t #size of callsites, %callsite, DispContBB
2440 // TrapBB:
2441 // Call abort.
2443 // DispContBB:
2444 // %breg = address of jump table
2445 // %pc = load and calculate next pc from %breg and %callsite
2446 // jmp %pc
2448 // Shove the dispatch's address into the return slot in the function context.
2449 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
2450 DispatchBB->setIsEHPad(true);
2452 // TrapBB will cause a trap like `assert(0)`.
2453 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
2454 DispatchBB->addSuccessor(TrapBB);
2456 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
2457 DispatchBB->addSuccessor(DispContBB);
2459 // Insert MBBs.
2460 MF->push_back(DispatchBB);
2461 MF->push_back(DispContBB);
2462 MF->push_back(TrapBB);
2464 // Insert code to call abort in the TrapBB.
2465 Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL,
2466 /* Local */ false, /* Call */ true);
2467 BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)
2468 .addReg(Abort, getKillRegState(true))
2469 .addImm(0)
2470 .addImm(0);
2472 // Insert code into the entry block that creates and registers the function
2473 // context.
2474 setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC);
2476 // Create the jump table and associated information
2477 unsigned JTE = getJumpTableEncoding();
2478 MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);
2479 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
2481 const VERegisterInfo &RI = TII->getRegisterInfo();
2482 // Add a register mask with no preserved registers. This results in all
2483 // registers being marked as clobbered.
2484 BuildMI(DispatchBB, DL, TII->get(VE::NOP))
2485 .addRegMask(RI.getNoPreservedMask());
2487 if (isPositionIndependent()) {
2488 // Force generation of GETGOT, since the current implementation doesn't store
2489 // the GOT register.
2490 BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);
2493 // IReg is used as an index in a memory operand and therefore can't be SP
2494 const TargetRegisterClass *RC = &VE::I64RegClass;
2495 Register IReg = MRI.createVirtualRegister(RC);
2496 addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,
2497 OffsetCS);
2498 if (LPadList.size() < 64) {
2499 BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))
2500 .addImm(VECC::CC_ILE)
2501 .addImm(LPadList.size())
2502 .addReg(IReg)
2503 .addMBB(TrapBB);
2504 } else {
2505 assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
2506 Register TmpReg = MRI.createVirtualRegister(RC);
2507 BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)
2508 .addImm(0)
2509 .addImm(0)
2510 .addImm(LPadList.size());
2511 BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t))
2512 .addImm(VECC::CC_ILE)
2513 .addReg(TmpReg, getKillRegState(true))
2514 .addReg(IReg)
2515 .addMBB(TrapBB);
2518 Register BReg = MRI.createVirtualRegister(RC);
2519 Register Tmp1 = MRI.createVirtualRegister(RC);
2520 Register Tmp2 = MRI.createVirtualRegister(RC);
2522 if (isPositionIndependent()) {
2523 // Create following instructions for local linkage PIC code.
2524 // lea %Tmp1, .LJTI0_0@gotoff_lo
2525 // and %Tmp2, %Tmp1, (32)0
2526 // lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2527 BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2528 .addImm(0)
2529 .addImm(0)
2530 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32);
2531 BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2532 .addReg(Tmp1, getKillRegState(true))
2533 .addImm(M0(32));
2534 BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)
2535 .addReg(VE::SX15)
2536 .addReg(Tmp2, getKillRegState(true))
2537 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32);
2538 } else {
2539 // Create following instructions for non-PIC code.
2540 // lea %Tmp1, .LJTI0_0@lo
2541 // and %Tmp2, %Tmp1, (32)0
2542 // lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
2543 BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2544 .addImm(0)
2545 .addImm(0)
2546 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32);
2547 BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2548 .addReg(Tmp1, getKillRegState(true))
2549 .addImm(M0(32));
2550 BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)
2551 .addReg(Tmp2, getKillRegState(true))
2552 .addImm(0)
2553 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32);
2556 switch (JTE) {
2557 case MachineJumpTableInfo::EK_BlockAddress: {
2558 // Generate simple block address code for no-PIC model.
2559 // sll %Tmp1, %IReg, 3
2560 // lds %TReg, 0(%Tmp1, %BReg)
2561 // bcfla %TReg
2563 Register TReg = MRI.createVirtualRegister(RC);
2564 Register Tmp1 = MRI.createVirtualRegister(RC);
2566 BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2567 .addReg(IReg, getKillRegState(true))
2568 .addImm(3);
2569 BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg)
2570 .addReg(BReg, getKillRegState(true))
2571 .addReg(Tmp1, getKillRegState(true))
2572 .addImm(0);
2573 BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2574 .addReg(TReg, getKillRegState(true))
2575 .addImm(0);
2576 break;
2578 case MachineJumpTableInfo::EK_Custom32: {
2579 // Generate block address code using differences from the function pointer
2580 // for PIC model.
2581 // sll %Tmp1, %IReg, 2
2582 // ldl.zx %OReg, 0(%Tmp1, %BReg)
2583 // Prepare function address in BReg2.
2584 // adds.l %TReg, %BReg2, %OReg
2585 // bcfla %TReg
2587 assert(isPositionIndependent());
2588 Register OReg = MRI.createVirtualRegister(RC);
2589 Register TReg = MRI.createVirtualRegister(RC);
2590 Register Tmp1 = MRI.createVirtualRegister(RC);
2592 BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2593 .addReg(IReg, getKillRegState(true))
2594 .addImm(2);
2595 BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)
2596 .addReg(BReg, getKillRegState(true))
2597 .addReg(Tmp1, getKillRegState(true))
2598 .addImm(0);
2599 Register BReg2 =
2600 prepareSymbol(*DispContBB, DispContBB->end(),
2601 DispContBB->getParent()->getName(), DL, /* Local */ true);
2602 BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)
2603 .addReg(OReg, getKillRegState(true))
2604 .addReg(BReg2, getKillRegState(true));
2605 BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2606 .addReg(TReg, getKillRegState(true))
2607 .addImm(0);
2608 break;
2610 default:
2611 llvm_unreachable("Unexpected jump table encoding");
2614 // Add the jump table entries as successors to the MBB.
2615 SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
2616 for (auto &LP : LPadList)
2617 if (SeenMBBs.insert(LP).second)
2618 DispContBB->addSuccessor(LP);
2620 // N.B. the order the invoke BBs are processed in doesn't matter here.
2621 SmallVector<MachineBasicBlock *, 64> MBBLPads;
2622 const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
2623 for (MachineBasicBlock *MBB : InvokeBBs) {
2624 // Remove the landing pad successor from the invoke block and replace it
2625 // with the new dispatch block.
2626 // Keep a copy of Successors since it's modified inside the loop.
2627 SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
2628 MBB->succ_rend());
2629 // FIXME: Avoid quadratic complexity.
2630 for (auto *MBBS : Successors) {
2631 if (MBBS->isEHPad()) {
2632 MBB->removeSuccessor(MBBS);
2633 MBBLPads.push_back(MBBS);
2637 MBB->addSuccessor(DispatchBB);
2639 // Find the invoke call and mark all of the callee-saved registers as
2640 // 'implicit defined' so that they're spilled. This prevents code from
2641 // moving instructions to before the EH block, where they will never be
2642 // executed.
2643 for (auto &II : reverse(*MBB)) {
2644 if (!II.isCall())
2645 continue;
2647 DenseMap<Register, bool> DefRegs;
2648 for (auto &MOp : II.operands())
2649 if (MOp.isReg())
2650 DefRegs[MOp.getReg()] = true;
2652 MachineInstrBuilder MIB(*MF, &II);
2653 for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
2654 Register Reg = SavedRegs[RI];
2655 if (!DefRegs[Reg])
2656 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
2659 break;
2663 // Mark all former landing pads as non-landing pads. The dispatch is the only
2664 // landing pad now.
2665 for (auto &LP : MBBLPads)
2666 LP->setIsEHPad(false);
2668 // The instruction is gone now.
2669 MI.eraseFromParent();
2670 return BB;
2673 MachineBasicBlock *
2674 VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2675 MachineBasicBlock *BB) const {
2676 switch (MI.getOpcode()) {
2677 default:
2678 llvm_unreachable("Unknown Custom Instruction!");
2679 case VE::EH_SjLj_LongJmp:
2680 return emitEHSjLjLongJmp(MI, BB);
2681 case VE::EH_SjLj_SetJmp:
2682 return emitEHSjLjSetJmp(MI, BB);
2683 case VE::EH_SjLj_Setup_Dispatch:
2684 return emitSjLjDispatchBlock(MI, BB);
2688 static bool isSimm7(SDValue V) {
2689 EVT VT = V.getValueType();
2690 if (VT.isVector())
2691 return false;
2693 if (VT.isInteger()) {
2694 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
2695 return isInt<7>(C->getSExtValue());
2696 } else if (VT.isFloatingPoint()) {
2697 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
2698 if (VT == MVT::f32 || VT == MVT::f64) {
2699 const APInt &Imm = C->getValueAPF().bitcastToAPInt();
2700 uint64_t Val = Imm.getSExtValue();
2701 if (Imm.getBitWidth() == 32)
2702 Val <<= 32; // Immediate values of float are placed at the higher bits on VE.
2703 return isInt<7>(Val);
2707 return false;
2710 static bool isMImm(SDValue V) {
2711 EVT VT = V.getValueType();
2712 if (VT.isVector())
2713 return false;
2715 if (VT.isInteger()) {
2716 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
2717 return isMImmVal(getImmVal(C));
2718 } else if (VT.isFloatingPoint()) {
2719 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
2720 if (VT == MVT::f32) {
2721 // Float values are placed at the higher bits, so ignore the lower 32 bits.
2722 return isMImm32Val(getFpImmVal(C) >> 32);
2723 } else if (VT == MVT::f64) {
2724 return isMImmVal(getFpImmVal(C));
2728 return false;
2731 static unsigned decideComp(EVT SrcVT, ISD::CondCode CC) {
2732 if (SrcVT.isFloatingPoint()) {
2733 if (SrcVT == MVT::f128)
2734 return VEISD::CMPQ;
2735 return VEISD::CMPF;
2737 return isSignedIntSetCC(CC) ? VEISD::CMPI : VEISD::CMPU;
2740 static EVT decideCompType(EVT SrcVT) {
2741 if (SrcVT == MVT::f128)
2742 return MVT::f64;
2743 return SrcVT;
2746 static bool safeWithoutCompWithNull(EVT SrcVT, ISD::CondCode CC,
2747 bool WithCMov) {
2748 if (SrcVT.isFloatingPoint()) {
2749 // For the case of floating point setcc, only unordered comparison
2750 // or general comparison with the -enable-no-nans-fp-math option reaches
2751 // here, so it is safe even if values are NaN. Only f128 is not safe
2752 // since VE uses the f64 result of the f128 comparison.
2753 return SrcVT != MVT::f128;
2755 if (isIntEqualitySetCC(CC)) {
2756 // For the case of equal or not equal, it is safe without comparison with 0.
2757 return true;
2759 if (WithCMov) {
2760 // For the case of integer setcc with cmov, all signed comparisons with 0
2761 // are safe.
2762 return isSignedIntSetCC(CC);
2764 // For the case of integer setcc, only signed 64-bit comparison is safe.
2765 // For unsigned, "CMPU 0x80000000, 0" has to be greater than 0, but it becomes
2766 // less than 0 without CMPU. For 32 bits, the other half of the 32 bits is
2767 // unconditional, so it is not safe without CMPI either.
2768 return isSignedIntSetCC(CC) && SrcVT == MVT::i64;
2771 static SDValue generateComparison(EVT VT, SDValue LHS, SDValue RHS,
2772 ISD::CondCode CC, bool WithCMov,
2773 const SDLoc &DL, SelectionDAG &DAG) {
2774 // Compare values. If RHS is 0 and it is safe to calculate without
2775 // comparison, we don't generate an instruction for comparison.
2776 EVT CompVT = decideCompType(VT);
2777 if (CompVT == VT && safeWithoutCompWithNull(VT, CC, WithCMov) &&
2778 (isNullConstant(RHS) || isNullFPConstant(RHS))) {
2779 return LHS;
2781 return DAG.getNode(decideComp(VT, CC), DL, CompVT, LHS, RHS);
2784 SDValue VETargetLowering::combineSelect(SDNode *N,
2785 DAGCombinerInfo &DCI) const {
2786 assert(N->getOpcode() == ISD::SELECT &&
2787 "Should be called with a SELECT node");
2788 ISD::CondCode CC = ISD::CondCode::SETNE;
2789 SDValue Cond = N->getOperand(0);
2790 SDValue True = N->getOperand(1);
2791 SDValue False = N->getOperand(2);
2793 // We handle only scalar SELECT.
2794 EVT VT = N->getValueType(0);
2795 if (VT.isVector())
2796 return SDValue();
2798 // Perform combineSelect after legalizing the DAG.
2799 if (!DCI.isAfterLegalizeDAG())
2800 return SDValue();
2802 EVT VT0 = Cond.getValueType();
2803 if (isMImm(True)) {
2804 // VE's condition move can handle MImm in True clause, so nothing to do.
2805 } else if (isMImm(False)) {
2806 // VE's condition move can handle MImm in True clause, so swap True and
2807 // False clauses if False has an MImm value, and update the condition code.
2808 std::swap(True, False);
2809 CC = getSetCCInverse(CC, VT0);
2812 SDLoc DL(N);
2813 SelectionDAG &DAG = DCI.DAG;
2814 VECC::CondCode VECCVal;
2815 if (VT0.isFloatingPoint()) {
2816 VECCVal = fpCondCode2Fcc(CC);
2817 } else {
2818 VECCVal = intCondCode2Icc(CC);
2820 SDValue Ops[] = {Cond, True, False,
2821 DAG.getConstant(VECCVal, DL, MVT::i32)};
2822 return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
2825 SDValue VETargetLowering::combineSelectCC(SDNode *N,
2826 DAGCombinerInfo &DCI) const {
2827 assert(N->getOpcode() == ISD::SELECT_CC &&
2828 "Should be called with a SELECT_CC node");
2829 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
2830 SDValue LHS = N->getOperand(0);
2831 SDValue RHS = N->getOperand(1);
2832 SDValue True = N->getOperand(2);
2833 SDValue False = N->getOperand(3);
2835 // We handle only scalar SELECT_CC.
2836 EVT VT = N->getValueType(0);
2837 if (VT.isVector())
2838 return SDValue();
2840 // Perform combineSelectCC after legalizing the DAG.
2841 if (!DCI.isAfterLegalizeDAG())
2842 return SDValue();
2844 // We handle only i32/i64/f32/f64/f128 comparisons.
2845 EVT LHSVT = LHS.getValueType();
2846 assert(LHSVT == RHS.getValueType());
2847 switch (LHSVT.getSimpleVT().SimpleTy) {
2848 case MVT::i32:
2849 case MVT::i64:
2850 case MVT::f32:
2851 case MVT::f64:
2852 case MVT::f128:
2853 break;
2854 default:
2855 // Return SDValue to let llvm handle other types.
2856 return SDValue();
2859 if (isMImm(RHS)) {
2860 // VE's comparison can handle MImm in RHS, so nothing to do.
2861 } else if (isSimm7(RHS)) {
2862 // VE's comparison can handle Simm7 in LHS, so swap LHS and RHS, and
2863 // update condition code.
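// Illustrative example: (select_cc %x, 3, t, f, setgt) becomes
// (select_cc 3, %x, t, f, setlt), so the simm7 value 3 ends up in LHS.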
2864 std::swap(LHS, RHS);
2865 CC = getSetCCSwappedOperands(CC);
2867 if (isMImm(True)) {
2868 // VE's condition move can handle MImm in True clause, so nothing to do.
2869 } else if (isMImm(False)) {
2870 // VE's condition move can handle MImm in True clause, so swap True and
2871 // False clauses if False has an MImm value, and update the condition code.
2872 std::swap(True, False);
2873 CC = getSetCCInverse(CC, LHSVT);
2876 SDLoc DL(N);
2877 SelectionDAG &DAG = DCI.DAG;
2879 bool WithCMov = true;
2880 SDValue CompNode = generateComparison(LHSVT, LHS, RHS, CC, WithCMov, DL, DAG);
2882 VECC::CondCode VECCVal;
2883 if (LHSVT.isFloatingPoint()) {
2884 VECCVal = fpCondCode2Fcc(CC);
2885 } else {
2886 VECCVal = intCondCode2Icc(CC);
2888 SDValue Ops[] = {CompNode, True, False,
2889 DAG.getConstant(VECCVal, DL, MVT::i32)};
2890 return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
2893 static bool isI32InsnAllUses(const SDNode *User, const SDNode *N);
2894 static bool isI32Insn(const SDNode *User, const SDNode *N) {
2895 switch (User->getOpcode()) {
2896 default:
2897 return false;
2898 case ISD::ADD:
2899 case ISD::SUB:
2900 case ISD::MUL:
2901 case ISD::SDIV:
2902 case ISD::UDIV:
2903 case ISD::SETCC:
2904 case ISD::SMIN:
2905 case ISD::SMAX:
2906 case ISD::SHL:
2907 case ISD::SRA:
2908 case ISD::BSWAP:
2909 case ISD::SINT_TO_FP:
2910 case ISD::UINT_TO_FP:
2911 case ISD::BR_CC:
2912 case ISD::BITCAST:
2913 case ISD::ATOMIC_CMP_SWAP:
2914 case ISD::ATOMIC_SWAP:
2915 case VEISD::CMPU:
2916 case VEISD::CMPI:
2917 return true;
2918 case ISD::SRL:
2919 if (N->getOperand(0).getOpcode() != ISD::SRL)
2920 return true;
2921 // (srl (trunc (srl ...))) may be optimized by combining srl, so we
2922 // don't optimize the trunc here.
2923 return false;
2924 case ISD::SELECT_CC:
2925 if (User->getOperand(2).getNode() != N &&
2926 User->getOperand(3).getNode() != N)
2927 return true;
2928 return isI32InsnAllUses(User, N);
2929 case VEISD::CMOV:
2930 // CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
2931 // However, trunc in true or false clauses is not safe.
2932 if (User->getOperand(1).getNode() != N &&
2933 User->getOperand(2).getNode() != N &&
2934 isa<ConstantSDNode>(User->getOperand(3))) {
2935 VECC::CondCode VECCVal =
2936 static_cast<VECC::CondCode>(User->getConstantOperandVal(3));
2937 return isIntVECondCode(VECCVal);
2939 [[fallthrough]];
2940 case ISD::AND:
2941 case ISD::OR:
2942 case ISD::XOR:
2943 case ISD::SELECT:
2944 case ISD::CopyToReg:
2945 // Check all uses of selections, bit operations, and copies. If all of them
2946 // are safe, optimize the truncate to extract_subreg.
2947 return isI32InsnAllUses(User, N);
2951 static bool isI32InsnAllUses(const SDNode *User, const SDNode *N) {
2952 // Check all uses of the User node. If all of them are safe, optimize
2953 // the truncate to extract_subreg.
2954 for (const SDNode *U : User->users()) {
2955 switch (U->getOpcode()) {
2956 default:
2957 // If the use is an instruction which treats the source operand as i32,
2958 // it is safe to avoid truncate here.
2959 if (isI32Insn(U, N))
2960 continue;
2961 break;
2962 case ISD::ANY_EXTEND:
2963 case ISD::SIGN_EXTEND:
2964 case ISD::ZERO_EXTEND: {
2965 // Special optimizations for the combination of ext and trunc.
2966 // (ext ... (select ... (trunc ...))) is safe to avoid the truncate here
2967 // since this truncate instruction clears the higher 32 bits, which are
2968 // filled by one of the ext instructions later.
2969 assert(N->getValueType(0) == MVT::i32 &&
2970 "find truncate to not i32 integer");
2971 if (User->getOpcode() == ISD::SELECT_CC ||
2972 User->getOpcode() == ISD::SELECT || User->getOpcode() == VEISD::CMOV)
2973 continue;
2974 break;
2977 return false;
2979 return true;
2982 // Optimize TRUNCATE in DAG combining. Optimizing it in CUSTOM lower is
2983 // sometimes too early. Optimizing it in DAG pattern matching in VEInstrInfo.td
2984 // is sometimes too late. So, we do it here.
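// Illustrative sketch of the rewrite performed below: when every user of
// (i32 (truncate i64 %x)) only reads the low 32 bits, the truncate is
// replaced by (EXTRACT_SUBREG %x, sub_i32) and no instruction is emitted.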
2985 SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
2986 DAGCombinerInfo &DCI) const {
2987 assert(N->getOpcode() == ISD::TRUNCATE &&
2988 "Should be called with a TRUNCATE node");
2990 SelectionDAG &DAG = DCI.DAG;
2991 SDLoc DL(N);
2992 EVT VT = N->getValueType(0);
2994 // We prefer to do this when all types are legal.
2995 if (!DCI.isAfterLegalizeDAG())
2996 return SDValue();
2998 // Skip combining TRUNCATE for now if the operand of TRUNCATE might be a constant.
2999 if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC &&
3000 isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) &&
3001 isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
3002 return SDValue();
3004 // Check all use of this TRUNCATE.
3005 for (const SDNode *User : N->users()) {
3006 // Make sure that we're not going to replace TRUNCATE for non-i32
3007 // instructions.
3009 // FIXME: Although we could sometimes handle this, and it does occur in
3010 // practice that one of the condition inputs to the select is also one of
3011 // the outputs, we currently can't deal with this.
3012 if (isI32Insn(User, N))
3013 continue;
3015 return SDValue();
3018 SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
3019 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT,
3020 N->getOperand(0), SubI32),
3024 SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
3025 DAGCombinerInfo &DCI) const {
3026 switch (N->getOpcode()) {
3027 default:
3028 break;
3029 case ISD::SELECT:
3030 return combineSelect(N, DCI);
3031 case ISD::SELECT_CC:
3032 return combineSelectCC(N, DCI);
3033 case ISD::TRUNCATE:
3034 return combineTRUNCATE(N, DCI);
3037 return SDValue();
3040 //===----------------------------------------------------------------------===//
3041 // VE Inline Assembly Support
3042 //===----------------------------------------------------------------------===//
3044 VETargetLowering::ConstraintType
3045 VETargetLowering::getConstraintType(StringRef Constraint) const {
3046 if (Constraint.size() == 1) {
3047 switch (Constraint[0]) {
3048 default:
3049 break;
3050 case 'v': // vector registers
3051 return C_RegisterClass;
3054 return TargetLowering::getConstraintType(Constraint);
3057 std::pair<unsigned, const TargetRegisterClass *>
3058 VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
3059 StringRef Constraint,
3060 MVT VT) const {
3061 const TargetRegisterClass *RC = nullptr;
3062 if (Constraint.size() == 1) {
3063 switch (Constraint[0]) {
3064 default:
3065 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3066 case 'r':
3067 RC = &VE::I64RegClass;
3068 break;
3069 case 'v':
3070 RC = &VE::V64RegClass;
3071 break;
3073 return std::make_pair(0U, RC);
3076 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3079 //===----------------------------------------------------------------------===//
3080 // VE Target Optimization Support
3081 //===----------------------------------------------------------------------===//
3083 unsigned VETargetLowering::getMinimumJumpTableEntries() const {
3084 // Specify 8 for the PIC model to reduce the impact of PIC load instructions.
3085 if (isJumpTableRelative())
3086 return 8;
3088 return TargetLowering::getMinimumJumpTableEntries();
3091 bool VETargetLowering::hasAndNot(SDValue Y) const {
3092 EVT VT = Y.getValueType();
3094 // VE doesn't have a vector and-not instruction.
3095 if (VT.isVector())
3096 return false;
3098 // VE allows different immediate values for X and Y where ~X & Y.
3099 // Only simm7 works for X, and only mimm works for Y on VE. However, this
3100 // function is used to check whether an immediate value is OK for the and-not
3101 // instruction as both X and Y. Generating an additional instruction to
3102 // retrieve an immediate value is no good since the purpose of this
3103 // function is to convert a series of 3 instructions to another series of
3104 // 3 instructions with better parallelism. Therefore, we return false
3105 // for all immediate values now.
3106 // FIXME: Change hasAndNot function to have two operands to make it work
3107 // correctly with Aurora VE.
3108 if (isa<ConstantSDNode>(Y))
3109 return false;
3111 // It's ok for generic registers.
3112 return true;
3115 SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3116 SelectionDAG &DAG) const {
3117 assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
3118 MVT VT = Op.getOperand(0).getSimpleValueType();
3120 // Special treatment for packed V64 types.
3121 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3122 (void)VT;
3123 // Example of codes:
3124 // %packed_v = extractelt %vr, %idx / 2
3125 // %v = %packed_v >> (%idx % 2 * 32)
3126 // %res = %v & 0xffffffff
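// Worked example (illustrative): for %idx = 5, the packed element index is
// 5 >> 1 = 2 and the shift is ((5 & 1) ^ 1) << 5 = 0, so the low 32 bits of
// packed element 2 are used; for %idx = 4 the shift is 32 (high 32 bits).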
3128 SDValue Vec = Op.getOperand(0);
3129 SDValue Idx = Op.getOperand(1);
3130 SDLoc DL(Op);
3131 SDValue Result = Op;
3132 if (false /* Idx->isConstant() */) {
3133 // TODO: optimized implementation using constant values
3134 } else {
3135 SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
3136 SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
3137 SDValue PackedElt =
3138 SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
3139 SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
3140 SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
3141 SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
3142 Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
3143 PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
3144 SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
3145 PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
3146 SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
3147 Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
3148 MVT::i32, PackedElt, SubI32),
3151 if (Op.getSimpleValueType() == MVT::f32) {
3152 Result = DAG.getBitcast(MVT::f32, Result);
3153 } else {
3154 assert(Op.getSimpleValueType() == MVT::i32);
3157 return Result;
3160 SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3161 SelectionDAG &DAG) const {
3162 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
3163 MVT VT = Op.getOperand(0).getSimpleValueType();
3165 // Special treatment for packed V64 types.
3166 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3167 (void)VT;
3168 // The v512i32 and v512f32 types start from the upper bits (0..31). These
3169 // "upper bits" require `val << 32` from the C implementation's point of view.
3171 // Example of codes:
3172 // %packed_elt = extractelt %vr, (%idx >> 1)
3173 // %shift = ((%idx & 1) ^ 1) << 5
3174 // %packed_elt &= 0xffffffff00000000 >> shift
3175 // %packed_elt |= (zext %val) << shift
3176 // %vr = insertelt %vr, %packed_elt, (%idx >> 1)
3178 SDLoc DL(Op);
3179 SDValue Vec = Op.getOperand(0);
3180 SDValue Val = Op.getOperand(1);
3181 SDValue Idx = Op.getOperand(2);
3182 if (Idx.getSimpleValueType() == MVT::i32)
3183 Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
3184 if (Val.getSimpleValueType() == MVT::f32)
3185 Val = DAG.getBitcast(MVT::i32, Val);
3186 assert(Val.getSimpleValueType() == MVT::i32);
3187 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
3189 SDValue Result = Op;
3190 if (false /* Idx->isConstant()*/) {
3191 // TODO: optimized implementation using constant values
3192 } else {
3193 SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
3194 SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
3195 SDValue PackedElt =
3196 SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
3197 SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
3198 SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
3199 SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
3200 Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
3201 SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
3202 Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
3203 PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
3204 Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
3205 PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
3206 Result =
3207 SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
3208 {HalfIdx, PackedElt, Vec}),
3211 return Result;