1 //===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the interfaces that VE uses to lower LLVM code into a
10 // selection DAG.
12 //===----------------------------------------------------------------------===//
14 #include "VEISelLowering.h"
15 #include "MCTargetDesc/VEMCExpr.h"
16 #include "VEInstrBuilder.h"
17 #include "VEMachineFunctionInfo.h"
18 #include "VERegisterInfo.h"
19 #include "VETargetMachine.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineJumpTableInfo.h"
26 #include "llvm/CodeGen/MachineModuleInfo.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/SelectionDAG.h"
29 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/IRBuilder.h"
33 #include "llvm/IR/Module.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/KnownBits.h"
36 using namespace llvm;
38 #define DEBUG_TYPE "ve-lower"
40 //===----------------------------------------------------------------------===//
41 // Calling Convention Implementation
42 //===----------------------------------------------------------------------===//
44 #include "VEGenCallingConv.inc"
46 CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
47 switch (CallConv) {
48 default:
49 return RetCC_VE_C;
50 case CallingConv::Fast:
51 return RetCC_VE_Fast;
55 CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
56 if (IsVarArg)
57 return CC_VE2;
58 switch (CallConv) {
59 default:
60 return CC_VE_C;
61 case CallingConv::Fast:
62 return CC_VE_Fast;
66 bool VETargetLowering::CanLowerReturn(
67 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
68 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
69 CCAssignFn *RetCC = getReturnCC(CallConv);
70 SmallVector<CCValAssign, 16> RVLocs;
71 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
72 return CCInfo.CheckReturn(Outs, RetCC);
75 static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
76 MVT::v256f32, MVT::v512f32, MVT::v256f64};
78 static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
80 void VETargetLowering::initRegisterClasses() {
81 // Set up the register classes.
82 addRegisterClass(MVT::i32, &VE::I32RegClass);
83 addRegisterClass(MVT::i64, &VE::I64RegClass);
84 addRegisterClass(MVT::f32, &VE::F32RegClass);
85 addRegisterClass(MVT::f64, &VE::I64RegClass);
86 addRegisterClass(MVT::f128, &VE::F128RegClass);
88 if (Subtarget->enableVPU()) {
89 for (MVT VecVT : AllVectorVTs)
90 addRegisterClass(VecVT, &VE::V64RegClass);
91 addRegisterClass(MVT::v256i1, &VE::VMRegClass);
92 addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
96 void VETargetLowering::initSPUActions() {
97 const auto &TM = getTargetMachine();
98 /// Load & Store {
100 // VE doesn't have i1 sign extending load.
101 for (MVT VT : MVT::integer_valuetypes()) {
102 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
103 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
104 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
105 setTruncStoreAction(VT, MVT::i1, Expand);
108 // VE doesn't have floating point extload/truncstore, so expand them.
109 for (MVT FPVT : MVT::fp_valuetypes()) {
110 for (MVT OtherFPVT : MVT::fp_valuetypes()) {
111 setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
112 setTruncStoreAction(FPVT, OtherFPVT, Expand);
116 // VE doesn't have fp128 load/store, so expand them in custom lower.
117 setOperationAction(ISD::LOAD, MVT::f128, Custom);
118 setOperationAction(ISD::STORE, MVT::f128, Custom);
120 /// } Load & Store
122 // Custom legalize address nodes into LO/HI parts.
123 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
124 setOperationAction(ISD::BlockAddress, PtrVT, Custom);
125 setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
126 setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
127 setOperationAction(ISD::ConstantPool, PtrVT, Custom);
128 setOperationAction(ISD::JumpTable, PtrVT, Custom);
130 /// VAARG handling {
131 setOperationAction(ISD::VASTART, MVT::Other, Custom);
132 // VAARG needs to be lowered to an access with 8-byte alignment.
133 setOperationAction(ISD::VAARG, MVT::Other, Custom);
134 // Use the default implementation.
135 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
136 setOperationAction(ISD::VAEND, MVT::Other, Expand);
137 /// } VAARG handling
139 /// Stack {
140 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
141 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
143 // Use the default implementation.
144 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
145 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
146 /// } Stack
148 /// Branch {
150 // VE doesn't have BRCOND
151 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
153 // BR_JT is not implemented yet.
154 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
156 /// } Branch
158 /// Int Ops {
159 for (MVT IntVT : {MVT::i32, MVT::i64}) {
160 // VE has no REM or DIVREM operations.
161 setOperationAction(ISD::UREM, IntVT, Expand);
162 setOperationAction(ISD::SREM, IntVT, Expand);
163 setOperationAction(ISD::SDIVREM, IntVT, Expand);
164 setOperationAction(ISD::UDIVREM, IntVT, Expand);
166 // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
167 setOperationAction(ISD::SHL_PARTS, IntVT, Expand);
168 setOperationAction(ISD::SRA_PARTS, IntVT, Expand);
169 setOperationAction(ISD::SRL_PARTS, IntVT, Expand);
171 // VE has no MULHU/S or U/SMUL_LOHI operations.
172 // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
173 setOperationAction(ISD::MULHU, IntVT, Expand);
174 setOperationAction(ISD::MULHS, IntVT, Expand);
175 setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);
176 setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);
178 // VE has no CTTZ, ROTL, ROTR operations.
179 setOperationAction(ISD::CTTZ, IntVT, Expand);
180 setOperationAction(ISD::ROTL, IntVT, Expand);
181 setOperationAction(ISD::ROTR, IntVT, Expand);
183 // VE has a 64-bit instruction which works as an i64 BSWAP operation. The
184 // same instruction also works as an i32 BSWAP operation with an additional
185 // parameter. Use isel patterns to lower BSWAP.
186 setOperationAction(ISD::BSWAP, IntVT, Legal);
188 // VE has only 64-bit instructions which work as i64 BITREVERSE/CTLZ/CTPOP
189 // operations. Use isel patterns for i64, promote for i32.
190 LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
191 setOperationAction(ISD::BITREVERSE, IntVT, Act);
192 setOperationAction(ISD::CTLZ, IntVT, Act);
193 setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
194 setOperationAction(ISD::CTPOP, IntVT, Act);
196 // VE has only 64-bit instructions which work as i64 AND/OR/XOR operations.
197 // Use isel patterns for i64, promote for i32.
198 setOperationAction(ISD::AND, IntVT, Act);
199 setOperationAction(ISD::OR, IntVT, Act);
200 setOperationAction(ISD::XOR, IntVT, Act);
202 /// } Int Ops
204 /// Conversion {
205 // VE doesn't have instructions for fp<->uint, so let LLVM expand them.
206 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
207 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
208 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
209 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
211 // fp16 not supported
212 for (MVT FPVT : MVT::fp_valuetypes()) {
213 setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
214 setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
216 /// } Conversion
218 /// Floating-point Ops {
219 /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
220 /// and fcmp.
222 // VE doesn't have the following floating-point operations.
223 for (MVT VT : MVT::fp_valuetypes()) {
224 setOperationAction(ISD::FNEG, VT, Expand);
225 setOperationAction(ISD::FREM, VT, Expand);
228 // VE doesn't have fdiv of f128.
229 setOperationAction(ISD::FDIV, MVT::f128, Expand);
231 for (MVT FPVT : {MVT::f32, MVT::f64}) {
232 // f32 and f64 uses ConstantFP. f128 uses ConstantPool.
233 setOperationAction(ISD::ConstantFP, FPVT, Legal);
235 /// } Floating-point Ops
237 /// Floating-point math functions {
239 // VE doesn't have the following floating-point math functions.
240 for (MVT VT : MVT::fp_valuetypes()) {
241 setOperationAction(ISD::FABS, VT, Expand);
242 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
243 setOperationAction(ISD::FCOS, VT, Expand);
244 setOperationAction(ISD::FSIN, VT, Expand);
245 setOperationAction(ISD::FSQRT, VT, Expand);
248 /// } Floating-point math functions
250 /// Atomic instructions {
252 setMaxAtomicSizeInBitsSupported(64);
253 setMinCmpXchgSizeInBits(32);
254 setSupportsUnalignedAtomics(false);
256 // Use custom inserter for ATOMIC_FENCE.
257 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
259 // Other atomic instructions.
260 for (MVT VT : MVT::integer_valuetypes()) {
261 // Support i8/i16 atomic swap.
262 setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
264 // FIXME: Support "atmam" instructions.
265 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
266 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
267 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
268 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
270 // VE doesn't have the following instructions.
271 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
272 setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
273 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
274 setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
275 setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
276 setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
277 setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
278 setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
281 /// } Atomic instructions
283 /// SJLJ instructions {
284 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
285 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
286 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
287 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
288 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
289 /// } SJLJ instructions
291 // Intrinsic instructions
292 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
295 void VETargetLowering::initVPUActions() {
296 for (MVT LegalVecVT : AllVectorVTs) {
297 setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
298 setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
299 setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
300 // Translate all vector instructions with legal element types to VVP_*
301 // nodes.
302 // TODO We will custom-widen into VVP_* nodes in the future. While we are
303 // building the infrastructure for this, we only do this for legal vector
304 // VTs.
305 #define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME) \
306 setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
307 #define ADD_VVP_OP(VVP_NAME, ISD_NAME) \
308 setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
309 #include "VVPNodes.def"
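// Illustrative sketch (entry names assumed from the usual VVPNodes.def
// layout, not verbatim): an entry such as
//   ADD_VVP_OP(VVP_ADD, ADD)
// expands here to
//   setOperationAction(ISD::ADD, LegalVecVT, Custom);
// so vector ISD::ADD nodes get routed through the custom VVP lowering path.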
312 for (MVT LegalPackedVT : AllPackedVTs) {
313 setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
314 setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
318 SDValue
319 VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
320 bool IsVarArg,
321 const SmallVectorImpl<ISD::OutputArg> &Outs,
322 const SmallVectorImpl<SDValue> &OutVals,
323 const SDLoc &DL, SelectionDAG &DAG) const {
324 // CCValAssign - represent the assignment of the return value to locations.
325 SmallVector<CCValAssign, 16> RVLocs;
327 // CCState - Info about the registers and stack slot.
328 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
329 *DAG.getContext());
331 // Analyze return values.
332 CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));
334 SDValue Flag;
335 SmallVector<SDValue, 4> RetOps(1, Chain);
337 // Copy the result values into the output registers.
338 for (unsigned i = 0; i != RVLocs.size(); ++i) {
339 CCValAssign &VA = RVLocs[i];
340 assert(VA.isRegLoc() && "Can only return in registers!");
341 assert(!VA.needsCustom() && "Unexpected custom lowering");
342 SDValue OutVal = OutVals[i];
344 // Integer return values must be sign or zero extended by the callee.
345 switch (VA.getLocInfo()) {
346 case CCValAssign::Full:
347 break;
348 case CCValAssign::SExt:
349 OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
350 break;
351 case CCValAssign::ZExt:
352 OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
353 break;
354 case CCValAssign::AExt:
355 OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
356 break;
357 case CCValAssign::BCvt: {
358 // Convert a float return value to i64 with padding.
359 // 63 31 0
360 // +------+------+
361 // | float| 0 |
362 // +------+------+
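// For example, a returned 1.0f (bit pattern 0x3F800000) ends up as
// 0x3F800000 in bits 63..32 of the i64 location, with the padding shown
// above occupying bits 31..0.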
363 assert(VA.getLocVT() == MVT::i64);
364 assert(VA.getValVT() == MVT::f32);
365 SDValue Undef = SDValue(
366 DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
367 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
368 OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
369 MVT::i64, Undef, OutVal, Sub_f32),
371 break;
373 default:
374 llvm_unreachable("Unknown loc info!");
377 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
379 // Guarantee that all emitted copies are stuck together with flags.
380 Flag = Chain.getValue(1);
381 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
384 RetOps[0] = Chain; // Update chain.
386 // Add the flag if we have it.
387 if (Flag.getNode())
388 RetOps.push_back(Flag);
390 return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
393 SDValue VETargetLowering::LowerFormalArguments(
394 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
395 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
396 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
397 MachineFunction &MF = DAG.getMachineFunction();
399 // Get the base offset of the incoming arguments stack space.
400 unsigned ArgsBaseOffset = Subtarget->getRsaSize();
401 // Get the size of the preserved arguments area
402 unsigned ArgsPreserved = 64;
404 // Analyze arguments according to CC_VE.
405 SmallVector<CCValAssign, 16> ArgLocs;
406 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
407 *DAG.getContext());
408 // Allocate the preserved area first.
409 CCInfo.AllocateStack(ArgsPreserved, Align(8));
410 // We already allocated the preserved area, so the stack offset computed
411 // by CC_VE would be correct now.
412 CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));
414 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
415 CCValAssign &VA = ArgLocs[i];
416 assert(!VA.needsCustom() && "Unexpected custom lowering");
417 if (VA.isRegLoc()) {
418 // This argument is passed in a register.
419 // All integer register arguments are promoted by the caller to i64.
421 // Create a virtual register for the promoted live-in value.
422 unsigned VReg =
423 MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
424 SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
426 // The caller promoted the argument, so insert an Assert?ext SDNode so we
427 // won't promote the value again in this function.
428 switch (VA.getLocInfo()) {
429 case CCValAssign::SExt:
430 Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
431 DAG.getValueType(VA.getValVT()));
432 break;
433 case CCValAssign::ZExt:
434 Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
435 DAG.getValueType(VA.getValVT()));
436 break;
437 case CCValAssign::BCvt: {
438 // Extract a float argument from i64 with padding.
439 // 63 31 0
440 // +------+------+
441 // | float| 0 |
442 // +------+------+
443 assert(VA.getLocVT() == MVT::i64);
444 assert(VA.getValVT() == MVT::f32);
445 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
446 Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
447 MVT::f32, Arg, Sub_f32),
449 break;
451 default:
452 break;
455 // Truncate the register down to the argument type.
456 if (VA.isExtInLoc())
457 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
459 InVals.push_back(Arg);
460 continue;
463 // The registers are exhausted. This argument was passed on the stack.
464 assert(VA.isMemLoc());
465 // The CC_VE_Full/Half functions compute stack offsets relative to the
466 // beginning of the arguments area at %fp + the size of reserved area.
467 unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
468 unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
470 // Adjust the offset for a float argument by adding 4, since the argument
471 // is stored in an 8-byte slot laid out as below. LLVM generates a 4-byte
472 // load instruction, so we need to adjust the offset here. This
473 // adjustment is required only in LowerFormalArguments. In LowerCall,
474 // a float argument is converted to i64 first and stored as 8 bytes of
475 // data, as required by the ABI, so no adjustment is needed there.
476 // 0 4
477 // +------+------+
478 // | empty| float|
479 // +------+------+
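// Worked example (assuming the usual 176-byte register save area): an f32
// argument whose CC-assigned slot starts at LocMemOffset 0 is loaded from
// frame offset 0 + 176 + 4 = 180, i.e. from the second (higher-addressed)
// half of its 8-byte slot.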
480 if (VA.getValVT() == MVT::f32)
481 Offset += 4;
483 int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
484 InVals.push_back(
485 DAG.getLoad(VA.getValVT(), DL, Chain,
486 DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
487 MachinePointerInfo::getFixedStack(MF, FI)));
490 if (!IsVarArg)
491 return Chain;
493 // This function takes variable arguments, some of which may have been passed
494 // in registers %s0-%s8.
496 // The va_start intrinsic needs to know the offset to the first variable
497 // argument.
498 // TODO: need to calculate offset correctly once we support f128.
499 unsigned ArgOffset = ArgLocs.size() * 8;
500 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
501 // Skip the reserved area at the top of stack.
502 FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
504 return Chain;
507 // FIXME? Maybe this could be a TableGen attribute on some registers and
508 // this table could be generated automatically from RegInfo.
509 Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
510 const MachineFunction &MF) const {
511 Register Reg = StringSwitch<Register>(RegName)
512 .Case("sp", VE::SX11) // Stack pointer
513 .Case("fp", VE::SX9) // Frame pointer
514 .Case("sl", VE::SX8) // Stack limit
515 .Case("lr", VE::SX10) // Link register
516 .Case("tp", VE::SX14) // Thread pointer
517 .Case("outer", VE::SX12) // Outer register
518 .Case("info", VE::SX17) // Info area register
519 .Case("got", VE::SX15) // Global offset table register
520 .Case("plt", VE::SX16) // Procedure linkage table register
521 .Default(0);
523 if (Reg)
524 return Reg;
526 report_fatal_error("Invalid register name global variable");
529 //===----------------------------------------------------------------------===//
530 // TargetLowering Implementation
531 //===----------------------------------------------------------------------===//
533 SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
534 SmallVectorImpl<SDValue> &InVals) const {
535 SelectionDAG &DAG = CLI.DAG;
536 SDLoc DL = CLI.DL;
537 SDValue Chain = CLI.Chain;
538 auto PtrVT = getPointerTy(DAG.getDataLayout());
540 // VE target does not yet support tail call optimization.
541 CLI.IsTailCall = false;
543 // Get the base offset of the outgoing arguments stack space.
544 unsigned ArgsBaseOffset = Subtarget->getRsaSize();
545 // Get the size of the preserved arguments area
546 unsigned ArgsPreserved = 8 * 8u;
548 // Analyze operands of the call, assigning locations to each operand.
549 SmallVector<CCValAssign, 16> ArgLocs;
550 CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
551 *DAG.getContext());
552 // Allocate the preserved area first.
553 CCInfo.AllocateStack(ArgsPreserved, Align(8));
554 // We already allocated the preserved area, so the stack offset computed
555 // by CC_VE would be correct now.
556 CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));
558 // VE requires using both registers and the stack for varargs or
559 // unprototyped functions.
560 bool UseBoth = CLI.IsVarArg;
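// Illustrative example: for a varargs call such as printf("%d", x), each
// operand gets a location from the normal analysis above (ArgLocs) and an
// additional stack slot from CC_VE2 (ArgLocs2); register operands are then
// both copied into their register and stored to the stack in the loop below.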
562 // Analyze operands again if it is required to store BOTH.
563 SmallVector<CCValAssign, 16> ArgLocs2;
564 CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
565 ArgLocs2, *DAG.getContext());
566 if (UseBoth)
567 CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));
569 // Get the size of the outgoing arguments stack space requirement.
570 unsigned ArgsSize = CCInfo.getNextStackOffset();
572 // Keep stack frames 16-byte aligned.
573 ArgsSize = alignTo(ArgsSize, 16);
575 // Adjust the stack pointer to make room for the arguments.
576 // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
577 // with more than 6 arguments.
578 Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
580 // Collect the set of registers to pass to the function and their values.
581 // This will be emitted as a sequence of CopyToReg nodes glued to the call
582 // instruction.
583 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
585 // Collect chains from all the memory operations that copy arguments to the
586 // stack. They must follow the stack pointer adjustment above and precede the
587 // call instruction itself.
588 SmallVector<SDValue, 8> MemOpChains;
590 // VE needs the address of the callee function in a register,
591 // so prepare to copy it to SX12 here.
593 // If the callee is a GlobalAddress node (quite common, every direct call is)
594 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
595 // Likewise ExternalSymbol -> TargetExternalSymbol.
596 SDValue Callee = CLI.Callee;
598 bool IsPICCall = isPositionIndependent();
600 // PC-relative references to external symbols should go through $stub.
601 // If so, we need to prepare GlobalBaseReg first.
602 const TargetMachine &TM = DAG.getTarget();
603 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
604 const GlobalValue *GV = nullptr;
605 auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
606 if (CalleeG)
607 GV = CalleeG->getGlobal();
608 bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
609 bool UsePlt = !Local;
610 MachineFunction &MF = DAG.getMachineFunction();
612 // Turn GlobalAddress/ExternalSymbol node into a value node
613 // containing the address of them here.
614 if (CalleeG) {
615 if (IsPICCall) {
616 if (UsePlt)
617 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
618 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
619 Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
620 } else {
621 Callee =
622 makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
624 } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
625 if (IsPICCall) {
626 if (UsePlt)
627 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
628 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
629 Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
630 } else {
631 Callee =
632 makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
636 RegsToPass.push_back(std::make_pair(VE::SX12, Callee));
638 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
639 CCValAssign &VA = ArgLocs[i];
640 SDValue Arg = CLI.OutVals[i];
642 // Promote the value if needed.
643 switch (VA.getLocInfo()) {
644 default:
645 llvm_unreachable("Unknown location info!");
646 case CCValAssign::Full:
647 break;
648 case CCValAssign::SExt:
649 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
650 break;
651 case CCValAssign::ZExt:
652 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
653 break;
654 case CCValAssign::AExt:
655 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
656 break;
657 case CCValAssign::BCvt: {
658 // Convert a float argument to i64 with padding.
659 // 63 31 0
660 // +------+------+
661 // | float| 0 |
662 // +------+------+
663 assert(VA.getLocVT() == MVT::i64);
664 assert(VA.getValVT() == MVT::f32);
665 SDValue Undef = SDValue(
666 DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
667 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
668 Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
669 MVT::i64, Undef, Arg, Sub_f32),
671 break;
675 if (VA.isRegLoc()) {
676 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
677 if (!UseBoth)
678 continue;
679 VA = ArgLocs2[i];
682 assert(VA.isMemLoc());
684 // Create a store off the stack pointer for this argument.
685 SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
686 // The argument area starts at %fp/%sp + the size of reserved area.
687 SDValue PtrOff =
688 DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
689 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
690 MemOpChains.push_back(
691 DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
694 // Emit all stores, make sure they occur before the call.
695 if (!MemOpChains.empty())
696 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
698 // Build a sequence of CopyToReg nodes glued together with token chain and
699 // glue operands which copy the outgoing args into registers. The InGlue is
700 // necessary since all emitted instructions must be stuck together in order
701 // to pass the live physical registers.
702 SDValue InGlue;
703 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
704 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
705 RegsToPass[i].second, InGlue);
706 InGlue = Chain.getValue(1);
709 // Build the operands for the call instruction itself.
710 SmallVector<SDValue, 8> Ops;
711 Ops.push_back(Chain);
712 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
713 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
714 RegsToPass[i].second.getValueType()));
716 // Add a register mask operand representing the call-preserved registers.
717 const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
718 const uint32_t *Mask =
719 TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
720 assert(Mask && "Missing call preserved mask for calling convention");
721 Ops.push_back(DAG.getRegisterMask(Mask));
723 // Make sure the CopyToReg nodes are glued to the call instruction which
724 // consumes the registers.
725 if (InGlue.getNode())
726 Ops.push_back(InGlue);
728 // Now the call itself.
729 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
730 Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
731 InGlue = Chain.getValue(1);
733 // Revert the stack pointer immediately after the call.
734 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
735 DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
736 InGlue = Chain.getValue(1);
738 // Now extract the return values. This is more or less the same as
739 // LowerFormalArguments.
741 // Assign locations to each value returned by this call.
742 SmallVector<CCValAssign, 16> RVLocs;
743 CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
744 *DAG.getContext());
746 // Set inreg flag manually for codegen generated library calls that
747 // return float.
748 if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
749 CLI.Ins[0].Flags.setInReg();
751 RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));
753 // Copy all of the result registers out of their specified physreg.
754 for (unsigned i = 0; i != RVLocs.size(); ++i) {
755 CCValAssign &VA = RVLocs[i];
756 assert(!VA.needsCustom() && "Unexpected custom lowering");
757 unsigned Reg = VA.getLocReg();
759 // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
760 // reside in the same register in the high and low bits. Reuse the
761 // CopyFromReg previous node to avoid duplicate copies.
762 SDValue RV;
763 if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
764 if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
765 RV = Chain.getValue(0);
767 // But usually we'll create a new CopyFromReg for a different register.
768 if (!RV.getNode()) {
769 RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
770 Chain = RV.getValue(1);
771 InGlue = Chain.getValue(2);
774 // The callee promoted the return value, so insert an Assert?ext SDNode so
775 // we won't promote the value again in this function.
776 switch (VA.getLocInfo()) {
777 case CCValAssign::SExt:
778 RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
779 DAG.getValueType(VA.getValVT()));
780 break;
781 case CCValAssign::ZExt:
782 RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
783 DAG.getValueType(VA.getValVT()));
784 break;
785 case CCValAssign::BCvt: {
786 // Extract a float return value from i64 with padding.
787 // 63 31 0
788 // +------+------+
789 // | float| 0 |
790 // +------+------+
791 assert(VA.getLocVT() == MVT::i64);
792 assert(VA.getValVT() == MVT::f32);
793 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
794 RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
795 MVT::f32, RV, Sub_f32),
797 break;
799 default:
800 break;
803 // Truncate the register down to the return value type.
804 if (VA.isExtInLoc())
805 RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
807 InVals.push_back(RV);
810 return Chain;
813 bool VETargetLowering::isOffsetFoldingLegal(
814 const GlobalAddressSDNode *GA) const {
815 // VE uses 64 bit addressing, so we need multiple instructions to generate
816 // an address. Folding address with offset increases the number of
817 // instructions, so that we disable it here. Offsets will be folded in
818 // the DAG combine later if it worth to do so.
819 return false;
822 /// isFPImmLegal - Returns true if the target can instruction select the
823 /// specified FP immediate natively. If false, the legalizer will
824 /// materialize the FP immediate as a load from a constant pool.
825 bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
826 bool ForCodeSize) const {
827 return VT == MVT::f32 || VT == MVT::f64;
830 /// Determine if the target supports unaligned memory accesses.
832 /// This function returns true if the target allows unaligned memory accesses
833 /// of the specified type in the given address space. If true, it also returns
834 /// whether the unaligned memory access is "fast" in the last argument by
835 /// reference. This is used, for example, in situations where an array
836 /// copy/move/set is converted to a sequence of store operations. Its use
837 /// helps to ensure that such replacements don't generate code that causes an
838 /// alignment error (trap) on the target machine.
839 bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
840 unsigned AddrSpace,
841 Align A,
842 MachineMemOperand::Flags,
843 bool *Fast) const {
844 if (Fast) {
845 // It's fast anytime on VE
846 *Fast = true;
848 return true;
851 VETargetLowering::VETargetLowering(const TargetMachine &TM,
852 const VESubtarget &STI)
853 : TargetLowering(TM), Subtarget(&STI) {
854 // Instructions which use registers as conditionals examine all the
855 // bits (as does the pseudo SELECT_CC expansion). I don't think it
856 // matters much whether it's ZeroOrOneBooleanContent, or
857 // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
858 // former.
859 setBooleanContents(ZeroOrOneBooleanContent);
860 setBooleanVectorContents(ZeroOrOneBooleanContent);
862 initRegisterClasses();
863 initSPUActions();
864 initVPUActions();
866 setStackPointerRegisterToSaveRestore(VE::SX11);
868 // We have target-specific dag combine patterns for the following nodes:
869 setTargetDAGCombine(ISD::TRUNCATE);
871 // Set function alignment to 16 bytes
872 setMinFunctionAlignment(Align(16));
874 // VE stores all arguments with 8-byte alignment
875 setMinStackArgumentAlignment(Align(8));
877 computeRegisterProperties(Subtarget->getRegisterInfo());
880 const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
881 #define TARGET_NODE_CASE(NAME) \
882 case VEISD::NAME: \
883 return "VEISD::" #NAME;
884 switch ((VEISD::NodeType)Opcode) {
885 case VEISD::FIRST_NUMBER:
886 break;
887 TARGET_NODE_CASE(CALL)
888 TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
889 TARGET_NODE_CASE(EH_SJLJ_SETJMP)
890 TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
891 TARGET_NODE_CASE(GETFUNPLT)
892 TARGET_NODE_CASE(GETSTACKTOP)
893 TARGET_NODE_CASE(GETTLSADDR)
894 TARGET_NODE_CASE(GLOBAL_BASE_REG)
895 TARGET_NODE_CASE(Hi)
896 TARGET_NODE_CASE(Lo)
897 TARGET_NODE_CASE(MEMBARRIER)
898 TARGET_NODE_CASE(RET_FLAG)
899 TARGET_NODE_CASE(TS1AM)
900 TARGET_NODE_CASE(VEC_BROADCAST)
902 // Register the VVP_* SDNodes.
903 #define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
904 #include "VVPNodes.def"
906 #undef TARGET_NODE_CASE
907 return nullptr;
910 EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
911 EVT VT) const {
912 return MVT::i32;
915 // Convert to a target node and set target flags.
916 SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
917 SelectionDAG &DAG) const {
918 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
919 return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
920 GA->getValueType(0), GA->getOffset(), TF);
922 if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
923 return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
924 0, TF);
926 if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
927 return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
928 CP->getAlign(), CP->getOffset(), TF);
930 if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
931 return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
932 TF);
934 if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
935 return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);
937 llvm_unreachable("Unhandled address SDNode");
940 // Split Op into high and low parts according to HiTF and LoTF.
941 // Return an ADD node combining the parts.
942 SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
943 SelectionDAG &DAG) const {
944 SDLoc DL(Op);
945 EVT VT = Op.getValueType();
946 SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
947 SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
948 return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
951 // Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
952 // or ExternalSymbol SDNode.
953 SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
954 SDLoc DL(Op);
955 EVT PtrVT = Op.getValueType();
957 // Handle PIC mode first. VE needs a got load for every variable!
958 if (isPositionIndependent()) {
959 auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);
961 if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
962 (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
963 // Create the following instructions for local-linkage PIC code.
964 // lea %reg, label@gotoff_lo
965 // and %reg, %reg, (32)0
966 // lea.sl %reg, label@gotoff_hi(%reg, %got)
967 SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
968 VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
969 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
970 return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
972 // Create the following instructions for non-local-linkage PIC code.
973 // lea %reg, label@got_lo
974 // and %reg, %reg, (32)0
975 // lea.sl %reg, label@got_hi(%reg)
976 // ld %reg, (%reg, %got)
977 SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
978 VEMCExpr::VK_VE_GOT_LO32, DAG);
979 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
980 SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
981 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
982 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
985 // This is one of the absolute code models.
986 switch (getTargetMachine().getCodeModel()) {
987 default:
988 llvm_unreachable("Unsupported absolute code model");
989 case CodeModel::Small:
990 case CodeModel::Medium:
991 case CodeModel::Large:
992 // abs64.
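// Sketch of the expected expansion (instruction form is illustrative only):
//   lea %reg, sym@lo
//   and %reg, %reg, (32)0
//   lea.sl %reg, sym@hi(, %reg)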
993 return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
997 /// Custom Lower {
999 // The mappings for emitLeading/TrailingFence for VE are designed following
1000 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
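// In short, for the code below: release and acq_rel operations get a leading
// release fence; acquire and acq_rel operations get a trailing acquire fence;
// a seq_cst operation gets a leading seq_cst fence only if it stores, and
// always gets a trailing seq_cst fence.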
1001 Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
1002 Instruction *Inst,
1003 AtomicOrdering Ord) const {
1004 switch (Ord) {
1005 case AtomicOrdering::NotAtomic:
1006 case AtomicOrdering::Unordered:
1007 llvm_unreachable("Invalid fence: unordered/non-atomic");
1008 case AtomicOrdering::Monotonic:
1009 case AtomicOrdering::Acquire:
1010 return nullptr; // Nothing to do
1011 case AtomicOrdering::Release:
1012 case AtomicOrdering::AcquireRelease:
1013 return Builder.CreateFence(AtomicOrdering::Release);
1014 case AtomicOrdering::SequentiallyConsistent:
1015 if (!Inst->hasAtomicStore())
1016 return nullptr; // Nothing to do
1017 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
1019 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
1022 Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
1023 Instruction *Inst,
1024 AtomicOrdering Ord) const {
1025 switch (Ord) {
1026 case AtomicOrdering::NotAtomic:
1027 case AtomicOrdering::Unordered:
1028 llvm_unreachable("Invalid fence: unordered/not-atomic");
1029 case AtomicOrdering::Monotonic:
1030 case AtomicOrdering::Release:
1031 return nullptr; // Nothing to do
1032 case AtomicOrdering::Acquire:
1033 case AtomicOrdering::AcquireRelease:
1034 return Builder.CreateFence(AtomicOrdering::Acquire);
1035 case AtomicOrdering::SequentiallyConsistent:
1036 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
1038 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
1041 SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
1042 SelectionDAG &DAG) const {
1043 SDLoc DL(Op);
1044 AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
1045 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
1046 SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
1047 cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
1049 // VE uses Release consistency, so we need a fence instruction if it is a
1050 // cross-thread fence.
1051 if (FenceSSID == SyncScope::System) {
1052 switch (FenceOrdering) {
1053 case AtomicOrdering::NotAtomic:
1054 case AtomicOrdering::Unordered:
1055 case AtomicOrdering::Monotonic:
1056 // No need to generate fencem instruction here.
1057 break;
1058 case AtomicOrdering::Acquire:
1059 // Generate "fencem 2" as acquire fence.
1060 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1061 DAG.getTargetConstant(2, DL, MVT::i32),
1062 Op.getOperand(0)),
1064 case AtomicOrdering::Release:
1065 // Generate "fencem 1" as release fence.
1066 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1067 DAG.getTargetConstant(1, DL, MVT::i32),
1068 Op.getOperand(0)),
1070 case AtomicOrdering::AcquireRelease:
1071 case AtomicOrdering::SequentiallyConsistent:
1072 // Generate "fencem 3" as acq_rel and seq_cst fence.
1073 // FIXME: "fencem 3" doesn't wait for PCIe device accesses,
1074 // so seq_cst may require more instructions for them.
1075 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1076 DAG.getTargetConstant(3, DL, MVT::i32),
1077 Op.getOperand(0)),
1082 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1083 return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1086 TargetLowering::AtomicExpansionKind
1087 VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
1088 // We have TS1AM implementation for i8/i16/i32/i64, so use it.
1089 if (AI->getOperation() == AtomicRMWInst::Xchg) {
1090 return AtomicExpansionKind::None;
1092 // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.
1094 // Otherwise, expand it using a compare-and-exchange instruction so that the
1095 // __sync_fetch_and_* functions are not called.
1096 return AtomicExpansionKind::CmpXChg;
1099 static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
1100 SDValue &Bits) {
1101 SDLoc DL(Op);
1102 AtomicSDNode *N = cast<AtomicSDNode>(Op);
1103 SDValue Ptr = N->getOperand(1);
1104 SDValue Val = N->getOperand(2);
1105 EVT PtrVT = Ptr.getValueType();
1106 bool Byte = N->getMemoryVT() == MVT::i8;
1107 // Remainder = AND Ptr, 3
1108 // Flag = 1 << Remainder ; If Byte is true (1 byte swap flag)
1109 // Flag = 3 << Remainder ; If Byte is false (2 bytes swap flag)
1110 // Bits = Remainder << 3
1111 // NewVal = Val << Bits
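// Worked example: for an i8 swap where (Ptr & 3) == 2, Remainder = 2,
// Flag = 1 << 2 = 0x4 (the 1-byte swap flag selecting byte 2 of the aligned
// word), Bits = 2 << 3 = 16, and NewVal = Val << 16, which places the new
// byte at its position within the aligned 32-bit word.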
1112 SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
1113 SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
1114 SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
1115 : DAG.getConstant(3, DL, MVT::i32);
1116 Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
1117 Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
1118 return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
1121 static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
1122 SDValue Bits) {
1123 SDLoc DL(Op);
1124 EVT VT = Data.getValueType();
1125 bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
1126 // NewData = Data >> Bits
1127 // Result = NewData & 0xff ; If Byte is true (1 byte)
1128 // Result = NewData & 0xffff ; If Byte is false (2 bytes)
1130 SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
1131 return DAG.getNode(ISD::AND, DL, VT,
1132 {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
1135 SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
1136 SelectionDAG &DAG) const {
1137 SDLoc DL(Op);
1138 AtomicSDNode *N = cast<AtomicSDNode>(Op);
1140 if (N->getMemoryVT() == MVT::i8) {
1141 // For i8, use "ts1am"
1142 // Input:
1143 // ATOMIC_SWAP Ptr, Val, Order
1145 // Output:
1146 // Remainder = AND Ptr, 3
1147 // Flag = 1 << Remainder ; 1 byte swap flag for TS1AM inst.
1148 // Bits = Remainder << 3
1149 // NewVal = Val << Bits
1151 // Aligned = AND Ptr, -4
1152 // Data = TS1AM Aligned, Flag, NewVal
1154 // NewData = Data >> Bits
1155 // Result = NewData & 0xff ; 1 byte result
1156 SDValue Flag;
1157 SDValue Bits;
1158 SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1160 SDValue Ptr = N->getOperand(1);
1161 SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1162 {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
1163 SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1164 DAG.getVTList(Op.getNode()->getValueType(0),
1165 Op.getNode()->getValueType(1)),
1166 {N->getChain(), Aligned, Flag, NewVal},
1167 N->getMemOperand());
1169 SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1170 SDValue Chain = TS1AM.getValue(1);
1171 return DAG.getMergeValues({Result, Chain}, DL);
1173 if (N->getMemoryVT() == MVT::i16) {
1174 // For i16, use "ts1am"
1175 SDValue Flag;
1176 SDValue Bits;
1177 SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1179 SDValue Ptr = N->getOperand(1);
1180 SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1181 {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
1182 SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1183 DAG.getVTList(Op.getNode()->getValueType(0),
1184 Op.getNode()->getValueType(1)),
1185 {N->getChain(), Aligned, Flag, NewVal},
1186 N->getMemOperand());
1188 SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1189 SDValue Chain = TS1AM.getValue(1);
1190 return DAG.getMergeValues({Result, Chain}, DL);
1192 // Otherwise, let llvm legalize it.
1193 return Op;
1196 SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
1197 SelectionDAG &DAG) const {
1198 return makeAddress(Op, DAG);
1201 SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
1202 SelectionDAG &DAG) const {
1203 return makeAddress(Op, DAG);
1206 SDValue VETargetLowering::lowerConstantPool(SDValue Op,
1207 SelectionDAG &DAG) const {
1208 return makeAddress(Op, DAG);
1211 SDValue
1212 VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
1213 SelectionDAG &DAG) const {
1214 SDLoc DL(Op);
1216 // Generate the following code:
1217 // t1: ch,glue = callseq_start t0, 0, 0
1218 // t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
1219 // t3: ch,glue = callseq_end t2, 0, 0, t2:2
1220 // t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
1221 SDValue Label = withTargetFlags(Op, 0, DAG);
1222 EVT PtrVT = Op.getValueType();
1224 // Lowering the machine isd will make sure everything is in the right
1225 // location.
1226 SDValue Chain = DAG.getEntryNode();
1227 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1228 const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
1229 DAG.getMachineFunction(), CallingConv::C);
1230 Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
1231 SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
1232 Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
1233 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, DL, true),
1234 DAG.getIntPtrConstant(0, DL, true),
1235 Chain.getValue(1), DL);
1236 Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));
1238 // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
1239 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1240 MFI.setHasCalls(true);
1242 // Also generate code to prepare a GOT register if it is PIC.
1243 if (isPositionIndependent()) {
1244 MachineFunction &MF = DAG.getMachineFunction();
1245 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
1248 return Chain;
1251 SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
1252 SelectionDAG &DAG) const {
1253 // The current implementation of nld (2.26) doesn't allow local exec model
1254 // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
1255 // generate the general dynamic model code sequence.
1257 // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
1258 return lowerToTLSGeneralDynamicModel(Op, DAG);
1261 SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1262 return makeAddress(Op, DAG);
1265 // Lower a f128 load into two f64 loads.
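// Sketch of the expansion performed below: a 16-byte load from `addr` becomes
//   Lo64 = load f64 from addr
//   Hi64 = load f64 from addr + 8
// and the halves are reassembled into an f128 with INSERT_SUBREG, Hi64 going
// to sub_even and Lo64 to sub_odd, matching the even/odd register-pair layout
// used for f128.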
1266 static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
1267 SDLoc DL(Op);
1268 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
1269 assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1270 unsigned Alignment = LdNode->getAlign().value();
1271 if (Alignment > 8)
1272 Alignment = 8;
1274 SDValue Lo64 =
1275 DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
1276 LdNode->getPointerInfo(), Alignment,
1277 LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1278 : MachineMemOperand::MONone);
1279 EVT AddrVT = LdNode->getBasePtr().getValueType();
1280 SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
1281 DAG.getConstant(8, DL, AddrVT));
1282 SDValue Hi64 =
1283 DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
1284 LdNode->getPointerInfo(), Alignment,
1285 LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1286 : MachineMemOperand::MONone);
1288 SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1289 SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1291 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1292 SDNode *InFP128 =
1293 DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
1294 InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1295 SDValue(InFP128, 0), Hi64, SubRegEven);
1296 InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1297 SDValue(InFP128, 0), Lo64, SubRegOdd);
1298 SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
1299 SDValue(Hi64.getNode(), 1)};
1300 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1301 SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
1302 return DAG.getMergeValues(Ops, DL);
1305 SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1306 LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
1308 SDValue BasePtr = LdNode->getBasePtr();
1309 if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1310 // Do not expand a load instruction with a frame index here because of
1311 // dependency problems. We expand it later in eliminateFrameIndex().
1312 return Op;
1315 EVT MemVT = LdNode->getMemoryVT();
1316 if (MemVT == MVT::f128)
1317 return lowerLoadF128(Op, DAG);
1319 return Op;
1322 // Lower a f128 store into two f64 stores.
1323 static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
1324 SDLoc DL(Op);
1325 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
1326 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1328 SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1329 SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1331 SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1332 StNode->getValue(), SubRegEven);
1333 SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1334 StNode->getValue(), SubRegOdd);
1336 unsigned Alignment = StNode->getAlign().value();
1337 if (Alignment > 8)
1338 Alignment = 8;
1340 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1341 SDValue OutChains[2];
1342 OutChains[0] =
1343 DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0),
1344 StNode->getBasePtr(), MachinePointerInfo(), Alignment,
1345 StNode->isVolatile() ? MachineMemOperand::MOVolatile
1346 : MachineMemOperand::MONone);
1347 EVT AddrVT = StNode->getBasePtr().getValueType();
1348 SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(),
1349 DAG.getConstant(8, DL, AddrVT));
1350 OutChains[1] =
1351 DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr,
1352 MachinePointerInfo(), Alignment,
1353 StNode->isVolatile() ? MachineMemOperand::MOVolatile
1354 : MachineMemOperand::MONone);
1355 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1358 SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1359 StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
1360 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1362 SDValue BasePtr = StNode->getBasePtr();
1363 if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1364 // Do not expand store instruction with frame index here because of
1365 // dependency problems. We expand it later in eliminateFrameIndex().
1366 return Op;
1369 EVT MemVT = StNode->getMemoryVT();
1370 if (MemVT == MVT::f128)
1371 return lowerStoreF128(Op, DAG);
1373 // Otherwise, ask llvm to expand it.
1374 return SDValue();
1377 SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1378 MachineFunction &MF = DAG.getMachineFunction();
1379 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
1380 auto PtrVT = getPointerTy(DAG.getDataLayout());
1382 // Need frame address to find the address of VarArgsFrameIndex.
1383 MF.getFrameInfo().setFrameAddressIsTaken(true);
1385 // vastart just stores the address of the VarArgsFrameIndex slot into the
1386 // memory location argument.
1387 SDLoc DL(Op);
1388 SDValue Offset =
1389 DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
1390 DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
1391 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1392 return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
1393 MachinePointerInfo(SV));
1396 SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
1397 SDNode *Node = Op.getNode();
1398 EVT VT = Node->getValueType(0);
1399 SDValue InChain = Node->getOperand(0);
1400 SDValue VAListPtr = Node->getOperand(1);
1401 EVT PtrVT = VAListPtr.getValueType();
1402 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
1403 SDLoc DL(Node);
1404 SDValue VAList =
1405 DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
1406 SDValue Chain = VAList.getValue(1);
1407 SDValue NextPtr;
1409 if (VT == MVT::f128) {
1410 // VE f128 values must be stored with 16-byte alignment. We don't
1411 // know the actual alignment of VAList, so we enforce its alignment
1412 // dynamically.
1413 int Align = 16;
1414 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1415 DAG.getConstant(Align - 1, DL, PtrVT));
1416 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
1417 DAG.getConstant(-Align, DL, PtrVT));
1418 // Increment the pointer, VAList, by 16 to the next vaarg.
1419 NextPtr =
1420 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL));
1421 } else if (VT == MVT::f32) {
1422 // float --> need special handling like below.
1423 // 0 4
1424 // +------+------+
1425 // | empty| float|
1426 // +------+------+
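// Example: if VAList currently points at the start of the slot above, the
// float itself is loaded from VAList + 4, while NextPtr still advances by
// the full 8-byte slot.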
1427 // Increment the pointer, VAList, by 8 to the next vaarg.
1428 NextPtr =
1429 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1430 // Then, adjust VAList.
1431 unsigned InternalOffset = 4;
1432 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1433 DAG.getConstant(InternalOffset, DL, PtrVT));
1434 } else {
1435 // Increment the pointer, VAList, by 8 to the next vaarg.
1436 NextPtr =
1437 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1440 // Store the incremented VAList to the legalized pointer.
1441 InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));
1443 // Load the actual argument out of the pointer VAList.
1444 // We can't count on greater alignment than the word size.
1445 return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(),
1446 std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
1449 SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
1450 SelectionDAG &DAG) const {
1451 // Generate the following code.
1452 // (void)__ve_grow_stack(size);
1453 // ret = GETSTACKTOP; // pseudo instruction
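// When an over-aligned allocation is requested, the sketch becomes
// (helper names taken from the code below):
//   (void)__ve_grow_stack_align(size, ~(align - 1));
//   ret = (GETSTACKTOP + align - 1) & ~(align - 1);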
1454 SDLoc DL(Op);
1456 // Get the inputs.
1457 SDNode *Node = Op.getNode();
1458 SDValue Chain = Op.getOperand(0);
1459 SDValue Size = Op.getOperand(1);
1460 MaybeAlign Alignment(Op.getConstantOperandVal(2));
1461 EVT VT = Node->getValueType(0);
1463 // Chain the dynamic stack allocation so that it doesn't modify the stack
1464 // pointer when other instructions are using the stack.
1465 Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
1467 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1468 Align StackAlign = TFI.getStackAlign();
1469 bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1471 // Prepare arguments
1472 TargetLowering::ArgListTy Args;
1473 TargetLowering::ArgListEntry Entry;
1474 Entry.Node = Size;
1475 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1476 Args.push_back(Entry);
1477 if (NeedsAlign) {
1478 Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
1479 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1480 Args.push_back(Entry);
1482 Type *RetTy = Type::getVoidTy(*DAG.getContext());
1484 EVT PtrVT = Op.getValueType();
1485 SDValue Callee;
1486 if (NeedsAlign) {
1487 Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
1488 } else {
1489 Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
1492 TargetLowering::CallLoweringInfo CLI(DAG);
1493 CLI.setDebugLoc(DL)
1494 .setChain(Chain)
1495 .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
1496 .setDiscardResult(true);
1497 std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1498 Chain = pair.second;
1499 SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
1500 if (NeedsAlign) {
1501 Result = DAG.getNode(ISD::ADD, DL, VT, Result,
1502 DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
1503 Result = DAG.getNode(ISD::AND, DL, VT, Result,
1504 DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
1506 // Chain = Result.getValue(1);
1507 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
1508 DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);
1510 SDValue Ops[2] = {Result, Chain};
1511 return DAG.getMergeValues(Ops, DL);
1514 SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
1515 SelectionDAG &DAG) const {
1516 SDLoc DL(Op);
1517 return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
1518 Op.getOperand(1));
1521 SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
1522 SelectionDAG &DAG) const {
1523 SDLoc DL(Op);
1524 return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL,
1525 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
1526 Op.getOperand(1));
1529 SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
1530 SelectionDAG &DAG) const {
1531 SDLoc DL(Op);
1532 return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
1533 Op.getOperand(0));
1536 static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
1537 const VETargetLowering &TLI,
1538 const VESubtarget *Subtarget) {
1539 SDLoc DL(Op);
1540 MachineFunction &MF = DAG.getMachineFunction();
1541 EVT PtrVT = TLI.getPointerTy(MF.getDataLayout());
1543 MachineFrameInfo &MFI = MF.getFrameInfo();
1544 MFI.setFrameAddressIsTaken(true);
1546 unsigned Depth = Op.getConstantOperandVal(0);
1547 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1548 unsigned FrameReg = RegInfo->getFrameRegister(MF);
1549 SDValue FrameAddr =
1550 DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT);
1551 while (Depth--)
1552 FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(),
1553 FrameAddr, MachinePointerInfo());
1554 return FrameAddr;
1557 static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
1558 const VETargetLowering &TLI,
1559 const VESubtarget *Subtarget) {
1560 MachineFunction &MF = DAG.getMachineFunction();
1561 MachineFrameInfo &MFI = MF.getFrameInfo();
1562 MFI.setReturnAddressIsTaken(true);
1564 if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
1565 return SDValue();
1567 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1569 SDLoc DL(Op);
1570 EVT VT = Op.getValueType();
1571 SDValue Offset = DAG.getConstant(8, DL, VT);
1572 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1573 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1574 MachinePointerInfo());
1577 SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1578 SelectionDAG &DAG) const {
1579 SDLoc DL(Op);
1580 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1581 switch (IntNo) {
1582 default: // Don't custom lower most intrinsics.
1583 return SDValue();
1584 case Intrinsic::eh_sjlj_lsda: {
1585 MachineFunction &MF = DAG.getMachineFunction();
1586 MVT VT = Op.getSimpleValueType();
1587 const VETargetMachine *TM =
1588 static_cast<const VETargetMachine *>(&DAG.getTarget());
1590 // Create the GCC_except_tableXX string. The real symbol for it will be
1591 // generated in EHStreamer::emitExceptionTable() later, so we just
1592 // borrow its name here.
1593 TM->getStrList()->push_back(std::string(
1594 (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
1595 SDValue Addr =
1596 DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0);
1597 if (isPositionIndependent()) {
1598 Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32,
1599 VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1600 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT);
1601 return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr);
1603 return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
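/// Return true if \p N is a BUILD_VECTOR with exactly one non-undef operand,
/// e.g. (build_vector undef, undef, %x, undef, ...); in that case \p UniqueIdx
/// is set to the position of that operand (2 in this example).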
1608 static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
1609 if (!isa<BuildVectorSDNode>(N))
1610 return false;
1611 const auto *BVN = cast<BuildVectorSDNode>(N);
1613 // Find first non-undef insertion.
1614 unsigned Idx;
1615 for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
1616 auto ElemV = BVN->getOperand(Idx);
1617 if (!ElemV->isUndef())
1618 break;
1620 // Catch the (hypothetical) all-undef case.
1621 if (Idx == BVN->getNumOperands())
1622 return false;
1623 // Remember insertion.
1624 UniqueIdx = Idx++;
1625 // Verify that all other operands are undef.
1626 for (; Idx < BVN->getNumOperands(); ++Idx) {
1627 auto ElemV = BVN->getOperand(Idx);
1628 if (!ElemV->isUndef())
1629 return false;
1631 return true;
1634 static SDValue getSplatValue(SDNode *N) {
1635 if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
1636 return BuildVec->getSplatValue();
1638 return SDValue();
1641 SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
1642 SelectionDAG &DAG) const {
1643 SDLoc DL(Op);
1644 unsigned NumEls = Op.getValueType().getVectorNumElements();
1645 MVT ElemVT = Op.getSimpleValueType().getVectorElementType();
1647 // If there is just one element, expand to INSERT_VECTOR_ELT.
1648 unsigned UniqueIdx;
1649 if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
1650 SDValue AccuV = DAG.getUNDEF(Op.getValueType());
1651 auto ElemV = Op->getOperand(UniqueIdx);
1652 SDValue IdxV = DAG.getConstant(UniqueIdx, DL, MVT::i64);
1653 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), AccuV,
1654 ElemV, IdxV);
1657 // Else emit a broadcast.
1658 if (SDValue ScalarV = getSplatValue(Op.getNode())) {
1659 // lower to VEC_BROADCAST
1660 MVT LegalResVT = MVT::getVectorVT(ElemVT, 256);
1662 auto AVL = DAG.getConstant(NumEls, DL, MVT::i32);
1663 return DAG.getNode(VEISD::VEC_BROADCAST, DL, LegalResVT, Op.getOperand(0),
1664 AVL);
1667 // Expand
1668 return SDValue();
1671 SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1672 unsigned Opcode = Op.getOpcode();
1673 if (ISD::isVPOpcode(Opcode))
1674 return lowerToVVP(Op, DAG);
1676 switch (Opcode) {
1677 default:
1678 llvm_unreachable("Should not custom lower this!");
1679 case ISD::ATOMIC_FENCE:
1680 return lowerATOMIC_FENCE(Op, DAG);
1681 case ISD::ATOMIC_SWAP:
1682 return lowerATOMIC_SWAP(Op, DAG);
1683 case ISD::BlockAddress:
1684 return lowerBlockAddress(Op, DAG);
1685 case ISD::ConstantPool:
1686 return lowerConstantPool(Op, DAG);
1687 case ISD::DYNAMIC_STACKALLOC:
1688 return lowerDYNAMIC_STACKALLOC(Op, DAG);
1689 case ISD::EH_SJLJ_LONGJMP:
1690 return lowerEH_SJLJ_LONGJMP(Op, DAG);
1691 case ISD::EH_SJLJ_SETJMP:
1692 return lowerEH_SJLJ_SETJMP(Op, DAG);
1693 case ISD::EH_SJLJ_SETUP_DISPATCH:
1694 return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
1695 case ISD::FRAMEADDR:
1696 return lowerFRAMEADDR(Op, DAG, *this, Subtarget);
1697 case ISD::GlobalAddress:
1698 return lowerGlobalAddress(Op, DAG);
1699 case ISD::GlobalTLSAddress:
1700 return lowerGlobalTLSAddress(Op, DAG);
1701 case ISD::INTRINSIC_WO_CHAIN:
1702 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
1703 case ISD::JumpTable:
1704 return lowerJumpTable(Op, DAG);
1705 case ISD::LOAD:
1706 return lowerLOAD(Op, DAG);
1707 case ISD::RETURNADDR:
1708 return lowerRETURNADDR(Op, DAG, *this, Subtarget);
1709 case ISD::BUILD_VECTOR:
1710 return lowerBUILD_VECTOR(Op, DAG);
1711 case ISD::STORE:
1712 return lowerSTORE(Op, DAG);
1713 case ISD::VASTART:
1714 return lowerVASTART(Op, DAG);
1715 case ISD::VAARG:
1716 return lowerVAARG(Op, DAG);
1718 case ISD::INSERT_VECTOR_ELT:
1719 return lowerINSERT_VECTOR_ELT(Op, DAG);
1720 case ISD::EXTRACT_VECTOR_ELT:
1721 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1723 #define ADD_BINARY_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
1724 #include "VVPNodes.def"
1725 return lowerToVVP(Op, DAG);
1728 /// } Custom Lower
1730 void VETargetLowering::ReplaceNodeResults(SDNode *N,
1731 SmallVectorImpl<SDValue> &Results,
1732 SelectionDAG &DAG) const {
1733 switch (N->getOpcode()) {
1734 case ISD::ATOMIC_SWAP:
1735 // Let LLVM expand the atomic swap instruction through LowerOperation.
1736 return;
1737 default:
1738 LLVM_DEBUG(N->dumpr(&DAG));
1739 llvm_unreachable("Do not know how to custom type legalize this operation!");
1743 /// JumpTable for VE.
1745 /// VE cannot generate a relocatable symbol in a jump table. VE cannot
1746 /// generate expressions using symbols from both the text segment and the
1747 /// data segment, such as:
1748 /// .4byte .LBB0_2-.LJTI0_0
1749 /// So, we instead generate an offset from the top of the function as a
1750 /// custom label, like below.
1751 /// .4byte .LBB0_2-<function name>
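/// For illustration, a PIC jump table for a function "fun" with two
/// destinations would then contain entries like:
///      .4byte  .LBB0_2-fun
///      .4byte  .LBB0_3-fun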
1753 unsigned VETargetLowering::getJumpTableEncoding() const {
1754 // Use custom label for PIC.
1755 if (isPositionIndependent())
1756 return MachineJumpTableInfo::EK_Custom32;
1758 // Otherwise, use the normal jump table encoding heuristics.
1759 return TargetLowering::getJumpTableEncoding();
1762 const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
1763 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
1764 unsigned Uid, MCContext &Ctx) const {
1765 assert(isPositionIndependent());
1767 // Generate a custom label for PIC like below.
1768 // .4byte .LBB0_2-<function name>
1769 const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
1770 MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data());
1771 const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);
1772 return MCBinaryExpr::createSub(Value, Base, Ctx);
1775 SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
1776 SelectionDAG &DAG) const {
1777 assert(isPositionIndependent());
1778 SDLoc DL(Table);
1779 Function *Function = &DAG.getMachineFunction().getFunction();
1780 assert(Function != nullptr);
1781 auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace());
1783 // In the jump table, we have the following values in PIC mode.
1784 // .4byte .LBB0_2-<function name>
1785 // We need to add this value and the address of this function to generate
1786 // the .LBB0_2 label correctly under PIC mode. So, we want to generate the
1787 // following instructions:
1788 // lea %reg, fun@gotoff_lo
1789 // and %reg, %reg, (32)0
1790 // lea.sl %reg, fun@gotoff_hi(%reg, %got)
1791 // In order to do so, we need to generate a correctly marked DAG node using
1792 // makeHiLoPair.
1793 SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy);
1794 SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
1795 VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1796 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy);
1797 return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo);
1800 Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
1801 MachineBasicBlock::iterator I,
1802 MachineBasicBlock *TargetBB,
1803 const DebugLoc &DL) const {
1804 MachineFunction *MF = MBB.getParent();
1805 MachineRegisterInfo &MRI = MF->getRegInfo();
1806 const VEInstrInfo *TII = Subtarget->getInstrInfo();
1808 const TargetRegisterClass *RC = &VE::I64RegClass;
1809 Register Tmp1 = MRI.createVirtualRegister(RC);
1810 Register Tmp2 = MRI.createVirtualRegister(RC);
1811 Register Result = MRI.createVirtualRegister(RC);
1813 if (isPositionIndependent()) {
1814 // Create the following instructions for local linkage PIC code.
1815 // lea %Tmp1, TargetBB@gotoff_lo
1816 // and %Tmp2, %Tmp1, (32)0
1817 // lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
1818 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
1819 .addImm(0)
1820 .addImm(0)
1821 .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_LO32);
1822 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
1823 .addReg(Tmp1, getKillRegState(true))
1824 .addImm(M0(32));
1825 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
1826 .addReg(VE::SX15)
1827 .addReg(Tmp2, getKillRegState(true))
1828 .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_HI32);
1829 } else {
1830 // Create the following instructions for non-PIC code.
1831 // lea %Tmp1, TargetBB@lo
1832 // and %Tmp2, %Tmp1, (32)0
1833 // lea.sl %Result, TargetBB@hi(%Tmp2)
1834 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
1835 .addImm(0)
1836 .addImm(0)
1837 .addMBB(TargetBB, VEMCExpr::VK_VE_LO32);
1838 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
1839 .addReg(Tmp1, getKillRegState(true))
1840 .addImm(M0(32));
1841 BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
1842 .addReg(Tmp2, getKillRegState(true))
1843 .addImm(0)
1844 .addMBB(TargetBB, VEMCExpr::VK_VE_HI32);
1846 return Result;
1849 Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
1850 MachineBasicBlock::iterator I,
1851 StringRef Symbol, const DebugLoc &DL,
1852 bool IsLocal = false,
1853 bool IsCall = false) const {
1854 MachineFunction *MF = MBB.getParent();
1855 MachineRegisterInfo &MRI = MF->getRegInfo();
1856 const VEInstrInfo *TII = Subtarget->getInstrInfo();
1858 const TargetRegisterClass *RC = &VE::I64RegClass;
1859 Register Result = MRI.createVirtualRegister(RC);
1861 if (isPositionIndependent()) {
1862 if (IsCall && !IsLocal) {
1863 // Create the following instructions for non-local linkage PIC code function
1864 // calls. These instructions use the IC and the magic number -24, so we expand
1865 // them in VEAsmPrinter.cpp from the GETFUNPLT pseudo instruction.
1866 // lea %Reg, Symbol@plt_lo(-24)
1867 // and %Reg, %Reg, (32)0
1868 // sic %s16
1869 // lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
1870 BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)
1871 .addExternalSymbol("abort");
1872 } else if (IsLocal) {
1873 Register Tmp1 = MRI.createVirtualRegister(RC);
1874 Register Tmp2 = MRI.createVirtualRegister(RC);
1875 // Create the following instructions for local linkage PIC code.
1876 // lea %Tmp1, Symbol@gotoff_lo
1877 // and %Tmp2, %Tmp1, (32)0
1878 // lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
1879 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
1880 .addImm(0)
1881 .addImm(0)
1882 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_LO32);
1883 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
1884 .addReg(Tmp1, getKillRegState(true))
1885 .addImm(M0(32));
1886 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
1887 .addReg(VE::SX15)
1888 .addReg(Tmp2, getKillRegState(true))
1889 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_HI32);
1890 } else {
1891 Register Tmp1 = MRI.createVirtualRegister(RC);
1892 Register Tmp2 = MRI.createVirtualRegister(RC);
1893 // Create the following instructions for non-local linkage PIC code.
1894 // lea %Tmp1, Symbol@got_lo
1895 // and %Tmp2, %Tmp1, (32)0
1896 // lea.sl %Tmp3, Symbol@got_hi(%Tmp2, %s15) ; %s15 is GOT
1897 // ld %Result, 0(%Tmp3)
1898 Register Tmp3 = MRI.createVirtualRegister(RC);
1899 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
1900 .addImm(0)
1901 .addImm(0)
1902 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_LO32);
1903 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
1904 .addReg(Tmp1, getKillRegState(true))
1905 .addImm(M0(32));
1906 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)
1907 .addReg(VE::SX15)
1908 .addReg(Tmp2, getKillRegState(true))
1909 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_HI32);
1910 BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)
1911 .addReg(Tmp3, getKillRegState(true))
1912 .addImm(0)
1913 .addImm(0);
1915 } else {
1916 Register Tmp1 = MRI.createVirtualRegister(RC);
1917 Register Tmp2 = MRI.createVirtualRegister(RC);
1918 // Create the following instructions for non-PIC code.
1919 // lea %Tmp1, Symbol@lo
1920 // and %Tmp2, %Tmp1, (32)0
1921 // lea.sl %Result, Symbol@hi(%Tmp2)
1922 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
1923 .addImm(0)
1924 .addImm(0)
1925 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32);
1926 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
1927 .addReg(Tmp1, getKillRegState(true))
1928 .addImm(M0(32));
1929 BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
1930 .addReg(Tmp2, getKillRegState(true))
1931 .addImm(0)
1932 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32);
1934 return Result;
1937 void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
1938 MachineBasicBlock *MBB,
1939 MachineBasicBlock *DispatchBB,
1940 int FI, int Offset) const {
1941 DebugLoc DL = MI.getDebugLoc();
1942 const VEInstrInfo *TII = Subtarget->getInstrInfo();
1944 Register LabelReg =
1945 prepareMBB(*MBB, MachineBasicBlock::iterator(MI), DispatchBB, DL);
1947 // Store the address of DispatchBB into the given jmpbuf[1], which holds the
1948 // next IC referenced by longjmp (throw) later.
1949 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
1950 addFrameReference(MIB, FI, Offset); // jmpbuf[1]
1951 MIB.addReg(LabelReg, getKillRegState(true));
1954 MachineBasicBlock *
1955 VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
1956 MachineBasicBlock *MBB) const {
1957 DebugLoc DL = MI.getDebugLoc();
1958 MachineFunction *MF = MBB->getParent();
1959 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1960 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
1961 MachineRegisterInfo &MRI = MF->getRegInfo();
1963 const BasicBlock *BB = MBB->getBasicBlock();
1964 MachineFunction::iterator I = ++MBB->getIterator();
1966 // Memory Reference.
1967 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
1968 MI.memoperands_end());
1969 Register BufReg = MI.getOperand(1).getReg();
1971 Register DstReg;
1973 DstReg = MI.getOperand(0).getReg();
1974 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
1975 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
1976 (void)TRI;
1977 Register MainDestReg = MRI.createVirtualRegister(RC);
1978 Register RestoreDestReg = MRI.createVirtualRegister(RC);
1980 // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate the following
1981 // instructions. SP/FP must be saved in the jmpbuf before `llvm.eh.sjlj.setjmp`.
1983 // ThisMBB:
1984 // buf[3] = %s17 iff %s17 is used as BP
1985 // buf[1] = RestoreMBB as IC after longjmp
1986 // # SjLjSetup RestoreMBB
1988 // MainMBB:
1989 // v_main = 0
1991 // SinkMBB:
1992 // v = phi(v_main, MainMBB, v_restore, RestoreMBB)
1993 // ...
1995 // RestoreMBB:
1996 // %s17 = buf[3] iff %s17 is used as BP
1997 // v_restore = 1
1998 // goto SinkMBB
2000 MachineBasicBlock *ThisMBB = MBB;
2001 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
2002 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
2003 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
2004 MF->insert(I, MainMBB);
2005 MF->insert(I, SinkMBB);
2006 MF->push_back(RestoreMBB);
2007 RestoreMBB->setHasAddressTaken();
2009 // Transfer the remainder of BB and its successor edges to SinkMBB.
2010 SinkMBB->splice(SinkMBB->begin(), MBB,
2011 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
2012 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
2014 // ThisMBB:
2015 Register LabelReg =
2016 prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL);
2018 // Store BP in buf[3] iff this function is using BP.
2019 const VEFrameLowering *TFI = Subtarget->getFrameLowering();
2020 if (TFI->hasBP(*MF)) {
2021 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2022 MIB.addReg(BufReg);
2023 MIB.addImm(0);
2024 MIB.addImm(24);
2025 MIB.addReg(VE::SX17);
2026 MIB.setMemRefs(MMOs);
2029 // Store IP in buf[1].
2030 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2031 MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.
2032 MIB.addImm(0);
2033 MIB.addImm(8);
2034 MIB.addReg(LabelReg, getKillRegState(true));
2035 MIB.setMemRefs(MMOs);
2037 // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2039 // Insert setup.
2040 MIB =
2041 BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);
2043 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2044 MIB.addRegMask(RegInfo->getNoPreservedMask());
2045 ThisMBB->addSuccessor(MainMBB);
2046 ThisMBB->addSuccessor(RestoreMBB);
2048 // MainMBB:
2049 BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
2050 .addImm(0)
2051 .addImm(0)
2052 .addImm(0);
2053 MainMBB->addSuccessor(SinkMBB);
2055 // SinkMBB:
2056 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
2057 .addReg(MainDestReg)
2058 .addMBB(MainMBB)
2059 .addReg(RestoreDestReg)
2060 .addMBB(RestoreMBB);
2062 // RestoreMBB:
2063 // Restore BP from buf[3] iff this function is using BP. The address of
2064 // buf is in SX10.
2065 // FIXME: Better to not use SX10 here
2066 if (TFI->hasBP(*MF)) {
2067 MachineInstrBuilder MIB =
2068 BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
2069 MIB.addReg(VE::SX10);
2070 MIB.addImm(0);
2071 MIB.addImm(24);
2072 MIB.setMemRefs(MMOs);
2074 BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
2075 .addImm(0)
2076 .addImm(0)
2077 .addImm(1);
2078 BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
2079 RestoreMBB->addSuccessor(SinkMBB);
2081 MI.eraseFromParent();
2082 return SinkMBB;
2085 MachineBasicBlock *
2086 VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
2087 MachineBasicBlock *MBB) const {
2088 DebugLoc DL = MI.getDebugLoc();
2089 MachineFunction *MF = MBB->getParent();
2090 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2091 MachineRegisterInfo &MRI = MF->getRegInfo();
2093 // Memory Reference.
2094 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
2095 MI.memoperands_end());
2096 Register BufReg = MI.getOperand(0).getReg();
2098 Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);
2099 // Since FP is only updated here but NOT referenced, it's treated as a GPR.
2100 Register FP = VE::SX9;
2101 Register SP = VE::SX11;
2103 MachineInstrBuilder MIB;
2105 MachineBasicBlock *ThisMBB = MBB;
2107 // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate the following instructions.
2109 // ThisMBB:
2110 // %fp = load buf[0]
2111 // %jmp = load buf[1]
2112 // %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
2113 // %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2114 // jmp %jmp
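// For reference, the jmpbuf layout assumed by this lowering is:
//   buf[0] (offset  0): FP, buf[1] (offset 8): IC to jump to,
//   buf[2] (offset 16): SP, buf[3] (offset 24): BP (only if BP is used).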
2116 // Reload FP.
2117 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);
2118 MIB.addReg(BufReg);
2119 MIB.addImm(0);
2120 MIB.addImm(0);
2121 MIB.setMemRefs(MMOs);
2123 // Reload IP.
2124 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);
2125 MIB.addReg(BufReg);
2126 MIB.addImm(0);
2127 MIB.addImm(8);
2128 MIB.setMemRefs(MMOs);
2130 // Copy BufReg to SX10 for later use in setjmp.
2131 // FIXME: Better to not use SX10 here
2132 BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)
2133 .addReg(BufReg)
2134 .addImm(0);
2136 // Reload SP.
2137 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);
2138 MIB.add(MI.getOperand(0)); // we can preserve the kill flags here.
2139 MIB.addImm(0);
2140 MIB.addImm(16);
2141 MIB.setMemRefs(MMOs);
2143 // Jump.
2144 BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))
2145 .addReg(Tmp, getKillRegState(true))
2146 .addImm(0);
2148 MI.eraseFromParent();
2149 return ThisMBB;
2152 MachineBasicBlock *
2153 VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
2154 MachineBasicBlock *BB) const {
2155 DebugLoc DL = MI.getDebugLoc();
2156 MachineFunction *MF = BB->getParent();
2157 MachineFrameInfo &MFI = MF->getFrameInfo();
2158 MachineRegisterInfo &MRI = MF->getRegInfo();
2159 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2160 int FI = MFI.getFunctionContextIndex();
2162 // Get a mapping of the call site numbers to all of the landing pads they're
2163 // associated with.
2164 DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
2165 unsigned MaxCSNum = 0;
2166 for (auto &MBB : *MF) {
2167 if (!MBB.isEHPad())
2168 continue;
2170 MCSymbol *Sym = nullptr;
2171 for (const auto &MI : MBB) {
2172 if (MI.isDebugInstr())
2173 continue;
2175 assert(MI.isEHLabel() && "expected EH_LABEL");
2176 Sym = MI.getOperand(0).getMCSymbol();
2177 break;
2180 if (!MF->hasCallSiteLandingPad(Sym))
2181 continue;
2183 for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
2184 CallSiteNumToLPad[CSI].push_back(&MBB);
2185 MaxCSNum = std::max(MaxCSNum, CSI);
2189 // Get an ordered list of the machine basic blocks for the jump table.
2190 std::vector<MachineBasicBlock *> LPadList;
2191 SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
2192 LPadList.reserve(CallSiteNumToLPad.size());
2194 for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
2195 for (auto &LP : CallSiteNumToLPad[CSI]) {
2196 LPadList.push_back(LP);
2197 InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
2201 assert(!LPadList.empty() &&
2202 "No landing pad destinations for the dispatch jump table!");
2204 // The %fn_context is allocated like below (from --print-after=sjljehprepare):
2205 // %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
2207 // This `[5 x i8*]` is the jmpbuf, so jmpbuf[1] is at FI+72.
2208 // The first `i64` is the call site, so the call site is at FI+8.
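// As a worked example of those offsets (with 8-byte pointers and alignment):
//   i8* at 0, i64 (callsite) at 8, [4 x i64] at 16, i8* at 48, i8* at 56,
//   [5 x i8*] (jmpbuf) at 64; hence jmpbuf[1] = 64 + 8 = 72.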
2209 static const int OffsetIC = 72;
2210 static const int OffsetCS = 8;
2212 // Create the MBBs for the dispatch code like the following:
2214 // ThisMBB:
2215 // Prepare DispatchBB address and store it to buf[1].
2216 // ...
2218 // DispatchBB:
2219 // %s15 = GETGOT iff isPositionIndependent
2220 // %callsite = load callsite
2221 // brgt.l.t #size of callsites, %callsite, DispContBB
2223 // TrapBB:
2224 // Call abort.
2226 // DispContBB:
2227 // %breg = address of jump table
2228 // %pc = load and calculate next pc from %breg and %callsite
2229 // jmp %pc
2231 // Shove the dispatch's address into the return slot in the function context.
2232 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
2233 DispatchBB->setIsEHPad(true);
2235 // TrapBB causes a trap, like `assert(0)`.
2236 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
2237 DispatchBB->addSuccessor(TrapBB);
2239 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
2240 DispatchBB->addSuccessor(DispContBB);
2242 // Insert MBBs.
2243 MF->push_back(DispatchBB);
2244 MF->push_back(DispContBB);
2245 MF->push_back(TrapBB);
2247 // Insert code to call abort in the TrapBB.
2248 Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL,
2249 /* Local */ false, /* Call */ true);
2250 BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)
2251 .addReg(Abort, getKillRegState(true))
2252 .addImm(0)
2253 .addImm(0);
2255 // Insert code into the entry block that creates and registers the function
2256 // context.
2257 setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC);
2259 // Create the jump table and associated information
2260 unsigned JTE = getJumpTableEncoding();
2261 MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);
2262 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
2264 const VERegisterInfo &RI = TII->getRegisterInfo();
2265 // Add a register mask with no preserved registers. This results in all
2266 // registers being marked as clobbered.
2267 BuildMI(DispatchBB, DL, TII->get(VE::NOP))
2268 .addRegMask(RI.getNoPreservedMask());
2270 if (isPositionIndependent()) {
2271 // Force generation of GETGOT, since the current implementation doesn't store
2272 // the GOT register.
2273 BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);
2276 // IReg is used as an index in a memory operand and therefore can't be SP
2277 const TargetRegisterClass *RC = &VE::I64RegClass;
2278 Register IReg = MRI.createVirtualRegister(RC);
2279 addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,
2280 OffsetCS);
2281 if (LPadList.size() < 64) {
2282 BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))
2283 .addImm(VECC::CC_ILE)
2284 .addImm(LPadList.size())
2285 .addReg(IReg)
2286 .addMBB(TrapBB);
2287 } else {
2288 assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
2289 Register TmpReg = MRI.createVirtualRegister(RC);
2290 BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)
2291 .addImm(0)
2292 .addImm(0)
2293 .addImm(LPadList.size());
2294 BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t))
2295 .addImm(VECC::CC_ILE)
2296 .addReg(TmpReg, getKillRegState(true))
2297 .addReg(IReg)
2298 .addMBB(TrapBB);
2301 Register BReg = MRI.createVirtualRegister(RC);
2302 Register Tmp1 = MRI.createVirtualRegister(RC);
2303 Register Tmp2 = MRI.createVirtualRegister(RC);
2305 if (isPositionIndependent()) {
2306 // Create the following instructions for local linkage PIC code.
2307 // lea %Tmp1, .LJTI0_0@gotoff_lo
2308 // and %Tmp2, %Tmp1, (32)0
2309 // lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2310 BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2311 .addImm(0)
2312 .addImm(0)
2313 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32);
2314 BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2315 .addReg(Tmp1, getKillRegState(true))
2316 .addImm(M0(32));
2317 BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)
2318 .addReg(VE::SX15)
2319 .addReg(Tmp2, getKillRegState(true))
2320 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32);
2321 } else {
2322 // Create the following instructions for non-PIC code.
2323 // lea %Tmp1, .LJTI0_0@lo
2324 // and %Tmp2, %Tmp1, (32)0
2325 // lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
2326 BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2327 .addImm(0)
2328 .addImm(0)
2329 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32);
2330 BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2331 .addReg(Tmp1, getKillRegState(true))
2332 .addImm(M0(32));
2333 BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)
2334 .addReg(Tmp2, getKillRegState(true))
2335 .addImm(0)
2336 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32);
2339 switch (JTE) {
2340 case MachineJumpTableInfo::EK_BlockAddress: {
2341 // Generate simple block address code for the non-PIC model.
2342 // sll %Tmp1, %IReg, 3
2343 // lds %TReg, 0(%Tmp1, %BReg)
2344 // bcfla %TReg
2346 Register TReg = MRI.createVirtualRegister(RC);
2347 Register Tmp1 = MRI.createVirtualRegister(RC);
2349 BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2350 .addReg(IReg, getKillRegState(true))
2351 .addImm(3);
2352 BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg)
2353 .addReg(BReg, getKillRegState(true))
2354 .addReg(Tmp1, getKillRegState(true))
2355 .addImm(0);
2356 BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2357 .addReg(TReg, getKillRegState(true))
2358 .addImm(0);
2359 break;
2361 case MachineJumpTableInfo::EK_Custom32: {
2362 // Generate block address code using differences from the function pointer
2363 // for the PIC model.
2364 // sll %Tmp1, %IReg, 2
2365 // ldl.zx %OReg, 0(%Tmp1, %BReg)
2366 // Prepare function address in BReg2.
2367 // adds.l %TReg, %BReg2, %OReg
2368 // bcfla %TReg
2370 assert(isPositionIndependent());
2371 Register OReg = MRI.createVirtualRegister(RC);
2372 Register TReg = MRI.createVirtualRegister(RC);
2373 Register Tmp1 = MRI.createVirtualRegister(RC);
2375 BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2376 .addReg(IReg, getKillRegState(true))
2377 .addImm(2);
2378 BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)
2379 .addReg(BReg, getKillRegState(true))
2380 .addReg(Tmp1, getKillRegState(true))
2381 .addImm(0);
2382 Register BReg2 =
2383 prepareSymbol(*DispContBB, DispContBB->end(),
2384 DispContBB->getParent()->getName(), DL, /* Local */ true);
2385 BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)
2386 .addReg(OReg, getKillRegState(true))
2387 .addReg(BReg2, getKillRegState(true));
2388 BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2389 .addReg(TReg, getKillRegState(true))
2390 .addImm(0);
2391 break;
2393 default:
2394 llvm_unreachable("Unexpected jump table encoding");
2397 // Add the jump table entries as successors to the MBB.
2398 SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
2399 for (auto &LP : LPadList)
2400 if (SeenMBBs.insert(LP).second)
2401 DispContBB->addSuccessor(LP);
2403 // N.B. the order the invoke BBs are processed in doesn't matter here.
2404 SmallVector<MachineBasicBlock *, 64> MBBLPads;
2405 const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
2406 for (MachineBasicBlock *MBB : InvokeBBs) {
2407 // Remove the landing pad successor from the invoke block and replace it
2408 // with the new dispatch block.
2409 // Keep a copy of Successors since it's modified inside the loop.
2410 SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
2411 MBB->succ_rend());
2412 // FIXME: Avoid quadratic complexity.
2413 for (auto MBBS : Successors) {
2414 if (MBBS->isEHPad()) {
2415 MBB->removeSuccessor(MBBS);
2416 MBBLPads.push_back(MBBS);
2420 MBB->addSuccessor(DispatchBB);
2422 // Find the invoke call and mark all of the callee-saved registers as
2423 // 'implicit defined' so that they're spilled. This prevents code from
2424 // moving instructions to before the EH block, where they will never be
2425 // executed.
2426 for (auto &II : reverse(*MBB)) {
2427 if (!II.isCall())
2428 continue;
2430 DenseMap<Register, bool> DefRegs;
2431 for (auto &MOp : II.operands())
2432 if (MOp.isReg())
2433 DefRegs[MOp.getReg()] = true;
2435 MachineInstrBuilder MIB(*MF, &II);
2436 for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
2437 Register Reg = SavedRegs[RI];
2438 if (!DefRegs[Reg])
2439 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
2442 break;
2446 // Mark all former landing pads as non-landing pads. The dispatch is the only
2447 // landing pad now.
2448 for (auto &LP : MBBLPads)
2449 LP->setIsEHPad(false);
2451 // The instruction is gone now.
2452 MI.eraseFromParent();
2453 return BB;
2456 MachineBasicBlock *
2457 VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2458 MachineBasicBlock *BB) const {
2459 switch (MI.getOpcode()) {
2460 default:
2461 llvm_unreachable("Unknown Custom Instruction!");
2462 case VE::EH_SjLj_LongJmp:
2463 return emitEHSjLjLongJmp(MI, BB);
2464 case VE::EH_SjLj_SetJmp:
2465 return emitEHSjLjSetJmp(MI, BB);
2466 case VE::EH_SjLj_Setup_Dispatch:
2467 return emitSjLjDispatchBlock(MI, BB);
2471 static bool isI32Insn(const SDNode *User, const SDNode *N) {
2472 switch (User->getOpcode()) {
2473 default:
2474 return false;
2475 case ISD::ADD:
2476 case ISD::SUB:
2477 case ISD::MUL:
2478 case ISD::SDIV:
2479 case ISD::UDIV:
2480 case ISD::SETCC:
2481 case ISD::SMIN:
2482 case ISD::SMAX:
2483 case ISD::SHL:
2484 case ISD::SRA:
2485 case ISD::BSWAP:
2486 case ISD::SINT_TO_FP:
2487 case ISD::UINT_TO_FP:
2488 case ISD::BR_CC:
2489 case ISD::BITCAST:
2490 case ISD::ATOMIC_CMP_SWAP:
2491 case ISD::ATOMIC_SWAP:
2492 return true;
2493 case ISD::SRL:
2494 if (N->getOperand(0).getOpcode() != ISD::SRL)
2495 return true;
2496 // (srl (trunc (srl ...))) may be optimized by combining the srls, so
2497 // don't optimize the trunc here.
2498 return false;
2499 case ISD::SELECT_CC:
2500 if (User->getOperand(2).getNode() != N &&
2501 User->getOperand(3).getNode() != N)
2502 return true;
2503 LLVM_FALLTHROUGH;
2504 case ISD::AND:
2505 case ISD::OR:
2506 case ISD::XOR:
2507 case ISD::SELECT:
2508 case ISD::CopyToReg:
2509 // Check all uses of selects, bit operations, and copies. If all of them
2510 // are safe, the truncate can be optimized to an extract_subreg.
2511 for (SDNode::use_iterator UI = User->use_begin(), UE = User->use_end();
2512 UI != UE; ++UI) {
2513 switch ((*UI)->getOpcode()) {
2514 default:
2515 // If the use is an instruction which treats the source operand as i32,
2516 // it is safe to avoid truncate here.
2517 if (isI32Insn(*UI, N))
2518 continue;
2519 break;
2520 case ISD::ANY_EXTEND:
2521 case ISD::SIGN_EXTEND:
2522 case ISD::ZERO_EXTEND: {
2523 // Special optimization for the combination of ext and trunc.
2524 // (ext ... (select ... (trunc ...))) is safe to avoid the truncate here
2525 // since this truncate instruction clears the upper 32 bits, which are filled
2526 // by one of the ext instructions later.
2527 assert(N->getValueType(0) == MVT::i32 &&
2528 "find truncate to not i32 integer");
2529 if (User->getOpcode() == ISD::SELECT_CC ||
2530 User->getOpcode() == ISD::SELECT)
2531 continue;
2532 break;
2535 return false;
2537 return true;
2541 // Optimize TRUNCATE in DAG combining. Optimizing it in CUSTOM lowering is
2542 // sometimes too early. Optimizing it in DAG pattern matching in VEInstrInfo.td
2543 // is sometimes too late. So, we do it here instead.
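// For example, a (truncate i64 %x to i32) whose uses all pass the isI32Insn()
// check below is rewritten into an EXTRACT_SUBREG of %x with sub_i32, so no
// separate truncating instruction is emitted.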
2544 SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
2545 DAGCombinerInfo &DCI) const {
2546 assert(N->getOpcode() == ISD::TRUNCATE &&
2547 "Should be called with a TRUNCATE node");
2549 SelectionDAG &DAG = DCI.DAG;
2550 SDLoc DL(N);
2551 EVT VT = N->getValueType(0);
2553 // We prefer to do this when all types are legal.
2554 if (!DCI.isAfterLegalizeDAG())
2555 return SDValue();
2557 // Skip combining TRUNCATE for now if the operand of TRUNCATE might be a constant.
2558 if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC &&
2559 isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) &&
2560 isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
2561 return SDValue();
2563 // Check all uses of this TRUNCATE.
2564 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
2565 ++UI) {
2566 SDNode *User = *UI;
2568 // Make sure that we're not going to replace TRUNCATE for non-i32
2569 // instructions.
2571 // FIXME: Although we could sometimes handle this, and it does occur in
2572 // practice that one of the condition inputs to the select is also one of
2573 // the outputs, we currently can't deal with this.
2574 if (isI32Insn(User, N))
2575 continue;
2577 return SDValue();
2580 SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
2581 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT,
2582 N->getOperand(0), SubI32),
2586 SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
2587 DAGCombinerInfo &DCI) const {
2588 switch (N->getOpcode()) {
2589 default:
2590 break;
2591 case ISD::TRUNCATE:
2592 return combineTRUNCATE(N, DCI);
2595 return SDValue();
2598 //===----------------------------------------------------------------------===//
2599 // VE Inline Assembly Support
2600 //===----------------------------------------------------------------------===//
2602 VETargetLowering::ConstraintType
2603 VETargetLowering::getConstraintType(StringRef Constraint) const {
2604 if (Constraint.size() == 1) {
2605 switch (Constraint[0]) {
2606 default:
2607 break;
2608 case 'v': // vector registers
2609 return C_RegisterClass;
2612 return TargetLowering::getConstraintType(Constraint);
2615 std::pair<unsigned, const TargetRegisterClass *>
2616 VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
2617 StringRef Constraint,
2618 MVT VT) const {
2619 const TargetRegisterClass *RC = nullptr;
2620 if (Constraint.size() == 1) {
2621 switch (Constraint[0]) {
2622 default:
2623 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
2624 case 'r':
2625 RC = &VE::I64RegClass;
2626 break;
2627 case 'v':
2628 RC = &VE::V64RegClass;
2629 break;
2631 return std::make_pair(0U, RC);
2634 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
2637 //===----------------------------------------------------------------------===//
2638 // VE Target Optimization Support
2639 //===----------------------------------------------------------------------===//
2641 unsigned VETargetLowering::getMinimumJumpTableEntries() const {
2642 // Specify 8 for the PIC model to reduce the impact of PIC load instructions.
2643 if (isJumpTableRelative())
2644 return 8;
2646 return TargetLowering::getMinimumJumpTableEntries();
2649 bool VETargetLowering::hasAndNot(SDValue Y) const {
2650 EVT VT = Y.getValueType();
2652 // VE doesn't have a vector and-not instruction.
2653 if (VT.isVector())
2654 return false;
2656 // VE allows different immediate values for X and Y in ~X & Y.
2657 // Only simm7 works for X, and only mimm works for Y on VE. However, this
2658 // function is used to check whether an immediate value is OK for an and-not
2659 // instruction as both X and Y. Generating an additional instruction to
2660 // materialize an immediate value is no good, since the purpose of this
2661 // function is to convert a series of 3 instructions into another series of
2662 // 3 instructions with better parallelism. Therefore, we return false
2663 // for all immediate values for now.
2664 // FIXME: Change hasAndNot function to have two operands to make it work
2665 // correctly with Aurora VE.
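// Put differently, for an illustrative pattern like (and (xor %x, -1), C),
// this hook only sees the single value C and cannot tell whether C would be
// used as X (needing simm7) or as Y (needing mimm), so constants are
// conservatively rejected below.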
2666 if (isa<ConstantSDNode>(Y))
2667 return false;
2669 // It's ok for generic registers.
2670 return true;
2673 /// \returns the VVP_* SDNode opcode corresponding to \p Opcode.
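/// For example, assuming VVPNodes.def registers an ADD entry, both ISD::ADD
/// and ISD::VP_ADD map to VEISD::VVP_ADD here.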
2674 static Optional<unsigned> getVVPOpcode(unsigned Opcode) {
2675 switch (Opcode) {
2676 #define HANDLE_VP_TO_VVP(VPOPC, VVPNAME) \
2677 case ISD::VPOPC: \
2678 return VEISD::VVPNAME;
2679 #define ADD_VVP_OP(VVPNAME, SDNAME) \
2680 case VEISD::VVPNAME: \
2681 case ISD::SDNAME: \
2682 return VEISD::VVPNAME;
2683 #include "VVPNodes.def"
2685 return None;
2688 SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
2689 // Can we represent this as a VVP node?
2690 const unsigned Opcode = Op->getOpcode();
2691 auto VVPOpcodeOpt = getVVPOpcode(Opcode);
2692 if (!VVPOpcodeOpt.hasValue())
2693 return SDValue();
2694 unsigned VVPOpcode = VVPOpcodeOpt.getValue();
2695 const bool FromVP = ISD::isVPOpcode(Opcode);
2697 // The representative and legalized vector type of this operation.
2698 SDLoc DL(Op);
2699 MVT MaskVT = MVT::v256i1; // TODO: packed mode.
2700 EVT OpVecVT = Op.getValueType();
2701 EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);
2703 SDValue AVL;
2704 SDValue Mask;
2706 if (FromVP) {
2707 // All upstream VP SDNodes always have a mask and an AVL.
2708 auto MaskIdx = ISD::getVPMaskIdx(Opcode).getValue();
2709 auto AVLIdx = ISD::getVPExplicitVectorLengthIdx(Opcode).getValue();
2710 Mask = Op->getOperand(MaskIdx);
2711 AVL = Op->getOperand(AVLIdx);
2713 } else {
2714 // Materialize the VL parameter.
2715 AVL = DAG.getConstant(OpVecVT.getVectorNumElements(), DL, MVT::i32);
2716 SDValue ConstTrue = DAG.getConstant(1, DL, MVT::i32);
2717 Mask = DAG.getNode(VEISD::VEC_BROADCAST, DL, MaskVT,
2718 ConstTrue); // emit a VEISD::VEC_BROADCAST here.
2721 // Categories we are interested in.
2722 bool IsBinaryOp = false;
2724 switch (VVPOpcode) {
2725 #define ADD_BINARY_VVP_OP(VVPNAME, ...) \
2726 case VEISD::VVPNAME: \
2727 IsBinaryOp = true; \
2728 break;
2729 #include "VVPNodes.def"
2732 if (IsBinaryOp) {
2733 assert(LegalVecVT.isSimple());
2734 return DAG.getNode(VVPOpcode, DL, LegalVecVT, Op->getOperand(0),
2735 Op->getOperand(1), Mask, AVL);
2737 llvm_unreachable("lowerToVVP called for unexpected SDNode.");
2740 SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2741 SelectionDAG &DAG) const {
2742 assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
2743 MVT VT = Op.getOperand(0).getSimpleValueType();
2745 // Special treatment for packed V64 types.
2746 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
2747 (void)VT;
2748 // Example code:
2749 // %packed_v = extractelt %vr, %idx / 2
2750 // %v = %packed_v >> (%idx % 2 * 32)
2751 // %res = %v & 0xffffffff
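// For instance, %idx = 3 reads 64-bit element 1 of the vector register and,
// since the index is odd, keeps its low 32 bits (shift = 0); an even index
// would select the upper 32 bits instead (shift = 32).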
2753 SDValue Vec = Op.getOperand(0);
2754 SDValue Idx = Op.getOperand(1);
2755 SDLoc DL(Op);
2756 SDValue Result = Op;
2757 if (0 /* Idx->isConstant() */) {
2758 // TODO: optimized implementation using constant values
2759 } else {
2760 SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
2761 SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
2762 SDValue PackedElt =
2763 SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
2764 SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
2765 SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
2766 SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
2767 Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
2768 PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
2769 SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
2770 PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
2771 SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
2772 Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
2773 MVT::i32, PackedElt, SubI32),
2776 if (Op.getSimpleValueType() == MVT::f32) {
2777 Result = DAG.getBitcast(MVT::f32, Result);
2778 } else {
2779 assert(Op.getSimpleValueType() == MVT::i32);
2782 return Result;
2785 SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
2786 SelectionDAG &DAG) const {
2787 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
2788 MVT VT = Op.getOperand(0).getSimpleValueType();
2790 // Special treatment for packed V64 types.
2791 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
2792 (void)VT;
2793 // The v512i32 and v512f32 elements start from the upper bits (0..31). These
2794 // "upper bits" require a `val << 32` from a C implementation's point of view.
2796 // Example code:
2797 // %packed_elt = extractelt %vr, (%idx >> 1)
2798 // %shift = ((%idx & 1) ^ 1) << 5
2799 // %packed_elt &= 0xffffffff00000000 >> shift
2800 // %packed_elt |= (zext %val) << shift
2801 // %vr = insertelt %vr, %packed_elt, (%idx >> 1)
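// For instance, %idx = 2 places the value into the upper 32 bits of 64-bit
// element 1 (shift = 32), while %idx = 3 would place it into the lower 32
// bits of the same element (shift = 0).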
2803 SDLoc DL(Op);
2804 SDValue Vec = Op.getOperand(0);
2805 SDValue Val = Op.getOperand(1);
2806 SDValue Idx = Op.getOperand(2);
2807 if (Idx.getSimpleValueType() == MVT::i32)
2808 Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
2809 if (Val.getSimpleValueType() == MVT::f32)
2810 Val = DAG.getBitcast(MVT::i32, Val);
2811 assert(Val.getSimpleValueType() == MVT::i32);
2812 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
2814 SDValue Result = Op;
2815 if (0 /* Idx->isConstant()*/) {
2816 // TODO: optimized implementation using constant values
2817 } else {
2818 SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
2819 SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
2820 SDValue PackedElt =
2821 SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
2822 SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
2823 SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
2824 SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
2825 Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
2826 SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
2827 Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
2828 PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
2829 Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
2830 PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
2831 Result =
2832 SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
2833 {HalfIdx, PackedElt, Vec}),
2836 return Result;