1 //===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the interfaces that Hexagon uses to lower LLVM code
10 // into a selection DAG.
12 //===----------------------------------------------------------------------===//
14 #include "HexagonISelLowering.h"
16 #include "HexagonMachineFunctionInfo.h"
17 #include "HexagonRegisterInfo.h"
18 #include "HexagonSubtarget.h"
19 #include "HexagonTargetMachine.h"
20 #include "HexagonTargetObjectFile.h"
21 #include "llvm/ADT/APInt.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/CodeGen/CallingConvLower.h"
26 #include "llvm/CodeGen/MachineFrameInfo.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineMemOperand.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/RuntimeLibcalls.h"
31 #include "llvm/CodeGen/SelectionDAG.h"
32 #include "llvm/CodeGen/TargetCallingConv.h"
33 #include "llvm/CodeGen/ValueTypes.h"
34 #include "llvm/IR/BasicBlock.h"
35 #include "llvm/IR/CallingConv.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/IR/GlobalValue.h"
40 #include "llvm/IR/InlineAsm.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/Intrinsics.h"
43 #include "llvm/IR/IntrinsicInst.h"
44 #include "llvm/IR/Module.h"
45 #include "llvm/IR/Type.h"
46 #include "llvm/IR/Value.h"
47 #include "llvm/MC/MCRegisterInfo.h"
48 #include "llvm/Support/Casting.h"
49 #include "llvm/Support/CodeGen.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/Debug.h"
52 #include "llvm/Support/ErrorHandling.h"
53 #include "llvm/Support/MathExtras.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include "llvm/Target/TargetMachine.h"
// Debug category name for this file; LLVM_DEBUG is used further below.
65 #define DEBUG_TYPE "hexagon-lowering"
// Command-line options tuning Hexagon lowering. All of them are cl::Hidden;
// the cl::init value is the default and cl::desc states the purpose.
// Jump table emission toggle (default: true).
67 static cl::opt
<bool> EmitJumpTables("hexagon-emit-jump-tables",
68 cl::init(true), cl::Hidden
,
69 cl::desc("Control jump table emission on Hexagon target"));
// SDNode scheduling toggle (default: false).
71 static cl::opt
<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
72 cl::Hidden
, cl::ZeroOrMore
, cl::init(false),
73 cl::desc("Enable Hexagon SDNode scheduling"));
// Fast-math processing toggle (default: false).
75 static cl::opt
<bool> EnableFastMath("ffast-math",
76 cl::Hidden
, cl::ZeroOrMore
, cl::init(false),
77 cl::desc("Enable Fast Math processing"));
// Jump table threshold (default: 5). How it is applied is not visible here.
79 static cl::opt
<int> MinimumJumpTables("minimum-jump-tables",
80 cl::Hidden
, cl::ZeroOrMore
, cl::init(5),
81 cl::desc("Set minimum jump tables"));
// Store-count limits for inlining memcpy/memmove/memset. The "-Os" variants
// are presumably the limits used when optimizing for size (going by the
// option and variable names) - confirm against where they are consumed.
83 static cl::opt
<int> MaxStoresPerMemcpyCL("max-store-memcpy",
84 cl::Hidden
, cl::ZeroOrMore
, cl::init(6),
85 cl::desc("Max #stores to inline memcpy"));
87 static cl::opt
<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os",
88 cl::Hidden
, cl::ZeroOrMore
, cl::init(4),
89 cl::desc("Max #stores to inline memcpy"));
91 static cl::opt
<int> MaxStoresPerMemmoveCL("max-store-memmove",
92 cl::Hidden
, cl::ZeroOrMore
, cl::init(6),
93 cl::desc("Max #stores to inline memmove"));
95 static cl::opt
<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os",
96 cl::Hidden
, cl::ZeroOrMore
, cl::init(4),
97 cl::desc("Max #stores to inline memmove"));
99 static cl::opt
<int> MaxStoresPerMemsetCL("max-store-memset",
100 cl::Hidden
, cl::ZeroOrMore
, cl::init(8),
101 cl::desc("Max #stores to inline memset"));
103 static cl::opt
<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
104 cl::Hidden
, cl::ZeroOrMore
, cl::init(4),
105 cl::desc("Max #stores to inline memset"));
// Unaligned-load rewriting toggle (default: false).
107 static cl::opt
<bool> AlignLoads("hexagon-align-loads",
108 cl::Hidden
, cl::init(false),
109 cl::desc("Rewrite unaligned loads as a pair of aligned loads"));
// CCState extension that additionally remembers a parameter count supplied
// by the caller (NumNamedArgs) and exposes it via getNumNamedVarArgParams().
// The callers visible in this file pass the function type's getNumParams().
114 class HexagonCCState
: public CCState
{
// Count handed to the constructor; defaults to 0.
115 unsigned NumNamedVarArgParams
= 0;
// Forwards CC, IsVarArg, MF, locs and C to CCState and stores NumNamedArgs.
118 HexagonCCState(CallingConv::ID CC
, bool IsVarArg
, MachineFunction
&MF
,
119 SmallVectorImpl
<CCValAssign
> &locs
, LLVMContext
&C
,
120 unsigned NumNamedArgs
)
121 : CCState(CC
, IsVarArg
, MF
, locs
, C
),
122 NumNamedVarArgParams(NumNamedArgs
) {}
// Returns the count stored at construction time.
123 unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams
; }
126 } // end anonymous namespace
129 // Implement calling convention for Hexagon.
// CC_SkipOdd - Calling-convention helper: if the first unallocated register
// in R0..R5 sits at an odd index, allocate (i.e. skip) it so that the next
// allocation starts at an even index. Only State is used by the visible code;
// the other parameters are not referenced.
131 static bool CC_SkipOdd(unsigned &ValNo
, MVT
&ValVT
, MVT
&LocVT
,
132 CCValAssign::LocInfo
&LocInfo
,
133 ISD::ArgFlagsTy
&ArgFlags
, CCState
&State
) {
// Candidate argument registers, in allocation order.
134 static const MCPhysReg ArgRegs
[] = {
135 Hexagon::R0
, Hexagon::R1
, Hexagon::R2
,
136 Hexagon::R3
, Hexagon::R4
, Hexagon::R5
138 const unsigned NumArgRegs
= array_lengthof(ArgRegs
);
139 unsigned RegNum
= State
.getFirstUnallocated(ArgRegs
);
141 // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
// RegNum == NumArgRegs is excluded so ArgRegs is never indexed past its end.
142 if (RegNum
!= NumArgRegs
&& RegNum
% 2 == 1)
143 State
.AllocateReg(ArgRegs
[RegNum
]);
145 // Always return false here, as this function only makes sure that the first
146 // unallocated register has an even register number and does not actually
147 // allocate a register for the current argument.
151 #include "HexagonGenCallingConv.inc"
// LowerINTRINSIC_WO_CHAIN - Custom lowering hook taking the node (Op) and
// the DAG. NOTE(review): only the signature is visible in this listing; the
// body is not shown, so its behavior cannot be documented from here.
155 HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op
, SelectionDAG
&DAG
)
160 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
161 /// by "Src" to address "Dst" of size "Size". Alignment information is
162 /// specified by the specific parameter attribute. The copy will be passed as
163 /// a byval function parameter. Sometimes what we are copying is the end of a
164 /// larger object, the part that does not fit in registers.
///
/// The size and alignment both come from Flags (getByValSize/getByValAlign).
/// Returns the node produced by DAG.getMemcpy, chained on Chain.
165 static SDValue
CreateCopyOfByValArgument(SDValue Src
, SDValue Dst
,
166 SDValue Chain
, ISD::ArgFlagsTy Flags
,
167 SelectionDAG
&DAG
, const SDLoc
&dl
) {
// Byte count of the byval aggregate as an i32 constant.
168 SDValue SizeNode
= DAG
.getConstant(Flags
.getByValSize(), dl
, MVT::i32
);
// Note the argument order: destination first, then source.
169 return DAG
.getMemcpy(Chain
, dl
, Dst
, Src
, SizeNode
, Flags
.getByValAlign(),
170 /*isVolatile=*/false, /*AlwaysInline=*/false,
171 /*isTailCall=*/false,
172 MachinePointerInfo(), MachinePointerInfo());
// CanLowerReturn - Check whether the return values described by Outs can be
// assigned locations by the return calling convention. Uses
// RetCC_Hexagon_HVX when the subtarget reports useHVXOps(), and
// RetCC_Hexagon otherwise. Returns the result of CCState::CheckReturn.
176 HexagonTargetLowering::CanLowerReturn(
177 CallingConv::ID CallConv
, MachineFunction
&MF
, bool IsVarArg
,
178 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
179 LLVMContext
&Context
) const {
// Scratch location list required by CCState; not inspected afterwards here.
180 SmallVector
<CCValAssign
, 16> RVLocs
;
181 CCState
CCInfo(CallConv
, IsVarArg
, MF
, RVLocs
, Context
);
183 if (MF
.getSubtarget
<HexagonSubtarget
>().useHVXOps())
184 return CCInfo
.CheckReturn(Outs
, RetCC_Hexagon_HVX
);
185 return CCInfo
.CheckReturn(Outs
, RetCC_Hexagon
);
188 // LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
189 // passed by value, the function prototype is modified to return void and
190 // the value is stored in memory pointed by a pointer passed by caller.
//
// Each value in OutVals is copied into the register chosen for it by the
// return calling convention, and a HexagonISD::RET_FLAG node is built whose
// operands are the chain, the return registers and the final glue value.
192 HexagonTargetLowering::LowerReturn(SDValue Chain
, CallingConv::ID CallConv
,
194 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
195 const SmallVectorImpl
<SDValue
> &OutVals
,
196 const SDLoc
&dl
, SelectionDAG
&DAG
) const {
197 // CCValAssign - represent the assignment of the return value to locations.
198 SmallVector
<CCValAssign
, 16> RVLocs
;
200 // CCState - Info about the registers and stack slot.
201 CCState
CCInfo(CallConv
, IsVarArg
, DAG
.getMachineFunction(), RVLocs
,
204 // Analyze return values of ISD::RET
// NOTE(review): the RetCC_Hexagon call below presumably belongs to an else
// branch (as in CanLowerReturn); the branch keyword is not visible in this
// listing - confirm.
205 if (Subtarget
.useHVXOps())
206 CCInfo
.AnalyzeReturn(Outs
, RetCC_Hexagon_HVX
);
208 CCInfo
.AnalyzeReturn(Outs
, RetCC_Hexagon
);
// Operand 0 is reserved for the chain; it is overwritten after the loop.
211 SmallVector
<SDValue
, 4> RetOps(1, Chain
);
213 // Copy the result values into the output registers.
214 for (unsigned i
= 0; i
!= RVLocs
.size(); ++i
) {
215 CCValAssign
&VA
= RVLocs
[i
];
217 Chain
= DAG
.getCopyToReg(Chain
, dl
, VA
.getLocReg(), OutVals
[i
], Flag
);
219 // Guarantee that all emitted copies are stuck together with flags.
220 Flag
= Chain
.getValue(1);
221 RetOps
.push_back(DAG
.getRegister(VA
.getLocReg(), VA
.getLocVT()));
224 RetOps
[0] = Chain
; // Update chain.
226 // Add the flag if we have it.
228 RetOps
.push_back(Flag
);
230 return DAG
.getNode(HexagonISD::RET_FLAG
, dl
, MVT::Other
, RetOps
);
// mayBeEmittedAsTailCall - Decide whether the call instruction CI may be
// emitted as a tail call. The visible check looks at CI->isTailCall() and at
// the "disable-tail-calls" attribute of the function containing CI.
// NOTE(review): the return statements are not visible in this listing.
233 bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst
*CI
) const {
234 // If either no tail call or told not to tail call at all, don't.
// CI -> parent basic block -> parent function -> function attribute.
236 CI
->getParent()->getParent()->getFnAttribute("disable-tail-calls");
237 if (!CI
->isTailCall() || Attr
.getValueAsString() == "true")
// getRegisterByName - Map a register name string to a register number.
// The only mapping visible here is "r19" -> Hexagon::R19; the visible failure
// path reports a fatal error. VT and DAG are not used by the visible code.
243 unsigned HexagonTargetLowering::getRegisterByName(const char* RegName
, EVT VT
,
244 SelectionDAG
&DAG
) const {
245 // Just support r19, the linux kernel uses it.
246 unsigned Reg
= StringSwitch
<unsigned>(RegName
)
247 .Case("r19", Hexagon::R19
)
// NOTE(review): presumably reached only when no name matched - the guarding
// condition is not visible in this listing.
252 report_fatal_error("Invalid register name global variable");
255 /// LowerCallResult - Lower the result values of an ISD::CALL into the
256 /// appropriate copies out of appropriate physical registers. This assumes that
257 /// Chain/Glue are the input chain/glue to use, and that TheCall is the call
258 /// being lowered. One value per return location is appended to InVals.
260 SDValue
HexagonTargetLowering::LowerCallResult(
261 SDValue Chain
, SDValue Glue
, CallingConv::ID CallConv
, bool IsVarArg
,
262 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
263 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
,
264 const SmallVectorImpl
<SDValue
> &OutVals
, SDValue Callee
) const {
265 // Assign locations to each value returned by this call.
266 SmallVector
<CCValAssign
, 16> RVLocs
;
268 CCState
CCInfo(CallConv
, IsVarArg
, DAG
.getMachineFunction(), RVLocs
,
// NOTE(review): the RetCC_Hexagon call below presumably belongs to an else
// branch; the branch keyword is not visible in this listing - confirm.
271 if (Subtarget
.useHVXOps())
272 CCInfo
.AnalyzeCallResult(Ins
, RetCC_Hexagon_HVX
);
274 CCInfo
.AnalyzeCallResult(Ins
, RetCC_Hexagon
);
276 // Copy all of the result registers out of their specified physreg.
277 for (unsigned i
= 0; i
!= RVLocs
.size(); ++i
) {
279 if (RVLocs
[i
].getValVT() == MVT::i1
) {
280 // Return values of type MVT::i1 require special handling. The reason
281 // is that MVT::i1 is associated with the PredRegs register class, but
282 // values of that type are still returned in R0. Generate an explicit
283 // copy into a predicate register from R0, and treat the value of the
284 // predicate register as the call result.
285 auto &MRI
= DAG
.getMachineFunction().getRegInfo();
286 SDValue FR0
= DAG
.getCopyFromReg(Chain
, dl
, RVLocs
[i
].getLocReg(),
288 // FR0 = (Value, Chain, Glue)
289 unsigned PredR
= MRI
.createVirtualRegister(&Hexagon::PredRegsRegClass
);
290 SDValue TPR
= DAG
.getCopyToReg(FR0
.getValue(1), dl
, PredR
,
291 FR0
.getValue(0), FR0
.getValue(2));
292 // TPR = (Chain, Glue)
293 // Don't glue this CopyFromReg, because it copies from a virtual
294 // register. If it is glued to the call, InstrEmitter will add it
295 // as an implicit def to the call (EmitMachineNode).
296 RetVal
= DAG
.getCopyFromReg(TPR
.getValue(0), dl
, PredR
, MVT::i1
);
// Continue the chain/glue from the copy into the predicate register.
297 Glue
= TPR
.getValue(1);
298 Chain
= TPR
.getValue(0);
// Non-i1 path: copy straight out of the assigned physical register.
300 RetVal
= DAG
.getCopyFromReg(Chain
, dl
, RVLocs
[i
].getLocReg(),
301 RVLocs
[i
].getValVT(), Glue
);
// RetVal = (Value, Chain, Glue)
302 Glue
= RetVal
.getValue(2);
303 Chain
= RetVal
.getValue(1);
305 InVals
.push_back(RetVal
.getValue(0));
311 /// LowerCall - Functions arguments are copied from virtual regs to
312 /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
///
/// Visible outline: analyze the outgoing operands with the Hexagon calling
/// convention, decide whether the call can remain a tail call, place each
/// argument in a register or a stack slot, then emit either
/// HexagonISD::TC_RETURN (tail call) or HexagonISD::CALL/CALLnr followed by
/// CALLSEQ_END and LowerCallResult.
314 HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo
&CLI
,
315 SmallVectorImpl
<SDValue
> &InVals
) const {
// Unpack the pieces of the call description that are used below.
316 SelectionDAG
&DAG
= CLI
.DAG
;
318 SmallVectorImpl
<ISD::OutputArg
> &Outs
= CLI
.Outs
;
319 SmallVectorImpl
<SDValue
> &OutVals
= CLI
.OutVals
;
320 SmallVectorImpl
<ISD::InputArg
> &Ins
= CLI
.Ins
;
321 SDValue Chain
= CLI
.Chain
;
322 SDValue Callee
= CLI
.Callee
;
323 CallingConv::ID CallConv
= CLI
.CallConv
;
324 bool IsVarArg
= CLI
.IsVarArg
;
325 bool DoesNotReturn
= CLI
.DoesNotReturn
;
// The call returns a struct via sret if the first outgoing argument says so.
327 bool IsStructRet
= Outs
.empty() ? false : Outs
[0].Flags
.isSRet();
328 MachineFunction
&MF
= DAG
.getMachineFunction();
329 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
330 auto PtrVT
= getPointerTy(MF
.getDataLayout());
// Parameter count of the callee's function type when a call instruction is
// available (the alternative arm of this conditional is not visible here).
332 unsigned NumParams
= CLI
.CS
.getInstruction()
333 ? CLI
.CS
.getFunctionType()->getNumParams()
335 if (GlobalAddressSDNode
*GAN
= dyn_cast
<GlobalAddressSDNode
>(Callee
))
336 Callee
= DAG
.getTargetGlobalAddress(GAN
->getGlobal(), dl
, MVT::i32
);
338 // Analyze operands of the call, assigning locations to each operand.
339 SmallVector
<CCValAssign
, 16> ArgLocs
;
340 HexagonCCState
CCInfo(CallConv
, IsVarArg
, MF
, ArgLocs
, *DAG
.getContext(),
// NOTE(review): the CC_Hexagon call below presumably belongs to an else
// branch; the branch keyword is not visible in this listing - confirm.
343 if (Subtarget
.useHVXOps())
344 CCInfo
.AnalyzeCallOperands(Outs
, CC_Hexagon_HVX
);
346 CCInfo
.AnalyzeCallOperands(Outs
, CC_Hexagon
);
// The caller's "disable-tail-calls" attribute vetoes tail calls outright.
348 auto Attr
= MF
.getFunction().getFnAttribute("disable-tail-calls");
349 if (Attr
.getValueAsString() == "true")
350 CLI
.IsTailCall
= false;
352 if (CLI
.IsTailCall
) {
353 bool StructAttrFlag
= MF
.getFunction().hasStructRetAttr();
354 CLI
.IsTailCall
= IsEligibleForTailCallOptimization(Callee
, CallConv
,
355 IsVarArg
, IsStructRet
, StructAttrFlag
, Outs
,
// Scan the assigned locations; per the debug message below, an argument
// that must go on the stack makes the call ineligible (the guarding test is
// not visible in this listing).
357 for (unsigned i
= 0, e
= ArgLocs
.size(); i
!= e
; ++i
) {
358 CCValAssign
&VA
= ArgLocs
[i
];
360 CLI
.IsTailCall
= false;
364 LLVM_DEBUG(dbgs() << (CLI
.IsTailCall
? "Eligible for Tail Call\n"
365 : "Argument must be passed on stack. "
366 "Not eligible for Tail Call\n"));
368 // Get a count of how many bytes are to be pushed on the stack.
369 unsigned NumBytes
= CCInfo
.getNextStackOffset();
370 SmallVector
<std::pair
<unsigned, SDValue
>, 16> RegsToPass
;
371 SmallVector
<SDValue
, 8> MemOpChains
;
373 const HexagonRegisterInfo
&HRI
= *Subtarget
.getRegisterInfo();
// Read the stack register; its value is the base for outgoing stack slots.
375 DAG
.getCopyFromReg(Chain
, dl
, HRI
.getStackRegister(), PtrVT
);
377 bool NeedsArgAlign
= false;
378 unsigned LargestAlignSeen
= 0;
379 // Walk the register/memloc assignments, inserting copies/loads.
380 for (unsigned i
= 0, e
= ArgLocs
.size(); i
!= e
; ++i
) {
381 CCValAssign
&VA
= ArgLocs
[i
];
382 SDValue Arg
= OutVals
[i
];
383 ISD::ArgFlagsTy Flags
= Outs
[i
].Flags
;
384 // Record if we need > 8 byte alignment on an argument.
385 bool ArgAlign
= Subtarget
.isHVXVectorType(VA
.getValVT());
386 NeedsArgAlign
|= ArgAlign
;
388 // Promote the value if needed.
389 switch (VA
.getLocInfo()) {
391 // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
392 llvm_unreachable("Unknown loc info!");
393 case CCValAssign::Full
:
395 case CCValAssign::BCvt
:
396 Arg
= DAG
.getBitcast(VA
.getLocVT(), Arg
);
398 case CCValAssign::SExt
:
399 Arg
= DAG
.getNode(ISD::SIGN_EXTEND
, dl
, VA
.getLocVT(), Arg
);
401 case CCValAssign::ZExt
:
402 Arg
= DAG
.getNode(ISD::ZERO_EXTEND
, dl
, VA
.getLocVT(), Arg
);
404 case CCValAssign::AExt
:
405 Arg
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, VA
.getLocVT(), Arg
);
// Stack-passed argument: its address is the stack pointer plus the offset
// assigned by the calling convention.
410 unsigned LocMemOffset
= VA
.getLocMemOffset();
411 SDValue MemAddr
= DAG
.getConstant(LocMemOffset
, dl
,
412 StackPtr
.getValueType());
413 MemAddr
= DAG
.getNode(ISD::ADD
, dl
, MVT::i32
, StackPtr
, MemAddr
);
// Track the largest store size seen, in bytes (bits >> 3).
415 LargestAlignSeen
= std::max(LargestAlignSeen
,
416 VA
.getLocVT().getStoreSizeInBits() >> 3);
417 if (Flags
.isByVal()) {
418 // The argument is a struct passed by value. "Arg" is used as the source
// address of the copy made by CreateCopyOfByValArgument.
420 MemOpChains
.push_back(CreateCopyOfByValArgument(Arg
, MemAddr
, Chain
,
// Ordinary stack argument: store the value into its outgoing slot.
423 MachinePointerInfo LocPI
= MachinePointerInfo::getStack(
424 DAG
.getMachineFunction(), LocMemOffset
);
425 SDValue S
= DAG
.getStore(Chain
, dl
, Arg
, MemAddr
, LocPI
);
426 MemOpChains
.push_back(S
);
431 // Arguments that can be passed in a register are recorded in RegsToPass.
434 RegsToPass
.push_back(std::make_pair(VA
.getLocReg(), Arg
));
// HVX vector arguments raise the frame's alignment requirement to at least
// the spill alignment of the HVX vector register class.
437 if (NeedsArgAlign
&& Subtarget
.hasV60Ops()) {
438 LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
439 unsigned VecAlign
= HRI
.getSpillAlignment(Hexagon::HvxVRRegClass
);
440 LargestAlignSeen
= std::max(LargestAlignSeen
, VecAlign
);
441 MFI
.ensureMaxAlignment(LargestAlignSeen
);
443 // Transform all store nodes into one single node because all store
444 // nodes are independent of each other.
445 if (!MemOpChains
.empty())
446 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MemOpChains
);
// Only non-tail calls get a CALLSEQ_START.
449 if (!CLI
.IsTailCall
) {
450 Chain
= DAG
.getCALLSEQ_START(Chain
, NumBytes
, 0, dl
);
451 Glue
= Chain
.getValue(1);
454 // Build a sequence of copy-to-reg nodes chained together with token
455 // chain and flag operands which copy the outgoing args into registers.
456 // The Glue keeps the emitted copies attached to one another.
458 if (!CLI
.IsTailCall
) {
459 for (unsigned i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
) {
460 Chain
= DAG
.getCopyToReg(Chain
, dl
, RegsToPass
[i
].first
,
461 RegsToPass
[i
].second
, Glue
);
462 Glue
= Chain
.getValue(1);
465 // For tail calls lower the arguments to the 'real' stack slot.
467 // Force all the incoming stack arguments to be loaded from the stack
468 // before any new outgoing arguments are stored to the stack, because the
469 // outgoing stack slots may alias the incoming argument stack slots, and
470 // the alias isn't otherwise explicit. This is slightly more conservative
471 // than necessary, because it means that each store effectively depends
472 // on every argument instead of just those arguments it would clobber.
474 // Do not flag preceding copytoreg stuff together with the following stuff.
476 for (unsigned i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
) {
477 Chain
= DAG
.getCopyToReg(Chain
, dl
, RegsToPass
[i
].first
,
478 RegsToPass
[i
].second
, Glue
);
479 Glue
= Chain
.getValue(1);
// With long calls enabled, the callee address carries the
// HMOTF_ConstExtended target flag; otherwise no flag is set.
484 bool LongCalls
= MF
.getSubtarget
<HexagonSubtarget
>().useLongCalls();
485 unsigned Flags
= LongCalls
? HexagonII::HMOTF_ConstExtended
: 0;
487 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
488 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
489 // node so that legalize doesn't hack it.
490 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(Callee
)) {
491 Callee
= DAG
.getTargetGlobalAddress(G
->getGlobal(), dl
, PtrVT
, 0, Flags
);
492 } else if (ExternalSymbolSDNode
*S
=
493 dyn_cast
<ExternalSymbolSDNode
>(Callee
)) {
494 Callee
= DAG
.getTargetExternalSymbol(S
->getSymbol(), PtrVT
, Flags
);
497 // Returns a chain & a flag for retval copy to use.
498 SDVTList NodeTys
= DAG
.getVTList(MVT::Other
, MVT::Glue
);
// Call operands: chain, callee, argument registers, register mask.
499 SmallVector
<SDValue
, 8> Ops
;
500 Ops
.push_back(Chain
);
501 Ops
.push_back(Callee
);
503 // Add argument registers to the end of the list so that they are
504 // known live into the call.
505 for (unsigned i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
) {
506 Ops
.push_back(DAG
.getRegister(RegsToPass
[i
].first
,
507 RegsToPass
[i
].second
.getValueType()));
510 const uint32_t *Mask
= HRI
.getCallPreservedMask(MF
, CallConv
);
511 assert(Mask
&& "Missing call preserved mask for calling convention");
512 Ops
.push_back(DAG
.getRegisterMask(Mask
));
// Tail call: mark the frame and emit TC_RETURN; nothing follows it.
517 if (CLI
.IsTailCall
) {
518 MFI
.setHasTailCall();
519 return DAG
.getNode(HexagonISD::TC_RETURN
, dl
, NodeTys
, Ops
);
522 // Set this here because we need to know this for "hasFP" in frame lowering.
523 // The target-independent code calls getFrameRegister before setting it, and
524 // getFrameRegister uses hasFP to determine whether the function has FP.
525 MFI
.setHasCalls(true);
// Calls flagged DoesNotReturn use the CALLnr opcode instead of CALL.
527 unsigned OpCode
= DoesNotReturn
? HexagonISD::CALLnr
: HexagonISD::CALL
;
528 Chain
= DAG
.getNode(OpCode
, dl
, NodeTys
, Ops
);
529 Glue
= Chain
.getValue(1);
531 // Create the CALLSEQ_END node.
532 Chain
= DAG
.getCALLSEQ_END(Chain
, DAG
.getIntPtrConstant(NumBytes
, dl
, true),
533 DAG
.getIntPtrConstant(0, dl
, true), Glue
, dl
);
534 Glue
= Chain
.getValue(1);
536 // Handle result values: LowerCallResult copies them out of the physical
// registers and appends them to InVals.
538 return LowerCallResult(Chain
, Glue
, CallConv
, IsVarArg
, Ins
, dl
, DAG
,
539 InVals
, OutVals
, Callee
);
542 /// Returns true by value, base pointer and offset pointer and addressing
543 /// mode by reference if this node can be combined with a load / store to
544 /// form a post-indexed load / store.
///
/// N is the candidate memory node and Op the candidate address update. In
/// the visible code, Op must be an ISD::ADD whose second operand is a
/// constant, and that constant must be accepted by isValidAutoIncImm for the
/// memory type.
545 bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode
*N
, SDNode
*Op
,
546 SDValue
&Base
, SDValue
&Offset
, ISD::MemIndexedMode
&AM
,
547 SelectionDAG
&DAG
) const {
// Null when N is not a load/store node.
548 LSBaseSDNode
*LSN
= dyn_cast
<LSBaseSDNode
>(N
);
551 EVT VT
= LSN
->getMemoryVT();
// Memory types accepted here: the listed scalar and short-vector types, plus
// any HVX vector type.
554 bool IsLegalType
= VT
== MVT::i8
|| VT
== MVT::i16
|| VT
== MVT::i32
||
555 VT
== MVT::i64
|| VT
== MVT::f32
|| VT
== MVT::f64
||
556 VT
== MVT::v2i16
|| VT
== MVT::v2i32
|| VT
== MVT::v4i8
||
557 VT
== MVT::v4i16
|| VT
== MVT::v8i8
||
558 Subtarget
.isHVXVectorType(VT
.getSimpleVT());
562 if (Op
->getOpcode() != ISD::ADD
)
// Base and Offset are written before the constant check on Offset.
564 Base
= Op
->getOperand(0);
565 Offset
= Op
->getOperand(1);
566 if (!isa
<ConstantSDNode
>(Offset
.getNode()))
// The offset is read as a signed value and stored in an int32_t.
570 int32_t V
= cast
<ConstantSDNode
>(Offset
.getNode())->getSExtValue();
571 return Subtarget
.getInstrInfo()->isValidAutoIncImm(VT
, V
);
// LowerINLINEASM - Walk the operand groups of an INLINEASM / INLINEASM_BR
// node and record on HexagonMachineFunctionInfo when the asm affects the
// return-address register (LR). The walk is skipped when Op is not inline
// asm or when the clobber-LR flag is already set.
// NOTE(review): the comparison of each register against LR is not visible in
// this listing; it presumably guards setHasClobberLR(true) - confirm.
575 HexagonTargetLowering::LowerINLINEASM(SDValue Op
, SelectionDAG
&DAG
) const {
576 MachineFunction
&MF
= DAG
.getMachineFunction();
577 auto &HMFI
= *MF
.getInfo
<HexagonMachineFunctionInfo
>();
578 const HexagonRegisterInfo
&HRI
= *Subtarget
.getRegisterInfo();
579 unsigned LR
= HRI
.getRARegister();
581 if ((Op
.getOpcode() != ISD::INLINEASM
&&
582 Op
.getOpcode() != ISD::INLINEASM_BR
) || HMFI
.hasClobberLR())
585 unsigned NumOps
= Op
.getNumOperands();
586 if (Op
.getOperand(NumOps
-1).getValueType() == MVT::Glue
)
587 --NumOps
; // Ignore the flag operand.
// Each group starts with a flag word that encodes the operand kind and how
// many register operands follow it. The loop has no increment clause; i is
// advanced inside the body.
589 for (unsigned i
= InlineAsm::Op_FirstOperand
; i
!= NumOps
;) {
590 unsigned Flags
= cast
<ConstantSDNode
>(Op
.getOperand(i
))->getZExtValue();
591 unsigned NumVals
= InlineAsm::getNumOperandRegisters(Flags
);
592 ++i
; // Skip the ID value.
594 switch (InlineAsm::getKind(Flags
)) {
596 llvm_unreachable("Bad flags!");
597 case InlineAsm::Kind_RegUse
:
598 case InlineAsm::Kind_Imm
:
599 case InlineAsm::Kind_Mem
:
// Clobbers and register definitions are inspected register by register.
602 case InlineAsm::Kind_Clobber
:
603 case InlineAsm::Kind_RegDef
:
604 case InlineAsm::Kind_RegDefEarlyClobber
: {
605 for (; NumVals
; --NumVals
, ++i
) {
606 unsigned Reg
= cast
<RegisterSDNode
>(Op
.getOperand(i
))->getReg();
609 HMFI
.setHasClobberLR(true);
620 // Need to transform ISD::PREFETCH into something that doesn't inherit
621 // all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and
// others (the remainder of this sentence is missing from the listing).
// The node is replaced by HexagonISD::DCFETCH(Chain, Addr, 0) of type
// MVT::Other.
623 SDValue
HexagonTargetLowering::LowerPREFETCH(SDValue Op
,
624 SelectionDAG
&DAG
) const {
// Operand 0 is the chain and operand 1 the address to prefetch.
625 SDValue Chain
= Op
.getOperand(0);
626 SDValue Addr
= Op
.getOperand(1);
627 // Lower it to DCFETCH($reg, #0). A "pat" will try to merge the offset in,
628 // if the "reg" is fed by an "add".
630 SDValue Zero
= DAG
.getConstant(0, DL
, MVT::i32
);
631 return DAG
.getNode(HexagonISD::DCFETCH
, DL
, MVT::Other
, Chain
, Addr
, Zero
);
634 // Custom-handle ISD::READCYCLECOUNTER because the target-independent SDNode
635 // is marked as having side-effects, while the register read on Hexagon does
636 // not have any. TableGen refuses to accept the direct pattern from that node
// (the remainder of this sentence is missing from the listing).
// The node is replaced by HexagonISD::READCYCLE, which takes the chain and
// produces an (i32, chain) pair.
638 SDValue
HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op
,
639 SelectionDAG
&DAG
) const {
640 SDValue Chain
= Op
.getOperand(0);
642 SDVTList VTs
= DAG
.getVTList(MVT::i32
, MVT::Other
);
643 return DAG
.getNode(HexagonISD::READCYCLE
, dl
, VTs
, Chain
);
// LowerINTRINSIC_VOID - Custom lowering for chained intrinsics without a
// result. The only intrinsic handled in the visible code is
// Intrinsic::hexagon_prefetch, which becomes
// HexagonISD::DCFETCH(Chain, Addr, 0). What is returned for other
// intrinsics is not visible in this listing.
646 SDValue
HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op
,
647 SelectionDAG
&DAG
) const {
// Operand 0 is the chain; operand 1 is the intrinsic ID as a constant.
648 SDValue Chain
= Op
.getOperand(0);
649 unsigned IntNo
= cast
<ConstantSDNode
>(Op
.getOperand(1))->getZExtValue();
650 // Lower the hexagon_prefetch builtin to DCFETCH, as above.
651 if (IntNo
== Intrinsic::hexagon_prefetch
) {
// Operand 2 is the address passed to the intrinsic.
652 SDValue Addr
= Op
.getOperand(2);
654 SDValue Zero
= DAG
.getConstant(0, DL
, MVT::i32
);
655 return DAG
.getNode(HexagonISD::DCFETCH
, DL
, MVT::Other
, Chain
, Addr
, Zero
);
// LowerDYNAMIC_STACKALLOC - Replace a dynamic stack allocation node with
// HexagonISD::ALLOCA(Chain, Size, Align), producing an (i32, chain) pair.
// The alignment operand must be a constant; per the comment below, zero
// means the natural stack alignment from the frame lowering.
661 HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op
,
662 SelectionDAG
&DAG
) const {
// Operands: 0 = chain, 1 = allocation size, 2 = requested alignment.
663 SDValue Chain
= Op
.getOperand(0);
664 SDValue Size
= Op
.getOperand(1);
665 SDValue Align
= Op
.getOperand(2);
668 ConstantSDNode
*AlignConst
= dyn_cast
<ConstantSDNode
>(Align
);
669 assert(AlignConst
&& "Non-constant Align in LowerDYNAMIC_STACKALLOC");
671 unsigned A
= AlignConst
->getSExtValue();
672 auto &HFI
= *Subtarget
.getFrameLowering();
673 // "Zero" means natural stack alignment.
// NOTE(review): the test guarding this assignment (presumably A == 0) is not
// visible in this listing - confirm.
675 A
= HFI
.getStackAlignment();
// Debug dump of the chosen alignment and the size node.
678 dbgs () << __func__
<< " Align: " << A
<< " Size: ";
679 Size
.getNode()->dump(&DAG
);
683 SDValue AC
= DAG
.getConstant(A
, dl
, MVT::i32
);
684 SDVTList VTs
= DAG
.getVTList(MVT::i32
, MVT::Other
);
685 SDValue AA
= DAG
.getNode(HexagonISD::ALLOCA
, dl
, VTs
, Chain
, Size
, AC
);
// Redirect every user of the original node's value to the new ALLOCA node.
687 DAG
.ReplaceAllUsesOfValueWith(Op
, AA
);
// LowerFormalArguments - Materialize the incoming arguments of the current
// function. For each assigned location, a register argument is copied out
// of a fresh virtual register that is marked live-in, and a stack argument
// gets a fixed frame object (byval aggregates are passed on as the frame
// address, other values are loaded from it). One value per argument is
// appended to InVals. The visible tail of the function also creates the
// frame object recorded as the varargs frame index.
691 SDValue
HexagonTargetLowering::LowerFormalArguments(
692 SDValue Chain
, CallingConv::ID CallConv
, bool IsVarArg
,
693 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
694 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
) const {
695 MachineFunction
&MF
= DAG
.getMachineFunction();
696 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
697 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
699 // Assign locations to all of the incoming arguments.
700 SmallVector
<CCValAssign
, 16> ArgLocs
;
701 HexagonCCState
CCInfo(CallConv
, IsVarArg
, MF
, ArgLocs
, *DAG
.getContext(),
702 MF
.getFunction().getFunctionType()->getNumParams());
// NOTE(review): the CC_Hexagon call below presumably belongs to an else
// branch; the branch keyword is not visible in this listing - confirm.
704 if (Subtarget
.useHVXOps())
705 CCInfo
.AnalyzeFormalArguments(Ins
, CC_Hexagon_HVX
);
707 CCInfo
.AnalyzeFormalArguments(Ins
, CC_Hexagon
);
709 // For LLVM, in the case when returning a struct by value (>8byte),
710 // the first argument is a pointer that points to the location on caller's
711 // stack where the return value will be stored. For Hexagon, the location on
712 // caller's stack is passed only when the struct size is smaller than (and
713 // equal to) 8 bytes. If not, no address will be passed into callee and
714 // callee returns the result directly through R0/R1.
// NOTE(review): the size condition above reads as inverted relative to the
// LowerReturn comment in this file (pointer used when the struct is larger
// than 8 bytes) - confirm which is intended.
716 auto &HMFI
= *MF
.getInfo
<HexagonMachineFunctionInfo
>();
718 for (unsigned i
= 0, e
= ArgLocs
.size(); i
!= e
; ++i
) {
719 CCValAssign
&VA
= ArgLocs
[i
];
720 ISD::ArgFlagsTy Flags
= Ins
[i
].Flags
;
721 bool ByVal
= Flags
.isByVal();
723 // Arguments passed in registers:
724 // 1. 32- and 64-bit values and HVX vectors are passed directly,
725 // 2. Large structs are passed via an address, and the address is
726 // passed in a register.
727 if (VA
.isRegLoc() && ByVal
&& Flags
.getByValSize() <= 8)
728 llvm_unreachable("ByValSize must be bigger than 8 bytes");
// In a register: any non-byval value, or a byval aggregate above 8 bytes.
730 bool InReg
= VA
.isRegLoc() &&
731 (!ByVal
|| (ByVal
&& Flags
.getByValSize() > 8));
// For bitcast locations the value type, not the location type, selects the
// register class.
734 MVT RegVT
= VA
.getLocVT();
735 if (VA
.getLocInfo() == CCValAssign::BCvt
)
736 RegVT
= VA
.getValVT();
738 const TargetRegisterClass
*RC
= getRegClassFor(RegVT
);
739 unsigned VReg
= MRI
.createVirtualRegister(RC
);
740 SDValue Copy
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, RegVT
);
742 // Treat values of type MVT::i1 specially: they are passed in
743 // registers of type i32, but they need to remain as values of
744 // type i1 for consistency of the argument lowering.
745 if (VA
.getValVT() == MVT::i1
) {
746 assert(RegVT
.getSizeInBits() <= 32);
// Keep only bit 0, then compare against zero to obtain the i1 value (the
// condition code of the comparison is not visible in this listing).
747 SDValue T
= DAG
.getNode(ISD::AND
, dl
, RegVT
,
748 Copy
, DAG
.getConstant(1, dl
, RegVT
));
749 Copy
= DAG
.getSetCC(dl
, MVT::i1
, T
, DAG
.getConstant(0, dl
, RegVT
),
753 unsigned RegSize
= RegVT
.getSizeInBits();
754 assert(RegSize
== 32 || RegSize
== 64 ||
755 Subtarget
.isHVXVectorType(RegVT
));
758 InVals
.push_back(Copy
);
// Tie the physical argument register to the virtual register as a live-in.
759 MRI
.addLiveIn(VA
.getLocReg(), VReg
);
761 assert(VA
.isMemLoc() && "Argument should be passed in memory");
763 // If it's a byval parameter, then we need to compute the
764 // "real" size, not the size of the pointer.
765 unsigned ObjSize
= Flags
.isByVal()
766 ? Flags
.getByValSize()
767 : VA
.getLocVT().getStoreSizeInBits() / 8;
769 // Create the frame index object for this incoming parameter.
// The slot offset is the assigned location offset plus HEXAGON_LRFP_SIZE.
770 int Offset
= HEXAGON_LRFP_SIZE
+ VA
.getLocMemOffset();
771 int FI
= MFI
.CreateFixedObject(ObjSize
, Offset
, true);
772 SDValue FIN
= DAG
.getFrameIndex(FI
, MVT::i32
);
774 if (Flags
.isByVal()) {
775 // If it's a pass-by-value aggregate, then do not dereference the stack
776 // location. Instead, we should generate a reference to the stack
// location, i.e. the frame index itself is the argument value.
778 InVals
.push_back(FIN
);
// Otherwise load the value from its fixed stack slot.
780 SDValue L
= DAG
.getLoad(VA
.getValVT(), dl
, Chain
, FIN
,
781 MachinePointerInfo::getFixedStack(MF
, FI
, 0));
789 // This will point to the next argument passed via stack.
790 int Offset
= HEXAGON_LRFP_SIZE
+ CCInfo
.getNextStackOffset();
791 int FI
= MFI
.CreateFixedObject(Hexagon_PointerSize
, Offset
, true);
792 HMFI
.setVarArgsFrameIndex(FI
);
// LowerVASTART - Lower va_start to a single store. The value stored is the
// frame index recorded as VarArgsFrameIndex; the destination is the node's
// operand 1, and operand 2 supplies the IR value for the pointer info.
799 HexagonTargetLowering::LowerVASTART(SDValue Op
, SelectionDAG
&DAG
) const {
800 // VASTART stores the address of the VarArgsFrameIndex slot into the
801 // memory location argument.
802 MachineFunction
&MF
= DAG
.getMachineFunction();
803 HexagonMachineFunctionInfo
*QFI
= MF
.getInfo
<HexagonMachineFunctionInfo
>();
804 SDValue Addr
= DAG
.getFrameIndex(QFI
->getVarArgsFrameIndex(), MVT::i32
);
805 const Value
*SV
= cast
<SrcValueSDNode
>(Op
.getOperand(2))->getValue();
// getStore(chain, loc, value, pointer, info): Addr is the value being stored
// and operand 1 is the pointer written through.
806 return DAG
.getStore(Op
.getOperand(0), SDLoc(Op
), Addr
, Op
.getOperand(1),
807 MachinePointerInfo(SV
));
// LowerSETCC - Custom lowering of comparisons. Visible cases:
//  * v2i16 / v4i8 operands are sign-extended (or truncated) to a vector with
//    elements twice as wide before comparing;
//  * i8 / i16 operands are sign-extended to i32 when the RHS is a negative
//    constant or when the sign extension of either operand is free.
// NOTE(review): OpTy, ResTy and dl are declared on lines missing from this
// listing; they are presumably the operand type, the result type and the
// node's debug location - confirm.
810 SDValue
HexagonTargetLowering::LowerSETCC(SDValue Op
, SelectionDAG
&DAG
) const {
812 SDValue LHS
= Op
.getOperand(0);
813 SDValue RHS
= Op
.getOperand(1);
814 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Op
.getOperand(2))->get();
818 if (OpTy
== MVT::v2i16
|| OpTy
== MVT::v4i8
) {
819 MVT ElemTy
= OpTy
.getVectorElementType();
820 assert(ElemTy
.isScalarInteger());
// Same element count, element width doubled.
821 MVT WideTy
= MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy
.getSizeInBits()),
822 OpTy
.getVectorNumElements());
823 return DAG
.getSetCC(dl
, ResTy
,
824 DAG
.getSExtOrTrunc(LHS
, SDLoc(LHS
), WideTy
),
825 DAG
.getSExtOrTrunc(RHS
, SDLoc(RHS
), WideTy
), CC
);
828 // Treat all other vector types as legal.
829 if (ResTy
.isVector())
832 // Comparisons of short integers should use sign-extend, not zero-extend,
833 // since we can represent small negative values in the compare instructions.
834 // The LLVM default is to use zero-extend arbitrarily in these cases.
// Helper: reports whether sign-extending N costs nothing. Only part of its
// switch is visible in this listing.
835 auto isSExtFree
= [this](SDValue N
) {
836 switch (N
.getOpcode()) {
837 case ISD::TRUNCATE
: {
838 // A sign-extend of a truncate of a sign-extend is free.
839 SDValue Op
= N
.getOperand(0);
840 if (Op
.getOpcode() != ISD::AssertSext
)
// Operand 1 of AssertSext carries the type that was sign-extended from.
842 EVT OrigTy
= cast
<VTSDNode
>(Op
.getOperand(1))->getVT();
843 unsigned ThisBW
= ty(N
).getSizeInBits();
844 unsigned OrigBW
= OrigTy
.getSizeInBits();
845 // The type that was sign-extended to get the AssertSext must be
846 // narrower than the type of N (so that N has still the same value
// after the truncate). Equal widths are accepted as well (>=).
848 return ThisBW
>= OrigBW
;
851 // We have sign-extended loads.
857 if (OpTy
== MVT::i8
|| OpTy
== MVT::i16
) {
858 ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(RHS
);
859 bool IsNegative
= C
&& C
->getAPIntValue().isNegative();
860 if (IsNegative
|| isSExtFree(LHS
) || isSExtFree(RHS
))
861 return DAG
.getSetCC(dl
, ResTy
,
862 DAG
.getSExtOrTrunc(LHS
, SDLoc(LHS
), MVT::i32
),
863 DAG
.getSExtOrTrunc(RHS
, SDLoc(RHS
), MVT::i32
), CC
);
// LowerVSELECT - Custom lowering of vector select. In the visible code a
// v2i16 select is done in v2i32: both value operands are zero-extended, the
// select is performed on the wide type, and the result is truncated back to
// v2i16. What is returned in each case is not visible in this listing.
870 HexagonTargetLowering::LowerVSELECT(SDValue Op
, SelectionDAG
&DAG
) const {
// Operand 0 is the predicate; operands 1 and 2 are the values selected from.
871 SDValue PredOp
= Op
.getOperand(0);
872 SDValue Op1
= Op
.getOperand(1), Op2
= Op
.getOperand(2);
873 EVT OpVT
= Op1
.getValueType();
876 if (OpVT
== MVT::v2i16
) {
877 SDValue X1
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, MVT::v2i32
, Op1
);
878 SDValue X2
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, MVT::v2i32
, Op2
);
879 SDValue SL
= DAG
.getNode(ISD::VSELECT
, DL
, MVT::v2i32
, PredOp
, X1
, X2
);
880 SDValue TR
= DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::v2i16
, SL
);
// convert_i1_to_i8 - Build a new constant vector of i8 values from the
// constant vector ConstVal, producing one i8 per group of 8 source elements.
// The element count must be a power of two, and each produced byte must be
// either 0 or 255 (both enforced by assertions).
// NOTE(review): the computation of x from the 8 elements of a group, and the
// handling of a ConstVal that is not a ConstantVector, are on lines missing
// from this listing - confirm before relying on details.
887 static Constant
*convert_i1_to_i8(const Constant
*ConstVal
) {
888 SmallVector
<Constant
*, 128> NewConst
;
889 const ConstantVector
*CV
= dyn_cast
<ConstantVector
>(ConstVal
);
// The IRBuilder is used only to create the i8 constants.
893 LLVMContext
&Ctx
= ConstVal
->getContext();
894 IRBuilder
<> IRB(Ctx
);
895 unsigned NumVectorElements
= CV
->getNumOperands();
896 assert(isPowerOf2_32(NumVectorElements
) &&
897 "conversion only supported for pow2 VectorSize!");
// Outer loop: one iteration per output byte. Inner loop: its 8 inputs.
899 for (unsigned i
= 0; i
< NumVectorElements
/ 8; ++i
) {
901 for (unsigned j
= 0; j
< 8; ++j
) {
902 uint8_t y
= CV
->getOperand(i
* 8 + j
)->getUniqueInteger().getZExtValue();
905 assert((x
== 0 || x
== 255) && "Either all 0's or all 1's expected!");
906 NewConst
.push_back(IRB
.getInt8(x
));
908 return ConstantVector::get(NewConst
);
912 HexagonTargetLowering::LowerConstantPool(SDValue Op
, SelectionDAG
&DAG
) const {
913 EVT ValTy
= Op
.getValueType();
914 ConstantPoolSDNode
*CPN
= cast
<ConstantPoolSDNode
>(Op
);
915 Constant
*CVal
= nullptr;
916 bool isVTi1Type
= false;
917 if (const Constant
*ConstVal
= dyn_cast
<Constant
>(CPN
->getConstVal())) {
918 Type
*CValTy
= ConstVal
->getType();
919 if (CValTy
->isVectorTy() &&
920 CValTy
->getVectorElementType()->isIntegerTy(1)) {
921 CVal
= convert_i1_to_i8(ConstVal
);
922 isVTi1Type
= (CVal
!= nullptr);
925 unsigned Align
= CPN
->getAlignment();
926 bool IsPositionIndependent
= isPositionIndependent();
927 unsigned char TF
= IsPositionIndependent
? HexagonII::MO_PCREL
: 0;
931 if (CPN
->isMachineConstantPoolEntry())
932 T
= DAG
.getTargetConstantPool(CPN
->getMachineCPVal(), ValTy
, Align
, Offset
,
935 T
= DAG
.getTargetConstantPool(CVal
, ValTy
, Align
, Offset
, TF
);
937 T
= DAG
.getTargetConstantPool(CPN
->getConstVal(), ValTy
, Align
, Offset
, TF
);
939 assert(cast
<ConstantPoolSDNode
>(T
)->getTargetFlags() == TF
&&
940 "Inconsistent target flag encountered");
942 if (IsPositionIndependent
)
943 return DAG
.getNode(HexagonISD::AT_PCREL
, SDLoc(Op
), ValTy
, T
);
944 return DAG
.getNode(HexagonISD::CP
, SDLoc(Op
), ValTy
, T
);
948 HexagonTargetLowering::LowerJumpTable(SDValue Op
, SelectionDAG
&DAG
) const {
949 EVT VT
= Op
.getValueType();
950 int Idx
= cast
<JumpTableSDNode
>(Op
)->getIndex();
951 if (isPositionIndependent()) {
952 SDValue T
= DAG
.getTargetJumpTable(Idx
, VT
, HexagonII::MO_PCREL
);
953 return DAG
.getNode(HexagonISD::AT_PCREL
, SDLoc(Op
), VT
, T
);
956 SDValue T
= DAG
.getTargetJumpTable(Idx
, VT
);
957 return DAG
.getNode(HexagonISD::JT
, SDLoc(Op
), VT
, T
);
961 HexagonTargetLowering::LowerRETURNADDR(SDValue Op
, SelectionDAG
&DAG
) const {
962 const HexagonRegisterInfo
&HRI
= *Subtarget
.getRegisterInfo();
963 MachineFunction
&MF
= DAG
.getMachineFunction();
964 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
965 MFI
.setReturnAddressIsTaken(true);
967 if (verifyReturnAddressArgumentIsConstant(Op
, DAG
))
970 EVT VT
= Op
.getValueType();
972 unsigned Depth
= cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
974 SDValue FrameAddr
= LowerFRAMEADDR(Op
, DAG
);
975 SDValue Offset
= DAG
.getConstant(4, dl
, MVT::i32
);
976 return DAG
.getLoad(VT
, dl
, DAG
.getEntryNode(),
977 DAG
.getNode(ISD::ADD
, dl
, VT
, FrameAddr
, Offset
),
978 MachinePointerInfo());
981 // Return LR, which contains the return address. Mark it an implicit live-in.
982 unsigned Reg
= MF
.addLiveIn(HRI
.getRARegister(), getRegClassFor(MVT::i32
));
983 return DAG
.getCopyFromReg(DAG
.getEntryNode(), dl
, Reg
, VT
);
987 HexagonTargetLowering::LowerFRAMEADDR(SDValue Op
, SelectionDAG
&DAG
) const {
988 const HexagonRegisterInfo
&HRI
= *Subtarget
.getRegisterInfo();
989 MachineFrameInfo
&MFI
= DAG
.getMachineFunction().getFrameInfo();
990 MFI
.setFrameAddressIsTaken(true);
992 EVT VT
= Op
.getValueType();
994 unsigned Depth
= cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
995 SDValue FrameAddr
= DAG
.getCopyFromReg(DAG
.getEntryNode(), dl
,
996 HRI
.getFrameRegister(), VT
);
998 FrameAddr
= DAG
.getLoad(VT
, dl
, DAG
.getEntryNode(), FrameAddr
,
999 MachinePointerInfo());
1004 HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op
, SelectionDAG
& DAG
) const {
1006 return DAG
.getNode(HexagonISD::BARRIER
, dl
, MVT::Other
, Op
.getOperand(0));
1010 HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op
, SelectionDAG
&DAG
) const {
1012 auto *GAN
= cast
<GlobalAddressSDNode
>(Op
);
1013 auto PtrVT
= getPointerTy(DAG
.getDataLayout());
1014 auto *GV
= GAN
->getGlobal();
1015 int64_t Offset
= GAN
->getOffset();
1017 auto &HLOF
= *HTM
.getObjFileLowering();
1018 Reloc::Model RM
= HTM
.getRelocationModel();
1020 if (RM
== Reloc::Static
) {
1021 SDValue GA
= DAG
.getTargetGlobalAddress(GV
, dl
, PtrVT
, Offset
);
1022 const GlobalObject
*GO
= GV
->getBaseObject();
1023 if (GO
&& Subtarget
.useSmallData() && HLOF
.isGlobalInSmallSection(GO
, HTM
))
1024 return DAG
.getNode(HexagonISD::CONST32_GP
, dl
, PtrVT
, GA
);
1025 return DAG
.getNode(HexagonISD::CONST32
, dl
, PtrVT
, GA
);
1028 bool UsePCRel
= getTargetMachine().shouldAssumeDSOLocal(*GV
->getParent(), GV
);
1030 SDValue GA
= DAG
.getTargetGlobalAddress(GV
, dl
, PtrVT
, Offset
,
1031 HexagonII::MO_PCREL
);
1032 return DAG
.getNode(HexagonISD::AT_PCREL
, dl
, PtrVT
, GA
);
1036 SDValue GOT
= DAG
.getGLOBAL_OFFSET_TABLE(PtrVT
);
1037 SDValue GA
= DAG
.getTargetGlobalAddress(GV
, dl
, PtrVT
, 0, HexagonII::MO_GOT
);
1038 SDValue Off
= DAG
.getConstant(Offset
, dl
, MVT::i32
);
1039 return DAG
.getNode(HexagonISD::AT_GOT
, dl
, PtrVT
, GOT
, GA
, Off
);
1042 // Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
1044 HexagonTargetLowering::LowerBlockAddress(SDValue Op
, SelectionDAG
&DAG
) const {
1045 const BlockAddress
*BA
= cast
<BlockAddressSDNode
>(Op
)->getBlockAddress();
1047 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
1049 Reloc::Model RM
= HTM
.getRelocationModel();
1050 if (RM
== Reloc::Static
) {
1051 SDValue A
= DAG
.getTargetBlockAddress(BA
, PtrVT
);
1052 return DAG
.getNode(HexagonISD::CONST32_GP
, dl
, PtrVT
, A
);
1055 SDValue A
= DAG
.getTargetBlockAddress(BA
, PtrVT
, 0, HexagonII::MO_PCREL
);
1056 return DAG
.getNode(HexagonISD::AT_PCREL
, dl
, PtrVT
, A
);
1060 HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op
, SelectionDAG
&DAG
)
1062 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
1063 SDValue GOTSym
= DAG
.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME
, PtrVT
,
1064 HexagonII::MO_PCREL
);
1065 return DAG
.getNode(HexagonISD::AT_PCREL
, SDLoc(Op
), PtrVT
, GOTSym
);
1069 HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG
&DAG
, SDValue Chain
,
1070 GlobalAddressSDNode
*GA
, SDValue Glue
, EVT PtrVT
, unsigned ReturnReg
,
1071 unsigned char OperandFlags
) const {
1072 MachineFunction
&MF
= DAG
.getMachineFunction();
1073 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
1074 SDVTList NodeTys
= DAG
.getVTList(MVT::Other
, MVT::Glue
);
1076 SDValue TGA
= DAG
.getTargetGlobalAddress(GA
->getGlobal(), dl
,
1077 GA
->getValueType(0),
1080 // Create Operands for the call.The Operands should have the following:
1082 // 2. Callee which in this case is the Global address value.
1083 // 3. Registers live into the call.In this case its R0, as we
1084 // have just one argument to be passed.
1086 // Note: The order is important.
1088 const auto &HRI
= *Subtarget
.getRegisterInfo();
1089 const uint32_t *Mask
= HRI
.getCallPreservedMask(MF
, CallingConv::C
);
1090 assert(Mask
&& "Missing call preserved mask for calling convention");
1091 SDValue Ops
[] = { Chain
, TGA
, DAG
.getRegister(Hexagon::R0
, PtrVT
),
1092 DAG
.getRegisterMask(Mask
), Glue
};
1093 Chain
= DAG
.getNode(HexagonISD::CALL
, dl
, NodeTys
, Ops
);
1095 // Inform MFI that function has calls.
1096 MFI
.setAdjustsStack(true);
1098 Glue
= Chain
.getValue(1);
1099 return DAG
.getCopyFromReg(Chain
, dl
, ReturnReg
, PtrVT
, Glue
);
1103 // Lower using the intial executable model for TLS addresses
1106 HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode
*GA
,
1107 SelectionDAG
&DAG
) const {
1109 int64_t Offset
= GA
->getOffset();
1110 auto PtrVT
= getPointerTy(DAG
.getDataLayout());
1112 // Get the thread pointer.
1113 SDValue TP
= DAG
.getCopyFromReg(DAG
.getEntryNode(), dl
, Hexagon::UGP
, PtrVT
);
1115 bool IsPositionIndependent
= isPositionIndependent();
1117 IsPositionIndependent
? HexagonII::MO_IEGOT
: HexagonII::MO_IE
;
1119 // First generate the TLS symbol address
1120 SDValue TGA
= DAG
.getTargetGlobalAddress(GA
->getGlobal(), dl
, PtrVT
,
1123 SDValue Sym
= DAG
.getNode(HexagonISD::CONST32
, dl
, PtrVT
, TGA
);
1125 if (IsPositionIndependent
) {
1126 // Generate the GOT pointer in case of position independent code
1127 SDValue GOT
= LowerGLOBAL_OFFSET_TABLE(Sym
, DAG
);
1129 // Add the TLS Symbol address to GOT pointer.This gives
1130 // GOT relative relocation for the symbol.
1131 Sym
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, GOT
, Sym
);
1134 // Load the offset value for TLS symbol.This offset is relative to
1136 SDValue LoadOffset
=
1137 DAG
.getLoad(PtrVT
, dl
, DAG
.getEntryNode(), Sym
, MachinePointerInfo());
1139 // Address of the thread local variable is the add of thread
1140 // pointer and the offset of the variable.
1141 return DAG
.getNode(ISD::ADD
, dl
, PtrVT
, TP
, LoadOffset
);
1145 // Lower using the local executable model for TLS addresses
1148 HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode
*GA
,
1149 SelectionDAG
&DAG
) const {
1151 int64_t Offset
= GA
->getOffset();
1152 auto PtrVT
= getPointerTy(DAG
.getDataLayout());
1154 // Get the thread pointer.
1155 SDValue TP
= DAG
.getCopyFromReg(DAG
.getEntryNode(), dl
, Hexagon::UGP
, PtrVT
);
1156 // Generate the TLS symbol address
1157 SDValue TGA
= DAG
.getTargetGlobalAddress(GA
->getGlobal(), dl
, PtrVT
, Offset
,
1158 HexagonII::MO_TPREL
);
1159 SDValue Sym
= DAG
.getNode(HexagonISD::CONST32
, dl
, PtrVT
, TGA
);
1161 // Address of the thread local variable is the add of thread
1162 // pointer and the offset of the variable.
1163 return DAG
.getNode(ISD::ADD
, dl
, PtrVT
, TP
, Sym
);
1167 // Lower using the general dynamic model for TLS addresses
1170 HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode
*GA
,
1171 SelectionDAG
&DAG
) const {
1173 int64_t Offset
= GA
->getOffset();
1174 auto PtrVT
= getPointerTy(DAG
.getDataLayout());
1176 // First generate the TLS symbol address
1177 SDValue TGA
= DAG
.getTargetGlobalAddress(GA
->getGlobal(), dl
, PtrVT
, Offset
,
1178 HexagonII::MO_GDGOT
);
1180 // Then, generate the GOT pointer
1181 SDValue GOT
= LowerGLOBAL_OFFSET_TABLE(TGA
, DAG
);
1183 // Add the TLS symbol and the GOT pointer
1184 SDValue Sym
= DAG
.getNode(HexagonISD::CONST32
, dl
, PtrVT
, TGA
);
1185 SDValue Chain
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, GOT
, Sym
);
1187 // Copy over the argument to R0
1189 Chain
= DAG
.getCopyToReg(DAG
.getEntryNode(), dl
, Hexagon::R0
, Chain
, InFlag
);
1190 InFlag
= Chain
.getValue(1);
1193 static_cast<const HexagonSubtarget
&>(DAG
.getSubtarget()).useLongCalls()
1194 ? HexagonII::MO_GDPLT
| HexagonII::HMOTF_ConstExtended
1195 : HexagonII::MO_GDPLT
;
1197 return GetDynamicTLSAddr(DAG
, Chain
, GA
, InFlag
, PtrVT
,
1198 Hexagon::R0
, Flags
);
1202 // Lower TLS addresses.
1204 // For now for dynamic models, we only support the general dynamic model.
1207 HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op
,
1208 SelectionDAG
&DAG
) const {
1209 GlobalAddressSDNode
*GA
= cast
<GlobalAddressSDNode
>(Op
);
1211 switch (HTM
.getTLSModel(GA
->getGlobal())) {
1212 case TLSModel::GeneralDynamic
:
1213 case TLSModel::LocalDynamic
:
1214 return LowerToTLSGeneralDynamicModel(GA
, DAG
);
1215 case TLSModel::InitialExec
:
1216 return LowerToTLSInitialExecModel(GA
, DAG
);
1217 case TLSModel::LocalExec
:
1218 return LowerToTLSLocalExecModel(GA
, DAG
);
1220 llvm_unreachable("Bogus TLS model");
1223 //===----------------------------------------------------------------------===//
1224 // TargetLowering Implementation
1225 //===----------------------------------------------------------------------===//
1227 HexagonTargetLowering::HexagonTargetLowering(const TargetMachine
&TM
,
1228 const HexagonSubtarget
&ST
)
1229 : TargetLowering(TM
), HTM(static_cast<const HexagonTargetMachine
&>(TM
)),
1231 auto &HRI
= *Subtarget
.getRegisterInfo();
1233 setPrefLoopAlignment(4);
1234 setPrefFunctionAlignment(4);
1235 setMinFunctionAlignment(2);
1236 setStackPointerRegisterToSaveRestore(HRI
.getStackRegister());
1237 setBooleanContents(TargetLoweringBase::UndefinedBooleanContent
);
1238 setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent
);
1240 setMaxAtomicSizeInBitsSupported(64);
1241 setMinCmpXchgSizeInBits(32);
1243 if (EnableHexSDNodeSched
)
1244 setSchedulingPreference(Sched::VLIW
);
1246 setSchedulingPreference(Sched::Source
);
1248 // Limits for inline expansion of memcpy/memmove
1249 MaxStoresPerMemcpy
= MaxStoresPerMemcpyCL
;
1250 MaxStoresPerMemcpyOptSize
= MaxStoresPerMemcpyOptSizeCL
;
1251 MaxStoresPerMemmove
= MaxStoresPerMemmoveCL
;
1252 MaxStoresPerMemmoveOptSize
= MaxStoresPerMemmoveOptSizeCL
;
1253 MaxStoresPerMemset
= MaxStoresPerMemsetCL
;
1254 MaxStoresPerMemsetOptSize
= MaxStoresPerMemsetOptSizeCL
;
1257 // Set up register classes.
1260 addRegisterClass(MVT::i1
, &Hexagon::PredRegsRegClass
);
1261 addRegisterClass(MVT::v2i1
, &Hexagon::PredRegsRegClass
); // bbbbaaaa
1262 addRegisterClass(MVT::v4i1
, &Hexagon::PredRegsRegClass
); // ddccbbaa
1263 addRegisterClass(MVT::v8i1
, &Hexagon::PredRegsRegClass
); // hgfedcba
1264 addRegisterClass(MVT::i32
, &Hexagon::IntRegsRegClass
);
1265 addRegisterClass(MVT::v2i16
, &Hexagon::IntRegsRegClass
);
1266 addRegisterClass(MVT::v4i8
, &Hexagon::IntRegsRegClass
);
1267 addRegisterClass(MVT::i64
, &Hexagon::DoubleRegsRegClass
);
1268 addRegisterClass(MVT::v8i8
, &Hexagon::DoubleRegsRegClass
);
1269 addRegisterClass(MVT::v4i16
, &Hexagon::DoubleRegsRegClass
);
1270 addRegisterClass(MVT::v2i32
, &Hexagon::DoubleRegsRegClass
);
1272 addRegisterClass(MVT::f32
, &Hexagon::IntRegsRegClass
);
1273 addRegisterClass(MVT::f64
, &Hexagon::DoubleRegsRegClass
);
1276 // Handling of scalar operations.
1278 // All operations default to "legal", except:
1279 // - indexed loads and stores (pre-/post-incremented),
1280 // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
1281 // ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
1282 // FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
1283 // FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
1284 // which default to "expand" for at least one type.
1287 setOperationAction(ISD::ConstantFP
, MVT::f32
, Legal
);
1288 setOperationAction(ISD::ConstantFP
, MVT::f64
, Legal
);
1289 setOperationAction(ISD::TRAP
, MVT::Other
, Legal
);
1290 setOperationAction(ISD::ConstantPool
, MVT::i32
, Custom
);
1291 setOperationAction(ISD::JumpTable
, MVT::i32
, Custom
);
1292 setOperationAction(ISD::BUILD_PAIR
, MVT::i64
, Expand
);
1293 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::i1
, Expand
);
1294 setOperationAction(ISD::INLINEASM
, MVT::Other
, Custom
);
1295 setOperationAction(ISD::INLINEASM_BR
, MVT::Other
, Custom
);
1296 setOperationAction(ISD::PREFETCH
, MVT::Other
, Custom
);
1297 setOperationAction(ISD::READCYCLECOUNTER
, MVT::i64
, Custom
);
1298 setOperationAction(ISD::INTRINSIC_VOID
, MVT::Other
, Custom
);
1299 setOperationAction(ISD::EH_RETURN
, MVT::Other
, Custom
);
1300 setOperationAction(ISD::GLOBAL_OFFSET_TABLE
, MVT::i32
, Custom
);
1301 setOperationAction(ISD::GlobalTLSAddress
, MVT::i32
, Custom
);
1302 setOperationAction(ISD::ATOMIC_FENCE
, MVT::Other
, Custom
);
1304 // Custom legalize GlobalAddress nodes into CONST32.
1305 setOperationAction(ISD::GlobalAddress
, MVT::i32
, Custom
);
1306 setOperationAction(ISD::GlobalAddress
, MVT::i8
, Custom
);
1307 setOperationAction(ISD::BlockAddress
, MVT::i32
, Custom
);
1309 // Hexagon needs to optimize cases with negative constants.
1310 setOperationAction(ISD::SETCC
, MVT::i8
, Custom
);
1311 setOperationAction(ISD::SETCC
, MVT::i16
, Custom
);
1312 setOperationAction(ISD::SETCC
, MVT::v4i8
, Custom
);
1313 setOperationAction(ISD::SETCC
, MVT::v2i16
, Custom
);
1315 // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
1316 setOperationAction(ISD::VASTART
, MVT::Other
, Custom
);
1317 setOperationAction(ISD::VAEND
, MVT::Other
, Expand
);
1318 setOperationAction(ISD::VAARG
, MVT::Other
, Expand
);
1319 setOperationAction(ISD::VACOPY
, MVT::Other
, Expand
);
1321 setOperationAction(ISD::STACKSAVE
, MVT::Other
, Expand
);
1322 setOperationAction(ISD::STACKRESTORE
, MVT::Other
, Expand
);
1323 setOperationAction(ISD::DYNAMIC_STACKALLOC
, MVT::i32
, Custom
);
1326 setMinimumJumpTableEntries(MinimumJumpTables
);
1328 setMinimumJumpTableEntries(std::numeric_limits
<unsigned>::max());
1329 setOperationAction(ISD::BR_JT
, MVT::Other
, Expand
);
1331 setOperationAction(ISD::ABS
, MVT::i32
, Legal
);
1332 setOperationAction(ISD::ABS
, MVT::i64
, Legal
);
1334 // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
1335 // but they only operate on i64.
1336 for (MVT VT
: MVT::integer_valuetypes()) {
1337 setOperationAction(ISD::UADDO
, VT
, Custom
);
1338 setOperationAction(ISD::USUBO
, VT
, Custom
);
1339 setOperationAction(ISD::SADDO
, VT
, Expand
);
1340 setOperationAction(ISD::SSUBO
, VT
, Expand
);
1341 setOperationAction(ISD::ADDCARRY
, VT
, Expand
);
1342 setOperationAction(ISD::SUBCARRY
, VT
, Expand
);
1344 setOperationAction(ISD::ADDCARRY
, MVT::i64
, Custom
);
1345 setOperationAction(ISD::SUBCARRY
, MVT::i64
, Custom
);
1347 setOperationAction(ISD::CTLZ
, MVT::i8
, Promote
);
1348 setOperationAction(ISD::CTLZ
, MVT::i16
, Promote
);
1349 setOperationAction(ISD::CTTZ
, MVT::i8
, Promote
);
1350 setOperationAction(ISD::CTTZ
, MVT::i16
, Promote
);
1352 // Popcount can count # of 1s in i64 but returns i32.
1353 setOperationAction(ISD::CTPOP
, MVT::i8
, Promote
);
1354 setOperationAction(ISD::CTPOP
, MVT::i16
, Promote
);
1355 setOperationAction(ISD::CTPOP
, MVT::i32
, Promote
);
1356 setOperationAction(ISD::CTPOP
, MVT::i64
, Legal
);
1358 setOperationAction(ISD::BITREVERSE
, MVT::i32
, Legal
);
1359 setOperationAction(ISD::BITREVERSE
, MVT::i64
, Legal
);
1360 setOperationAction(ISD::BSWAP
, MVT::i32
, Legal
);
1361 setOperationAction(ISD::BSWAP
, MVT::i64
, Legal
);
1363 setOperationAction(ISD::FSHL
, MVT::i32
, Legal
);
1364 setOperationAction(ISD::FSHL
, MVT::i64
, Legal
);
1365 setOperationAction(ISD::FSHR
, MVT::i32
, Legal
);
1366 setOperationAction(ISD::FSHR
, MVT::i64
, Legal
);
1368 for (unsigned IntExpOp
:
1369 {ISD::SDIV
, ISD::UDIV
, ISD::SREM
, ISD::UREM
,
1370 ISD::SDIVREM
, ISD::UDIVREM
, ISD::ROTL
, ISD::ROTR
,
1371 ISD::SHL_PARTS
, ISD::SRA_PARTS
, ISD::SRL_PARTS
,
1372 ISD::SMUL_LOHI
, ISD::UMUL_LOHI
}) {
1373 for (MVT VT
: MVT::integer_valuetypes())
1374 setOperationAction(IntExpOp
, VT
, Expand
);
1377 for (unsigned FPExpOp
:
1378 {ISD::FDIV
, ISD::FREM
, ISD::FSQRT
, ISD::FSIN
, ISD::FCOS
, ISD::FSINCOS
,
1379 ISD::FPOW
, ISD::FCOPYSIGN
}) {
1380 for (MVT VT
: MVT::fp_valuetypes())
1381 setOperationAction(FPExpOp
, VT
, Expand
);
1384 // No extending loads from i32.
1385 for (MVT VT
: MVT::integer_valuetypes()) {
1386 setLoadExtAction(ISD::ZEXTLOAD
, VT
, MVT::i32
, Expand
);
1387 setLoadExtAction(ISD::SEXTLOAD
, VT
, MVT::i32
, Expand
);
1388 setLoadExtAction(ISD::EXTLOAD
, VT
, MVT::i32
, Expand
);
1390 // Turn FP truncstore into trunc + store.
1391 setTruncStoreAction(MVT::f64
, MVT::f32
, Expand
);
1392 // Turn FP extload into load/fpextend.
1393 for (MVT VT
: MVT::fp_valuetypes())
1394 setLoadExtAction(ISD::EXTLOAD
, VT
, MVT::f32
, Expand
);
1396 // Expand BR_CC and SELECT_CC for all integer and fp types.
1397 for (MVT VT
: MVT::integer_valuetypes()) {
1398 setOperationAction(ISD::BR_CC
, VT
, Expand
);
1399 setOperationAction(ISD::SELECT_CC
, VT
, Expand
);
1401 for (MVT VT
: MVT::fp_valuetypes()) {
1402 setOperationAction(ISD::BR_CC
, VT
, Expand
);
1403 setOperationAction(ISD::SELECT_CC
, VT
, Expand
);
1405 setOperationAction(ISD::BR_CC
, MVT::Other
, Expand
);
1408 // Handling of vector operations.
1411 // Set the action for vector operations to "expand", then override it with
1412 // either "custom" or "legal" for specific cases.
1413 static const unsigned VectExpOps
[] = {
1414 // Integer arithmetic:
1415 ISD::ADD
, ISD::SUB
, ISD::MUL
, ISD::SDIV
, ISD::UDIV
,
1416 ISD::SREM
, ISD::UREM
, ISD::SDIVREM
, ISD::UDIVREM
, ISD::SADDO
,
1417 ISD::UADDO
, ISD::SSUBO
, ISD::USUBO
, ISD::SMUL_LOHI
, ISD::UMUL_LOHI
,
1419 ISD::AND
, ISD::OR
, ISD::XOR
, ISD::ROTL
, ISD::ROTR
,
1420 ISD::CTPOP
, ISD::CTLZ
, ISD::CTTZ
,
1421 // Floating point arithmetic/math functions:
1422 ISD::FADD
, ISD::FSUB
, ISD::FMUL
, ISD::FMA
, ISD::FDIV
,
1423 ISD::FREM
, ISD::FNEG
, ISD::FABS
, ISD::FSQRT
, ISD::FSIN
,
1424 ISD::FCOS
, ISD::FPOW
, ISD::FLOG
, ISD::FLOG2
,
1425 ISD::FLOG10
, ISD::FEXP
, ISD::FEXP2
, ISD::FCEIL
, ISD::FTRUNC
,
1426 ISD::FRINT
, ISD::FNEARBYINT
, ISD::FROUND
, ISD::FFLOOR
,
1427 ISD::FMINNUM
, ISD::FMAXNUM
, ISD::FSINCOS
,
1429 ISD::BR_CC
, ISD::SELECT_CC
, ISD::ConstantPool
,
1431 ISD::BUILD_VECTOR
, ISD::SCALAR_TO_VECTOR
,
1432 ISD::EXTRACT_VECTOR_ELT
, ISD::INSERT_VECTOR_ELT
,
1433 ISD::EXTRACT_SUBVECTOR
, ISD::INSERT_SUBVECTOR
,
1434 ISD::CONCAT_VECTORS
, ISD::VECTOR_SHUFFLE
1437 for (MVT VT
: MVT::vector_valuetypes()) {
1438 for (unsigned VectExpOp
: VectExpOps
)
1439 setOperationAction(VectExpOp
, VT
, Expand
);
1441 // Expand all extending loads and truncating stores:
1442 for (MVT TargetVT
: MVT::vector_valuetypes()) {
1445 setLoadExtAction(ISD::EXTLOAD
, TargetVT
, VT
, Expand
);
1446 setLoadExtAction(ISD::ZEXTLOAD
, TargetVT
, VT
, Expand
);
1447 setLoadExtAction(ISD::SEXTLOAD
, TargetVT
, VT
, Expand
);
1448 setTruncStoreAction(VT
, TargetVT
, Expand
);
1451 // Normalize all inputs to SELECT to be vectors of i32.
1452 if (VT
.getVectorElementType() != MVT::i32
) {
1453 MVT VT32
= MVT::getVectorVT(MVT::i32
, VT
.getSizeInBits()/32);
1454 setOperationAction(ISD::SELECT
, VT
, Promote
);
1455 AddPromotedToType(ISD::SELECT
, VT
, VT32
);
1457 setOperationAction(ISD::SRA
, VT
, Custom
);
1458 setOperationAction(ISD::SHL
, VT
, Custom
);
1459 setOperationAction(ISD::SRL
, VT
, Custom
);
1462 // Extending loads from (native) vectors of i8 into (native) vectors of i16
1464 setLoadExtAction(ISD::EXTLOAD
, MVT::v2i16
, MVT::v2i8
, Legal
);
1465 setLoadExtAction(ISD::ZEXTLOAD
, MVT::v2i16
, MVT::v2i8
, Legal
);
1466 setLoadExtAction(ISD::SEXTLOAD
, MVT::v2i16
, MVT::v2i8
, Legal
);
1467 setLoadExtAction(ISD::EXTLOAD
, MVT::v4i16
, MVT::v4i8
, Legal
);
1468 setLoadExtAction(ISD::ZEXTLOAD
, MVT::v4i16
, MVT::v4i8
, Legal
);
1469 setLoadExtAction(ISD::SEXTLOAD
, MVT::v4i16
, MVT::v4i8
, Legal
);
1471 // Types natively supported:
1472 for (MVT NativeVT
: {MVT::v8i1
, MVT::v4i1
, MVT::v2i1
, MVT::v4i8
,
1473 MVT::v8i8
, MVT::v2i16
, MVT::v4i16
, MVT::v2i32
}) {
1474 setOperationAction(ISD::BUILD_VECTOR
, NativeVT
, Custom
);
1475 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, NativeVT
, Custom
);
1476 setOperationAction(ISD::INSERT_VECTOR_ELT
, NativeVT
, Custom
);
1477 setOperationAction(ISD::EXTRACT_SUBVECTOR
, NativeVT
, Custom
);
1478 setOperationAction(ISD::INSERT_SUBVECTOR
, NativeVT
, Custom
);
1479 setOperationAction(ISD::CONCAT_VECTORS
, NativeVT
, Custom
);
1481 setOperationAction(ISD::ADD
, NativeVT
, Legal
);
1482 setOperationAction(ISD::SUB
, NativeVT
, Legal
);
1483 setOperationAction(ISD::MUL
, NativeVT
, Legal
);
1484 setOperationAction(ISD::AND
, NativeVT
, Legal
);
1485 setOperationAction(ISD::OR
, NativeVT
, Legal
);
1486 setOperationAction(ISD::XOR
, NativeVT
, Legal
);
1489 // Custom lower unaligned loads.
1490 // Also, for both loads and stores, verify the alignment of the address
1491 // in case it is a compile-time constant. This is a usability feature to
1492 // provide a meaningful error message to users.
1493 for (MVT VT
: {MVT::i16
, MVT::i32
, MVT::v4i8
, MVT::i64
, MVT::v8i8
,
1494 MVT::v2i16
, MVT::v4i16
, MVT::v2i32
}) {
1495 setOperationAction(ISD::LOAD
, VT
, Custom
);
1496 setOperationAction(ISD::STORE
, VT
, Custom
);
1499 for (MVT VT
: {MVT::v2i16
, MVT::v4i8
, MVT::v2i32
, MVT::v4i16
, MVT::v2i32
}) {
1500 setCondCodeAction(ISD::SETLT
, VT
, Expand
);
1501 setCondCodeAction(ISD::SETLE
, VT
, Expand
);
1502 setCondCodeAction(ISD::SETULT
, VT
, Expand
);
1503 setCondCodeAction(ISD::SETULE
, VT
, Expand
);
1506 // Custom-lower bitcasts from i8 to v8i1.
1507 setOperationAction(ISD::BITCAST
, MVT::i8
, Custom
);
1508 setOperationAction(ISD::SETCC
, MVT::v2i16
, Custom
);
1509 setOperationAction(ISD::VSELECT
, MVT::v2i16
, Custom
);
1510 setOperationAction(ISD::VECTOR_SHUFFLE
, MVT::v4i8
, Custom
);
1511 setOperationAction(ISD::VECTOR_SHUFFLE
, MVT::v4i16
, Custom
);
1512 setOperationAction(ISD::VECTOR_SHUFFLE
, MVT::v8i8
, Custom
);
1515 setOperationAction(ISD::FMA
, MVT::f64
, Expand
);
1516 setOperationAction(ISD::FADD
, MVT::f64
, Expand
);
1517 setOperationAction(ISD::FSUB
, MVT::f64
, Expand
);
1518 setOperationAction(ISD::FMUL
, MVT::f64
, Expand
);
1520 setOperationAction(ISD::FMINNUM
, MVT::f32
, Legal
);
1521 setOperationAction(ISD::FMAXNUM
, MVT::f32
, Legal
);
1523 setOperationAction(ISD::FP_TO_UINT
, MVT::i1
, Promote
);
1524 setOperationAction(ISD::FP_TO_UINT
, MVT::i8
, Promote
);
1525 setOperationAction(ISD::FP_TO_UINT
, MVT::i16
, Promote
);
1526 setOperationAction(ISD::FP_TO_SINT
, MVT::i1
, Promote
);
1527 setOperationAction(ISD::FP_TO_SINT
, MVT::i8
, Promote
);
1528 setOperationAction(ISD::FP_TO_SINT
, MVT::i16
, Promote
);
1529 setOperationAction(ISD::UINT_TO_FP
, MVT::i1
, Promote
);
1530 setOperationAction(ISD::UINT_TO_FP
, MVT::i8
, Promote
);
1531 setOperationAction(ISD::UINT_TO_FP
, MVT::i16
, Promote
);
1532 setOperationAction(ISD::SINT_TO_FP
, MVT::i1
, Promote
);
1533 setOperationAction(ISD::SINT_TO_FP
, MVT::i8
, Promote
);
1534 setOperationAction(ISD::SINT_TO_FP
, MVT::i16
, Promote
);
1536 // Handling of indexed loads/stores: default is "expand".
1538 for (MVT VT
: {MVT::i8
, MVT::i16
, MVT::i32
, MVT::i64
, MVT::f32
, MVT::f64
,
1539 MVT::v2i16
, MVT::v2i32
, MVT::v4i8
, MVT::v4i16
, MVT::v8i8
}) {
1540 setIndexedLoadAction(ISD::POST_INC
, VT
, Legal
);
1541 setIndexedStoreAction(ISD::POST_INC
, VT
, Legal
);
1544 // Subtarget-specific operation actions.
1546 if (Subtarget
.hasV60Ops()) {
1547 setOperationAction(ISD::ROTL
, MVT::i32
, Legal
);
1548 setOperationAction(ISD::ROTL
, MVT::i64
, Legal
);
1549 setOperationAction(ISD::ROTR
, MVT::i32
, Legal
);
1550 setOperationAction(ISD::ROTR
, MVT::i64
, Legal
);
1552 if (Subtarget
.hasV66Ops()) {
1553 setOperationAction(ISD::FADD
, MVT::f64
, Legal
);
1554 setOperationAction(ISD::FSUB
, MVT::f64
, Legal
);
1557 if (Subtarget
.useHVXOps())
1558 initializeHVXLowering();
1560 computeRegisterProperties(&HRI
);
1563 // Library calls for unsupported operations
1565 bool FastMath
= EnableFastMath
;
1567 setLibcallName(RTLIB::SDIV_I32
, "__hexagon_divsi3");
1568 setLibcallName(RTLIB::SDIV_I64
, "__hexagon_divdi3");
1569 setLibcallName(RTLIB::UDIV_I32
, "__hexagon_udivsi3");
1570 setLibcallName(RTLIB::UDIV_I64
, "__hexagon_udivdi3");
1571 setLibcallName(RTLIB::SREM_I32
, "__hexagon_modsi3");
1572 setLibcallName(RTLIB::SREM_I64
, "__hexagon_moddi3");
1573 setLibcallName(RTLIB::UREM_I32
, "__hexagon_umodsi3");
1574 setLibcallName(RTLIB::UREM_I64
, "__hexagon_umoddi3");
1576 setLibcallName(RTLIB::SINTTOFP_I128_F64
, "__hexagon_floattidf");
1577 setLibcallName(RTLIB::SINTTOFP_I128_F32
, "__hexagon_floattisf");
1578 setLibcallName(RTLIB::FPTOUINT_F32_I128
, "__hexagon_fixunssfti");
1579 setLibcallName(RTLIB::FPTOUINT_F64_I128
, "__hexagon_fixunsdfti");
1580 setLibcallName(RTLIB::FPTOSINT_F32_I128
, "__hexagon_fixsfti");
1581 setLibcallName(RTLIB::FPTOSINT_F64_I128
, "__hexagon_fixdfti");
1583 // This is the only fast library function for sqrtd.
1585 setLibcallName(RTLIB::SQRT_F64
, "__hexagon_fast2_sqrtdf2");
1587 // Prefix is: nothing for "slow-math",
1588 // "fast2_" for V5+ fast-math double-precision
1589 // (actually, keep fast-math and fast-math2 separate for now)
1591 setLibcallName(RTLIB::ADD_F64
, "__hexagon_fast_adddf3");
1592 setLibcallName(RTLIB::SUB_F64
, "__hexagon_fast_subdf3");
1593 setLibcallName(RTLIB::MUL_F64
, "__hexagon_fast_muldf3");
1594 setLibcallName(RTLIB::DIV_F64
, "__hexagon_fast_divdf3");
1595 setLibcallName(RTLIB::DIV_F32
, "__hexagon_fast_divsf3");
1597 setLibcallName(RTLIB::ADD_F64
, "__hexagon_adddf3");
1598 setLibcallName(RTLIB::SUB_F64
, "__hexagon_subdf3");
1599 setLibcallName(RTLIB::MUL_F64
, "__hexagon_muldf3");
1600 setLibcallName(RTLIB::DIV_F64
, "__hexagon_divdf3");
1601 setLibcallName(RTLIB::DIV_F32
, "__hexagon_divsf3");
1605 setLibcallName(RTLIB::SQRT_F32
, "__hexagon_fast2_sqrtf");
1607 setLibcallName(RTLIB::SQRT_F32
, "__hexagon_sqrtf");
1609 // These cause problems when the shift amount is non-constant.
1610 setLibcallName(RTLIB::SHL_I128
, nullptr);
1611 setLibcallName(RTLIB::SRL_I128
, nullptr);
1612 setLibcallName(RTLIB::SRA_I128
, nullptr);
1615 const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode
) const {
1616 switch ((HexagonISD::NodeType
)Opcode
) {
1617 case HexagonISD::ADDC
: return "HexagonISD::ADDC";
1618 case HexagonISD::SUBC
: return "HexagonISD::SUBC";
1619 case HexagonISD::ALLOCA
: return "HexagonISD::ALLOCA";
1620 case HexagonISD::AT_GOT
: return "HexagonISD::AT_GOT";
1621 case HexagonISD::AT_PCREL
: return "HexagonISD::AT_PCREL";
1622 case HexagonISD::BARRIER
: return "HexagonISD::BARRIER";
1623 case HexagonISD::CALL
: return "HexagonISD::CALL";
1624 case HexagonISD::CALLnr
: return "HexagonISD::CALLnr";
1625 case HexagonISD::CALLR
: return "HexagonISD::CALLR";
1626 case HexagonISD::COMBINE
: return "HexagonISD::COMBINE";
1627 case HexagonISD::CONST32_GP
: return "HexagonISD::CONST32_GP";
1628 case HexagonISD::CONST32
: return "HexagonISD::CONST32";
1629 case HexagonISD::CP
: return "HexagonISD::CP";
1630 case HexagonISD::DCFETCH
: return "HexagonISD::DCFETCH";
1631 case HexagonISD::EH_RETURN
: return "HexagonISD::EH_RETURN";
1632 case HexagonISD::TSTBIT
: return "HexagonISD::TSTBIT";
1633 case HexagonISD::EXTRACTU
: return "HexagonISD::EXTRACTU";
1634 case HexagonISD::INSERT
: return "HexagonISD::INSERT";
1635 case HexagonISD::JT
: return "HexagonISD::JT";
1636 case HexagonISD::RET_FLAG
: return "HexagonISD::RET_FLAG";
1637 case HexagonISD::TC_RETURN
: return "HexagonISD::TC_RETURN";
1638 case HexagonISD::VASL
: return "HexagonISD::VASL";
1639 case HexagonISD::VASR
: return "HexagonISD::VASR";
1640 case HexagonISD::VLSR
: return "HexagonISD::VLSR";
1641 case HexagonISD::VSPLAT
: return "HexagonISD::VSPLAT";
1642 case HexagonISD::VEXTRACTW
: return "HexagonISD::VEXTRACTW";
1643 case HexagonISD::VINSERTW0
: return "HexagonISD::VINSERTW0";
1644 case HexagonISD::VROR
: return "HexagonISD::VROR";
1645 case HexagonISD::READCYCLE
: return "HexagonISD::READCYCLE";
1646 case HexagonISD::VZERO
: return "HexagonISD::VZERO";
1647 case HexagonISD::VSPLATW
: return "HexagonISD::VSPLATW";
1648 case HexagonISD::D2P
: return "HexagonISD::D2P";
1649 case HexagonISD::P2D
: return "HexagonISD::P2D";
1650 case HexagonISD::V2Q
: return "HexagonISD::V2Q";
1651 case HexagonISD::Q2V
: return "HexagonISD::Q2V";
1652 case HexagonISD::QCAT
: return "HexagonISD::QCAT";
1653 case HexagonISD::QTRUE
: return "HexagonISD::QTRUE";
1654 case HexagonISD::QFALSE
: return "HexagonISD::QFALSE";
1655 case HexagonISD::TYPECAST
: return "HexagonISD::TYPECAST";
1656 case HexagonISD::VALIGN
: return "HexagonISD::VALIGN";
1657 case HexagonISD::VALIGNADDR
: return "HexagonISD::VALIGNADDR";
1658 case HexagonISD::OP_END
: break;
1664 HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr
, const SDLoc
&dl
,
1665 unsigned NeedAlign
) const {
1666 auto *CA
= dyn_cast
<ConstantSDNode
>(Ptr
);
1669 unsigned Addr
= CA
->getZExtValue();
1670 unsigned HaveAlign
= Addr
!= 0 ? 1u << countTrailingZeros(Addr
) : NeedAlign
;
1671 if (HaveAlign
< NeedAlign
) {
1673 raw_string_ostream
O(ErrMsg
);
1674 O
<< "Misaligned constant address: " << format_hex(Addr
, 10)
1675 << " has alignment " << HaveAlign
1676 << ", but the memory access requires " << NeedAlign
;
1677 if (DebugLoc DL
= dl
.getDebugLoc())
1678 DL
.print(O
<< ", at ");
1679 report_fatal_error(O
.str());
1683 // Bit-reverse Load Intrinsic: Check if the instruction is a bit reverse load
1685 static bool isBrevLdIntrinsic(const Value
*Inst
) {
1686 unsigned ID
= cast
<IntrinsicInst
>(Inst
)->getIntrinsicID();
1687 return (ID
== Intrinsic::hexagon_L2_loadrd_pbr
||
1688 ID
== Intrinsic::hexagon_L2_loadri_pbr
||
1689 ID
== Intrinsic::hexagon_L2_loadrh_pbr
||
1690 ID
== Intrinsic::hexagon_L2_loadruh_pbr
||
1691 ID
== Intrinsic::hexagon_L2_loadrb_pbr
||
1692 ID
== Intrinsic::hexagon_L2_loadrub_pbr
);
1695 // Bit-reverse Load Intrinsic :Crawl up and figure out the object from previous
1696 // instruction. So far we only handle bitcast, extract value and bit reverse
1697 // load intrinsic instructions. Should we handle CGEP ?
1698 static Value
*getBrevLdObject(Value
*V
) {
1699 if (Operator::getOpcode(V
) == Instruction::ExtractValue
||
1700 Operator::getOpcode(V
) == Instruction::BitCast
)
1701 V
= cast
<Operator
>(V
)->getOperand(0);
1702 else if (isa
<IntrinsicInst
>(V
) && isBrevLdIntrinsic(V
))
1703 V
= cast
<Instruction
>(V
)->getOperand(0);
1707 // Bit-reverse Load Intrinsic: For a PHI Node return either an incoming edge or
1708 // a back edge. If the back edge comes from the intrinsic itself, the incoming
1709 // edge is returned.
1710 static Value
*returnEdge(const PHINode
*PN
, Value
*IntrBaseVal
) {
1711 const BasicBlock
*Parent
= PN
->getParent();
1713 for (unsigned i
= 0, e
= PN
->getNumIncomingValues(); i
< e
; ++i
) {
1714 BasicBlock
*Blk
= PN
->getIncomingBlock(i
);
1715 // Determine if the back edge is originated from intrinsic.
1716 if (Blk
== Parent
) {
1717 Value
*BackEdgeVal
= PN
->getIncomingValue(i
);
1719 // Loop over till we return the same Value or we hit the IntrBaseVal.
1721 BaseVal
= BackEdgeVal
;
1722 BackEdgeVal
= getBrevLdObject(BackEdgeVal
);
1723 } while ((BaseVal
!= BackEdgeVal
) && (IntrBaseVal
!= BackEdgeVal
));
1724 // If the getBrevLdObject returns IntrBaseVal, we should return the
1726 if (IntrBaseVal
== BackEdgeVal
)
1730 } else // Set the node to incoming edge.
1733 assert(Idx
>= 0 && "Unexpected index to incoming argument in PHI");
1734 return PN
->getIncomingValue(Idx
);
1737 // Bit-reverse Load Intrinsic: Figure out the underlying object the base
1738 // pointer points to, for the bit-reverse load intrinsic. Setting this to
1739 // memoperand might help alias analysis to figure out the dependencies.
1740 static Value
*getUnderLyingObjectForBrevLdIntr(Value
*V
) {
1741 Value
*IntrBaseVal
= V
;
1743 // Loop over till we return the same Value, implies we either figure out
1744 // the object or we hit a PHI
1747 V
= getBrevLdObject(V
);
1748 } while (BaseVal
!= V
);
1750 // Identify the object from PHINode.
1751 if (const PHINode
*PN
= dyn_cast
<PHINode
>(V
))
1752 return returnEdge(PN
, IntrBaseVal
);
1753 // For non PHI nodes, the object is the last value returned by getBrevLdObject
1758 /// Given an intrinsic, checks if on the target the intrinsic will need to map
1759 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1760 /// true and store the intrinsic information into the IntrinsicInfo that was
1761 /// passed to the function.
1762 bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo
&Info
,
1764 MachineFunction
&MF
,
1765 unsigned Intrinsic
) const {
1766 switch (Intrinsic
) {
1767 case Intrinsic::hexagon_L2_loadrd_pbr
:
1768 case Intrinsic::hexagon_L2_loadri_pbr
:
1769 case Intrinsic::hexagon_L2_loadrh_pbr
:
1770 case Intrinsic::hexagon_L2_loadruh_pbr
:
1771 case Intrinsic::hexagon_L2_loadrb_pbr
:
1772 case Intrinsic::hexagon_L2_loadrub_pbr
: {
1773 Info
.opc
= ISD::INTRINSIC_W_CHAIN
;
1774 auto &DL
= I
.getCalledFunction()->getParent()->getDataLayout();
1775 auto &Cont
= I
.getCalledFunction()->getParent()->getContext();
1776 // The intrinsic function call is of the form { ElTy, i8* }
1777 // @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
1778 // should be derived from ElTy.
1779 Type
*ElTy
= I
.getCalledFunction()->getReturnType()->getStructElementType(0);
1780 Info
.memVT
= MVT::getVT(ElTy
);
1781 llvm::Value
*BasePtrVal
= I
.getOperand(0);
1782 Info
.ptrVal
= getUnderLyingObjectForBrevLdIntr(BasePtrVal
);
1783 // The offset value comes through Modifier register. For now, assume the
1787 MaybeAlign(DL
.getABITypeAlignment(Info
.memVT
.getTypeForEVT(Cont
)));
1788 Info
.flags
= MachineMemOperand::MOLoad
;
1791 case Intrinsic::hexagon_V6_vgathermw
:
1792 case Intrinsic::hexagon_V6_vgathermw_128B
:
1793 case Intrinsic::hexagon_V6_vgathermh
:
1794 case Intrinsic::hexagon_V6_vgathermh_128B
:
1795 case Intrinsic::hexagon_V6_vgathermhw
:
1796 case Intrinsic::hexagon_V6_vgathermhw_128B
:
1797 case Intrinsic::hexagon_V6_vgathermwq
:
1798 case Intrinsic::hexagon_V6_vgathermwq_128B
:
1799 case Intrinsic::hexagon_V6_vgathermhq
:
1800 case Intrinsic::hexagon_V6_vgathermhq_128B
:
1801 case Intrinsic::hexagon_V6_vgathermhwq
:
1802 case Intrinsic::hexagon_V6_vgathermhwq_128B
: {
1803 const Module
&M
= *I
.getParent()->getParent()->getParent();
1804 Info
.opc
= ISD::INTRINSIC_W_CHAIN
;
1805 Type
*VecTy
= I
.getArgOperand(1)->getType();
1806 Info
.memVT
= MVT::getVT(VecTy
);
1807 Info
.ptrVal
= I
.getArgOperand(0);
1810 MaybeAlign(M
.getDataLayout().getTypeAllocSizeInBits(VecTy
) / 8);
1811 Info
.flags
= MachineMemOperand::MOLoad
|
1812 MachineMemOperand::MOStore
|
1813 MachineMemOperand::MOVolatile
;
1822 bool HexagonTargetLowering::hasBitTest(SDValue X
, SDValue Y
) const {
1823 return X
.getValueType().isScalarInteger(); // 'tstbit'
1826 bool HexagonTargetLowering::isTruncateFree(Type
*Ty1
, Type
*Ty2
) const {
1827 return isTruncateFree(EVT::getEVT(Ty1
), EVT::getEVT(Ty2
));
1830 bool HexagonTargetLowering::isTruncateFree(EVT VT1
, EVT VT2
) const {
1831 if (!VT1
.isSimple() || !VT2
.isSimple())
1833 return VT1
.getSimpleVT() == MVT::i64
&& VT2
.getSimpleVT() == MVT::i32
;
1836 bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT
) const {
1837 return isOperationLegalOrCustom(ISD::FMA
, VT
);
1840 // Should we expand the build vector with shuffles?
1841 bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT
,
1842 unsigned DefinedValues
) const {
1846 bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef
<int> Mask
,
1851 TargetLoweringBase::LegalizeTypeAction
1852 HexagonTargetLowering::getPreferredVectorAction(MVT VT
) const {
1853 if (VT
.getVectorNumElements() == 1)
1854 return TargetLoweringBase::TypeScalarizeVector
;
1856 // Always widen vectors of i1.
1857 MVT ElemTy
= VT
.getVectorElementType();
1858 if (ElemTy
== MVT::i1
)
1859 return TargetLoweringBase::TypeWidenVector
;
1861 if (Subtarget
.useHVXOps()) {
1862 // If the size of VT is at least half of the vector length,
1863 // widen the vector. Note: the threshold was not selected in
1864 // any scientific way.
1865 ArrayRef
<MVT
> Tys
= Subtarget
.getHVXElementTypes();
1866 if (llvm::find(Tys
, ElemTy
) != Tys
.end()) {
1867 unsigned HwWidth
= 8*Subtarget
.getVectorLength();
1868 unsigned VecWidth
= VT
.getSizeInBits();
1869 if (VecWidth
>= HwWidth
/2 && VecWidth
< HwWidth
)
1870 return TargetLoweringBase::TypeWidenVector
;
1873 return TargetLoweringBase::TypeSplitVector
;
1876 std::pair
<SDValue
, int>
1877 HexagonTargetLowering::getBaseAndOffset(SDValue Addr
) const {
1878 if (Addr
.getOpcode() == ISD::ADD
) {
1879 SDValue Op1
= Addr
.getOperand(1);
1880 if (auto *CN
= dyn_cast
<const ConstantSDNode
>(Op1
.getNode()))
1881 return { Addr
.getOperand(0), CN
->getSExtValue() };
1886 // Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors
1887 // to select data from, V3 is the permutation.
1889 HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op
, SelectionDAG
&DAG
)
1891 const auto *SVN
= cast
<ShuffleVectorSDNode
>(Op
);
1892 ArrayRef
<int> AM
= SVN
->getMask();
1893 assert(AM
.size() <= 8 && "Unexpected shuffle mask");
1894 unsigned VecLen
= AM
.size();
1897 assert(!Subtarget
.isHVXVectorType(VecTy
, true) &&
1898 "HVX shuffles should be legal");
1899 assert(VecTy
.getSizeInBits() <= 64 && "Unexpected vector length");
1901 SDValue Op0
= Op
.getOperand(0);
1902 SDValue Op1
= Op
.getOperand(1);
1903 const SDLoc
&dl(Op
);
1905 // If the inputs are not the same as the output, bail. This is not an
1906 // error situation, but complicates the handling and the default expansion
1907 // (into BUILD_VECTOR) should be adequate.
1908 if (ty(Op0
) != VecTy
|| ty(Op1
) != VecTy
)
1911 // Normalize the mask so that the first non-negative index comes from
1912 // the first operand.
1913 SmallVector
<int,8> Mask(AM
.begin(), AM
.end());
1914 unsigned F
= llvm::find_if(AM
, [](int M
) { return M
>= 0; }) - AM
.data();
1916 return DAG
.getUNDEF(VecTy
);
1917 if (AM
[F
] >= int(VecLen
)) {
1918 ShuffleVectorSDNode::commuteMask(Mask
);
1919 std::swap(Op0
, Op1
);
1922 // Express the shuffle mask in terms of bytes.
1923 SmallVector
<int,8> ByteMask
;
1924 unsigned ElemBytes
= VecTy
.getVectorElementType().getSizeInBits() / 8;
1925 for (unsigned i
= 0, e
= Mask
.size(); i
!= e
; ++i
) {
1928 for (unsigned j
= 0; j
!= ElemBytes
; ++j
)
1929 ByteMask
.push_back(-1);
1931 for (unsigned j
= 0; j
!= ElemBytes
; ++j
)
1932 ByteMask
.push_back(M
*ElemBytes
+ j
);
1935 assert(ByteMask
.size() <= 8);
1937 // All non-undef (non-negative) indexes are well within [0..127], so they
1938 // fit in a single byte. Build two 64-bit words:
1939 // - MaskIdx where each byte is the corresponding index (for non-negative
1940 // indexes), and 0xFF for negative indexes, and
1941 // - MaskUnd that has 0xFF for each negative index.
1942 uint64_t MaskIdx
= 0;
1943 uint64_t MaskUnd
= 0;
1944 for (unsigned i
= 0, e
= ByteMask
.size(); i
!= e
; ++i
) {
1946 uint64_t M
= ByteMask
[i
] & 0xFF;
1952 if (ByteMask
.size() == 4) {
1954 if (MaskIdx
== (0x03020100 | MaskUnd
))
1957 if (MaskIdx
== (0x00010203 | MaskUnd
)) {
1958 SDValue T0
= DAG
.getBitcast(MVT::i32
, Op0
);
1959 SDValue T1
= DAG
.getNode(ISD::BSWAP
, dl
, MVT::i32
, T0
);
1960 return DAG
.getBitcast(VecTy
, T1
);
1964 SDValue Concat10
= DAG
.getNode(HexagonISD::COMBINE
, dl
,
1965 typeJoin({ty(Op1
), ty(Op0
)}), {Op1
, Op0
});
1966 if (MaskIdx
== (0x06040200 | MaskUnd
))
1967 return getInstr(Hexagon::S2_vtrunehb
, dl
, VecTy
, {Concat10
}, DAG
);
1968 if (MaskIdx
== (0x07050301 | MaskUnd
))
1969 return getInstr(Hexagon::S2_vtrunohb
, dl
, VecTy
, {Concat10
}, DAG
);
1971 SDValue Concat01
= DAG
.getNode(HexagonISD::COMBINE
, dl
,
1972 typeJoin({ty(Op0
), ty(Op1
)}), {Op0
, Op1
});
1973 if (MaskIdx
== (0x02000604 | MaskUnd
))
1974 return getInstr(Hexagon::S2_vtrunehb
, dl
, VecTy
, {Concat01
}, DAG
);
1975 if (MaskIdx
== (0x03010705 | MaskUnd
))
1976 return getInstr(Hexagon::S2_vtrunohb
, dl
, VecTy
, {Concat01
}, DAG
);
1979 if (ByteMask
.size() == 8) {
1981 if (MaskIdx
== (0x0706050403020100ull
| MaskUnd
))
1984 if (MaskIdx
== (0x0001020304050607ull
| MaskUnd
)) {
1985 SDValue T0
= DAG
.getBitcast(MVT::i64
, Op0
);
1986 SDValue T1
= DAG
.getNode(ISD::BSWAP
, dl
, MVT::i64
, T0
);
1987 return DAG
.getBitcast(VecTy
, T1
);
1991 if (MaskIdx
== (0x0d0c050409080100ull
| MaskUnd
))
1992 return getInstr(Hexagon::S2_shuffeh
, dl
, VecTy
, {Op1
, Op0
}, DAG
);
1993 if (MaskIdx
== (0x0f0e07060b0a0302ull
| MaskUnd
))
1994 return getInstr(Hexagon::S2_shuffoh
, dl
, VecTy
, {Op1
, Op0
}, DAG
);
1995 if (MaskIdx
== (0x0d0c090805040100ull
| MaskUnd
))
1996 return getInstr(Hexagon::S2_vtrunewh
, dl
, VecTy
, {Op1
, Op0
}, DAG
);
1997 if (MaskIdx
== (0x0f0e0b0a07060302ull
| MaskUnd
))
1998 return getInstr(Hexagon::S2_vtrunowh
, dl
, VecTy
, {Op1
, Op0
}, DAG
);
1999 if (MaskIdx
== (0x0706030205040100ull
| MaskUnd
)) {
2000 VectorPair P
= opSplit(Op0
, dl
, DAG
);
2001 return getInstr(Hexagon::S2_packhl
, dl
, VecTy
, {P
.second
, P
.first
}, DAG
);
2005 if (MaskIdx
== (0x0e060c040a020800ull
| MaskUnd
))
2006 return getInstr(Hexagon::S2_shuffeb
, dl
, VecTy
, {Op1
, Op0
}, DAG
);
2007 if (MaskIdx
== (0x0f070d050b030901ull
| MaskUnd
))
2008 return getInstr(Hexagon::S2_shuffob
, dl
, VecTy
, {Op1
, Op0
}, DAG
);
2014 // Create a Hexagon-specific node for shifting a vector by an integer.
2016 HexagonTargetLowering::getVectorShiftByInt(SDValue Op
, SelectionDAG
&DAG
)
2018 if (auto *BVN
= dyn_cast
<BuildVectorSDNode
>(Op
.getOperand(1).getNode())) {
2019 if (SDValue S
= BVN
->getSplatValue()) {
2021 switch (Op
.getOpcode()) {
2023 NewOpc
= HexagonISD::VASL
;
2026 NewOpc
= HexagonISD::VASR
;
2029 NewOpc
= HexagonISD::VLSR
;
2032 llvm_unreachable("Unexpected shift opcode");
2034 return DAG
.getNode(NewOpc
, SDLoc(Op
), ty(Op
), Op
.getOperand(0), S
);
2042 HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op
, SelectionDAG
&DAG
) const {
2043 return getVectorShiftByInt(Op
, DAG
);
2047 HexagonTargetLowering::LowerROTL(SDValue Op
, SelectionDAG
&DAG
) const {
2048 if (isa
<ConstantSDNode
>(Op
.getOperand(1).getNode()))
2054 HexagonTargetLowering::LowerBITCAST(SDValue Op
, SelectionDAG
&DAG
) const {
2056 SDValue InpV
= Op
.getOperand(0);
2057 MVT InpTy
= ty(InpV
);
2058 assert(ResTy
.getSizeInBits() == InpTy
.getSizeInBits());
2059 const SDLoc
&dl(Op
);
2061 // Handle conversion from i8 to v8i1.
2062 if (ResTy
== MVT::v8i1
) {
2063 SDValue Sc
= DAG
.getBitcast(tyScalar(InpTy
), InpV
);
2064 SDValue Ext
= DAG
.getZExtOrTrunc(Sc
, dl
, MVT::i32
);
2065 return getInstr(Hexagon::C2_tfrrp
, dl
, ResTy
, Ext
, DAG
);
2072 HexagonTargetLowering::getBuildVectorConstInts(ArrayRef
<SDValue
> Values
,
2073 MVT VecTy
, SelectionDAG
&DAG
,
2074 MutableArrayRef
<ConstantInt
*> Consts
) const {
2075 MVT ElemTy
= VecTy
.getVectorElementType();
2076 unsigned ElemWidth
= ElemTy
.getSizeInBits();
2077 IntegerType
*IntTy
= IntegerType::get(*DAG
.getContext(), ElemWidth
);
2078 bool AllConst
= true;
2080 for (unsigned i
= 0, e
= Values
.size(); i
!= e
; ++i
) {
2081 SDValue V
= Values
[i
];
2083 Consts
[i
] = ConstantInt::get(IntTy
, 0);
2086 // Make sure to always cast to IntTy.
2087 if (auto *CN
= dyn_cast
<ConstantSDNode
>(V
.getNode())) {
2088 const ConstantInt
*CI
= CN
->getConstantIntValue();
2089 Consts
[i
] = ConstantInt::get(IntTy
, CI
->getValue().getSExtValue());
2090 } else if (auto *CN
= dyn_cast
<ConstantFPSDNode
>(V
.getNode())) {
2091 const ConstantFP
*CF
= CN
->getConstantFPValue();
2092 APInt A
= CF
->getValueAPF().bitcastToAPInt();
2093 Consts
[i
] = ConstantInt::get(IntTy
, A
.getZExtValue());
2102 HexagonTargetLowering::buildVector32(ArrayRef
<SDValue
> Elem
, const SDLoc
&dl
,
2103 MVT VecTy
, SelectionDAG
&DAG
) const {
2104 MVT ElemTy
= VecTy
.getVectorElementType();
2105 assert(VecTy
.getVectorNumElements() == Elem
.size());
2107 SmallVector
<ConstantInt
*,4> Consts(Elem
.size());
2108 bool AllConst
= getBuildVectorConstInts(Elem
, VecTy
, DAG
, Consts
);
2110 unsigned First
, Num
= Elem
.size();
2111 for (First
= 0; First
!= Num
; ++First
)
2112 if (!isUndef(Elem
[First
]))
2115 return DAG
.getUNDEF(VecTy
);
2118 llvm::all_of(Consts
, [](ConstantInt
*CI
) { return CI
->isZero(); }))
2119 return getZero(dl
, VecTy
, DAG
);
2121 if (ElemTy
== MVT::i16
) {
2122 assert(Elem
.size() == 2);
2124 uint32_t V
= (Consts
[0]->getZExtValue() & 0xFFFF) |
2125 Consts
[1]->getZExtValue() << 16;
2126 return DAG
.getBitcast(MVT::v2i16
, DAG
.getConstant(V
, dl
, MVT::i32
));
2128 SDValue N
= getInstr(Hexagon::A2_combine_ll
, dl
, MVT::i32
,
2129 {Elem
[1], Elem
[0]}, DAG
);
2130 return DAG
.getBitcast(MVT::v2i16
, N
);
2133 if (ElemTy
== MVT::i8
) {
2134 // First try generating a constant.
2136 int32_t V
= (Consts
[0]->getZExtValue() & 0xFF) |
2137 (Consts
[1]->getZExtValue() & 0xFF) << 8 |
2138 (Consts
[1]->getZExtValue() & 0xFF) << 16 |
2139 Consts
[2]->getZExtValue() << 24;
2140 return DAG
.getBitcast(MVT::v4i8
, DAG
.getConstant(V
, dl
, MVT::i32
));
2144 bool IsSplat
= true;
2145 for (unsigned i
= 0; i
!= Num
; ++i
) {
2148 if (Elem
[i
] == Elem
[First
] || isUndef(Elem
[i
]))
2154 // Legalize the operand to VSPLAT.
2155 SDValue Ext
= DAG
.getZExtOrTrunc(Elem
[First
], dl
, MVT::i32
);
2156 return DAG
.getNode(HexagonISD::VSPLAT
, dl
, VecTy
, Ext
);
2160 // (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) |
2161 // (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16
2162 assert(Elem
.size() == 4);
2164 for (unsigned i
= 0; i
!= 4; ++i
) {
2165 Vs
[i
] = DAG
.getZExtOrTrunc(Elem
[i
], dl
, MVT::i32
);
2166 Vs
[i
] = DAG
.getZeroExtendInReg(Vs
[i
], dl
, MVT::i8
);
2168 SDValue S8
= DAG
.getConstant(8, dl
, MVT::i32
);
2169 SDValue T0
= DAG
.getNode(ISD::SHL
, dl
, MVT::i32
, {Vs
[1], S8
});
2170 SDValue T1
= DAG
.getNode(ISD::SHL
, dl
, MVT::i32
, {Vs
[3], S8
});
2171 SDValue B0
= DAG
.getNode(ISD::OR
, dl
, MVT::i32
, {Vs
[0], T0
});
2172 SDValue B1
= DAG
.getNode(ISD::OR
, dl
, MVT::i32
, {Vs
[2], T1
});
2174 SDValue R
= getInstr(Hexagon::A2_combine_ll
, dl
, MVT::i32
, {B1
, B0
}, DAG
);
2175 return DAG
.getBitcast(MVT::v4i8
, R
);
2179 dbgs() << "VecTy: " << EVT(VecTy
).getEVTString() << '\n';
2181 llvm_unreachable("Unexpected vector element type");
2185 HexagonTargetLowering::buildVector64(ArrayRef
<SDValue
> Elem
, const SDLoc
&dl
,
2186 MVT VecTy
, SelectionDAG
&DAG
) const {
2187 MVT ElemTy
= VecTy
.getVectorElementType();
2188 assert(VecTy
.getVectorNumElements() == Elem
.size());
2190 SmallVector
<ConstantInt
*,8> Consts(Elem
.size());
2191 bool AllConst
= getBuildVectorConstInts(Elem
, VecTy
, DAG
, Consts
);
2193 unsigned First
, Num
= Elem
.size();
2194 for (First
= 0; First
!= Num
; ++First
)
2195 if (!isUndef(Elem
[First
]))
2198 return DAG
.getUNDEF(VecTy
);
2201 llvm::all_of(Consts
, [](ConstantInt
*CI
) { return CI
->isZero(); }))
2202 return getZero(dl
, VecTy
, DAG
);
2204 // First try splat if possible.
2205 if (ElemTy
== MVT::i16
) {
2206 bool IsSplat
= true;
2207 for (unsigned i
= 0; i
!= Num
; ++i
) {
2210 if (Elem
[i
] == Elem
[First
] || isUndef(Elem
[i
]))
2216 // Legalize the operand to VSPLAT.
2217 SDValue Ext
= DAG
.getZExtOrTrunc(Elem
[First
], dl
, MVT::i32
);
2218 return DAG
.getNode(HexagonISD::VSPLAT
, dl
, VecTy
, Ext
);
2222 // Then try constant.
2225 unsigned W
= ElemTy
.getSizeInBits();
2226 uint64_t Mask
= (ElemTy
== MVT::i8
) ? 0xFFull
2227 : (ElemTy
== MVT::i16
) ? 0xFFFFull
: 0xFFFFFFFFull
;
2228 for (unsigned i
= 0; i
!= Num
; ++i
)
2229 Val
= (Val
<< W
) | (Consts
[Num
-1-i
]->getZExtValue() & Mask
);
2230 SDValue V0
= DAG
.getConstant(Val
, dl
, MVT::i64
);
2231 return DAG
.getBitcast(VecTy
, V0
);
2234 // Build two 32-bit vectors and concatenate.
2235 MVT HalfTy
= MVT::getVectorVT(ElemTy
, Num
/2);
2236 SDValue L
= (ElemTy
== MVT::i32
)
2238 : buildVector32(Elem
.take_front(Num
/2), dl
, HalfTy
, DAG
);
2239 SDValue H
= (ElemTy
== MVT::i32
)
2241 : buildVector32(Elem
.drop_front(Num
/2), dl
, HalfTy
, DAG
);
2242 return DAG
.getNode(HexagonISD::COMBINE
, dl
, VecTy
, {H
, L
});
2246 HexagonTargetLowering::extractVector(SDValue VecV
, SDValue IdxV
,
2247 const SDLoc
&dl
, MVT ValTy
, MVT ResTy
,
2248 SelectionDAG
&DAG
) const {
2249 MVT VecTy
= ty(VecV
);
2250 assert(!ValTy
.isVector() ||
2251 VecTy
.getVectorElementType() == ValTy
.getVectorElementType());
2252 unsigned VecWidth
= VecTy
.getSizeInBits();
2253 unsigned ValWidth
= ValTy
.getSizeInBits();
2254 unsigned ElemWidth
= VecTy
.getVectorElementType().getSizeInBits();
2255 assert((VecWidth
% ElemWidth
) == 0);
2256 auto *IdxN
= dyn_cast
<ConstantSDNode
>(IdxV
);
2258 // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
2259 // without any coprocessors).
2260 if (ElemWidth
== 1) {
2261 assert(VecWidth
== VecTy
.getVectorNumElements() && "Sanity failure");
2262 assert(VecWidth
== 8 || VecWidth
== 4 || VecWidth
== 2);
2263 // Check if this is an extract of the lowest bit.
2265 // Extracting the lowest bit is a no-op, but it changes the type,
2266 // so it must be kept as an operation to avoid errors related to
2268 if (IdxN
->isNullValue() && ValTy
.getSizeInBits() == 1)
2269 return DAG
.getNode(HexagonISD::TYPECAST
, dl
, MVT::i1
, VecV
);
2272 // If the value extracted is a single bit, use tstbit.
2273 if (ValWidth
== 1) {
2274 SDValue A0
= getInstr(Hexagon::C2_tfrpr
, dl
, MVT::i32
, {VecV
}, DAG
);
2275 SDValue M0
= DAG
.getConstant(8 / VecWidth
, dl
, MVT::i32
);
2276 SDValue I0
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
, M0
);
2277 return DAG
.getNode(HexagonISD::TSTBIT
, dl
, MVT::i1
, A0
, I0
);
2280 // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
2281 // a predicate register. The elements of the vector are repeated
2282 // in the register (if necessary) so that the total number is 8.
2283 // The extracted subvector will need to be expanded in such a way.
2284 unsigned Scale
= VecWidth
/ ValWidth
;
2286 // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
2288 assert(ty(IdxV
) == MVT::i32
);
2289 unsigned VecRep
= 8 / VecWidth
;
2290 SDValue S0
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
,
2291 DAG
.getConstant(8*VecRep
, dl
, MVT::i32
));
2292 SDValue T0
= DAG
.getNode(HexagonISD::P2D
, dl
, MVT::i64
, VecV
);
2293 SDValue T1
= DAG
.getNode(ISD::SRL
, dl
, MVT::i64
, T0
, S0
);
2295 // The longest possible subvector is at most 32 bits, so it is always
2296 // contained in the low subregister.
2297 T1
= DAG
.getTargetExtractSubreg(Hexagon::isub_lo
, dl
, MVT::i32
, T1
);
2298 T1
= expandPredicate(T1
, dl
, DAG
);
2302 return DAG
.getNode(HexagonISD::D2P
, dl
, ResTy
, T1
);
2305 assert(VecWidth
== 32 || VecWidth
== 64);
2307 // Cast everything to scalar integer types.
2308 MVT ScalarTy
= tyScalar(VecTy
);
2309 VecV
= DAG
.getBitcast(ScalarTy
, VecV
);
2311 SDValue WidthV
= DAG
.getConstant(ValWidth
, dl
, MVT::i32
);
2315 unsigned Off
= IdxN
->getZExtValue() * ElemWidth
;
2316 if (VecWidth
== 64 && ValWidth
== 32) {
2317 assert(Off
== 0 || Off
== 32);
2318 unsigned SubIdx
= Off
== 0 ? Hexagon::isub_lo
: Hexagon::isub_hi
;
2319 ExtV
= DAG
.getTargetExtractSubreg(SubIdx
, dl
, MVT::i32
, VecV
);
2320 } else if (Off
== 0 && (ValWidth
% 8) == 0) {
2321 ExtV
= DAG
.getZeroExtendInReg(VecV
, dl
, tyScalar(ValTy
));
2323 SDValue OffV
= DAG
.getConstant(Off
, dl
, MVT::i32
);
2324 // The return type of EXTRACTU must be the same as the type of the
2326 ExtV
= DAG
.getNode(HexagonISD::EXTRACTU
, dl
, ScalarTy
,
2327 {VecV
, WidthV
, OffV
});
2330 if (ty(IdxV
) != MVT::i32
)
2331 IdxV
= DAG
.getZExtOrTrunc(IdxV
, dl
, MVT::i32
);
2332 SDValue OffV
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
,
2333 DAG
.getConstant(ElemWidth
, dl
, MVT::i32
));
2334 ExtV
= DAG
.getNode(HexagonISD::EXTRACTU
, dl
, ScalarTy
,
2335 {VecV
, WidthV
, OffV
});
2338 // Cast ExtV to the requested result type.
2339 ExtV
= DAG
.getZExtOrTrunc(ExtV
, dl
, tyScalar(ResTy
));
2340 ExtV
= DAG
.getBitcast(ResTy
, ExtV
);
2345 HexagonTargetLowering::insertVector(SDValue VecV
, SDValue ValV
, SDValue IdxV
,
2346 const SDLoc
&dl
, MVT ValTy
,
2347 SelectionDAG
&DAG
) const {
2348 MVT VecTy
= ty(VecV
);
2349 if (VecTy
.getVectorElementType() == MVT::i1
) {
2350 MVT ValTy
= ty(ValV
);
2351 assert(ValTy
.getVectorElementType() == MVT::i1
);
2352 SDValue ValR
= DAG
.getNode(HexagonISD::P2D
, dl
, MVT::i64
, ValV
);
2353 unsigned VecLen
= VecTy
.getVectorNumElements();
2354 unsigned Scale
= VecLen
/ ValTy
.getVectorNumElements();
2357 for (unsigned R
= Scale
; R
> 1; R
/= 2) {
2358 ValR
= contractPredicate(ValR
, dl
, DAG
);
2359 ValR
= DAG
.getNode(HexagonISD::COMBINE
, dl
, MVT::i64
,
2360 DAG
.getUNDEF(MVT::i32
), ValR
);
2362 // The longest possible subvector is at most 32 bits, so it is always
2363 // contained in the low subregister.
2364 ValR
= DAG
.getTargetExtractSubreg(Hexagon::isub_lo
, dl
, MVT::i32
, ValR
);
2366 unsigned ValBytes
= 64 / Scale
;
2367 SDValue Width
= DAG
.getConstant(ValBytes
*8, dl
, MVT::i32
);
2368 SDValue Idx
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
,
2369 DAG
.getConstant(8, dl
, MVT::i32
));
2370 SDValue VecR
= DAG
.getNode(HexagonISD::P2D
, dl
, MVT::i64
, VecV
);
2371 SDValue Ins
= DAG
.getNode(HexagonISD::INSERT
, dl
, MVT::i32
,
2372 {VecR
, ValR
, Width
, Idx
});
2373 return DAG
.getNode(HexagonISD::D2P
, dl
, VecTy
, Ins
);
2376 unsigned VecWidth
= VecTy
.getSizeInBits();
2377 unsigned ValWidth
= ValTy
.getSizeInBits();
2378 assert(VecWidth
== 32 || VecWidth
== 64);
2379 assert((VecWidth
% ValWidth
) == 0);
2381 // Cast everything to scalar integer types.
2382 MVT ScalarTy
= MVT::getIntegerVT(VecWidth
);
2383 // The actual type of ValV may be different than ValTy (which is related
2384 // to the vector type).
2385 unsigned VW
= ty(ValV
).getSizeInBits();
2386 ValV
= DAG
.getBitcast(MVT::getIntegerVT(VW
), ValV
);
2387 VecV
= DAG
.getBitcast(ScalarTy
, VecV
);
2389 ValV
= DAG
.getAnyExtOrTrunc(ValV
, dl
, ScalarTy
);
2391 SDValue WidthV
= DAG
.getConstant(ValWidth
, dl
, MVT::i32
);
2394 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(IdxV
)) {
2395 unsigned W
= C
->getZExtValue() * ValWidth
;
2396 SDValue OffV
= DAG
.getConstant(W
, dl
, MVT::i32
);
2397 InsV
= DAG
.getNode(HexagonISD::INSERT
, dl
, ScalarTy
,
2398 {VecV
, ValV
, WidthV
, OffV
});
2400 if (ty(IdxV
) != MVT::i32
)
2401 IdxV
= DAG
.getZExtOrTrunc(IdxV
, dl
, MVT::i32
);
2402 SDValue OffV
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
, WidthV
);
2403 InsV
= DAG
.getNode(HexagonISD::INSERT
, dl
, ScalarTy
,
2404 {VecV
, ValV
, WidthV
, OffV
});
2407 return DAG
.getNode(ISD::BITCAST
, dl
, VecTy
, InsV
);
2411 HexagonTargetLowering::expandPredicate(SDValue Vec32
, const SDLoc
&dl
,
2412 SelectionDAG
&DAG
) const {
2413 assert(ty(Vec32
).getSizeInBits() == 32);
2415 return DAG
.getUNDEF(MVT::i64
);
2416 return getInstr(Hexagon::S2_vsxtbh
, dl
, MVT::i64
, {Vec32
}, DAG
);
2420 HexagonTargetLowering::contractPredicate(SDValue Vec64
, const SDLoc
&dl
,
2421 SelectionDAG
&DAG
) const {
2422 assert(ty(Vec64
).getSizeInBits() == 64);
2424 return DAG
.getUNDEF(MVT::i32
);
2425 return getInstr(Hexagon::S2_vtrunehb
, dl
, MVT::i32
, {Vec64
}, DAG
);
2429 HexagonTargetLowering::getZero(const SDLoc
&dl
, MVT Ty
, SelectionDAG
&DAG
)
2431 if (Ty
.isVector()) {
2432 assert(Ty
.isInteger() && "Only integer vectors are supported here");
2433 unsigned W
= Ty
.getSizeInBits();
2435 return DAG
.getBitcast(Ty
, DAG
.getConstant(0, dl
, MVT::getIntegerVT(W
)));
2436 return DAG
.getNode(HexagonISD::VZERO
, dl
, Ty
);
2440 return DAG
.getConstant(0, dl
, Ty
);
2441 if (Ty
.isFloatingPoint())
2442 return DAG
.getConstantFP(0.0, dl
, Ty
);
2443 llvm_unreachable("Invalid type for zero");
2447 HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op
, SelectionDAG
&DAG
) const {
2449 unsigned BW
= VecTy
.getSizeInBits();
2450 const SDLoc
&dl(Op
);
2451 SmallVector
<SDValue
,8> Ops
;
2452 for (unsigned i
= 0, e
= Op
.getNumOperands(); i
!= e
; ++i
)
2453 Ops
.push_back(Op
.getOperand(i
));
2456 return buildVector32(Ops
, dl
, VecTy
, DAG
);
2458 return buildVector64(Ops
, dl
, VecTy
, DAG
);
2460 if (VecTy
== MVT::v8i1
|| VecTy
== MVT::v4i1
|| VecTy
== MVT::v2i1
) {
2461 // For each i1 element in the resulting predicate register, put 1
2462 // shifted by the index of the element into a general-purpose register,
2463 // then or them together and transfer it back into a predicate register.
2465 SDValue Z
= getZero(dl
, MVT::i32
, DAG
);
2466 // Always produce 8 bits, repeat inputs if necessary.
2467 unsigned Rep
= 8 / VecTy
.getVectorNumElements();
2468 for (unsigned i
= 0; i
!= 8; ++i
) {
2469 SDValue S
= DAG
.getConstant(1ull << i
, dl
, MVT::i32
);
2470 Rs
[i
] = DAG
.getSelect(dl
, MVT::i32
, Ops
[i
/Rep
], S
, Z
);
2472 for (ArrayRef
<SDValue
> A(Rs
); A
.size() != 1; A
= A
.drop_back(A
.size()/2)) {
2473 for (unsigned i
= 0, e
= A
.size()/2; i
!= e
; ++i
)
2474 Rs
[i
] = DAG
.getNode(ISD::OR
, dl
, MVT::i32
, Rs
[2*i
], Rs
[2*i
+1]);
2476 // Move the value directly to a predicate register.
2477 return getInstr(Hexagon::C2_tfrrp
, dl
, VecTy
, {Rs
[0]}, DAG
);
2484 HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op
,
2485 SelectionDAG
&DAG
) const {
2487 const SDLoc
&dl(Op
);
2488 if (VecTy
.getSizeInBits() == 64) {
2489 assert(Op
.getNumOperands() == 2);
2490 return DAG
.getNode(HexagonISD::COMBINE
, dl
, VecTy
, Op
.getOperand(1),
2494 MVT ElemTy
= VecTy
.getVectorElementType();
2495 if (ElemTy
== MVT::i1
) {
2496 assert(VecTy
== MVT::v2i1
|| VecTy
== MVT::v4i1
|| VecTy
== MVT::v8i1
);
2497 MVT OpTy
= ty(Op
.getOperand(0));
2498 // Scale is how many times the operands need to be contracted to match
2499 // the representation in the target register.
2500 unsigned Scale
= VecTy
.getVectorNumElements() / OpTy
.getVectorNumElements();
2501 assert(Scale
== Op
.getNumOperands() && Scale
> 1);
2503 // First, convert all bool vectors to integers, then generate pairwise
2504 // inserts to form values of doubled length. Up until there are only
2505 // two values left to concatenate, all of these values will fit in a
2506 // 32-bit integer, so keep them as i32 to use 32-bit inserts.
2507 SmallVector
<SDValue
,4> Words
[2];
2510 for (SDValue P
: Op
.getNode()->op_values()) {
2511 SDValue W
= DAG
.getNode(HexagonISD::P2D
, dl
, MVT::i64
, P
);
2512 for (unsigned R
= Scale
; R
> 1; R
/= 2) {
2513 W
= contractPredicate(W
, dl
, DAG
);
2514 W
= DAG
.getNode(HexagonISD::COMBINE
, dl
, MVT::i64
,
2515 DAG
.getUNDEF(MVT::i32
), W
);
2517 W
= DAG
.getTargetExtractSubreg(Hexagon::isub_lo
, dl
, MVT::i32
, W
);
2518 Words
[IdxW
].push_back(W
);
2522 SDValue WidthV
= DAG
.getConstant(64 / Scale
, dl
, MVT::i32
);
2523 Words
[IdxW
^ 1].clear();
2525 for (unsigned i
= 0, e
= Words
[IdxW
].size(); i
!= e
; i
+= 2) {
2526 SDValue W0
= Words
[IdxW
][i
], W1
= Words
[IdxW
][i
+1];
2527 // Insert W1 into W0 right next to the significant bits of W0.
2528 SDValue T
= DAG
.getNode(HexagonISD::INSERT
, dl
, MVT::i32
,
2529 {W0
, W1
, WidthV
, WidthV
});
2530 Words
[IdxW
^ 1].push_back(T
);
2536 // Another sanity check. At this point there should only be two words
2537 // left, and Scale should be 2.
2538 assert(Scale
== 2 && Words
[IdxW
].size() == 2);
2540 SDValue WW
= DAG
.getNode(HexagonISD::COMBINE
, dl
, MVT::i64
,
2541 Words
[IdxW
][1], Words
[IdxW
][0]);
2542 return DAG
.getNode(HexagonISD::D2P
, dl
, VecTy
, WW
);
2549 HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op
,
2550 SelectionDAG
&DAG
) const {
2551 SDValue Vec
= Op
.getOperand(0);
2552 MVT ElemTy
= ty(Vec
).getVectorElementType();
2553 return extractVector(Vec
, Op
.getOperand(1), SDLoc(Op
), ElemTy
, ty(Op
), DAG
);
2557 HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op
,
2558 SelectionDAG
&DAG
) const {
2559 return extractVector(Op
.getOperand(0), Op
.getOperand(1), SDLoc(Op
),
2560 ty(Op
), ty(Op
), DAG
);
2564 HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op
,
2565 SelectionDAG
&DAG
) const {
2566 return insertVector(Op
.getOperand(0), Op
.getOperand(1), Op
.getOperand(2),
2567 SDLoc(Op
), ty(Op
).getVectorElementType(), DAG
);
2571 HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op
,
2572 SelectionDAG
&DAG
) const {
2573 SDValue ValV
= Op
.getOperand(1);
2574 return insertVector(Op
.getOperand(0), ValV
, Op
.getOperand(2),
2575 SDLoc(Op
), ty(ValV
), DAG
);
2579 HexagonTargetLowering::allowTruncateForTailCall(Type
*Ty1
, Type
*Ty2
) const {
2580 // Assuming the caller does not have either a signext or zeroext modifier, and
2581 // only one value is accepted, any reasonable truncation is allowed.
2582 if (!Ty1
->isIntegerTy() || !Ty2
->isIntegerTy())
2585 // FIXME: in principle up to 64-bit could be made safe, but it would be very
2586 // fragile at the moment: any support for multiple value returns would be
2587 // liable to disallow tail calls involving i64 -> iN truncation in many cases.
2588 return Ty1
->getPrimitiveSizeInBits() <= 32;
2592 HexagonTargetLowering::LowerLoad(SDValue Op
, SelectionDAG
&DAG
) const {
2593 LoadSDNode
*LN
= cast
<LoadSDNode
>(Op
.getNode());
2594 unsigned ClaimAlign
= LN
->getAlignment();
2595 validateConstPtrAlignment(LN
->getBasePtr(), SDLoc(Op
), ClaimAlign
);
2596 // Call LowerUnalignedLoad for all loads, it recognizes loads that
2597 // don't need extra aligning.
2598 return LowerUnalignedLoad(Op
, DAG
);
2602 HexagonTargetLowering::LowerStore(SDValue Op
, SelectionDAG
&DAG
) const {
2603 StoreSDNode
*SN
= cast
<StoreSDNode
>(Op
.getNode());
2604 unsigned ClaimAlign
= SN
->getAlignment();
2605 SDValue Ptr
= SN
->getBasePtr();
2606 const SDLoc
&dl(Op
);
2607 validateConstPtrAlignment(Ptr
, dl
, ClaimAlign
);
2609 MVT StoreTy
= SN
->getMemoryVT().getSimpleVT();
2610 unsigned NeedAlign
= Subtarget
.getTypeAlignment(StoreTy
);
2611 if (ClaimAlign
< NeedAlign
)
2612 return expandUnalignedStore(SN
, DAG
);
2617 HexagonTargetLowering::LowerUnalignedLoad(SDValue Op
, SelectionDAG
&DAG
)
2619 LoadSDNode
*LN
= cast
<LoadSDNode
>(Op
.getNode());
2620 MVT LoadTy
= ty(Op
);
2621 unsigned NeedAlign
= Subtarget
.getTypeAlignment(LoadTy
);
2622 unsigned HaveAlign
= LN
->getAlignment();
2623 if (HaveAlign
>= NeedAlign
)
2626 const SDLoc
&dl(Op
);
2627 const DataLayout
&DL
= DAG
.getDataLayout();
2628 LLVMContext
&Ctx
= *DAG
.getContext();
2630 // If the load aligning is disabled or the load can be broken up into two
2631 // smaller legal loads, do the default (target-independent) expansion.
2632 bool DoDefault
= false;
2633 // Handle it in the default way if this is an indexed load.
2634 if (!LN
->isUnindexed())
2638 if (allowsMemoryAccess(Ctx
, DL
, LN
->getMemoryVT(), *LN
->getMemOperand()))
2642 if (!DoDefault
&& (2 * HaveAlign
) == NeedAlign
) {
2643 // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
2644 MVT PartTy
= HaveAlign
<= 8 ? MVT::getIntegerVT(8 * HaveAlign
)
2645 : MVT::getVectorVT(MVT::i8
, HaveAlign
);
2646 DoDefault
= allowsMemoryAccess(Ctx
, DL
, PartTy
, *LN
->getMemOperand());
2649 std::pair
<SDValue
, SDValue
> P
= expandUnalignedLoad(LN
, DAG
);
2650 return DAG
.getMergeValues({P
.first
, P
.second
}, dl
);
2653 // The code below generates two loads, both aligned as NeedAlign, and
2654 // with the distance of NeedAlign between them. For that to cover the
2655 // bits that need to be loaded (and without overlapping), the size of
2656 // the loads should be equal to NeedAlign. This is true for all loadable
2657 // types, but add an assertion in case something changes in the future.
2658 assert(LoadTy
.getSizeInBits() == 8*NeedAlign
);
2660 unsigned LoadLen
= NeedAlign
;
2661 SDValue Base
= LN
->getBasePtr();
2662 SDValue Chain
= LN
->getChain();
2663 auto BO
= getBaseAndOffset(Base
);
2664 unsigned BaseOpc
= BO
.first
.getOpcode();
2665 if (BaseOpc
== HexagonISD::VALIGNADDR
&& BO
.second
% LoadLen
== 0)
2668 if (BO
.second
% LoadLen
!= 0) {
2669 BO
.first
= DAG
.getNode(ISD::ADD
, dl
, MVT::i32
, BO
.first
,
2670 DAG
.getConstant(BO
.second
% LoadLen
, dl
, MVT::i32
));
2671 BO
.second
-= BO
.second
% LoadLen
;
2673 SDValue BaseNoOff
= (BaseOpc
!= HexagonISD::VALIGNADDR
)
2674 ? DAG
.getNode(HexagonISD::VALIGNADDR
, dl
, MVT::i32
, BO
.first
,
2675 DAG
.getConstant(NeedAlign
, dl
, MVT::i32
))
2677 SDValue Base0
= DAG
.getMemBasePlusOffset(BaseNoOff
, BO
.second
, dl
);
2678 SDValue Base1
= DAG
.getMemBasePlusOffset(BaseNoOff
, BO
.second
+LoadLen
, dl
);
2680 MachineMemOperand
*WideMMO
= nullptr;
2681 if (MachineMemOperand
*MMO
= LN
->getMemOperand()) {
2682 MachineFunction
&MF
= DAG
.getMachineFunction();
2683 WideMMO
= MF
.getMachineMemOperand(MMO
->getPointerInfo(), MMO
->getFlags(),
2684 2*LoadLen
, LoadLen
, MMO
->getAAInfo(), MMO
->getRanges(),
2685 MMO
->getSyncScopeID(), MMO
->getOrdering(),
2686 MMO
->getFailureOrdering());
2689 SDValue Load0
= DAG
.getLoad(LoadTy
, dl
, Chain
, Base0
, WideMMO
);
2690 SDValue Load1
= DAG
.getLoad(LoadTy
, dl
, Chain
, Base1
, WideMMO
);
2692 SDValue Aligned
= DAG
.getNode(HexagonISD::VALIGN
, dl
, LoadTy
,
2693 {Load1
, Load0
, BaseNoOff
.getOperand(0)});
2694 SDValue NewChain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
2695 Load0
.getValue(1), Load1
.getValue(1));
2696 SDValue M
= DAG
.getMergeValues({Aligned
, NewChain
}, dl
);
2701 HexagonTargetLowering::LowerUAddSubO(SDValue Op
, SelectionDAG
&DAG
) const {
2702 SDValue X
= Op
.getOperand(0), Y
= Op
.getOperand(1);
2703 auto *CY
= dyn_cast
<ConstantSDNode
>(Y
);
2707 const SDLoc
&dl(Op
);
2708 SDVTList VTs
= Op
.getNode()->getVTList();
2709 assert(VTs
.NumVTs
== 2);
2710 assert(VTs
.VTs
[1] == MVT::i1
);
2711 unsigned Opc
= Op
.getOpcode();
2714 uint32_t VY
= CY
->getZExtValue();
2715 assert(VY
!= 0 && "This should have been folded");
2720 if (Opc
== ISD::UADDO
) {
2721 SDValue Op
= DAG
.getNode(ISD::ADD
, dl
, VTs
.VTs
[0], {X
, Y
});
2722 SDValue Ov
= DAG
.getSetCC(dl
, MVT::i1
, Op
, getZero(dl
, ty(Op
), DAG
),
2724 return DAG
.getMergeValues({Op
, Ov
}, dl
);
2726 if (Opc
== ISD::USUBO
) {
2727 SDValue Op
= DAG
.getNode(ISD::SUB
, dl
, VTs
.VTs
[0], {X
, Y
});
2728 SDValue Ov
= DAG
.getSetCC(dl
, MVT::i1
, Op
,
2729 DAG
.getConstant(-1, dl
, ty(Op
)), ISD::SETEQ
);
2730 return DAG
.getMergeValues({Op
, Ov
}, dl
);
2738 HexagonTargetLowering::LowerAddSubCarry(SDValue Op
, SelectionDAG
&DAG
) const {
2739 const SDLoc
&dl(Op
);
2740 unsigned Opc
= Op
.getOpcode();
2741 SDValue X
= Op
.getOperand(0), Y
= Op
.getOperand(1), C
= Op
.getOperand(2);
2743 if (Opc
== ISD::ADDCARRY
)
2744 return DAG
.getNode(HexagonISD::ADDC
, dl
, Op
.getNode()->getVTList(),
2747 EVT CarryTy
= C
.getValueType();
2748 SDValue SubC
= DAG
.getNode(HexagonISD::SUBC
, dl
, Op
.getNode()->getVTList(),
2749 { X
, Y
, DAG
.getLogicalNOT(dl
, C
, CarryTy
) });
2750 SDValue Out
[] = { SubC
.getValue(0),
2751 DAG
.getLogicalNOT(dl
, SubC
.getValue(1), CarryTy
) };
2752 return DAG
.getMergeValues(Out
, dl
);
2756 HexagonTargetLowering::LowerEH_RETURN(SDValue Op
, SelectionDAG
&DAG
) const {
2757 SDValue Chain
= Op
.getOperand(0);
2758 SDValue Offset
= Op
.getOperand(1);
2759 SDValue Handler
= Op
.getOperand(2);
2761 auto PtrVT
= getPointerTy(DAG
.getDataLayout());
2763 // Mark function as containing a call to EH_RETURN.
2764 HexagonMachineFunctionInfo
*FuncInfo
=
2765 DAG
.getMachineFunction().getInfo
<HexagonMachineFunctionInfo
>();
2766 FuncInfo
->setHasEHReturn();
2768 unsigned OffsetReg
= Hexagon::R28
;
2771 DAG
.getNode(ISD::ADD
, dl
, PtrVT
, DAG
.getRegister(Hexagon::R30
, PtrVT
),
2772 DAG
.getIntPtrConstant(4, dl
));
2773 Chain
= DAG
.getStore(Chain
, dl
, Handler
, StoreAddr
, MachinePointerInfo());
2774 Chain
= DAG
.getCopyToReg(Chain
, dl
, OffsetReg
, Offset
);
2776 // Not needed we already use it as explict input to EH_RETURN.
2777 // MF.getRegInfo().addLiveOut(OffsetReg);
2779 return DAG
.getNode(HexagonISD::EH_RETURN
, dl
, MVT::Other
, Chain
);
2783 HexagonTargetLowering::LowerOperation(SDValue Op
, SelectionDAG
&DAG
) const {
2784 unsigned Opc
= Op
.getOpcode();
2786 // Handle INLINEASM first.
2787 if (Opc
== ISD::INLINEASM
|| Opc
== ISD::INLINEASM_BR
)
2788 return LowerINLINEASM(Op
, DAG
);
2790 if (isHvxOperation(Op
)) {
2791 // If HVX lowering returns nothing, try the default lowering.
2792 if (SDValue V
= LowerHvxOperation(Op
, DAG
))
2799 Op
.getNode()->dumpr(&DAG
);
2800 if (Opc
> HexagonISD::OP_BEGIN
&& Opc
< HexagonISD::OP_END
)
2801 errs() << "Error: check for a non-legal type in this operation\n";
2803 llvm_unreachable("Should not custom lower this!");
2804 case ISD::CONCAT_VECTORS
: return LowerCONCAT_VECTORS(Op
, DAG
);
2805 case ISD::INSERT_SUBVECTOR
: return LowerINSERT_SUBVECTOR(Op
, DAG
);
2806 case ISD::INSERT_VECTOR_ELT
: return LowerINSERT_VECTOR_ELT(Op
, DAG
);
2807 case ISD::EXTRACT_SUBVECTOR
: return LowerEXTRACT_SUBVECTOR(Op
, DAG
);
2808 case ISD::EXTRACT_VECTOR_ELT
: return LowerEXTRACT_VECTOR_ELT(Op
, DAG
);
2809 case ISD::BUILD_VECTOR
: return LowerBUILD_VECTOR(Op
, DAG
);
2810 case ISD::VECTOR_SHUFFLE
: return LowerVECTOR_SHUFFLE(Op
, DAG
);
2811 case ISD::BITCAST
: return LowerBITCAST(Op
, DAG
);
2812 case ISD::LOAD
: return LowerLoad(Op
, DAG
);
2813 case ISD::STORE
: return LowerStore(Op
, DAG
);
2815 case ISD::USUBO
: return LowerUAddSubO(Op
, DAG
);
2817 case ISD::SUBCARRY
: return LowerAddSubCarry(Op
, DAG
);
2820 case ISD::SRL
: return LowerVECTOR_SHIFT(Op
, DAG
);
2821 case ISD::ROTL
: return LowerROTL(Op
, DAG
);
2822 case ISD::ConstantPool
: return LowerConstantPool(Op
, DAG
);
2823 case ISD::JumpTable
: return LowerJumpTable(Op
, DAG
);
2824 case ISD::EH_RETURN
: return LowerEH_RETURN(Op
, DAG
);
2825 case ISD::RETURNADDR
: return LowerRETURNADDR(Op
, DAG
);
2826 case ISD::FRAMEADDR
: return LowerFRAMEADDR(Op
, DAG
);
2827 case ISD::GlobalTLSAddress
: return LowerGlobalTLSAddress(Op
, DAG
);
2828 case ISD::ATOMIC_FENCE
: return LowerATOMIC_FENCE(Op
, DAG
);
2829 case ISD::GlobalAddress
: return LowerGLOBALADDRESS(Op
, DAG
);
2830 case ISD::BlockAddress
: return LowerBlockAddress(Op
, DAG
);
2831 case ISD::GLOBAL_OFFSET_TABLE
: return LowerGLOBAL_OFFSET_TABLE(Op
, DAG
);
2832 case ISD::VASTART
: return LowerVASTART(Op
, DAG
);
2833 case ISD::DYNAMIC_STACKALLOC
: return LowerDYNAMIC_STACKALLOC(Op
, DAG
);
2834 case ISD::SETCC
: return LowerSETCC(Op
, DAG
);
2835 case ISD::VSELECT
: return LowerVSELECT(Op
, DAG
);
2836 case ISD::INTRINSIC_WO_CHAIN
: return LowerINTRINSIC_WO_CHAIN(Op
, DAG
);
2837 case ISD::INTRINSIC_VOID
: return LowerINTRINSIC_VOID(Op
, DAG
);
2838 case ISD::PREFETCH
: return LowerPREFETCH(Op
, DAG
);
2839 case ISD::READCYCLECOUNTER
: return LowerREADCYCLECOUNTER(Op
, DAG
);
2847 HexagonTargetLowering::LowerOperationWrapper(SDNode
*N
,
2848 SmallVectorImpl
<SDValue
> &Results
,
2849 SelectionDAG
&DAG
) const {
2850 // We are only custom-lowering stores to verify the alignment of the
2851 // address if it is a compile-time constant. Since a store can be modified
2852 // during type-legalization (the value being stored may need legalization),
2853 // return empty Results here to indicate that we don't really make any
2854 // changes in the custom lowering.
2855 if (N
->getOpcode() != ISD::STORE
)
2856 return TargetLowering::LowerOperationWrapper(N
, Results
, DAG
);
2860 HexagonTargetLowering::ReplaceNodeResults(SDNode
*N
,
2861 SmallVectorImpl
<SDValue
> &Results
,
2862 SelectionDAG
&DAG
) const {
2864 switch (N
->getOpcode()) {
2870 // Handle a bitcast from v8i1 to i8.
2871 if (N
->getValueType(0) == MVT::i8
) {
2872 SDValue P
= getInstr(Hexagon::C2_tfrpr
, dl
, MVT::i32
,
2873 N
->getOperand(0), DAG
);
2874 Results
.push_back(P
);
2880 /// Returns relocation base for the given PIC jumptable.
2882 HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table
,
2883 SelectionDAG
&DAG
) const {
2884 int Idx
= cast
<JumpTableSDNode
>(Table
)->getIndex();
2885 EVT VT
= Table
.getValueType();
2886 SDValue T
= DAG
.getTargetJumpTable(Idx
, VT
, HexagonII::MO_PCREL
);
2887 return DAG
.getNode(HexagonISD::AT_PCREL
, SDLoc(Table
), VT
, T
);
2890 //===----------------------------------------------------------------------===//
2891 // Inline Assembly Support
2892 //===----------------------------------------------------------------------===//
2894 TargetLowering::ConstraintType
2895 HexagonTargetLowering::getConstraintType(StringRef Constraint
) const {
2896 if (Constraint
.size() == 1) {
2897 switch (Constraint
[0]) {
2900 if (Subtarget
.useHVXOps())
2901 return C_RegisterClass
;
2904 return C_RegisterClass
;
2909 return TargetLowering::getConstraintType(Constraint
);
// Map a single-letter inline-asm constraint and a value type to a register
// class. Every visible result has register number 0; the class is one of
// IntRegs, DoubleRegs, ModRegs, HvxQR, HvxVR or HvxWR, or nullptr when there is
// no match. Constraints that are not a single character go to the generic
// TargetLowering implementation.
// NOTE(review): the case labels of the switches below (constraint letters,
// value types and bit sizes) and the closing braces are not visible in this
// listing -- confirm against the complete file before relying on the mapping.
2912 std::pair
<unsigned, const TargetRegisterClass
*>
2913 HexagonTargetLowering::getRegForInlineAsmConstraint(
2914 const TargetRegisterInfo
*TRI
, StringRef Constraint
, MVT VT
) const {
2916 if (Constraint
.size() == 1) {
2917 switch (Constraint
[0]) {
// Selection keyed on the simple value type: integer registers or register
// pairs, nullptr otherwise.
2919 switch (VT
.SimpleTy
) {
2921 return {0u, nullptr};
2927 return {0u, &Hexagon::IntRegsRegClass
};
2930 return {0u, &Hexagon::DoubleRegsRegClass
};
// Modifier registers; presumably guarded by a value-type check on the hidden
// line preceding the nullptr return -- TODO confirm.
2935 return {0u, nullptr};
2936 return {0u, &Hexagon::ModRegsRegClass
};
// Selection keyed on the bit size of VT: HVX predicate registers.
2938 switch (VT
.getSizeInBits()) {
2940 return {0u, nullptr};
2943 return {0u, &Hexagon::HvxQRRegClass
};
// Selection keyed on the bit size of VT: HVX vector registers or vector pairs.
// For one of the sizes the choice depends on V60 ops with 128-byte HVX mode.
2947 switch (VT
.getSizeInBits()) {
2949 return {0u, nullptr};
2951 return {0u, &Hexagon::HvxVRRegClass
};
2953 if (Subtarget
.hasV60Ops() && Subtarget
.useHVX128BOps())
2954 return {0u, &Hexagon::HvxVRRegClass
};
2955 return {0u, &Hexagon::HvxWRRegClass
};
2957 return {0u, &Hexagon::HvxWRRegClass
};
2961 return {0u, nullptr};
// Not a single-letter constraint: use the generic handling.
2965 return TargetLowering::getRegForInlineAsmConstraint(TRI
, Constraint
, VT
);
2968 /// isFPImmLegal - Returns true if the target can instruction select the
2969 /// specified FP immediate natively. If false, the legalizer will
2970 /// materialize the FP immediate as a load from a constant pool.
2971 bool HexagonTargetLowering::isFPImmLegal(const APFloat
&Imm
, EVT VT
,
2972 bool ForCodeSize
) const {
2976 /// isLegalAddressingMode - Return true if the addressing mode represented by
2977 /// AM is legal for this target, for a load/store of the specified type.
2978 bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout
&DL
,
2979 const AddrMode
&AM
, Type
*Ty
,
2980 unsigned AS
, Instruction
*I
) const {
2981 if (Ty
->isSized()) {
2982 // When LSR detects uses of the same base address to access different
2983 // types (e.g. unions), it will assume a conservative type for these
2985 // LSR Use: Kind=Address of void in addrspace(4294967295), ...
2986 // The type Ty passed here would then be "void". Skip the alignment
2987 // checks, but do not return false right away, since that confuses
2988 // LSR into crashing.
2989 unsigned A
= DL
.getABITypeAlignment(Ty
);
2990 // The base offset must be a multiple of the alignment.
2991 if ((AM
.BaseOffs
% A
) != 0)
2993 // The shifted offset must fit in 11 bits.
2994 if (!isInt
<11>(AM
.BaseOffs
>> Log2_32(A
)))
2998 // No global is ever allowed as a base.
3002 int Scale
= AM
.Scale
;
3006 case 0: // No scale reg, "r+i", "r", or just "i".
3008 default: // No scaled addressing mode.
3014 /// Return true if folding a constant offset with the given GlobalAddress is
3015 /// legal. It is frequently not legal in PIC relocation models.
3016 bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode
*GA
)
3018 return HTM
.getRelocationModel() == Reloc::Static
;
3021 /// isLegalICmpImmediate - Return true if the specified immediate is legal
3022 /// icmp immediate, that is the target has icmp instructions which can compare
3023 /// a register against the immediate without having to materialize the
3024 /// immediate into a register.
3025 bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm
) const {
3026 return Imm
>= -512 && Imm
<= 511;
3029 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
3030 /// for tail call optimization. Targets which want to do tail call
3031 /// optimization should implement this function.
3032 bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
3034 CallingConv::ID CalleeCC
,
3036 bool IsCalleeStructRet
,
3037 bool IsCallerStructRet
,
3038 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
3039 const SmallVectorImpl
<SDValue
> &OutVals
,
3040 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
3041 SelectionDAG
& DAG
) const {
3042 const Function
&CallerF
= DAG
.getMachineFunction().getFunction();
3043 CallingConv::ID CallerCC
= CallerF
.getCallingConv();
3044 bool CCMatch
= CallerCC
== CalleeCC
;
3046 // ***************************************************************************
3047 // Look for obvious safe cases to perform tail call optimization that do not
3048 // require ABI changes.
3049 // ***************************************************************************
3051 // If this is a tail call via a function pointer, then don't do it!
3052 if (!isa
<GlobalAddressSDNode
>(Callee
) &&
3053 !isa
<ExternalSymbolSDNode
>(Callee
)) {
3057 // Do not optimize if the calling conventions do not match and the conventions
3058 // used are not C or Fast.
3060 bool R
= (CallerCC
== CallingConv::C
|| CallerCC
== CallingConv::Fast
);
3061 bool E
= (CalleeCC
== CallingConv::C
|| CalleeCC
== CallingConv::Fast
);
3062 // If R & E, then ok.
3067 // Do not tail call optimize vararg calls.
3071 // Also avoid tail call optimization if either caller or callee uses struct
3072 // return semantics.
3073 if (IsCalleeStructRet
|| IsCallerStructRet
)
3076 // In addition to the cases above, we also disable Tail Call Optimization if
3077 // the calling convention code that at least one outgoing argument needs to
3078 // go on the stack. We cannot check that here because at this point that
3079 // information is not available.
3083 /// Returns the target specific optimal type for load and store operations as
3084 /// a result of memset, memcpy, and memmove lowering.
3086 /// If DstAlign is zero that means it's safe to destination alignment can
3087 /// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't
3088 /// a need to check it against alignment requirement, probably because the
3089 /// source does not need to be loaded. If 'IsMemset' is true, that means it's
3090 /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
3091 /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
3092 /// does not need to be loaded. It returns EVT::Other if the type should be
3093 /// determined using generic target-independent logic.
3094 EVT
HexagonTargetLowering::getOptimalMemOpType(uint64_t Size
,
3095 unsigned DstAlign
, unsigned SrcAlign
, bool IsMemset
, bool ZeroMemset
,
3096 bool MemcpyStrSrc
, const AttributeList
&FuncAttributes
) const {
3098 auto Aligned
= [](unsigned GivenA
, unsigned MinA
) -> bool {
3099 return (GivenA
% MinA
) == 0;
3102 if (Size
>= 8 && Aligned(DstAlign
, 8) && (IsMemset
|| Aligned(SrcAlign
, 8)))
3104 if (Size
>= 4 && Aligned(DstAlign
, 4) && (IsMemset
|| Aligned(SrcAlign
, 4)))
3106 if (Size
>= 2 && Aligned(DstAlign
, 2) && (IsMemset
|| Aligned(SrcAlign
, 2)))
3112 bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
3113 EVT VT
, unsigned AS
, unsigned Align
, MachineMemOperand::Flags Flags
,
3117 return Subtarget
.isHVXVectorType(VT
.getSimpleVT());
3120 std::pair
<const TargetRegisterClass
*, uint8_t>
3121 HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo
*TRI
,
3123 if (Subtarget
.isHVXVectorType(VT
, true)) {
3124 unsigned BitWidth
= VT
.getSizeInBits();
3125 unsigned VecWidth
= Subtarget
.getVectorLength() * 8;
3127 if (VT
.getVectorElementType() == MVT::i1
)
3128 return std::make_pair(&Hexagon::HvxQRRegClass
, 1);
3129 if (BitWidth
== VecWidth
)
3130 return std::make_pair(&Hexagon::HvxVRRegClass
, 1);
3131 assert(BitWidth
== 2 * VecWidth
);
3132 return std::make_pair(&Hexagon::HvxWRRegClass
, 1);
3135 return TargetLowering::findRepresentativeClass(TRI
, VT
);
3138 bool HexagonTargetLowering::shouldReduceLoadWidth(SDNode
*Load
,
3139 ISD::LoadExtType ExtTy
, EVT NewVT
) const {
3140 // TODO: This may be worth removing. Check regression tests for diffs.
3141 if (!TargetLoweringBase::shouldReduceLoadWidth(Load
, ExtTy
, NewVT
))
3144 auto *L
= cast
<LoadSDNode
>(Load
);
3145 std::pair
<SDValue
,int> BO
= getBaseAndOffset(L
->getBasePtr());
3146 // Small-data object, do not shrink.
3147 if (BO
.first
.getOpcode() == HexagonISD::CONST32_GP
)
3149 if (GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(BO
.first
)) {
3150 auto &HTM
= static_cast<const HexagonTargetMachine
&>(getTargetMachine());
3151 const auto *GO
= dyn_cast_or_null
<const GlobalObject
>(GA
->getGlobal());
3152 return !GO
|| !HTM
.getObjFileLowering()->isGlobalInSmallSection(GO
, HTM
);
3157 Value
*HexagonTargetLowering::emitLoadLinked(IRBuilder
<> &Builder
, Value
*Addr
,
3158 AtomicOrdering Ord
) const {
3159 BasicBlock
*BB
= Builder
.GetInsertBlock();
3160 Module
*M
= BB
->getParent()->getParent();
3161 auto PT
= cast
<PointerType
>(Addr
->getType());
3162 Type
*Ty
= PT
->getElementType();
3163 unsigned SZ
= Ty
->getPrimitiveSizeInBits();
3164 assert((SZ
== 32 || SZ
== 64) && "Only 32/64-bit atomic loads supported");
3165 Intrinsic::ID IntID
= (SZ
== 32) ? Intrinsic::hexagon_L2_loadw_locked
3166 : Intrinsic::hexagon_L4_loadd_locked
;
3167 Function
*Fn
= Intrinsic::getDeclaration(M
, IntID
);
3169 PointerType
*NewPtrTy
3170 = Builder
.getIntNTy(SZ
)->getPointerTo(PT
->getAddressSpace());
3171 Addr
= Builder
.CreateBitCast(Addr
, NewPtrTy
);
3173 Value
*Call
= Builder
.CreateCall(Fn
, Addr
, "larx");
3175 return Builder
.CreateBitCast(Call
, Ty
);
3178 /// Perform a store-conditional operation to Addr. Return the status of the
3179 /// store. This should be 0 if the store succeeded, non-zero otherwise.
3180 Value
*HexagonTargetLowering::emitStoreConditional(IRBuilder
<> &Builder
,
3181 Value
*Val
, Value
*Addr
, AtomicOrdering Ord
) const {
3182 BasicBlock
*BB
= Builder
.GetInsertBlock();
3183 Module
*M
= BB
->getParent()->getParent();
3184 Type
*Ty
= Val
->getType();
3185 unsigned SZ
= Ty
->getPrimitiveSizeInBits();
3187 Type
*CastTy
= Builder
.getIntNTy(SZ
);
3188 assert((SZ
== 32 || SZ
== 64) && "Only 32/64-bit atomic stores supported");
3189 Intrinsic::ID IntID
= (SZ
== 32) ? Intrinsic::hexagon_S2_storew_locked
3190 : Intrinsic::hexagon_S4_stored_locked
;
3191 Function
*Fn
= Intrinsic::getDeclaration(M
, IntID
);
3193 unsigned AS
= Addr
->getType()->getPointerAddressSpace();
3194 Addr
= Builder
.CreateBitCast(Addr
, CastTy
->getPointerTo(AS
));
3195 Val
= Builder
.CreateBitCast(Val
, CastTy
);
3197 Value
*Call
= Builder
.CreateCall(Fn
, {Addr
, Val
}, "stcx");
3198 Value
*Cmp
= Builder
.CreateICmpEQ(Call
, Builder
.getInt32(0), "");
3199 Value
*Ext
= Builder
.CreateZExt(Cmp
, Type::getInt32Ty(M
->getContext()));
3203 TargetLowering::AtomicExpansionKind
3204 HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst
*LI
) const {
3205 // Do not expand loads and stores that don't exceed 64 bits.
3206 return LI
->getType()->getPrimitiveSizeInBits() > 64
3207 ? AtomicExpansionKind::LLOnly
3208 : AtomicExpansionKind::None
;
3211 bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst
*SI
) const {
3212 // Do not expand loads and stores that don't exceed 64 bits.
3213 return SI
->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
3216 TargetLowering::AtomicExpansionKind
3217 HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
3218 AtomicCmpXchgInst
*AI
) const {
3219 const DataLayout
&DL
= AI
->getModule()->getDataLayout();
3220 unsigned Size
= DL
.getTypeStoreSize(AI
->getCompareOperand()->getType());
3221 if (Size
>= 4 && Size
<= 8)
3222 return AtomicExpansionKind::LLSC
;
3223 return AtomicExpansionKind::None
;