//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that Hexagon uses to lower LLVM code
// into a selection DAG.
//
//===----------------------------------------------------------------------===//
#include "HexagonISelLowering.h"
#include "HexagonMachineFunctionInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "HexagonTargetObjectFile.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
#define DEBUG_TYPE "hexagon-lowering"

static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
  cl::init(true), cl::Hidden,
  cl::desc("Control jump table emission on Hexagon target"));

static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
  cl::Hidden, cl::ZeroOrMore, cl::init(false),
  cl::desc("Enable Hexagon SDNode scheduling"));

static cl::opt<bool> EnableFastMath("ffast-math",
  cl::Hidden, cl::ZeroOrMore, cl::init(false),
  cl::desc("Enable Fast Math processing"));

static cl::opt<int> MinimumJumpTables("minimum-jump-tables",
  cl::Hidden, cl::ZeroOrMore, cl::init(5),
  cl::desc("Set minimum jump tables"));

static cl::opt<int> MaxStoresPerMemcpyCL("max-store-memcpy",
  cl::Hidden, cl::ZeroOrMore, cl::init(6),
  cl::desc("Max #stores to inline memcpy"));

static cl::opt<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os",
  cl::Hidden, cl::ZeroOrMore, cl::init(4),
  cl::desc("Max #stores to inline memcpy"));

static cl::opt<int> MaxStoresPerMemmoveCL("max-store-memmove",
  cl::Hidden, cl::ZeroOrMore, cl::init(6),
  cl::desc("Max #stores to inline memmove"));

static cl::opt<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os",
  cl::Hidden, cl::ZeroOrMore, cl::init(4),
  cl::desc("Max #stores to inline memmove"));

static cl::opt<int> MaxStoresPerMemsetCL("max-store-memset",
  cl::Hidden, cl::ZeroOrMore, cl::init(8),
  cl::desc("Max #stores to inline memset"));

static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
  cl::Hidden, cl::ZeroOrMore, cl::init(4),
  cl::desc("Max #stores to inline memset"));

static cl::opt<bool> AlignLoads("hexagon-align-loads",
  cl::Hidden, cl::init(false),
  cl::desc("Rewrite unaligned loads as a pair of aligned loads"));
namespace {

  class HexagonCCState : public CCState {
    unsigned NumNamedVarArgParams = 0;

  public:
    HexagonCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
                   SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
                   unsigned NumNamedArgs)
        : CCState(CC, IsVarArg, MF, locs, C),
          NumNamedVarArgParams(NumNamedArgs) {}

    unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
  };

} // end anonymous namespace
// Implement calling convention for Hexagon.

static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                       CCValAssign::LocInfo &LocInfo,
                       ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  static const MCPhysReg ArgRegs[] = {
    Hexagon::R0, Hexagon::R1, Hexagon::R2,
    Hexagon::R3, Hexagon::R4, Hexagon::R5
  };
  const unsigned NumArgRegs = array_lengthof(ArgRegs);
  unsigned RegNum = State.getFirstUnallocated(ArgRegs);

  // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
  if (RegNum != NumArgRegs && RegNum % 2 == 1)
    State.AllocateReg(ArgRegs[RegNum]);

  // Always return false here, as this function only makes sure that the first
  // unallocated register has an even register number and does not actually
  // allocate a register for the current argument.
  return false;
}
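
// Editorial note (not part of the original source): CC_SkipOdd is invoked by
// the generated calling-convention tables so that arguments needing a register
// pair start at an even register. For example, if an i32 argument has already
// taken R0, a following i64 is then assumed to be placed in the R3:2 pair,
// leaving R1 unallocated.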

#include "HexagonGenCallingConv.inc"

SDValue
HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
      const {
  return SDValue();
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter. Sometimes what we are copying is the end of a
/// larger object, the part that does not fit in registers.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*isVolatile=*/false, /*AlwaysInline=*/false,
                       /*isTailCall=*/false,
                       MachinePointerInfo(), MachinePointerInfo());
}
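
// Editorial note (not part of the original source): for a byval argument the
// caller owns the copy, so at the call site the node built above turns into a
// plain memcpy of Flags.getByValSize() bytes from the caller's object into the
// outgoing argument area on the stack.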

bool
HexagonTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  if (MF.getSubtarget<HexagonSubtarget>().useHVXOps())
    return CCInfo.CheckReturn(Outs, RetCC_Hexagon_HVX);
  return CCInfo.CheckReturn(Outs, RetCC_Hexagon);
}

// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
// passed by value, the function prototype is modified to return void and
// the value is stored in memory pointed by a pointer passed by caller.
SDValue
HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                   bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   const SDLoc &dl, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze return values of ISD::RET
  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon_HVX);
  else
    CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];

    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);

    // Guarantee that all emitted copies are stuck together with flags.
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
}

bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // If either no tail call or told not to tail call at all, don't.
  auto Attr =
      CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
    return false;

  return true;
}

Register HexagonTargetLowering::getRegisterByName(const char* RegName, EVT VT,
                                                  const MachineFunction &) const {
  // Just support r19, the linux kernel uses it.
  Register Reg = StringSwitch<Register>(RegName)
                     .Case("r19", Hexagon::R19)
                     .Default(Register());
  if (Reg)
    return Reg;

  report_fatal_error("Invalid register name global variable");
}

/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/Glue are the input chain/glue to use, and that TheCall is the call
/// being lowered. Returns a SDNode with the same number of values as the
/// ISD::CALL.
SDValue HexagonTargetLowering::LowerCallResult(
    SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;

  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon_HVX);
  else
    CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    SDValue RetVal;
    if (RVLocs[i].getValVT() == MVT::i1) {
      // Return values of type MVT::i1 require special handling. The reason
      // is that MVT::i1 is associated with the PredRegs register class, but
      // values of that type are still returned in R0. Generate an explicit
      // copy into a predicate register from R0, and treat the value of the
      // predicate register as the call result.
      auto &MRI = DAG.getMachineFunction().getRegInfo();
      SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
                                       MVT::i32, Glue);
      // FR0 = (Value, Chain, Glue)
      Register PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
      SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR,
                                     FR0.getValue(0), FR0.getValue(2));
      // TPR = (Chain, Glue)
      // Don't glue this CopyFromReg, because it copies from a virtual
      // register. If it is glued to the call, InstrEmitter will add it
      // as an implicit def to the call (EmitMachineNode).
      RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1);
      Glue = TPR.getValue(1);
      Chain = TPR.getValue(0);
    } else {
      RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
                                  RVLocs[i].getValVT(), Glue);
      Glue = RetVal.getValue(2);
      Chain = RetVal.getValue(1);
    }
    InVals.push_back(RetVal.getValue(0));
  }

  return Chain;
}

/// LowerCall - Functions arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
SDValue
HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  bool DoesNotReturn = CLI.DoesNotReturn;

  bool IsStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  unsigned NumParams = CLI.CS.getInstruction()
                           ? CLI.CS.getFunctionType()->getNumParams()
                           : 0;
  if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
                        NumParams);

  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX);
  else
    CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);

  auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
  if (Attr.getValueAsString() == "true")
    CLI.IsTailCall = false;

  if (CLI.IsTailCall) {
    bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
    CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                         IsVarArg, IsStructRet, StructAttrFlag, Outs,
                         OutVals, Ins, DAG);
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
      CCValAssign &VA = ArgLocs[i];
      if (VA.isMemLoc()) {
        CLI.IsTailCall = false;
        break;
      }
    }
    LLVM_DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n"
                                         : "Argument must be passed on stack. "
                                           "Not eligible for Tail Call\n"));
  }
  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  SDValue StackPtr =
      DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);

  bool NeedsArgAlign = false;
  unsigned LargestAlignSeen = 0;
  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    // Record if we need > 8 byte alignment on an argument.
    bool ArgAlign = Subtarget.isHVXVectorType(VA.getValVT());
    NeedsArgAlign |= ArgAlign;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
      default:
        // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::BCvt:
        Arg = DAG.getBitcast(VA.getLocVT(), Arg);
        break;
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
        break;
      case CCValAssign::AExt:
        Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
        break;
    }

    if (VA.isMemLoc()) {
      unsigned LocMemOffset = VA.getLocMemOffset();
      SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
                                        StackPtr.getValueType());
      MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
      if (ArgAlign)
        LargestAlignSeen = std::max(LargestAlignSeen,
                                    VA.getLocVT().getStoreSizeInBits() >> 3);
      if (Flags.isByVal()) {
        // The argument is a struct passed by value. According to LLVM, "Arg"
        // is a pointer.
        MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
                                                        Flags, DAG, dl));
      } else {
        MachinePointerInfo LocPI = MachinePointerInfo::getStack(
            DAG.getMachineFunction(), LocMemOffset);
        SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI);
        MemOpChains.push_back(S);
      }
      continue;
    }

    // Arguments that can be passed on register must be kept at RegsToPass
    // vector.
    if (VA.isRegLoc())
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
  }

  if (NeedsArgAlign && Subtarget.hasV60Ops()) {
    LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
    unsigned VecAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
    LargestAlignSeen = std::max(LargestAlignSeen, VecAlign);
    MFI.ensureMaxAlignment(LargestAlignSeen);
  }
  // Transform all store nodes into one single node because all store
  // nodes are independent of each other.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  SDValue Glue;
  if (!CLI.IsTailCall) {
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
    Glue = Chain.getValue(1);
  }

  // Build a sequence of copy-to-reg nodes chained together with token
  // chain and flag operands which copy the outgoing args into registers.
  // The Glue is necessary since all emitted instructions must be
  // stuck together.
  if (!CLI.IsTailCall) {
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, Glue);
      Glue = Chain.getValue(1);
    }
  } else {
    // For tail calls lower the arguments to the 'real' stack slot.
    //
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.
    //
    // Do not flag preceding copytoreg stuff together with the following stuff.
    Glue = SDValue();
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, Glue);
      Glue = Chain.getValue(1);
    }
    Glue = SDValue();
  }

  bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls();
  unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT, 0, Flags);
  } else if (ExternalSymbolSDNode *S =
             dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, Flags);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));
  }

  const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (Glue.getNode())
    Ops.push_back(Glue);

  if (CLI.IsTailCall) {
    MFI.setHasTailCall();
    return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
  }

  // Set this here because we need to know this for "hasFP" in frame lowering.
  // The target-independent code calls getFrameRegister before setting it, and
  // getFrameRegister uses hasFP to determine whether the function has FP.
  MFI.setHasCalls(true);

  unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL;
  Chain = DAG.getNode(OpCode, dl, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), Glue, dl);
  Glue = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, Glue, CallConv, IsVarArg, Ins, dl, DAG,
                         InVals, OutVals, Callee);
}
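
// Editorial note (not part of the original source): for a non-tail call the
// sequence built above is CALLSEQ_START, copy-to-reg of the register
// arguments, a HexagonISD::CALL (or CALLnr for no-return callees), then
// CALLSEQ_END; finally LowerCallResult copies the returned values back out of
// their physical registers.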

/// Returns true by value, base pointer and offset pointer and addressing
/// mode by reference if this node can be combined with a load / store to
/// form a post-indexed load / store.
bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
      SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
      SelectionDAG &DAG) const {
  LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N);
  if (!LSN)
    return false;
  EVT VT = LSN->getMemoryVT();
  if (!VT.isSimple())
    return false;
  bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
                     VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64 ||
                     VT == MVT::v2i16 || VT == MVT::v2i32 || VT == MVT::v4i8 ||
                     VT == MVT::v4i16 || VT == MVT::v8i8 ||
                     Subtarget.isHVXVectorType(VT.getSimpleVT());
  if (!IsLegalType)
    return false;

  if (Op->getOpcode() != ISD::ADD)
    return false;
  Base = Op->getOperand(0);
  Offset = Op->getOperand(1);
  if (!isa<ConstantSDNode>(Offset.getNode()))
    return false;
  AM = ISD::POST_INC;

  int32_t V = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
  return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V);
}

SDValue
HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  unsigned LR = HRI.getRARegister();

  if ((Op.getOpcode() != ISD::INLINEASM &&
       Op.getOpcode() != ISD::INLINEASM_BR) || HMFI.hasClobberLR())
    return Op;

  unsigned NumOps = Op.getNumOperands();
  if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue)
    --NumOps;  // Ignore the flag operand.

  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
    unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
    unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
    ++i;  // Skip the ID value.

    switch (InlineAsm::getKind(Flags)) {
      default:
        llvm_unreachable("Bad flags!");
      case InlineAsm::Kind_RegUse:
      case InlineAsm::Kind_Imm:
      case InlineAsm::Kind_Mem:
        i += NumVals;
        break;
      case InlineAsm::Kind_Clobber:
      case InlineAsm::Kind_RegDef:
      case InlineAsm::Kind_RegDefEarlyClobber: {
        for (; NumVals; --NumVals, ++i) {
          unsigned Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
          if (Reg != LR)
            continue;
          HMFI.setHasClobberLR(true);
          return Op;
        }
        break;
      }
    }
  }

  return Op;
}

// Need to transform ISD::PREFETCH into something that doesn't inherit
// all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and
// SDNPMayStore.
SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Addr = Op.getOperand(1);
  // Lower it to DCFETCH($reg, #0). A "pat" will try to merge the offset in,
  // if the "reg" is fed by an "add".
  SDLoc DL(Op);
  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
  return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
}
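
// Editorial note (not part of the original source): dcfetch is Hexagon's
// data-cache prefetch instruction. Emitting HexagonISD::DCFETCH with a zero
// immediate produces "dcfetch(Rs + #0)"; as the comment above says, selection
// patterns may later fold a constant "add" feeding the address into that
// offset.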

// Custom-handle ISD::READCYCLECOUNTER because the target-independent SDNode
// is marked as having side-effects, while the register read on Hexagon does
// not have any. TableGen refuses to accept the direct pattern from that node
SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDLoc dl(Op);
  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
  return DAG.getNode(HexagonISD::READCYCLE, dl, VTs, Chain);
}

SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  // Lower the hexagon_prefetch builtin to DCFETCH, as above.
  if (IntNo == Intrinsic::hexagon_prefetch) {
    SDValue Addr = Op.getOperand(2);
    SDLoc DL(Op);
    SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
    return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
  }
  return SDValue();
}

SDValue
HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  SDValue Align = Op.getOperand(2);
  SDLoc dl(Op);

  ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align);
  assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");

  unsigned A = AlignConst->getSExtValue();
  auto &HFI = *Subtarget.getFrameLowering();
  // "Zero" means natural stack alignment.
  if (A == 0)
    A = HFI.getStackAlignment();

  LLVM_DEBUG({
    dbgs () << __func__ << " Align: " << A << " Size: ";
    Size.getNode()->dump(&DAG);
    dbgs() << "\n";
  });

  SDValue AC = DAG.getConstant(A, dl, MVT::i32);
  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
  SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);

  DAG.ReplaceAllUsesOfValueWith(Op, AA);
  return AA;
}

SDValue HexagonTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
                        MF.getFunction().getFunctionType()->getNumParams());

  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX);
  else
    CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);

  // For LLVM, in the case when returning a struct by value (>8byte),
  // the first argument is a pointer that points to the location on caller's
  // stack where the return value will be stored. For Hexagon, the location on
  // caller's stack is passed only when the struct size is smaller than (and
  // equal to) 8 bytes. If not, no address will be passed into callee and
  // callee return the result directly through R0/R1.

  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    ISD::ArgFlagsTy Flags = Ins[i].Flags;
    bool ByVal = Flags.isByVal();

    // Arguments passed in registers:
    // 1. 32- and 64-bit values and HVX vectors are passed directly,
    // 2. Large structs are passed via an address, and the address is
    //    passed in a register.
    if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= 8)
      llvm_unreachable("ByValSize must be bigger than 8 bytes");

    bool InReg = VA.isRegLoc() &&
                 (!ByVal || (ByVal && Flags.getByValSize() > 8));

    if (InReg) {
      MVT RegVT = VA.getLocVT();
      if (VA.getLocInfo() == CCValAssign::BCvt)
        RegVT = VA.getValVT();

      const TargetRegisterClass *RC = getRegClassFor(RegVT);
      Register VReg = MRI.createVirtualRegister(RC);
      SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);

      // Treat values of type MVT::i1 specially: they are passed in
      // registers of type i32, but they need to remain as values of
      // type i1 for consistency of the argument lowering.
      if (VA.getValVT() == MVT::i1) {
        assert(RegVT.getSizeInBits() <= 32);
        SDValue T = DAG.getNode(ISD::AND, dl, RegVT,
                                Copy, DAG.getConstant(1, dl, RegVT));
        Copy = DAG.getSetCC(dl, MVT::i1, T, DAG.getConstant(0, dl, RegVT),
                            ISD::SETNE);
      } else {
#ifndef NDEBUG
        unsigned RegSize = RegVT.getSizeInBits();
        assert(RegSize == 32 || RegSize == 64 ||
               Subtarget.isHVXVectorType(RegVT));
#endif
      }
      InVals.push_back(Copy);
      MRI.addLiveIn(VA.getLocReg(), VReg);
    } else {
      assert(VA.isMemLoc() && "Argument should be passed in memory");

      // If it's a byval parameter, then we need to compute the
      // "real" size, not the size of the pointer.
      unsigned ObjSize = Flags.isByVal()
                            ? Flags.getByValSize()
                            : VA.getLocVT().getStoreSizeInBits() / 8;

      // Create the frame index object for this incoming parameter.
      int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
      int FI = MFI.CreateFixedObject(ObjSize, Offset, true);
      SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);

      if (Flags.isByVal()) {
        // If it's a pass-by-value aggregate, then do not dereference the stack
        // location. Instead, we should generate a reference to the stack
        // location.
        InVals.push_back(FIN);
      } else {
        SDValue L = DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                MachinePointerInfo::getFixedStack(MF, FI, 0));
        InVals.push_back(L);
      }
    }
  }

  if (IsVarArg) {
    // This will point to the next argument passed via stack.
    int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
    int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
    HMFI.setVarArgsFrameIndex(FI);
  }

  return Chain;
}
, SelectionDAG
&DAG
) const {
800 // VASTART stores the address of the VarArgsFrameIndex slot into the
801 // memory location argument.
802 MachineFunction
&MF
= DAG
.getMachineFunction();
803 HexagonMachineFunctionInfo
*QFI
= MF
.getInfo
<HexagonMachineFunctionInfo
>();
804 SDValue Addr
= DAG
.getFrameIndex(QFI
->getVarArgsFrameIndex(), MVT::i32
);
805 const Value
*SV
= cast
<SrcValueSDNode
>(Op
.getOperand(2))->getValue();
806 return DAG
.getStore(Op
.getOperand(0), SDLoc(Op
), Addr
, Op
.getOperand(1),
807 MachinePointerInfo(SV
));

SDValue
HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  MVT ResTy = ty(Op);
  MVT OpTy = ty(LHS);

  if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
    MVT ElemTy = OpTy.getVectorElementType();
    assert(ElemTy.isScalarInteger());
    MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
                                  OpTy.getVectorNumElements());
    return DAG.getSetCC(dl, ResTy,
                        DAG.getSExtOrTrunc(LHS, SDLoc(LHS), WideTy),
                        DAG.getSExtOrTrunc(RHS, SDLoc(RHS), WideTy), CC);
  }

  // Treat all other vector types as legal.
  if (ResTy.isVector())
    return Op;

  // Comparisons of short integers should use sign-extend, not zero-extend,
  // since we can represent small negative values in the compare instructions.
  // The LLVM default is to use zero-extend arbitrarily in these cases.
  auto isSExtFree = [this](SDValue N) {
    switch (N.getOpcode()) {
      case ISD::TRUNCATE: {
        // A sign-extend of a truncate of a sign-extend is free.
        SDValue Op = N.getOperand(0);
        if (Op.getOpcode() != ISD::AssertSext)
          return false;
        EVT OrigTy = cast<VTSDNode>(Op.getOperand(1))->getVT();
        unsigned ThisBW = ty(N).getSizeInBits();
        unsigned OrigBW = OrigTy.getSizeInBits();
        // The type that was sign-extended to get the AssertSext must be
        // narrower than the type of N (so that N has still the same value
        // as the original).
        return ThisBW >= OrigBW;
      }
      case ISD::LOAD:
        // We have sign-extended loads.
        return true;
    }
    return false;
  };

  if (OpTy == MVT::i8 || OpTy == MVT::i16) {
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
    bool IsNegative = C && C->getAPIntValue().isNegative();
    if (IsNegative || isSExtFree(LHS) || isSExtFree(RHS))
      return DAG.getSetCC(dl, ResTy,
                          DAG.getSExtOrTrunc(LHS, SDLoc(LHS), MVT::i32),
                          DAG.getSExtOrTrunc(RHS, SDLoc(RHS), MVT::i32), CC);
  }

  return SDValue();
}

SDValue
HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue PredOp = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
  MVT OpTy = ty(Op1);
  const SDLoc &dl(Op);

  if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
    MVT ElemTy = OpTy.getVectorElementType();
    assert(ElemTy.isScalarInteger());
    MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
                                  OpTy.getVectorNumElements());
    // Generate (trunc (select (_, sext, sext))).
    return DAG.getSExtOrTrunc(
              DAG.getSelect(dl, WideTy, PredOp,
                            DAG.getSExtOrTrunc(Op1, dl, WideTy),
                            DAG.getSExtOrTrunc(Op2, dl, WideTy)),
              dl, OpTy);
  }

  return SDValue();
}
*convert_i1_to_i8(const Constant
*ConstVal
) {
893 SmallVector
<Constant
*, 128> NewConst
;
894 const ConstantVector
*CV
= dyn_cast
<ConstantVector
>(ConstVal
);
898 LLVMContext
&Ctx
= ConstVal
->getContext();
899 IRBuilder
<> IRB(Ctx
);
900 unsigned NumVectorElements
= CV
->getNumOperands();
901 assert(isPowerOf2_32(NumVectorElements
) &&
902 "conversion only supported for pow2 VectorSize!");
904 for (unsigned i
= 0; i
< NumVectorElements
/ 8; ++i
) {
906 for (unsigned j
= 0; j
< 8; ++j
) {
907 uint8_t y
= CV
->getOperand(i
* 8 + j
)->getUniqueInteger().getZExtValue();
910 assert((x
== 0 || x
== 255) && "Either all 0's or all 1's expected!");
911 NewConst
.push_back(IRB
.getInt8(x
));
913 return ConstantVector::get(NewConst
);

SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
  EVT ValTy = Op.getValueType();
  ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
  Constant *CVal = nullptr;
  bool isVTi1Type = false;
  if (const Constant *ConstVal = dyn_cast<Constant>(CPN->getConstVal())) {
    Type *CValTy = ConstVal->getType();
    if (CValTy->isVectorTy() &&
        CValTy->getVectorElementType()->isIntegerTy(1)) {
      CVal = convert_i1_to_i8(ConstVal);
      isVTi1Type = (CVal != nullptr);
    }
  }
  unsigned Align = CPN->getAlignment();
  bool IsPositionIndependent = isPositionIndependent();
  unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0;

  unsigned Offset = 0;
  SDValue T;
  if (CPN->isMachineConstantPoolEntry())
    T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, Offset,
                                  TF);
  else if (isVTi1Type)
    T = DAG.getTargetConstantPool(CVal, ValTy, Align, Offset, TF);
  else
    T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, Offset, TF);

  assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
         "Inconsistent target flag encountered");

  if (IsPositionIndependent)
    return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
  return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
}

SDValue
HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  int Idx = cast<JumpTableSDNode>(Op)->getIndex();
  if (isPositionIndependent()) {
    SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
    return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
  }

  SDValue T = DAG.getTargetJumpTable(Idx, VT);
  return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
}

SDValue
HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return LR, which contains the return address. Mark it an implicit live-in.
  unsigned Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}

SDValue
HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                                         HRI.getFrameRegister(), VT);
  while (Depth--)
    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
                            MachinePointerInfo());
  return FrameAddr;
}

SDValue
HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
  SDLoc dl(Op);
  return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
}

SDValue
HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  auto *GAN = cast<GlobalAddressSDNode>(Op);
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  auto *GV = GAN->getGlobal();
  int64_t Offset = GAN->getOffset();

  auto &HLOF = *HTM.getObjFileLowering();
  Reloc::Model RM = HTM.getRelocationModel();

  if (RM == Reloc::Static) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
    const GlobalObject *GO = GV->getBaseObject();
    if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM))
      return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
    return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
  }

  bool UsePCRel = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
  if (UsePCRel) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
                                            HexagonII::MO_PCREL);
    return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
  }

  // Use GOT index.
  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
  SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
  return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
}
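
// Editorial note (not part of the original source): the three paths above are
// the three ways Hexagon materializes a global's address: GP-relative
// (CONST32_GP) for small-data globals under static relocation, PC-relative
// (AT_PCREL) for DSO-local globals under PIC, and a GOT lookup (AT_GOT) for
// everything else.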

// Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
SDValue
HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  SDLoc dl(Op);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  Reloc::Model RM = HTM.getRelocationModel();
  if (RM == Reloc::Static) {
    SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
    return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
  }

  SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
  return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
}

SDValue
HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
      const {
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
                                               HexagonII::MO_PCREL);
  return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
}

SDValue
HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
      GlobalAddressSDNode *GA, SDValue Glue, EVT PtrVT, unsigned ReturnReg,
      unsigned char OperandFlags) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDLoc dl(GA);
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
                                           GA->getValueType(0),
                                           GA->getOffset(),
                                           OperandFlags);
  // Create Operands for the call. The Operands should have the following:
  // 1. Chain,
  // 2. Callee which in this case is the Global address value.
  // 3. Registers live into the call. In this case its R0, as we
  //    have just one argument to be passed.
  // 4. Glue.
  // Note: The order is important.

  const auto &HRI = *Subtarget.getRegisterInfo();
  const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C);
  assert(Mask && "Missing call preserved mask for calling convention");
  SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT),
                    DAG.getRegisterMask(Mask), Glue };
  Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);

  // Inform MFI that function has calls.
  MFI.setAdjustsStack(true);

  Glue = Chain.getValue(1);
  return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
}

//
// Lower using the initial-exec model for TLS addresses
//
SDValue
HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
      SelectionDAG &DAG) const {
  SDLoc dl(GA);
  int64_t Offset = GA->getOffset();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Get the thread pointer.
  SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);

  bool IsPositionIndependent = isPositionIndependent();
  unsigned char TF =
      IsPositionIndependent ? HexagonII::MO_IEGOT : HexagonII::MO_IE;

  // First generate the TLS symbol address
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT,
                                           Offset, TF);

  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);

  if (IsPositionIndependent) {
    // Generate the GOT pointer in case of position independent code
    SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Sym, DAG);

    // Add the TLS Symbol address to GOT pointer. This gives
    // GOT relative relocation for the symbol.
    Sym = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
  }

  // Load the offset value for TLS symbol. This offset is relative to
  // thread pointer.
  SDValue LoadOffset =
      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Sym, MachinePointerInfo());

  // Address of the thread local variable is the add of thread
  // pointer and the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, TP, LoadOffset);
}

//
// Lower using the local-exec model for TLS addresses
//
SDValue
HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
      SelectionDAG &DAG) const {
  SDLoc dl(GA);
  int64_t Offset = GA->getOffset();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Get the thread pointer.
  SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
  // Generate the TLS symbol address
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
                                           HexagonII::MO_TPREL);
  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);

  // Address of the thread local variable is the add of thread
  // pointer and the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, TP, Sym);
}

//
// Lower using the general-dynamic model for TLS addresses
//
SDValue
HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
      SelectionDAG &DAG) const {
  SDLoc dl(GA);
  int64_t Offset = GA->getOffset();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // First generate the TLS symbol address
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
                                           HexagonII::MO_GDGOT);

  // Then, generate the GOT pointer
  SDValue GOT = LowerGLOBAL_OFFSET_TABLE(TGA, DAG);

  // Add the TLS symbol and the GOT pointer
  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
  SDValue Chain = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);

  // Copy over the argument to R0
  SDValue InFlag;
  Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag);
  InFlag = Chain.getValue(1);

  unsigned Flags =
      static_cast<const HexagonSubtarget &>(DAG.getSubtarget()).useLongCalls()
          ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
          : HexagonII::MO_GDPLT;

  return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT,
                           Hexagon::R0, Flags);
}

//
// Lower TLS addresses.
//
// For now for dynamic models, we only support the general dynamic model.
//
SDValue
HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op,
      SelectionDAG &DAG) const {
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);

  switch (HTM.getTLSModel(GA->getGlobal())) {
    case TLSModel::GeneralDynamic:
    case TLSModel::LocalDynamic:
      return LowerToTLSGeneralDynamicModel(GA, DAG);
    case TLSModel::InitialExec:
      return LowerToTLSInitialExecModel(GA, DAG);
    case TLSModel::LocalExec:
      return LowerToTLSLocalExecModel(GA, DAG);
  }
  llvm_unreachable("Bogus TLS model");
}
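
// Editorial note (not part of the original source): the model chosen by the
// TargetMachine maps onto the relocation operand flags used above: general
// and local dynamic use MO_GDGOT plus a call through the GD PLT, initial-exec
// uses MO_IE or MO_IEGOT, and local-exec adds a MO_TPREL offset directly to
// the thread pointer in UGP.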

//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//

HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
                                             const HexagonSubtarget &ST)
    : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
      Subtarget(ST) {
  auto &HRI = *Subtarget.getRegisterInfo();

  setPrefLoopAlignment(Align(16));
  setMinFunctionAlignment(Align(4));
  setPrefFunctionAlignment(Align(16));
  setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
  setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
  setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);

  setMaxAtomicSizeInBitsSupported(64);
  setMinCmpXchgSizeInBits(32);

  if (EnableHexSDNodeSched)
    setSchedulingPreference(Sched::VLIW);
  else
    setSchedulingPreference(Sched::Source);

  // Limits for inline expansion of memcpy/memmove
  MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
  MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
  MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
  MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
  MaxStoresPerMemset = MaxStoresPerMemsetCL;
  MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;
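
  // Editorial note (not part of the original source): these MaxStoresPer*
  // thresholds are the target-lowering knobs SelectionDAG consults when
  // deciding whether to expand a memcpy/memmove/memset inline as a short
  // sequence of loads and stores or to fall back to a library call; the -Os
  // variants apply when the function is optimized for size.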
1262 // Set up register classes.
1265 addRegisterClass(MVT::i1
, &Hexagon::PredRegsRegClass
);
1266 addRegisterClass(MVT::v2i1
, &Hexagon::PredRegsRegClass
); // bbbbaaaa
1267 addRegisterClass(MVT::v4i1
, &Hexagon::PredRegsRegClass
); // ddccbbaa
1268 addRegisterClass(MVT::v8i1
, &Hexagon::PredRegsRegClass
); // hgfedcba
1269 addRegisterClass(MVT::i32
, &Hexagon::IntRegsRegClass
);
1270 addRegisterClass(MVT::v2i16
, &Hexagon::IntRegsRegClass
);
1271 addRegisterClass(MVT::v4i8
, &Hexagon::IntRegsRegClass
);
1272 addRegisterClass(MVT::i64
, &Hexagon::DoubleRegsRegClass
);
1273 addRegisterClass(MVT::v8i8
, &Hexagon::DoubleRegsRegClass
);
1274 addRegisterClass(MVT::v4i16
, &Hexagon::DoubleRegsRegClass
);
1275 addRegisterClass(MVT::v2i32
, &Hexagon::DoubleRegsRegClass
);
1277 addRegisterClass(MVT::f32
, &Hexagon::IntRegsRegClass
);
1278 addRegisterClass(MVT::f64
, &Hexagon::DoubleRegsRegClass
);
1281 // Handling of scalar operations.
1283 // All operations default to "legal", except:
1284 // - indexed loads and stores (pre-/post-incremented),
1285 // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
1286 // ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
1287 // FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
1288 // FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
1289 // which default to "expand" for at least one type.
1292 setOperationAction(ISD::ConstantFP
, MVT::f32
, Legal
);
1293 setOperationAction(ISD::ConstantFP
, MVT::f64
, Legal
);
1294 setOperationAction(ISD::TRAP
, MVT::Other
, Legal
);
1295 setOperationAction(ISD::ConstantPool
, MVT::i32
, Custom
);
1296 setOperationAction(ISD::JumpTable
, MVT::i32
, Custom
);
1297 setOperationAction(ISD::BUILD_PAIR
, MVT::i64
, Expand
);
1298 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::i1
, Expand
);
1299 setOperationAction(ISD::INLINEASM
, MVT::Other
, Custom
);
1300 setOperationAction(ISD::INLINEASM_BR
, MVT::Other
, Custom
);
1301 setOperationAction(ISD::PREFETCH
, MVT::Other
, Custom
);
1302 setOperationAction(ISD::READCYCLECOUNTER
, MVT::i64
, Custom
);
1303 setOperationAction(ISD::INTRINSIC_VOID
, MVT::Other
, Custom
);
1304 setOperationAction(ISD::EH_RETURN
, MVT::Other
, Custom
);
1305 setOperationAction(ISD::GLOBAL_OFFSET_TABLE
, MVT::i32
, Custom
);
1306 setOperationAction(ISD::GlobalTLSAddress
, MVT::i32
, Custom
);
1307 setOperationAction(ISD::ATOMIC_FENCE
, MVT::Other
, Custom
);
1309 // Custom legalize GlobalAddress nodes into CONST32.
1310 setOperationAction(ISD::GlobalAddress
, MVT::i32
, Custom
);
1311 setOperationAction(ISD::GlobalAddress
, MVT::i8
, Custom
);
1312 setOperationAction(ISD::BlockAddress
, MVT::i32
, Custom
);
1314 // Hexagon needs to optimize cases with negative constants.
1315 setOperationAction(ISD::SETCC
, MVT::i8
, Custom
);
1316 setOperationAction(ISD::SETCC
, MVT::i16
, Custom
);
1317 setOperationAction(ISD::SETCC
, MVT::v4i8
, Custom
);
1318 setOperationAction(ISD::SETCC
, MVT::v2i16
, Custom
);
1320 // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
1321 setOperationAction(ISD::VASTART
, MVT::Other
, Custom
);
1322 setOperationAction(ISD::VAEND
, MVT::Other
, Expand
);
1323 setOperationAction(ISD::VAARG
, MVT::Other
, Expand
);
1324 setOperationAction(ISD::VACOPY
, MVT::Other
, Expand
);
1326 setOperationAction(ISD::STACKSAVE
, MVT::Other
, Expand
);
1327 setOperationAction(ISD::STACKRESTORE
, MVT::Other
, Expand
);
1328 setOperationAction(ISD::DYNAMIC_STACKALLOC
, MVT::i32
, Custom
);
1331 setMinimumJumpTableEntries(MinimumJumpTables
);
1333 setMinimumJumpTableEntries(std::numeric_limits
<unsigned>::max());
1334 setOperationAction(ISD::BR_JT
, MVT::Other
, Expand
);
1336 setOperationAction(ISD::ABS
, MVT::i32
, Legal
);
1337 setOperationAction(ISD::ABS
, MVT::i64
, Legal
);
1339 // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
1340 // but they only operate on i64.
1341 for (MVT VT
: MVT::integer_valuetypes()) {
1342 setOperationAction(ISD::UADDO
, VT
, Custom
);
1343 setOperationAction(ISD::USUBO
, VT
, Custom
);
1344 setOperationAction(ISD::SADDO
, VT
, Expand
);
1345 setOperationAction(ISD::SSUBO
, VT
, Expand
);
1346 setOperationAction(ISD::ADDCARRY
, VT
, Expand
);
1347 setOperationAction(ISD::SUBCARRY
, VT
, Expand
);
1349 setOperationAction(ISD::ADDCARRY
, MVT::i64
, Custom
);
1350 setOperationAction(ISD::SUBCARRY
, MVT::i64
, Custom
);
1352 setOperationAction(ISD::CTLZ
, MVT::i8
, Promote
);
1353 setOperationAction(ISD::CTLZ
, MVT::i16
, Promote
);
1354 setOperationAction(ISD::CTTZ
, MVT::i8
, Promote
);
1355 setOperationAction(ISD::CTTZ
, MVT::i16
, Promote
);
1357 // Popcount can count # of 1s in i64 but returns i32.
1358 setOperationAction(ISD::CTPOP
, MVT::i8
, Promote
);
1359 setOperationAction(ISD::CTPOP
, MVT::i16
, Promote
);
1360 setOperationAction(ISD::CTPOP
, MVT::i32
, Promote
);
1361 setOperationAction(ISD::CTPOP
, MVT::i64
, Legal
);
1363 setOperationAction(ISD::BITREVERSE
, MVT::i32
, Legal
);
1364 setOperationAction(ISD::BITREVERSE
, MVT::i64
, Legal
);
1365 setOperationAction(ISD::BSWAP
, MVT::i32
, Legal
);
1366 setOperationAction(ISD::BSWAP
, MVT::i64
, Legal
);
1368 setOperationAction(ISD::FSHL
, MVT::i32
, Legal
);
1369 setOperationAction(ISD::FSHL
, MVT::i64
, Legal
);
1370 setOperationAction(ISD::FSHR
, MVT::i32
, Legal
);
1371 setOperationAction(ISD::FSHR
, MVT::i64
, Legal
);
1373 for (unsigned IntExpOp
:
1374 {ISD::SDIV
, ISD::UDIV
, ISD::SREM
, ISD::UREM
,
1375 ISD::SDIVREM
, ISD::UDIVREM
, ISD::ROTL
, ISD::ROTR
,
1376 ISD::SHL_PARTS
, ISD::SRA_PARTS
, ISD::SRL_PARTS
,
1377 ISD::SMUL_LOHI
, ISD::UMUL_LOHI
}) {
1378 for (MVT VT
: MVT::integer_valuetypes())
1379 setOperationAction(IntExpOp
, VT
, Expand
);
1382 for (unsigned FPExpOp
:
1383 {ISD::FDIV
, ISD::FREM
, ISD::FSQRT
, ISD::FSIN
, ISD::FCOS
, ISD::FSINCOS
,
1384 ISD::FPOW
, ISD::FCOPYSIGN
}) {
1385 for (MVT VT
: MVT::fp_valuetypes())
1386 setOperationAction(FPExpOp
, VT
, Expand
);
1389 // No extending loads from i32.
1390 for (MVT VT
: MVT::integer_valuetypes()) {
1391 setLoadExtAction(ISD::ZEXTLOAD
, VT
, MVT::i32
, Expand
);
1392 setLoadExtAction(ISD::SEXTLOAD
, VT
, MVT::i32
, Expand
);
1393 setLoadExtAction(ISD::EXTLOAD
, VT
, MVT::i32
, Expand
);
1395 // Turn FP truncstore into trunc + store.
1396 setTruncStoreAction(MVT::f64
, MVT::f32
, Expand
);
1397 // Turn FP extload into load/fpextend.
1398 for (MVT VT
: MVT::fp_valuetypes())
1399 setLoadExtAction(ISD::EXTLOAD
, VT
, MVT::f32
, Expand
);
1401 // Expand BR_CC and SELECT_CC for all integer and fp types.
1402 for (MVT VT
: MVT::integer_valuetypes()) {
1403 setOperationAction(ISD::BR_CC
, VT
, Expand
);
1404 setOperationAction(ISD::SELECT_CC
, VT
, Expand
);
1406 for (MVT VT
: MVT::fp_valuetypes()) {
1407 setOperationAction(ISD::BR_CC
, VT
, Expand
);
1408 setOperationAction(ISD::SELECT_CC
, VT
, Expand
);
1410 setOperationAction(ISD::BR_CC
, MVT::Other
, Expand
);
1413 // Handling of vector operations.
1416 // Set the action for vector operations to "expand", then override it with
1417 // either "custom" or "legal" for specific cases.
1418 static const unsigned VectExpOps
[] = {
1419 // Integer arithmetic:
1420 ISD::ADD
, ISD::SUB
, ISD::MUL
, ISD::SDIV
, ISD::UDIV
,
1421 ISD::SREM
, ISD::UREM
, ISD::SDIVREM
, ISD::UDIVREM
, ISD::SADDO
,
1422 ISD::UADDO
, ISD::SSUBO
, ISD::USUBO
, ISD::SMUL_LOHI
, ISD::UMUL_LOHI
,
1424 ISD::AND
, ISD::OR
, ISD::XOR
, ISD::ROTL
, ISD::ROTR
,
1425 ISD::CTPOP
, ISD::CTLZ
, ISD::CTTZ
,
1426 // Floating point arithmetic/math functions:
1427 ISD::FADD
, ISD::FSUB
, ISD::FMUL
, ISD::FMA
, ISD::FDIV
,
1428 ISD::FREM
, ISD::FNEG
, ISD::FABS
, ISD::FSQRT
, ISD::FSIN
,
1429 ISD::FCOS
, ISD::FPOW
, ISD::FLOG
, ISD::FLOG2
,
1430 ISD::FLOG10
, ISD::FEXP
, ISD::FEXP2
, ISD::FCEIL
, ISD::FTRUNC
,
1431 ISD::FRINT
, ISD::FNEARBYINT
, ISD::FROUND
, ISD::FFLOOR
,
1432 ISD::FMINNUM
, ISD::FMAXNUM
, ISD::FSINCOS
,
1434 ISD::BR_CC
, ISD::SELECT_CC
, ISD::ConstantPool
,
1436 ISD::BUILD_VECTOR
, ISD::SCALAR_TO_VECTOR
,
1437 ISD::EXTRACT_VECTOR_ELT
, ISD::INSERT_VECTOR_ELT
,
1438 ISD::EXTRACT_SUBVECTOR
, ISD::INSERT_SUBVECTOR
,
1439 ISD::CONCAT_VECTORS
, ISD::VECTOR_SHUFFLE
1442 for (MVT VT
: MVT::fixedlen_vector_valuetypes()) {
1443 for (unsigned VectExpOp
: VectExpOps
)
1444 setOperationAction(VectExpOp
, VT
, Expand
);
1446 // Expand all extending loads and truncating stores:
1447 for (MVT TargetVT
: MVT::fixedlen_vector_valuetypes()) {
1450 setLoadExtAction(ISD::EXTLOAD
, TargetVT
, VT
, Expand
);
1451 setLoadExtAction(ISD::ZEXTLOAD
, TargetVT
, VT
, Expand
);
1452 setLoadExtAction(ISD::SEXTLOAD
, TargetVT
, VT
, Expand
);
1453 setTruncStoreAction(VT
, TargetVT
, Expand
);
1456 // Normalize all inputs to SELECT to be vectors of i32.
1457 if (VT
.getVectorElementType() != MVT::i32
) {
1458 MVT VT32
= MVT::getVectorVT(MVT::i32
, VT
.getSizeInBits()/32);
1459 setOperationAction(ISD::SELECT
, VT
, Promote
);
1460 AddPromotedToType(ISD::SELECT
, VT
, VT32
);
1462 setOperationAction(ISD::SRA
, VT
, Custom
);
1463 setOperationAction(ISD::SHL
, VT
, Custom
);
1464 setOperationAction(ISD::SRL
, VT
, Custom
);
1467 // Extending loads from (native) vectors of i8 into (native) vectors of i16
1469 setLoadExtAction(ISD::EXTLOAD
, MVT::v2i16
, MVT::v2i8
, Legal
);
1470 setLoadExtAction(ISD::ZEXTLOAD
, MVT::v2i16
, MVT::v2i8
, Legal
);
1471 setLoadExtAction(ISD::SEXTLOAD
, MVT::v2i16
, MVT::v2i8
, Legal
);
1472 setLoadExtAction(ISD::EXTLOAD
, MVT::v4i16
, MVT::v4i8
, Legal
);
1473 setLoadExtAction(ISD::ZEXTLOAD
, MVT::v4i16
, MVT::v4i8
, Legal
);
1474 setLoadExtAction(ISD::SEXTLOAD
, MVT::v4i16
, MVT::v4i8
, Legal
);
1476 // Types natively supported:
1477 for (MVT NativeVT
: {MVT::v8i1
, MVT::v4i1
, MVT::v2i1
, MVT::v4i8
,
1478 MVT::v8i8
, MVT::v2i16
, MVT::v4i16
, MVT::v2i32
}) {
1479 setOperationAction(ISD::BUILD_VECTOR
, NativeVT
, Custom
);
1480 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, NativeVT
, Custom
);
1481 setOperationAction(ISD::INSERT_VECTOR_ELT
, NativeVT
, Custom
);
1482 setOperationAction(ISD::EXTRACT_SUBVECTOR
, NativeVT
, Custom
);
1483 setOperationAction(ISD::INSERT_SUBVECTOR
, NativeVT
, Custom
);
1484 setOperationAction(ISD::CONCAT_VECTORS
, NativeVT
, Custom
);
1486 setOperationAction(ISD::ADD
, NativeVT
, Legal
);
1487 setOperationAction(ISD::SUB
, NativeVT
, Legal
);
1488 setOperationAction(ISD::MUL
, NativeVT
, Legal
);
1489 setOperationAction(ISD::AND
, NativeVT
, Legal
);
1490 setOperationAction(ISD::OR
, NativeVT
, Legal
);
1491 setOperationAction(ISD::XOR
, NativeVT
, Legal
);
1494 // Custom lower unaligned loads.
1495 // Also, for both loads and stores, verify the alignment of the address
1496 // in case it is a compile-time constant. This is a usability feature to
1497 // provide a meaningful error message to users.
  for (MVT VT : {MVT::i16, MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
                 MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  }

  for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16,
                 MVT::v2i32}) {
    setCondCodeAction(ISD::SETNE,  VT, Expand);
    setCondCodeAction(ISD::SETLE,  VT, Expand);
    setCondCodeAction(ISD::SETGE,  VT, Expand);
    setCondCodeAction(ISD::SETLT,  VT, Expand);
    setCondCodeAction(ISD::SETULE, VT, Expand);
    setCondCodeAction(ISD::SETUGE, VT, Expand);
    setCondCodeAction(ISD::SETULT, VT, Expand);
  }

  // Custom-lower bitcasts from i8 to v8i1.
  setOperationAction(ISD::BITCAST,        MVT::i8,    Custom);
  setOperationAction(ISD::SETCC,          MVT::v2i16, Custom);
  setOperationAction(ISD::VSELECT,        MVT::v4i8,  Custom);
  setOperationAction(ISD::VSELECT,        MVT::v2i16, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8,  Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8,  Custom);

  setOperationAction(ISD::FMA,  MVT::f64, Expand);
  setOperationAction(ISD::FADD, MVT::f64, Expand);
  setOperationAction(ISD::FSUB, MVT::f64, Expand);
  setOperationAction(ISD::FMUL, MVT::f64, Expand);

  setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
  setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);

  setOperationAction(ISD::FP_TO_UINT, MVT::i1,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i1,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i1,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i1,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);

  // Handling of indexed loads/stores: default is "expand".
  //
  for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64,
                 MVT::v2i16, MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
    setIndexedLoadAction(ISD::POST_INC, VT, Legal);
    setIndexedStoreAction(ISD::POST_INC, VT, Legal);
  }

  // Subtarget-specific operation actions.
  //
  if (Subtarget.hasV60Ops()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  }
  if (Subtarget.hasV66Ops()) {
    setOperationAction(ISD::FADD, MVT::f64, Legal);
    setOperationAction(ISD::FSUB, MVT::f64, Legal);
  }

  setTargetDAGCombine(ISD::VSELECT);

  if (Subtarget.useHVXOps())
    initializeHVXLowering();

  computeRegisterProperties(&HRI);

  //
  // Library calls for unsupported operations
  //
  bool FastMath = EnableFastMath;

  setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
  setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
  setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
  setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
  setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
  setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
  setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
  setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");

  setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
  setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
  setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
  setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
  setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
  setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");

  // This is the only fast library function for sqrtd.
  if (FastMath)
    setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");

  // Prefix is: nothing for "slow-math",
  //            "fast2_" for V5+ fast-math double-precision
  // (actually, keep fast-math and fast-math2 separate for now)
  if (FastMath) {
    setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
    setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
    setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
    setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
    setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
  } else {
    setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
    setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
    setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
    setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
    setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
  }

  if (FastMath)
    setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
  else
    setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");

  // These cause problems when the shift amount is non-constant.
  setLibcallName(RTLIB::SHL_I128, nullptr);
  setLibcallName(RTLIB::SRL_I128, nullptr);
  setLibcallName(RTLIB::SRA_I128, nullptr);
}
const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((HexagonISD::NodeType)Opcode) {
  case HexagonISD::ADDC:          return "HexagonISD::ADDC";
  case HexagonISD::SUBC:          return "HexagonISD::SUBC";
  case HexagonISD::ALLOCA:        return "HexagonISD::ALLOCA";
  case HexagonISD::AT_GOT:        return "HexagonISD::AT_GOT";
  case HexagonISD::AT_PCREL:      return "HexagonISD::AT_PCREL";
  case HexagonISD::BARRIER:       return "HexagonISD::BARRIER";
  case HexagonISD::CALL:          return "HexagonISD::CALL";
  case HexagonISD::CALLnr:        return "HexagonISD::CALLnr";
  case HexagonISD::CALLR:         return "HexagonISD::CALLR";
  case HexagonISD::COMBINE:       return "HexagonISD::COMBINE";
  case HexagonISD::CONST32_GP:    return "HexagonISD::CONST32_GP";
  case HexagonISD::CONST32:       return "HexagonISD::CONST32";
  case HexagonISD::CP:            return "HexagonISD::CP";
  case HexagonISD::DCFETCH:       return "HexagonISD::DCFETCH";
  case HexagonISD::EH_RETURN:     return "HexagonISD::EH_RETURN";
  case HexagonISD::TSTBIT:        return "HexagonISD::TSTBIT";
  case HexagonISD::EXTRACTU:      return "HexagonISD::EXTRACTU";
  case HexagonISD::INSERT:        return "HexagonISD::INSERT";
  case HexagonISD::JT:            return "HexagonISD::JT";
  case HexagonISD::RET_FLAG:      return "HexagonISD::RET_FLAG";
  case HexagonISD::TC_RETURN:     return "HexagonISD::TC_RETURN";
  case HexagonISD::VASL:          return "HexagonISD::VASL";
  case HexagonISD::VASR:          return "HexagonISD::VASR";
  case HexagonISD::VLSR:          return "HexagonISD::VLSR";
  case HexagonISD::VSPLAT:        return "HexagonISD::VSPLAT";
  case HexagonISD::VEXTRACTW:     return "HexagonISD::VEXTRACTW";
  case HexagonISD::VINSERTW0:     return "HexagonISD::VINSERTW0";
  case HexagonISD::VROR:          return "HexagonISD::VROR";
  case HexagonISD::READCYCLE:     return "HexagonISD::READCYCLE";
  case HexagonISD::PTRUE:         return "HexagonISD::PTRUE";
  case HexagonISD::PFALSE:        return "HexagonISD::PFALSE";
  case HexagonISD::VZERO:         return "HexagonISD::VZERO";
  case HexagonISD::VSPLATW:       return "HexagonISD::VSPLATW";
  case HexagonISD::D2P:           return "HexagonISD::D2P";
  case HexagonISD::P2D:           return "HexagonISD::P2D";
  case HexagonISD::V2Q:           return "HexagonISD::V2Q";
  case HexagonISD::Q2V:           return "HexagonISD::Q2V";
  case HexagonISD::QCAT:          return "HexagonISD::QCAT";
  case HexagonISD::QTRUE:         return "HexagonISD::QTRUE";
  case HexagonISD::QFALSE:        return "HexagonISD::QFALSE";
  case HexagonISD::TYPECAST:      return "HexagonISD::TYPECAST";
  case HexagonISD::VALIGN:        return "HexagonISD::VALIGN";
  case HexagonISD::VALIGNADDR:    return "HexagonISD::VALIGNADDR";
  case HexagonISD::OP_END:        break;
  }
  return nullptr;
}
void
HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, const SDLoc &dl,
      unsigned NeedAlign) const {
  auto *CA = dyn_cast<ConstantSDNode>(Ptr);
  if (!CA)
    return;
  unsigned Addr = CA->getZExtValue();
  unsigned HaveAlign = Addr != 0 ? 1u << countTrailingZeros(Addr) : NeedAlign;
  if (HaveAlign < NeedAlign) {
    std::string ErrMsg;
    raw_string_ostream O(ErrMsg);
    O << "Misaligned constant address: " << format_hex(Addr, 10)
      << " has alignment " << HaveAlign
      << ", but the memory access requires " << NeedAlign;
    if (DebugLoc DL = dl.getDebugLoc())
      DL.print(O << ", at ");
    report_fatal_error(O.str());
  }
}

// Bit-reverse Load Intrinsic: Check if the instruction is a bit-reverse load
// intrinsic.
static bool isBrevLdIntrinsic(const Value *Inst) {
  unsigned ID = cast<IntrinsicInst>(Inst)->getIntrinsicID();
  return (ID == Intrinsic::hexagon_L2_loadrd_pbr ||
          ID == Intrinsic::hexagon_L2_loadri_pbr ||
          ID == Intrinsic::hexagon_L2_loadrh_pbr ||
          ID == Intrinsic::hexagon_L2_loadruh_pbr ||
          ID == Intrinsic::hexagon_L2_loadrb_pbr ||
          ID == Intrinsic::hexagon_L2_loadrub_pbr);
}
// Bit-reverse Load Intrinsic: Crawl up and figure out the object from previous
// instruction. So far we only handle bitcast, extract value and bit reverse
// load intrinsic instructions. Should we handle CGEP ?
static Value *getBrevLdObject(Value *V) {
  if (Operator::getOpcode(V) == Instruction::ExtractValue ||
      Operator::getOpcode(V) == Instruction::BitCast)
    V = cast<Operator>(V)->getOperand(0);
  else if (isa<IntrinsicInst>(V) && isBrevLdIntrinsic(V))
    V = cast<Instruction>(V)->getOperand(0);
  return V;
}

// Bit-reverse Load Intrinsic: For a PHI Node return either an incoming edge or
// a back edge. If the back edge comes from the intrinsic itself, the incoming
// edge is returned.
static Value *returnEdge(const PHINode *PN, Value *IntrBaseVal) {
  const BasicBlock *Parent = PN->getParent();
  int Idx = -1;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
    BasicBlock *Blk = PN->getIncomingBlock(i);
    // Determine if the back edge is originated from intrinsic.
    if (Blk == Parent) {
      Value *BackEdgeVal = PN->getIncomingValue(i);
      Value *BaseVal;
      // Loop over till we return the same Value or we hit the IntrBaseVal.
      do {
        BaseVal = BackEdgeVal;
        BackEdgeVal = getBrevLdObject(BackEdgeVal);
      } while ((BaseVal != BackEdgeVal) && (IntrBaseVal != BackEdgeVal));
      // If the getBrevLdObject returns IntrBaseVal, we should return the
      // incoming edge.
      if (IntrBaseVal == BackEdgeVal)
        continue;
      Idx = i;
      break;
    } else // Set the node to incoming edge.
      Idx = i;
  }
  assert(Idx >= 0 && "Unexpected index to incoming argument in PHI");
  return PN->getIncomingValue(Idx);
}

// Bit-reverse Load Intrinsic: Figure out the underlying object the base
// pointer points to, for the bit-reverse load intrinsic. Setting this to
// memoperand might help alias analysis to figure out the dependencies.
static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
  Value *IntrBaseVal = V;
  Value *BaseVal;
  // Loop over till we return the same Value, implies we either figure out
  // the object or we hit a PHI
  do {
    BaseVal = V;
    V = getBrevLdObject(V);
  } while (BaseVal != V);

  // Identify the object from PHINode.
  if (const PHINode *PN = dyn_cast<PHINode>(V))
    return returnEdge(PN, IntrBaseVal);
  // For non PHI nodes, the object is the last value returned by getBrevLdObject
  return V;
}
/// Given an intrinsic, checks if on the target the intrinsic will need to map
/// to a MemIntrinsicNode (touches memory). If this is the case, it returns
/// true and store the intrinsic information into the IntrinsicInfo that was
/// passed to the function.
bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                               const CallInst &I,
                                               MachineFunction &MF,
                                               unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::hexagon_L2_loadrd_pbr:
  case Intrinsic::hexagon_L2_loadri_pbr:
  case Intrinsic::hexagon_L2_loadrh_pbr:
  case Intrinsic::hexagon_L2_loadruh_pbr:
  case Intrinsic::hexagon_L2_loadrb_pbr:
  case Intrinsic::hexagon_L2_loadrub_pbr: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
    auto &Cont = I.getCalledFunction()->getParent()->getContext();
    // The intrinsic function call is of the form { ElTy, i8* }
    // @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
    // should be derived from ElTy.
    Type *ElTy = I.getCalledFunction()->getReturnType()->getStructElementType(0);
    Info.memVT = MVT::getVT(ElTy);
    llvm::Value *BasePtrVal = I.getOperand(0);
    Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal);
    // The offset value comes through Modifier register. For now, assume the
    // offset is 0.
    Info.offset = 0;
    Info.align =
        MaybeAlign(DL.getABITypeAlignment(Info.memVT.getTypeForEVT(Cont)));
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::hexagon_V6_vgathermw:
  case Intrinsic::hexagon_V6_vgathermw_128B:
  case Intrinsic::hexagon_V6_vgathermh:
  case Intrinsic::hexagon_V6_vgathermh_128B:
  case Intrinsic::hexagon_V6_vgathermhw:
  case Intrinsic::hexagon_V6_vgathermhw_128B:
  case Intrinsic::hexagon_V6_vgathermwq:
  case Intrinsic::hexagon_V6_vgathermwq_128B:
  case Intrinsic::hexagon_V6_vgathermhq:
  case Intrinsic::hexagon_V6_vgathermhq_128B:
  case Intrinsic::hexagon_V6_vgathermhwq:
  case Intrinsic::hexagon_V6_vgathermhwq_128B: {
    const Module &M = *I.getParent()->getParent()->getParent();
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Type *VecTy = I.getArgOperand(1)->getType();
    Info.memVT = MVT::getVT(VecTy);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align =
        MaybeAlign(M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8);
    Info.flags = MachineMemOperand::MOLoad |
                 MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
  default:
    break;
  }
  return false;
}
bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
  return X.getValueType().isScalarInteger(); // 'tstbit'
}

bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
}

bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isSimple() || !VT2.isSimple())
    return false;
  return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
}

bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  return isOperationLegalOrCustom(ISD::FMA, VT);
}

// Should we expand the build vector with shuffles?
bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
      unsigned DefinedValues) const {
  return false;
}

bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
                                               EVT VT) const {
  return true;
}

TargetLoweringBase::LegalizeTypeAction
HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
  unsigned VecLen = VT.getVectorNumElements();
  MVT ElemTy = VT.getVectorElementType();

  if (VecLen == 1 || VT.isScalableVector())
    return TargetLoweringBase::TypeScalarizeVector;

  if (Subtarget.useHVXOps()) {
    unsigned HwLen = Subtarget.getVectorLength();
    // If the size of VT is at least half of the vector length,
    // widen the vector. Note: the threshold was not selected in
    // any scientific way.
    ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
    if (llvm::find(Tys, ElemTy) != Tys.end()) {
      unsigned HwWidth = 8*HwLen;
      unsigned VecWidth = VT.getSizeInBits();
      if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
        return TargetLoweringBase::TypeWidenVector;
    }
    // Split vectors of i1 that correspond to (byte) vector pairs.
    if (ElemTy == MVT::i1 && VecLen == 2*HwLen)
      return TargetLoweringBase::TypeSplitVector;
  }

  // Always widen (remaining) vectors of i1.
  if (ElemTy == MVT::i1)
    return TargetLoweringBase::TypeWidenVector;

  return TargetLoweringBase::TypeSplitVector;
}
std::pair<SDValue, int>
HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const {
  if (Addr.getOpcode() == ISD::ADD) {
    SDValue Op1 = Addr.getOperand(1);
    if (auto *CN = dyn_cast<const ConstantSDNode>(Op1.getNode()))
      return { Addr.getOperand(0), CN->getSExtValue() };
  }
  return { Addr, 0 };
}
// Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors
// to select data from, V3 is the permutation.
SDValue
HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
      const {
  const auto *SVN = cast<ShuffleVectorSDNode>(Op);
  ArrayRef<int> AM = SVN->getMask();
  assert(AM.size() <= 8 && "Unexpected shuffle mask");
  unsigned VecLen = AM.size();

  MVT VecTy = ty(Op);
  assert(!Subtarget.isHVXVectorType(VecTy, true) &&
         "HVX shuffles should be legal");
  assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");

  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  const SDLoc &dl(Op);
  // If the inputs are not the same as the output, bail. This is not an
  // error situation, but complicates the handling and the default expansion
  // (into BUILD_VECTOR) should be adequate.
  if (ty(Op0) != VecTy || ty(Op1) != VecTy)
    return SDValue();

  // Normalize the mask so that the first non-negative index comes from
  // the first operand.
  SmallVector<int,8> Mask(AM.begin(), AM.end());
  unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data();
  if (F == AM.size())
    return DAG.getUNDEF(VecTy);
  if (AM[F] >= int(VecLen)) {
    ShuffleVectorSDNode::commuteMask(Mask);
    std::swap(Op0, Op1);
  }

  // Express the shuffle mask in terms of bytes.
  SmallVector<int,8> ByteMask;
  unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
    int M = Mask[i];
    if (M < 0) {
      for (unsigned j = 0; j != ElemBytes; ++j)
        ByteMask.push_back(-1);
    } else {
      for (unsigned j = 0; j != ElemBytes; ++j)
        ByteMask.push_back(M*ElemBytes + j);
    }
  }
  assert(ByteMask.size() <= 8);
  // All non-undef (non-negative) indexes are well within [0..127], so they
  // fit in a single byte. Build two 64-bit words:
  // - MaskIdx where each byte is the corresponding index (for non-negative
  //   indexes), and 0xFF for negative indexes, and
  // - MaskUnd that has 0xFF for each negative index.
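  // NOTE (editorial, illustrative example; not in the original source): for a
  // v4i8 shuffle with mask <0, -1, 2, 3>, ByteMask is {0, -1, 2, 3}, so
  // MaskIdx = 0x0302FF00 and MaskUnd = 0x0000FF00. The identity test below,
  // MaskIdx == (0x03020100 | MaskUnd), then holds and Op0 is returned.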
  uint64_t MaskIdx = 0;
  uint64_t MaskUnd = 0;
  for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
    unsigned S = 8*i;
    uint64_t M = ByteMask[i] & 0xFF;
    if (M == 0xFF)
      MaskUnd |= M << S;
    MaskIdx |= M << S;
  }

  if (ByteMask.size() == 4) {
    // Identity.
    if (MaskIdx == (0x03020100 | MaskUnd))
      return Op0;
    // Byte swap.
    if (MaskIdx == (0x00010203 | MaskUnd)) {
      SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
      SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
      return DAG.getBitcast(VecTy, T1);
    }

    // Byte packs.
    SDValue Concat10 = DAG.getNode(HexagonISD::COMBINE, dl,
                                   typeJoin({ty(Op1), ty(Op0)}), {Op1, Op0});
    if (MaskIdx == (0x06040200 | MaskUnd))
      return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
    if (MaskIdx == (0x07050301 | MaskUnd))
      return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);

    SDValue Concat01 = DAG.getNode(HexagonISD::COMBINE, dl,
                                   typeJoin({ty(Op0), ty(Op1)}), {Op0, Op1});
    if (MaskIdx == (0x02000604 | MaskUnd))
      return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
    if (MaskIdx == (0x03010705 | MaskUnd))
      return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
  }

  if (ByteMask.size() == 8) {
    // Identity.
    if (MaskIdx == (0x0706050403020100ull | MaskUnd))
      return Op0;
    // Byte swap.
    if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
      SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
      SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
      return DAG.getBitcast(VecTy, T1);
    }

    // Halfword picks.
    if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
      return getInstr(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
      return getInstr(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
      return getInstr(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
      return getInstr(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
      VectorPair P = opSplit(Op0, dl, DAG);
      return getInstr(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
    }

    // Byte packs.
    if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
      return getInstr(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
      return getInstr(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
  }

  return SDValue();
}
// Create a Hexagon-specific node for shifting a vector by an integer.
SDValue
HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
      const {
  if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) {
    if (SDValue S = BVN->getSplatValue()) {
      unsigned NewOpc;
      switch (Op.getOpcode()) {
        case ISD::SHL:
          NewOpc = HexagonISD::VASL;
          break;
        case ISD::SRA:
          NewOpc = HexagonISD::VASR;
          break;
        case ISD::SRL:
          NewOpc = HexagonISD::VLSR;
          break;
        default:
          llvm_unreachable("Unexpected shift opcode");
      }
      return DAG.getNode(NewOpc, SDLoc(Op), ty(Op), Op.getOperand(0), S);
    }
  }

  return SDValue();
}

SDValue
HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
  return getVectorShiftByInt(Op, DAG);
}

SDValue
HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
  if (isa<ConstantSDNode>(Op.getOperand(1).getNode()))
    return Op;
  return SDValue();
}

SDValue
HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
  MVT ResTy = ty(Op);
  SDValue InpV = Op.getOperand(0);
  MVT InpTy = ty(InpV);
  assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
  const SDLoc &dl(Op);

  // Handle conversion from i8 to v8i1.
  if (ResTy == MVT::v8i1) {
    SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
    SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
    return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
  }

  return SDValue();
}
bool
HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
      MVT VecTy, SelectionDAG &DAG,
      MutableArrayRef<ConstantInt*> Consts) const {
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();
  IntegerType *IntTy = IntegerType::get(*DAG.getContext(), ElemWidth);
  bool AllConst = true;

  for (unsigned i = 0, e = Values.size(); i != e; ++i) {
    SDValue V = Values[i];
    if (V.isUndef()) {
      Consts[i] = ConstantInt::get(IntTy, 0);
      continue;
    }
    // Make sure to always cast to IntTy.
    if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) {
      const ConstantInt *CI = CN->getConstantIntValue();
      Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue());
    } else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) {
      const ConstantFP *CF = CN->getConstantFPValue();
      APInt A = CF->getValueAPF().bitcastToAPInt();
      Consts[i] = ConstantInt::get(IntTy, A.getZExtValue());
    } else {
      AllConst = false;
    }
  }
  return AllConst;
}
SDValue
HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
                                     MVT VecTy, SelectionDAG &DAG) const {
  MVT ElemTy = VecTy.getVectorElementType();
  assert(VecTy.getVectorNumElements() == Elem.size());

  SmallVector<ConstantInt*,4> Consts(Elem.size());
  bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);

  unsigned First, Num = Elem.size();
  for (First = 0; First != Num; ++First)
    if (!isUndef(Elem[First]))
      break;
  if (First == Num)
    return DAG.getUNDEF(VecTy);

  if (AllConst &&
      llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
    return getZero(dl, VecTy, DAG);

  if (ElemTy == MVT::i16) {
    assert(Elem.size() == 2);
    if (AllConst) {
      uint32_t V = (Consts[0]->getZExtValue() & 0xFFFF) |
                   Consts[1]->getZExtValue() << 16;
      return DAG.getBitcast(MVT::v2i16, DAG.getConstant(V, dl, MVT::i32));
    }
    SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32,
                         {Elem[1], Elem[0]}, DAG);
    return DAG.getBitcast(MVT::v2i16, N);
  }

  if (ElemTy == MVT::i8) {
    // First try generating a constant.
    if (AllConst) {
      int32_t V = (Consts[0]->getZExtValue() & 0xFF) |
                  (Consts[1]->getZExtValue() & 0xFF) << 8 |
                  (Consts[2]->getZExtValue() & 0xFF) << 16 |
                  Consts[3]->getZExtValue() << 24;
      return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32));
    }

    // Then try splat.
    bool IsSplat = true;
    for (unsigned i = 0; i != Num; ++i) {
      if (i == First)
        continue;
      if (Elem[i] == Elem[First] || isUndef(Elem[i]))
        continue;
      IsSplat = false;
      break;
    }
    if (IsSplat) {
      // Legalize the operand to VSPLAT.
      SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
      return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext);
    }

    // Generate
    //   (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) |
    //   (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16
    assert(Elem.size() == 4);
    SDValue Vs[4];
    for (unsigned i = 0; i != 4; ++i) {
      Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32);
      Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8);
    }
    SDValue S8 = DAG.getConstant(8, dl, MVT::i32);
    SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[1], S8});
    SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[3], S8});
    SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0});
    SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1});

    SDValue R = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
    return DAG.getBitcast(MVT::v4i8, R);
  }

#ifndef NDEBUG
  dbgs() << "VecTy: " << EVT(VecTy).getEVTString() << '\n';
#endif
  llvm_unreachable("Unexpected vector element type");
}
SDValue
HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
                                     MVT VecTy, SelectionDAG &DAG) const {
  MVT ElemTy = VecTy.getVectorElementType();
  assert(VecTy.getVectorNumElements() == Elem.size());

  SmallVector<ConstantInt*,8> Consts(Elem.size());
  bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);

  unsigned First, Num = Elem.size();
  for (First = 0; First != Num; ++First)
    if (!isUndef(Elem[First]))
      break;
  if (First == Num)
    return DAG.getUNDEF(VecTy);

  if (AllConst &&
      llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
    return getZero(dl, VecTy, DAG);

  // First try splat if possible.
  if (ElemTy == MVT::i16) {
    bool IsSplat = true;
    for (unsigned i = 0; i != Num; ++i) {
      if (i == First)
        continue;
      if (Elem[i] == Elem[First] || isUndef(Elem[i]))
        continue;
      IsSplat = false;
      break;
    }
    if (IsSplat) {
      // Legalize the operand to VSPLAT.
      SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
      return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext);
    }
  }

  // Then try constant.
  if (AllConst) {
    uint64_t Val = 0;
    unsigned W = ElemTy.getSizeInBits();
    uint64_t Mask = (ElemTy == MVT::i8)  ? 0xFFull
                  : (ElemTy == MVT::i16) ? 0xFFFFull : 0xFFFFFFFFull;
    for (unsigned i = 0; i != Num; ++i)
      Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask);
    SDValue V0 = DAG.getConstant(Val, dl, MVT::i64);
    return DAG.getBitcast(VecTy, V0);
  }

  // Build two 32-bit vectors and concatenate.
  MVT HalfTy = MVT::getVectorVT(ElemTy, Num/2);
  SDValue L = (ElemTy == MVT::i32)
                ? Elem[0]
                : buildVector32(Elem.take_front(Num/2), dl, HalfTy, DAG);
  SDValue H = (ElemTy == MVT::i32)
                ? Elem[1]
                : buildVector32(Elem.drop_front(Num/2), dl, HalfTy, DAG);
  return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, {H, L});
}
SDValue
HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
                                     const SDLoc &dl, MVT ValTy, MVT ResTy,
                                     SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  assert(!ValTy.isVector() ||
         VecTy.getVectorElementType() == ValTy.getVectorElementType());
  unsigned VecWidth = VecTy.getSizeInBits();
  unsigned ValWidth = ValTy.getSizeInBits();
  unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
  assert((VecWidth % ElemWidth) == 0);
  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV);

  // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
  // without any coprocessors).
  if (ElemWidth == 1) {
    assert(VecWidth == VecTy.getVectorNumElements() && "Sanity failure");
    assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
    // Check if this is an extract of the lowest bit.
    if (IdxN) {
      // Extracting the lowest bit is a no-op, but it changes the type,
      // so it must be kept as an operation to avoid errors related to
      // type mismatches.
      if (IdxN->isNullValue() && ValTy.getSizeInBits() == 1)
        return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
    }

    // If the value extracted is a single bit, use tstbit.
    if (ValWidth == 1) {
      SDValue A0 = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
      SDValue M0 = DAG.getConstant(8 / VecWidth, dl, MVT::i32);
      SDValue I0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, M0);
      return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, I0);
    }

    // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
    // a predicate register. The elements of the vector are repeated
    // in the register (if necessary) so that the total number is 8.
    // The extracted subvector will need to be expanded in such a way.
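    // NOTE (editorial, illustrative example; not in the original source):
    // extracting a v2i1 subvector at index 1 from a v4i1 vector gives
    // Scale = 2 and VecRep = 2, so the code below shifts (p2d VecV) right
    // by 16 bits, expands the remaining bits back to byte form with
    // expandPredicate, and converts the result back with D2P.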
    unsigned Scale = VecWidth / ValWidth;

    // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
    // position 0.
    assert(ty(IdxV) == MVT::i32);
    unsigned VecRep = 8 / VecWidth;
    SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                             DAG.getConstant(8*VecRep, dl, MVT::i32));
    SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
    SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
    while (Scale > 1) {
      // The longest possible subvector is at most 32 bits, so it is always
      // contained in the low subregister.
      T1 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, T1);
      T1 = expandPredicate(T1, dl, DAG);
      Scale /= 2;
    }

    return DAG.getNode(HexagonISD::D2P, dl, ResTy, T1);
  }

  assert(VecWidth == 32 || VecWidth == 64);

  // Cast everything to scalar integer types.
  MVT ScalarTy = tyScalar(VecTy);
  VecV = DAG.getBitcast(ScalarTy, VecV);

  SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
  SDValue ExtV;

  if (IdxN) {
    unsigned Off = IdxN->getZExtValue() * ElemWidth;
    if (VecWidth == 64 && ValWidth == 32) {
      assert(Off == 0 || Off == 32);
      unsigned SubIdx = Off == 0 ? Hexagon::isub_lo : Hexagon::isub_hi;
      ExtV = DAG.getTargetExtractSubreg(SubIdx, dl, MVT::i32, VecV);
    } else if (Off == 0 && (ValWidth % 8) == 0) {
      ExtV = DAG.getZeroExtendInReg(VecV, dl, tyScalar(ValTy));
    } else {
      SDValue OffV = DAG.getConstant(Off, dl, MVT::i32);
      // The return type of EXTRACTU must be the same as the type of the
      // input vector.
      ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
                         {VecV, WidthV, OffV});
    }
  } else {
    if (ty(IdxV) != MVT::i32)
      IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
    SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                               DAG.getConstant(ElemWidth, dl, MVT::i32));
    ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
                       {VecV, WidthV, OffV});
  }

  // Cast ExtV to the requested result type.
  ExtV = DAG.getZExtOrTrunc(ExtV, dl, tyScalar(ResTy));
  ExtV = DAG.getBitcast(ResTy, ExtV);
  return ExtV;
}
SDValue
HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
                                    const SDLoc &dl, MVT ValTy,
                                    SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  if (VecTy.getVectorElementType() == MVT::i1) {
    MVT ValTy = ty(ValV);
    assert(ValTy.getVectorElementType() == MVT::i1);
    SDValue ValR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV);
    unsigned VecLen = VecTy.getVectorNumElements();
    unsigned Scale = VecLen / ValTy.getVectorNumElements();
    assert(Scale > 1);

    for (unsigned R = Scale; R > 1; R /= 2) {
      ValR = contractPredicate(ValR, dl, DAG);
      ValR = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
                         DAG.getUNDEF(MVT::i32), ValR);
    }
    // The longest possible subvector is at most 32 bits, so it is always
    // contained in the low subregister.
    ValR = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, ValR);

    unsigned ValBytes = 64 / Scale;
    SDValue Width = DAG.getConstant(ValBytes*8, dl, MVT::i32);
    SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                              DAG.getConstant(8, dl, MVT::i32));
    SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
    SDValue Ins = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
                              {VecR, ValR, Width, Idx});
    return DAG.getNode(HexagonISD::D2P, dl, VecTy, Ins);
  }

  unsigned VecWidth = VecTy.getSizeInBits();
  unsigned ValWidth = ValTy.getSizeInBits();
  assert(VecWidth == 32 || VecWidth == 64);
  assert((VecWidth % ValWidth) == 0);

  // Cast everything to scalar integer types.
  MVT ScalarTy = MVT::getIntegerVT(VecWidth);
  // The actual type of ValV may be different than ValTy (which is related
  // to the vector type).
  unsigned VW = ty(ValV).getSizeInBits();
  ValV = DAG.getBitcast(MVT::getIntegerVT(VW), ValV);
  VecV = DAG.getBitcast(ScalarTy, VecV);
  if (VW != VecWidth)
    ValV = DAG.getAnyExtOrTrunc(ValV, dl, ScalarTy);

  SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
  SDValue InsV;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(IdxV)) {
    unsigned W = C->getZExtValue() * ValWidth;
    SDValue OffV = DAG.getConstant(W, dl, MVT::i32);
    InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
                       {VecV, ValV, WidthV, OffV});
  } else {
    if (ty(IdxV) != MVT::i32)
      IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
    SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV);
    InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
                       {VecV, ValV, WidthV, OffV});
  }

  return DAG.getNode(ISD::BITCAST, dl, VecTy, InsV);
}
SDValue
HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
                                       SelectionDAG &DAG) const {
  assert(ty(Vec32).getSizeInBits() == 32);
  if (isUndef(Vec32))
    return DAG.getUNDEF(MVT::i64);
  return getInstr(Hexagon::S2_vsxtbh, dl, MVT::i64, {Vec32}, DAG);
}

SDValue
HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  assert(ty(Vec64).getSizeInBits() == 64);
  if (isUndef(Vec64))
    return DAG.getUNDEF(MVT::i32);
  return getInstr(Hexagon::S2_vtrunehb, dl, MVT::i32, {Vec64}, DAG);
}

SDValue
HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
      const {
  if (Ty.isVector()) {
    assert(Ty.isInteger() && "Only integer vectors are supported here");
    unsigned W = Ty.getSizeInBits();
    if (W <= 64)
      return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W)));
    return DAG.getNode(HexagonISD::VZERO, dl, Ty);
  }

  if (Ty.isInteger())
    return DAG.getConstant(0, dl, Ty);
  if (Ty.isFloatingPoint())
    return DAG.getConstantFP(0.0, dl, Ty);
  llvm_unreachable("Invalid type for zero");
}
SDValue
HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
  MVT VecTy = ty(Op);
  unsigned BW = VecTy.getSizeInBits();
  const SDLoc &dl(Op);
  SmallVector<SDValue,8> Ops;
  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
    Ops.push_back(Op.getOperand(i));

  if (BW == 32)
    return buildVector32(Ops, dl, VecTy, DAG);
  if (BW == 64)
    return buildVector64(Ops, dl, VecTy, DAG);

  if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
    // Check if this is a special case or all-0 or all-1.
    bool All0 = true, All1 = true;
    for (SDValue P : Ops) {
      auto *CN = dyn_cast<ConstantSDNode>(P.getNode());
      if (CN == nullptr) {
        All0 = All1 = false;
        break;
      }
      uint32_t C = CN->getZExtValue();
      All0 &= (C == 0);
      All1 &= (C == 1);
    }
    if (All0)
      return DAG.getNode(HexagonISD::PFALSE, dl, VecTy);
    if (All1)
      return DAG.getNode(HexagonISD::PTRUE, dl, VecTy);

    // For each i1 element in the resulting predicate register, put 1
    // shifted by the index of the element into a general-purpose register,
    // then or them together and transfer it back into a predicate register.
    SDValue Rs[8];
    SDValue Z = getZero(dl, MVT::i32, DAG);
    // Always produce 8 bits, repeat inputs if necessary.
    unsigned Rep = 8 / VecTy.getVectorNumElements();
    for (unsigned i = 0; i != 8; ++i) {
      SDValue S = DAG.getConstant(1ull << i, dl, MVT::i32);
      Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z);
    }
    for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(A.size()/2)) {
      for (unsigned i = 0, e = A.size()/2; i != e; ++i)
        Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[2*i], Rs[2*i+1]);
    }
    // Move the value directly to a predicate register.
    return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Rs[0]}, DAG);
  }

  return SDValue();
}
SDValue
HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
                                           SelectionDAG &DAG) const {
  MVT VecTy = ty(Op);
  const SDLoc &dl(Op);
  if (VecTy.getSizeInBits() == 64) {
    assert(Op.getNumOperands() == 2);
    return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, Op.getOperand(1),
                       Op.getOperand(0));
  }

  MVT ElemTy = VecTy.getVectorElementType();
  if (ElemTy == MVT::i1) {
    assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1);
    MVT OpTy = ty(Op.getOperand(0));
    // Scale is how many times the operands need to be contracted to match
    // the representation in the target register.
    unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
    assert(Scale == Op.getNumOperands() && Scale > 1);

    // First, convert all bool vectors to integers, then generate pairwise
    // inserts to form values of doubled length. Up until there are only
    // two values left to concatenate, all of these values will fit in a
    // 32-bit integer, so keep them as i32 to use 32-bit inserts.
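    // NOTE (editorial, illustrative example; not in the original source):
    // concatenating two v4i1 operands into v8i1 gives Scale = 2. Each operand
    // is moved to a 64-bit register with P2D and contracted once, leaving its
    // bits in a 32-bit word; the widening loop below is then skipped and the
    // two words are combined into an i64 and converted back with D2P.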
    SmallVector<SDValue,4> Words[2];
    unsigned IdxW = 0;

    for (SDValue P : Op.getNode()->op_values()) {
      SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P);
      for (unsigned R = Scale; R > 1; R /= 2) {
        W = contractPredicate(W, dl, DAG);
        W = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
                        DAG.getUNDEF(MVT::i32), W);
      }
      W = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, W);
      Words[IdxW].push_back(W);
    }

    while (Scale > 2) {
      SDValue WidthV = DAG.getConstant(64 / Scale, dl, MVT::i32);
      Words[IdxW ^ 1].clear();

      for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) {
        SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1];
        // Insert W1 into W0 right next to the significant bits of W0.
        SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
                                {W0, W1, WidthV, WidthV});
        Words[IdxW ^ 1].push_back(T);
      }
      IdxW ^= 1;
      Scale /= 2;
    }

    // Another sanity check. At this point there should only be two words
    // left, and Scale should be 2.
    assert(Scale == 2 && Words[IdxW].size() == 2);

    SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
                             Words[IdxW][1], Words[IdxW][0]);
    return DAG.getNode(HexagonISD::D2P, dl, VecTy, WW);
  }

  return SDValue();
}
SDValue
HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  MVT ElemTy = ty(Vec).getVectorElementType();
  return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG);
}

SDValue
HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
                                              SelectionDAG &DAG) const {
  return extractVector(Op.getOperand(0), Op.getOperand(1), SDLoc(Op),
                       ty(Op), ty(Op), DAG);
}

SDValue
HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                              SelectionDAG &DAG) const {
  return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
                      SDLoc(Op), ty(Op).getVectorElementType(), DAG);
}

SDValue
HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue ValV = Op.getOperand(1);
  return insertVector(Op.getOperand(0), ValV, Op.getOperand(2),
                      SDLoc(Op), ty(ValV), DAG);
}

bool
HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
  // Assuming the caller does not have either a signext or zeroext modifier, and
  // only one value is accepted, any reasonable truncation is allowed.
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;

  // FIXME: in principle up to 64-bit could be made safe, but it would be very
  // fragile at the moment: any support for multiple value returns would be
  // liable to disallow tail calls involving i64 -> iN truncation in many cases.
  return Ty1->getPrimitiveSizeInBits() <= 32;
}
SDValue
HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  unsigned ClaimAlign = LN->getAlignment();
  validateConstPtrAlignment(LN->getBasePtr(), SDLoc(Op), ClaimAlign);
  // Call LowerUnalignedLoad for all loads, it recognizes loads that
  // don't need extra aligning.
  return LowerUnalignedLoad(Op, DAG);
}

SDValue
HexagonTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  unsigned ClaimAlign = SN->getAlignment();
  SDValue Ptr = SN->getBasePtr();
  const SDLoc &dl(Op);
  validateConstPtrAlignment(Ptr, dl, ClaimAlign);

  MVT StoreTy = SN->getMemoryVT().getSimpleVT();
  unsigned NeedAlign = Subtarget.getTypeAlignment(StoreTy);
  if (ClaimAlign < NeedAlign)
    return expandUnalignedStore(SN, DAG);
  return Op;
}
SDValue
HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
      const {
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  MVT LoadTy = ty(Op);
  unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy);
  unsigned HaveAlign = LN->getAlignment();
  if (HaveAlign >= NeedAlign)
    return Op;

  const SDLoc &dl(Op);
  const DataLayout &DL = DAG.getDataLayout();
  LLVMContext &Ctx = *DAG.getContext();

  // If the load aligning is disabled or the load can be broken up into two
  // smaller legal loads, do the default (target-independent) expansion.
  bool DoDefault = false;
  // Handle it in the default way if this is an indexed load.
  if (!LN->isUnindexed())
    DoDefault = true;

  if (!AlignLoads) {
    if (allowsMemoryAccessForAlignment(Ctx, DL, LN->getMemoryVT(),
                                       *LN->getMemOperand()))
      return Op;
    DoDefault = true;
  }
  if (!DoDefault && (2 * HaveAlign) == NeedAlign) {
    // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
    MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8 * HaveAlign)
                                : MVT::getVectorVT(MVT::i8, HaveAlign);
    DoDefault =
        allowsMemoryAccessForAlignment(Ctx, DL, PartTy, *LN->getMemOperand());
  }
  if (DoDefault) {
    std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
    return DAG.getMergeValues({P.first, P.second}, dl);
  }

  // The code below generates two loads, both aligned as NeedAlign, and
  // with the distance of NeedAlign between them. For that to cover the
  // bits that need to be loaded (and without overlapping), the size of
  // the loads should be equal to NeedAlign. This is true for all loadable
  // types, but add an assertion in case something changes in the future.
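  // NOTE (editorial, illustrative example; not in the original source): for
  // an unaligned v8i8 load from pointer P (NeedAlign = 8), the code emits two
  // aligned 8-byte loads from valignaddr(P) and valignaddr(P)+8, and combines
  // them with a VALIGN node that uses the low bits of P to select the
  // requested 8 bytes.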
  assert(LoadTy.getSizeInBits() == 8*NeedAlign);

  unsigned LoadLen = NeedAlign;
  SDValue Base = LN->getBasePtr();
  SDValue Chain = LN->getChain();
  auto BO = getBaseAndOffset(Base);
  unsigned BaseOpc = BO.first.getOpcode();
  if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0)
    return Op;

  if (BO.second % LoadLen != 0) {
    BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
                           DAG.getConstant(BO.second % LoadLen, dl, MVT::i32));
    BO.second -= BO.second % LoadLen;
  }
  SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
      ? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
                    DAG.getConstant(NeedAlign, dl, MVT::i32))
      : BO.first;
  SDValue Base0 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second, dl);
  SDValue Base1 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second+LoadLen, dl);

  MachineMemOperand *WideMMO = nullptr;
  if (MachineMemOperand *MMO = LN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    WideMMO = MF.getMachineMemOperand(MMO->getPointerInfo(), MMO->getFlags(),
                    2*LoadLen, LoadLen, MMO->getAAInfo(), MMO->getRanges(),
                    MMO->getSyncScopeID(), MMO->getOrdering(),
                    MMO->getFailureOrdering());
  }

  SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO);
  SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO);

  SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy,
                                {Load1, Load0, BaseNoOff.getOperand(0)});
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                                 Load0.getValue(1), Load1.getValue(1));
  SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl);
  return M;
}
SDValue
HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const {
  SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
  auto *CY = dyn_cast<ConstantSDNode>(Y);
  if (!CY)
    return SDValue();

  const SDLoc &dl(Op);
  SDVTList VTs = Op.getNode()->getVTList();
  assert(VTs.NumVTs == 2);
  assert(VTs.VTs[1] == MVT::i1);
  unsigned Opc = Op.getOpcode();

  if (CY) {
    uint32_t VY = CY->getZExtValue();
    assert(VY != 0 && "This should have been folded");
    // X +/- 1
    if (VY != 1)
      return SDValue();

    if (Opc == ISD::UADDO) {
      SDValue Op = DAG.getNode(ISD::ADD, dl, VTs.VTs[0], {X, Y});
      SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op, getZero(dl, ty(Op), DAG),
                                ISD::SETEQ);
      return DAG.getMergeValues({Op, Ov}, dl);
    }
    if (Opc == ISD::USUBO) {
      SDValue Op = DAG.getNode(ISD::SUB, dl, VTs.VTs[0], {X, Y});
      SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op,
                                DAG.getConstant(-1, dl, ty(Op)), ISD::SETEQ);
      return DAG.getMergeValues({Op, Ov}, dl);
    }
  }

  return SDValue();
}
SDValue
HexagonTargetLowering::LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned Opc = Op.getOpcode();
  SDValue X = Op.getOperand(0), Y = Op.getOperand(1), C = Op.getOperand(2);

  if (Opc == ISD::ADDCARRY)
    return DAG.getNode(HexagonISD::ADDC, dl, Op.getNode()->getVTList(),
                       { X, Y, C });

  EVT CarryTy = C.getValueType();
  SDValue SubC = DAG.getNode(HexagonISD::SUBC, dl, Op.getNode()->getVTList(),
                             { X, Y, DAG.getLogicalNOT(dl, C, CarryTy) });
  SDValue Out[] = { SubC.getValue(0),
                    DAG.getLogicalNOT(dl, SubC.getValue(1), CarryTy) };
  return DAG.getMergeValues(Out, dl);
}
SDValue
HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain   = Op.getOperand(0);
  SDValue Offset  = Op.getOperand(1);
  SDValue Handler = Op.getOperand(2);
  SDLoc dl(Op);
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Mark function as containing a call to EH_RETURN.
  HexagonMachineFunctionInfo *FuncInfo =
    DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
  FuncInfo->setHasEHReturn();

  unsigned OffsetReg = Hexagon::R28;

  SDValue StoreAddr =
      DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
                  DAG.getIntPtrConstant(4, dl));
  Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
  Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);

  // Not needed we already use it as explict input to EH_RETURN.
  // MF.getRegInfo().addLiveOut(OffsetReg);

  return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
}
SDValue
HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();

  // Handle INLINEASM first.
  if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
    return LowerINLINEASM(Op, DAG);

  if (isHvxOperation(Op)) {
    // If HVX lowering returns nothing, try the default lowering.
    if (SDValue V = LowerHvxOperation(Op, DAG))
      return V;
  }

  switch (Opc) {
    default:
#ifndef NDEBUG
      Op.getNode()->dumpr(&DAG);
      if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
        errs() << "Error: check for a non-legal type in this operation\n";
#endif
      llvm_unreachable("Should not custom lower this!");
    case ISD::CONCAT_VECTORS:       return LowerCONCAT_VECTORS(Op, DAG);
    case ISD::INSERT_SUBVECTOR:     return LowerINSERT_SUBVECTOR(Op, DAG);
    case ISD::INSERT_VECTOR_ELT:    return LowerINSERT_VECTOR_ELT(Op, DAG);
    case ISD::EXTRACT_SUBVECTOR:    return LowerEXTRACT_SUBVECTOR(Op, DAG);
    case ISD::EXTRACT_VECTOR_ELT:   return LowerEXTRACT_VECTOR_ELT(Op, DAG);
    case ISD::BUILD_VECTOR:         return LowerBUILD_VECTOR(Op, DAG);
    case ISD::VECTOR_SHUFFLE:       return LowerVECTOR_SHUFFLE(Op, DAG);
    case ISD::BITCAST:              return LowerBITCAST(Op, DAG);
    case ISD::LOAD:                 return LowerLoad(Op, DAG);
    case ISD::STORE:                return LowerStore(Op, DAG);
    case ISD::UADDO:
    case ISD::USUBO:                return LowerUAddSubO(Op, DAG);
    case ISD::ADDCARRY:
    case ISD::SUBCARRY:             return LowerAddSubCarry(Op, DAG);
    case ISD::SRA:
    case ISD::SHL:
    case ISD::SRL:                  return LowerVECTOR_SHIFT(Op, DAG);
    case ISD::ROTL:                 return LowerROTL(Op, DAG);
    case ISD::ConstantPool:         return LowerConstantPool(Op, DAG);
    case ISD::JumpTable:            return LowerJumpTable(Op, DAG);
    case ISD::EH_RETURN:            return LowerEH_RETURN(Op, DAG);
    case ISD::RETURNADDR:           return LowerRETURNADDR(Op, DAG);
    case ISD::FRAMEADDR:            return LowerFRAMEADDR(Op, DAG);
    case ISD::GlobalTLSAddress:     return LowerGlobalTLSAddress(Op, DAG);
    case ISD::ATOMIC_FENCE:         return LowerATOMIC_FENCE(Op, DAG);
    case ISD::GlobalAddress:        return LowerGLOBALADDRESS(Op, DAG);
    case ISD::BlockAddress:         return LowerBlockAddress(Op, DAG);
    case ISD::GLOBAL_OFFSET_TABLE:  return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
    case ISD::VASTART:              return LowerVASTART(Op, DAG);
    case ISD::DYNAMIC_STACKALLOC:   return LowerDYNAMIC_STACKALLOC(Op, DAG);
    case ISD::SETCC:                return LowerSETCC(Op, DAG);
    case ISD::VSELECT:              return LowerVSELECT(Op, DAG);
    case ISD::INTRINSIC_WO_CHAIN:   return LowerINTRINSIC_WO_CHAIN(Op, DAG);
    case ISD::INTRINSIC_VOID:       return LowerINTRINSIC_VOID(Op, DAG);
    case ISD::PREFETCH:             return LowerPREFETCH(Op, DAG);
    case ISD::READCYCLECOUNTER:     return LowerREADCYCLECOUNTER(Op, DAG);
      break;
  }

  return SDValue();
}
void
HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  // We are only custom-lowering stores to verify the alignment of the
  // address if it is a compile-time constant. Since a store can be modified
  // during type-legalization (the value being stored may need legalization),
  // return empty Results here to indicate that we don't really make any
  // changes in the custom lowering.
  if (N->getOpcode() != ISD::STORE)
    return TargetLowering::LowerOperationWrapper(N, Results, DAG);
}

void
HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
                                          SmallVectorImpl<SDValue> &Results,
                                          SelectionDAG &DAG) const {
  const SDLoc &dl(N);
  switch (N->getOpcode()) {
    case ISD::BITCAST:
      // Handle a bitcast from v8i1 to i8.
      if (N->getValueType(0) == MVT::i8) {
        SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
                             N->getOperand(0), DAG);
        SDValue T = DAG.getAnyExtOrTrunc(P, dl, MVT::i8);
        Results.push_back(T);
      }
      break;
    default:
      break;
  }
}
SDValue
HexagonTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
      const {
  SDValue Op(N, 0);
  if (isHvxOperation(Op)) {
    if (SDValue V = PerformHvxDAGCombine(N, DCI))
      return V;
    return SDValue();
  }

  const SDLoc &dl(Op);
  unsigned Opc = Op.getOpcode();

  if (Opc == HexagonISD::P2D) {
    SDValue P = Op.getOperand(0);
    switch (P.getOpcode()) {
      case HexagonISD::PTRUE:
        return DCI.DAG.getConstant(-1, dl, ty(Op));
      case HexagonISD::PFALSE:
        return getZero(dl, ty(Op), DCI.DAG);
      default:
        break;
    }
  } else if (Opc == ISD::VSELECT) {
    // This is pretty much duplicated in HexagonISelLoweringHVX...
    //
    // (vselect (xor x, ptrue), v0, v1) -> (vselect x, v1, v0)
    SDValue Cond = Op.getOperand(0);
    if (Cond->getOpcode() == ISD::XOR) {
      SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
      if (C1->getOpcode() == HexagonISD::PTRUE) {
        SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0,
                                       Op.getOperand(2), Op.getOperand(1));
        return VSel;
      }
    }
  }

  return SDValue();
}
/// Returns relocation base for the given PIC jumptable.
SDValue
HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                SelectionDAG &DAG) const {
  int Idx = cast<JumpTableSDNode>(Table)->getIndex();
  EVT VT = Table.getValueType();
  SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
  return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
}
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
HexagonTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
      case 'q':
      case 'v':
        if (Subtarget.useHVXOps())
          return C_RegisterClass;
        break;
      case 'a':
        return C_RegisterClass;
      default:
        break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass*>
HexagonTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':   // R0-R31
      switch (VT.SimpleTy) {
      default:
        return {0u, nullptr};
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::f32:
        return {0u, &Hexagon::IntRegsRegClass};
      case MVT::i64:
      case MVT::f64:
        return {0u, &Hexagon::DoubleRegsRegClass};
      }
      break;
    case 'a':   // M0-M1
      if (VT != MVT::i32)
        return {0u, nullptr};
      return {0u, &Hexagon::ModRegsRegClass};
    case 'q':   // q0-q3
      switch (VT.getSizeInBits()) {
      default:
        return {0u, nullptr};
      case 512:
      case 1024:
        return {0u, &Hexagon::HvxQRRegClass};
      }
      break;
    case 'v':   // V0-V31
      switch (VT.getSizeInBits()) {
      default:
        return {0u, nullptr};
      case 512:
        return {0u, &Hexagon::HvxVRRegClass};
      case 1024:
        if (Subtarget.hasV60Ops() && Subtarget.useHVX128BOps())
          return {0u, &Hexagon::HvxVRRegClass};
        return {0u, &Hexagon::HvxWRRegClass};
      case 2048:
        return {0u, &Hexagon::HvxWRRegClass};
      }
      break;
    default:
      return {0u, nullptr};
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                         bool ForCodeSize) const {
  return true;
}
/// isLegalAddressingMode - Return true if the addressing mode represented by
/// AM is legal for this target, for a load/store of the specified type.
bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                  const AddrMode &AM, Type *Ty,
                                                  unsigned AS,
                                                  Instruction *I) const {
  if (Ty->isSized()) {
    // When LSR detects uses of the same base address to access different
    // types (e.g. unions), it will assume a conservative type for these
    // uses:
    //   LSR Use: Kind=Address of void in addrspace(4294967295), ...
    // The type Ty passed here would then be "void". Skip the alignment
    // checks, but do not return false right away, since that confuses
    // LSR into crashing.
    unsigned A = DL.getABITypeAlignment(Ty);
    // The base offset must be a multiple of the alignment.
    if ((AM.BaseOffs % A) != 0)
      return false;
    // The shifted offset must fit in 11 bits.
    if (!isInt<11>(AM.BaseOffs >> Log2_32(A)))
      return false;
  }
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  int Scale = AM.Scale;
  if (Scale < 0)
    Scale = -Scale;
  switch (Scale) {
  case 0:  // No scale reg, "r+i", "r", or just "i".
    break;
  default: // No scaled addressing mode.
    return false;
  }
  return true;
}

/// Return true if folding a constant offset with the given GlobalAddress is
/// legal. It is frequently not legal in PIC relocation models.
bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
      const {
  return HTM.getRelocationModel() == Reloc::Static;
}

/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can compare
/// a register against the immediate without having to materialize the
/// immediate into a register.
bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return Imm >= -512 && Imm <= 511;
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
                                 SDValue Callee,
                                 CallingConv::ID CalleeCC,
                                 bool IsVarArg,
                                 bool IsCalleeStructRet,
                                 bool IsCallerStructRet,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SmallVectorImpl<ISD::InputArg> &Ins,
                                 SelectionDAG &DAG) const {
  const Function &CallerF = DAG.getMachineFunction().getFunction();
  CallingConv::ID CallerCC = CallerF.getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;

  // ***************************************************************************
  //  Look for obvious safe cases to perform tail call optimization that do not
  //  require ABI changes.
  // ***************************************************************************

  // If this is a tail call via a function pointer, then don't do it!
  if (!isa<GlobalAddressSDNode>(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee)) {
    return false;
  }

  // Do not optimize if the calling conventions do not match and the
  // conventions used are not C or Fast.
  if (!CCMatch) {
    bool R = (CallerCC == CallingConv::C || CallerCC == CallingConv::Fast);
    bool E = (CalleeCC == CallingConv::C || CalleeCC == CallingConv::Fast);
    // If both the caller and the callee use C or Fast, the mismatch is ok.
    if (!R || !E)
      return false;
  }

  // Do not tail call optimize vararg calls.
  if (IsVarArg)
    return false;

  // Also avoid tail call optimization if either caller or callee uses struct
  // return semantics.
  if (IsCalleeStructRet || IsCallerStructRet)
    return false;

  // In addition to the cases above, we also disable tail call optimization if
  // the calling convention requires at least one outgoing argument to be
  // passed on the stack. We cannot check that here because at this point that
  // information is not available.
  return true;
}
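// In practice this accepts direct calls where the caller and callee either
// share a calling convention or both use C/Fast, are not variadic, and do not
// use struct-return semantics; indirect calls through a function pointer are
// always rejected.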

/// Returns the target specific optimal type for load and store operations as
/// a result of memset, memcpy, and memmove lowering.
///
/// If DstAlign is zero, the destination alignment can satisfy any constraint.
/// Similarly, if SrcAlign is zero, there is no need to check it against an
/// alignment requirement, probably because the source does not need to be
/// loaded. If 'IsMemset' is true, we are expanding a memset. If 'ZeroMemset'
/// is true, it is a memset of zero. 'MemcpyStrSrc' indicates whether the
/// memcpy source is constant, so it does not need to be loaded. The function
/// returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT HexagonTargetLowering::getOptimalMemOpType(uint64_t Size,
      unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset,
      bool MemcpyStrSrc, const AttributeList &FuncAttributes) const {

  auto Aligned = [](unsigned GivenA, unsigned MinA) -> bool {
    return (GivenA % MinA) == 0;
  };

  if (Size >= 8 && Aligned(DstAlign, 8) && (IsMemset || Aligned(SrcAlign, 8)))
    return MVT::i64;
  if (Size >= 4 && Aligned(DstAlign, 4) && (IsMemset || Aligned(SrcAlign, 4)))
    return MVT::i32;
  if (Size >= 2 && Aligned(DstAlign, 2) && (IsMemset || Aligned(SrcAlign, 2)))
    return MVT::i16;

  return MVT::Other;
}
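// For example, a memcpy of 32 bytes between 8-byte-aligned buffers is emitted
// as i64 (doubleword) operations, while a 6-byte copy with only 2-byte
// alignment falls back to i16 accesses.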

bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AS, unsigned Align, MachineMemOperand::Flags Flags,
      bool *Fast) const {
  if (Fast)
    *Fast = false;
  return Subtarget.isHVXVectorType(VT.getSimpleVT());
}
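// Misaligned accesses are only reported as acceptable for HVX vector types;
// scalar types keep the default behavior and must be naturally aligned.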

std::pair<const TargetRegisterClass*, uint8_t>
HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
      MVT VT) const {
  if (Subtarget.isHVXVectorType(VT, true)) {
    unsigned BitWidth = VT.getSizeInBits();
    unsigned VecWidth = Subtarget.getVectorLength() * 8;

    if (VT.getVectorElementType() == MVT::i1)
      return std::make_pair(&Hexagon::HvxQRRegClass, 1);
    if (BitWidth == VecWidth)
      return std::make_pair(&Hexagon::HvxVRRegClass, 1);
    assert(BitWidth == 2 * VecWidth);
    return std::make_pair(&Hexagon::HvxWRRegClass, 1);
  }

  return TargetLowering::findRepresentativeClass(TRI, VT);
}
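// With the 128-byte HVX configuration VecWidth is 1024, so e.g. v32i32 maps to
// HvxVR (a single vector register), v64i32 maps to HvxWR (a register pair),
// and i1-element vector types (predicates) map to HvxQR.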

bool HexagonTargetLowering::shouldReduceLoadWidth(SDNode *Load,
      ISD::LoadExtType ExtTy, EVT NewVT) const {
  // TODO: This may be worth removing. Check regression tests for diffs.
  if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
    return false;

  auto *L = cast<LoadSDNode>(Load);
  std::pair<SDValue,int> BO = getBaseAndOffset(L->getBasePtr());
  // Small-data object, do not shrink.
  if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
    return false;
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(BO.first)) {
    auto &HTM = static_cast<const HexagonTargetMachine&>(getTargetMachine());
    const auto *GO = dyn_cast_or_null<const GlobalObject>(GA->getGlobal());
    return !GO || !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, HTM);
  }
  return true;
}
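// For example, when only the low byte of a loaded i32 is used, the DAG
// combiner would normally narrow the load to i8; that narrowing is suppressed
// here for globals placed in the small-data section.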

Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
      AtomicOrdering Ord) const {
  BasicBlock *BB = Builder.GetInsertBlock();
  Module *M = BB->getParent()->getParent();
  auto PT = cast<PointerType>(Addr->getType());
  Type *Ty = PT->getElementType();
  unsigned SZ = Ty->getPrimitiveSizeInBits();
  assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
  Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
                                   : Intrinsic::hexagon_L4_loadd_locked;
  Function *Fn = Intrinsic::getDeclaration(M, IntID);

  PointerType *NewPtrTy
    = Builder.getIntNTy(SZ)->getPointerTo(PT->getAddressSpace());
  Addr = Builder.CreateBitCast(Addr, NewPtrTy);

  Value *Call = Builder.CreateCall(Fn, Addr, "larx");

  return Builder.CreateBitCast(Call, Ty);
}
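// AtomicExpandPass uses this hook (together with emitStoreConditional below)
// to build LL/SC loops for atomic operations: a 32-bit load-linked becomes a
// call to the L2_loadw_locked intrinsic, i.e. Hexagon's "r = memw_locked(addr)"
// load-locked instruction.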

/// Perform a store-conditional operation to Addr. Return the status of the
/// store. This should be 0 if the store succeeded, non-zero otherwise.
Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder,
      Value *Val, Value *Addr, AtomicOrdering Ord) const {
  BasicBlock *BB = Builder.GetInsertBlock();
  Module *M = BB->getParent()->getParent();
  Type *Ty = Val->getType();
  unsigned SZ = Ty->getPrimitiveSizeInBits();

  Type *CastTy = Builder.getIntNTy(SZ);
  assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
  Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
                                   : Intrinsic::hexagon_S4_stored_locked;
  Function *Fn = Intrinsic::getDeclaration(M, IntID);

  unsigned AS = Addr->getType()->getPointerAddressSpace();
  Addr = Builder.CreateBitCast(Addr, CastTy->getPointerTo(AS));
  Val = Builder.CreateBitCast(Val, CastTy);

  Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
  Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
  Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
  return Ext;
}
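// The intrinsic lowers to Hexagon's conditional store (memw_locked/memd_locked
// with a predicate result); per the contract above, the value returned here is
// 0 when the store succeeded and non-zero otherwise, and the LL/SC loop built
// by AtomicExpandPass retries on failure.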

TargetLowering::AtomicExpansionKind
HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
  // Do not expand loads and stores that don't exceed 64 bits.
  return LI->getType()->getPrimitiveSizeInBits() > 64
             ? AtomicExpansionKind::LLOnly
             : AtomicExpansionKind::None;
}

bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
  // Do not expand loads and stores that don't exceed 64 bits.
  return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
}
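// Atomic loads and stores up to 64 bits are handled directly by the ordinary
// scalar memory instructions, so only wider accesses go through the expansion
// pass.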

TargetLowering::AtomicExpansionKind
HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *AI) const {
  const DataLayout &DL = AI->getModule()->getDataLayout();
  unsigned Size = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
  if (Size >= 4 && Size <= 8)
    return AtomicExpansionKind::LLSC;
  return AtomicExpansionKind::None;
}
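// Returning LLSC here makes AtomicExpandPass rewrite 4- to 8-byte cmpxchg
// operations into explicit load-locked/store-conditional loops built from
// emitLoadLinked and emitStoreConditional above.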