//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that Hexagon uses to lower LLVM code
// into a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "HexagonISelLowering.h"
#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "HexagonTargetObjectFile.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "hexagon-lowering"
static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
  cl::init(true), cl::Hidden,
  cl::desc("Control jump table emission on Hexagon target"));

static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
  cl::Hidden, cl::ZeroOrMore, cl::init(false),
  cl::desc("Enable Hexagon SDNode scheduling"));

static cl::opt<bool> EnableFastMath("ffast-math",
  cl::Hidden, cl::ZeroOrMore, cl::init(false),
  cl::desc("Enable Fast Math processing"));

static cl::opt<int> MinimumJumpTables("minimum-jump-tables",
  cl::Hidden, cl::ZeroOrMore, cl::init(5),
  cl::desc("Set minimum jump tables"));

static cl::opt<int> MaxStoresPerMemcpyCL("max-store-memcpy",
  cl::Hidden, cl::ZeroOrMore, cl::init(6),
  cl::desc("Max #stores to inline memcpy"));

static cl::opt<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os",
  cl::Hidden, cl::ZeroOrMore, cl::init(4),
  cl::desc("Max #stores to inline memcpy"));

static cl::opt<int> MaxStoresPerMemmoveCL("max-store-memmove",
  cl::Hidden, cl::ZeroOrMore, cl::init(6),
  cl::desc("Max #stores to inline memmove"));

static cl::opt<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os",
  cl::Hidden, cl::ZeroOrMore, cl::init(4),
  cl::desc("Max #stores to inline memmove"));

static cl::opt<int> MaxStoresPerMemsetCL("max-store-memset",
  cl::Hidden, cl::ZeroOrMore, cl::init(8),
  cl::desc("Max #stores to inline memset"));

static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
  cl::Hidden, cl::ZeroOrMore, cl::init(4),
  cl::desc("Max #stores to inline memset"));

static cl::opt<bool> AlignLoads("hexagon-align-loads",
  cl::Hidden, cl::init(false),
  cl::desc("Rewrite unaligned loads as a pair of aligned loads"));
static cl::opt<bool>
    DisableArgsMinAlignment("hexagon-disable-args-min-alignment", cl::Hidden,
                            cl::init(false),
                            cl::desc("Disable minimum alignment of 1 for "
                                     "arguments passed by value on stack"));
namespace {

  class HexagonCCState : public CCState {
    unsigned NumNamedVarArgParams = 0;

  public:
    HexagonCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
                   SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
                   unsigned NumNamedArgs)
        : CCState(CC, IsVarArg, MF, locs, C),
          NumNamedVarArgParams(NumNamedArgs) {}
    unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
  };

} // end anonymous namespace
// Implement calling convention for Hexagon.

static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                       CCValAssign::LocInfo &LocInfo,
                       ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  static const MCPhysReg ArgRegs[] = {
    Hexagon::R0, Hexagon::R1, Hexagon::R2,
    Hexagon::R3, Hexagon::R4, Hexagon::R5
  };
  const unsigned NumArgRegs = array_lengthof(ArgRegs);
  unsigned RegNum = State.getFirstUnallocated(ArgRegs);

  // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
  if (RegNum != NumArgRegs && RegNum % 2 == 1)
    State.AllocateReg(ArgRegs[RegNum]);

  // Always return false here, as this function only makes sure that the first
  // unallocated register has an even register number and does not actually
  // allocate a register for the current argument.
  return false;
}

#include "HexagonGenCallingConv.inc"

SDValue
HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
      const {
  return SDValue();
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter. Sometimes what we are copying is the end of a
/// larger object, the part that does not fit in registers.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(
      Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
      /*isVolatile=*/false, /*AlwaysInline=*/false,
      /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo());
}
bool
HexagonTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  if (MF.getSubtarget<HexagonSubtarget>().useHVXOps())
    return CCInfo.CheckReturn(Outs, RetCC_Hexagon_HVX);
  return CCInfo.CheckReturn(Outs, RetCC_Hexagon);
}
// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
// passed by value, the function prototype is modified to return void and
// the value is stored in memory pointed by a pointer passed by caller.
SDValue
HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                   bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   const SDLoc &dl, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze return values of ISD::RET
  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon_HVX);
  else
    CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue Val = OutVals[i];

    switch (VA.getLocInfo()) {
      default:
        // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::BCvt:
        Val = DAG.getBitcast(VA.getLocVT(), Val);
        break;
      case CCValAssign::SExt:
        Val = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Val);
        break;
      case CCValAssign::ZExt:
        Val = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Val);
        break;
      case CCValAssign::AExt:
        Val = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Val);
        break;
    }

    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Val, Flag);

    // Guarantee that all emitted copies are stuck together with flags.
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
}
bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // If either no tail call or told not to tail call at all, don't.
  return CI->isTailCall();
}
Register HexagonTargetLowering::getRegisterByName(
      const char* RegName, LLT VT, const MachineFunction &) const {
  // Just support r19, the linux kernel uses it.
  Register Reg = StringSwitch<Register>(RegName)
                     .Case("r0", Hexagon::R0)
                     .Case("r1", Hexagon::R1)
                     .Case("r2", Hexagon::R2)
                     .Case("r3", Hexagon::R3)
                     .Case("r4", Hexagon::R4)
                     .Case("r5", Hexagon::R5)
                     .Case("r6", Hexagon::R6)
                     .Case("r7", Hexagon::R7)
                     .Case("r8", Hexagon::R8)
                     .Case("r9", Hexagon::R9)
                     .Case("r10", Hexagon::R10)
                     .Case("r11", Hexagon::R11)
                     .Case("r12", Hexagon::R12)
                     .Case("r13", Hexagon::R13)
                     .Case("r14", Hexagon::R14)
                     .Case("r15", Hexagon::R15)
                     .Case("r16", Hexagon::R16)
                     .Case("r17", Hexagon::R17)
                     .Case("r18", Hexagon::R18)
                     .Case("r19", Hexagon::R19)
                     .Case("r20", Hexagon::R20)
                     .Case("r21", Hexagon::R21)
                     .Case("r22", Hexagon::R22)
                     .Case("r23", Hexagon::R23)
                     .Case("r24", Hexagon::R24)
                     .Case("r25", Hexagon::R25)
                     .Case("r26", Hexagon::R26)
                     .Case("r27", Hexagon::R27)
                     .Case("r28", Hexagon::R28)
                     .Case("r29", Hexagon::R29)
                     .Case("r30", Hexagon::R30)
                     .Case("r31", Hexagon::R31)
                     .Case("r1:0", Hexagon::D0)
                     .Case("r3:2", Hexagon::D1)
                     .Case("r5:4", Hexagon::D2)
                     .Case("r7:6", Hexagon::D3)
                     .Case("r9:8", Hexagon::D4)
                     .Case("r11:10", Hexagon::D5)
                     .Case("r13:12", Hexagon::D6)
                     .Case("r15:14", Hexagon::D7)
                     .Case("r17:16", Hexagon::D8)
                     .Case("r19:18", Hexagon::D9)
                     .Case("r21:20", Hexagon::D10)
                     .Case("r23:22", Hexagon::D11)
                     .Case("r25:24", Hexagon::D12)
                     .Case("r27:26", Hexagon::D13)
                     .Case("r29:28", Hexagon::D14)
                     .Case("r31:30", Hexagon::D15)
                     .Case("sp", Hexagon::R29)
                     .Case("fp", Hexagon::R30)
                     .Case("lr", Hexagon::R31)
                     .Case("p0", Hexagon::P0)
                     .Case("p1", Hexagon::P1)
                     .Case("p2", Hexagon::P2)
                     .Case("p3", Hexagon::P3)
                     .Case("sa0", Hexagon::SA0)
                     .Case("lc0", Hexagon::LC0)
                     .Case("sa1", Hexagon::SA1)
                     .Case("lc1", Hexagon::LC1)
                     .Case("m0", Hexagon::M0)
                     .Case("m1", Hexagon::M1)
                     .Case("usr", Hexagon::USR)
                     .Case("ugp", Hexagon::UGP)
                     .Case("cs0", Hexagon::CS0)
                     .Case("cs1", Hexagon::CS1)
                     .Default(Register());
  if (Reg)
    return Reg;

  report_fatal_error("Invalid register name global variable");
}
/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/Glue are the input chain/glue to use, and that TheCall is the call
/// being lowered. Returns a SDNode with the same number of values as the
/// ISD::CALL.
SDValue HexagonTargetLowering::LowerCallResult(
    SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;

  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon_HVX);
  else
    CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    SDValue RetVal;
    if (RVLocs[i].getValVT() == MVT::i1) {
      // Return values of type MVT::i1 require special handling. The reason
      // is that MVT::i1 is associated with the PredRegs register class, but
      // values of that type are still returned in R0. Generate an explicit
      // copy into a predicate register from R0, and treat the value of the
      // predicate register as the call result.
      auto &MRI = DAG.getMachineFunction().getRegInfo();
      SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
                                       MVT::i32, Glue);
      // FR0 = (Value, Chain, Glue)
      Register PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
      SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR,
                                     FR0.getValue(0), FR0.getValue(2));
      // TPR = (Chain, Glue)
      // Don't glue this CopyFromReg, because it copies from a virtual
      // register. If it is glued to the call, InstrEmitter will add it
      // as an implicit def to the call (EmitMachineNode).
      RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1);
      Glue = TPR.getValue(1);
      Chain = TPR.getValue(0);
    } else {
      RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
                                  RVLocs[i].getValVT(), Glue);
      Glue = RetVal.getValue(2);
      Chain = RetVal.getValue(1);
    }
    InVals.push_back(RetVal.getValue(0));
  }

  return Chain;
}
/// LowerCall - Functions arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
SDValue
HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  bool DoesNotReturn = CLI.DoesNotReturn;

  bool IsStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  unsigned NumParams = CLI.CB ? CLI.CB->getFunctionType()->getNumParams() : 0;
  if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);

  // Linux ABI treats var-arg calls the same way as regular ones.
  bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs, *DAG.getContext(),
                        NumParams);

  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX);
  else if (DisableArgsMinAlignment)
    CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_Legacy);
  else
    CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);

  if (CLI.IsTailCall) {
    bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
    CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                        IsVarArg, IsStructRet, StructAttrFlag, Outs,
                        OutVals, Ins, DAG);
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
      CCValAssign &VA = ArgLocs[i];
      if (VA.isMemLoc()) {
        CLI.IsTailCall = false;
        break;
      }
    }
    LLVM_DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n"
                                         : "Argument must be passed on stack. "
                                           "Not eligible for Tail Call\n"));
  }
  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  SDValue StackPtr =
      DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);

  bool NeedsArgAlign = false;
  Align LargestAlignSeen;
  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    // Record if we need > 8 byte alignment on an argument.
    bool ArgAlign = Subtarget.isHVXVectorType(VA.getValVT());
    NeedsArgAlign |= ArgAlign;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
      default:
        // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::BCvt:
        Arg = DAG.getBitcast(VA.getLocVT(), Arg);
        break;
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
        break;
      case CCValAssign::AExt:
        Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
        break;
    }

    if (VA.isMemLoc()) {
      unsigned LocMemOffset = VA.getLocMemOffset();
      SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
                                        StackPtr.getValueType());
      MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
      if (ArgAlign)
        LargestAlignSeen = std::max(
            LargestAlignSeen, Align(VA.getLocVT().getStoreSizeInBits() / 8));
      if (Flags.isByVal()) {
        // The argument is a struct passed by value. According to LLVM, "Arg"
        // is a pointer.
        MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
                                                        Flags, DAG, dl));
      } else {
        MachinePointerInfo LocPI = MachinePointerInfo::getStack(
            DAG.getMachineFunction(), LocMemOffset);
        SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI);
        MemOpChains.push_back(S);
      }
      continue;
    }

    // Arguments that can be passed on register must be kept at RegsToPass
    // vector.
    RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
  }

  if (NeedsArgAlign && Subtarget.hasV60Ops()) {
    LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
    Align VecAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
    LargestAlignSeen = std::max(LargestAlignSeen, VecAlign);
    MFI.ensureMaxAlignment(LargestAlignSeen);
  }
  // Transform all store nodes into one single node because all store
  // nodes are independent of each other.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
  SDValue Glue;
  if (!CLI.IsTailCall) {
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
    Glue = Chain.getValue(1);
  }

  // Build a sequence of copy-to-reg nodes chained together with token
  // chain and flag operands which copy the outgoing args into registers.
  // The Glue is necessary since all emitted instructions must be
  // stuck together.
  if (!CLI.IsTailCall) {
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, Glue);
      Glue = Chain.getValue(1);
    }
  } else {
    // For tail calls lower the arguments to the 'real' stack slot.
    //
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.
    //
    // Do not flag preceding copytoreg stuff together with the following stuff.
    Glue = SDValue();
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, Glue);
      Glue = Chain.getValue(1);
    }
    Glue = SDValue();
  }

  bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls();
  unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT, 0, Flags);
  } else if (ExternalSymbolSDNode *S =
             dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, Flags);
  }
  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));
  }

  const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (Glue.getNode())
    Ops.push_back(Glue);

  if (CLI.IsTailCall) {
    MFI.setHasTailCall();
    return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
  }

  // Set this here because we need to know this for "hasFP" in frame lowering.
  // The target-independent code calls getFrameRegister before setting it, and
  // getFrameRegister uses hasFP to determine whether the function has FP.
  MFI.setHasCalls(true);

  unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL;
  Chain = DAG.getNode(OpCode, dl, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), Glue, dl);
  Glue = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, Glue, CallConv, IsVarArg, Ins, dl, DAG,
                         InVals, OutVals, Callee);
}
/// Returns true by value, base pointer and offset pointer and addressing
/// mode by reference if this node can be combined with a load / store to
/// form a post-indexed load / store.
bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
      SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
      SelectionDAG &DAG) const {
  LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N);
  if (!LSN)
    return false;
  EVT VT = LSN->getMemoryVT();
  if (!VT.isSimple())
    return false;
  bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
                     VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64 ||
                     VT == MVT::v2i16 || VT == MVT::v2i32 || VT == MVT::v4i8 ||
                     VT == MVT::v4i16 || VT == MVT::v8i8 ||
                     Subtarget.isHVXVectorType(VT.getSimpleVT());
  if (!IsLegalType)
    return false;

  if (Op->getOpcode() != ISD::ADD)
    return false;
  Base = Op->getOperand(0);
  Offset = Op->getOperand(1);
  if (!isa<ConstantSDNode>(Offset.getNode()))
    return false;
  AM = ISD::POST_INC;

  int32_t V = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
  return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V);
}
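
// Inline-asm statements may clobber the link register (LR). Scan the asm's
// def/clobber operands and record any LR clobber in the machine function
// info, so that prologue/epilogue insertion knows LR must be saved.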
SDValue
HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  unsigned LR = HRI.getRARegister();

  if ((Op.getOpcode() != ISD::INLINEASM &&
       Op.getOpcode() != ISD::INLINEASM_BR) || HMFI.hasClobberLR())
    return Op;

  unsigned NumOps = Op.getNumOperands();
  if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue)
    --NumOps;  // Ignore the flag operand.

  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
    unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
    unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
    ++i;  // Skip the ID value.

    switch (InlineAsm::getKind(Flags)) {
      default:
        llvm_unreachable("Bad flags!");
      case InlineAsm::Kind_RegUse:
      case InlineAsm::Kind_Imm:
      case InlineAsm::Kind_Mem:
        i += NumVals;
        break;
      case InlineAsm::Kind_Clobber:
      case InlineAsm::Kind_RegDef:
      case InlineAsm::Kind_RegDefEarlyClobber: {
        for (; NumVals; --NumVals, ++i) {
          unsigned Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
          if (Reg != LR)
            continue;
          HMFI.setHasClobberLR(true);
          return Op;
        }
        break;
      }
    }
  }

  return Op;
}
// Need to transform ISD::PREFETCH into something that doesn't inherit
// all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and
// SDNPMayStore.
SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Addr = Op.getOperand(1);
  // Lower it to DCFETCH($reg, #0).  A "pat" will try to merge the offset in,
  // if the "reg" is fed by an "add".
  SDLoc DL(Op);
  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
  return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
}
// Custom-handle ISD::READCYCLECOUNTER because the target-independent SDNode
// is marked as having side-effects, while the register read on Hexagon does
// not have any. TableGen refuses to accept the direct pattern from that node
// to the corresponding register-read instruction.
SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDLoc dl(Op);
  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
  return DAG.getNode(HexagonISD::READCYCLE, dl, VTs, Chain);
}
SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  // Lower the hexagon_prefetch builtin to DCFETCH, as above.
  if (IntNo == Intrinsic::hexagon_prefetch) {
    SDValue Addr = Op.getOperand(2);
    SDLoc DL(Op);
    SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
    return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
  }
  return SDValue();
}
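
// Dynamic stack allocation is lowered to the target-specific ALLOCA node so
// that the requested alignment travels with it as an explicit constant
// operand.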
SDValue
HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  SDValue Align = Op.getOperand(2);
  SDLoc dl(Op);

  ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align);
  assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");

  unsigned A = AlignConst->getSExtValue();
  auto &HFI = *Subtarget.getFrameLowering();
  // "Zero" means natural stack alignment.
  if (A == 0)
    A = HFI.getStackAlign().value();

  LLVM_DEBUG({
    dbgs () << __func__ << " Align: " << A << " Size: ";
    Size.getNode()->dump(&DAG);
    dbgs() << "\n";
  });

  SDValue AC = DAG.getConstant(A, dl, MVT::i32);
  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
  SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);

  DAG.ReplaceAllUsesOfValueWith(Op, AA);
  return AA;
}
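
// Lower the incoming (formal) arguments: values arriving in registers are
// copied out of their physical registers, values arriving on the stack get
// fixed frame objects, and for varargs functions under the Musl ABI a
// register-save area is additionally set up for the unnamed arguments.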
SDValue HexagonTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Linux ABI treats var-arg calls the same way as regular ones.
  bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs,
                        *DAG.getContext(),
                        MF.getFunction().getFunctionType()->getNumParams());

  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX);
  else if (DisableArgsMinAlignment)
    CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_Legacy);
  else
    CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);

  // For LLVM, in the case when returning a struct by value (>8byte),
  // the first argument is a pointer that points to the location on caller's
  // stack where the return value will be stored. For Hexagon, the location on
  // caller's stack is passed only when the struct size is smaller than (and
  // equal to) 8 bytes. If not, no address will be passed into the callee and
  // the callee returns the result directly through R0/R1.
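  // Given a register class and the register just allocated from it, compute
  // the index (counted in single 32-bit registers from R0/V0) of the next
  // free register. This tracks which of R0-R5 still need to be spilled into
  // the varargs register-save area.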
  auto NextSingleReg = [] (const TargetRegisterClass &RC, unsigned Reg) {
    switch (RC.getID()) {
    case Hexagon::IntRegsRegClassID:
      return Reg - Hexagon::R0 + 1;
    case Hexagon::DoubleRegsRegClassID:
      return (Reg - Hexagon::D0 + 1) * 2;
    case Hexagon::HvxVRRegClassID:
      return Reg - Hexagon::V0 + 1;
    case Hexagon::HvxWRRegClassID:
      return (Reg - Hexagon::W0 + 1) * 2;
    }
    llvm_unreachable("Unexpected register class");
  };

  auto &HFL = const_cast<HexagonFrameLowering&>(*Subtarget.getFrameLowering());
  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
  HFL.FirstVarArgSavedReg = 0;
  HMFI.setFirstNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    ISD::ArgFlagsTy Flags = Ins[i].Flags;
    bool ByVal = Flags.isByVal();

    // Arguments passed in registers:
    // 1. 32- and 64-bit values and HVX vectors are passed directly,
    // 2. Large structs are passed via an address, and the address is
    //    passed in a register.
    if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= 8)
      llvm_unreachable("ByValSize must be bigger than 8 bytes");

    bool InReg = VA.isRegLoc() &&
                 (!ByVal || (ByVal && Flags.getByValSize() > 8));

    if (InReg) {
      MVT RegVT = VA.getLocVT();
      if (VA.getLocInfo() == CCValAssign::BCvt)
        RegVT = VA.getValVT();

      const TargetRegisterClass *RC = getRegClassFor(RegVT);
      Register VReg = MRI.createVirtualRegister(RC);
      SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);

      // Treat values of type MVT::i1 specially: they are passed in
      // registers of type i32, but they need to remain as values of
      // type i1 for consistency of the argument lowering.
      if (VA.getValVT() == MVT::i1) {
        assert(RegVT.getSizeInBits() <= 32);
        SDValue T = DAG.getNode(ISD::AND, dl, RegVT,
                                Copy, DAG.getConstant(1, dl, RegVT));
        Copy = DAG.getSetCC(dl, MVT::i1, T, DAG.getConstant(0, dl, RegVT),
                            ISD::SETNE);
      } else {
#ifndef NDEBUG
        unsigned RegSize = RegVT.getSizeInBits();
        assert(RegSize == 32 || RegSize == 64 ||
               Subtarget.isHVXVectorType(RegVT));
#endif
      }
      InVals.push_back(Copy);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      HFL.FirstVarArgSavedReg = NextSingleReg(*RC, VA.getLocReg());
    } else {
      assert(VA.isMemLoc() && "Argument should be passed in memory");

      // If it's a byval parameter, then we need to compute the
      // "real" size, not the size of the pointer.
      unsigned ObjSize = Flags.isByVal()
                            ? Flags.getByValSize()
                            : VA.getLocVT().getStoreSizeInBits() / 8;

      // Create the frame index object for this incoming parameter.
      int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
      int FI = MFI.CreateFixedObject(ObjSize, Offset, true);
      SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);

      if (Flags.isByVal()) {
        // If it's a pass-by-value aggregate, then do not dereference the stack
        // location. Instead, we should generate a reference to the stack
        // location.
        InVals.push_back(FIN);
      } else {
        SDValue L = DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                MachinePointerInfo::getFixedStack(MF, FI, 0));
        InVals.push_back(L);
      }
    }
  }

  if (IsVarArg && Subtarget.isEnvironmentMusl()) {
    for (int i = HFL.FirstVarArgSavedReg; i < 6; i++)
      MRI.addLiveIn(Hexagon::R0+i);
  }

  if (IsVarArg && Subtarget.isEnvironmentMusl()) {
    HMFI.setFirstNamedArgFrameIndex(HMFI.getFirstNamedArgFrameIndex() - 1);
    HMFI.setLastNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));

    // Create Frame index for the start of register saved area.
    int NumVarArgRegs = 6 - HFL.FirstVarArgSavedReg;
    bool RequiresPadding = (NumVarArgRegs & 1);
    int RegSaveAreaSizePlusPadding = RequiresPadding
                                        ? (NumVarArgRegs + 1) * 4
                                        : NumVarArgRegs * 4;

    if (RegSaveAreaSizePlusPadding > 0) {
      // The offset to saved register area should be 8 byte aligned.
      int RegAreaStart = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
      if (!(RegAreaStart % 8))
        RegAreaStart = (RegAreaStart + 7) & -8;

      int RegSaveAreaFrameIndex =
        MFI.CreateFixedObject(RegSaveAreaSizePlusPadding, RegAreaStart, true);
      HMFI.setRegSavedAreaStartFrameIndex(RegSaveAreaFrameIndex);

      // This will point to the next argument passed via stack.
      int Offset = RegAreaStart + RegSaveAreaSizePlusPadding;
      int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
      HMFI.setVarArgsFrameIndex(FI);
    } else {
      // This will point to the next argument passed via stack, when
      // there is no saved register area.
      int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
      int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
      HMFI.setRegSavedAreaStartFrameIndex(FI);
      HMFI.setVarArgsFrameIndex(FI);
    }
  }

  if (IsVarArg && !Subtarget.isEnvironmentMusl()) {
    // This will point to the next argument passed via stack.
    int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
    int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
    HMFI.setVarArgsFrameIndex(FI);
  }

  return Chain;
}
SDValue
HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  // VASTART stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  MachineFunction &MF = DAG.getMachineFunction();
  HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
  SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  if (!Subtarget.isEnvironmentMusl()) {
    return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }
  auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
  auto &HFL = *Subtarget.getFrameLowering();
  SDLoc DL(Op);
  SmallVector<SDValue, 8> MemOps;

  // Get frame index of va_list.
  SDValue FIN = Op.getOperand(1);

  // If first Vararg register is odd, add 4 bytes to start of
  // saved register area to point to the first register location.
  // This is because the saved register area has to be 8 byte aligned.
  // In case of an odd start register, there will be 4 bytes of padding in
  // the beginning of the saved register area. If all registers are used up,
  // the following condition will handle it correctly.
  SDValue SavedRegAreaStartFrameIndex =
    DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(), MVT::i32);

  auto PtrVT = getPointerTy(DAG.getDataLayout());

  if (HFL.FirstVarArgSavedReg & 1)
    SavedRegAreaStartFrameIndex =
      DAG.getNode(ISD::ADD, DL, PtrVT,
                  DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(),
                                    MVT::i32),
                  DAG.getIntPtrConstant(4, DL));

  // Store the saved register area start pointer.
  SDValue Store =
    DAG.getStore(Op.getOperand(0), DL,
                 SavedRegAreaStartFrameIndex,
                 FIN, MachinePointerInfo(SV));
  MemOps.push_back(Store);

  // Store saved register area end pointer.
  FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
                    FIN, DAG.getIntPtrConstant(4, DL));
  Store = DAG.getStore(Op.getOperand(0), DL,
                       DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
                                         PtrVT),
                       FIN, MachinePointerInfo(SV, 4));
  MemOps.push_back(Store);

  // Store overflow area pointer.
  FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
                    FIN, DAG.getIntPtrConstant(4, DL));
  Store = DAG.getStore(Op.getOperand(0), DL,
                       DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
                                         PtrVT),
                       FIN, MachinePointerInfo(SV, 8));
  MemOps.push_back(Store);

  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
SDValue
HexagonTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
  // Assert that the linux ABI is enabled for the current compilation.
  assert(Subtarget.isEnvironmentMusl() && "Linux ABI should be enabled");
  SDValue Chain = Op.getOperand(0);
  SDValue DestPtr = Op.getOperand(1);
  SDValue SrcPtr = Op.getOperand(2);
  const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  SDLoc DL(Op);
  // Size of the va_list is 12 bytes as it has 3 pointers. Therefore,
  // we need to memcopy 12 bytes from va_list to another similar list.
  return DAG.getMemcpy(Chain, DL, DestPtr, SrcPtr,
                       DAG.getIntPtrConstant(12, DL), Align(4),
                       /*isVolatile*/ false, false, false,
                       MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
}
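
// Custom SETCC lowering: v2i16/v4i8 compares are widened to a vector type the
// hardware can compare directly, and i8/i16 scalar compares are done as
// sign-extended i32 compares when the sign extension is known to be free (or
// the RHS is a negative constant), since small negative values can be
// represented directly in the compare instructions.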
SDValue
HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  MVT ResTy = ty(Op);
  MVT OpTy = ty(LHS);

  if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
    MVT ElemTy = OpTy.getVectorElementType();
    assert(ElemTy.isScalarInteger());
    MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
                                  OpTy.getVectorNumElements());
    return DAG.getSetCC(dl, ResTy,
                        DAG.getSExtOrTrunc(LHS, SDLoc(LHS), WideTy),
                        DAG.getSExtOrTrunc(RHS, SDLoc(RHS), WideTy), CC);
  }

  // Treat all other vector types as legal.
  if (ResTy.isVector())
    return Op;

  // Comparisons of short integers should use sign-extend, not zero-extend,
  // since we can represent small negative values in the compare instructions.
  // The LLVM default is to use zero-extend arbitrarily in these cases.
  auto isSExtFree = [this](SDValue N) {
    switch (N.getOpcode()) {
      case ISD::TRUNCATE: {
        // A sign-extend of a truncate of a sign-extend is free.
        SDValue Op = N.getOperand(0);
        if (Op.getOpcode() != ISD::AssertSext)
          return false;
        EVT OrigTy = cast<VTSDNode>(Op.getOperand(1))->getVT();
        unsigned ThisBW = ty(N).getSizeInBits();
        unsigned OrigBW = OrigTy.getSizeInBits();
        // The type that was sign-extended to get the AssertSext must be
        // narrower than the type of N (so that N has still the same value
        // as the original).
        return ThisBW >= OrigBW;
      }
      case ISD::LOAD:
        // We have sign-extended loads.
        return true;
    }
    return false;
  };

  if (OpTy == MVT::i8 || OpTy == MVT::i16) {
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
    bool IsNegative = C && C->getAPIntValue().isNegative();
    if (IsNegative || isSExtFree(LHS) || isSExtFree(RHS))
      return DAG.getSetCC(dl, ResTy,
                          DAG.getSExtOrTrunc(LHS, SDLoc(LHS), MVT::i32),
                          DAG.getSExtOrTrunc(RHS, SDLoc(RHS), MVT::i32), CC);
  }

  return SDValue();
}
SDValue
HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue PredOp = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
  MVT OpTy = ty(Op1);
  const SDLoc &dl(Op);

  if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
    MVT ElemTy = OpTy.getVectorElementType();
    assert(ElemTy.isScalarInteger());
    MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
                                  OpTy.getVectorNumElements());
    // Generate (trunc (select (_, sext, sext))).
    return DAG.getSExtOrTrunc(
              DAG.getSelect(dl, WideTy, PredOp,
                            DAG.getSExtOrTrunc(Op1, dl, WideTy),
                            DAG.getSExtOrTrunc(Op2, dl, WideTy)),
              dl, OpTy);
  }

  return SDValue();
}
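
// Vectors of i1 in the constant pool are rewritten as equivalent vectors of
// i8 before the target constant-pool node is created; everything else is
// emitted as-is (PC-relative when generating position-independent code).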
SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
  EVT ValTy = Op.getValueType();
  ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
  Constant *CVal = nullptr;
  bool isVTi1Type = false;
  if (auto *CV = dyn_cast<ConstantVector>(CPN->getConstVal())) {
    if (cast<VectorType>(CV->getType())->getElementType()->isIntegerTy(1)) {
      IRBuilder<> IRB(CV->getContext());
      SmallVector<Constant*, 128> NewConst;
      unsigned VecLen = CV->getNumOperands();
      assert(isPowerOf2_32(VecLen) &&
             "conversion only supported for pow2 VectorSize");
      for (unsigned i = 0; i < VecLen; ++i)
        NewConst.push_back(IRB.getInt8(CV->getOperand(i)->isZeroValue()));

      CVal = ConstantVector::get(NewConst);
      isVTi1Type = true;
    }
  }
  Align Alignment = CPN->getAlign();
  bool IsPositionIndependent = isPositionIndependent();
  unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0;

  unsigned Offset = 0;
  SDValue T;
  if (CPN->isMachineConstantPoolEntry())
    T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Alignment,
                                  Offset, TF);
  else if (isVTi1Type)
    T = DAG.getTargetConstantPool(CVal, ValTy, Alignment, Offset, TF);
  else
    T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Alignment, Offset,
                                  TF);

  assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
         "Inconsistent target flag encountered");

  if (IsPositionIndependent)
    return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
  return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
}
SDValue
HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  int Idx = cast<JumpTableSDNode>(Op)->getIndex();
  if (isPositionIndependent()) {
    SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
    return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
  }

  SDValue T = DAG.getTargetJumpTable(Idx, VT);
  return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
}
SDValue
HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return LR, which contains the return address. Mark it an implicit live-in.
  unsigned Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
SDValue
HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                                         HRI.getFrameRegister(), VT);
  while (Depth--)
    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
                            MachinePointerInfo());
  return FrameAddr;
}
SDValue
HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
}
SDValue
HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  auto *GAN = cast<GlobalAddressSDNode>(Op);
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  auto *GV = GAN->getGlobal();
  int64_t Offset = GAN->getOffset();

  auto &HLOF = *HTM.getObjFileLowering();
  Reloc::Model RM = HTM.getRelocationModel();

  if (RM == Reloc::Static) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
    const GlobalObject *GO = GV->getBaseObject();
    if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM))
      return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
    return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
  }

  bool UsePCRel = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
  if (UsePCRel) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
                                            HexagonII::MO_PCREL);
    return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
  }

  // Use GOT index.
  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
  SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
  return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
}
// Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
SDValue
HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  SDLoc dl(Op);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  Reloc::Model RM = HTM.getRelocationModel();
  if (RM == Reloc::Static) {
    SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
    return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
  }

  SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
  return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
}
SDValue
HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
      const {
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
                                               HexagonII::MO_PCREL);
  return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
}
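
// Helper for the TLS lowerings below: emit the call that resolves a TLS
// address dynamically. The symbol's address is passed in R0 and the result
// is copied back out of ReturnReg.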
SDValue
HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
      GlobalAddressSDNode *GA, SDValue Glue, EVT PtrVT, unsigned ReturnReg,
      unsigned char OperandFlags) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDLoc dl(GA);
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
                                           GA->getValueType(0),
                                           GA->getOffset(),
                                           OperandFlags);
  // Create Operands for the call. The Operands should have the following:
  // 1. Chain SDValue
  // 2. Callee which in this case is the Global address value.
  // 3. Registers live into the call. In this case its R0, as we
  //    have just one argument to be passed.
  // 4. Glue.
  // Note: The order is important.

  const auto &HRI = *Subtarget.getRegisterInfo();
  const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C);
  assert(Mask && "Missing call preserved mask for calling convention");
  SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT),
                    DAG.getRegisterMask(Mask), Glue };
  Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);

  // Inform MFI that function has calls.
  MFI.setAdjustsStack(true);

  Glue = Chain.getValue(1);
  return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
}
//
// Lower using the initial executable model for TLS addresses
//
SDValue
HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
      SelectionDAG &DAG) const {
  SDLoc dl(GA);
  int64_t Offset = GA->getOffset();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Get the thread pointer.
  SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);

  bool IsPositionIndependent = isPositionIndependent();
  unsigned char TF =
      IsPositionIndependent ? HexagonII::MO_IEGOT : HexagonII::MO_IE;

  // First generate the TLS symbol address
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT,
                                           Offset, TF);

  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);

  if (IsPositionIndependent) {
    // Generate the GOT pointer in case of position independent code
    SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Sym, DAG);

    // Add the TLS Symbol address to GOT pointer. This gives
    // GOT relative relocation for the symbol.
    Sym = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
  }

  // Load the offset value for TLS symbol. This offset is relative to
  // thread pointer.
  SDValue LoadOffset =
      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Sym, MachinePointerInfo());

  // Address of the thread local variable is the add of thread
  // pointer and the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, TP, LoadOffset);
}
//
// Lower using the local executable model for TLS addresses
//
SDValue
HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
      SelectionDAG &DAG) const {
  SDLoc dl(GA);
  int64_t Offset = GA->getOffset();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Get the thread pointer.
  SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
  // Generate the TLS symbol address
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
                                           HexagonII::MO_TPREL);
  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);

  // Address of the thread local variable is the add of thread
  // pointer and the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, TP, Sym);
}
//
// Lower using the general dynamic model for TLS addresses
//
SDValue
HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
      SelectionDAG &DAG) const {
  SDLoc dl(GA);
  int64_t Offset = GA->getOffset();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // First generate the TLS symbol address
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
                                           HexagonII::MO_GDGOT);

  // Then, generate the GOT pointer
  SDValue GOT = LowerGLOBAL_OFFSET_TABLE(TGA, DAG);

  // Add the TLS symbol and the GOT pointer
  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
  SDValue Chain = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);

  // Copy over the argument to R0
  SDValue InFlag;
  Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag);
  InFlag = Chain.getValue(1);

  unsigned Flags =
      static_cast<const HexagonSubtarget &>(DAG.getSubtarget()).useLongCalls()
          ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
          : HexagonII::MO_GDPLT;

  return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT,
                           Hexagon::R0, Flags);
}
//
// Lower TLS addresses.
//
// For now for dynamic models, we only support the general dynamic model.
//
SDValue
HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op,
      SelectionDAG &DAG) const {
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);

  switch (HTM.getTLSModel(GA->getGlobal())) {
    case TLSModel::GeneralDynamic:
    case TLSModel::LocalDynamic:
      return LowerToTLSGeneralDynamicModel(GA, DAG);
    case TLSModel::InitialExec:
      return LowerToTLSInitialExecModel(GA, DAG);
    case TLSModel::LocalExec:
      return LowerToTLSLocalExecModel(GA, DAG);
  }
  llvm_unreachable("Bogus TLS model");
}
//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//

HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
                                             const HexagonSubtarget &ST)
    : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
      Subtarget(ST) {
  auto &HRI = *Subtarget.getRegisterInfo();

  setPrefLoopAlignment(Align(16));
  setMinFunctionAlignment(Align(4));
  setPrefFunctionAlignment(Align(16));
  setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
  setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
  setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);

  setMaxAtomicSizeInBitsSupported(64);
  setMinCmpXchgSizeInBits(32);

  if (EnableHexSDNodeSched)
    setSchedulingPreference(Sched::VLIW);
  else
    setSchedulingPreference(Sched::Source);

  // Limits for inline expansion of memcpy/memmove
  MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
  MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
  MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
  MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
  MaxStoresPerMemset = MaxStoresPerMemsetCL;
  MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;
  // Set up register classes.

  addRegisterClass(MVT::i1,    &Hexagon::PredRegsRegClass);
  addRegisterClass(MVT::v2i1,  &Hexagon::PredRegsRegClass);  // bbbbaaaa
  addRegisterClass(MVT::v4i1,  &Hexagon::PredRegsRegClass);  // ddccbbaa
  addRegisterClass(MVT::v8i1,  &Hexagon::PredRegsRegClass);  // hgfedcba
  addRegisterClass(MVT::i32,   &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::v4i8,  &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::i64,   &Hexagon::DoubleRegsRegClass);
  addRegisterClass(MVT::v8i8,  &Hexagon::DoubleRegsRegClass);
  addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
  addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);

  addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
  //
  // Handling of scalar operations.
  //
  // All operations default to "legal", except:
  // - indexed loads and stores (pre-/post-incremented),
  // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
  //   ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
  //   FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
  //   FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
  // which default to "expand" for at least one type.

  // Misc operations.
  setOperationAction(ISD::ConstantFP,          MVT::f32,   Legal);
  setOperationAction(ISD::ConstantFP,          MVT::f64,   Legal);
  setOperationAction(ISD::TRAP,                MVT::Other, Legal);
  setOperationAction(ISD::ConstantPool,        MVT::i32,   Custom);
  setOperationAction(ISD::JumpTable,           MVT::i32,   Custom);
  setOperationAction(ISD::BUILD_PAIR,          MVT::i64,   Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG,   MVT::i1,    Expand);
  setOperationAction(ISD::INLINEASM,           MVT::Other, Custom);
  setOperationAction(ISD::INLINEASM_BR,        MVT::Other, Custom);
  setOperationAction(ISD::PREFETCH,            MVT::Other, Custom);
  setOperationAction(ISD::READCYCLECOUNTER,    MVT::i64,   Custom);
  setOperationAction(ISD::INTRINSIC_VOID,      MVT::Other, Custom);
  setOperationAction(ISD::EH_RETURN,           MVT::Other, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32,   Custom);
  setOperationAction(ISD::GlobalTLSAddress,    MVT::i32,   Custom);
  setOperationAction(ISD::ATOMIC_FENCE,        MVT::Other, Custom);

  // Custom legalize GlobalAddress nodes into CONST32.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i8,  Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);

  // Hexagon needs to optimize cases with negative constants.
  setOperationAction(ISD::SETCC, MVT::i8,    Custom);
  setOperationAction(ISD::SETCC, MVT::i16,   Custom);
  setOperationAction(ISD::SETCC, MVT::v4i8,  Custom);
  setOperationAction(ISD::SETCC, MVT::v2i16, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAEND,   MVT::Other, Expand);
  setOperationAction(ISD::VAARG,   MVT::Other, Expand);
  if (Subtarget.isEnvironmentMusl())
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);

  if (EmitJumpTables)
    setMinimumJumpTableEntries(MinimumJumpTables);
  else
    setMinimumJumpTableEntries(std::numeric_limits<unsigned>::max());
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  for (unsigned LegalIntOp :
       {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) {
    setOperationAction(LegalIntOp, MVT::i32, Legal);
    setOperationAction(LegalIntOp, MVT::i64, Legal);
  }

  // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
  // but they only operate on i64.
  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::UADDO, VT, Custom);
    setOperationAction(ISD::USUBO, VT, Custom);
    setOperationAction(ISD::SADDO, VT, Expand);
    setOperationAction(ISD::SSUBO, VT, Expand);
    setOperationAction(ISD::ADDCARRY, VT, Expand);
    setOperationAction(ISD::SUBCARRY, VT, Expand);
  }
  setOperationAction(ISD::ADDCARRY, MVT::i64, Custom);
  setOperationAction(ISD::SUBCARRY, MVT::i64, Custom);

  setOperationAction(ISD::CTLZ, MVT::i8, Promote);
  setOperationAction(ISD::CTLZ, MVT::i16, Promote);
  setOperationAction(ISD::CTTZ, MVT::i8, Promote);
  setOperationAction(ISD::CTTZ, MVT::i16, Promote);

  // Popcount can count # of 1s in i64 but returns i32.
  setOperationAction(ISD::CTPOP, MVT::i8, Promote);
  setOperationAction(ISD::CTPOP, MVT::i16, Promote);
  setOperationAction(ISD::CTPOP, MVT::i32, Promote);
  setOperationAction(ISD::CTPOP, MVT::i64, Legal);

  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
  setOperationAction(ISD::BSWAP, MVT::i32, Legal);
  setOperationAction(ISD::BSWAP, MVT::i64, Legal);

  setOperationAction(ISD::FSHL, MVT::i32, Legal);
  setOperationAction(ISD::FSHL, MVT::i64, Legal);
  setOperationAction(ISD::FSHR, MVT::i32, Legal);
  setOperationAction(ISD::FSHR, MVT::i64, Legal);

  for (unsigned IntExpOp :
       {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
        ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
        ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
        ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
    for (MVT VT : MVT::integer_valuetypes())
      setOperationAction(IntExpOp, VT, Expand);
  }

  for (unsigned FPExpOp :
       {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
        ISD::FPOW, ISD::FCOPYSIGN}) {
    for (MVT VT : MVT::fp_valuetypes())
      setOperationAction(FPExpOp, VT, Expand);
  }

  // No extending loads from i32.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
  }
  // Turn FP truncstore into trunc + store.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  // Turn FP extload into load/fpextend.
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);

  // Expand BR_CC and SELECT_CC for all integer and fp types.
  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::BR_CC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  for (MVT VT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::BR_CC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  // Handling of vector operations.
  //
  // Set the action for vector operations to "expand", then override it with
  // either "custom" or "legal" for specific cases.
  static const unsigned VectExpOps[] = {
    // Integer arithmetic:
    ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV,
    ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::SADDO,
    ISD::UADDO, ISD::SSUBO, ISD::USUBO, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
    ISD::AND, ISD::OR, ISD::XOR, ISD::ROTL, ISD::ROTR,
    ISD::CTPOP, ISD::CTLZ, ISD::CTTZ,
    // Floating point arithmetic/math functions:
    ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV,
    ISD::FREM, ISD::FNEG, ISD::FABS, ISD::FSQRT, ISD::FSIN,
    ISD::FCOS, ISD::FPOW, ISD::FLOG, ISD::FLOG2,
    ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC,
    ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR,
    ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,
    ISD::BR_CC, ISD::SELECT_CC, ISD::ConstantPool,
    ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR,
    ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT,
    ISD::EXTRACT_SUBVECTOR, ISD::INSERT_SUBVECTOR,
    ISD::CONCAT_VECTORS, ISD::VECTOR_SHUFFLE,
  };

  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
    for (unsigned VectExpOp : VectExpOps)
      setOperationAction(VectExpOp, VT, Expand);

    // Expand all extending loads and truncating stores:
    for (MVT TargetVT : MVT::fixedlen_vector_valuetypes()) {
      if (TargetVT == VT)
        continue;
      setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, TargetVT, VT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, TargetVT, VT, Expand);
      setTruncStoreAction(VT, TargetVT, Expand);
    }

    // Normalize all inputs to SELECT to be vectors of i32.
    if (VT.getVectorElementType() != MVT::i32) {
      MVT VT32 = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, VT32);
    }
    setOperationAction(ISD::SRA, VT, Custom);
    setOperationAction(ISD::SHL, VT, Custom);
    setOperationAction(ISD::SRL, VT, Custom);
  }

  // Extending loads from (native) vectors of i8 into (native) vectors of i16
  // are legal.
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);

  // Types natively supported:
  for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
                       MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
    setOperationAction(ISD::BUILD_VECTOR, NativeVT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, NativeVT, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, NativeVT, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR, NativeVT, Custom);
    setOperationAction(ISD::CONCAT_VECTORS, NativeVT, Custom);

    setOperationAction(ISD::ADD, NativeVT, Legal);
    setOperationAction(ISD::SUB, NativeVT, Legal);
    setOperationAction(ISD::MUL, NativeVT, Legal);
    setOperationAction(ISD::AND, NativeVT, Legal);
    setOperationAction(ISD::OR, NativeVT, Legal);
    setOperationAction(ISD::XOR, NativeVT, Legal);

    if (NativeVT.getVectorElementType() != MVT::i1)
      setOperationAction(ISD::SPLAT_VECTOR, NativeVT, Legal);
  }

  for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32}) {
    setOperationAction(ISD::SMIN, VT, Legal);
    setOperationAction(ISD::SMAX, VT, Legal);
    setOperationAction(ISD::UMIN, VT, Legal);
    setOperationAction(ISD::UMAX, VT, Legal);
  }
  // Custom lower unaligned loads.
  // Also, for both loads and stores, verify the alignment of the address
  // in case it is a compile-time constant. This is a usability feature to
  // provide a meaningful error message to users.
  for (MVT VT : {MVT::i16, MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
                 MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  }

  // Custom-lower load/stores of boolean vectors.
  for (MVT VT : {MVT::v2i1, MVT::v4i1, MVT::v8i1}) {
    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  }

  for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16,
                 MVT::v2i32}) {
    setCondCodeAction(ISD::SETNE, VT, Expand);
    setCondCodeAction(ISD::SETLE, VT, Expand);
    setCondCodeAction(ISD::SETGE, VT, Expand);
    setCondCodeAction(ISD::SETLT, VT, Expand);
    setCondCodeAction(ISD::SETULE, VT, Expand);
    setCondCodeAction(ISD::SETUGE, VT, Expand);
    setCondCodeAction(ISD::SETULT, VT, Expand);
  }

  // Custom-lower bitcasts from i8 to v8i1.
  setOperationAction(ISD::BITCAST, MVT::i8, Custom);
  setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
  setOperationAction(ISD::VSELECT, MVT::v4i8, Custom);
  setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);

  setOperationAction(ISD::FMA, MVT::f64, Expand);
  setOperationAction(ISD::FADD, MVT::f64, Expand);
  setOperationAction(ISD::FSUB, MVT::f64, Expand);
  setOperationAction(ISD::FMUL, MVT::f64, Expand);

  setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
  setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);

  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);

  // Handling of indexed loads/stores: default is "expand".
  for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64,
                 MVT::v2i16, MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
    setIndexedLoadAction(ISD::POST_INC, VT, Legal);
    setIndexedStoreAction(ISD::POST_INC, VT, Legal);
  }
  // Subtarget-specific operation actions.
  //
  if (Subtarget.hasV60Ops()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  }
  if (Subtarget.hasV66Ops()) {
    setOperationAction(ISD::FADD, MVT::f64, Legal);
    setOperationAction(ISD::FSUB, MVT::f64, Legal);
  }
  if (Subtarget.hasV67Ops()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMUL, MVT::f64, Legal);
  }

  setTargetDAGCombine(ISD::VSELECT);

  if (Subtarget.useHVXOps())
    initializeHVXLowering();

  computeRegisterProperties(&HRI);

  // Library calls for unsupported operations
  //
  bool FastMath = EnableFastMath;

  setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
  setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
  setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
  setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
  setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
  setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
  setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
  setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");

  setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
  setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
  setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
  setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
  setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
  setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");

  // This is the only fast library function for sqrtd.
  if (FastMath)
    setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");

  // Prefix is: nothing for "slow-math",
  //            "fast2_" for V5+ fast-math double-precision
  // (actually, keep fast-math and fast-math2 separate for now)
  if (FastMath) {
    setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
    setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
    setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
    setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
    setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
  } else {
    setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
    setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
    setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
    setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
    setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
  }

  if (FastMath)
    setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
  else
    setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");

  // These cause problems when the shift amount is non-constant.
  setLibcallName(RTLIB::SHL_I128, nullptr);
  setLibcallName(RTLIB::SRL_I128, nullptr);
  setLibcallName(RTLIB::SRA_I128, nullptr);
}
const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((HexagonISD::NodeType)Opcode) {
  case HexagonISD::ADDC:          return "HexagonISD::ADDC";
  case HexagonISD::SUBC:          return "HexagonISD::SUBC";
  case HexagonISD::ALLOCA:        return "HexagonISD::ALLOCA";
  case HexagonISD::AT_GOT:        return "HexagonISD::AT_GOT";
  case HexagonISD::AT_PCREL:      return "HexagonISD::AT_PCREL";
  case HexagonISD::BARRIER:       return "HexagonISD::BARRIER";
  case HexagonISD::CALL:          return "HexagonISD::CALL";
  case HexagonISD::CALLnr:        return "HexagonISD::CALLnr";
  case HexagonISD::CALLR:         return "HexagonISD::CALLR";
  case HexagonISD::COMBINE:       return "HexagonISD::COMBINE";
  case HexagonISD::CONST32_GP:    return "HexagonISD::CONST32_GP";
  case HexagonISD::CONST32:       return "HexagonISD::CONST32";
  case HexagonISD::CP:            return "HexagonISD::CP";
  case HexagonISD::DCFETCH:       return "HexagonISD::DCFETCH";
  case HexagonISD::EH_RETURN:     return "HexagonISD::EH_RETURN";
  case HexagonISD::TSTBIT:        return "HexagonISD::TSTBIT";
  case HexagonISD::EXTRACTU:      return "HexagonISD::EXTRACTU";
  case HexagonISD::INSERT:        return "HexagonISD::INSERT";
  case HexagonISD::JT:            return "HexagonISD::JT";
  case HexagonISD::RET_FLAG:      return "HexagonISD::RET_FLAG";
  case HexagonISD::TC_RETURN:     return "HexagonISD::TC_RETURN";
  case HexagonISD::VASL:          return "HexagonISD::VASL";
  case HexagonISD::VASR:          return "HexagonISD::VASR";
  case HexagonISD::VLSR:          return "HexagonISD::VLSR";
  case HexagonISD::VEXTRACTW:     return "HexagonISD::VEXTRACTW";
  case HexagonISD::VINSERTW0:     return "HexagonISD::VINSERTW0";
  case HexagonISD::VROR:          return "HexagonISD::VROR";
  case HexagonISD::READCYCLE:     return "HexagonISD::READCYCLE";
  case HexagonISD::PTRUE:         return "HexagonISD::PTRUE";
  case HexagonISD::PFALSE:        return "HexagonISD::PFALSE";
  case HexagonISD::D2P:           return "HexagonISD::D2P";
  case HexagonISD::P2D:           return "HexagonISD::P2D";
  case HexagonISD::V2Q:           return "HexagonISD::V2Q";
  case HexagonISD::Q2V:           return "HexagonISD::Q2V";
  case HexagonISD::QCAT:          return "HexagonISD::QCAT";
  case HexagonISD::QTRUE:         return "HexagonISD::QTRUE";
  case HexagonISD::QFALSE:        return "HexagonISD::QFALSE";
  case HexagonISD::TYPECAST:      return "HexagonISD::TYPECAST";
  case HexagonISD::VALIGN:        return "HexagonISD::VALIGN";
  case HexagonISD::VALIGNADDR:    return "HexagonISD::VALIGNADDR";
  case HexagonISD::VPACKL:        return "HexagonISD::VPACKL";
  case HexagonISD::VUNPACK:       return "HexagonISD::VUNPACK";
  case HexagonISD::VUNPACKU:      return "HexagonISD::VUNPACKU";
  case HexagonISD::ISEL:          return "HexagonISD::ISEL";
  case HexagonISD::OP_END:        break;
  }
  return nullptr;
}
bool
HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, Align NeedAlign,
      const SDLoc &dl, SelectionDAG &DAG) const {
  auto *CA = dyn_cast<ConstantSDNode>(Ptr);
  if (!CA)
    return true;
  unsigned Addr = CA->getZExtValue();
  Align HaveAlign =
      Addr != 0 ? Align(1ull << countTrailingZeros(Addr)) : NeedAlign;
  if (HaveAlign >= NeedAlign)
    return true;

  static int DK_MisalignedTrap = llvm::getNextAvailablePluginDiagnosticKind();

  struct DiagnosticInfoMisalignedTrap : public DiagnosticInfo {
    DiagnosticInfoMisalignedTrap(StringRef M)
      : DiagnosticInfo(DK_MisalignedTrap, DS_Remark), Msg(M) {}
    void print(DiagnosticPrinter &DP) const override {
      DP << Msg;
    }
    static bool classof(const DiagnosticInfo *DI) {
      return DI->getKind() == DK_MisalignedTrap;
    }
    StringRef Msg;
  };

  std::string ErrMsg;
  raw_string_ostream O(ErrMsg);
  O << "Misaligned constant address: " << format_hex(Addr, 10)
    << " has alignment " << HaveAlign.value()
    << ", but the memory access requires " << NeedAlign.value();
  if (DebugLoc DL = dl.getDebugLoc())
    DL.print(O << ", at ");
  O << ". The instruction has been replaced with a trap.";

  DAG.getContext()->diagnose(DiagnosticInfoMisalignedTrap(O.str()));
  return false;
}
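// For illustration: a constant address such as 0x1004 has two trailing zero
// bits, so HaveAlign is deduced as Align(4). An access that requires Align(8)
// from that address fails the check above, is diagnosed with the remark
// built here, and the caller then replaces the access with a trap.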
SDValue
HexagonTargetLowering::replaceMemWithUndef(SDValue Op, SelectionDAG &DAG)
      const {
  const SDLoc &dl(Op);
  auto *LS = cast<LSBaseSDNode>(Op.getNode());
  assert(!LS->isIndexed() && "Not expecting indexed ops on constant address");

  SDValue Chain = LS->getChain();
  SDValue Trap = DAG.getNode(ISD::TRAP, dl, MVT::Other, Chain);
  if (LS->getOpcode() == ISD::LOAD)
    return DAG.getMergeValues({DAG.getUNDEF(ty(Op)), Trap}, dl);
  return Trap;
}
// Bit-reverse Load Intrinsic: Check if the instruction is a bit-reverse load
// intrinsic.
static bool isBrevLdIntrinsic(const Value *Inst) {
  unsigned ID = cast<IntrinsicInst>(Inst)->getIntrinsicID();
  return (ID == Intrinsic::hexagon_L2_loadrd_pbr ||
          ID == Intrinsic::hexagon_L2_loadri_pbr ||
          ID == Intrinsic::hexagon_L2_loadrh_pbr ||
          ID == Intrinsic::hexagon_L2_loadruh_pbr ||
          ID == Intrinsic::hexagon_L2_loadrb_pbr ||
          ID == Intrinsic::hexagon_L2_loadrub_pbr);
}

// Bit-reverse Load Intrinsic: Crawl up and figure out the object from the
// previous instruction. So far we only handle bitcast, extractvalue and
// bit-reverse load intrinsic instructions. Should we handle CGEP?
static Value *getBrevLdObject(Value *V) {
  if (Operator::getOpcode(V) == Instruction::ExtractValue ||
      Operator::getOpcode(V) == Instruction::BitCast)
    V = cast<Operator>(V)->getOperand(0);
  else if (isa<IntrinsicInst>(V) && isBrevLdIntrinsic(V))
    V = cast<Instruction>(V)->getOperand(0);
  return V;
}

// Bit-reverse Load Intrinsic: For a PHI node return either an incoming edge or
// a back edge. If the back edge comes from the intrinsic itself, the incoming
// edge is returned.
static Value *returnEdge(const PHINode *PN, Value *IntrBaseVal) {
  const BasicBlock *Parent = PN->getParent();
  int Idx = -1;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
    BasicBlock *Blk = PN->getIncomingBlock(i);
    // Determine if the back edge is originated from intrinsic.
    if (Blk == Parent) {
      Value *BackEdgeVal = PN->getIncomingValue(i);
      Value *BaseVal;
      // Loop over till we return the same Value or we hit the IntrBaseVal.
      do {
        BaseVal = BackEdgeVal;
        BackEdgeVal = getBrevLdObject(BackEdgeVal);
      } while ((BaseVal != BackEdgeVal) && (IntrBaseVal != BackEdgeVal));
      // If the getBrevLdObject returns IntrBaseVal, we should return the
      // incoming edge.
      if (IntrBaseVal == BackEdgeVal)
        continue;
      Idx = i;
      break;
    } else // Set the node to incoming edge.
      Idx = i;
  }
  assert(Idx >= 0 && "Unexpected index to incoming argument in PHI");
  return PN->getIncomingValue(Idx);
}

// Bit-reverse Load Intrinsic: Figure out the underlying object the base
// pointer points to, for the bit-reverse load intrinsic. Setting this to
// memoperand might help alias analysis to figure out the dependencies.
static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
  Value *IntrBaseVal = V;
  Value *BaseVal;
  // Loop over till we return the same Value, implies we either figure out
  // the object or we hit a PHI
  do {
    BaseVal = V;
    V = getBrevLdObject(V);
  } while (BaseVal != V);

  // Identify the object from PHINode.
  if (const PHINode *PN = dyn_cast<PHINode>(V))
    return returnEdge(PN, IntrBaseVal);
  // For non-PHI nodes, the object is the last value returned by
  // getBrevLdObject.
  return V;
}
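// Illustrative IR shape this crawl handles (value names are hypothetical):
// inside a loop, the base pointer of a bit-reversed load is a PHI of an
// initial pointer and the pointer produced by the previous load, e.g.
//   %p    = phi i8* [ %base, %entry ], [ %next, %loop ]
//   %res  = call { i32, i8* } @llvm.hexagon.L2.loadri.pbr(i8* %p, i32 %mod)
//   %next = extractvalue { i32, i8* } %res, 1
// getBrevLdObject steps from %next through the extractvalue and the intrinsic
// back to %p, and returnEdge then selects %base as the underlying object.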
/// Given an intrinsic, checks if on the target the intrinsic will need to map
/// to a MemIntrinsicNode (touches memory). If this is the case, it returns
/// true and stores the intrinsic information into the IntrinsicInfo that was
/// passed to the function.
bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                               const CallInst &I,
                                               MachineFunction &MF,
                                               unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::hexagon_L2_loadrd_pbr:
  case Intrinsic::hexagon_L2_loadri_pbr:
  case Intrinsic::hexagon_L2_loadrh_pbr:
  case Intrinsic::hexagon_L2_loadruh_pbr:
  case Intrinsic::hexagon_L2_loadrb_pbr:
  case Intrinsic::hexagon_L2_loadrub_pbr: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
    auto &Cont = I.getCalledFunction()->getParent()->getContext();
    // The intrinsic function call is of the form { ElTy, i8* }
    // @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access
    // type should be derived from ElTy.
    Type *ElTy = I.getCalledFunction()->getReturnType()->getStructElementType(0);
    Info.memVT = MVT::getVT(ElTy);
    llvm::Value *BasePtrVal = I.getOperand(0);
    Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal);
    // The offset value comes through Modifier register. For now, assume the
    // offset is 0.
    Info.offset = 0;
    Info.align = DL.getABITypeAlign(Info.memVT.getTypeForEVT(Cont));
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::hexagon_V6_vgathermw:
  case Intrinsic::hexagon_V6_vgathermw_128B:
  case Intrinsic::hexagon_V6_vgathermh:
  case Intrinsic::hexagon_V6_vgathermh_128B:
  case Intrinsic::hexagon_V6_vgathermhw:
  case Intrinsic::hexagon_V6_vgathermhw_128B:
  case Intrinsic::hexagon_V6_vgathermwq:
  case Intrinsic::hexagon_V6_vgathermwq_128B:
  case Intrinsic::hexagon_V6_vgathermhq:
  case Intrinsic::hexagon_V6_vgathermhq_128B:
  case Intrinsic::hexagon_V6_vgathermhwq:
  case Intrinsic::hexagon_V6_vgathermhwq_128B: {
    const Module &M = *I.getParent()->getParent()->getParent();
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Type *VecTy = I.getArgOperand(1)->getType();
    Info.memVT = MVT::getVT(VecTy);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align =
        MaybeAlign(M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8);
    Info.flags = MachineMemOperand::MOLoad |
                 MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
  default:
    break;
  }
  return false;
}
bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
  return X.getValueType().isScalarInteger(); // 'tstbit'
}

bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
}

bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isSimple() || !VT2.isSimple())
    return false;
  return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
}

bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(
    const MachineFunction &MF, EVT VT) const {
  return isOperationLegalOrCustom(ISD::FMA, VT);
}

// Should we expand the build vector with shuffles?
bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
      unsigned DefinedValues) const {
  return false;
}

bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
      EVT VT) const {
  return true;
}

TargetLoweringBase::LegalizeTypeAction
HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
  unsigned VecLen = VT.getVectorMinNumElements();
  MVT ElemTy = VT.getVectorElementType();

  if (VecLen == 1 || VT.isScalableVector())
    return TargetLoweringBase::TypeScalarizeVector;

  if (Subtarget.useHVXOps()) {
    unsigned Action = getPreferredHvxVectorAction(VT);
    if (Action != ~0u)
      return static_cast<TargetLoweringBase::LegalizeTypeAction>(Action);
  }

  // Always widen (remaining) vectors of i1.
  if (ElemTy == MVT::i1)
    return TargetLoweringBase::TypeWidenVector;

  return TargetLoweringBase::TypeSplitVector;
}
std::pair<SDValue, int>
HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const {
  if (Addr.getOpcode() == ISD::ADD) {
    SDValue Op1 = Addr.getOperand(1);
    if (auto *CN = dyn_cast<const ConstantSDNode>(Op1.getNode()))
      return { Addr.getOperand(0), CN->getSExtValue() };
  }
  return { Addr, 0 };
}
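// For example, an address of the form (add %R, 40) decomposes into the pair
// {%R, 40}; any other shape, including an ADD with a non-constant second
// operand, is returned unchanged as {Addr, 0}.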
// Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors
// to select data from, V3 is the permutation.
SDValue
HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
      const {
  const auto *SVN = cast<ShuffleVectorSDNode>(Op);
  ArrayRef<int> AM = SVN->getMask();
  assert(AM.size() <= 8 && "Unexpected shuffle mask");
  unsigned VecLen = AM.size();

  MVT VecTy = ty(Op);
  assert(!Subtarget.isHVXVectorType(VecTy, true) &&
         "HVX shuffles should be legal");
  assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");

  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  const SDLoc &dl(Op);

  // If the inputs are not the same as the output, bail. This is not an
  // error situation, but complicates the handling and the default expansion
  // (into BUILD_VECTOR) should be adequate.
  if (ty(Op0) != VecTy || ty(Op1) != VecTy)
    return SDValue();

  // Normalize the mask so that the first non-negative index comes from
  // the first operand.
  SmallVector<int,8> Mask(AM.begin(), AM.end());
  unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data();
  if (F == AM.size())
    return DAG.getUNDEF(VecTy);
  if (AM[F] >= int(VecLen)) {
    ShuffleVectorSDNode::commuteMask(Mask);
    std::swap(Op0, Op1);
  }

  // Express the shuffle mask in terms of bytes.
  SmallVector<int,8> ByteMask;
  unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
    int M = Mask[i];
    if (M < 0) {
      for (unsigned j = 0; j != ElemBytes; ++j)
        ByteMask.push_back(-1);
    } else {
      for (unsigned j = 0; j != ElemBytes; ++j)
        ByteMask.push_back(M*ElemBytes + j);
    }
  }
  assert(ByteMask.size() <= 8);

  // All non-undef (non-negative) indexes are well within [0..127], so they
  // fit in a single byte. Build two 64-bit words:
  // - MaskIdx where each byte is the corresponding index (for non-negative
  //   indexes), and 0xFF for negative indexes, and
  // - MaskUnd that has 0xFF for each negative index.
  uint64_t MaskIdx = 0;
  uint64_t MaskUnd = 0;
  for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
    unsigned S = 8*i;
    uint64_t M = ByteMask[i] & 0xFF;
    if (M == 0xFF)
      MaskUnd |= M << S;
    MaskIdx |= M << S;
  }
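  // For illustration: a v4i8 shuffle of Op0 with mask <0, -1, 2, 3> (lane 1
  // undefined) gives ByteMask = {0, -1, 2, 3}, hence MaskIdx = 0x0302FF00 and
  // MaskUnd = 0x0000FF00. Since (0x03020100 | MaskUnd) == MaskIdx, it is
  // recognized below as an identity shuffle and Op0 is returned unchanged.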
  if (ByteMask.size() == 4) {
    // Identity.
    if (MaskIdx == (0x03020100 | MaskUnd))
      return Op0;
    // Byte swap.
    if (MaskIdx == (0x00010203 | MaskUnd)) {
      SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
      SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
      return DAG.getBitcast(VecTy, T1);
    }

    // Byte packs.
    SDValue Concat10 = DAG.getNode(HexagonISD::COMBINE, dl,
                                   typeJoin({ty(Op1), ty(Op0)}), {Op1, Op0});
    if (MaskIdx == (0x06040200 | MaskUnd))
      return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
    if (MaskIdx == (0x07050301 | MaskUnd))
      return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);

    SDValue Concat01 = DAG.getNode(HexagonISD::COMBINE, dl,
                                   typeJoin({ty(Op0), ty(Op1)}), {Op0, Op1});
    if (MaskIdx == (0x02000604 | MaskUnd))
      return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
    if (MaskIdx == (0x03010705 | MaskUnd))
      return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
  }

  if (ByteMask.size() == 8) {
    // Identity.
    if (MaskIdx == (0x0706050403020100ull | MaskUnd))
      return Op0;
    // Byte swap.
    if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
      SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
      SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
      return DAG.getBitcast(VecTy, T1);
    }

    // Halfword picks.
    if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
      return getInstr(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
      return getInstr(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
      return getInstr(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
      return getInstr(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
      VectorPair P = opSplit(Op0, dl, DAG);
      return getInstr(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
    }

    // Byte packs.
    if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
      return getInstr(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
      return getInstr(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
  }

  return SDValue();
}
// Create a Hexagon-specific node for shifting a vector by an integer.
SDValue
HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
      const {
  unsigned NewOpc;
  switch (Op.getOpcode()) {
    case ISD::SHL:
      NewOpc = HexagonISD::VASL;
      break;
    case ISD::SRA:
      NewOpc = HexagonISD::VASR;
      break;
    case ISD::SRL:
      NewOpc = HexagonISD::VLSR;
      break;
    default:
      llvm_unreachable("Unexpected shift opcode");
  }

  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  const SDLoc &dl(Op);

  switch (Op1.getOpcode()) {
    case ISD::BUILD_VECTOR:
      if (SDValue S = cast<BuildVectorSDNode>(Op1)->getSplatValue())
        return DAG.getNode(NewOpc, dl, ty(Op), Op0, S);
      break;
    case ISD::SPLAT_VECTOR:
      return DAG.getNode(NewOpc, dl, ty(Op), Op0, Op1.getOperand(0));
    default:
      break;
  }
  return SDValue();
}
SDValue
HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
  return getVectorShiftByInt(Op, DAG);
}

SDValue
HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
  if (isa<ConstantSDNode>(Op.getOperand(1).getNode()))
    return Op;
  return SDValue();
}

SDValue
HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
  MVT ResTy = ty(Op);
  SDValue InpV = Op.getOperand(0);
  MVT InpTy = ty(InpV);
  assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
  const SDLoc &dl(Op);

  // Handle conversion from i8 to v8i1.
  if (InpTy == MVT::i8) {
    if (ResTy == MVT::v8i1) {
      SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
      SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
      return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
    }
  }

  return SDValue();
}
bool
HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
      MVT VecTy, SelectionDAG &DAG,
      MutableArrayRef<ConstantInt*> Consts) const {
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();
  IntegerType *IntTy = IntegerType::get(*DAG.getContext(), ElemWidth);
  bool AllConst = true;

  for (unsigned i = 0, e = Values.size(); i != e; ++i) {
    SDValue V = Values[i];
    if (V.isUndef()) {
      Consts[i] = ConstantInt::get(IntTy, 0);
      continue;
    }
    // Make sure to always cast to IntTy.
    if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) {
      const ConstantInt *CI = CN->getConstantIntValue();
      Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue());
    } else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) {
      const ConstantFP *CF = CN->getConstantFPValue();
      APInt A = CF->getValueAPF().bitcastToAPInt();
      Consts[i] = ConstantInt::get(IntTy, A.getZExtValue());
    } else {
      AllConst = false;
    }
  }
  return AllConst;
}
SDValue
HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
                                     MVT VecTy, SelectionDAG &DAG) const {
  MVT ElemTy = VecTy.getVectorElementType();
  assert(VecTy.getVectorNumElements() == Elem.size());

  SmallVector<ConstantInt*,4> Consts(Elem.size());
  bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);

  unsigned First, Num = Elem.size();
  for (First = 0; First != Num; ++First) {
    if (!isUndef(Elem[First]))
      break;
  }
  if (First == Num)
    return DAG.getUNDEF(VecTy);

  if (AllConst &&
      llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
    return getZero(dl, VecTy, DAG);

  if (ElemTy == MVT::i16) {
    assert(Elem.size() == 2);
    if (AllConst) {
      uint32_t V = (Consts[0]->getZExtValue() & 0xFFFF) |
                   Consts[1]->getZExtValue() << 16;
      return DAG.getBitcast(MVT::v2i16, DAG.getConstant(V, dl, MVT::i32));
    }
    SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32,
                         {Elem[1], Elem[0]}, DAG);
    return DAG.getBitcast(MVT::v2i16, N);
  }

  if (ElemTy == MVT::i8) {
    // First try generating a constant.
    if (AllConst) {
      int32_t V = (Consts[0]->getZExtValue() & 0xFF) |
                  (Consts[1]->getZExtValue() & 0xFF) << 8 |
                  (Consts[2]->getZExtValue() & 0xFF) << 16 |
                  Consts[3]->getZExtValue() << 24;
      return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32));
    }

    // Then try splat.
    bool IsSplat = true;
    for (unsigned i = First+1; i != Num; ++i) {
      if (Elem[i] == Elem[First] || isUndef(Elem[i]))
        continue;
      IsSplat = false;
      break;
    }
    if (IsSplat) {
      // Legalize the operand of SPLAT_VECTOR.
      SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
      return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
    }

    // Generate
    //   (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) |
    //   (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16
    assert(Elem.size() == 4);
    SDValue Vs[4];
    for (unsigned i = 0; i != 4; ++i) {
      Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32);
      Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8);
    }
    SDValue S8 = DAG.getConstant(8, dl, MVT::i32);
    SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[1], S8});
    SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[3], S8});
    SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0});
    SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1});

    SDValue R = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
    return DAG.getBitcast(MVT::v4i8, R);
  }

#ifndef NDEBUG
  dbgs() << "VecTy: " << EVT(VecTy).getEVTString() << '\n';
#endif
  llvm_unreachable("Unexpected vector element type");
}
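// For illustration: a constant v2i16 <1, 2> packs into (2 << 16) | 1 ==
// 0x00020001, emitted as one 32-bit constant and bitcast to v2i16; with
// non-constant halfwords, A2_combine_ll concatenates the low halves of the
// two element registers instead.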
SDValue
HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
                                     MVT VecTy, SelectionDAG &DAG) const {
  MVT ElemTy = VecTy.getVectorElementType();
  assert(VecTy.getVectorNumElements() == Elem.size());

  SmallVector<ConstantInt*,8> Consts(Elem.size());
  bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);

  unsigned First, Num = Elem.size();
  for (First = 0; First != Num; ++First) {
    if (!isUndef(Elem[First]))
      break;
  }
  if (First == Num)
    return DAG.getUNDEF(VecTy);

  if (AllConst &&
      llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
    return getZero(dl, VecTy, DAG);

  // First try splat if possible.
  if (ElemTy == MVT::i16) {
    bool IsSplat = true;
    for (unsigned i = First+1; i != Num; ++i) {
      if (Elem[i] == Elem[First] || isUndef(Elem[i]))
        continue;
      IsSplat = false;
      break;
    }
    if (IsSplat) {
      // Legalize the operand of SPLAT_VECTOR
      SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
      return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
    }
  }

  // Then try constant.
  if (AllConst) {
    uint64_t Val = 0;
    unsigned W = ElemTy.getSizeInBits();
    uint64_t Mask = (ElemTy == MVT::i8)  ? 0xFFull
                  : (ElemTy == MVT::i16) ? 0xFFFFull : 0xFFFFFFFFull;
    for (unsigned i = 0; i != Num; ++i)
      Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask);
    SDValue V0 = DAG.getConstant(Val, dl, MVT::i64);
    return DAG.getBitcast(VecTy, V0);
  }

  // Build two 32-bit vectors and concatenate.
  MVT HalfTy = MVT::getVectorVT(ElemTy, Num/2);
  SDValue L = (ElemTy == MVT::i32)
                ? Elem[0]
                : buildVector32(Elem.take_front(Num/2), dl, HalfTy, DAG);
  SDValue H = (ElemTy == MVT::i32)
                ? Elem[1]
                : buildVector32(Elem.drop_front(Num/2), dl, HalfTy, DAG);
  return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, {H, L});
}
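// For illustration: a constant v4i16 <1, 2, 3, 4> packs (most significant
// element first) into the single 64-bit constant 0x0004000300020001;
// otherwise the two 32-bit halves are built separately and joined with
// HexagonISD::COMBINE, high half first.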
SDValue
HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
                                     const SDLoc &dl, MVT ValTy, MVT ResTy,
                                     SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  assert(!ValTy.isVector() ||
         VecTy.getVectorElementType() == ValTy.getVectorElementType());
  unsigned VecWidth = VecTy.getSizeInBits();
  unsigned ValWidth = ValTy.getSizeInBits();
  unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
  assert((VecWidth % ElemWidth) == 0);
  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV);

  // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
  // without any coprocessors).
  if (ElemWidth == 1) {
    assert(VecWidth == VecTy.getVectorNumElements() && "Sanity failure");
    assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
    // Check if this is an extract of the lowest bit.
    if (IdxN) {
      // Extracting the lowest bit is a no-op, but it changes the type,
      // so it must be kept as an operation to avoid errors related to
      // type mismatches.
      if (IdxN->isNullValue() && ValTy.getSizeInBits() == 1)
        return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
    }

    // If the value extracted is a single bit, use tstbit.
    if (ValWidth == 1) {
      SDValue A0 = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
      SDValue M0 = DAG.getConstant(8 / VecWidth, dl, MVT::i32);
      SDValue I0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, M0);
      return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, I0);
    }

    // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
    // a predicate register. The elements of the vector are repeated
    // in the register (if necessary) so that the total number is 8.
    // The extracted subvector will need to be expanded in such a way.
    unsigned Scale = VecWidth / ValWidth;

    // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
    // position 0.
    assert(ty(IdxV) == MVT::i32);
    unsigned VecRep = 8 / VecWidth;
    SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                             DAG.getConstant(8*VecRep, dl, MVT::i32));
    SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
    SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
    while (Scale > 1) {
      // The longest possible subvector is at most 32 bits, so it is always
      // contained in the low subregister.
      T1 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, T1);
      T1 = expandPredicate(T1, dl, DAG);
      Scale /= 2;
    }

    return DAG.getNode(HexagonISD::D2P, dl, ResTy, T1);
  }

  assert(VecWidth == 32 || VecWidth == 64);

  // Cast everything to scalar integer types.
  MVT ScalarTy = tyScalar(VecTy);
  VecV = DAG.getBitcast(ScalarTy, VecV);

  SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
  SDValue ExtV;

  if (IdxN) {
    unsigned Off = IdxN->getZExtValue() * ElemWidth;
    if (VecWidth == 64 && ValWidth == 32) {
      assert(Off == 0 || Off == 32);
      unsigned SubIdx = Off == 0 ? Hexagon::isub_lo : Hexagon::isub_hi;
      ExtV = DAG.getTargetExtractSubreg(SubIdx, dl, MVT::i32, VecV);
    } else if (Off == 0 && (ValWidth % 8) == 0) {
      ExtV = DAG.getZeroExtendInReg(VecV, dl, tyScalar(ValTy));
    } else {
      SDValue OffV = DAG.getConstant(Off, dl, MVT::i32);
      // The return type of EXTRACTU must be the same as the type of the
      // input vector.
      ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
                         {VecV, WidthV, OffV});
    }
  } else {
    if (ty(IdxV) != MVT::i32)
      IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
    SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                               DAG.getConstant(ElemWidth, dl, MVT::i32));
    ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
                       {VecV, WidthV, OffV});
  }

  // Cast ExtV to the requested result type.
  ExtV = DAG.getZExtOrTrunc(ExtV, dl, tyScalar(ResTy));
  ExtV = DAG.getBitcast(ResTy, ExtV);
  return ExtV;
}
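// For illustration: extracting element 1 of a v2i32 (a 64-bit vector) with a
// constant index has Off == 32, so it becomes a plain isub_hi subregister
// read; extracting element 1 of a v4i8 has Off == 8 and takes the EXTRACTU
// path with width 8 and offset 8.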
SDValue
HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
                                    const SDLoc &dl, MVT ValTy,
                                    SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  if (VecTy.getVectorElementType() == MVT::i1) {
    MVT ValTy = ty(ValV);
    assert(ValTy.getVectorElementType() == MVT::i1);
    SDValue ValR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV);
    unsigned VecLen = VecTy.getVectorNumElements();
    unsigned Scale = VecLen / ValTy.getVectorNumElements();

    for (unsigned R = Scale; R > 1; R /= 2) {
      ValR = contractPredicate(ValR, dl, DAG);
      ValR = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
                         DAG.getUNDEF(MVT::i32), ValR);
    }
    // The longest possible subvector is at most 32 bits, so it is always
    // contained in the low subregister.
    ValR = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, ValR);

    unsigned ValBytes = 64 / Scale;
    SDValue Width = DAG.getConstant(ValBytes*8, dl, MVT::i32);
    SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                              DAG.getConstant(8, dl, MVT::i32));
    SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
    SDValue Ins = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
                              {VecR, ValR, Width, Idx});
    return DAG.getNode(HexagonISD::D2P, dl, VecTy, Ins);
  }

  unsigned VecWidth = VecTy.getSizeInBits();
  unsigned ValWidth = ValTy.getSizeInBits();
  assert(VecWidth == 32 || VecWidth == 64);
  assert((VecWidth % ValWidth) == 0);

  // Cast everything to scalar integer types.
  MVT ScalarTy = MVT::getIntegerVT(VecWidth);
  // The actual type of ValV may be different than ValTy (which is related
  // to the vector type).
  unsigned VW = ty(ValV).getSizeInBits();
  ValV = DAG.getBitcast(MVT::getIntegerVT(VW), ValV);
  VecV = DAG.getBitcast(ScalarTy, VecV);
  if (VW != VecWidth)
    ValV = DAG.getAnyExtOrTrunc(ValV, dl, ScalarTy);

  SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
  SDValue InsV;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(IdxV)) {
    unsigned W = C->getZExtValue() * ValWidth;
    SDValue OffV = DAG.getConstant(W, dl, MVT::i32);
    InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
                       {VecV, ValV, WidthV, OffV});
  } else {
    if (ty(IdxV) != MVT::i32)
      IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
    SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV);
    InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
                       {VecV, ValV, WidthV, OffV});
  }

  return DAG.getNode(ISD::BITCAST, dl, VecTy, InsV);
}
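// For illustration: inserting byte 2 into a v4i8 with a constant index uses a
// single HexagonISD::INSERT with width 8 and bit offset 2 * 8 == 16; with a
// variable index the bit offset is computed at run time as IdxV * ValWidth.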
SDValue
HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
                                       SelectionDAG &DAG) const {
  assert(ty(Vec32).getSizeInBits() == 32);
  if (isUndef(Vec32))
    return DAG.getUNDEF(MVT::i64);
  return getInstr(Hexagon::S2_vsxtbh, dl, MVT::i64, {Vec32}, DAG);
}

SDValue
HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  assert(ty(Vec64).getSizeInBits() == 64);
  if (isUndef(Vec64))
    return DAG.getUNDEF(MVT::i32);
  return getInstr(Hexagon::S2_vtrunehb, dl, MVT::i32, {Vec64}, DAG);
}
SDValue
HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
      const {
  if (Ty.isVector()) {
    assert(Ty.isInteger() && "Only integer vectors are supported here");
    unsigned W = Ty.getSizeInBits();
    if (W <= 64)
      return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W)));
    return DAG.getNode(ISD::SPLAT_VECTOR, dl, Ty, getZero(dl, MVT::i32, DAG));
  }

  if (Ty.isInteger())
    return DAG.getConstant(0, dl, Ty);
  if (Ty.isFloatingPoint())
    return DAG.getConstantFP(0.0, dl, Ty);
  llvm_unreachable("Invalid type for zero");
}
SDValue
HexagonTargetLowering::appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG)
      const {
  MVT ValTy = ty(Val);
  assert(ValTy.getVectorElementType() == ResTy.getVectorElementType());

  unsigned ValLen = ValTy.getVectorNumElements();
  unsigned ResLen = ResTy.getVectorNumElements();
  if (ValLen == ResLen)
    return Val;

  const SDLoc &dl(Val);
  assert(ValLen < ResLen);
  assert(ResLen % ValLen == 0);

  SmallVector<SDValue, 4> Concats = {Val};
  for (unsigned i = 1, e = ResLen / ValLen; i < e; ++i)
    Concats.push_back(DAG.getUNDEF(ValTy));

  return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, Concats);
}
SDValue
HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
  MVT VecTy = ty(Op);
  unsigned BW = VecTy.getSizeInBits();
  const SDLoc &dl(Op);
  SmallVector<SDValue,8> Ops;
  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
    Ops.push_back(Op.getOperand(i));

  if (BW == 32)
    return buildVector32(Ops, dl, VecTy, DAG);
  if (BW == 64)
    return buildVector64(Ops, dl, VecTy, DAG);

  if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
    // Check if this is a special case or all-0 or all-1.
    bool All0 = true, All1 = true;
    for (SDValue P : Ops) {
      auto *CN = dyn_cast<ConstantSDNode>(P.getNode());
      if (CN == nullptr) {
        All0 = All1 = false;
        break;
      }
      uint32_t C = CN->getZExtValue();
      All0 &= (C == 0);
      All1 &= (C == 1);
    }
    if (All0)
      return DAG.getNode(HexagonISD::PFALSE, dl, VecTy);
    if (All1)
      return DAG.getNode(HexagonISD::PTRUE, dl, VecTy);

    // For each i1 element in the resulting predicate register, put 1
    // shifted by the index of the element into a general-purpose register,
    // then or them together and transfer it back into a predicate register.
    SDValue Rs[8];
    SDValue Z = getZero(dl, MVT::i32, DAG);
    // Always produce 8 bits, repeat inputs if necessary.
    unsigned Rep = 8 / VecTy.getVectorNumElements();
    for (unsigned i = 0; i != 8; ++i) {
      SDValue S = DAG.getConstant(1ull << i, dl, MVT::i32);
      Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z);
    }
    for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(A.size()/2)) {
      for (unsigned i = 0, e = A.size()/2; i != e; ++i)
        Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[2*i], Rs[2*i+1]);
    }
    // Move the value directly to a predicate register.
    return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Rs[0]}, DAG);
  }

  return SDValue();
}
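// For illustration: building a non-constant v4i1 <b0, b1, b2, b3> has
// Rep == 2, so each input drives two of the eight predicate bits; Rs[i]
// selects (1 << i) or 0 based on Ops[i/2], the Rs values are OR-ed together
// in a tree, and the resulting word is moved to a predicate register with
// C2_tfrrp.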
SDValue
HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
                                           SelectionDAG &DAG) const {
  MVT VecTy = ty(Op);
  const SDLoc &dl(Op);
  if (VecTy.getSizeInBits() == 64) {
    assert(Op.getNumOperands() == 2);
    return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, Op.getOperand(1),
                       Op.getOperand(0));
  }

  MVT ElemTy = VecTy.getVectorElementType();
  if (ElemTy == MVT::i1) {
    assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1);
    MVT OpTy = ty(Op.getOperand(0));
    // Scale is how many times the operands need to be contracted to match
    // the representation in the target register.
    unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
    assert(Scale == Op.getNumOperands() && Scale > 1);

    // First, convert all bool vectors to integers, then generate pairwise
    // inserts to form values of doubled length. Up until there are only
    // two values left to concatenate, all of these values will fit in a
    // 32-bit integer, so keep them as i32 to use 32-bit inserts.
    SmallVector<SDValue,4> Words[2];
    unsigned IdxW = 0;

    for (SDValue P : Op.getNode()->op_values()) {
      SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P);
      for (unsigned R = Scale; R > 1; R /= 2) {
        W = contractPredicate(W, dl, DAG);
        W = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
                        DAG.getUNDEF(MVT::i32), W);
      }
      W = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, W);
      Words[IdxW].push_back(W);
    }

    while (Scale > 2) {
      SDValue WidthV = DAG.getConstant(64 / Scale, dl, MVT::i32);
      Words[IdxW ^ 1].clear();

      for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) {
        SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1];
        // Insert W1 into W0 right next to the significant bits of W0.
        SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
                                {W0, W1, WidthV, WidthV});
        Words[IdxW ^ 1].push_back(T);
      }
      IdxW ^= 1;
      Scale /= 2;
    }

    // Another sanity check. At this point there should only be two words
    // left, and Scale should be 2.
    assert(Scale == 2 && Words[IdxW].size() == 2);

    SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
                             Words[IdxW][1], Words[IdxW][0]);
    return DAG.getNode(HexagonISD::D2P, dl, VecTy, WW);
  }

  return SDValue();
}
SDValue
HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  MVT ElemTy = ty(Vec).getVectorElementType();
  return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG);
}

SDValue
HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
                                              SelectionDAG &DAG) const {
  return extractVector(Op.getOperand(0), Op.getOperand(1), SDLoc(Op),
                       ty(Op), ty(Op), DAG);
}

SDValue
HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                              SelectionDAG &DAG) const {
  return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
                      SDLoc(Op), ty(Op).getVectorElementType(), DAG);
}

SDValue
HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue ValV = Op.getOperand(1);
  return insertVector(Op.getOperand(0), ValV, Op.getOperand(2),
                      SDLoc(Op), ty(ValV), DAG);
}
bool
HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
  // Assuming the caller does not have either a signext or zeroext modifier,
  // and only one value is accepted, any reasonable truncation is allowed.
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;

  // FIXME: in principle up to 64-bit could be made safe, but it would be very
  // fragile at the moment: any support for multiple value returns would be
  // liable to disallow tail calls involving i64 -> iN truncation in many cases.
  return Ty1->getPrimitiveSizeInBits() <= 32;
}
SDValue
HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
  MVT Ty = ty(Op);
  const SDLoc &dl(Op);
  // Lower loads of scalar predicate vectors (v2i1, v4i1, v8i1) to loads of i1
  // followed by a TYPECAST.
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  bool DoCast = (Ty == MVT::v2i1 || Ty == MVT::v4i1 || Ty == MVT::v8i1);
  if (DoCast) {
    SDValue NL = DAG.getLoad(
        LN->getAddressingMode(), LN->getExtensionType(), MVT::i1, dl,
        LN->getChain(), LN->getBasePtr(), LN->getOffset(), LN->getPointerInfo(),
        /*MemoryVT*/ MVT::i1, LN->getAlign(), LN->getMemOperand()->getFlags(),
        LN->getAAInfo(), LN->getRanges());
    LN = cast<LoadSDNode>(NL.getNode());
  }

  Align ClaimAlign = LN->getAlign();
  if (!validateConstPtrAlignment(LN->getBasePtr(), ClaimAlign, dl, DAG))
    return replaceMemWithUndef(Op, DAG);

  // Call LowerUnalignedLoad for all loads, it recognizes loads that
  // don't need extra aligning.
  SDValue LU = LowerUnalignedLoad(SDValue(LN, 0), DAG);
  if (DoCast) {
    SDValue TC = DAG.getNode(HexagonISD::TYPECAST, dl, Ty, LU);
    SDValue Ch = cast<LoadSDNode>(LU.getNode())->getChain();
    return DAG.getMergeValues({TC, Ch}, dl);
  }
  return LU;
}
SDValue
HexagonTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue Val = SN->getValue();
  MVT Ty = ty(Val);

  bool DoCast = (Ty == MVT::v2i1 || Ty == MVT::v4i1 || Ty == MVT::v8i1);
  if (DoCast) {
    SDValue TC = DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, Val);
    SDValue NS = DAG.getStore(SN->getChain(), dl, TC, SN->getBasePtr(),
                              SN->getMemOperand());
    if (SN->isIndexed()) {
      NS = DAG.getIndexedStore(NS, dl, SN->getBasePtr(), SN->getOffset(),
                               SN->getAddressingMode());
    }
    SN = cast<StoreSDNode>(NS.getNode());
  }

  Align ClaimAlign = SN->getAlign();
  if (!validateConstPtrAlignment(SN->getBasePtr(), ClaimAlign, dl, DAG))
    return replaceMemWithUndef(Op, DAG);

  MVT StoreTy = SN->getMemoryVT().getSimpleVT();
  Align NeedAlign = Subtarget.getTypeAlignment(StoreTy);
  if (ClaimAlign < NeedAlign)
    return expandUnalignedStore(SN, DAG);
  return SDValue(SN, 0);
}
SDValue
HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
      const {
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  MVT LoadTy = ty(Op);
  unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy).value();
  unsigned HaveAlign = LN->getAlign().value();
  if (HaveAlign >= NeedAlign)
    return Op;

  const SDLoc &dl(Op);
  const DataLayout &DL = DAG.getDataLayout();
  LLVMContext &Ctx = *DAG.getContext();

  // If the load aligning is disabled or the load can be broken up into two
  // smaller legal loads, do the default (target-independent) expansion.
  bool DoDefault = false;
  // Handle it in the default way if this is an indexed load.
  if (!LN->isUnindexed())
    DoDefault = true;

  if (!AlignLoads) {
    if (allowsMemoryAccessForAlignment(Ctx, DL, LN->getMemoryVT(),
                                       *LN->getMemOperand()))
      return Op;
    DoDefault = true;
  }
  if (!DoDefault && (2 * HaveAlign) == NeedAlign) {
    // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
    MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8 * HaveAlign)
                                : MVT::getVectorVT(MVT::i8, HaveAlign);
    DoDefault =
        allowsMemoryAccessForAlignment(Ctx, DL, PartTy, *LN->getMemOperand());
  }
  if (DoDefault) {
    std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
    return DAG.getMergeValues({P.first, P.second}, dl);
  }

  // The code below generates two loads, both aligned as NeedAlign, and
  // with the distance of NeedAlign between them. For that to cover the
  // bits that need to be loaded (and without overlapping), the size of
  // the loads should be equal to NeedAlign. This is true for all loadable
  // types, but add an assertion in case something changes in the future.
  assert(LoadTy.getSizeInBits() == 8*NeedAlign);

  unsigned LoadLen = NeedAlign;
  SDValue Base = LN->getBasePtr();
  SDValue Chain = LN->getChain();
  auto BO = getBaseAndOffset(Base);
  unsigned BaseOpc = BO.first.getOpcode();
  if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0)
    return Op;

  if (BO.second % LoadLen != 0) {
    BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
                           DAG.getConstant(BO.second % LoadLen, dl, MVT::i32));
    BO.second -= BO.second % LoadLen;
  }
  SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
      ? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
                    DAG.getConstant(NeedAlign, dl, MVT::i32))
      : BO.first;
  SDValue Base0 =
      DAG.getMemBasePlusOffset(BaseNoOff, TypeSize::Fixed(BO.second), dl);
  SDValue Base1 = DAG.getMemBasePlusOffset(
      BaseNoOff, TypeSize::Fixed(BO.second + LoadLen), dl);

  MachineMemOperand *WideMMO = nullptr;
  if (MachineMemOperand *MMO = LN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    WideMMO = MF.getMachineMemOperand(
        MMO->getPointerInfo(), MMO->getFlags(), 2 * LoadLen, Align(LoadLen),
        MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(),
        MMO->getSuccessOrdering(), MMO->getFailureOrdering());
  }

  SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO);
  SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO);

  SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy,
                                {Load1, Load0, BaseNoOff.getOperand(0)});
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                                 Load0.getValue(1), Load1.getValue(1));
  SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl);
  return M;
}
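// For illustration: a v8i8 load (NeedAlign == 8) from a pointer with only
// byte alignment becomes two 8-byte loads from the aligned-down address and
// from that address plus 8, and the requested bytes are then spliced out of
// the pair with HexagonISD::VALIGN using the low bits of the original
// (unaligned) pointer as the shift amount.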
SDValue
HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const {
  SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
  auto *CY = dyn_cast<ConstantSDNode>(Y);
  if (!CY)
    return SDValue();

  const SDLoc &dl(Op);
  SDVTList VTs = Op.getNode()->getVTList();
  assert(VTs.NumVTs == 2);
  assert(VTs.VTs[1] == MVT::i1);
  unsigned Opc = Op.getOpcode();

  if (CY) {
    uint32_t VY = CY->getZExtValue();
    assert(VY != 0 && "This should have been folded");
    // X +/- 1
    if (VY != 1)
      return SDValue();

    if (Opc == ISD::UADDO) {
      SDValue Op = DAG.getNode(ISD::ADD, dl, VTs.VTs[0], {X, Y});
      SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op, getZero(dl, ty(Op), DAG),
                                ISD::SETEQ);
      return DAG.getMergeValues({Op, Ov}, dl);
    }
    if (Opc == ISD::USUBO) {
      SDValue Op = DAG.getNode(ISD::SUB, dl, VTs.VTs[0], {X, Y});
      SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op,
                                DAG.getConstant(-1, dl, ty(Op)), ISD::SETEQ);
      return DAG.getMergeValues({Op, Ov}, dl);
    }
  }

  return SDValue();
}
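// Editorial sketch (not part of the original source): with a constant Y == 1
// the lowering above reduces to
//   uaddo(X, 1):  Sum = add X, 1;  Ov = setcc eq Sum, 0    // wrapped to zero
//   usubo(X, 1):  Dif = sub X, 1;  Ov = setcc eq Dif, -1   // X was zero
// i.e. the overflow/borrow bit is recovered from the result alone, without
// widening the arithmetic.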
SDValue
HexagonTargetLowering::LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned Opc = Op.getOpcode();
  SDValue X = Op.getOperand(0), Y = Op.getOperand(1), C = Op.getOperand(2);

  if (Opc == ISD::ADDCARRY)
    return DAG.getNode(HexagonISD::ADDC, dl, Op.getNode()->getVTList(),
                       { X, Y, C });

  EVT CarryTy = C.getValueType();
  SDValue SubC = DAG.getNode(HexagonISD::SUBC, dl, Op.getNode()->getVTList(),
                             { X, Y, DAG.getLogicalNOT(dl, C, CarryTy) });
  SDValue Out[] = { SubC.getValue(0),
                    DAG.getLogicalNOT(dl, SubC.getValue(1), CarryTy) };
  return DAG.getMergeValues(Out, dl);
}
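// Editorial note: HexagonISD::SUBC consumes and produces a carry bit rather
// than a borrow bit, so ISD::SUBCARRY is mapped onto it above by logically
// negating both the incoming carry operand and the outgoing carry result.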
SDValue
HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain   = Op.getOperand(0);
  SDValue Offset  = Op.getOperand(1);
  SDValue Handler = Op.getOperand(2);
  SDLoc dl(Op);
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Mark the function as containing a call to EH_RETURN.
  HexagonMachineFunctionInfo *FuncInfo =
      DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
  FuncInfo->setHasEHReturn();

  unsigned OffsetReg = Hexagon::R28;

  SDValue StoreAddr =
      DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
                  DAG.getIntPtrConstant(4, dl));
  Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
  Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);

  // Not needed; we already use it as an explicit input to EH_RETURN.
  // MF.getRegInfo().addLiveOut(OffsetReg);

  return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
}
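// Editorial note (hedged): FP+4 is where allocframe saved the return address,
// so the store above overwrites the current frame's saved return address with
// the handler, while the stack adjustment travels in R28 and is consumed by
// the EH_RETURN node.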
SDValue
HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();

  // Handle INLINEASM first.
  if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
    return LowerINLINEASM(Op, DAG);

  if (isHvxOperation(Op.getNode(), DAG)) {
    // If HVX lowering returns nothing, try the default lowering.
    if (SDValue V = LowerHvxOperation(Op, DAG))
      return V;
  }

  switch (Opc) {
    default:
#ifndef NDEBUG
      Op.getNode()->dumpr(&DAG);
      if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
        errs() << "Error: check for a non-legal type in this operation\n";
#endif
      llvm_unreachable("Should not custom lower this!");
    case ISD::CONCAT_VECTORS:       return LowerCONCAT_VECTORS(Op, DAG);
    case ISD::INSERT_SUBVECTOR:     return LowerINSERT_SUBVECTOR(Op, DAG);
    case ISD::INSERT_VECTOR_ELT:    return LowerINSERT_VECTOR_ELT(Op, DAG);
    case ISD::EXTRACT_SUBVECTOR:    return LowerEXTRACT_SUBVECTOR(Op, DAG);
    case ISD::EXTRACT_VECTOR_ELT:   return LowerEXTRACT_VECTOR_ELT(Op, DAG);
    case ISD::BUILD_VECTOR:         return LowerBUILD_VECTOR(Op, DAG);
    case ISD::VECTOR_SHUFFLE:       return LowerVECTOR_SHUFFLE(Op, DAG);
    case ISD::BITCAST:              return LowerBITCAST(Op, DAG);
    case ISD::LOAD:                 return LowerLoad(Op, DAG);
    case ISD::STORE:                return LowerStore(Op, DAG);
    case ISD::UADDO:
    case ISD::USUBO:                return LowerUAddSubO(Op, DAG);
    case ISD::ADDCARRY:
    case ISD::SUBCARRY:             return LowerAddSubCarry(Op, DAG);
    case ISD::SRA:
    case ISD::SHL:
    case ISD::SRL:                  return LowerVECTOR_SHIFT(Op, DAG);
    case ISD::ROTL:                 return LowerROTL(Op, DAG);
    case ISD::ConstantPool:         return LowerConstantPool(Op, DAG);
    case ISD::JumpTable:            return LowerJumpTable(Op, DAG);
    case ISD::EH_RETURN:            return LowerEH_RETURN(Op, DAG);
    case ISD::RETURNADDR:           return LowerRETURNADDR(Op, DAG);
    case ISD::FRAMEADDR:            return LowerFRAMEADDR(Op, DAG);
    case ISD::GlobalTLSAddress:     return LowerGlobalTLSAddress(Op, DAG);
    case ISD::ATOMIC_FENCE:         return LowerATOMIC_FENCE(Op, DAG);
    case ISD::GlobalAddress:        return LowerGLOBALADDRESS(Op, DAG);
    case ISD::BlockAddress:         return LowerBlockAddress(Op, DAG);
    case ISD::GLOBAL_OFFSET_TABLE:  return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
    case ISD::VACOPY:               return LowerVACOPY(Op, DAG);
    case ISD::VASTART:              return LowerVASTART(Op, DAG);
    case ISD::DYNAMIC_STACKALLOC:   return LowerDYNAMIC_STACKALLOC(Op, DAG);
    case ISD::SETCC:                return LowerSETCC(Op, DAG);
    case ISD::VSELECT:              return LowerVSELECT(Op, DAG);
    case ISD::INTRINSIC_WO_CHAIN:   return LowerINTRINSIC_WO_CHAIN(Op, DAG);
    case ISD::INTRINSIC_VOID:       return LowerINTRINSIC_VOID(Op, DAG);
    case ISD::PREFETCH:             return LowerPREFETCH(Op, DAG);
    case ISD::READCYCLECOUNTER:     return LowerREADCYCLECOUNTER(Op, DAG);
  }

  return SDValue();
}
void
HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  if (isHvxOperation(N, DAG)) {
    LowerHvxOperationWrapper(N, Results, DAG);
    if (!Results.empty())
      return;
  }

  // We are only custom-lowering stores to verify the alignment of the
  // address if it is a compile-time constant. Since a store can be modified
  // during type-legalization (the value being stored may need legalization),
  // return empty Results here to indicate that we don't really make any
  // changes in the custom lowering.
  if (N->getOpcode() != ISD::STORE)
    return TargetLowering::LowerOperationWrapper(N, Results, DAG);
}
void
HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
                                          SmallVectorImpl<SDValue> &Results,
                                          SelectionDAG &DAG) const {
  if (isHvxOperation(N, DAG)) {
    ReplaceHvxNodeResults(N, Results, DAG);
    if (!Results.empty())
      return;
  }

  const SDLoc &dl(N);
  switch (N->getOpcode()) {
    case ISD::BITCAST:
      // Handle a bitcast from v8i1 to i8.
      if (N->getValueType(0) == MVT::i8) {
        if (N->getOperand(0).getValueType() == MVT::v8i1) {
          SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
                               N->getOperand(0), DAG);
          SDValue T = DAG.getAnyExtOrTrunc(P, dl, MVT::i8);
          Results.push_back(T);
          return;
        }
      }
      break;
    default:
      break;
  }
}
SDValue
HexagonTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
      const {
  if (isHvxOperation(N, DCI.DAG)) {
    if (SDValue V = PerformHvxDAGCombine(N, DCI))
      return V;
    return SDValue();
  }

  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SDValue Op(N, 0);
  const SDLoc &dl(Op);
  unsigned Opc = Op.getOpcode();

  if (Opc == HexagonISD::P2D) {
    SDValue P = Op.getOperand(0);
    switch (P.getOpcode()) {
      case HexagonISD::PTRUE:
        return DCI.DAG.getConstant(-1, dl, ty(Op));
      case HexagonISD::PFALSE:
        return getZero(dl, ty(Op), DCI.DAG);
      default:
        break;
    }
  } else if (Opc == ISD::VSELECT) {
    // This is pretty much duplicated in HexagonISelLoweringHVX...
    // (vselect (xor x, ptrue), v0, v1) -> (vselect x, v1, v0)
    SDValue Cond = Op.getOperand(0);
    if (Cond->getOpcode() == ISD::XOR) {
      SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
      if (C1->getOpcode() == HexagonISD::PTRUE) {
        SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0,
                                       Op.getOperand(2), Op.getOperand(1));
        return VSel;
      }
    }
  }

  return SDValue();
}
/// Returns relocation base for the given PIC jumptable.
SDValue
HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                SelectionDAG &DAG) const {
  int Idx = cast<JumpTableSDNode>(Table)->getIndex();
  EVT VT = Table.getValueType();
  SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
  return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
}
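// Editorial note: the relocation base returned here is the PC-relative
// address of the jump table itself (HexagonISD::AT_PCREL), so PIC jump-table
// entries are resolved as offsets from the table's own address.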
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
HexagonTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
      case 'q':
      case 'v':
        if (Subtarget.useHVXOps())
          return C_RegisterClass;
        break;
      case 'a':
        return C_RegisterClass;
      default:
        break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}
std::pair<unsigned, const TargetRegisterClass*>
HexagonTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
      case 'r':   // R0-R31
        switch (VT.SimpleTy) {
          default:
            return {0u, nullptr};
          case MVT::i1:
          case MVT::i8:
          case MVT::i16:
          case MVT::i32:
          case MVT::f32:
            return {0u, &Hexagon::IntRegsRegClass};
          case MVT::i64:
          case MVT::f64:
            return {0u, &Hexagon::DoubleRegsRegClass};
        }
        break;
      case 'a':   // M0-M1
        if (VT != MVT::i32)
          return {0u, nullptr};
        return {0u, &Hexagon::ModRegsRegClass};
      case 'q':   // HVX predicate registers
        switch (VT.getSizeInBits()) {
          default:
            return {0u, nullptr};
          case 64:
          case 128:
            return {0u, &Hexagon::HvxQRRegClass};
        }
        break;
      case 'v':   // HVX vector registers
        switch (VT.getSizeInBits()) {
          default:
            return {0u, nullptr};
          case 512:
            return {0u, &Hexagon::HvxVRRegClass};
          case 1024:
            if (Subtarget.hasV60Ops() && Subtarget.useHVX128BOps())
              return {0u, &Hexagon::HvxVRRegClass};
            return {0u, &Hexagon::HvxWRRegClass};
          case 2048:
            return {0u, &Hexagon::HvxWRRegClass};
        }
        break;
      default:
        return {0u, nullptr};
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                         bool ForCodeSize) const {
  return true;
}
/// isLegalAddressingMode - Return true if the addressing mode represented by
/// AM is legal for this target, for a load/store of the specified type.
bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                  const AddrMode &AM, Type *Ty,
                                                  unsigned AS,
                                                  Instruction *I) const {
  if (Ty->isSized()) {
    // When LSR detects uses of the same base address to access different
    // types (e.g. unions), it will assume a conservative type for these
    // uses:
    //   LSR Use: Kind=Address of void in addrspace(4294967295), ...
    // The type Ty passed here would then be "void". Skip the alignment
    // checks, but do not return false right away, since that confuses
    // LSR into crashing.
    Align A = DL.getABITypeAlign(Ty);
    // The base offset must be a multiple of the alignment.
    if (!isAligned(A, AM.BaseOffs))
      return false;
    // The shifted offset must fit in 11 bits.
    if (!isInt<11>(AM.BaseOffs >> Log2(A)))
      return false;
  }

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  int Scale = AM.Scale;
  if (Scale < 0)
    Scale = -Scale;
  switch (Scale) {
    case 0:  // No scale reg, "r+i", "r", or just "i".
      break;
    default: // No scaled addressing mode.
      return false;
  }
  return true;
}
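// Editorial example (derived from the checks above): for an 8-byte type the
// ABI alignment is 8, so a legal "r+#offset" mode needs an offset that is a
// multiple of 8 whose value, shifted right by 3, fits in a signed 11-bit
// field, i.e. roughly -8192..8184; a global base or a scaled index register
// is rejected outright.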
/// Return true if folding a constant offset with the given GlobalAddress is
/// legal. It is frequently not legal in PIC relocation models.
bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
      const {
  return HTM.getRelocationModel() == Reloc::Static;
}
/// isLegalICmpImmediate - Return true if the specified immediate is a legal
/// icmp immediate, that is, the target has icmp instructions which can
/// compare a register against the immediate without having to materialize
/// the immediate into a register.
bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return Imm >= -512 && Imm <= 511;
}
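// Editorial note: the accepted range -512..511 corresponds to a signed
// 10-bit (#s10) immediate field.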
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
                                 SDValue Callee,
                                 CallingConv::ID CalleeCC,
                                 bool IsVarArg,
                                 bool IsCalleeStructRet,
                                 bool IsCallerStructRet,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SmallVectorImpl<ISD::InputArg> &Ins,
                                 SelectionDAG &DAG) const {
  const Function &CallerF = DAG.getMachineFunction().getFunction();
  CallingConv::ID CallerCC = CallerF.getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;

  // ***************************************************************************
  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes.
  // ***************************************************************************

  // If this is a tail call via a function pointer, then don't do it!
  if (!isa<GlobalAddressSDNode>(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee)) {
    return false;
  }

  // Do not optimize if the calling conventions do not match and the
  // conventions used are not C or Fast.
  if (!CCMatch) {
    bool R = (CallerCC == CallingConv::C || CallerCC == CallingConv::Fast);
    bool E = (CalleeCC == CallingConv::C || CalleeCC == CallingConv::Fast);
    // If R & E, then ok.
    if (!R || !E)
      return false;
  }

  // Do not tail call optimize vararg calls.
  if (IsVarArg)
    return false;

  // Also avoid tail call optimization if either caller or callee uses struct
  // return semantics.
  if (IsCalleeStructRet || IsCallerStructRet)
    return false;

  // In addition to the cases above, we also disable tail call optimization if
  // the calling convention requires that at least one outgoing argument be
  // passed on the stack. We cannot check that here because at this point that
  // information is not available.
  return true;
}
/// Returns the target specific optimal type for load and store operations as
/// a result of memset, memcpy, and memmove lowering.
///
/// If DstAlign is zero, that means it is safe for the destination alignment to
/// satisfy any constraint. Similarly, if SrcAlign is zero, it means there isn't
/// a need to check it against the alignment requirement, probably because the
/// source does not need to be loaded. If 'IsMemset' is true, that means it's
/// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
/// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
/// does not need to be loaded. It returns EVT::Other if the type should be
/// determined using generic target-independent logic.
EVT HexagonTargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  if (Op.size() >= 8 && Op.isAligned(Align(8)))
    return MVT::i64;
  if (Op.size() >= 4 && Op.isAligned(Align(4)))
    return MVT::i32;
  if (Op.size() >= 2 && Op.isAligned(Align(2)))
    return MVT::i16;
  return MVT::Other;
}
bool HexagonTargetLowering::allowsMemoryAccess(
    LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
    Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {
  MVT SVT = VT.getSimpleVT();
  if (Subtarget.isHVXVectorType(SVT, true))
    return allowsHvxMemoryAccess(SVT, Flags, Fast);
  return TargetLoweringBase::allowsMemoryAccess(
      Context, DL, VT, AddrSpace, Alignment, Flags, Fast);
}
bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  MVT SVT = VT.getSimpleVT();
  if (Subtarget.isHVXVectorType(SVT, true))
    return allowsHvxMisalignedMemoryAccesses(SVT, Flags, Fast);
  return false;
}
std::pair<const TargetRegisterClass*, uint8_t>
HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
                                               MVT VT) const {
  if (Subtarget.isHVXVectorType(VT, true)) {
    unsigned BitWidth = VT.getSizeInBits();
    unsigned VecWidth = Subtarget.getVectorLength() * 8;

    if (VT.getVectorElementType() == MVT::i1)
      return std::make_pair(&Hexagon::HvxQRRegClass, 1);
    if (BitWidth == VecWidth)
      return std::make_pair(&Hexagon::HvxVRRegClass, 1);
    assert(BitWidth == 2 * VecWidth);
    return std::make_pair(&Hexagon::HvxWRRegClass, 1);
  }

  return TargetLowering::findRepresentativeClass(TRI, VT);
}
bool HexagonTargetLowering::shouldReduceLoadWidth(SDNode *Load,
                                                  ISD::LoadExtType ExtTy,
                                                  EVT NewVT) const {
  // TODO: This may be worth removing. Check regression tests for diffs.
  if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
    return false;

  auto *L = cast<LoadSDNode>(Load);
  std::pair<SDValue, int> BO = getBaseAndOffset(L->getBasePtr());
  // Small-data object, do not shrink.
  if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
    return false;
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(BO.first)) {
    auto &HTM = static_cast<const HexagonTargetMachine&>(getTargetMachine());
    const auto *GO = dyn_cast_or_null<const GlobalObject>(GA->getGlobal());
    return !GO || !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, HTM);
  }
  return true;
}
Value *HexagonTargetLowering::emitLoadLinked(IRBuilderBase &Builder,
                                             Type *ValueTy, Value *Addr,
                                             AtomicOrdering Ord) const {
  BasicBlock *BB = Builder.GetInsertBlock();
  Module *M = BB->getParent()->getParent();
  unsigned SZ = ValueTy->getPrimitiveSizeInBits();
  assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
  Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
                                   : Intrinsic::hexagon_L4_loadd_locked;
  Function *Fn = Intrinsic::getDeclaration(M, IntID);

  auto PtrTy = cast<PointerType>(Addr->getType());
  PointerType *NewPtrTy =
      Builder.getIntNTy(SZ)->getPointerTo(PtrTy->getAddressSpace());
  Addr = Builder.CreateBitCast(Addr, NewPtrTy);

  Value *Call = Builder.CreateCall(Fn, Addr, "larx");

  return Builder.CreateBitCast(Call, ValueTy);
}
/// Perform a store-conditional operation to Addr. Return the status of the
/// store. This should be 0 if the store succeeded, non-zero otherwise.
Value *HexagonTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
                                                   Value *Val, Value *Addr,
                                                   AtomicOrdering Ord) const {
  BasicBlock *BB = Builder.GetInsertBlock();
  Module *M = BB->getParent()->getParent();
  Type *Ty = Val->getType();
  unsigned SZ = Ty->getPrimitiveSizeInBits();

  Type *CastTy = Builder.getIntNTy(SZ);
  assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
  Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
                                   : Intrinsic::hexagon_S4_stored_locked;
  Function *Fn = Intrinsic::getDeclaration(M, IntID);

  unsigned AS = Addr->getType()->getPointerAddressSpace();
  Addr = Builder.CreateBitCast(Addr, CastTy->getPointerTo(AS));
  Val = Builder.CreateBitCast(Val, CastTy);

  Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
  Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
  Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
  return Ext;
}
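// Editorial sketch (hedged): the AtomicExpand pass uses the two hooks above
// to build the usual LL/SC retry loop, roughly:
//   retry:
//     %old    = <emitLoadLinked(addr)>           ; L2_loadw_locked / L4_loadd_locked
//     %new    = <operation on %old>
//     %status = <emitStoreConditional(new, addr)> ; 0 when the locked store succeeded
//     br i1 (%status != 0), label %retry, label %done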
TargetLowering::AtomicExpansionKind
HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
  // Do not expand loads and stores that don't exceed 64 bits.
  return LI->getType()->getPrimitiveSizeInBits() > 64
             ? AtomicExpansionKind::LLOnly
             : AtomicExpansionKind::None;
}

bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
  // Do not expand loads and stores that don't exceed 64 bits.
  return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
}

TargetLowering::AtomicExpansionKind
HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *AI) const {
  return AtomicExpansionKind::LLSC;
}