//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that Hexagon uses to lower LLVM code
// into a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "HexagonISelLowering.h"
#include "HexagonMachineFunctionInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "HexagonTargetObjectFile.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cstdint>
#include <limits>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "hexagon-lowering"
static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
  cl::init(true), cl::Hidden,
  cl::desc("Control jump table emission on Hexagon target"));

static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
  cl::Hidden,
  cl::desc("Enable Hexagon SDNode scheduling"));

static cl::opt<bool> EnableFastMath("ffast-math", cl::Hidden,
  cl::desc("Enable Fast Math processing"));

static cl::opt<int> MinimumJumpTables("minimum-jump-tables", cl::Hidden,
  cl::desc("Set minimum jump tables"));

static cl::opt<int> MaxStoresPerMemcpyCL("max-store-memcpy", cl::Hidden,
  cl::init(6),
  cl::desc("Max #stores to inline memcpy"));

static cl::opt<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os",
  cl::Hidden, cl::init(4),
  cl::desc("Max #stores to inline memcpy"));

static cl::opt<int> MaxStoresPerMemmoveCL("max-store-memmove", cl::Hidden,
  cl::init(6),
  cl::desc("Max #stores to inline memmove"));

static cl::opt<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os",
  cl::Hidden,
  cl::desc("Max #stores to inline memmove"));

static cl::opt<int> MaxStoresPerMemsetCL("max-store-memset", cl::Hidden,
  cl::init(8),
  cl::desc("Max #stores to inline memset"));

static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
  cl::Hidden, cl::init(4),
  cl::desc("Max #stores to inline memset"));

static cl::opt<bool> AlignLoads("hexagon-align-loads",
  cl::Hidden, cl::init(false),
  cl::desc("Rewrite unaligned loads as a pair of aligned loads"));

static cl::opt<bool>
    DisableArgsMinAlignment("hexagon-disable-args-min-alignment", cl::Hidden,
                            cl::desc("Disable minimum alignment of 1 for "
                                     "arguments passed by value on stack"));
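// The flags above are ordinary cl::opt options, so they can be toggled on the
// llc command line (or via -mllvm from clang) when experimenting with code
// generation, e.g. (illustrative invocation, not a documented interface):
//   llc -mtriple=hexagon -hexagon-emit-jump-tables=false foo.ll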
namespace {

  class HexagonCCState : public CCState {
    unsigned NumNamedVarArgParams = 0;

  public:
    HexagonCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
                   SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
                   unsigned NumNamedArgs)
        : CCState(CC, IsVarArg, MF, locs, C),
          NumNamedVarArgParams(NumNamedArgs) {}

    unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
  };

} // end anonymous namespace
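
// A note on argument register allocation (see CC_SkipOdd below): 64-bit
// values are passed in even/odd register pairs (R1:0, R3:2, R5:4), so when
// the index of the first free register is odd, that odd register is skipped
// to realign the allocation. For example, if R0 already holds an i32
// argument, a following i64 argument skips R1 and is passed in R3:2.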
// Implement calling convention for Hexagon.

static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                       CCValAssign::LocInfo &LocInfo,
                       ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  static const MCPhysReg ArgRegs[] = {
    Hexagon::R0, Hexagon::R1, Hexagon::R2,
    Hexagon::R3, Hexagon::R4, Hexagon::R5
  };
  const unsigned NumArgRegs = std::size(ArgRegs);
  unsigned RegNum = State.getFirstUnallocated(ArgRegs);

  // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
  if (RegNum != NumArgRegs && RegNum % 2 == 1)
    State.AllocateReg(ArgRegs[RegNum]);

  // Always return false here, as this function only makes sure that the first
  // unallocated register has an even register number and does not actually
  // allocate a register for the current argument.
  return false;
}

#include "HexagonGenCallingConv.inc"
SDValue
HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
      const {
  return SDValue();
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address
/// specified by "Src" to address "Dst" of size "Size". Alignment information
/// is specified by the specific parameter attribute. The copy will be passed
/// as a byval function parameter. Sometimes what we are copying is the end of
/// a larger object, the part that does not fit in registers.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(
      Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
      /*isVolatile=*/false, /*AlwaysInline=*/false,
      /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo());
}
bool HexagonTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  if (MF.getSubtarget<HexagonSubtarget>().useHVXOps())
    return CCInfo.CheckReturn(Outs, RetCC_Hexagon_HVX);
  return CCInfo.CheckReturn(Outs, RetCC_Hexagon);
}
// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
// passed by value, the function prototype is modified to return void and
// the value is stored in memory pointed to by a pointer passed by the caller.
SDValue
HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                   bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   const SDLoc &dl, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze return values of ISD::RET
  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon_HVX);
  else
    CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue Val = OutVals[i];

    switch (VA.getLocInfo()) {
      default:
        // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::BCvt:
        Val = DAG.getBitcast(VA.getLocVT(), Val);
        break;
      case CCValAssign::SExt:
        Val = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Val);
        break;
      case CCValAssign::ZExt:
        Val = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Val);
        break;
      case CCValAssign::AExt:
        Val = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Val);
        break;
    }

    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Val, Glue);

    // Guarantee that all emitted copies are stuck together with flags.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(HexagonISD::RET_GLUE, dl, MVT::Other, RetOps);
}
bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // If either no tail call or told not to tail call at all, don't.
  return CI->isTailCall();
}
Register HexagonTargetLowering::getRegisterByName(
      const char* RegName, LLT VT, const MachineFunction &) const {
  // Just support r19, the linux kernel uses it.
  Register Reg = StringSwitch<Register>(RegName)
                     .Case("r0", Hexagon::R0)
                     .Case("r1", Hexagon::R1)
                     .Case("r2", Hexagon::R2)
                     .Case("r3", Hexagon::R3)
                     .Case("r4", Hexagon::R4)
                     .Case("r5", Hexagon::R5)
                     .Case("r6", Hexagon::R6)
                     .Case("r7", Hexagon::R7)
                     .Case("r8", Hexagon::R8)
                     .Case("r9", Hexagon::R9)
                     .Case("r10", Hexagon::R10)
                     .Case("r11", Hexagon::R11)
                     .Case("r12", Hexagon::R12)
                     .Case("r13", Hexagon::R13)
                     .Case("r14", Hexagon::R14)
                     .Case("r15", Hexagon::R15)
                     .Case("r16", Hexagon::R16)
                     .Case("r17", Hexagon::R17)
                     .Case("r18", Hexagon::R18)
                     .Case("r19", Hexagon::R19)
                     .Case("r20", Hexagon::R20)
                     .Case("r21", Hexagon::R21)
                     .Case("r22", Hexagon::R22)
                     .Case("r23", Hexagon::R23)
                     .Case("r24", Hexagon::R24)
                     .Case("r25", Hexagon::R25)
                     .Case("r26", Hexagon::R26)
                     .Case("r27", Hexagon::R27)
                     .Case("r28", Hexagon::R28)
                     .Case("r29", Hexagon::R29)
                     .Case("r30", Hexagon::R30)
                     .Case("r31", Hexagon::R31)
                     .Case("r1:0", Hexagon::D0)
                     .Case("r3:2", Hexagon::D1)
                     .Case("r5:4", Hexagon::D2)
                     .Case("r7:6", Hexagon::D3)
                     .Case("r9:8", Hexagon::D4)
                     .Case("r11:10", Hexagon::D5)
                     .Case("r13:12", Hexagon::D6)
                     .Case("r15:14", Hexagon::D7)
                     .Case("r17:16", Hexagon::D8)
                     .Case("r19:18", Hexagon::D9)
                     .Case("r21:20", Hexagon::D10)
                     .Case("r23:22", Hexagon::D11)
                     .Case("r25:24", Hexagon::D12)
                     .Case("r27:26", Hexagon::D13)
                     .Case("r29:28", Hexagon::D14)
                     .Case("r31:30", Hexagon::D15)
                     .Case("sp", Hexagon::R29)
                     .Case("fp", Hexagon::R30)
                     .Case("lr", Hexagon::R31)
                     .Case("p0", Hexagon::P0)
                     .Case("p1", Hexagon::P1)
                     .Case("p2", Hexagon::P2)
                     .Case("p3", Hexagon::P3)
                     .Case("sa0", Hexagon::SA0)
                     .Case("lc0", Hexagon::LC0)
                     .Case("sa1", Hexagon::SA1)
                     .Case("lc1", Hexagon::LC1)
                     .Case("m0", Hexagon::M0)
                     .Case("m1", Hexagon::M1)
                     .Case("usr", Hexagon::USR)
                     .Case("ugp", Hexagon::UGP)
                     .Case("cs0", Hexagon::CS0)
                     .Case("cs1", Hexagon::CS1)
                     .Default(Register());
  if (Reg)
    return Reg;

  report_fatal_error("Invalid register name global variable");
}
/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes
/// that Chain/Glue are the input chain/glue to use, and that TheCall is the
/// call being lowered. Returns an SDNode with the same number of values as
/// the ISD::CALL.
SDValue HexagonTargetLowering::LowerCallResult(
    SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;

  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon_HVX);
  else
    CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    SDValue RetVal;
    if (RVLocs[i].getValVT() == MVT::i1) {
      // Return values of type MVT::i1 require special handling. The reason
      // is that MVT::i1 is associated with the PredRegs register class, but
      // values of that type are still returned in R0. Generate an explicit
      // copy into a predicate register from R0, and treat the value of the
      // predicate register as the call result.
      auto &MRI = DAG.getMachineFunction().getRegInfo();
      SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
                                       MVT::i32, Glue);
      // FR0 = (Value, Chain, Glue)
      Register PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
      SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR,
                                     FR0.getValue(0), FR0.getValue(2));
      // TPR = (Chain, Glue)
      // Don't glue this CopyFromReg, because it copies from a virtual
      // register. If it is glued to the call, InstrEmitter will add it
      // as an implicit def to the call (EmitMachineNode).
      RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1);
      Glue = TPR.getValue(1);
      Chain = TPR.getValue(0);
    } else {
      RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
                                  RVLocs[i].getValVT(), Glue);
      Glue = RetVal.getValue(2);
      Chain = RetVal.getValue(1);
    }
    InVals.push_back(RetVal.getValue(0));
  }

  return Chain;
}
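
// The overall shape of a lowered (non-tail) call produced below is roughly:
//   CALLSEQ_START -> CopyToReg of the argument registers (glued together)
//     -> HexagonISD::CALL / CALLnr -> CALLSEQ_END
//     -> CopyFromReg of the return registers (via LowerCallResult above).
// Tail calls instead end in HexagonISD::TC_RETURN and skip the call-sequence
// markers.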
/// LowerCall - Functions arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
SDValue
HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  const SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  bool DoesNotReturn = CLI.DoesNotReturn;

  bool IsStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  unsigned NumParams = CLI.CB ? CLI.CB->getFunctionType()->getNumParams() : 0;
  if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);

  // Linux ABI treats var-arg calls the same way as regular ones.
  bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs, *DAG.getContext(),
                        NumParams);

  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX);
  else if (DisableArgsMinAlignment)
    CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_Legacy);
  else
    CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);

  if (CLI.IsTailCall) {
    bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
    CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                         IsVarArg, IsStructRet, StructAttrFlag, Outs,
                         OutVals, Ins, DAG);
    for (const CCValAssign &VA : ArgLocs) {
      if (VA.isMemLoc()) {
        CLI.IsTailCall = false;
        break;
      }
    }
    LLVM_DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n"
                                         : "Argument must be passed on stack. "
                                           "Not eligible for Tail Call\n"));
  }
  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getStackSize();
  SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  SDValue StackPtr =
      DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);

  bool NeedsArgAlign = false;
  Align LargestAlignSeen;
  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    // Record if we need > 8 byte alignment on an argument.
    bool ArgAlign = Subtarget.isHVXVectorType(VA.getValVT());
    NeedsArgAlign |= ArgAlign;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
      default:
        // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::BCvt:
        Arg = DAG.getBitcast(VA.getLocVT(), Arg);
        break;
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
        break;
      case CCValAssign::AExt:
        Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
        break;
    }

    if (VA.isMemLoc()) {
      unsigned LocMemOffset = VA.getLocMemOffset();
      SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
                                        StackPtr.getValueType());
      MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
      if (ArgAlign)
        LargestAlignSeen = std::max(
            LargestAlignSeen, Align(VA.getLocVT().getStoreSizeInBits() / 8));
      if (Flags.isByVal()) {
        // The argument is a struct passed by value. According to LLVM, "Arg"
        // is a pointer.
        MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
                                                        Flags, DAG, dl));
      } else {
        MachinePointerInfo LocPI = MachinePointerInfo::getStack(
            DAG.getMachineFunction(), LocMemOffset);
        SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI);
        MemOpChains.push_back(S);
      }
      continue;
    }

    // Arguments that can be passed on register must be kept at RegsToPass
    // vector.
    RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
  }

  if (NeedsArgAlign && Subtarget.hasV60Ops()) {
    LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
    Align VecAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
    LargestAlignSeen = std::max(LargestAlignSeen, VecAlign);
    MFI.ensureMaxAlignment(LargestAlignSeen);
  }
  // Transform all store nodes into one single node because all store
  // nodes are independent of each other.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  SDValue Glue;
  if (!CLI.IsTailCall) {
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
    Glue = Chain.getValue(1);
  }

  // Build a sequence of copy-to-reg nodes chained together with token
  // chain and flag operands which copy the outgoing args into registers.
  // The Glue is necessary since all emitted instructions must be
  // stuck together.
  if (!CLI.IsTailCall) {
    for (const auto &R : RegsToPass) {
      Chain = DAG.getCopyToReg(Chain, dl, R.first, R.second, Glue);
      Glue = Chain.getValue(1);
    }
  } else {
    // For tail calls lower the arguments to the 'real' stack slot.
    //
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.
    //
    // Do not flag preceding copytoreg stuff together with the following stuff.
    Glue = SDValue();
    for (const auto &R : RegsToPass) {
      Chain = DAG.getCopyToReg(Chain, dl, R.first, R.second, Glue);
      Glue = Chain.getValue(1);
    }
    Glue = SDValue();
  }

  bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls();
  unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT, 0, Flags);
  } else if (ExternalSymbolSDNode *S =
                 dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, Flags);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (const auto &R : RegsToPass)
    Ops.push_back(DAG.getRegister(R.first, R.second.getValueType()));

  const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (Glue.getNode())
    Ops.push_back(Glue);

  if (CLI.IsTailCall) {
    MFI.setHasTailCall();
    return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
  }

  // Set this here because we need to know this for "hasFP" in frame lowering.
  // The target-independent code calls getFrameRegister before setting it, and
  // getFrameRegister uses hasFP to determine whether the function has FP.
  MFI.setHasCalls(true);

  unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL;
  Chain = DAG.getNode(OpCode, dl, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, dl);
  Glue = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, Glue, CallConv, IsVarArg, Ins, dl, DAG,
                         InVals, OutVals, Callee);
}
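
// Post-indexed forms let a load/store update its base register in the same
// instruction: an i32 load followed by "add base, #4" can become a single
// post-incremented load such as "r1 = memw(r0++#4)" (illustrative assembly;
// the legal increments are checked by isValidAutoIncImm below).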
/// Returns true by value, base pointer and offset pointer and addressing
/// mode by reference if this node can be combined with a load / store to
/// form a post-indexed load / store.
bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
      SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
      SelectionDAG &DAG) const {
  LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N);
  if (!LSN)
    return false;
  EVT VT = LSN->getMemoryVT();
  bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
                     VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64 ||
                     VT == MVT::v2i16 || VT == MVT::v2i32 || VT == MVT::v4i8 ||
                     VT == MVT::v4i16 || VT == MVT::v8i8 ||
                     Subtarget.isHVXVectorType(VT.getSimpleVT());
  if (!IsLegalType)
    return false;

  if (Op->getOpcode() != ISD::ADD)
    return false;
  Base = Op->getOperand(0);
  Offset = Op->getOperand(1);
  if (!isa<ConstantSDNode>(Offset.getNode()))
    return false;
  AM = ISD::POST_INC;

  int32_t V = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
  return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V);
}
SDValue
HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  unsigned LR = HRI.getRARegister();

  if ((Op.getOpcode() != ISD::INLINEASM &&
       Op.getOpcode() != ISD::INLINEASM_BR) || HMFI.hasClobberLR())
    return Op;

  unsigned NumOps = Op.getNumOperands();
  if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue)
    --NumOps;  // Ignore the flag operand.

  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
    unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
    unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
    ++i;  // Skip the ID value.

    switch (InlineAsm::getKind(Flags)) {
      default:
        llvm_unreachable("Bad flags!");
      case InlineAsm::Kind_RegUse:
      case InlineAsm::Kind_Imm:
      case InlineAsm::Kind_Mem:
        i += NumVals;
        break;
      case InlineAsm::Kind_Clobber:
      case InlineAsm::Kind_RegDef:
      case InlineAsm::Kind_RegDefEarlyClobber: {
        for (; NumVals; --NumVals, ++i) {
          Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
          if (Reg != LR)
            continue;
          HMFI.setHasClobberLR(true);
          return Op;
        }
        break;
      }
    }
  }

  return Op;
}
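
// Note on the two DCFETCH lowerings below: dcfetch is Hexagon's data-cache
// prefetch hint and has no architecturally visible side effects, which is why
// the generic prefetch nodes are rewritten into a plain HexagonISD::DCFETCH
// with a zero offset; later patterns may fold a constant offset back in.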
// Need to transform ISD::PREFETCH into something that doesn't inherit
// all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and
// SDNPMayStore.
SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Addr = Op.getOperand(1);
  // Lower it to DCFETCH($reg, #0).  A "pat" will try to merge the offset in,
  // if the "reg" is fed by an "add".
  SDLoc DL(Op);
  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
  return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
}
// Custom-handle ISD::READCYCLECOUNTER because the target-independent SDNode
// is marked as having side-effects, while the register read on Hexagon does
// not have any. TableGen refuses to accept the direct pattern from that node
// to the corresponding Hexagon instruction.
SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDLoc dl(Op);
  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
  return DAG.getNode(HexagonISD::READCYCLE, dl, VTs, Chain);
}
SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  // Lower the hexagon_prefetch builtin to DCFETCH, as above.
  if (IntNo == Intrinsic::hexagon_prefetch) {
    SDValue Addr = Op.getOperand(2);
    SDLoc DL(Op);
    SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
    return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
  }
  return SDValue();
}
SDValue
HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  SDValue Align = Op.getOperand(2);
  SDLoc dl(Op);

  ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align);
  assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");

  unsigned A = AlignConst->getSExtValue();
  auto &HFI = *Subtarget.getFrameLowering();
  // "Zero" means natural stack alignment.
  if (A == 0)
    A = HFI.getStackAlign().value();

  LLVM_DEBUG({
    dbgs () << __func__ << " Align: " << A << " Size: ";
    Size.getNode()->dump(&DAG);
    dbgs() << "\n";
  });

  SDValue AC = DAG.getConstant(A, dl, MVT::i32);
  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
  SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);

  DAG.ReplaceAllUsesOfValueWith(Op, AA);
  return AA;
}
SDValue HexagonTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Linux ABI treats var-arg calls the same way as regular ones.
  bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs,
                        *DAG.getContext(),
                        MF.getFunction().getFunctionType()->getNumParams());

  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX);
  else if (DisableArgsMinAlignment)
    CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_Legacy);
  else
    CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);

  // For LLVM, in the case when returning a struct by value (>8byte),
  // the first argument is a pointer that points to the location on caller's
  // stack where the return value will be stored. For Hexagon, the location on
  // caller's stack is passed only when the struct size is smaller than (and
  // equal to) 8 bytes. If not, no address will be passed into the callee and
  // the callee returns the result directly through R0/R1.
  auto NextSingleReg = [] (const TargetRegisterClass &RC, unsigned Reg) {
    switch (RC.getID()) {
      case Hexagon::IntRegsRegClassID:
        return Reg - Hexagon::R0 + 1;
      case Hexagon::DoubleRegsRegClassID:
        return (Reg - Hexagon::D0 + 1) * 2;
      case Hexagon::HvxVRRegClassID:
        return Reg - Hexagon::V0 + 1;
      case Hexagon::HvxWRRegClassID:
        return (Reg - Hexagon::W0 + 1) * 2;
    }
    llvm_unreachable("Unexpected register class");
  };

  auto &HFL = const_cast<HexagonFrameLowering&>(*Subtarget.getFrameLowering());
  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
  HFL.FirstVarArgSavedReg = 0;
  HMFI.setFirstNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    ISD::ArgFlagsTy Flags = Ins[i].Flags;
    bool ByVal = Flags.isByVal();

    // Arguments passed in registers:
    // 1. 32- and 64-bit values and HVX vectors are passed directly,
    // 2. Large structs are passed via an address, and the address is
    //    passed in a register.
    if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= 8)
      llvm_unreachable("ByValSize must be bigger than 8 bytes");

    bool InReg = VA.isRegLoc() &&
                 (!ByVal || (ByVal && Flags.getByValSize() > 8));

    if (InReg) {
      MVT RegVT = VA.getLocVT();
      if (VA.getLocInfo() == CCValAssign::BCvt)
        RegVT = VA.getValVT();

      const TargetRegisterClass *RC = getRegClassFor(RegVT);
      Register VReg = MRI.createVirtualRegister(RC);
      SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);

      // Treat values of type MVT::i1 specially: they are passed in
      // registers of type i32, but they need to remain as values of
      // type i1 for consistency of the argument lowering.
      if (VA.getValVT() == MVT::i1) {
        assert(RegVT.getSizeInBits() <= 32);
        SDValue T = DAG.getNode(ISD::AND, dl, RegVT,
                                Copy, DAG.getConstant(1, dl, RegVT));
        Copy = DAG.getSetCC(dl, MVT::i1, T, DAG.getConstant(0, dl, RegVT),
                            ISD::SETNE);
      } else {
        unsigned RegSize = RegVT.getSizeInBits();
        assert(RegSize == 32 || RegSize == 64 ||
               Subtarget.isHVXVectorType(RegVT));
      }
      InVals.push_back(Copy);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      HFL.FirstVarArgSavedReg = NextSingleReg(*RC, VA.getLocReg());
    } else {
      assert(VA.isMemLoc() && "Argument should be passed in memory");

      // If it's a byval parameter, then we need to compute the
      // "real" size, not the size of the pointer.
      unsigned ObjSize = Flags.isByVal()
                            ? Flags.getByValSize()
                            : VA.getLocVT().getStoreSizeInBits() / 8;

      // Create the frame index object for this incoming parameter.
      int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
      int FI = MFI.CreateFixedObject(ObjSize, Offset, true);
      SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);

      if (Flags.isByVal()) {
        // If it's a pass-by-value aggregate, then do not dereference the stack
        // location. Instead, we should generate a reference to the stack
        // location.
        InVals.push_back(FIN);
      } else {
        SDValue L = DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                MachinePointerInfo::getFixedStack(MF, FI, 0));
        InVals.push_back(L);
      }
    }
  }

  if (IsVarArg && Subtarget.isEnvironmentMusl()) {
    for (int i = HFL.FirstVarArgSavedReg; i < 6; i++)
      MRI.addLiveIn(Hexagon::R0+i);
  }

  if (IsVarArg && Subtarget.isEnvironmentMusl()) {
    HMFI.setFirstNamedArgFrameIndex(HMFI.getFirstNamedArgFrameIndex() - 1);
    HMFI.setLastNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));

    // Create Frame index for the start of register saved area.
    int NumVarArgRegs = 6 - HFL.FirstVarArgSavedReg;
    bool RequiresPadding = (NumVarArgRegs & 1);
    int RegSaveAreaSizePlusPadding = RequiresPadding
                                        ? (NumVarArgRegs + 1) * 4
                                        : NumVarArgRegs * 4;

    if (RegSaveAreaSizePlusPadding > 0) {
      // The offset to saved register area should be 8 byte aligned.
      int RegAreaStart = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
      if (!(RegAreaStart % 8))
        RegAreaStart = (RegAreaStart + 7) & -8;

      int RegSaveAreaFrameIndex =
        MFI.CreateFixedObject(RegSaveAreaSizePlusPadding, RegAreaStart, true);
      HMFI.setRegSavedAreaStartFrameIndex(RegSaveAreaFrameIndex);

      // This will point to the next argument passed via stack.
      int Offset = RegAreaStart + RegSaveAreaSizePlusPadding;
      int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
      HMFI.setVarArgsFrameIndex(FI);
    } else {
      // This will point to the next argument passed via stack, when
      // there is no saved register area.
      int Offset = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
      int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
      HMFI.setRegSavedAreaStartFrameIndex(FI);
      HMFI.setVarArgsFrameIndex(FI);
    }
  }

  if (IsVarArg && !Subtarget.isEnvironmentMusl()) {
    // This will point to the next argument passed via stack.
    int Offset = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
    int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
    HMFI.setVarArgsFrameIndex(FI);
  }

  return Chain;
}
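
// For the musl environment, the va_list object filled in below holds three
// pointers stored at offsets 0, 4 and 8:
//   1. the current position in the register-save area,
//   2. the end of the register-save area,
//   3. the current position in the overflow (stack) argument area.
// LowerVACOPY relies on this 12-byte layout when copying one va_list to
// another.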
SDValue
HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  // VASTART stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  MachineFunction &MF = DAG.getMachineFunction();
  HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
  SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  if (!Subtarget.isEnvironmentMusl()) {
    return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }
  auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
  auto &HFL = *Subtarget.getFrameLowering();
  SDLoc DL(Op);
  SmallVector<SDValue, 8> MemOps;

  // Get frame index of va_list.
  SDValue FIN = Op.getOperand(1);

  // If the first vararg register is odd, add 4 bytes to the start of the
  // saved register area to point to the first register location. This is
  // because the saved register area has to be 8 byte aligned. In case of an
  // odd start register, there will be 4 bytes of padding at the beginning of
  // the saved register area. If all registers are used up, the following
  // condition will handle it correctly.
  SDValue SavedRegAreaStartFrameIndex =
    DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(), MVT::i32);

  auto PtrVT = getPointerTy(DAG.getDataLayout());

  if (HFL.FirstVarArgSavedReg & 1)
    SavedRegAreaStartFrameIndex =
      DAG.getNode(ISD::ADD, DL, PtrVT,
                  DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(),
                                    MVT::i32),
                  DAG.getIntPtrConstant(4, DL));

  // Store the saved register area start pointer.
  SDValue Store =
      DAG.getStore(Op.getOperand(0), DL,
                   SavedRegAreaStartFrameIndex,
                   FIN, MachinePointerInfo(SV));
  MemOps.push_back(Store);

  // Store saved register area end pointer.
  FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
                    FIN, DAG.getIntPtrConstant(4, DL));
  Store = DAG.getStore(Op.getOperand(0), DL,
                       DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
                                         PtrVT),
                       FIN, MachinePointerInfo(SV, 4));
  MemOps.push_back(Store);

  // Store overflow area pointer.
  FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
                    FIN, DAG.getIntPtrConstant(4, DL));
  Store = DAG.getStore(Op.getOperand(0), DL,
                       DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
                                         PtrVT),
                       FIN, MachinePointerInfo(SV, 8));
  MemOps.push_back(Store);

  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
SDValue
HexagonTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
  // Assert that the linux ABI is enabled for the current compilation.
  assert(Subtarget.isEnvironmentMusl() && "Linux ABI should be enabled");
  SDValue Chain = Op.getOperand(0);
  SDValue DestPtr = Op.getOperand(1);
  SDValue SrcPtr = Op.getOperand(2);
  const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  SDLoc DL(Op);
  // Size of the va_list is 12 bytes as it has 3 pointers. Therefore,
  // we need to memcpy 12 bytes from one va_list to the other.
  return DAG.getMemcpy(Chain, DL, DestPtr, SrcPtr,
                       DAG.getIntPtrConstant(12, DL), Align(4),
                       /*isVolatile*/ false, false, false,
                       MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
}
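
// Example of why LowerSETCC prefers sign-extension for short integers: for an
// i8 compare against -1, zero-extending both sides would require
// materializing 0xff, whereas sign-extending to i32 lets the compare use the
// small negative immediate directly (e.g. cmp.eq(r0,#-1)).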
SDValue
HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  MVT ResTy = ty(Op);
  MVT OpTy = ty(LHS);

  if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
    MVT ElemTy = OpTy.getVectorElementType();
    assert(ElemTy.isScalarInteger());
    MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
                                  OpTy.getVectorNumElements());
    return DAG.getSetCC(dl, ResTy,
                        DAG.getSExtOrTrunc(LHS, SDLoc(LHS), WideTy),
                        DAG.getSExtOrTrunc(RHS, SDLoc(RHS), WideTy), CC);
  }

  // Treat all other vector types as legal.
  if (ResTy.isVector())
    return Op;

  // Comparisons of short integers should use sign-extend, not zero-extend,
  // since we can represent small negative values in the compare instructions.
  // The LLVM default is to use zero-extend arbitrarily in these cases.
  auto isSExtFree = [this](SDValue N) {
    switch (N.getOpcode()) {
      case ISD::TRUNCATE: {
        // A sign-extend of a truncate of a sign-extend is free.
        SDValue Op = N.getOperand(0);
        if (Op.getOpcode() != ISD::AssertSext)
          return false;
        EVT OrigTy = cast<VTSDNode>(Op.getOperand(1))->getVT();
        unsigned ThisBW = ty(N).getSizeInBits();
        unsigned OrigBW = OrigTy.getSizeInBits();
        // The type that was sign-extended to get the AssertSext must be
        // narrower than the type of N (so that N has still the same value
        // as the original).
        return ThisBW >= OrigBW;
      }
      case ISD::LOAD:
        // We have sign-extended loads.
        return true;
    }
    return false;
  };

  if (OpTy == MVT::i8 || OpTy == MVT::i16) {
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
    bool IsNegative = C && C->getAPIntValue().isNegative();
    if (IsNegative || isSExtFree(LHS) || isSExtFree(RHS))
      return DAG.getSetCC(dl, ResTy,
                          DAG.getSExtOrTrunc(LHS, SDLoc(LHS), MVT::i32),
                          DAG.getSExtOrTrunc(RHS, SDLoc(RHS), MVT::i32), CC);
  }

  return SDValue();
}
SDValue
HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue PredOp = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
  MVT OpTy = ty(Op1);
  const SDLoc &dl(Op);

  if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
    MVT ElemTy = OpTy.getVectorElementType();
    assert(ElemTy.isScalarInteger());
    MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
                                  OpTy.getVectorNumElements());
    // Generate (trunc (select (_, sext, sext))).
    return DAG.getSExtOrTrunc(
              DAG.getSelect(dl, WideTy, PredOp,
                            DAG.getSExtOrTrunc(Op1, dl, WideTy),
                            DAG.getSExtOrTrunc(Op2, dl, WideTy)),
              dl, OpTy);
  }

  return SDValue();
}
SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
  EVT ValTy = Op.getValueType();
  ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
  Constant *CVal = nullptr;
  bool isVTi1Type = false;
  if (auto *CV = dyn_cast<ConstantVector>(CPN->getConstVal())) {
    if (cast<VectorType>(CV->getType())->getElementType()->isIntegerTy(1)) {
      IRBuilder<> IRB(CV->getContext());
      SmallVector<Constant*, 128> NewConst;
      unsigned VecLen = CV->getNumOperands();
      assert(isPowerOf2_32(VecLen) &&
             "conversion only supported for pow2 VectorSize");
      for (unsigned i = 0; i < VecLen; ++i)
        NewConst.push_back(IRB.getInt8(CV->getOperand(i)->isZeroValue()));

      CVal = ConstantVector::get(NewConst);
      isVTi1Type = true;
    }
  }
  Align Alignment = CPN->getAlign();
  bool IsPositionIndependent = isPositionIndependent();
  unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0;

  unsigned Offset = 0;
  SDValue T;
  if (CPN->isMachineConstantPoolEntry())
    T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Alignment,
                                  Offset, TF);
  else if (isVTi1Type)
    T = DAG.getTargetConstantPool(CVal, ValTy, Alignment, Offset, TF);
  else
    T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Alignment, Offset,
                                  TF);

  assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
         "Inconsistent target flag encountered");

  if (IsPositionIndependent)
    return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
  return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
}
SDValue
HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  int Idx = cast<JumpTableSDNode>(Op)->getIndex();
  if (isPositionIndependent()) {
    SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
    return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
  }

  SDValue T = DAG.getTargetJumpTable(Idx, VT);
  return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
}
SDValue
HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return LR, which contains the return address. Mark it an implicit live-in.
  Register Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
SDValue
HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                                         HRI.getFrameRegister(), VT);
  while (Depth--)
    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
                            MachinePointerInfo());
  return FrameAddr;
}
SDValue
HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
  SDLoc dl(Op);
  return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
}
SDValue
HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  auto *GAN = cast<GlobalAddressSDNode>(Op);
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  auto *GV = GAN->getGlobal();
  int64_t Offset = GAN->getOffset();

  auto &HLOF = *HTM.getObjFileLowering();
  Reloc::Model RM = HTM.getRelocationModel();

  if (RM == Reloc::Static) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
    const GlobalObject *GO = GV->getAliaseeObject();
    if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM))
      return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
    return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
  }

  bool UsePCRel = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
  if (UsePCRel) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
                                            HexagonII::MO_PCREL);
    return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
  }

  // Use GOT index.
  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
  SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
  return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
}
// Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
SDValue
HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  SDLoc dl(Op);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  Reloc::Model RM = HTM.getRelocationModel();
  if (RM == Reloc::Static) {
    SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
    return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
  }

  SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
  return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
}
SDValue
HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
      const {
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
                                               HexagonII::MO_PCREL);
  return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
}
SDValue
HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
      GlobalAddressSDNode *GA, SDValue Glue, EVT PtrVT, unsigned ReturnReg,
      unsigned char OperandFlags) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDLoc dl(GA);
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
                                           GA->getValueType(0),
                                           GA->getOffset(),
                                           OperandFlags);
  // Create operands for the call. The operands should be, in order:
  // 1. Chain SDValue.
  // 2. Callee, which in this case is the global address value.
  // 3. Registers live into the call. In this case it is R0, as we
  //    have just one argument to be passed.
  // 4. Glue.
  // Note: The order is important.

  const auto &HRI = *Subtarget.getRegisterInfo();
  const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C);
  assert(Mask && "Missing call preserved mask for calling convention");
  SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT),
                    DAG.getRegisterMask(Mask), Glue };
  Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);

  // Inform MFI that function has calls.
  MFI.setAdjustsStack(true);

  Glue = Chain.getValue(1);
  return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
}
//
// Lower using the initial-exec model for TLS addresses.
//
SDValue
HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
      SelectionDAG &DAG) const {
  SDLoc dl(GA);
  int64_t Offset = GA->getOffset();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Get the thread pointer.
  SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);

  bool IsPositionIndependent = isPositionIndependent();
  unsigned char TF =
      IsPositionIndependent ? HexagonII::MO_IEGOT : HexagonII::MO_IE;

  // First generate the TLS symbol address.
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT,
                                           Offset, TF);

  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);

  if (IsPositionIndependent) {
    // Generate the GOT pointer in case of position independent code.
    SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Sym, DAG);

    // Add the TLS symbol address to the GOT pointer. This gives
    // GOT relative relocation for the symbol.
    Sym = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
  }

  // Load the offset value for the TLS symbol. This offset is relative to
  // the thread pointer.
  SDValue LoadOffset =
      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Sym, MachinePointerInfo());

  // Address of the thread local variable is the add of thread
  // pointer and the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, TP, LoadOffset);
}
//
// Lower using the local-exec model for TLS addresses.
//
SDValue
HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
      SelectionDAG &DAG) const {
  SDLoc dl(GA);
  int64_t Offset = GA->getOffset();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Get the thread pointer.
  SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
  // Generate the TLS symbol address.
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
                                           HexagonII::MO_TPREL);
  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);

  // Address of the thread local variable is the add of thread
  // pointer and the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, TP, Sym);
}
//
// Lower using the general-dynamic model for TLS addresses.
//
SDValue
HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
      SelectionDAG &DAG) const {
  SDLoc dl(GA);
  int64_t Offset = GA->getOffset();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // First generate the TLS symbol address.
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
                                           HexagonII::MO_GDGOT);

  // Then, generate the GOT pointer.
  SDValue GOT = LowerGLOBAL_OFFSET_TABLE(TGA, DAG);

  // Add the TLS symbol and the GOT pointer.
  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
  SDValue Chain = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);

  // Copy over the argument to R0.
  SDValue InGlue;
  Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InGlue);
  InGlue = Chain.getValue(1);

  unsigned Flags = DAG.getSubtarget<HexagonSubtarget>().useLongCalls()
                       ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
                       : HexagonII::MO_GDPLT;

  return GetDynamicTLSAddr(DAG, Chain, GA, InGlue, PtrVT,
                           Hexagon::R0, Flags);
}
//
// Lower TLS addresses.
//
// For now, for dynamic models, we only support the general dynamic model.
//
SDValue
HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op,
      SelectionDAG &DAG) const {
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);

  switch (HTM.getTLSModel(GA->getGlobal())) {
    case TLSModel::GeneralDynamic:
    case TLSModel::LocalDynamic:
      return LowerToTLSGeneralDynamicModel(GA, DAG);
    case TLSModel::InitialExec:
      return LowerToTLSInitialExecModel(GA, DAG);
    case TLSModel::LocalExec:
      return LowerToTLSLocalExecModel(GA, DAG);
  }
  llvm_unreachable("Bogus TLS model");
}
//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//

HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
                                             const HexagonSubtarget &ST)
    : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
      Subtarget(ST) {
  auto &HRI = *Subtarget.getRegisterInfo();

  setPrefLoopAlignment(Align(16));
  setMinFunctionAlignment(Align(4));
  setPrefFunctionAlignment(Align(16));
  setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
  setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
  setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);

  setMaxAtomicSizeInBitsSupported(64);
  setMinCmpXchgSizeInBits(32);

  if (EnableHexSDNodeSched)
    setSchedulingPreference(Sched::VLIW);
  else
    setSchedulingPreference(Sched::Source);

  // Limits for inline expansion of memcpy/memmove
  MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
  MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
  MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
  MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
  MaxStoresPerMemset = MaxStoresPerMemsetCL;
  MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;
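  // With the limits above, small memcpy/memmove/memset calls are expanded
  // inline into at most that many store instructions instead of a libcall;
  // the *OptSize variants apply when the function is optimized for size.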

  //
  // Set up register classes.
  //

  addRegisterClass(MVT::i1,    &Hexagon::PredRegsRegClass);
  addRegisterClass(MVT::v2i1,  &Hexagon::PredRegsRegClass);  // bbbbaaaa
  addRegisterClass(MVT::v4i1,  &Hexagon::PredRegsRegClass);  // ddccbbaa
  addRegisterClass(MVT::v8i1,  &Hexagon::PredRegsRegClass);  // hgfedcba
  addRegisterClass(MVT::i32,   &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::v4i8,  &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::i64,   &Hexagon::DoubleRegsRegClass);
  addRegisterClass(MVT::v8i8,  &Hexagon::DoubleRegsRegClass);
  addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
  addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);

  addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);

  //
  // Handling of scalar operations.
  //
  // All operations default to "legal", except:
  // - indexed loads and stores (pre-/post-incremented),
  // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
  //   ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
  //   FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
  //   FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
  // which default to "expand" for at least one type.

  setOperationAction(ISD::ConstantFP,           MVT::f32,   Legal);
  setOperationAction(ISD::ConstantFP,           MVT::f64,   Legal);
  setOperationAction(ISD::TRAP,                 MVT::Other, Legal);
  setOperationAction(ISD::ConstantPool,         MVT::i32,   Custom);
  setOperationAction(ISD::JumpTable,            MVT::i32,   Custom);
  setOperationAction(ISD::BUILD_PAIR,           MVT::i64,   Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG,    MVT::i1,    Expand);
  setOperationAction(ISD::INLINEASM,            MVT::Other, Custom);
  setOperationAction(ISD::INLINEASM_BR,         MVT::Other, Custom);
  setOperationAction(ISD::PREFETCH,             MVT::Other, Custom);
  setOperationAction(ISD::READCYCLECOUNTER,     MVT::i64,   Custom);
  setOperationAction(ISD::INTRINSIC_VOID,       MVT::Other, Custom);
  setOperationAction(ISD::EH_RETURN,            MVT::Other, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE,  MVT::i32,   Custom);
  setOperationAction(ISD::GlobalTLSAddress,     MVT::i32,   Custom);
  setOperationAction(ISD::ATOMIC_FENCE,         MVT::Other, Custom);

  // Custom legalize GlobalAddress nodes into CONST32.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i8,  Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);

  // Hexagon needs to optimize cases with negative constants.
  setOperationAction(ISD::SETCC, MVT::i8,    Custom);
  setOperationAction(ISD::SETCC, MVT::i16,   Custom);
  setOperationAction(ISD::SETCC, MVT::v4i8,  Custom);
  setOperationAction(ISD::SETCC, MVT::v2i16, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAEND,   MVT::Other, Expand);
  setOperationAction(ISD::VAARG,   MVT::Other, Expand);
  if (Subtarget.isEnvironmentMusl())
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);

  if (EmitJumpTables)
    setMinimumJumpTableEntries(MinimumJumpTables);
  else
    setMinimumJumpTableEntries(std::numeric_limits<unsigned>::max());
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  for (unsigned LegalIntOp :
       {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) {
    setOperationAction(LegalIntOp, MVT::i32, Legal);
    setOperationAction(LegalIntOp, MVT::i64, Legal);
  }

  // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
  // but they only operate on i64.
  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::UADDO, VT, Custom);
    setOperationAction(ISD::USUBO, VT, Custom);
    setOperationAction(ISD::SADDO, VT, Expand);
    setOperationAction(ISD::SSUBO, VT, Expand);
    setOperationAction(ISD::UADDO_CARRY, VT, Expand);
    setOperationAction(ISD::USUBO_CARRY, VT, Expand);
  }
  setOperationAction(ISD::UADDO_CARRY, MVT::i64, Custom);
  setOperationAction(ISD::USUBO_CARRY, MVT::i64, Custom);
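  // For illustration: a 64-bit @llvm.uadd.with.overflow reaches the custom
  // hook as UADDO on i64 and can use the carry-generating instructions,
  // while the carry-propagating UADDO_CARRY/USUBO_CARRY forms stay custom
  // only for i64 and are expanded for the narrower types set above.
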
  setOperationAction(ISD::CTLZ, MVT::i8,  Promote);
  setOperationAction(ISD::CTLZ, MVT::i16, Promote);
  setOperationAction(ISD::CTTZ, MVT::i8,  Promote);
  setOperationAction(ISD::CTTZ, MVT::i16, Promote);

  // Popcount can count # of 1s in i64 but returns i32.
  setOperationAction(ISD::CTPOP, MVT::i8,  Promote);
  setOperationAction(ISD::CTPOP, MVT::i16, Promote);
  setOperationAction(ISD::CTPOP, MVT::i32, Promote);
  setOperationAction(ISD::CTPOP, MVT::i64, Legal);
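  // For illustration: a 32-bit @llvm.ctpop is promoted to the 64-bit form
  // set Legal above, matching the native popcount (i64 input, i32-sized
  // result) described in the preceding comment.
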
  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
  setOperationAction(ISD::BSWAP, MVT::i32, Legal);
  setOperationAction(ISD::BSWAP, MVT::i64, Legal);

  setOperationAction(ISD::FSHL, MVT::i32, Legal);
  setOperationAction(ISD::FSHL, MVT::i64, Legal);
  setOperationAction(ISD::FSHR, MVT::i32, Legal);
  setOperationAction(ISD::FSHR, MVT::i64, Legal);

  for (unsigned IntExpOp :
       {ISD::SDIV,      ISD::UDIV,      ISD::SREM,      ISD::UREM,
        ISD::SDIVREM,   ISD::UDIVREM,   ISD::ROTL,      ISD::ROTR,
        ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
        ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
    for (MVT VT : MVT::integer_valuetypes())
      setOperationAction(IntExpOp, VT, Expand);
  }

  for (unsigned FPExpOp :
       {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
        ISD::FPOW, ISD::FCOPYSIGN}) {
    for (MVT VT : MVT::fp_valuetypes())
      setOperationAction(FPExpOp, VT, Expand);
  }

  // No extending loads from i32.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
    setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i32, Expand);
  }
  // Turn FP truncstore into trunc + store.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  // Turn FP extload into load/fpextend.
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);

  // Expand BR_CC and SELECT_CC for all integer and fp types.
  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::BR_CC,     VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  for (MVT VT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::BR_CC,     VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Handling of vector operations.

  // Set the action for vector operations to "expand", then override it with
  // either "custom" or "legal" for specific cases.
  static const unsigned VectExpOps[] = {
    // Integer arithmetic:
    ISD::ADD,     ISD::SUB,     ISD::MUL,      ISD::SDIV,      ISD::UDIV,
    ISD::SREM,    ISD::UREM,    ISD::SDIVREM,  ISD::UDIVREM,   ISD::SADDO,
    ISD::UADDO,   ISD::SSUBO,   ISD::USUBO,    ISD::SMUL_LOHI, ISD::UMUL_LOHI,
    ISD::AND,     ISD::OR,      ISD::XOR,      ISD::ROTL,      ISD::ROTR,
    ISD::CTPOP,   ISD::CTLZ,    ISD::CTTZ,     ISD::BSWAP,     ISD::BITREVERSE,
    // Floating point arithmetic/math functions:
    ISD::FADD,    ISD::FSUB,    ISD::FMUL,     ISD::FMA,       ISD::FDIV,
    ISD::FREM,    ISD::FNEG,    ISD::FABS,     ISD::FSQRT,     ISD::FSIN,
    ISD::FCOS,    ISD::FPOW,    ISD::FLOG,     ISD::FLOG2,
    ISD::FLOG10,  ISD::FEXP,    ISD::FEXP2,    ISD::FCEIL,     ISD::FTRUNC,
    ISD::FRINT,   ISD::FNEARBYINT,             ISD::FROUND,    ISD::FFLOOR,
    ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,  ISD::FLDEXP,
    ISD::BR_CC,   ISD::SELECT_CC,              ISD::ConstantPool,
    ISD::BUILD_VECTOR,       ISD::SCALAR_TO_VECTOR,
    ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT,
    ISD::EXTRACT_SUBVECTOR,  ISD::INSERT_SUBVECTOR,
    ISD::CONCAT_VECTORS,     ISD::VECTOR_SHUFFLE,
  };

  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
    for (unsigned VectExpOp : VectExpOps)
      setOperationAction(VectExpOp, VT, Expand);

    // Expand all extending loads and truncating stores:
    for (MVT TargetVT : MVT::fixedlen_vector_valuetypes()) {
      if (TargetVT == VT)
        continue;
      setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, TargetVT, VT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, TargetVT, VT, Expand);
      setTruncStoreAction(VT, TargetVT, Expand);
    }

    // Normalize all inputs to SELECT to be vectors of i32.
    if (VT.getVectorElementType() != MVT::i32) {
      MVT VT32 = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, VT32);
    }
    setOperationAction(ISD::SRA, VT, Custom);
    setOperationAction(ISD::SHL, VT, Custom);
    setOperationAction(ISD::SRL, VT, Custom);
  }

  // Extending loads from (native) vectors of i8 into (native) vectors of i16
  // are legal.
  setLoadExtAction(ISD::EXTLOAD,  MVT::v2i16, MVT::v2i8, Legal);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
  setLoadExtAction(ISD::EXTLOAD,  MVT::v4i16, MVT::v4i8, Legal);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8,  Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);

  // Types natively supported:
  for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
                       MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
    setOperationAction(ISD::BUILD_VECTOR,       NativeVT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  NativeVT, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR,  NativeVT, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR,   NativeVT, Custom);
    setOperationAction(ISD::CONCAT_VECTORS,     NativeVT, Custom);

    setOperationAction(ISD::ADD, NativeVT, Legal);
    setOperationAction(ISD::SUB, NativeVT, Legal);
    setOperationAction(ISD::MUL, NativeVT, Legal);
    setOperationAction(ISD::AND, NativeVT, Legal);
    setOperationAction(ISD::OR,  NativeVT, Legal);
    setOperationAction(ISD::XOR, NativeVT, Legal);

    if (NativeVT.getVectorElementType() != MVT::i1) {
      setOperationAction(ISD::SPLAT_VECTOR, NativeVT, Legal);
      setOperationAction(ISD::BSWAP,        NativeVT, Legal);
      setOperationAction(ISD::BITREVERSE,   NativeVT, Legal);
    }
  }

  for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32}) {
    setOperationAction(ISD::SMIN, VT, Legal);
    setOperationAction(ISD::SMAX, VT, Legal);
    setOperationAction(ISD::UMIN, VT, Legal);
    setOperationAction(ISD::UMAX, VT, Legal);
  }
  // Custom lower unaligned loads.
  // Also, for both loads and stores, verify the alignment of the address
  // in case it is a compile-time constant. This is a usability feature to
  // provide a meaningful error message to users.
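  // For illustration: a 4-byte access through a constant address such as
  // inttoptr (i32 2 to ptr) provides alignment 2 where alignment 4 is
  // required; validateConstPtrAlignment() later in this file rejects it and
  // the access is replaced with a trap plus a remark diagnostic.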
  for (MVT VT : {MVT::i16, MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
                 MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  }

  // Custom-lower load/stores of boolean vectors.
  for (MVT VT : {MVT::v2i1, MVT::v4i1, MVT::v8i1}) {
    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  }

  // Normalize integer compares to EQ/GT/UGT
  for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16}) {
    setCondCodeAction(ISD::SETNE,  VT, Expand);
    setCondCodeAction(ISD::SETLE,  VT, Expand);
    setCondCodeAction(ISD::SETGE,  VT, Expand);
    setCondCodeAction(ISD::SETLT,  VT, Expand);
    setCondCodeAction(ISD::SETULE, VT, Expand);
    setCondCodeAction(ISD::SETUGE, VT, Expand);
    setCondCodeAction(ISD::SETULT, VT, Expand);
  }

  // Normalize boolean compares to [U]LE/[U]LT
  for (MVT VT : {MVT::i1, MVT::v2i1, MVT::v4i1, MVT::v8i1}) {
    setCondCodeAction(ISD::SETGE,  VT, Expand);
    setCondCodeAction(ISD::SETGT,  VT, Expand);
    setCondCodeAction(ISD::SETUGE, VT, Expand);
    setCondCodeAction(ISD::SETUGT, VT, Expand);
  }
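  // For illustration: a v4i8 "setlt a, b" has no direct instruction; marking
  // SETLT as Expand above lets the generic legalizer rewrite it in terms of
  // the supported EQ/GT/UGT forms (e.g. as "setgt b, a").
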
  // Custom-lower bitcasts from i8 to v8i1.
  setOperationAction(ISD::BITCAST,        MVT::i8,    Custom);
  setOperationAction(ISD::SETCC,          MVT::v2i16, Custom);
  setOperationAction(ISD::VSELECT,        MVT::v4i8,  Custom);
  setOperationAction(ISD::VSELECT,        MVT::v2i16, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8,  Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8,  Custom);

  setOperationAction(ISD::FMA,  MVT::f64, Expand);
  setOperationAction(ISD::FADD, MVT::f64, Expand);
  setOperationAction(ISD::FSUB, MVT::f64, Expand);
  setOperationAction(ISD::FMUL, MVT::f64, Expand);

  setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
  setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);

  setOperationAction(ISD::FP_TO_UINT, MVT::i1,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i1,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i1,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i1,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);

  // Special handling for half-precision floating point conversions.
  // Lower half float conversions into library calls.
  setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
  setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
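  // For illustration: with the f16 actions above, extending a half value to
  // f32 ends up as a call to __extendhfsf2 and truncating f32 to half as a
  // call to __truncsfhf2; both names are registered with setLibcallName
  // further down in this constructor.
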
  // Handling of indexed loads/stores: default is "expand".
  //
  for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64,
                 MVT::v2i16, MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
    setIndexedLoadAction(ISD::POST_INC, VT, Legal);
    setIndexedStoreAction(ISD::POST_INC, VT, Legal);
  }

  // Subtarget-specific operation actions.
  //
  if (Subtarget.hasV60Ops()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  }
  if (Subtarget.hasV66Ops()) {
    setOperationAction(ISD::FADD, MVT::f64, Legal);
    setOperationAction(ISD::FSUB, MVT::f64, Legal);
  }
  if (Subtarget.hasV67Ops()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMUL,    MVT::f64, Legal);
  }

  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::TRUNCATE);
  setTargetDAGCombine(ISD::VSELECT);

  if (Subtarget.useHVXOps())
    initializeHVXLowering();

  computeRegisterProperties(&HRI);

  //
  // Library calls for unsupported operations
  //
  bool FastMath = EnableFastMath;

  setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
  setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
  setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
  setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
  setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
  setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
  setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
  setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");

  setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
  setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
  setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
  setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
  setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
  setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");

  // This is the only fast library function for sqrtd.
  if (FastMath)
    setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");

  // Prefix is: nothing for "slow-math",
  //            "fast2_" for V5+ fast-math double-precision
  // (actually, keep fast-math and fast-math2 separate for now)
  if (FastMath) {
    setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
    setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
    setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
    setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
    setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
  } else {
    setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
    setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
    setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
    setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
    setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
  }

  if (FastMath)
    setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
  else
    setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");

  // Routines to handle fp16 storage type.
  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
  setLibcallName(RTLIB::FPROUND_F64_F16, "__truncdfhf2");
  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");

  // These cause problems when the shift amount is non-constant.
  setLibcallName(RTLIB::SHL_I128, nullptr);
  setLibcallName(RTLIB::SRL_I128, nullptr);
  setLibcallName(RTLIB::SRA_I128, nullptr);
}
const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((HexagonISD::NodeType)Opcode) {
  case HexagonISD::ADDC:          return "HexagonISD::ADDC";
  case HexagonISD::SUBC:          return "HexagonISD::SUBC";
  case HexagonISD::ALLOCA:        return "HexagonISD::ALLOCA";
  case HexagonISD::AT_GOT:        return "HexagonISD::AT_GOT";
  case HexagonISD::AT_PCREL:      return "HexagonISD::AT_PCREL";
  case HexagonISD::BARRIER:       return "HexagonISD::BARRIER";
  case HexagonISD::CALL:          return "HexagonISD::CALL";
  case HexagonISD::CALLnr:        return "HexagonISD::CALLnr";
  case HexagonISD::CALLR:         return "HexagonISD::CALLR";
  case HexagonISD::COMBINE:       return "HexagonISD::COMBINE";
  case HexagonISD::CONST32_GP:    return "HexagonISD::CONST32_GP";
  case HexagonISD::CONST32:       return "HexagonISD::CONST32";
  case HexagonISD::CP:            return "HexagonISD::CP";
  case HexagonISD::DCFETCH:       return "HexagonISD::DCFETCH";
  case HexagonISD::EH_RETURN:     return "HexagonISD::EH_RETURN";
  case HexagonISD::TSTBIT:        return "HexagonISD::TSTBIT";
  case HexagonISD::EXTRACTU:      return "HexagonISD::EXTRACTU";
  case HexagonISD::INSERT:        return "HexagonISD::INSERT";
  case HexagonISD::JT:            return "HexagonISD::JT";
  case HexagonISD::RET_GLUE:      return "HexagonISD::RET_GLUE";
  case HexagonISD::TC_RETURN:     return "HexagonISD::TC_RETURN";
  case HexagonISD::VASL:          return "HexagonISD::VASL";
  case HexagonISD::VASR:          return "HexagonISD::VASR";
  case HexagonISD::VLSR:          return "HexagonISD::VLSR";
  case HexagonISD::MFSHL:         return "HexagonISD::MFSHL";
  case HexagonISD::MFSHR:         return "HexagonISD::MFSHR";
  case HexagonISD::SSAT:          return "HexagonISD::SSAT";
  case HexagonISD::USAT:          return "HexagonISD::USAT";
  case HexagonISD::SMUL_LOHI:     return "HexagonISD::SMUL_LOHI";
  case HexagonISD::UMUL_LOHI:     return "HexagonISD::UMUL_LOHI";
  case HexagonISD::USMUL_LOHI:    return "HexagonISD::USMUL_LOHI";
  case HexagonISD::VEXTRACTW:     return "HexagonISD::VEXTRACTW";
  case HexagonISD::VINSERTW0:     return "HexagonISD::VINSERTW0";
  case HexagonISD::VROR:          return "HexagonISD::VROR";
  case HexagonISD::READCYCLE:     return "HexagonISD::READCYCLE";
  case HexagonISD::PTRUE:         return "HexagonISD::PTRUE";
  case HexagonISD::PFALSE:        return "HexagonISD::PFALSE";
  case HexagonISD::D2P:           return "HexagonISD::D2P";
  case HexagonISD::P2D:           return "HexagonISD::P2D";
  case HexagonISD::V2Q:           return "HexagonISD::V2Q";
  case HexagonISD::Q2V:           return "HexagonISD::Q2V";
  case HexagonISD::QCAT:          return "HexagonISD::QCAT";
  case HexagonISD::QTRUE:         return "HexagonISD::QTRUE";
  case HexagonISD::QFALSE:        return "HexagonISD::QFALSE";
  case HexagonISD::TL_EXTEND:     return "HexagonISD::TL_EXTEND";
  case HexagonISD::TL_TRUNCATE:   return "HexagonISD::TL_TRUNCATE";
  case HexagonISD::TYPECAST:      return "HexagonISD::TYPECAST";
  case HexagonISD::VALIGN:        return "HexagonISD::VALIGN";
  case HexagonISD::VALIGNADDR:    return "HexagonISD::VALIGNADDR";
  case HexagonISD::ISEL:          return "HexagonISD::ISEL";
  case HexagonISD::OP_END:        break;
  }
  return nullptr;
}
bool
HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, Align NeedAlign,
      const SDLoc &dl, SelectionDAG &DAG) const {
  auto *CA = dyn_cast<ConstantSDNode>(Ptr);
  if (!CA)
    return true;
  unsigned Addr = CA->getZExtValue();
  Align HaveAlign =
      Addr != 0 ? Align(1ull << llvm::countr_zero(Addr)) : NeedAlign;
  if (HaveAlign >= NeedAlign)
    return true;

  static int DK_MisalignedTrap = llvm::getNextAvailablePluginDiagnosticKind();

  struct DiagnosticInfoMisalignedTrap : public DiagnosticInfo {
    DiagnosticInfoMisalignedTrap(StringRef M)
      : DiagnosticInfo(DK_MisalignedTrap, DS_Remark), Msg(M) {}
    void print(DiagnosticPrinter &DP) const override {
      DP << Msg;
    }
    static bool classof(const DiagnosticInfo *DI) {
      return DI->getKind() == DK_MisalignedTrap;
    }
    StringRef Msg;
  };

  std::string ErrMsg;
  raw_string_ostream O(ErrMsg);
  O << "Misaligned constant address: " << format_hex(Addr, 10)
    << " has alignment " << HaveAlign.value()
    << ", but the memory access requires " << NeedAlign.value();
  if (DebugLoc DL = dl.getDebugLoc())
    DL.print(O << ", at ");
  O << ". The instruction has been replaced with a trap.";

  DAG.getContext()->diagnose(DiagnosticInfoMisalignedTrap(O.str()));
  return false;
}

SDValue
HexagonTargetLowering::replaceMemWithUndef(SDValue Op, SelectionDAG &DAG)
      const {
  const SDLoc &dl(Op);
  auto *LS = cast<LSBaseSDNode>(Op.getNode());
  assert(!LS->isIndexed() && "Not expecting indexed ops on constant address");

  SDValue Chain = LS->getChain();
  SDValue Trap = DAG.getNode(ISD::TRAP, dl, MVT::Other, Chain);
  if (LS->getOpcode() == ISD::LOAD)
    return DAG.getMergeValues({DAG.getUNDEF(ty(Op)), Trap}, dl);
  return Trap;
}
// Bit-reverse Load Intrinsic: Check if the instruction is a bit-reverse load
// intrinsic.
static bool isBrevLdIntrinsic(const Value *Inst) {
  unsigned ID = cast<IntrinsicInst>(Inst)->getIntrinsicID();
  return (ID == Intrinsic::hexagon_L2_loadrd_pbr ||
          ID == Intrinsic::hexagon_L2_loadri_pbr ||
          ID == Intrinsic::hexagon_L2_loadrh_pbr ||
          ID == Intrinsic::hexagon_L2_loadruh_pbr ||
          ID == Intrinsic::hexagon_L2_loadrb_pbr ||
          ID == Intrinsic::hexagon_L2_loadrub_pbr);
}
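// For illustration (shape taken from the getTgtMemIntrinsic comment below):
//   { i32, i8* } @llvm.hexagon.L2.loadri.pbr(i8*, i32)
// is one such call; the element type of the first struct field determines
// the memory VT, and the pointer operand is traced back via getBrevLdObject.
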
// Bit-reverse Load Intrinsic: Crawl up and figure out the object from previous
// instruction. So far we only handle bitcast, extract value and bit-reverse
// load intrinsic instructions. Should we handle CGEP?
static Value *getBrevLdObject(Value *V) {
  if (Operator::getOpcode(V) == Instruction::ExtractValue ||
      Operator::getOpcode(V) == Instruction::BitCast)
    V = cast<Operator>(V)->getOperand(0);
  else if (isa<IntrinsicInst>(V) && isBrevLdIntrinsic(V))
    V = cast<Instruction>(V)->getOperand(0);
  return V;
}

// Bit-reverse Load Intrinsic: For a PHI Node return either an incoming edge or
// a back edge. If the back edge comes from the intrinsic itself, the incoming
// edge is returned.
static Value *returnEdge(const PHINode *PN, Value *IntrBaseVal) {
  const BasicBlock *Parent = PN->getParent();
  int Idx = -1;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
    BasicBlock *Blk = PN->getIncomingBlock(i);
    // Determine if the back edge is originated from intrinsic.
    if (Blk == Parent) {
      Value *BackEdgeVal = PN->getIncomingValue(i);
      Value *BaseVal;
      // Loop over till we return the same Value or we hit the IntrBaseVal.
      do {
        BaseVal = BackEdgeVal;
        BackEdgeVal = getBrevLdObject(BackEdgeVal);
      } while ((BaseVal != BackEdgeVal) && (IntrBaseVal != BackEdgeVal));
      // If the getBrevLdObject returns IntrBaseVal, we should return the
      // incoming edge.
      if (IntrBaseVal == BackEdgeVal)
        continue;
      Idx = i;
      break;
    } else // Set the node to incoming edge.
      Idx = i;
  }
  assert(Idx >= 0 && "Unexpected index to incoming argument in PHI");
  return PN->getIncomingValue(Idx);
}

// Bit-reverse Load Intrinsic: Figure out the underlying object the base
// pointer points to, for the bit-reverse load intrinsic. Setting this to
// memoperand might help alias analysis to figure out the dependencies.
static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
  Value *IntrBaseVal = V;
  Value *BaseVal;
  // Loop over till we return the same Value, implies we either figure out
  // the object or we hit a PHI
  do {
    BaseVal = V;
    V = getBrevLdObject(V);
  } while (BaseVal != V);

  // Identify the object from PHINode.
  if (const PHINode *PN = dyn_cast<PHINode>(V))
    return returnEdge(PN, IntrBaseVal);
  // For non PHI nodes, the object is the last value returned by getBrevLdObject
  return V;
}

/// Given an intrinsic, checks if on the target the intrinsic will need to map
/// to a MemIntrinsicNode (touches memory). If this is the case, it returns
/// true and store the intrinsic information into the IntrinsicInfo that was
/// passed to the function.
bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                               const CallInst &I,
                                               MachineFunction &MF,
                                               unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::hexagon_L2_loadrd_pbr:
  case Intrinsic::hexagon_L2_loadri_pbr:
  case Intrinsic::hexagon_L2_loadrh_pbr:
  case Intrinsic::hexagon_L2_loadruh_pbr:
  case Intrinsic::hexagon_L2_loadrb_pbr:
  case Intrinsic::hexagon_L2_loadrub_pbr: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
    auto &Cont = I.getCalledFunction()->getParent()->getContext();
    // The intrinsic function call is of the form { ElTy, i8* }
    // @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
    // should be derived from ElTy.
    Type *ElTy = I.getCalledFunction()->getReturnType()->getStructElementType(0);
    Info.memVT = MVT::getVT(ElTy);
    llvm::Value *BasePtrVal = I.getOperand(0);
    Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal);
    // The offset value comes through Modifier register. For now, assume the
    // offset is 0.
    Info.offset = 0;
    Info.align = DL.getABITypeAlign(Info.memVT.getTypeForEVT(Cont));
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::hexagon_V6_vgathermw:
  case Intrinsic::hexagon_V6_vgathermw_128B:
  case Intrinsic::hexagon_V6_vgathermh:
  case Intrinsic::hexagon_V6_vgathermh_128B:
  case Intrinsic::hexagon_V6_vgathermhw:
  case Intrinsic::hexagon_V6_vgathermhw_128B:
  case Intrinsic::hexagon_V6_vgathermwq:
  case Intrinsic::hexagon_V6_vgathermwq_128B:
  case Intrinsic::hexagon_V6_vgathermhq:
  case Intrinsic::hexagon_V6_vgathermhq_128B:
  case Intrinsic::hexagon_V6_vgathermhwq:
  case Intrinsic::hexagon_V6_vgathermhwq_128B: {
    const Module &M = *I.getParent()->getParent()->getParent();
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Type *VecTy = I.getArgOperand(1)->getType();
    Info.memVT = MVT::getVT(VecTy);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align =
        MaybeAlign(M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8);
    Info.flags = MachineMemOperand::MOLoad |
                 MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
  default:
    break;
  }
  return false;
}
bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
  return X.getValueType().isScalarInteger(); // 'tstbit'
}

bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
}

bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isSimple() || !VT2.isSimple())
    return false;
  return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
}

bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(
      const MachineFunction &MF, EVT VT) const {
  return isOperationLegalOrCustom(ISD::FMA, VT);
}

// Should we expand the build vector with shuffles?
bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
      unsigned DefinedValues) const {
  return false;
}

bool HexagonTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
      unsigned Index) const {
  assert(ResVT.getVectorElementType() == SrcVT.getVectorElementType());
  if (!ResVT.isSimple() || !SrcVT.isSimple())
    return false;

  MVT ResTy = ResVT.getSimpleVT(), SrcTy = SrcVT.getSimpleVT();
  if (ResTy.getVectorElementType() != MVT::i1)
    return Index == 0;

  // Non-HVX bool vectors are relatively cheap.
  return SrcTy.getVectorNumElements() <= 8;
}
bool HexagonTargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
  return Op.getOpcode() == ISD::CONCAT_VECTORS ||
         TargetLowering::isTargetCanonicalConstantNode(Op);
}

bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
                                               EVT VT) const {
  return true;
}

TargetLoweringBase::LegalizeTypeAction
HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
  unsigned VecLen = VT.getVectorMinNumElements();
  MVT ElemTy = VT.getVectorElementType();

  if (VecLen == 1 || VT.isScalableVector())
    return TargetLoweringBase::TypeScalarizeVector;

  if (Subtarget.useHVXOps()) {
    unsigned Action = getPreferredHvxVectorAction(VT);
    if (Action != ~0u)
      return static_cast<TargetLoweringBase::LegalizeTypeAction>(Action);
  }

  // Always widen (remaining) vectors of i1.
  if (ElemTy == MVT::i1)
    return TargetLoweringBase::TypeWidenVector;
  // Widen non-power-of-2 vectors. Such types cannot be split right now,
  // and computeRegisterProperties will override "split" with "widen",
  // which can cause other issues.
  if (!isPowerOf2_32(VecLen))
    return TargetLoweringBase::TypeWidenVector;

  return TargetLoweringBase::TypeSplitVector;
}

TargetLoweringBase::LegalizeAction
HexagonTargetLowering::getCustomOperationAction(SDNode &Op) const {
  if (Subtarget.useHVXOps()) {
    unsigned Action = getCustomHvxOperationAction(Op);
    if (Action != ~0u)
      return static_cast<TargetLoweringBase::LegalizeAction>(Action);
  }
  return TargetLoweringBase::Legal;
}

std::pair<SDValue, int>
HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const {
  if (Addr.getOpcode() == ISD::ADD) {
    SDValue Op1 = Addr.getOperand(1);
    if (auto *CN = dyn_cast<const ConstantSDNode>(Op1.getNode()))
      return { Addr.getOperand(0), CN->getSExtValue() };
  }
  return { Addr, 0 };
}
// Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors
// to select data from, V3 is the permutation.
SDValue
HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
      const {
  const auto *SVN = cast<ShuffleVectorSDNode>(Op);
  ArrayRef<int> AM = SVN->getMask();
  assert(AM.size() <= 8 && "Unexpected shuffle mask");
  unsigned VecLen = AM.size();

  MVT VecTy = ty(Op);
  assert(!Subtarget.isHVXVectorType(VecTy, true) &&
         "HVX shuffles should be legal");
  assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");

  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  const SDLoc &dl(Op);

  // If the inputs are not the same as the output, bail. This is not an
  // error situation, but complicates the handling and the default expansion
  // (into BUILD_VECTOR) should be adequate.
  if (ty(Op0) != VecTy || ty(Op1) != VecTy)
    return SDValue();

  // Normalize the mask so that the first non-negative index comes from
  // the first operand.
  SmallVector<int,8> Mask(AM.begin(), AM.end());
  unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data();
  if (F == AM.size())
    return DAG.getUNDEF(VecTy);
  if (AM[F] >= int(VecLen)) {
    ShuffleVectorSDNode::commuteMask(Mask);
    std::swap(Op0, Op1);
  }

  // Express the shuffle mask in terms of bytes.
  SmallVector<int,8> ByteMask;
  unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
  for (int M : Mask) {
    if (M < 0) {
      for (unsigned j = 0; j != ElemBytes; ++j)
        ByteMask.push_back(-1);
    } else {
      for (unsigned j = 0; j != ElemBytes; ++j)
        ByteMask.push_back(M*ElemBytes + j);
    }
  }
  assert(ByteMask.size() <= 8);

  // All non-undef (non-negative) indexes are well within [0..127], so they
  // fit in a single byte. Build two 64-bit words:
  // - MaskIdx where each byte is the corresponding index (for non-negative
  //   indexes), and 0xFF for negative indexes, and
  // - MaskUnd that has 0xFF for each negative index.
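  // For illustration: a v4i8 shuffle of Op0 with mask {0, -1, 2, 3} gives
  // ByteMask = {0, -1, 2, 3}, MaskIdx = 0x0302FF00 and MaskUnd = 0x0000FF00,
  // so MaskIdx == (0x03020100 | MaskUnd) and the identity case below applies.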
  uint64_t MaskIdx = 0;
  uint64_t MaskUnd = 0;
  for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
    unsigned S = 8*i;
    uint64_t M = ByteMask[i] & 0xFF;
    if (M == 0xFF)
      MaskUnd |= M << S;
    MaskIdx |= M << S;
  }

  if (ByteMask.size() == 4) {
    // Identity.
    if (MaskIdx == (0x03020100 | MaskUnd))
      return Op0;
    // Byte swap.
    if (MaskIdx == (0x00010203 | MaskUnd)) {
      SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
      SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
      return DAG.getBitcast(VecTy, T1);
    }

    // Byte packs.
    SDValue Concat10 =
        getCombine(Op1, Op0, dl, typeJoin({ty(Op1), ty(Op0)}), DAG);
    if (MaskIdx == (0x06040200 | MaskUnd))
      return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
    if (MaskIdx == (0x07050301 | MaskUnd))
      return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);

    SDValue Concat01 =
        getCombine(Op0, Op1, dl, typeJoin({ty(Op0), ty(Op1)}), DAG);
    if (MaskIdx == (0x02000604 | MaskUnd))
      return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
    if (MaskIdx == (0x03010705 | MaskUnd))
      return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
  }

  if (ByteMask.size() == 8) {
    // Identity.
    if (MaskIdx == (0x0706050403020100ull | MaskUnd))
      return Op0;
    // Byte swap.
    if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
      SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
      SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
      return DAG.getBitcast(VecTy, T1);
    }

    // Halfword picks.
    if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
      return getInstr(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
      return getInstr(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
      return getInstr(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
      return getInstr(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
      VectorPair P = opSplit(Op0, dl, DAG);
      return getInstr(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
    }

    // Byte packs.
    if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
      return getInstr(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
      return getInstr(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
  }

  return SDValue();
}
SDValue
HexagonTargetLowering::getSplatValue(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::BUILD_VECTOR:
    if (SDValue S = cast<BuildVectorSDNode>(Op)->getSplatValue())
      return S;
    break;
  case ISD::SPLAT_VECTOR:
    return Op.getOperand(0);
  }
  return SDValue();
}

// Create a Hexagon-specific node for shifting a vector by an integer.
SDValue
HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
      const {
  unsigned NewOpc;
  switch (Op.getOpcode()) {
  case ISD::SHL:
    NewOpc = HexagonISD::VASL;
    break;
  case ISD::SRA:
    NewOpc = HexagonISD::VASR;
    break;
  case ISD::SRL:
    NewOpc = HexagonISD::VLSR;
    break;
  default:
    llvm_unreachable("Unexpected shift opcode");
  }

  if (SDValue Sp = getSplatValue(Op.getOperand(1), DAG))
    return DAG.getNode(NewOpc, SDLoc(Op), ty(Op), Op.getOperand(0), Sp);
  return SDValue();
}

SDValue
HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);

  // First try to convert the shift (by vector) to a shift by a scalar.
  // If we first split the shift, the shift amount will become 'extract
  // subvector', and will no longer be recognized as scalar.
  SDValue Res = Op;
  if (SDValue S = getVectorShiftByInt(Op, DAG))
    Res = S;

  unsigned Opc = Res.getOpcode();
  switch (Opc) {
  case HexagonISD::VASR:
  case HexagonISD::VLSR:
  case HexagonISD::VASL:
    break;
  default:
    // No instructions for shifts by non-scalars.
    return SDValue();
  }

  MVT ResTy = ty(Res);
  if (ResTy.getVectorElementType() != MVT::i8)
    return Res;

  // For shifts of i8, extend the inputs to i16, then truncate back to i8.
  assert(ResTy.getVectorElementType() == MVT::i8);
  SDValue Val = Res.getOperand(0), Amt = Res.getOperand(1);

  auto ShiftPartI8 = [&dl, &DAG, this](unsigned Opc, SDValue V, SDValue A) {
    MVT Ty = ty(V);
    MVT ExtTy = MVT::getVectorVT(MVT::i16, Ty.getVectorNumElements());
    SDValue ExtV = Opc == HexagonISD::VASR ? DAG.getSExtOrTrunc(V, dl, ExtTy)
                                           : DAG.getZExtOrTrunc(V, dl, ExtTy);
    SDValue ExtS = DAG.getNode(Opc, dl, ExtTy, {ExtV, A});
    return DAG.getZExtOrTrunc(ExtS, dl, Ty);
  };

  if (ResTy.getSizeInBits() == 32)
    return ShiftPartI8(Opc, Val, Amt);

  auto [LoV, HiV] = opSplit(Val, dl, DAG);
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy,
                     {ShiftPartI8(Opc, LoV, Amt), ShiftPartI8(Opc, HiV, Amt)});
}
SDValue
HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
  if (isa<ConstantSDNode>(Op.getOperand(1).getNode()))
    return Op;
  return SDValue();
}

SDValue
HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
  MVT ResTy = ty(Op);
  SDValue InpV = Op.getOperand(0);
  MVT InpTy = ty(InpV);
  assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
  const SDLoc &dl(Op);

  // Handle conversion from i8 to v8i1.
  if (InpTy == MVT::i8) {
    if (ResTy == MVT::v8i1) {
      SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
      SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
      return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
    }
    return SDValue();
  }

  return Op;
}
bool
HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
      MVT VecTy, SelectionDAG &DAG,
      MutableArrayRef<ConstantInt*> Consts) const {
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();
  IntegerType *IntTy = IntegerType::get(*DAG.getContext(), ElemWidth);
  bool AllConst = true;

  for (unsigned i = 0, e = Values.size(); i != e; ++i) {
    SDValue V = Values[i];
    if (V.isUndef()) {
      Consts[i] = ConstantInt::get(IntTy, 0);
      continue;
    }
    // Make sure to always cast to IntTy.
    if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) {
      const ConstantInt *CI = CN->getConstantIntValue();
      Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue());
    } else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) {
      const ConstantFP *CF = CN->getConstantFPValue();
      APInt A = CF->getValueAPF().bitcastToAPInt();
      Consts[i] = ConstantInt::get(IntTy, A.getZExtValue());
    } else {
      AllConst = false;
    }
  }
  return AllConst;
}

SDValue
HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
                                     MVT VecTy, SelectionDAG &DAG) const {
  MVT ElemTy = VecTy.getVectorElementType();
  assert(VecTy.getVectorNumElements() == Elem.size());

  SmallVector<ConstantInt*,4> Consts(Elem.size());
  bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);

  unsigned First, Num = Elem.size();
  for (First = 0; First != Num; ++First) {
    if (!isUndef(Elem[First]))
      break;
  }
  if (First == Num)
    return DAG.getUNDEF(VecTy);

  if (AllConst &&
      llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
    return getZero(dl, VecTy, DAG);

  if (ElemTy == MVT::i16 || ElemTy == MVT::f16) {
    assert(Elem.size() == 2);
    if (AllConst) {
      // The 'Consts' array will have all values as integers regardless
      // of the vector element type.
      uint32_t V = (Consts[0]->getZExtValue() & 0xFFFF) |
                   Consts[1]->getZExtValue() << 16;
      return DAG.getBitcast(VecTy, DAG.getConstant(V, dl, MVT::i32));
    }
    SDValue E0 = Elem[0], E1 = Elem[1];
    if (ElemTy == MVT::f16) {
      E0 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[0]), dl, MVT::i32);
      E1 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[1]), dl, MVT::i32);
    }
    SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {E1, E0}, DAG);
    return DAG.getBitcast(VecTy, N);
  }

  if (ElemTy == MVT::i8) {
    // First try generating a constant.
    if (AllConst) {
      int32_t V = (Consts[0]->getZExtValue() & 0xFF) |
                  (Consts[1]->getZExtValue() & 0xFF) << 8 |
                  (Consts[2]->getZExtValue() & 0xFF) << 16 |
                  Consts[3]->getZExtValue() << 24;
      return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32));
    }

    // Then try splat.
    bool IsSplat = true;
    for (unsigned i = First+1; i != Num; ++i) {
      if (Elem[i] == Elem[First] || isUndef(Elem[i]))
        continue;
      IsSplat = false;
      break;
    }
    if (IsSplat) {
      // Legalize the operand of SPLAT_VECTOR.
      SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
      return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
    }

    //   (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) |
    //   (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16
    assert(Elem.size() == 4);
    SDValue Vs[4];
    for (unsigned i = 0; i != 4; ++i) {
      Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32);
      Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8);
    }
    SDValue S8 = DAG.getConstant(8, dl, MVT::i32);
    SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[1], S8});
    SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[3], S8});
    SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0});
    SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1});

    SDValue R = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
    return DAG.getBitcast(MVT::v4i8, R);
  }

  dbgs() << "VecTy: " << VecTy << '\n';
  llvm_unreachable("Unexpected vector element type");
}

SDValue
HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
                                     MVT VecTy, SelectionDAG &DAG) const {
  MVT ElemTy = VecTy.getVectorElementType();
  assert(VecTy.getVectorNumElements() == Elem.size());

  SmallVector<ConstantInt*,8> Consts(Elem.size());
  bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);

  unsigned First, Num = Elem.size();
  for (First = 0; First != Num; ++First) {
    if (!isUndef(Elem[First]))
      break;
  }
  if (First == Num)
    return DAG.getUNDEF(VecTy);

  if (AllConst &&
      llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
    return getZero(dl, VecTy, DAG);

  // First try splat if possible.
  if (ElemTy == MVT::i16 || ElemTy == MVT::f16) {
    bool IsSplat = true;
    for (unsigned i = First+1; i != Num; ++i) {
      if (Elem[i] == Elem[First] || isUndef(Elem[i]))
        continue;
      IsSplat = false;
      break;
    }
    if (IsSplat) {
      // Legalize the operand of SPLAT_VECTOR
      SDValue S = ElemTy == MVT::f16 ? DAG.getBitcast(MVT::i16, Elem[First])
                                     : Elem[First];
      SDValue Ext = DAG.getZExtOrTrunc(S, dl, MVT::i32);
      return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
    }
  }

  // Then try constant.
  if (AllConst) {
    uint64_t Val = 0;
    unsigned W = ElemTy.getSizeInBits();
    uint64_t Mask = (1ull << W) - 1;
    for (unsigned i = 0; i != Num; ++i)
      Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask);
    SDValue V0 = DAG.getConstant(Val, dl, MVT::i64);
    return DAG.getBitcast(VecTy, V0);
  }

  // Build two 32-bit vectors and concatenate.
  MVT HalfTy = MVT::getVectorVT(ElemTy, Num/2);
  SDValue L = (ElemTy == MVT::i32)
                ? Elem[0]
                : buildVector32(Elem.take_front(Num/2), dl, HalfTy, DAG);
  SDValue H = (ElemTy == MVT::i32)
                ? Elem[1]
                : buildVector32(Elem.drop_front(Num/2), dl, HalfTy, DAG);
  return getCombine(H, L, dl, VecTy, DAG);
}

SDValue
HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
                                     const SDLoc &dl, MVT ValTy, MVT ResTy,
                                     SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  assert(!ValTy.isVector() ||
         VecTy.getVectorElementType() == ValTy.getVectorElementType());
  if (VecTy.getVectorElementType() == MVT::i1)
    return extractVectorPred(VecV, IdxV, dl, ValTy, ResTy, DAG);

  unsigned VecWidth = VecTy.getSizeInBits();
  unsigned ValWidth = ValTy.getSizeInBits();
  unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
  assert((VecWidth % ElemWidth) == 0);
  assert(VecWidth == 32 || VecWidth == 64);

  // Cast everything to scalar integer types.
  MVT ScalarTy = tyScalar(VecTy);
  VecV = DAG.getBitcast(ScalarTy, VecV);

  SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
  SDValue ExtV;

  if (auto *IdxN = dyn_cast<ConstantSDNode>(IdxV)) {
    unsigned Off = IdxN->getZExtValue() * ElemWidth;
    if (VecWidth == 64 && ValWidth == 32) {
      assert(Off == 0 || Off == 32);
      ExtV = Off == 0 ? LoHalf(VecV, DAG) : HiHalf(VecV, DAG);
    } else if (Off == 0 && (ValWidth % 8) == 0) {
      ExtV = DAG.getZeroExtendInReg(VecV, dl, tyScalar(ValTy));
    } else {
      SDValue OffV = DAG.getConstant(Off, dl, MVT::i32);
      // The return type of EXTRACTU must be the same as the type of the
      // input vector.
      ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
                         {VecV, WidthV, OffV});
    }
  } else {
    if (ty(IdxV) != MVT::i32)
      IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
    SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                               DAG.getConstant(ElemWidth, dl, MVT::i32));
    ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
                       {VecV, WidthV, OffV});
  }

  // Cast ExtV to the requested result type.
  ExtV = DAG.getZExtOrTrunc(ExtV, dl, tyScalar(ResTy));
  ExtV = DAG.getBitcast(ResTy, ExtV);
  return ExtV;
}
SDValue
HexagonTargetLowering::extractVectorPred(SDValue VecV, SDValue IdxV,
                                         const SDLoc &dl, MVT ValTy, MVT ResTy,
                                         SelectionDAG &DAG) const {
  // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
  // without any coprocessors).
  MVT VecTy = ty(VecV);
  unsigned VecWidth = VecTy.getSizeInBits();
  unsigned ValWidth = ValTy.getSizeInBits();
  assert(VecWidth == VecTy.getVectorNumElements() &&
         "Vector elements should equal vector width size");
  assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);

  // Check if this is an extract of the lowest bit.
  if (auto *IdxN = dyn_cast<ConstantSDNode>(IdxV)) {
    // Extracting the lowest bit is a no-op, but it changes the type,
    // so it must be kept as an operation to avoid errors related to
    // type mismatches.
    if (IdxN->isZero() && ValTy.getSizeInBits() == 1)
      return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
  }

  // If the value extracted is a single bit, use tstbit.
  if (ValWidth == 1) {
    SDValue A0 = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
    SDValue M0 = DAG.getConstant(8 / VecWidth, dl, MVT::i32);
    SDValue I0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, M0);
    return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, I0);
  }

  // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
  // a predicate register. The elements of the vector are repeated
  // in the register (if necessary) so that the total number is 8.
  // The extracted subvector will need to be expanded in such a way.
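  // For illustration: extracting a v2i1 subvector from a v8i1 value yields
  // only two meaningful lanes, which must be repeated four times to fill the
  // 8-bit predicate; the expandPredicate() calls below perform that widening.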
  unsigned Scale = VecWidth / ValWidth;

  // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
  // position 0.
  assert(ty(IdxV) == MVT::i32);
  unsigned VecRep = 8 / VecWidth;
  SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                           DAG.getConstant(8*VecRep, dl, MVT::i32));
  SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
  SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
  while (Scale > 1) {
    // The longest possible subvector is at most 32 bits, so it is always
    // contained in the low subregister.
    T1 = LoHalf(T1, DAG);
    T1 = expandPredicate(T1, dl, DAG);
    Scale /= 2;
  }

  return DAG.getNode(HexagonISD::D2P, dl, ResTy, T1);
}

SDValue
HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
                                    const SDLoc &dl, MVT ValTy,
                                    SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  if (VecTy.getVectorElementType() == MVT::i1)
    return insertVectorPred(VecV, ValV, IdxV, dl, ValTy, DAG);

  unsigned VecWidth = VecTy.getSizeInBits();
  unsigned ValWidth = ValTy.getSizeInBits();
  assert(VecWidth == 32 || VecWidth == 64);
  assert((VecWidth % ValWidth) == 0);

  // Cast everything to scalar integer types.
  MVT ScalarTy = MVT::getIntegerVT(VecWidth);
  // The actual type of ValV may be different than ValTy (which is related
  // to the vector type).
  unsigned VW = ty(ValV).getSizeInBits();
  ValV = DAG.getBitcast(MVT::getIntegerVT(VW), ValV);
  VecV = DAG.getBitcast(ScalarTy, VecV);
  if (VW != VecWidth)
    ValV = DAG.getAnyExtOrTrunc(ValV, dl, ScalarTy);

  SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
  SDValue InsV;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(IdxV)) {
    unsigned W = C->getZExtValue() * ValWidth;
    SDValue OffV = DAG.getConstant(W, dl, MVT::i32);
    InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
                       {VecV, ValV, WidthV, OffV});
  } else {
    if (ty(IdxV) != MVT::i32)
      IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
    SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV);
    InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
                       {VecV, ValV, WidthV, OffV});
  }

  return DAG.getNode(ISD::BITCAST, dl, VecTy, InsV);
}

SDValue
HexagonTargetLowering::insertVectorPred(SDValue VecV, SDValue ValV,
                                        SDValue IdxV, const SDLoc &dl,
                                        MVT ValTy, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  unsigned VecLen = VecTy.getVectorNumElements();

  if (ValTy == MVT::i1) {
    SDValue ToReg = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
    SDValue Ext = DAG.getSExtOrTrunc(ValV, dl, MVT::i32);
    SDValue Width = DAG.getConstant(8 / VecLen, dl, MVT::i32);
    SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, Width);
    SDValue Ins =
        DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, {ToReg, Ext, Width, Idx});
    return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Ins}, DAG);
  }

  assert(ValTy.getVectorElementType() == MVT::i1);
  SDValue ValR = ValTy.isVector()
                     ? DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV)
                     : DAG.getSExtOrTrunc(ValV, dl, MVT::i64);

  unsigned Scale = VecLen / ValTy.getVectorNumElements();

  for (unsigned R = Scale; R > 1; R /= 2) {
    ValR = contractPredicate(ValR, dl, DAG);
    ValR = getCombine(DAG.getUNDEF(MVT::i32), ValR, dl, MVT::i64, DAG);
  }

  SDValue Width = DAG.getConstant(64 / Scale, dl, MVT::i32);
  SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, Width);
  SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
  SDValue Ins =
      DAG.getNode(HexagonISD::INSERT, dl, MVT::i64, {VecR, ValR, Width, Idx});
  return DAG.getNode(HexagonISD::D2P, dl, VecTy, Ins);
}
SDValue
HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
                                       SelectionDAG &DAG) const {
  assert(ty(Vec32).getSizeInBits() == 32);
  if (isUndef(Vec32))
    return DAG.getUNDEF(MVT::i64);
  SDValue P = DAG.getBitcast(MVT::v4i8, Vec32);
  SDValue X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i16, P);
  return DAG.getBitcast(MVT::i64, X);
}

SDValue
HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  assert(ty(Vec64).getSizeInBits() == 64);
  if (isUndef(Vec64))
    return DAG.getUNDEF(MVT::i32);
  // Collect even bytes:
  SDValue A = DAG.getBitcast(MVT::v8i8, Vec64);
  SDValue S = DAG.getVectorShuffle(MVT::v8i8, dl, A, DAG.getUNDEF(MVT::v8i8),
                                   {0, 2, 4, 6, 1, 3, 5, 7});
  return extractVector(S, DAG.getConstant(0, dl, MVT::i32), dl, MVT::v4i8,
                       MVT::i32, DAG);
}

SDValue
HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
      const {
  if (Ty.isVector()) {
    unsigned W = Ty.getSizeInBits();
    if (W <= 64)
      return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W)));
    return DAG.getNode(ISD::SPLAT_VECTOR, dl, Ty, getZero(dl, MVT::i32, DAG));
  }

  if (Ty.isInteger())
    return DAG.getConstant(0, dl, Ty);
  if (Ty.isFloatingPoint())
    return DAG.getConstantFP(0.0, dl, Ty);
  llvm_unreachable("Invalid type for zero");
}

SDValue
HexagonTargetLowering::appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG)
      const {
  MVT ValTy = ty(Val);
  assert(ValTy.getVectorElementType() == ResTy.getVectorElementType());

  unsigned ValLen = ValTy.getVectorNumElements();
  unsigned ResLen = ResTy.getVectorNumElements();
  if (ValLen == ResLen)
    return Val;

  const SDLoc &dl(Val);
  assert(ValLen < ResLen);
  assert(ResLen % ValLen == 0);

  SmallVector<SDValue, 4> Concats = {Val};
  for (unsigned i = 1, e = ResLen / ValLen; i < e; ++i)
    Concats.push_back(DAG.getUNDEF(ValTy));

  return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, Concats);
}

SDValue
HexagonTargetLowering::getCombine(SDValue Hi, SDValue Lo, const SDLoc &dl,
                                  MVT ResTy, SelectionDAG &DAG) const {
  MVT ElemTy = ty(Hi);
  assert(ElemTy == ty(Lo));

  if (!ElemTy.isVector()) {
    assert(ElemTy.isScalarInteger());
    MVT PairTy = MVT::getIntegerVT(2 * ElemTy.getSizeInBits());
    SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, PairTy, Lo, Hi);
    return DAG.getBitcast(ResTy, Pair);
  }

  unsigned Width = ElemTy.getSizeInBits();
  MVT IntTy = MVT::getIntegerVT(Width);
  MVT PairTy = MVT::getIntegerVT(2 * Width);
  SDValue Pair =
      DAG.getNode(ISD::BUILD_PAIR, dl, PairTy,
                  {DAG.getBitcast(IntTy, Lo), DAG.getBitcast(IntTy, Hi)});
  return DAG.getBitcast(ResTy, Pair);
}
SDValue
HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
  MVT VecTy = ty(Op);
  unsigned BW = VecTy.getSizeInBits();
  const SDLoc &dl(Op);
  SmallVector<SDValue,8> Ops;
  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
    Ops.push_back(Op.getOperand(i));

  if (BW == 32)
    return buildVector32(Ops, dl, VecTy, DAG);
  if (BW == 64)
    return buildVector64(Ops, dl, VecTy, DAG);

  if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
    // Check if this is a special case or all-0 or all-1.
    bool All0 = true, All1 = true;
    for (SDValue P : Ops) {
      auto *CN = dyn_cast<ConstantSDNode>(P.getNode());
      if (CN == nullptr) {
        All0 = All1 = false;
        break;
      }
      uint32_t C = CN->getZExtValue();
      All0 &= (C == 0);
      All1 &= (C == 1);
    }
    if (All0)
      return DAG.getNode(HexagonISD::PFALSE, dl, VecTy);
    if (All1)
      return DAG.getNode(HexagonISD::PTRUE, dl, VecTy);

    // For each i1 element in the resulting predicate register, put 1
    // shifted by the index of the element into a general-purpose register,
    // then or them together and transfer it back into a predicate register.
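    // For illustration: for a v4i1 build vector Rep == 2, so Ops[i/2] controls
    // bits 2*i and 2*i+1; the eight selects below are OR-reduced in three
    // rounds before C2_tfrrp moves the result into a predicate register.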
    SDValue Rs[8];
    SDValue Z = getZero(dl, MVT::i32, DAG);
    // Always produce 8 bits, repeat inputs if necessary.
    unsigned Rep = 8 / VecTy.getVectorNumElements();
    for (unsigned i = 0; i != 8; ++i) {
      SDValue S = DAG.getConstant(1ull << i, dl, MVT::i32);
      Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z);
    }
    for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(A.size()/2)) {
      for (unsigned i = 0, e = A.size()/2; i != e; ++i)
        Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[2*i], Rs[2*i+1]);
    }
    // Move the value directly to a predicate register.
    return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Rs[0]}, DAG);
  }

  return SDValue();
}
SDValue
HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
                                           SelectionDAG &DAG) const {
  MVT VecTy = ty(Op);
  const SDLoc &dl(Op);
  if (VecTy.getSizeInBits() == 64) {
    assert(Op.getNumOperands() == 2);
    return getCombine(Op.getOperand(1), Op.getOperand(0), dl, VecTy, DAG);
  }

  MVT ElemTy = VecTy.getVectorElementType();
  if (ElemTy == MVT::i1) {
    assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1);
    MVT OpTy = ty(Op.getOperand(0));
    // Scale is how many times the operands need to be contracted to match
    // the representation in the target register.
    unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
    assert(Scale == Op.getNumOperands() && Scale > 1);

    // First, convert all bool vectors to integers, then generate pairwise
    // inserts to form values of doubled length. Up until there are only
    // two values left to concatenate, all of these values will fit in a
    // 32-bit integer, so keep them as i32 to use 32-bit inserts.
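    // For illustration: concatenating four v2i1 operands (Scale == 4), each
    // operand is contracted twice into a word whose low 64/Scale bits matter;
    // one round of the pairwise INSERTs below then leaves two words, which the
    // final getCombine/D2P turns into the v8i1 result.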
    SmallVector<SDValue,4> Words[2];
    unsigned IdxW = 0;

    for (SDValue P : Op.getNode()->op_values()) {
      SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P);
      for (unsigned R = Scale; R > 1; R /= 2) {
        W = contractPredicate(W, dl, DAG);
        W = getCombine(DAG.getUNDEF(MVT::i32), W, dl, MVT::i64, DAG);
      }
      W = LoHalf(W, DAG);
      Words[IdxW].push_back(W);
    }

    while (Scale > 2) {
      SDValue WidthV = DAG.getConstant(64 / Scale, dl, MVT::i32);
      Words[IdxW ^ 1].clear();

      for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) {
        SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1];
        // Insert W1 into W0 right next to the significant bits of W0.
        SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
                                {W0, W1, WidthV, WidthV});
        Words[IdxW ^ 1].push_back(T);
      }
      IdxW ^= 1;
      Scale /= 2;
    }

    // At this point there should only be two words left, and Scale should be 2.
    assert(Scale == 2 && Words[IdxW].size() == 2);

    SDValue WW = getCombine(Words[IdxW][1], Words[IdxW][0], dl, MVT::i64, DAG);
    return DAG.getNode(HexagonISD::D2P, dl, VecTy, WW);
  }

  return SDValue();
}
SDValue
HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  MVT ElemTy = ty(Vec).getVectorElementType();
  return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG);
}

SDValue
HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
                                              SelectionDAG &DAG) const {
  return extractVector(Op.getOperand(0), Op.getOperand(1), SDLoc(Op),
                       ty(Op), ty(Op), DAG);
}

SDValue
HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                              SelectionDAG &DAG) const {
  return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
                      SDLoc(Op), ty(Op).getVectorElementType(), DAG);
}

SDValue
HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue ValV = Op.getOperand(1);
  return insertVector(Op.getOperand(0), ValV, Op.getOperand(2),
                      SDLoc(Op), ty(ValV), DAG);
}

bool
HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
  // Assuming the caller does not have either a signext or zeroext modifier, and
  // only one value is accepted, any reasonable truncation is allowed.
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;

  // FIXME: in principle up to 64-bit could be made safe, but it would be very
  // fragile at the moment: any support for multiple value returns would be
  // liable to disallow tail calls involving i64 -> iN truncation in many cases.
  return Ty1->getPrimitiveSizeInBits() <= 32;
}
SDValue
HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
  MVT Ty = ty(Op);
  const SDLoc &dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  MVT MemTy = LN->getMemoryVT().getSimpleVT();
  ISD::LoadExtType ET = LN->getExtensionType();

  bool LoadPred = MemTy == MVT::v2i1 || MemTy == MVT::v4i1 || MemTy == MVT::v8i1;
  if (LoadPred) {
    SDValue NL = DAG.getLoad(
        LN->getAddressingMode(), ISD::ZEXTLOAD, MVT::i32, dl, LN->getChain(),
        LN->getBasePtr(), LN->getOffset(), LN->getPointerInfo(),
        /*MemoryVT*/ MVT::i8, LN->getAlign(), LN->getMemOperand()->getFlags(),
        LN->getAAInfo(), LN->getRanges());
    LN = cast<LoadSDNode>(NL.getNode());
  }

  Align ClaimAlign = LN->getAlign();
  if (!validateConstPtrAlignment(LN->getBasePtr(), ClaimAlign, dl, DAG))
    return replaceMemWithUndef(Op, DAG);

  // Call LowerUnalignedLoad for all loads, it recognizes loads that
  // don't need extra aligning.
  SDValue LU = LowerUnalignedLoad(SDValue(LN, 0), DAG);
  if (LoadPred) {
    SDValue TP = getInstr(Hexagon::C2_tfrrp, dl, MemTy, {LU}, DAG);
    if (ET == ISD::SEXTLOAD) {
      TP = DAG.getSExtOrTrunc(TP, dl, Ty);
    } else if (ET != ISD::NON_EXTLOAD) {
      TP = DAG.getZExtOrTrunc(TP, dl, Ty);
    }
    SDValue Ch = cast<LoadSDNode>(LU.getNode())->getChain();
    return DAG.getMergeValues({TP, Ch}, dl);
  }
  return LU;
}
SDValue
HexagonTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue Val = SN->getValue();
  MVT Ty = ty(Val);
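  // Predicate-typed stores mirror the predicate loads above: the predicate
  // is moved to an integer register with C2_tfrpr and written out as a
  // truncating i8 store so that all of its bits reach memory.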
  if (Ty == MVT::v2i1 || Ty == MVT::v4i1 || Ty == MVT::v8i1) {
    // Store the exact predicate (all bits).
    SDValue TR = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {Val}, DAG);
    SDValue NS = DAG.getTruncStore(SN->getChain(), dl, TR, SN->getBasePtr(),
                                   MVT::i8, SN->getMemOperand());
    if (SN->isIndexed()) {
      NS = DAG.getIndexedStore(NS, dl, SN->getBasePtr(), SN->getOffset(),
                               SN->getAddressingMode());
    }
    SN = cast<StoreSDNode>(NS.getNode());
  }

  Align ClaimAlign = SN->getAlign();
  if (!validateConstPtrAlignment(SN->getBasePtr(), ClaimAlign, dl, DAG))
    return replaceMemWithUndef(Op, DAG);

  MVT StoreTy = SN->getMemoryVT().getSimpleVT();
  Align NeedAlign = Subtarget.getTypeAlignment(StoreTy);
  if (ClaimAlign < NeedAlign)
    return expandUnalignedStore(SN, DAG);
  return SDValue(SN, 0);
}
SDValue
HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
      const {
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  MVT LoadTy = ty(Op);
  unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy).value();
  unsigned HaveAlign = LN->getAlign().value();
  if (HaveAlign >= NeedAlign)
    return Op;

  const SDLoc &dl(Op);
  const DataLayout &DL = DAG.getDataLayout();
  LLVMContext &Ctx = *DAG.getContext();

  // If the load aligning is disabled or the load can be broken up into two
  // smaller legal loads, do the default (target-independent) expansion.
  bool DoDefault = false;
  // Handle it in the default way if this is an indexed load.
  if (!LN->isUnindexed())
    DoDefault = true;

  if (!AlignLoads) {
    if (allowsMemoryAccessForAlignment(Ctx, DL, LN->getMemoryVT(),
                                       *LN->getMemOperand()))
      return Op;
    DoDefault = true;
  }
  if (!DoDefault && (2 * HaveAlign) == NeedAlign) {
    // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
    MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8 * HaveAlign)
                                : MVT::getVectorVT(MVT::i8, HaveAlign);
    DoDefault =
        allowsMemoryAccessForAlignment(Ctx, DL, PartTy, *LN->getMemOperand());
  }
  if (DoDefault) {
    std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
    return DAG.getMergeValues({P.first, P.second}, dl);
  }

  // The code below generates two loads, both aligned as NeedAlign, and
  // with the distance of NeedAlign between them. For that to cover the
  // bits that need to be loaded (and without overlapping), the size of
  // the loads should be equal to NeedAlign. This is true for all loadable
  // types, but add an assertion in case something changes in the future.
  assert(LoadTy.getSizeInBits() == 8*NeedAlign);
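  // Roughly: the base is aligned down via VALIGNADDR, two NeedAlign-sized
  // loads are issued LoadLen bytes apart, and a VALIGN node then uses the
  // low bits of the original pointer to select the bytes the unaligned load
  // would have produced.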
  unsigned LoadLen = NeedAlign;
  SDValue Base = LN->getBasePtr();
  SDValue Chain = LN->getChain();
  auto BO = getBaseAndOffset(Base);
  unsigned BaseOpc = BO.first.getOpcode();
  if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0)
    return Op;

  if (BO.second % LoadLen != 0) {
    BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
                           DAG.getConstant(BO.second % LoadLen, dl, MVT::i32));
    BO.second -= BO.second % LoadLen;
  }
  SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
      ? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
                    DAG.getConstant(NeedAlign, dl, MVT::i32))
      : BO.first;
  SDValue Base0 =
      DAG.getMemBasePlusOffset(BaseNoOff, TypeSize::Fixed(BO.second), dl);
  SDValue Base1 = DAG.getMemBasePlusOffset(
      BaseNoOff, TypeSize::Fixed(BO.second + LoadLen), dl);

  MachineMemOperand *WideMMO = nullptr;
  if (MachineMemOperand *MMO = LN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    WideMMO = MF.getMachineMemOperand(
        MMO->getPointerInfo(), MMO->getFlags(), 2 * LoadLen, Align(LoadLen),
        MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(),
        MMO->getSuccessOrdering(), MMO->getFailureOrdering());
  }

  SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO);
  SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO);

  SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy,
                                {Load1, Load0, BaseNoOff.getOperand(0)});
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                                 Load0.getValue(1), Load1.getValue(1));
  SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl);
  return M;
}
SDValue
HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const {
  SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
  auto *CY = dyn_cast<ConstantSDNode>(Y);
  if (!CY)
    return SDValue();

  const SDLoc &dl(Op);
  SDVTList VTs = Op.getNode()->getVTList();
  assert(VTs.NumVTs == 2);
  assert(VTs.VTs[1] == MVT::i1);
  unsigned Opc = Op.getOpcode();

  if (CY) {
    uint32_t VY = CY->getZExtValue();
    assert(VY != 0 && "This should have been folded");
    if (VY != 1)
      return SDValue();
  }
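  // With Y == 1 the overflow bit can be recovered from the result alone:
  // X + 1 wraps exactly when the sum is 0, and X - 1 wraps exactly when the
  // difference is all-ones, which is what the SETEQ comparisons below test.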
  if (Opc == ISD::UADDO) {
    SDValue Op = DAG.getNode(ISD::ADD, dl, VTs.VTs[0], {X, Y});
    SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op, getZero(dl, ty(Op), DAG),
                              ISD::SETEQ);
    return DAG.getMergeValues({Op, Ov}, dl);
  }
  if (Opc == ISD::USUBO) {
    SDValue Op = DAG.getNode(ISD::SUB, dl, VTs.VTs[0], {X, Y});
    SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op,
                              DAG.getConstant(-1, dl, ty(Op)), ISD::SETEQ);
    return DAG.getMergeValues({Op, Ov}, dl);
  }

  return SDValue();
}
SDValue
HexagonTargetLowering::LowerUAddSubOCarry(SDValue Op,
                                          SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned Opc = Op.getOpcode();
  SDValue X = Op.getOperand(0), Y = Op.getOperand(1), C = Op.getOperand(2);

  if (Opc == ISD::UADDO_CARRY)
    return DAG.getNode(HexagonISD::ADDC, dl, Op.getNode()->getVTList(),
                       { X, Y, C });
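  // USUBO_CARRY maps onto SUBC, which works in terms of a carry rather than
  // a borrow, so both the incoming carry operand and the produced status bit
  // are inverted with getLogicalNOT below.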
  EVT CarryTy = C.getValueType();
  SDValue SubC = DAG.getNode(HexagonISD::SUBC, dl, Op.getNode()->getVTList(),
                             { X, Y, DAG.getLogicalNOT(dl, C, CarryTy) });
  SDValue Out[] = { SubC.getValue(0),
                    DAG.getLogicalNOT(dl, SubC.getValue(1), CarryTy) };
  return DAG.getMergeValues(Out, dl);
}
SDValue
HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain   = Op.getOperand(0);
  SDValue Offset  = Op.getOperand(1);
  SDValue Handler = Op.getOperand(2);
  SDLoc dl(Op);
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Mark function as containing a call to EH_RETURN.
  HexagonMachineFunctionInfo *FuncInfo =
    DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
  FuncInfo->setHasEHReturn();

  unsigned OffsetReg = Hexagon::R28;
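  // The handler address is written over the saved return-address slot at
  // R30+4, and the unwind offset is moved into R28, which EH_RETURN consumes
  // as an explicit input (see the comment further down).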
  SDValue StoreAddr =
      DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
                  DAG.getIntPtrConstant(4, dl));
  Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
  Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);

  // Not needed since we already use it as an explicit input to EH_RETURN.
  // MF.getRegInfo().addLiveOut(OffsetReg);

  return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
}
SDValue
HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();

  // Handle INLINEASM first.
  if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
    return LowerINLINEASM(Op, DAG);

  if (isHvxOperation(Op.getNode(), DAG)) {
    // If HVX lowering returns nothing, try the default lowering.
    if (SDValue V = LowerHvxOperation(Op, DAG))
      return V;
  }

  switch (Opc) {
    default:
#ifndef NDEBUG
      Op.getNode()->dumpr(&DAG);
      if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
        errs() << "Error: check for a non-legal type in this operation\n";
#endif
      llvm_unreachable("Should not custom lower this!");
    case ISD::CONCAT_VECTORS:       return LowerCONCAT_VECTORS(Op, DAG);
    case ISD::INSERT_SUBVECTOR:     return LowerINSERT_SUBVECTOR(Op, DAG);
    case ISD::INSERT_VECTOR_ELT:    return LowerINSERT_VECTOR_ELT(Op, DAG);
    case ISD::EXTRACT_SUBVECTOR:    return LowerEXTRACT_SUBVECTOR(Op, DAG);
    case ISD::EXTRACT_VECTOR_ELT:   return LowerEXTRACT_VECTOR_ELT(Op, DAG);
    case ISD::BUILD_VECTOR:         return LowerBUILD_VECTOR(Op, DAG);
    case ISD::VECTOR_SHUFFLE:       return LowerVECTOR_SHUFFLE(Op, DAG);
    case ISD::BITCAST:              return LowerBITCAST(Op, DAG);
    case ISD::LOAD:                 return LowerLoad(Op, DAG);
    case ISD::STORE:                return LowerStore(Op, DAG);
    case ISD::UADDO:
    case ISD::USUBO:                return LowerUAddSubO(Op, DAG);
    case ISD::UADDO_CARRY:
    case ISD::USUBO_CARRY:          return LowerUAddSubOCarry(Op, DAG);
    case ISD::SRA:
    case ISD::SHL:
    case ISD::SRL:                  return LowerVECTOR_SHIFT(Op, DAG);
    case ISD::ROTL:                 return LowerROTL(Op, DAG);
    case ISD::ConstantPool:         return LowerConstantPool(Op, DAG);
    case ISD::JumpTable:            return LowerJumpTable(Op, DAG);
    case ISD::EH_RETURN:            return LowerEH_RETURN(Op, DAG);
    case ISD::RETURNADDR:           return LowerRETURNADDR(Op, DAG);
    case ISD::FRAMEADDR:            return LowerFRAMEADDR(Op, DAG);
    case ISD::GlobalTLSAddress:     return LowerGlobalTLSAddress(Op, DAG);
    case ISD::ATOMIC_FENCE:         return LowerATOMIC_FENCE(Op, DAG);
    case ISD::GlobalAddress:        return LowerGLOBALADDRESS(Op, DAG);
    case ISD::BlockAddress:         return LowerBlockAddress(Op, DAG);
    case ISD::GLOBAL_OFFSET_TABLE:  return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
    case ISD::VACOPY:               return LowerVACOPY(Op, DAG);
    case ISD::VASTART:              return LowerVASTART(Op, DAG);
    case ISD::DYNAMIC_STACKALLOC:   return LowerDYNAMIC_STACKALLOC(Op, DAG);
    case ISD::SETCC:                return LowerSETCC(Op, DAG);
    case ISD::VSELECT:              return LowerVSELECT(Op, DAG);
    case ISD::INTRINSIC_WO_CHAIN:   return LowerINTRINSIC_WO_CHAIN(Op, DAG);
    case ISD::INTRINSIC_VOID:       return LowerINTRINSIC_VOID(Op, DAG);
    case ISD::PREFETCH:             return LowerPREFETCH(Op, DAG);
    case ISD::READCYCLECOUNTER:     return LowerREADCYCLECOUNTER(Op, DAG);
      break;
  }

  return SDValue();
}
void
HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  if (isHvxOperation(N, DAG)) {
    LowerHvxOperationWrapper(N, Results, DAG);
    if (!Results.empty())
      return;
  }

  SDValue Op(N, 0);
  unsigned Opc = N->getOpcode();

  switch (Opc) {
    case HexagonISD::SSAT:
    case HexagonISD::USAT:
      Results.push_back(opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG));
      break;
    case ISD::STORE:
      // We are only custom-lowering stores to verify the alignment of the
      // address if it is a compile-time constant. Since a store can be
      // modified during type-legalization (the value being stored may need
      // legalization), return empty Results here to indicate that we don't
      // really make any changes in the custom lowering.
      return;
    default:
      TargetLowering::LowerOperationWrapper(N, Results, DAG);
      break;
  }
}
void
HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
                                          SmallVectorImpl<SDValue> &Results,
                                          SelectionDAG &DAG) const {
  if (isHvxOperation(N, DAG)) {
    ReplaceHvxNodeResults(N, Results, DAG);
    if (!Results.empty())
      return;
  }

  const SDLoc &dl(N);
  switch (N->getOpcode()) {
    case ISD::SRL:
    case ISD::SRA:
    case ISD::SHL:
      return;
    case ISD::BITCAST:
      // Handle a bitcast from v8i1 to i8.
      if (N->getValueType(0) == MVT::i8) {
        if (N->getOperand(0).getValueType() == MVT::v8i1) {
          SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
                               N->getOperand(0), DAG);
          SDValue T = DAG.getAnyExtOrTrunc(P, dl, MVT::i8);
          Results.push_back(T);
        }
      }
      break;
  }
}
SDValue
HexagonTargetLowering::PerformDAGCombine(SDNode *N,
                                         DAGCombinerInfo &DCI) const {
  if (isHvxOperation(N, DCI.DAG)) {
    if (SDValue V = PerformHvxDAGCombine(N, DCI))
      return V;
    return SDValue();
  }

  SDValue Op(N, 0);
  const SDLoc &dl(Op);
  unsigned Opc = Op.getOpcode();

  if (Opc == ISD::TRUNCATE) {
    SDValue Op0 = Op.getOperand(0);
    // fold (truncate (build pair x, y)) -> (truncate x) or x
    if (Op0.getOpcode() == ISD::BUILD_PAIR) {
      EVT TruncTy = Op.getValueType();
      SDValue Elem0 = Op0.getOperand(0);
      // if we match the low element of the pair, just return it.
      if (Elem0.getValueType() == TruncTy)
        return Elem0;
      // otherwise, if the low part is still too large, apply the truncate.
      if (Elem0.getValueType().bitsGT(TruncTy))
        return DCI.DAG.getNode(ISD::TRUNCATE, dl, TruncTy, Elem0);
    }
  }

  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  if (Opc == HexagonISD::P2D) {
    SDValue P = Op.getOperand(0);
    switch (P.getOpcode()) {
      case HexagonISD::PTRUE:
        return DCI.DAG.getConstant(-1, dl, ty(Op));
      case HexagonISD::PFALSE:
        return getZero(dl, ty(Op), DCI.DAG);
      default:
        break;
    }
  } else if (Opc == ISD::VSELECT) {
    // This is pretty much duplicated in HexagonISelLoweringHVX...
    // (vselect (xor x, ptrue), v0, v1) -> (vselect x, v1, v0)
    SDValue Cond = Op.getOperand(0);
    if (Cond->getOpcode() == ISD::XOR) {
      SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
      if (C1->getOpcode() == HexagonISD::PTRUE) {
        SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0,
                                       Op.getOperand(2), Op.getOperand(1));
        return VSel;
      }
    }
  } else if (Opc == ISD::TRUNCATE) {
    SDValue Op0 = Op.getOperand(0);
    // fold (truncate (build pair x, y)) -> (truncate x) or x
    if (Op0.getOpcode() == ISD::BUILD_PAIR) {
      MVT TruncTy = ty(Op);
      SDValue Elem0 = Op0.getOperand(0);
      // if we match the low element of the pair, just return it.
      if (ty(Elem0) == TruncTy)
        return Elem0;
      // otherwise, if the low part is still too large, apply the truncate.
      if (ty(Elem0).bitsGT(TruncTy))
        return DCI.DAG.getNode(ISD::TRUNCATE, dl, TruncTy, Elem0);
    }
  } else if (Opc == ISD::OR) {
    // fold (or (shl xx, s), (zext y)) -> (COMBINE (shl xx, s-32), y)
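    // E.g. with s == 32: (or (shl x:i64, 32), (zext y:i32)) becomes
    // COMBINE(lo32(x), y), replacing a 64-bit shift and or with a single
    // register-pair combine.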
    auto fold0 = [&, this](SDValue Op) {
      if (ty(Op) != MVT::i64)
        return SDValue();
      SDValue Shl = Op.getOperand(0);
      SDValue Zxt = Op.getOperand(1);
      if (Shl.getOpcode() != ISD::SHL)
        std::swap(Shl, Zxt);

      if (Shl.getOpcode() != ISD::SHL || Zxt.getOpcode() != ISD::ZERO_EXTEND)
        return SDValue();

      SDValue Z = Zxt.getOperand(0);
      auto *Amt = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
      if (Amt && Amt->getZExtValue() >= 32 && ty(Z).getSizeInBits() <= 32) {
        unsigned A = Amt->getZExtValue();
        SDValue S = Shl.getOperand(0);
        SDValue T0 = DCI.DAG.getNode(ISD::SHL, dl, ty(S), S,
                                     DCI.DAG.getConstant(32 - A, dl, MVT::i32));
        SDValue T1 = DCI.DAG.getZExtOrTrunc(T0, dl, MVT::i32);
        SDValue T2 = DCI.DAG.getZExtOrTrunc(Z, dl, MVT::i32);
        return DCI.DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, {T1, T2});
      }
      return SDValue();
    };

    if (SDValue R = fold0(Op))
      return R;
  }

  return SDValue();
}
/// Returns relocation base for the given PIC jumptable.
SDValue
HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                SelectionDAG &DAG) const {
  int Idx = cast<JumpTableSDNode>(Table)->getIndex();
  EVT VT = Table.getValueType();
  SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
  return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
}
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
HexagonTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
      case 'q':
      case 'v':
        if (Subtarget.useHVXOps())
          return C_RegisterClass;
        break;
      case 'a':
        return C_RegisterClass;
      default:
        break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}
std::pair<unsigned, const TargetRegisterClass*>
HexagonTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':   // R0-R31
      switch (VT.SimpleTy) {
      default:
        return {0u, nullptr};
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::f32:
        return {0u, &Hexagon::IntRegsRegClass};
      case MVT::i64:
      case MVT::f64:
        return {0u, &Hexagon::DoubleRegsRegClass};
      }
      break;
    case 'a': // M0-M1
      if (VT != MVT::i32)
        return {0u, nullptr};
      return {0u, &Hexagon::ModRegsRegClass};
    case 'q': // q0-q3
      switch (VT.getSizeInBits()) {
      default:
        return {0u, nullptr};
      case 64:
      case 128:
        return {0u, &Hexagon::HvxQRRegClass};
      }
      break;
    case 'v': // V0-V31
      switch (VT.getSizeInBits()) {
      default:
        return {0u, nullptr};
      case 512:
        return {0u, &Hexagon::HvxVRRegClass};
      case 1024:
        if (Subtarget.hasV60Ops() && Subtarget.useHVX128BOps())
          return {0u, &Hexagon::HvxVRRegClass};
        return {0u, &Hexagon::HvxWRRegClass};
      case 2048:
        return {0u, &Hexagon::HvxWRRegClass};
      }
      break;
    default:
      return {0u, nullptr};
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                         bool ForCodeSize) const {
  return true;
}
/// isLegalAddressingMode - Return true if the addressing mode represented by
/// AM is legal for this target, for a load/store of the specified type.
bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                  const AddrMode &AM, Type *Ty,
                                                  unsigned AS, Instruction *I) const {
  if (Ty->isSized()) {
    // When LSR detects uses of the same base address to access different
    // types (e.g. unions), it will assume a conservative type for these
    // uses:
    //   LSR Use: Kind=Address of void in addrspace(4294967295), ...
    // The type Ty passed here would then be "void". Skip the alignment
    // checks, but do not return false right away, since that confuses
    // LSR into crashing.
    Align A = DL.getABITypeAlign(Ty);
    // The base offset must be a multiple of the alignment.
    if (!isAligned(A, AM.BaseOffs))
      return false;
    // The shifted offset must fit in 11 bits.
    if (!isInt<11>(AM.BaseOffs >> Log2(A)))
      return false;
  }
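  // For illustration: with a 4-byte type (ABI alignment 4) the immediate
  // offset must be a multiple of 4 and, once shifted right by 2, must fit in
  // a signed 11-bit field, i.e. roughly -4096..4092 for "r+#i" addressing.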
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  int Scale = AM.Scale;
  if (Scale < 0)
    Scale = -Scale;
  switch (Scale) {
  case 0:  // No scale reg, "r+i", "r", or just "i".
    break;
  default: // No scaled addressing mode.
    return false;
  }
  return true;
}

/// Return true if folding a constant offset with the given GlobalAddress is
/// legal. It is frequently not legal in PIC relocation models.
bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
      const {
  return HTM.getRelocationModel() == Reloc::Static;
}

/// isLegalICmpImmediate - Return true if the specified immediate is a legal
/// icmp immediate, that is, the target has icmp instructions which can compare
/// a register against the immediate without having to materialize the
/// immediate into a register.
bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return Imm >= -512 && Imm <= 511;
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
                                 SDValue Callee,
                                 CallingConv::ID CalleeCC,
                                 bool IsVarArg,
                                 bool IsCalleeStructRet,
                                 bool IsCallerStructRet,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SmallVectorImpl<ISD::InputArg> &Ins,
                                 SelectionDAG& DAG) const {
  const Function &CallerF = DAG.getMachineFunction().getFunction();
  CallingConv::ID CallerCC = CallerF.getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;

  // ***************************************************************************
  //  Look for obvious safe cases to perform tail call optimization that do not
  //  require ABI changes.
  // ***************************************************************************

  // If this is a tail call via a function pointer, then don't do it!
  if (!isa<GlobalAddressSDNode>(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee)) {
    return false;
  }

  // Do not optimize if the calling conventions do not match and the conventions
  // used are not C or Fast.
  if (!CCMatch) {
    bool R = (CallerCC == CallingConv::C || CallerCC == CallingConv::Fast);
    bool E = (CalleeCC == CallingConv::C || CalleeCC == CallingConv::Fast);
    // If R & E, then ok.
    if (!R || !E)
      return false;
  }

  // Do not tail call optimize vararg calls.
  if (IsVarArg)
    return false;

  // Also avoid tail call optimization if either caller or callee uses struct
  // return semantics.
  if (IsCalleeStructRet || IsCallerStructRet)
    return false;

  // In addition to the cases above, we also disable tail call optimization if
  // the calling convention requires that at least one outgoing argument be
  // passed on the stack. We cannot check that here because at this point that
  // information is not available.
  return true;
}
/// Returns the target specific optimal type for load and store operations as
/// a result of memset, memcpy, and memmove lowering.
///
/// If DstAlign is zero that means it's safe because the destination alignment
/// can satisfy any constraint. Similarly if SrcAlign is zero it means there
/// isn't a need to check it against the alignment requirement, probably
/// because the source does not need to be loaded. If 'IsMemset' is true, that
/// means it's expanding a memset. If 'ZeroMemset' is true, that means it's a
/// memset of zero. 'MemcpyStrSrc' indicates whether the memcpy source is
/// constant so it does not need to be loaded. It returns EVT::Other if the
/// type should be determined using generic target-independent logic.
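// For example, an 8-byte-aligned copy of 16 or more bytes is emitted as i64
// chunks, smaller or less-aligned operations drop down to i32/i16, and
// anything else is left to the generic lowering.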
EVT HexagonTargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  if (Op.size() >= 8 && Op.isAligned(Align(8)))
    return MVT::i64;
  if (Op.size() >= 4 && Op.isAligned(Align(4)))
    return MVT::i32;
  if (Op.size() >= 2 && Op.isAligned(Align(2)))
    return MVT::i16;
  return MVT::Other;
}
bool HexagonTargetLowering::allowsMemoryAccess(
    LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
    Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const {
  MVT SVT = VT.getSimpleVT();
  if (Subtarget.isHVXVectorType(SVT, true))
    return allowsHvxMemoryAccess(SVT, Flags, Fast);
  return TargetLoweringBase::allowsMemoryAccess(
      Context, DL, VT, AddrSpace, Alignment, Flags, Fast);
}
bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *Fast) const {
  MVT SVT = VT.getSimpleVT();
  if (Subtarget.isHVXVectorType(SVT, true))
    return allowsHvxMisalignedMemoryAccesses(SVT, Flags, Fast);
  if (Fast)
    *Fast = 0;
  return false;
}
std::pair<const TargetRegisterClass*, uint8_t>
HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
      MVT VT) const {
  if (Subtarget.isHVXVectorType(VT, true)) {
    unsigned BitWidth = VT.getSizeInBits();
    unsigned VecWidth = Subtarget.getVectorLength() * 8;

    if (VT.getVectorElementType() == MVT::i1)
      return std::make_pair(&Hexagon::HvxQRRegClass, 1);
    if (BitWidth == VecWidth)
      return std::make_pair(&Hexagon::HvxVRRegClass, 1);
    assert(BitWidth == 2 * VecWidth);
    return std::make_pair(&Hexagon::HvxWRRegClass, 1);
  }

  return TargetLowering::findRepresentativeClass(TRI, VT);
}
bool HexagonTargetLowering::shouldReduceLoadWidth(SDNode *Load,
      ISD::LoadExtType ExtTy, EVT NewVT) const {
  // TODO: This may be worth removing. Check regression tests for diffs.
  if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
    return false;

  auto *L = cast<LoadSDNode>(Load);
  std::pair<SDValue,int> BO = getBaseAndOffset(L->getBasePtr());
  // Small-data object, do not shrink.
  if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
    return false;
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(BO.first)) {
    auto &HTM = static_cast<const HexagonTargetMachine&>(getTargetMachine());
    const auto *GO = dyn_cast_or_null<const GlobalObject>(GA->getGlobal());
    return !GO || !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, HTM);
  }
  return true;
}
void HexagonTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
      SDNode *Node) const {
  AdjustHvxInstrPostInstrSelection(MI, Node);
}
Value *HexagonTargetLowering::emitLoadLinked(IRBuilderBase &Builder,
                                             Type *ValueTy, Value *Addr,
                                             AtomicOrdering Ord) const {
  BasicBlock *BB = Builder.GetInsertBlock();
  Module *M = BB->getParent()->getParent();
  unsigned SZ = ValueTy->getPrimitiveSizeInBits();
  assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
  Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
                                   : Intrinsic::hexagon_L4_loadd_locked;
  Function *Fn = Intrinsic::getDeclaration(M, IntID);

  auto PtrTy = cast<PointerType>(Addr->getType());
  PointerType *NewPtrTy =
      Builder.getIntNTy(SZ)->getPointerTo(PtrTy->getAddressSpace());
  Addr = Builder.CreateBitCast(Addr, NewPtrTy);

  Value *Call = Builder.CreateCall(Fn, Addr, "larx");

  return Builder.CreateBitCast(Call, ValueTy);
}
/// Perform a store-conditional operation to Addr. Return the status of the
/// store. This should be 0 if the store succeeded, non-zero otherwise.
Value *HexagonTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
                                                   Value *Val, Value *Addr,
                                                   AtomicOrdering Ord) const {
  BasicBlock *BB = Builder.GetInsertBlock();
  Module *M = BB->getParent()->getParent();
  Type *Ty = Val->getType();
  unsigned SZ = Ty->getPrimitiveSizeInBits();

  Type *CastTy = Builder.getIntNTy(SZ);
  assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
  Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
                                   : Intrinsic::hexagon_S4_stored_locked;
  Function *Fn = Intrinsic::getDeclaration(M, IntID);

  unsigned AS = Addr->getType()->getPointerAddressSpace();
  Addr = Builder.CreateBitCast(Addr, CastTy->getPointerTo(AS));
  Val = Builder.CreateBitCast(Val, CastTy);

  Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
  Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
  Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
  return Ext;
}
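// The load-locked/store-conditional pair above is what AtomicExpand stitches
// into a retry loop when shouldExpandAtomicCmpXchgInIR below returns
// AtomicExpansionKind::LLSC.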
TargetLowering::AtomicExpansionKind
HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
  // Do not expand loads and stores that don't exceed 64 bits.
  return LI->getType()->getPrimitiveSizeInBits() > 64
             ? AtomicExpansionKind::LLOnly
             : AtomicExpansionKind::None;
}

TargetLowering::AtomicExpansionKind
HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
  // Do not expand loads and stores that don't exceed 64 bits.
  return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64
             ? AtomicExpansionKind::Expand
             : AtomicExpansionKind::None;
}

TargetLowering::AtomicExpansionKind
HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *AI) const {
  return AtomicExpansionKind::LLSC;
}