[AMDGPU] New gfx940 mfma instructions
[llvm-project.git] / llvm / lib / Target / AArch64 / AArch64SelectionDAGInfo.cpp
blob893269c1a7efd39411f2b4a86fb0f1acbcb27286
1 //===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the AArch64SelectionDAGInfo class.
11 //===----------------------------------------------------------------------===//
13 #include "AArch64TargetMachine.h"
14 using namespace llvm;
16 #define DEBUG_TYPE "aarch64-selectiondag-info"
18 SDValue AArch64SelectionDAGInfo::EmitMOPS(AArch64ISD::NodeType SDOpcode,
19 SelectionDAG &DAG, const SDLoc &DL,
20 SDValue Chain, SDValue Dst,
21 SDValue SrcOrValue, SDValue Size,
22 Align Alignment, bool isVolatile,
23 MachinePointerInfo DstPtrInfo,
24 MachinePointerInfo SrcPtrInfo) const {
26 // Get the constant size of the copy/set.
27 uint64_t ConstSize = 0;
28 if (auto *C = dyn_cast<ConstantSDNode>(Size))
29 ConstSize = C->getZExtValue();
31 const bool IsSet = SDOpcode == AArch64ISD::MOPS_MEMSET ||
32 SDOpcode == AArch64ISD::MOPS_MEMSET_TAGGING;
34 const auto MachineOpcode = [&]() {
35 switch (SDOpcode) {
36 case AArch64ISD::MOPS_MEMSET:
37 return AArch64::MOPSMemorySetPseudo;
38 case AArch64ISD::MOPS_MEMSET_TAGGING:
39 return AArch64::MOPSMemorySetTaggingPseudo;
40 case AArch64ISD::MOPS_MEMCOPY:
41 return AArch64::MOPSMemoryCopyPseudo;
42 case AArch64ISD::MOPS_MEMMOVE:
43 return AArch64::MOPSMemoryMovePseudo;
44 default:
45 llvm_unreachable("Unhandled MOPS ISD Opcode");
47 }();
49 MachineMemOperand::Flags Flags = MachineMemOperand::MOStore;
50 if (isVolatile)
51 Flags |= MachineMemOperand::MOVolatile;
52 if (!IsSet)
53 Flags |= MachineMemOperand::MOLoad;
55 MachineFunction &MF = DAG.getMachineFunction();
57 auto *DstOp =
58 MF.getMachineMemOperand(DstPtrInfo, Flags, ConstSize, Alignment);
59 auto *SrcOp =
60 MF.getMachineMemOperand(SrcPtrInfo, Flags, ConstSize, Alignment);
62 if (IsSet) {
63 // Extend value to i64 if required
64 if (SrcOrValue.getValueType() != MVT::i64)
65 SrcOrValue = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, SrcOrValue);
66 SDValue Ops[] = {Dst, Size, SrcOrValue, Chain};
67 const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::Other};
68 MachineSDNode *Node = DAG.getMachineNode(MachineOpcode, DL, ResultTys, Ops);
69 DAG.setNodeMemRefs(Node, {DstOp});
70 return SDValue(Node, 2);
71 } else {
72 SDValue Ops[] = {Dst, SrcOrValue, Size, Chain};
73 const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::i64, MVT::Other};
74 MachineSDNode *Node = DAG.getMachineNode(MachineOpcode, DL, ResultTys, Ops);
75 DAG.setNodeMemRefs(Node, {DstOp, SrcOp});
76 return SDValue(Node, 3);
80 SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy(
81 SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
82 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
83 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
84 const AArch64Subtarget &STI =
85 DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
86 if (STI.hasMOPS())
87 return EmitMOPS(AArch64ISD::MOPS_MEMCOPY, DAG, DL, Chain, Dst, Src, Size,
88 Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
89 return SDValue();
92 SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
93 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
94 SDValue Size, Align Alignment, bool isVolatile,
95 MachinePointerInfo DstPtrInfo) const {
96 const AArch64Subtarget &STI =
97 DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
99 if (STI.hasMOPS()) {
100 return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size,
101 Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{});
104 // Check to see if there is a specialized entry-point for memory zeroing.
105 ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
106 ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
107 const char *bzeroName =
108 (V && V->isZero())
109 ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
110 : nullptr;
111 // For small size (< 256), it is not beneficial to use bzero
112 // instead of memset.
113 if (bzeroName && (!SizeValue || SizeValue->getZExtValue() > 256)) {
114 const AArch64TargetLowering &TLI = *STI.getTargetLowering();
116 EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
117 Type *IntPtrTy = Type::getInt8PtrTy(*DAG.getContext());
118 TargetLowering::ArgListTy Args;
119 TargetLowering::ArgListEntry Entry;
120 Entry.Node = Dst;
121 Entry.Ty = IntPtrTy;
122 Args.push_back(Entry);
123 Entry.Node = Size;
124 Args.push_back(Entry);
125 TargetLowering::CallLoweringInfo CLI(DAG);
126 CLI.setDebugLoc(dl)
127 .setChain(Chain)
128 .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
129 DAG.getExternalSymbol(bzeroName, IntPtr),
130 std::move(Args))
131 .setDiscardResult();
132 std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
133 return CallResult.second;
135 return SDValue();
138 SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove(
139 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
140 SDValue Size, Align Alignment, bool isVolatile,
141 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
142 const AArch64Subtarget &STI =
143 DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
144 if (STI.hasMOPS()) {
145 return EmitMOPS(AArch64ISD::MOPS_MEMMOVE, DAG, dl, Chain, Dst, Src, Size,
146 Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
148 return SDValue();
// Object sizes at or above this value are tagged with an STGloop/STZGloop
// pseudo instead of an unrolled store sequence. A negative value disables the
// loop form entirely.
static constexpr int kSetTagLoopThreshold = 176;
153 static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
154 SDValue Chain, SDValue Ptr, uint64_t ObjSize,
155 const MachineMemOperand *BaseMemOperand,
156 bool ZeroData) {
157 MachineFunction &MF = DAG.getMachineFunction();
158 unsigned ObjSizeScaled = ObjSize / 16;
160 SDValue TagSrc = Ptr;
161 if (Ptr.getOpcode() == ISD::FrameIndex) {
162 int FI = cast<FrameIndexSDNode>(Ptr)->getIndex();
163 Ptr = DAG.getTargetFrameIndex(FI, MVT::i64);
164 // A frame index operand may end up as [SP + offset] => it is fine to use SP
165 // register as the tag source.
166 TagSrc = DAG.getRegister(AArch64::SP, MVT::i64);
169 const unsigned OpCode1 = ZeroData ? AArch64ISD::STZG : AArch64ISD::STG;
170 const unsigned OpCode2 = ZeroData ? AArch64ISD::STZ2G : AArch64ISD::ST2G;
172 SmallVector<SDValue, 8> OutChains;
173 unsigned OffsetScaled = 0;
174 while (OffsetScaled < ObjSizeScaled) {
175 if (ObjSizeScaled - OffsetScaled >= 2) {
176 SDValue AddrNode =
177 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(OffsetScaled * 16), dl);
178 SDValue St = DAG.getMemIntrinsicNode(
179 OpCode2, dl, DAG.getVTList(MVT::Other),
180 {Chain, TagSrc, AddrNode},
181 MVT::v4i64,
182 MF.getMachineMemOperand(BaseMemOperand, OffsetScaled * 16, 16 * 2));
183 OffsetScaled += 2;
184 OutChains.push_back(St);
185 continue;
188 if (ObjSizeScaled - OffsetScaled > 0) {
189 SDValue AddrNode =
190 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(OffsetScaled * 16), dl);
191 SDValue St = DAG.getMemIntrinsicNode(
192 OpCode1, dl, DAG.getVTList(MVT::Other),
193 {Chain, TagSrc, AddrNode},
194 MVT::v2i64,
195 MF.getMachineMemOperand(BaseMemOperand, OffsetScaled * 16, 16));
196 OffsetScaled += 1;
197 OutChains.push_back(St);
201 SDValue Res = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
202 return Res;
205 SDValue AArch64SelectionDAGInfo::EmitTargetCodeForSetTag(
206 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Addr,
207 SDValue Size, MachinePointerInfo DstPtrInfo, bool ZeroData) const {
208 uint64_t ObjSize = cast<ConstantSDNode>(Size)->getZExtValue();
209 assert(ObjSize % 16 == 0);
211 MachineFunction &MF = DAG.getMachineFunction();
212 MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand(
213 DstPtrInfo, MachineMemOperand::MOStore, ObjSize, Align(16));
215 bool UseSetTagRangeLoop =
216 kSetTagLoopThreshold >= 0 && (int)ObjSize >= kSetTagLoopThreshold;
217 if (!UseSetTagRangeLoop)
218 return EmitUnrolledSetTag(DAG, dl, Chain, Addr, ObjSize, BaseMemOperand,
219 ZeroData);
221 const EVT ResTys[] = {MVT::i64, MVT::i64, MVT::Other};
223 unsigned Opcode;
224 if (Addr.getOpcode() == ISD::FrameIndex) {
225 int FI = cast<FrameIndexSDNode>(Addr)->getIndex();
226 Addr = DAG.getTargetFrameIndex(FI, MVT::i64);
227 Opcode = ZeroData ? AArch64::STZGloop : AArch64::STGloop;
228 } else {
229 Opcode = ZeroData ? AArch64::STZGloop_wback : AArch64::STGloop_wback;
231 SDValue Ops[] = {DAG.getTargetConstant(ObjSize, dl, MVT::i64), Addr, Chain};
232 SDNode *St = DAG.getMachineNode(Opcode, dl, ResTys, Ops);
234 DAG.setNodeMemRefs(cast<MachineSDNode>(St), {BaseMemOperand});
235 return SDValue(St, 2);