1 //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the ARMSelectionDAGInfo class.
11 //===----------------------------------------------------------------------===//
13 #include "ARMTargetMachine.h"
14 #include "llvm/CodeGen/SelectionDAG.h"
15 #include "llvm/IR/DerivedTypes.h"
18 #define DEBUG_TYPE "arm-selectiondag-info"
20 // Emit, if possible, a specialized version of the given Libcall. Typically this
21 // means selecting the appropriately aligned version, but we also convert memset
// of a constant zero value into memclr.
23 SDValue
ARMSelectionDAGInfo::EmitSpecializedLibcall(
24 SelectionDAG
&DAG
, const SDLoc
&dl
, SDValue Chain
, SDValue Dst
, SDValue Src
,
25 SDValue Size
, unsigned Align
, RTLIB::Libcall LC
) const {
// Look up the ARM subtarget of the current function and its lowering object.
26 const ARMSubtarget
&Subtarget
=
27 DAG
.getMachineFunction().getSubtarget
<ARMSubtarget
>();
28 const ARMTargetLowering
*TLI
= Subtarget
.getTargetLowering();
30 // Only use a specialized AEABI function if the default version of this
31 // Libcall is an AEABI function.
32 if (std::strncmp(TLI
->getLibcallName(LC
), "__aeabi", 7) != 0)
35 // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
36 // able to translate memset to memclr and use the value to index the function
// name table (FunctionNames).
46 AEABILibcall
= AEABI_MEMCPY
;
49 AEABILibcall
= AEABI_MEMMOVE
;
52 AEABILibcall
= AEABI_MEMSET
;
// A memset whose fill value is the constant 0 is redirected to memclr.
53 if (ConstantSDNode
*ConstantSrc
= dyn_cast
<ConstantSDNode
>(Src
))
54 if (ConstantSrc
->getZExtValue() == 0)
55 AEABILibcall
= AEABI_MEMCLR
;
61 // Choose the most-aligned libcall variant that we can
68 AlignVariant
= ALIGN8
;
// An alignment that is a multiple of 4 selects the 4-byte-aligned variant.
69 else if ((Align
& 3) == 0)
70 AlignVariant
= ALIGN4
;
72 AlignVariant
= ALIGN1
;
// Build the argument list; the same Entry object is reused for each push.
74 TargetLowering::ArgListTy Args
;
75 TargetLowering::ArgListEntry Entry
;
// The first argument is typed as the pointer-sized integer of the target.
76 Entry
.Ty
= DAG
.getDataLayout().getIntPtrType(*DAG
.getContext());
78 Args
.push_back(Entry
);
79 if (AEABILibcall
== AEABI_MEMCLR
) {
81 Args
.push_back(Entry
);
82 } else if (AEABILibcall
== AEABI_MEMSET
) {
83 // Adjust parameters for memset, EABI uses format (ptr, size, value),
84 // GNU library uses (ptr, value, size)
85 // See RTABI section 4.3.4
87 Args
.push_back(Entry
);
89 // Extend or truncate the argument to be an i32 value for the call.
90 if (Src
.getValueType().bitsGT(MVT::i32
))
91 Src
= DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i32
, Src
);
92 else if (Src
.getValueType().bitsLT(MVT::i32
))
93 Src
= DAG
.getNode(ISD::ZERO_EXTEND
, dl
, MVT::i32
, Src
);
// The fill value is passed with a 32-bit integer type.
96 Entry
.Ty
= Type::getInt32Ty(*DAG
.getContext());
98 Args
.push_back(Entry
);
101 Args
.push_back(Entry
);
104 Args
.push_back(Entry
);
// Libcall symbol names, indexed as [AEABILibcall][AlignVariant]; the columns
// are the unaligned, 4-byte-aligned and 8-byte-aligned entry points.
107 char const *FunctionNames
[4][3] = {
108 { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" },
109 { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
110 { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" },
111 { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
// The call uses the calling convention registered for LC, a void return
// type, and the external symbol selected from FunctionNames.
113 TargetLowering::CallLoweringInfo
CLI(DAG
);
117 TLI
->getLibcallCallingConv(LC
), Type::getVoidTy(*DAG
.getContext()),
118 DAG
.getExternalSymbol(FunctionNames
[AEABILibcall
][AlignVariant
],
119 TLI
->getPointerTy(DAG
.getDataLayout())),
122 std::pair
<SDValue
,SDValue
> CallResult
= TLI
->LowerCallTo(CLI
);
// Hand back the second element of the lowered-call result pair
// (presumably the output chain; confirm against LowerCallTo).
124 return CallResult
.second
;
// Lower a memcpy. The copy is handed to EmitSpecializedLibcall when it is not
// forced inline and its constant size exceeds the inline threshold of the
// subtarget. The inline expansion emits ARMISD::MEMCPY nodes for the whole
// 32-bit words, then loads and stores for the trailing bytes, and returns a
// TokenFactor built from the entries of TFOps.
127 SDValue
ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
128 SelectionDAG
&DAG
, const SDLoc
&dl
, SDValue Chain
, SDValue Dst
, SDValue Src
,
129 SDValue Size
, unsigned Align
, bool isVolatile
, bool AlwaysInline
,
130 MachinePointerInfo DstPtrInfo
, MachinePointerInfo SrcPtrInfo
) const {
131 const ARMSubtarget
&Subtarget
=
132 DAG
.getMachineFunction().getSubtarget
<ARMSubtarget
>();
133 // Do repeated 4-byte loads and stores. To be improved.
134 // This requires 4-byte alignment.
135 if ((Align
& 3) != 0)
137 // This requires the copy size to be a constant, preferably
138 // within a subtarget-specific limit.
139 ConstantSDNode
*ConstantSize
= dyn_cast
<ConstantSDNode
>(Size
);
141 return EmitSpecializedLibcall(DAG
, dl
, Chain
, Dst
, Src
, Size
, Align
,
143 uint64_t SizeVal
= ConstantSize
->getZExtValue();
// Unless inlining is forced, copies above the subtarget threshold go to the
// specialized libcall instead.
144 if (!AlwaysInline
&& SizeVal
> Subtarget
.getMaxInlineSizeThreshold())
145 return EmitSpecializedLibcall(DAG
, dl
, Chain
, Dst
, Src
, Size
, Align
,
// Split the size into whole 32-bit words (NumMemOps) and 0-3 trailing bytes
// (BytesLeft).
148 unsigned BytesLeft
= SizeVal
& 3;
149 unsigned NumMemOps
= SizeVal
>> 2;
150 unsigned EmittedNumMemOps
= 0;
154 // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
155 const unsigned MaxLoadsInLDM
= Subtarget
.isThumb1Only() ? 4 : 6;
158 uint64_t SrcOff
= 0, DstOff
= 0;
160 // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
161 // VLDM/VSTM and make this code emit it when appropriate. This would reduce
162 // pressure on the general purpose registers. However this seems harder to map
163 // onto the register allocator's view of the world.
165 // The number of MEMCPY pseudo-instructions to emit. We use up to
166 // MaxLoadsInLDM registers per MEMCPY, which will get lowered into ldm/stm
167 // later on. This is a lower bound on the number of MEMCPY operations we must
// emit.
// Ceiling division: NumMemOps / MaxLoadsInLDM, rounded up.
169 unsigned NumMEMCPYs
= (NumMemOps
+ MaxLoadsInLDM
- 1) / MaxLoadsInLDM
;
171 // Code size optimisation: do not inline memcpy if expansion results in
172 // more instructions than the library call.
173 if (NumMEMCPYs
> 1 && Subtarget
.hasMinSize()) {
// Each ARMISD::MEMCPY node is created with two i32 results, a chain and glue.
177 SDVTList VTs
= DAG
.getVTList(MVT::i32
, MVT::i32
, MVT::Other
, MVT::Glue
);
179 for (unsigned I
= 0; I
!= NumMEMCPYs
; ++I
) {
180 // Evenly distribute registers among MEMCPY operations to reduce register
// pressure.
// NextEmittedNumMemOps is the cumulative word count after this iteration;
// its difference from the previous count is the register count of this node.
182 unsigned NextEmittedNumMemOps
= NumMemOps
* (I
+ 1) / NumMEMCPYs
;
183 unsigned NumRegs
= NextEmittedNumMemOps
- EmittedNumMemOps
;
// Result 0 of the node becomes the new Dst, result 1 the new Src and
// result 2 the new Chain.
185 Dst
= DAG
.getNode(ARMISD::MEMCPY
, dl
, VTs
, Chain
, Dst
, Src
,
186 DAG
.getConstant(NumRegs
, dl
, MVT::i32
));
187 Src
= Dst
.getValue(1);
188 Chain
= Dst
.getValue(2);
// Keep the pointer info in step with the NumRegs * VTSize bytes just copied.
190 DstPtrInfo
= DstPtrInfo
.getWithOffset(NumRegs
* VTSize
);
191 SrcPtrInfo
= SrcPtrInfo
.getWithOffset(NumRegs
* VTSize
);
193 EmittedNumMemOps
= NextEmittedNumMemOps
;
199 // Issue loads / stores for the trailing (1 - 3) bytes.
// With 2 or 3 bytes left the next piece is an i16 (2 bytes); otherwise it is
// an i8 (1 byte).
200 auto getRemainingValueType
= [](unsigned BytesLeft
) {
201 return (BytesLeft
>= 2) ? MVT::i16
: MVT::i8
;
203 auto getRemainingSize
= [](unsigned BytesLeft
) {
204 return (BytesLeft
>= 2) ? 2 : 1;
// Saved so that BytesLeft can be reset to it before the store pass.
207 unsigned BytesLeftSave
= BytesLeft
;
// Load pass: read each trailing piece from Src + SrcOff and record result 1
// of the load in TFOps for the TokenFactor that follows.
210 VT
= getRemainingValueType(BytesLeft
);
211 VTSize
= getRemainingSize(BytesLeft
);
212 Loads
[i
] = DAG
.getLoad(VT
, dl
, Chain
,
213 DAG
.getNode(ISD::ADD
, dl
, MVT::i32
, Src
,
214 DAG
.getConstant(SrcOff
, dl
, MVT::i32
)),
215 SrcPtrInfo
.getWithOffset(SrcOff
));
216 TFOps
[i
] = Loads
[i
].getValue(1);
// Merge the recorded load results into a single chain.
221 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
222 makeArrayRef(TFOps
, i
));
225 BytesLeft
= BytesLeftSave
;
// Store pass: write each loaded piece to Dst + DstOff, recording the store
// nodes in TFOps.
227 VT
= getRemainingValueType(BytesLeft
);
228 VTSize
= getRemainingSize(BytesLeft
);
229 TFOps
[i
] = DAG
.getStore(Chain
, dl
, Loads
[i
],
230 DAG
.getNode(ISD::ADD
, dl
, MVT::i32
, Dst
,
231 DAG
.getConstant(DstOff
, dl
, MVT::i32
)),
232 DstPtrInfo
.getWithOffset(DstOff
));
// The result is a TokenFactor over the first i entries of TFOps.
237 return DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
238 makeArrayRef(TFOps
, i
));
// Lower a memmove by handing its operands (chain, destination, source, size
// and alignment) to EmitSpecializedLibcall and returning that result.
241 SDValue
ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
242 SelectionDAG
&DAG
, const SDLoc
&dl
, SDValue Chain
, SDValue Dst
, SDValue Src
,
243 SDValue Size
, unsigned Align
, bool isVolatile
,
244 MachinePointerInfo DstPtrInfo
, MachinePointerInfo SrcPtrInfo
) const {
245 return EmitSpecializedLibcall(DAG
, dl
, Chain
, Dst
, Src
, Size
, Align
,
249 SDValue
ARMSelectionDAGInfo::EmitTargetCodeForMemset(
250 SelectionDAG
&DAG
, const SDLoc
&dl
, SDValue Chain
, SDValue Dst
, SDValue Src
,
251 SDValue Size
, unsigned Align
, bool isVolatile
,
252 MachinePointerInfo DstPtrInfo
) const {
253 return EmitSpecializedLibcall(DAG
, dl
, Chain
, Dst
, Src
, Size
, Align
,