//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "lower-mem-intrinsics"

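// Emits a load/store loop that copies a compile-time-constant number of bytes
// from SrcAddr to DstAddr. The bulk of the copy uses the operand type chosen
// by TTI::getMemcpyLoopLoweringType; any leftover bytes are copied by a short
// straight-line sequence of progressively narrower loads and stores. As an
// illustrative sketch only (the exact IR depends on the target's TTI answers),
// a 9-byte copy with an i32 loop operand becomes two i32 iterations in a
// "load-store-loop" block followed by a single residual i8 load/store emitted
// into the "memcpy-split" block.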
void llvm::createMemCpyLoopKnownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
    ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  // No need to expand zero length copies.
  if (CopyLen->isZero())
    return;

  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB = nullptr;
  Function *ParentFunc = PreLoopBB->getParent();
  LLVMContext &Ctx = PreLoopBB->getContext();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *TypeOfCopyLen = CopyLen->getType();
  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");

  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;

  if (LoopEndCount != 0) {
    PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split");
    BasicBlock *LoopBB =
        BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB);
    PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);

    IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

    // Cast the Src and Dst pointers to pointers to the loop operand type (if
    // needed).
    PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
    PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
    if (SrcAddr->getType() != SrcOpType) {
      SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
    }
    if (DstAddr->getType() != DstOpType) {
      DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
    }

    Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
    Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));

    IRBuilder<> LoopBuilder(LoopBB);
    PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index");
    LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);
    // Loop Body
    Value *SrcGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
    LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                   PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
    StoreInst *Store = LoopBuilder.CreateAlignedStore(
        Load, DstGEP, PartDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *NewIndex =
        LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
    LoopIndex->addIncoming(NewIndex, LoopBB);

    // Create the loop branch condition.
    Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
                             LoopBB, PostLoopBB);
  }

  uint64_t BytesCopied = LoopEndCount * LoopOpSize;
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
  if (RemainingBytes) {
    IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI()
                                    : InsertBefore);

    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, SrcAlign.value(),
                                          DstAlign.value(), AtomicElementSize);

    for (auto *OpTy : RemainingOps) {
      Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
      Align PartDstAlign(commonAlignment(DstAlign, BytesCopied));

      // Calculate the new index
      unsigned OperandSize = DL.getTypeStoreSize(OpTy);
      assert(
          (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
          "Atomic memcpy lowering is not supported for selected operand size");

      uint64_t GepIndex = BytesCopied / OperandSize;
      assert(GepIndex * OperandSize == BytesCopied &&
             "Division should have no Remainder!");
      // Cast source to operand type and load
      PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS);
      Value *CastedSrc = SrcAddr->getType() == SrcPtrType
                             ? SrcAddr
                             : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
      Value *SrcGEP = RBuilder.CreateInBoundsGEP(
          OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
      LoadInst *Load =
          RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
      if (!CanOverlap) {
        // Set alias scope for loads.
        Load->setMetadata(LLVMContext::MD_alias_scope,
                          MDNode::get(Ctx, NewScope));
      }

      // Cast destination to operand type and store.
      PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
      Value *CastedDst = DstAddr->getType() == DstPtrType
                             ? DstAddr
                             : RBuilder.CreateBitCast(DstAddr, DstPtrType);
      Value *DstGEP = RBuilder.CreateInBoundsGEP(
          OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
      StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
      if (!CanOverlap) {
        // Indicate that stores don't overlap loads.
        Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
      }
      if (AtomicElementSize) {
        Load->setAtomic(AtomicOrdering::Unordered);
        Store->setAtomic(AtomicOrdering::Unordered);
      }
      BytesCopied += OperandSize;
    }
  }
  assert(BytesCopied == CopyLen->getZExtValue() &&
         "Bytes copied should match size in the call!");
}

void llvm::createMemCpyLoopUnknownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
    Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
    bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB =
      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");

  Function *ParentFunc = PreLoopBB->getParent();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  LLVMContext &Ctx = PreLoopBB->getContext();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

  PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
  PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
  if (SrcAddr->getType() != SrcOpType) {
    SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
  }
  if (DstAddr->getType() != DstOpType) {
    DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
  }

  // Calculate the loop trip count, and remaining bytes to copy after the loop.
  Type *CopyLenType = CopyLen->getType();
  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
  assert(ILengthType &&
         "expected size argument to memcpy to be an integer type!");
  Type *Int8Type = Type::getInt8Ty(Ctx);
  bool LoopOpIsInt8 = LoopOpType == Int8Type;
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
  Value *RuntimeLoopCount = LoopOpIsInt8
                                ? CopyLen
                                : PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);

  BasicBlock *LoopBB =
      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
  IRBuilder<> LoopBuilder(LoopBB);

  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);

  Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
  LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                 PartSrcAlign, SrcIsVolatile);
  if (!CanOverlap) {
    // Set alias scope for loads.
    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
  }
  Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
  StoreInst *Store =
      LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
  if (!CanOverlap) {
    // Indicate that stores don't overlap loads.
    Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
  }
  if (AtomicElementSize) {
    Load->setAtomic(AtomicOrdering::Unordered);
    Store->setAtomic(AtomicOrdering::Unordered);
  }
  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  bool requiresResidual =
      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
  if (requiresResidual) {
    Type *ResLoopOpType = AtomicElementSize
                              ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
                              : Int8Type;
    unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
    assert(ResLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
           "Store size is expected to match type size");

    Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);

    // Loop body for the residual copy.
    BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
                                               PreLoopBB->getParent(),
                                               nullptr);
    // Residual loop header.
    BasicBlock *ResHeaderBB = BasicBlock::Create(
        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);

    // Need to update the pre-loop basic block to branch to the correct place:
    // branch to the main loop if the count is non-zero, branch to the residual
    // loop if the copy size is smaller than 1 iteration of the main loop but
    // non-zero, and finally branch to after the residual loop if the memcpy
    // size is zero.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, ResHeaderBB);
    PreLoopBB->getTerminator()->eraseFromParent();

    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        ResHeaderBB);

    // Determine if we need to branch to the residual loop or bypass it.
    IRBuilder<> RHBuilder(ResHeaderBB);
    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
                           ResLoopBB, PostLoopBB);

    // Copy the residual with single byte load/store loop.
    IRBuilder<> ResBuilder(ResLoopBB);
    PHINode *ResidualIndex =
        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
    ResidualIndex->addIncoming(Zero, ResHeaderBB);

    Value *SrcAsResLoopOpType = ResBuilder.CreateBitCast(
        SrcAddr, PointerType::get(ResLoopOpType, SrcAS));
    Value *DstAsResLoopOpType = ResBuilder.CreateBitCast(
        DstAddr, PointerType::get(ResLoopOpType, DstAS));
    Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
    Value *SrcGEP = ResBuilder.CreateInBoundsGEP(
        ResLoopOpType, SrcAsResLoopOpType, FullOffset);
    LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
                                                  PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP = ResBuilder.CreateInBoundsGEP(
        ResLoopOpType, DstAsResLoopOpType, FullOffset);
    StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *ResNewIndex = ResBuilder.CreateAdd(
        ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);

    // Create the loop branch condition.
    ResBuilder.CreateCondBr(
        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
        PostLoopBB);
  } else {
    // In this case the loop operand type was a byte, and there is no need for
    // a residual loop to copy the remaining memory after the main loop.
    // We do however need to patch up the control flow by creating the
    // terminators for the preloop block and the memcpy loop.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, PostLoopBB);
    PreLoopBB->getTerminator()->eraseFromParent();
    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        PostLoopBB);
  }
}

// Lower memmove to IR. memmove is required to correctly copy overlapping
// memory regions; therefore, it has to check the relative positions of the
// source and destination pointers and choose the copy direction accordingly.
//
// The code below is an IR rendition of this C function:
//
// void* memmove(void* dst, const void* src, size_t n) {
//   unsigned char* d = dst;
//   const unsigned char* s = src;
//   if (s < d) {
//     // copy backwards
//     while (n--) {
//       d[n] = s[n];
//     }
//   } else {
//     // copy forward
//     for (size_t i = 0; i < n; ++i) {
//       d[i] = s[i];
//     }
//   }
//   return dst;
// }
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
                              Value *DstAddr, Value *CopyLen, Align SrcAlign,
                              Align DstAlign, bool SrcIsVolatile,
                              bool DstIsVolatile,
                              const TargetTransformInfo &TTI) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();
  // TODO: Use different element type if possible?
  Type *EltTy = Type::getInt8Ty(F->getContext());

  // Create a comparison of src and dst, based on which we jump to either
  // the forward-copy part of the function (if src >= dst) or the backwards-copy
  // part (if src < dst).
  // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
  // structure. Its block terminators (unconditional branches) are replaced by
  // the appropriate conditional branches when the loop is built.
  ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
                                      SrcAddr, DstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
                                &ElseTerm);

  // Each part of the function consists of two blocks:
  //   copy_backwards:      used to skip the loop when n == 0
  //   copy_backwards_loop: the actual backwards loop BB
  //   copy_forward:        used to skip the loop when n == 0
  //   copy_forward_loop:   the actual forward loop BB
  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  CopyBackwardsBB->setName("copy_backwards");
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  CopyForwardBB->setName("copy_forward");
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  unsigned PartSize = DL.getTypeStoreSize(EltTy);
  Align PartSrcAlign(commonAlignment(SrcAlign, PartSize));
  Align PartDstAlign(commonAlignment(DstAlign, PartSize));

  // Initial comparison of n == 0 that lets us skip the loops altogether. Shared
  // between both backwards and forward copy clauses.
  ICmpInst *CompareN =
      new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
                   ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");

  // Copying backwards.
  BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "copy_backwards_loop",
                                          F, CopyForwardBB);
  IRBuilder<> LoopBuilder(LoopBB);

  PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  Value *IndexPtr = LoopBuilder.CreateSub(
      LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
  Value *Element = LoopBuilder.CreateAlignedLoad(
      EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
      PartSrcAlign, "element");
  LoopBuilder.CreateAlignedStore(
      Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr),
      PartDstAlign);
  LoopBuilder.CreateCondBr(
      LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
      ExitBB, LoopBB);
  LoopPhi->addIncoming(IndexPtr, LoopBB);
  LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
  BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
  ThenTerm->eraseFromParent();

  // Copying forward.
  BasicBlock *FwdLoopBB =
      BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB);
  IRBuilder<> FwdLoopBuilder(FwdLoopBB);
  PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
  Value *SrcGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi);
  Value *FwdElement =
      FwdLoopBuilder.CreateAlignedLoad(EltTy, SrcGEP, PartSrcAlign, "element");
  Value *DstGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi);
  FwdLoopBuilder.CreateAlignedStore(FwdElement, DstGEP, PartDstAlign);
  Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
      FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
  FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
                              ExitBB, FwdLoopBB);
  FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
  FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);

  BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
  ElseTerm->eraseFromParent();
}

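// Emits a simple store loop for memset: the original block conditionally
// branches to a "loadstoreloop" block (skipped entirely when CopyLen is zero)
// that stores SetValue once per iteration before falling through to the
// "split" continuation block. The store width is the type of SetValue, i.e. a
// single byte for the memset intrinsic.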
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                             Value *CopyLen, Value *SetValue, Align DstAlign,
                             bool IsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();
  BasicBlock *NewBB = OrigBB->splitBasicBlock(InsertBefore, "split");
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);

  IRBuilder<> Builder(OrigBB->getTerminator());

  // Cast pointer to the type of value getting stored
  unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
  DstAddr = Builder.CreateBitCast(DstAddr,
                                  PointerType::get(SetValue->getType(), dstAS));

  Builder.CreateCondBr(
      Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
      LoopBB);
  OrigBB->getTerminator()->eraseFromParent();

  unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
  Align PartAlign(commonAlignment(DstAlign, PartSize));

  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);

  LoopBuilder.CreateAlignedStore(
      SetValue,
      LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
      PartAlign, IsVolatile);

  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
                           NewBB);
}

template <typename T>
static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
  if (SE) {
    auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
    auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
      return false;
  }
  return true;
}

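// Typical usage sketch (illustrative only, not taken from a particular pass):
// a lowering pass expands each memcpy it cannot leave to the backend and then
// erases the intrinsic itself, since the expansion only inserts the loop in
// front of it:
//
//   if (auto *Memcpy = dyn_cast<MemCpyInst>(&I)) {
//     expandMemCpyAsLoop(Memcpy, TTI, SE); // SE may be null
//     Memcpy->eraseFromParent();
//   }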
void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
                              const TargetTransformInfo &TTI,
                              ScalarEvolution *SE) {
  bool CanOverlap = canOverlap(Memcpy, SE);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ Memcpy->getLength(),
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  }
}

bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
                               const TargetTransformInfo &TTI) {
  Value *CopyLen = Memmove->getLength();
  Value *SrcAddr = Memmove->getRawSource();
  Value *DstAddr = Memmove->getRawDest();
  Align SrcAlign = Memmove->getSourceAlign().valueOrOne();
  Align DstAlign = Memmove->getDestAlign().valueOrOne();
  bool SrcIsVolatile = Memmove->isVolatile();
  bool DstIsVolatile = SrcIsVolatile;
  IRBuilder<> CastBuilder(Memmove);

  unsigned SrcAS = SrcAddr->getType()->getPointerAddressSpace();
  unsigned DstAS = DstAddr->getType()->getPointerAddressSpace();
  if (SrcAS != DstAS) {
    if (!TTI.addrspacesMayAlias(SrcAS, DstAS)) {
      // We may not be able to emit a pointer comparison, but we don't have
      // to. Expand as memcpy.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
        createMemCpyLoopKnownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                  CI, SrcAlign, DstAlign, SrcIsVolatile,
                                  DstIsVolatile,
                                  /*CanOverlap=*/false, TTI);
      } else {
        createMemCpyLoopUnknownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                    CopyLen, SrcAlign, DstAlign, SrcIsVolatile,
                                    DstIsVolatile,
                                    /*CanOverlap=*/false, TTI);
      }

      return true;
    }

    if (TTI.isValidAddrSpaceCast(DstAS, SrcAS))
      DstAddr = CastBuilder.CreateAddrSpaceCast(DstAddr, SrcAddr->getType());
    else if (TTI.isValidAddrSpaceCast(SrcAS, DstAS))
      SrcAddr = CastBuilder.CreateAddrSpaceCast(SrcAddr, DstAddr->getType());
    else {
      // We don't know generically if it's legal to introduce an
      // addrspacecast. We need to know either if it's legal to insert an
      // addrspacecast, or if the address spaces cannot alias.
      LLVM_DEBUG(
          dbgs() << "Do not know how to expand memmove between different "
                    "address spaces\n");
      return false;
    }
  }

  createMemMoveLoop(
      /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign,
      SrcIsVolatile, DstIsVolatile, TTI);
  return true;
}

void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
  createMemSetLoop(/* InsertBefore */ Memset,
                   /* DstAddr */ Memset->getRawDest(),
                   /* CopyLen */ Memset->getLength(),
                   /* SetValue */ Memset->getValue(),
                   /* Alignment */ Memset->getDestAlign().valueOrOne(),
                   Memset->isVolatile());
}

void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
                                    const TargetTransformInfo &TTI,
                                    ScalarEvolution *SE) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ AtomicMemcpy->getLength(),
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  }
}