//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;
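
// Expansion strategy when the copy length is a compile-time constant: emit a
// counted loop that copies CopyLen / LoopOpSize elements of the
// target-preferred operand type, then copy the remaining tail with a short
// straight-line sequence of narrower loads and stores.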
void llvm::createMemCpyLoopKnownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
    ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
    Optional<uint32_t> AtomicElementSize) {
  // No need to expand zero length copies.
  if (CopyLen->isZero())
    return;

  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB = nullptr;
  Function *ParentFunc = PreLoopBB->getParent();
  LLVMContext &Ctx = PreLoopBB->getContext();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *TypeOfCopyLen = CopyLen->getType();
  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");

  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;
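
  // LoopEndCount is the number of full LoopOpType-wide copies; the remaining
  // CopyLen % LoopOpSize bytes are handled by the residual code emitted after
  // the loop.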
  if (LoopEndCount != 0) {
    // Split the basic block at InsertBefore: everything after the memcpy goes
    // into PostLoopBB.
    PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split");
    BasicBlock *LoopBB =
        BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB);
    PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);

    IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

    // Cast the Src and Dst pointers to pointers to the loop operand type (if
    // needed).
    PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
    PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
    if (SrcAddr->getType() != SrcOpType) {
      SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
    }
    if (DstAddr->getType() != DstOpType) {
      DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
    }

    Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
    Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));

    IRBuilder<> LoopBuilder(LoopBB);
    PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index");
    LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);

    // Loop body: load one LoopOpType element from Src and store it to Dst.
    Value *SrcGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
    LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                   PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
    StoreInst *Store = LoopBuilder.CreateAlignedStore(
        Load, DstGEP, PartDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *NewIndex =
        LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
    LoopIndex->addIncoming(NewIndex, LoopBB);

    // Create the loop branch condition.
    Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
                             LoopBB, PostLoopBB);
  }
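
  // Copy whatever bytes the main loop did not cover with a short straight-line
  // sequence of progressively narrower loads and stores; the operand types are
  // chosen by the target via getMemcpyLoopResidualLoweringType.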
  uint64_t BytesCopied = LoopEndCount * LoopOpSize;
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
  if (RemainingBytes) {
    IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI()
                                    : InsertBefore);

    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, SrcAlign.value(),
                                          DstAlign.value(), AtomicElementSize);

    for (auto OpTy : RemainingOps) {
      Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
      Align PartDstAlign(commonAlignment(DstAlign, BytesCopied));

      // Calculate the new index.
      unsigned OperandSize = DL.getTypeStoreSize(OpTy);
      assert(
          (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
          "Atomic memcpy lowering is not supported for selected operand size");

      uint64_t GepIndex = BytesCopied / OperandSize;
      assert(GepIndex * OperandSize == BytesCopied &&
             "Division should have no Remainder!");

      // Cast source to operand type and load.
      PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS);
      Value *CastedSrc = SrcAddr->getType() == SrcPtrType
                             ? SrcAddr
                             : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
      Value *SrcGEP = RBuilder.CreateInBoundsGEP(
          OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
      LoadInst *Load =
          RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
      if (!CanOverlap) {
        // Set alias scope for loads.
        Load->setMetadata(LLVMContext::MD_alias_scope,
                          MDNode::get(Ctx, NewScope));
      }

      // Cast destination to operand type and store.
      PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
      Value *CastedDst = DstAddr->getType() == DstPtrType
                             ? DstAddr
                             : RBuilder.CreateBitCast(DstAddr, DstPtrType);
      Value *DstGEP = RBuilder.CreateInBoundsGEP(
          OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
      StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
      if (!CanOverlap) {
        // Indicate that stores don't overlap loads.
        Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
      }
      if (AtomicElementSize) {
        Load->setAtomic(AtomicOrdering::Unordered);
        Store->setAtomic(AtomicOrdering::Unordered);
      }
      BytesCopied += OperandSize;
    }
  }
  assert(BytesCopied == CopyLen->getZExtValue() &&
         "Bytes copied should match size in the call!");
}
void llvm::createMemCpyLoopUnknownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
    Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
    bool CanOverlap, const TargetTransformInfo &TTI,
    Optional<uint32_t> AtomicElementSize) {
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB =
      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");

  Function *ParentFunc = PreLoopBB->getParent();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  LLVMContext &Ctx = PreLoopBB->getContext();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

  PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
  PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
  if (SrcAddr->getType() != SrcOpType) {
    SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
  }
  if (DstAddr->getType() != DstOpType) {
    DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
  }

  // Calculate the loop trip count, and remaining bytes to copy after the loop.
  Type *CopyLenType = CopyLen->getType();
  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
  assert(ILengthType &&
         "expected size argument to memcpy to be an integer type!");
  Type *Int8Type = Type::getInt8Ty(Ctx);
  bool LoopOpIsInt8 = LoopOpType == Int8Type;
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
  Value *RuntimeLoopCount = LoopOpIsInt8
                                ? CopyLen
                                : PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);

  BasicBlock *LoopBB =
      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
  IRBuilder<> LoopBuilder(LoopBB);

  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);

  Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
  LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                 PartSrcAlign, SrcIsVolatile);
  if (!CanOverlap) {
    // Set alias scope for loads.
    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
  }
  Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
  StoreInst *Store =
      LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
  if (!CanOverlap) {
    // Indicate that stores don't overlap loads.
    Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
  }
  if (AtomicElementSize) {
    Load->setAtomic(AtomicOrdering::Unordered);
    Store->setAtomic(AtomicOrdering::Unordered);
  }
  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  bool requiresResidual =
      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
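  // A residual loop is only needed when the main loop operand is wider than a
  // byte (and, for atomic copies, wider than the atomic element), i.e. when
  // the division above can leave a non-zero remainder.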
  if (requiresResidual) {
    Type *ResLoopOpType = AtomicElementSize
                              ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
                              : Int8Type;
    unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
    assert((ResLoopOpSize == AtomicElementSize ? *AtomicElementSize : 1) &&
           "Store size is expected to match type size");

    Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);

    // Loop body for the residual copy.
    BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
                                               PreLoopBB->getParent(),
                                               PostLoopBB);
    // Residual loop header.
    BasicBlock *ResHeaderBB = BasicBlock::Create(
        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);

    // Need to update the pre-loop basic block to branch to the correct place:
    // branch to the main loop if the count is non-zero, branch to the residual
    // loop if the copy size is smaller than one iteration of the main loop but
    // non-zero, and finally branch to after the residual loop if the memcpy
    // size is zero.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, ResHeaderBB);
    PreLoopBB->getTerminator()->eraseFromParent();

    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        ResHeaderBB);

    // Determine if we need to branch to the residual loop or bypass it.
    IRBuilder<> RHBuilder(ResHeaderBB);
    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
                           ResLoopBB, PostLoopBB);

    // Copy the residual with a single-element load/store loop.
    IRBuilder<> ResBuilder(ResLoopBB);
    PHINode *ResidualIndex =
        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
    ResidualIndex->addIncoming(Zero, ResHeaderBB);

    Value *SrcAsResLoopOpType = ResBuilder.CreateBitCast(
        SrcAddr, PointerType::get(ResLoopOpType, SrcAS));
    Value *DstAsResLoopOpType = ResBuilder.CreateBitCast(
        DstAddr, PointerType::get(ResLoopOpType, DstAS));
    Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
    Value *SrcGEP = ResBuilder.CreateInBoundsGEP(
        ResLoopOpType, SrcAsResLoopOpType, FullOffset);
    LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
                                                  PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP = ResBuilder.CreateInBoundsGEP(
        ResLoopOpType, DstAsResLoopOpType, FullOffset);
    StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *ResNewIndex = ResBuilder.CreateAdd(
        ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);

    // Create the loop branch condition.
    ResBuilder.CreateCondBr(
        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
        PostLoopBB);
  } else {
    // In this case the loop operand type was a byte, and there is no need for a
    // residual loop to copy the remaining memory after the main loop.
    // We do however need to patch up the control flow by creating the
    // terminators for the preloop block and the memcpy loop.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, PostLoopBB);
    PreLoopBB->getTerminator()->eraseFromParent();
    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        PostLoopBB);
  }
}
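
// For the runtime-length case above, the emitted control flow is roughly:
//
//   pre-loop:                    br (LoopCount != 0), loop, residual-header
//   loop-memcpy-expansion:       wide copy; br (i < LoopCount), loop, header
//   loop-memcpy-residual-header: br (Residual != 0), residual-loop, post-loop
//   loop-memcpy-residual:        narrow copy; br (j < Residual), residual, post
//   post-loop-memcpy-expansion:  code that followed the original intrinsic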

// Lower memmove to IR. memmove is required to correctly copy overlapping memory
// regions; therefore, it has to check the relative positions of the source and
// destination pointers and choose the copy direction accordingly.
//
// The code below is an IR rendition of this C function:
//
// void* memmove(void* dst, const void* src, size_t n) {
//   unsigned char* d = dst;
//   const unsigned char* s = src;
//   if (s < d) {
//     // copy backwards
//     while (n--) {
//       d[n] = s[n];
//     }
//   } else {
//     // copy forward
//     for (size_t i = 0; i < n; ++i) {
//       d[i] = s[i];
//     }
//   }
//   return dst;
// }
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
                              Value *DstAddr, Value *CopyLen, Align SrcAlign,
                              Align DstAlign, bool SrcIsVolatile,
                              bool DstIsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  // TODO: Use different element type if possible?
  IRBuilder<> CastBuilder(InsertBefore);
  Type *EltTy = CastBuilder.getInt8Ty();
  Type *PtrTy =
      CastBuilder.getInt8PtrTy(SrcAddr->getType()->getPointerAddressSpace());
  SrcAddr = CastBuilder.CreateBitCast(SrcAddr, PtrTy);
  DstAddr = CastBuilder.CreateBitCast(DstAddr, PtrTy);

  // Create a comparison of src and dst, based on which we jump to either
  // the forward-copy part of the function (if src >= dst) or the backwards-copy
  // part (if src < dst).
  // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
  // structure. Its block terminators (unconditional branches) are replaced by
  // the appropriate conditional branches when the loop is built.
  ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
                                      SrcAddr, DstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
                                &ElseTerm);

  // Each part of the function consists of two blocks:
  //   copy_backwards:      used to skip the loop when n == 0
  //   copy_backwards_loop: the actual backwards loop BB
  //   copy_forward:        used to skip the loop when n == 0
  //   copy_forward_loop:   the actual forward loop BB
  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  CopyBackwardsBB->setName("copy_backwards");
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  CopyForwardBB->setName("copy_forward");
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  unsigned PartSize = DL.getTypeStoreSize(EltTy);
  Align PartSrcAlign(commonAlignment(SrcAlign, PartSize));
  Align PartDstAlign(commonAlignment(DstAlign, PartSize));

  // Initial comparison of n == 0 that lets us skip the loops altogether. Shared
  // between both backwards and forward copy clauses.
  ICmpInst *CompareN =
      new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
                   ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");

  // Copying backwards.
  BasicBlock *LoopBB = BasicBlock::Create(F->getContext(),
                                          "copy_backwards_loop", F,
                                          CopyForwardBB);
  IRBuilder<> LoopBuilder(LoopBB);

  PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  Value *IndexPtr = LoopBuilder.CreateSub(
      LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
  Value *Element = LoopBuilder.CreateAlignedLoad(
      EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
      PartSrcAlign, "element");
  LoopBuilder.CreateAlignedStore(
      Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr),
      PartDstAlign);
  LoopBuilder.CreateCondBr(
      LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
      ExitBB, LoopBB);
  LoopPhi->addIncoming(IndexPtr, LoopBB);
  LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
  BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
  ThenTerm->eraseFromParent();

  // Copying forward.
  BasicBlock *FwdLoopBB =
      BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB);
  IRBuilder<> FwdLoopBuilder(FwdLoopBB);
  PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
  Value *SrcGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi);
  Value *FwdElement =
      FwdLoopBuilder.CreateAlignedLoad(EltTy, SrcGEP, PartSrcAlign, "element");
  Value *DstGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi);
  FwdLoopBuilder.CreateAlignedStore(FwdElement, DstGEP, PartDstAlign);
  Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
      FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
  FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
                              ExitBB, FwdLoopBB);
  FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
  FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);

  BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
  ElseTerm->eraseFromParent();
}
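
// Expand memset into a simple counted loop that stores SetValue one element at
// a time; the "loadstoreloop" block is bypassed entirely when the length is
// zero.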
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                             Value *CopyLen, Value *SetValue, Align DstAlign,
                             bool IsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();
  BasicBlock *NewBB =
      OrigBB->splitBasicBlock(InsertBefore, "split");
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);

  IRBuilder<> Builder(OrigBB->getTerminator());

  // Cast the pointer to the type of the value being stored.
  unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
  DstAddr = Builder.CreateBitCast(DstAddr,
                                  PointerType::get(SetValue->getType(), dstAS));

  Builder.CreateCondBr(
      Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
      LoopBB);
  OrigBB->getTerminator()->eraseFromParent();

  unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
  Align PartAlign(commonAlignment(DstAlign, PartSize));

  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);

  LoopBuilder.CreateAlignedStore(
      SetValue,
      LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
      PartAlign, IsVolatile);

  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
                           NewBB);
}
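
// A memcpy's source and destination must either be identical or must not
// overlap at all. If ScalarEvolution can prove the two pointers are distinct,
// the only remaining legal possibility is "no overlap", so the expanded loop
// may carry alias-scope/noalias metadata on its loads and stores.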
template <typename T>
static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
  if (SE) {
    auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
    auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
      return false;
  }
  return true;
}
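
// Entry points for callers such as target lowering passes: dispatch to the
// constant-length expansion when the length is a ConstantInt, otherwise fall
// back to the runtime-trip-count expansion.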
void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
                              const TargetTransformInfo &TTI,
                              ScalarEvolution *SE) {
  bool CanOverlap = canOverlap(Memcpy, SE);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ Memcpy->getLength(),
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  }
}
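
// Typical use from a lowering pass (a minimal sketch; the loop and variable
// names here are illustrative, not part of this file):
//
//   for (Instruction &I : llvm::make_early_inc_range(instructions(F)))
//     if (auto *Memcpy = dyn_cast<MemCpyInst>(&I)) {
//       expandMemCpyAsLoop(Memcpy, TTI, SE);
//       Memcpy->eraseFromParent(); // expansion does not remove the intrinsic
//     }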

void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
  createMemMoveLoop(/* InsertBefore */ Memmove,
                    /* SrcAddr */ Memmove->getRawSource(),
                    /* DstAddr */ Memmove->getRawDest(),
                    /* CopyLen */ Memmove->getLength(),
                    /* SrcAlign */ Memmove->getSourceAlign().valueOrOne(),
                    /* DestAlign */ Memmove->getDestAlign().valueOrOne(),
                    /* SrcIsVolatile */ Memmove->isVolatile(),
                    /* DstIsVolatile */ Memmove->isVolatile());
}

void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
  createMemSetLoop(/* InsertBefore */ Memset,
                   /* DstAddr */ Memset->getRawDest(),
                   /* CopyLen */ Memset->getLength(),
                   /* SetValue */ Memset->getValue(),
                   /* Alignment */ Memset->getDestAlign().valueOrOne(),
                   Memset->isVolatile());
}

void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
                                    const TargetTransformInfo &TTI,
                                    ScalarEvolution *SE) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ AtomicMemcpy->getLength(),
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  }
}