//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "lower-mem-intrinsics"

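// Emit a load/store loop (plus a straight-line residual copy) for a memcpy
// whose length is a compile-time constant. The loop operand type and the
// residual operand types are chosen by the target via TTI.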
void llvm::createMemCpyLoopKnownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
    ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  // No need to expand zero length copies.
  if (CopyLen->isZero())
    return;

  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB = nullptr;
  Function *ParentFunc = PreLoopBB->getParent();
  LLVMContext &Ctx = PreLoopBB->getContext();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *TypeOfCopyLen = CopyLen->getType();
  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");

  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;

  if (LoopEndCount != 0) {
    PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split");
    BasicBlock *LoopBB =
        BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB);
    PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);

    IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

    Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
    Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));

    IRBuilder<> LoopBuilder(LoopBB);
    PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index");
    LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);

    // Loop body: load one LoopOpType element and store it at the same index.
    Value *SrcGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
    LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                   PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
    StoreInst *Store = LoopBuilder.CreateAlignedStore(
        Load, DstGEP, PartDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *NewIndex =
        LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
    LoopIndex->addIncoming(NewIndex, LoopBB);

    // Create the loop branch condition.
    Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
                             LoopBB, PostLoopBB);
  }

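  // Copy any bytes left over after the main loop (CopyLen % LoopOpSize) with a
  // straight-line sequence of progressively narrower loads and stores, using
  // the residual operand types chosen by the target.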
  uint64_t BytesCopied = LoopEndCount * LoopOpSize;
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
  if (RemainingBytes) {
    IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI()
                                    : InsertBefore);

    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, SrcAlign.value(),
                                          DstAlign.value(), AtomicElementSize);

    for (auto *OpTy : RemainingOps) {
      Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
      Align PartDstAlign(commonAlignment(DstAlign, BytesCopied));

      // Calculate the new index.
      unsigned OperandSize = DL.getTypeStoreSize(OpTy);
      assert(
          (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
          "Atomic memcpy lowering is not supported for selected operand size");

      uint64_t GepIndex = BytesCopied / OperandSize;
      assert(GepIndex * OperandSize == BytesCopied &&
             "Division should have no Remainder!");

      Value *SrcGEP = RBuilder.CreateInBoundsGEP(
          OpTy, SrcAddr, ConstantInt::get(TypeOfCopyLen, GepIndex));
      LoadInst *Load =
          RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
      if (!CanOverlap) {
        // Set alias scope for loads.
        Load->setMetadata(LLVMContext::MD_alias_scope,
                          MDNode::get(Ctx, NewScope));
      }
      Value *DstGEP = RBuilder.CreateInBoundsGEP(
          OpTy, DstAddr, ConstantInt::get(TypeOfCopyLen, GepIndex));
      StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
      if (!CanOverlap) {
        // Indicate that stores don't overlap loads.
        Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
      }
      if (AtomicElementSize) {
        Load->setAtomic(AtomicOrdering::Unordered);
        Store->setAtomic(AtomicOrdering::Unordered);
      }
      BytesCopied += OperandSize;
    }
  }
  assert(BytesCopied == CopyLen->getZExtValue() &&
         "Bytes copied should match size in the call!");
}

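// Emit a copy loop for a memcpy whose length is only known at run time. The
// count of full-width iterations and the residual byte count are computed
// before the loop; a separate single-element residual loop handles the tail
// when the loop operand is wider than one byte.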
void llvm::createMemCpyLoopUnknownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
    Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
    bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB =
      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");

  Function *ParentFunc = PreLoopBB->getParent();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  LLVMContext &Ctx = PreLoopBB->getContext();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

  // Calculate the loop trip count, and remaining bytes to copy after the loop.
  Type *CopyLenType = CopyLen->getType();
  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
  assert(ILengthType &&
         "expected size argument to memcpy to be an integer type!");
  Type *Int8Type = Type::getInt8Ty(Ctx);
  bool LoopOpIsInt8 = LoopOpType == Int8Type;
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
  Value *RuntimeLoopCount = LoopOpIsInt8
                                ? CopyLen
                                : PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
  BasicBlock *LoopBB =
      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
  IRBuilder<> LoopBuilder(LoopBB);

  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);

  Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
  LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                 PartSrcAlign, SrcIsVolatile);
  if (!CanOverlap) {
    // Set alias scope for loads.
    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
  }
  Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
  StoreInst *Store =
      LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
  if (!CanOverlap) {
    // Indicate that stores don't overlap loads.
    Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
  }
  if (AtomicElementSize) {
    Load->setAtomic(AtomicOrdering::Unordered);
    Store->setAtomic(AtomicOrdering::Unordered);
  }
  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
  LoopIndex->addIncoming(NewIndex, LoopBB);

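  // A residual loop is only needed when the main loop operates on units wider
  // than a byte (and, for atomic copies, wider than the atomic element);
  // otherwise the main loop already covers every byte.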
  bool requiresResidual =
      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
  if (requiresResidual) {
    Type *ResLoopOpType = AtomicElementSize
                              ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
                              : Int8Type;
    unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
    assert((ResLoopOpSize == AtomicElementSize ? *AtomicElementSize : 1) &&
           "Store size is expected to match type size");

    Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);

    // Loop body for the residual copy.
    BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
                                               PreLoopBB->getParent(),
                                               PostLoopBB);
    // Residual loop header.
    BasicBlock *ResHeaderBB = BasicBlock::Create(
        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);

    // Need to update the pre-loop basic block to branch to the correct place:
    // branch to the main loop if the count is non-zero, branch to the residual
    // loop if the copy size is smaller than one iteration of the main loop but
    // non-zero, and finally branch to after the residual loop if the memcpy
    // size is zero.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, ResHeaderBB);
    PreLoopBB->getTerminator()->eraseFromParent();

    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        ResHeaderBB);

    // Determine if we need to branch to the residual loop or bypass it.
    IRBuilder<> RHBuilder(ResHeaderBB);
    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
                           ResLoopBB, PostLoopBB);

    // Copy the residual with single byte load/store loop.
    IRBuilder<> ResBuilder(ResLoopBB);
    PHINode *ResidualIndex =
        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
    ResidualIndex->addIncoming(Zero, ResHeaderBB);

    Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
    Value *SrcGEP =
        ResBuilder.CreateInBoundsGEP(ResLoopOpType, SrcAddr, FullOffset);
    LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
                                                  PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP =
        ResBuilder.CreateInBoundsGEP(ResLoopOpType, DstAddr, FullOffset);
    StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *ResNewIndex = ResBuilder.CreateAdd(
        ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);

    // Create the loop branch condition.
    ResBuilder.CreateCondBr(
        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
        PostLoopBB);
  } else {
    // In this case the loop operand type was a byte, and there is no need for
    // a residual loop to copy the remaining memory after the main loop.
    // We do however need to patch up the control flow by creating the
    // terminators for the preloop block and the memcpy loop.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, PostLoopBB);
    PreLoopBB->getTerminator()->eraseFromParent();
    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        PostLoopBB);
  }
}

// Lower memmove to IR. memmove is required to correctly copy overlapping memory
// regions; therefore, it has to check the relative positions of the source and
// destination pointers and choose the copy direction accordingly.
//
// The code below is an IR rendition of this C function:
//
// void* memmove(void* dst, const void* src, size_t n) {
//   unsigned char* d = dst;
//   const unsigned char* s = src;
//   if (s < d) {
//     // copy backwards
//     while (n--) {
//       d[n] = s[n];
//     }
//   } else {
//     // copy forward
//     for (size_t i = 0; i < n; ++i) {
//       d[i] = s[i];
//     }
//   }
//   return dst;
// }
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
                              Value *DstAddr, Value *CopyLen, Align SrcAlign,
                              Align DstAlign, bool SrcIsVolatile,
                              bool DstIsVolatile,
                              const TargetTransformInfo &TTI) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();
  // TODO: Use different element type if possible?
  Type *EltTy = Type::getInt8Ty(F->getContext());

  // Create a comparison of src and dst, based on which we jump to either
  // the forward-copy part of the function (if src >= dst) or the backwards-copy
  // part (if src < dst).
  // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
  // structure. Its block terminators (unconditional branches) are replaced by
  // the appropriate conditional branches when the loop is built.
  ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
                                      SrcAddr, DstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
                                &ElseTerm);

  // Each part of the function consists of two blocks:
  //   copy_backwards:      used to skip the loop when n == 0
  //   copy_backwards_loop: the actual backwards loop BB
  //   copy_forward:        used to skip the loop when n == 0
  //   copy_forward_loop:   the actual forward loop BB
  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  CopyBackwardsBB->setName("copy_backwards");
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  CopyForwardBB->setName("copy_forward");
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  unsigned PartSize = DL.getTypeStoreSize(EltTy);
  Align PartSrcAlign(commonAlignment(SrcAlign, PartSize));
  Align PartDstAlign(commonAlignment(DstAlign, PartSize));

  // Initial comparison of n == 0 that lets us skip the loops altogether. Shared
  // between both backwards and forward copy clauses.
  ICmpInst *CompareN =
      new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
                   ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");

  // Copying backwards.
  BasicBlock *LoopBB = BasicBlock::Create(F->getContext(),
                                          "copy_backwards_loop", F,
                                          CopyForwardBB);
  IRBuilder<> LoopBuilder(LoopBB);

  PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  Value *IndexPtr = LoopBuilder.CreateSub(
      LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
  Value *Element = LoopBuilder.CreateAlignedLoad(
      EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
      PartSrcAlign, "element");
  LoopBuilder.CreateAlignedStore(
      Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr),
      PartDstAlign);
  LoopBuilder.CreateCondBr(
      LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
      ExitBB, LoopBB);
  LoopPhi->addIncoming(IndexPtr, LoopBB);
  LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
  BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
  ThenTerm->eraseFromParent();

  // Copying forward.
  BasicBlock *FwdLoopBB =
      BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB);
  IRBuilder<> FwdLoopBuilder(FwdLoopBB);
  PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
  Value *SrcGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi);
  Value *FwdElement =
      FwdLoopBuilder.CreateAlignedLoad(EltTy, SrcGEP, PartSrcAlign, "element");
  Value *DstGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi);
  FwdLoopBuilder.CreateAlignedStore(FwdElement, DstGEP, PartDstAlign);
  Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
      FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
  FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
                              ExitBB, FwdLoopBB);
  FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
  FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);

  BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
  ElseTerm->eraseFromParent();
}

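// Lower a memset to a simple store loop: for every index in [0, CopyLen) store
// SetValue through an inbounds GEP off DstAddr, skipping the loop entirely
// when CopyLen is zero.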
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                             Value *CopyLen, Value *SetValue, Align DstAlign,
                             bool IsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();
  BasicBlock *NewBB = OrigBB->splitBasicBlock(InsertBefore, "split");
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);

  IRBuilder<> Builder(OrigBB->getTerminator());

  Builder.CreateCondBr(
      Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
      LoopBB);
  OrigBB->getTerminator()->eraseFromParent();

  unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
  Align PartAlign(commonAlignment(DstAlign, PartSize));

  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);

  LoopBuilder.CreateAlignedStore(
      SetValue,
      LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
      PartAlign, IsVolatile);

  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
                           NewBB);
}

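// canOverlap() returns false only when ScalarEvolution can prove the source
// and destination pointers are not equal; for a memcpy this means the ranges
// cannot overlap, so the expansion may attach noalias/alias.scope metadata to
// the copy loop.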
template <typename T>
static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
  if (SE) {
    auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
    auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
      return false;
  }
  return true;
}

*Memcpy
,
486 const TargetTransformInfo
&TTI
,
487 ScalarEvolution
*SE
) {
488 bool CanOverlap
= canOverlap(Memcpy
, SE
);
489 if (ConstantInt
*CI
= dyn_cast
<ConstantInt
>(Memcpy
->getLength())) {
490 createMemCpyLoopKnownSize(
491 /* InsertBefore */ Memcpy
,
492 /* SrcAddr */ Memcpy
->getRawSource(),
493 /* DstAddr */ Memcpy
->getRawDest(),
495 /* SrcAlign */ Memcpy
->getSourceAlign().valueOrOne(),
496 /* DestAlign */ Memcpy
->getDestAlign().valueOrOne(),
497 /* SrcIsVolatile */ Memcpy
->isVolatile(),
498 /* DstIsVolatile */ Memcpy
->isVolatile(),
499 /* CanOverlap */ CanOverlap
,
500 /* TargetTransformInfo */ TTI
);
502 createMemCpyLoopUnknownSize(
503 /* InsertBefore */ Memcpy
,
504 /* SrcAddr */ Memcpy
->getRawSource(),
505 /* DstAddr */ Memcpy
->getRawDest(),
506 /* CopyLen */ Memcpy
->getLength(),
507 /* SrcAlign */ Memcpy
->getSourceAlign().valueOrOne(),
508 /* DestAlign */ Memcpy
->getDestAlign().valueOrOne(),
509 /* SrcIsVolatile */ Memcpy
->isVolatile(),
510 /* DstIsVolatile */ Memcpy
->isVolatile(),
511 /* CanOverlap */ CanOverlap
,
512 /* TargetTransformInfo */ TTI
);
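// Expand a memmove into an explicit loop. When the source and destination live
// in different address spaces, the pointers can only be compared after casting
// them to a common address space; if no such cast is valid but the address
// spaces provably cannot alias, the memmove can instead be expanded as a
// memcpy. Returns false if neither option is available.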
bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
                               const TargetTransformInfo &TTI) {
  Value *CopyLen = Memmove->getLength();
  Value *SrcAddr = Memmove->getRawSource();
  Value *DstAddr = Memmove->getRawDest();
  Align SrcAlign = Memmove->getSourceAlign().valueOrOne();
  Align DstAlign = Memmove->getDestAlign().valueOrOne();
  bool SrcIsVolatile = Memmove->isVolatile();
  bool DstIsVolatile = SrcIsVolatile;
  IRBuilder<> CastBuilder(Memmove);

  unsigned SrcAS = SrcAddr->getType()->getPointerAddressSpace();
  unsigned DstAS = DstAddr->getType()->getPointerAddressSpace();
  if (SrcAS != DstAS) {
    if (!TTI.addrspacesMayAlias(SrcAS, DstAS)) {
      // We may not be able to emit a pointer comparison, but we don't have
      // to. Expand as memcpy.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
        createMemCpyLoopKnownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                  CI, SrcAlign, DstAlign, SrcIsVolatile,
                                  DstIsVolatile,
                                  /*CanOverlap=*/false, TTI);
      } else {
        createMemCpyLoopUnknownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                    CopyLen, SrcAlign, DstAlign, SrcIsVolatile,
                                    DstIsVolatile,
                                    /*CanOverlap=*/false, TTI);
      }

      return true;
    }

    if (TTI.isValidAddrSpaceCast(DstAS, SrcAS))
      DstAddr = CastBuilder.CreateAddrSpaceCast(DstAddr, SrcAddr->getType());
    else if (TTI.isValidAddrSpaceCast(SrcAS, DstAS))
      SrcAddr = CastBuilder.CreateAddrSpaceCast(SrcAddr, DstAddr->getType());
    else {
      // We don't know generically if it's legal to introduce an
      // addrspacecast. We need to know either if it's legal to insert an
      // addrspacecast, or if the address spaces cannot alias.
      LLVM_DEBUG(
          dbgs() << "Do not know how to expand memmove between different "
                    "address spaces\n");
      return false;
    }
  }

  createMemMoveLoop(
      /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign,
      SrcIsVolatile, DstIsVolatile, TTI);
  return true;
}

void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
  createMemSetLoop(/* InsertBefore */ Memset,
                   /* DstAddr */ Memset->getRawDest(),
                   /* CopyLen */ Memset->getLength(),
                   /* SetValue */ Memset->getValue(),
                   /* Alignment */ Memset->getDestAlign().valueOrOne(),
                   Memset->isVolatile());
}

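// Expand an element-wise atomic memcpy. The intrinsic guarantees that source
// and destination do not overlap, so CanOverlap is passed as false, and each
// load and store in the emitted loop becomes an unordered atomic of the
// element size reported by the intrinsic.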
void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
                                    const TargetTransformInfo &TTI,
                                    ScalarEvolution *SE) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ AtomicMemcpy->getLength(),
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  }
}