1 //===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
10 #include "llvm/Analysis/TargetTransformInfo.h"
11 #include "llvm/IR/IRBuilder.h"
12 #include "llvm/IR/IntrinsicInst.h"
13 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
17 static unsigned getLoopOperandSizeInBytes(Type
*Type
) {
18 if (VectorType
*VTy
= dyn_cast
<VectorType
>(Type
)) {
19 return VTy
->getBitWidth() / 8;
22 return Type
->getPrimitiveSizeInBits() / 8;
25 void llvm::createMemCpyLoopKnownSize(Instruction
*InsertBefore
, Value
*SrcAddr
,
26 Value
*DstAddr
, ConstantInt
*CopyLen
,
27 unsigned SrcAlign
, unsigned DestAlign
,
28 bool SrcIsVolatile
, bool DstIsVolatile
,
29 const TargetTransformInfo
&TTI
) {
30 // No need to expand zero length copies.
31 if (CopyLen
->isZero())
34 BasicBlock
*PreLoopBB
= InsertBefore
->getParent();
35 BasicBlock
*PostLoopBB
= nullptr;
36 Function
*ParentFunc
= PreLoopBB
->getParent();
37 LLVMContext
&Ctx
= PreLoopBB
->getContext();
39 Type
*TypeOfCopyLen
= CopyLen
->getType();
41 TTI
.getMemcpyLoopLoweringType(Ctx
, CopyLen
, SrcAlign
, DestAlign
);
43 unsigned LoopOpSize
= getLoopOperandSizeInBytes(LoopOpType
);
44 uint64_t LoopEndCount
= CopyLen
->getZExtValue() / LoopOpSize
;
46 unsigned SrcAS
= cast
<PointerType
>(SrcAddr
->getType())->getAddressSpace();
47 unsigned DstAS
= cast
<PointerType
>(DstAddr
->getType())->getAddressSpace();
49 if (LoopEndCount
!= 0) {
51 PostLoopBB
= PreLoopBB
->splitBasicBlock(InsertBefore
, "memcpy-split");
53 BasicBlock::Create(Ctx
, "load-store-loop", ParentFunc
, PostLoopBB
);
54 PreLoopBB
->getTerminator()->setSuccessor(0, LoopBB
);
56 IRBuilder
<> PLBuilder(PreLoopBB
->getTerminator());
58 // Cast the Src and Dst pointers to pointers to the loop operand type (if
60 PointerType
*SrcOpType
= PointerType::get(LoopOpType
, SrcAS
);
61 PointerType
*DstOpType
= PointerType::get(LoopOpType
, DstAS
);
62 if (SrcAddr
->getType() != SrcOpType
) {
63 SrcAddr
= PLBuilder
.CreateBitCast(SrcAddr
, SrcOpType
);
65 if (DstAddr
->getType() != DstOpType
) {
66 DstAddr
= PLBuilder
.CreateBitCast(DstAddr
, DstOpType
);
69 IRBuilder
<> LoopBuilder(LoopBB
);
70 PHINode
*LoopIndex
= LoopBuilder
.CreatePHI(TypeOfCopyLen
, 2, "loop-index");
71 LoopIndex
->addIncoming(ConstantInt::get(TypeOfCopyLen
, 0U), PreLoopBB
);
74 LoopBuilder
.CreateInBoundsGEP(LoopOpType
, SrcAddr
, LoopIndex
);
75 Value
*Load
= LoopBuilder
.CreateLoad(LoopOpType
, SrcGEP
, SrcIsVolatile
);
77 LoopBuilder
.CreateInBoundsGEP(LoopOpType
, DstAddr
, LoopIndex
);
78 LoopBuilder
.CreateStore(Load
, DstGEP
, DstIsVolatile
);
81 LoopBuilder
.CreateAdd(LoopIndex
, ConstantInt::get(TypeOfCopyLen
, 1U));
82 LoopIndex
->addIncoming(NewIndex
, LoopBB
);
84 // Create the loop branch condition.
85 Constant
*LoopEndCI
= ConstantInt::get(TypeOfCopyLen
, LoopEndCount
);
86 LoopBuilder
.CreateCondBr(LoopBuilder
.CreateICmpULT(NewIndex
, LoopEndCI
),
90 uint64_t BytesCopied
= LoopEndCount
* LoopOpSize
;
91 uint64_t RemainingBytes
= CopyLen
->getZExtValue() - BytesCopied
;
93 IRBuilder
<> RBuilder(PostLoopBB
? PostLoopBB
->getFirstNonPHI()
96 // Update the alignment based on the copy size used in the loop body.
97 SrcAlign
= std::min(SrcAlign
, LoopOpSize
);
98 DestAlign
= std::min(DestAlign
, LoopOpSize
);
100 SmallVector
<Type
*, 5> RemainingOps
;
101 TTI
.getMemcpyLoopResidualLoweringType(RemainingOps
, Ctx
, RemainingBytes
,
102 SrcAlign
, DestAlign
);
104 for (auto OpTy
: RemainingOps
) {
105 // Calaculate the new index
106 unsigned OperandSize
= getLoopOperandSizeInBytes(OpTy
);
107 uint64_t GepIndex
= BytesCopied
/ OperandSize
;
108 assert(GepIndex
* OperandSize
== BytesCopied
&&
109 "Division should have no Remainder!");
110 // Cast source to operand type and load
111 PointerType
*SrcPtrType
= PointerType::get(OpTy
, SrcAS
);
112 Value
*CastedSrc
= SrcAddr
->getType() == SrcPtrType
114 : RBuilder
.CreateBitCast(SrcAddr
, SrcPtrType
);
115 Value
*SrcGEP
= RBuilder
.CreateInBoundsGEP(
116 OpTy
, CastedSrc
, ConstantInt::get(TypeOfCopyLen
, GepIndex
));
117 Value
*Load
= RBuilder
.CreateLoad(OpTy
, SrcGEP
, SrcIsVolatile
);
119 // Cast destination to operand type and store.
120 PointerType
*DstPtrType
= PointerType::get(OpTy
, DstAS
);
121 Value
*CastedDst
= DstAddr
->getType() == DstPtrType
123 : RBuilder
.CreateBitCast(DstAddr
, DstPtrType
);
124 Value
*DstGEP
= RBuilder
.CreateInBoundsGEP(
125 OpTy
, CastedDst
, ConstantInt::get(TypeOfCopyLen
, GepIndex
));
126 RBuilder
.CreateStore(Load
, DstGEP
, DstIsVolatile
);
128 BytesCopied
+= OperandSize
;
131 assert(BytesCopied
== CopyLen
->getZExtValue() &&
132 "Bytes copied should match size in the call!");
135 void llvm::createMemCpyLoopUnknownSize(Instruction
*InsertBefore
,
136 Value
*SrcAddr
, Value
*DstAddr
,
137 Value
*CopyLen
, unsigned SrcAlign
,
138 unsigned DestAlign
, bool SrcIsVolatile
,
140 const TargetTransformInfo
&TTI
) {
141 BasicBlock
*PreLoopBB
= InsertBefore
->getParent();
142 BasicBlock
*PostLoopBB
=
143 PreLoopBB
->splitBasicBlock(InsertBefore
, "post-loop-memcpy-expansion");
145 Function
*ParentFunc
= PreLoopBB
->getParent();
146 LLVMContext
&Ctx
= PreLoopBB
->getContext();
149 TTI
.getMemcpyLoopLoweringType(Ctx
, CopyLen
, SrcAlign
, DestAlign
);
150 unsigned LoopOpSize
= getLoopOperandSizeInBytes(LoopOpType
);
152 IRBuilder
<> PLBuilder(PreLoopBB
->getTerminator());
154 unsigned SrcAS
= cast
<PointerType
>(SrcAddr
->getType())->getAddressSpace();
155 unsigned DstAS
= cast
<PointerType
>(DstAddr
->getType())->getAddressSpace();
156 PointerType
*SrcOpType
= PointerType::get(LoopOpType
, SrcAS
);
157 PointerType
*DstOpType
= PointerType::get(LoopOpType
, DstAS
);
158 if (SrcAddr
->getType() != SrcOpType
) {
159 SrcAddr
= PLBuilder
.CreateBitCast(SrcAddr
, SrcOpType
);
161 if (DstAddr
->getType() != DstOpType
) {
162 DstAddr
= PLBuilder
.CreateBitCast(DstAddr
, DstOpType
);
165 // Calculate the loop trip count, and remaining bytes to copy after the loop.
166 Type
*CopyLenType
= CopyLen
->getType();
167 IntegerType
*ILengthType
= dyn_cast
<IntegerType
>(CopyLenType
);
168 assert(ILengthType
&&
169 "expected size argument to memcpy to be an integer type!");
170 Type
*Int8Type
= Type::getInt8Ty(Ctx
);
171 bool LoopOpIsInt8
= LoopOpType
== Int8Type
;
172 ConstantInt
*CILoopOpSize
= ConstantInt::get(ILengthType
, LoopOpSize
);
173 Value
*RuntimeLoopCount
= LoopOpIsInt8
?
175 PLBuilder
.CreateUDiv(CopyLen
, CILoopOpSize
);
177 BasicBlock::Create(Ctx
, "loop-memcpy-expansion", ParentFunc
, PostLoopBB
);
178 IRBuilder
<> LoopBuilder(LoopBB
);
180 PHINode
*LoopIndex
= LoopBuilder
.CreatePHI(CopyLenType
, 2, "loop-index");
181 LoopIndex
->addIncoming(ConstantInt::get(CopyLenType
, 0U), PreLoopBB
);
183 Value
*SrcGEP
= LoopBuilder
.CreateInBoundsGEP(LoopOpType
, SrcAddr
, LoopIndex
);
184 Value
*Load
= LoopBuilder
.CreateLoad(LoopOpType
, SrcGEP
, SrcIsVolatile
);
185 Value
*DstGEP
= LoopBuilder
.CreateInBoundsGEP(LoopOpType
, DstAddr
, LoopIndex
);
186 LoopBuilder
.CreateStore(Load
, DstGEP
, DstIsVolatile
);
189 LoopBuilder
.CreateAdd(LoopIndex
, ConstantInt::get(CopyLenType
, 1U));
190 LoopIndex
->addIncoming(NewIndex
, LoopBB
);
194 Value
*RuntimeResidual
= PLBuilder
.CreateURem(CopyLen
, CILoopOpSize
);
195 Value
*RuntimeBytesCopied
= PLBuilder
.CreateSub(CopyLen
, RuntimeResidual
);
197 // Loop body for the residual copy.
198 BasicBlock
*ResLoopBB
= BasicBlock::Create(Ctx
, "loop-memcpy-residual",
199 PreLoopBB
->getParent(),
201 // Residual loop header.
202 BasicBlock
*ResHeaderBB
= BasicBlock::Create(
203 Ctx
, "loop-memcpy-residual-header", PreLoopBB
->getParent(), nullptr);
205 // Need to update the pre-loop basic block to branch to the correct place.
206 // branch to the main loop if the count is non-zero, branch to the residual
207 // loop if the copy size is smaller then 1 iteration of the main loop but
208 // non-zero and finally branch to after the residual loop if the memcpy
210 ConstantInt
*Zero
= ConstantInt::get(ILengthType
, 0U);
211 PLBuilder
.CreateCondBr(PLBuilder
.CreateICmpNE(RuntimeLoopCount
, Zero
),
212 LoopBB
, ResHeaderBB
);
213 PreLoopBB
->getTerminator()->eraseFromParent();
215 LoopBuilder
.CreateCondBr(
216 LoopBuilder
.CreateICmpULT(NewIndex
, RuntimeLoopCount
), LoopBB
,
219 // Determine if we need to branch to the residual loop or bypass it.
220 IRBuilder
<> RHBuilder(ResHeaderBB
);
221 RHBuilder
.CreateCondBr(RHBuilder
.CreateICmpNE(RuntimeResidual
, Zero
),
222 ResLoopBB
, PostLoopBB
);
224 // Copy the residual with single byte load/store loop.
225 IRBuilder
<> ResBuilder(ResLoopBB
);
226 PHINode
*ResidualIndex
=
227 ResBuilder
.CreatePHI(CopyLenType
, 2, "residual-loop-index");
228 ResidualIndex
->addIncoming(Zero
, ResHeaderBB
);
231 ResBuilder
.CreateBitCast(SrcAddr
, PointerType::get(Int8Type
, SrcAS
));
233 ResBuilder
.CreateBitCast(DstAddr
, PointerType::get(Int8Type
, DstAS
));
234 Value
*FullOffset
= ResBuilder
.CreateAdd(RuntimeBytesCopied
, ResidualIndex
);
236 ResBuilder
.CreateInBoundsGEP(Int8Type
, SrcAsInt8
, FullOffset
);
237 Value
*Load
= ResBuilder
.CreateLoad(Int8Type
, SrcGEP
, SrcIsVolatile
);
239 ResBuilder
.CreateInBoundsGEP(Int8Type
, DstAsInt8
, FullOffset
);
240 ResBuilder
.CreateStore(Load
, DstGEP
, DstIsVolatile
);
243 ResBuilder
.CreateAdd(ResidualIndex
, ConstantInt::get(CopyLenType
, 1U));
244 ResidualIndex
->addIncoming(ResNewIndex
, ResLoopBB
);
246 // Create the loop branch condition.
247 ResBuilder
.CreateCondBr(
248 ResBuilder
.CreateICmpULT(ResNewIndex
, RuntimeResidual
), ResLoopBB
,
251 // In this case the loop operand type was a byte, and there is no need for a
252 // residual loop to copy the remaining memory after the main loop.
253 // We do however need to patch up the control flow by creating the
254 // terminators for the preloop block and the memcpy loop.
255 ConstantInt
*Zero
= ConstantInt::get(ILengthType
, 0U);
256 PLBuilder
.CreateCondBr(PLBuilder
.CreateICmpNE(RuntimeLoopCount
, Zero
),
258 PreLoopBB
->getTerminator()->eraseFromParent();
259 LoopBuilder
.CreateCondBr(
260 LoopBuilder
.CreateICmpULT(NewIndex
, RuntimeLoopCount
), LoopBB
,
265 // Lower memmove to IR. memmove is required to correctly copy overlapping memory
266 // regions; therefore, it has to check the relative positions of the source and
267 // destination pointers and choose the copy direction accordingly.
269 // The code below is an IR rendition of this C function:
271 // void* memmove(void* dst, const void* src, size_t n) {
272 // unsigned char* d = dst;
273 // const unsigned char* s = src;
281 // for (size_t i = 0; i < n; ++i) {
287 static void createMemMoveLoop(Instruction
*InsertBefore
,
288 Value
*SrcAddr
, Value
*DstAddr
, Value
*CopyLen
,
289 unsigned SrcAlign
, unsigned DestAlign
,
290 bool SrcIsVolatile
, bool DstIsVolatile
) {
291 Type
*TypeOfCopyLen
= CopyLen
->getType();
292 BasicBlock
*OrigBB
= InsertBefore
->getParent();
293 Function
*F
= OrigBB
->getParent();
295 Type
*EltTy
= cast
<PointerType
>(SrcAddr
->getType())->getElementType();
297 // Create the a comparison of src and dst, based on which we jump to either
298 // the forward-copy part of the function (if src >= dst) or the backwards-copy
299 // part (if src < dst).
300 // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
301 // structure. Its block terminators (unconditional branches) are replaced by
302 // the appropriate conditional branches when the loop is built.
303 ICmpInst
*PtrCompare
= new ICmpInst(InsertBefore
, ICmpInst::ICMP_ULT
,
304 SrcAddr
, DstAddr
, "compare_src_dst");
305 Instruction
*ThenTerm
, *ElseTerm
;
306 SplitBlockAndInsertIfThenElse(PtrCompare
, InsertBefore
, &ThenTerm
,
309 // Each part of the function consists of two blocks:
310 // copy_backwards: used to skip the loop when n == 0
311 // copy_backwards_loop: the actual backwards loop BB
312 // copy_forward: used to skip the loop when n == 0
313 // copy_forward_loop: the actual forward loop BB
314 BasicBlock
*CopyBackwardsBB
= ThenTerm
->getParent();
315 CopyBackwardsBB
->setName("copy_backwards");
316 BasicBlock
*CopyForwardBB
= ElseTerm
->getParent();
317 CopyForwardBB
->setName("copy_forward");
318 BasicBlock
*ExitBB
= InsertBefore
->getParent();
319 ExitBB
->setName("memmove_done");
321 // Initial comparison of n == 0 that lets us skip the loops altogether. Shared
322 // between both backwards and forward copy clauses.
324 new ICmpInst(OrigBB
->getTerminator(), ICmpInst::ICMP_EQ
, CopyLen
,
325 ConstantInt::get(TypeOfCopyLen
, 0), "compare_n_to_0");
327 // Copying backwards.
329 BasicBlock::Create(F
->getContext(), "copy_backwards_loop", F
, CopyForwardBB
);
330 IRBuilder
<> LoopBuilder(LoopBB
);
331 PHINode
*LoopPhi
= LoopBuilder
.CreatePHI(TypeOfCopyLen
, 0);
332 Value
*IndexPtr
= LoopBuilder
.CreateSub(
333 LoopPhi
, ConstantInt::get(TypeOfCopyLen
, 1), "index_ptr");
334 Value
*Element
= LoopBuilder
.CreateLoad(
335 EltTy
, LoopBuilder
.CreateInBoundsGEP(EltTy
, SrcAddr
, IndexPtr
),
337 LoopBuilder
.CreateStore(
338 Element
, LoopBuilder
.CreateInBoundsGEP(EltTy
, DstAddr
, IndexPtr
));
339 LoopBuilder
.CreateCondBr(
340 LoopBuilder
.CreateICmpEQ(IndexPtr
, ConstantInt::get(TypeOfCopyLen
, 0)),
342 LoopPhi
->addIncoming(IndexPtr
, LoopBB
);
343 LoopPhi
->addIncoming(CopyLen
, CopyBackwardsBB
);
344 BranchInst::Create(ExitBB
, LoopBB
, CompareN
, ThenTerm
);
345 ThenTerm
->eraseFromParent();
348 BasicBlock
*FwdLoopBB
=
349 BasicBlock::Create(F
->getContext(), "copy_forward_loop", F
, ExitBB
);
350 IRBuilder
<> FwdLoopBuilder(FwdLoopBB
);
351 PHINode
*FwdCopyPhi
= FwdLoopBuilder
.CreatePHI(TypeOfCopyLen
, 0, "index_ptr");
352 Value
*FwdElement
= FwdLoopBuilder
.CreateLoad(
353 EltTy
, FwdLoopBuilder
.CreateInBoundsGEP(EltTy
, SrcAddr
, FwdCopyPhi
),
355 FwdLoopBuilder
.CreateStore(
356 FwdElement
, FwdLoopBuilder
.CreateInBoundsGEP(EltTy
, DstAddr
, FwdCopyPhi
));
357 Value
*FwdIndexPtr
= FwdLoopBuilder
.CreateAdd(
358 FwdCopyPhi
, ConstantInt::get(TypeOfCopyLen
, 1), "index_increment");
359 FwdLoopBuilder
.CreateCondBr(FwdLoopBuilder
.CreateICmpEQ(FwdIndexPtr
, CopyLen
),
361 FwdCopyPhi
->addIncoming(FwdIndexPtr
, FwdLoopBB
);
362 FwdCopyPhi
->addIncoming(ConstantInt::get(TypeOfCopyLen
, 0), CopyForwardBB
);
364 BranchInst::Create(ExitBB
, FwdLoopBB
, CompareN
, ElseTerm
);
365 ElseTerm
->eraseFromParent();
368 static void createMemSetLoop(Instruction
*InsertBefore
,
369 Value
*DstAddr
, Value
*CopyLen
, Value
*SetValue
,
370 unsigned Align
, bool IsVolatile
) {
371 Type
*TypeOfCopyLen
= CopyLen
->getType();
372 BasicBlock
*OrigBB
= InsertBefore
->getParent();
373 Function
*F
= OrigBB
->getParent();
375 OrigBB
->splitBasicBlock(InsertBefore
, "split");
377 = BasicBlock::Create(F
->getContext(), "loadstoreloop", F
, NewBB
);
379 IRBuilder
<> Builder(OrigBB
->getTerminator());
381 // Cast pointer to the type of value getting stored
382 unsigned dstAS
= cast
<PointerType
>(DstAddr
->getType())->getAddressSpace();
383 DstAddr
= Builder
.CreateBitCast(DstAddr
,
384 PointerType::get(SetValue
->getType(), dstAS
));
386 Builder
.CreateCondBr(
387 Builder
.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen
, 0), CopyLen
), NewBB
,
389 OrigBB
->getTerminator()->eraseFromParent();
391 IRBuilder
<> LoopBuilder(LoopBB
);
392 PHINode
*LoopIndex
= LoopBuilder
.CreatePHI(TypeOfCopyLen
, 0);
393 LoopIndex
->addIncoming(ConstantInt::get(TypeOfCopyLen
, 0), OrigBB
);
395 LoopBuilder
.CreateStore(
397 LoopBuilder
.CreateInBoundsGEP(SetValue
->getType(), DstAddr
, LoopIndex
),
401 LoopBuilder
.CreateAdd(LoopIndex
, ConstantInt::get(TypeOfCopyLen
, 1));
402 LoopIndex
->addIncoming(NewIndex
, LoopBB
);
404 LoopBuilder
.CreateCondBr(LoopBuilder
.CreateICmpULT(NewIndex
, CopyLen
), LoopBB
,
408 void llvm::expandMemCpyAsLoop(MemCpyInst
*Memcpy
,
409 const TargetTransformInfo
&TTI
) {
410 if (ConstantInt
*CI
= dyn_cast
<ConstantInt
>(Memcpy
->getLength())) {
411 createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy
,
412 /* SrcAddr */ Memcpy
->getRawSource(),
413 /* DstAddr */ Memcpy
->getRawDest(),
415 /* SrcAlign */ Memcpy
->getSourceAlignment(),
416 /* DestAlign */ Memcpy
->getDestAlignment(),
417 /* SrcIsVolatile */ Memcpy
->isVolatile(),
418 /* DstIsVolatile */ Memcpy
->isVolatile(),
419 /* TargetTransformInfo */ TTI
);
421 createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy
,
422 /* SrcAddr */ Memcpy
->getRawSource(),
423 /* DstAddr */ Memcpy
->getRawDest(),
424 /* CopyLen */ Memcpy
->getLength(),
425 /* SrcAlign */ Memcpy
->getSourceAlignment(),
426 /* DestAlign */ Memcpy
->getDestAlignment(),
427 /* SrcIsVolatile */ Memcpy
->isVolatile(),
428 /* DstIsVolatile */ Memcpy
->isVolatile(),
429 /* TargetTransfomrInfo */ TTI
);
433 void llvm::expandMemMoveAsLoop(MemMoveInst
*Memmove
) {
434 createMemMoveLoop(/* InsertBefore */ Memmove
,
435 /* SrcAddr */ Memmove
->getRawSource(),
436 /* DstAddr */ Memmove
->getRawDest(),
437 /* CopyLen */ Memmove
->getLength(),
438 /* SrcAlign */ Memmove
->getSourceAlignment(),
439 /* DestAlign */ Memmove
->getDestAlignment(),
440 /* SrcIsVolatile */ Memmove
->isVolatile(),
441 /* DstIsVolatile */ Memmove
->isVolatile());
444 void llvm::expandMemSetAsLoop(MemSetInst
*Memset
) {
445 createMemSetLoop(/* InsertBefore */ Memset
,
446 /* DstAddr */ Memset
->getRawDest(),
447 /* CopyLen */ Memset
->getLength(),
448 /* SetValue */ Memset
->getValue(),
449 /* Alignment */ Memset
->getDestAlignment(),
450 Memset
->isVolatile());