//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <optional>

#define DEBUG_TYPE "lower-mem-intrinsics"

using namespace llvm;
void llvm::createMemCpyLoopKnownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
    ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  // No need to expand zero length copies.
  if (CopyLen->isZero())
    return;

  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB = nullptr;
  Function *ParentFunc = PreLoopBB->getParent();
  LLVMContext &Ctx = PreLoopBB->getContext();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *TypeOfCopyLen = CopyLen->getType();
  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");

  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;

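  // The loop copies LoopEndCount operands of LoopOpType; any remaining
  // CopyLen % LoopOpSize bytes are covered by the straight-line residual
  // code emitted after the loop.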
  if (LoopEndCount != 0) {
    // Split the block at InsertBefore; everything after it becomes the
    // post-loop block.
    PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split");
    BasicBlock *LoopBB =
        BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB);
    PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);

    IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

    Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
    Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));

    IRBuilder<> LoopBuilder(LoopBB);
    PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index");
    LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);
    // Loop Body
    Value *SrcGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
    LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                   PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
    StoreInst *Store = LoopBuilder.CreateAlignedStore(
        Load, DstGEP, PartDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *NewIndex =
        LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
    LoopIndex->addIncoming(NewIndex, LoopBB);

    // Create the loop branch condition.
    Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
                             LoopBB, PostLoopBB);
  }

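  // Copy the tail that the loop did not cover with a straight-line sequence
  // of loads and stores using the operand types provided by the target.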
  uint64_t BytesCopied = LoopEndCount * LoopOpSize;
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
  if (RemainingBytes) {
    IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI()
                                    : InsertBefore);

    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, SrcAlign.value(),
                                          DstAlign.value(), AtomicElementSize);

    for (auto *OpTy : RemainingOps) {
      Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
      Align PartDstAlign(commonAlignment(DstAlign, BytesCopied));

      // Calculate the new index
      unsigned OperandSize = DL.getTypeStoreSize(OpTy);
      assert(
          (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
          "Atomic memcpy lowering is not supported for selected operand size");

      uint64_t GepIndex = BytesCopied / OperandSize;
      assert(GepIndex * OperandSize == BytesCopied &&
             "Division should have no Remainder!");

      Value *SrcGEP = RBuilder.CreateInBoundsGEP(
          OpTy, SrcAddr, ConstantInt::get(TypeOfCopyLen, GepIndex));
      LoadInst *Load =
          RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
      if (!CanOverlap) {
        // Set alias scope for loads.
        Load->setMetadata(LLVMContext::MD_alias_scope,
                          MDNode::get(Ctx, NewScope));
      }
      Value *DstGEP = RBuilder.CreateInBoundsGEP(
          OpTy, DstAddr, ConstantInt::get(TypeOfCopyLen, GepIndex));
      StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
      if (!CanOverlap) {
        // Indicate that stores don't overlap loads.
        Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
      }
      if (AtomicElementSize) {
        Load->setAtomic(AtomicOrdering::Unordered);
        Store->setAtomic(AtomicOrdering::Unordered);
      }
      BytesCopied += OperandSize;
    }
  }
  assert(BytesCopied == CopyLen->getZExtValue() &&
         "Bytes copied should match size in the call!");
}

void llvm::createMemCpyLoopUnknownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
    Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
    bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB =
      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");

  Function *ParentFunc = PreLoopBB->getParent();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  LLVMContext &Ctx = PreLoopBB->getContext();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

  // Calculate the loop trip count, and remaining bytes to copy after the loop.
  Type *CopyLenType = CopyLen->getType();
  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
  assert(ILengthType &&
         "expected size argument to memcpy to be an integer type!");
  Type *Int8Type = Type::getInt8Ty(Ctx);
  bool LoopOpIsInt8 = LoopOpType == Int8Type;
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
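  // If the loop operates on single bytes, CopyLen is already the trip count;
  // otherwise divide the byte count by the loop operand size.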
  Value *RuntimeLoopCount = LoopOpIsInt8 ?
                            CopyLen :
                            PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
  BasicBlock *LoopBB =
      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
  IRBuilder<> LoopBuilder(LoopBB);

  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);

  Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
  LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                 PartSrcAlign, SrcIsVolatile);
  if (!CanOverlap) {
    // Set alias scope for loads.
    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
  }
  Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
  StoreInst *Store =
      LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
  if (!CanOverlap) {
    // Indicate that stores don't overlap loads.
    Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
  }
  if (AtomicElementSize) {
    Load->setAtomic(AtomicOrdering::Unordered);
    Store->setAtomic(AtomicOrdering::Unordered);
  }
  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
  LoopIndex->addIncoming(NewIndex, LoopBB);

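  // A residual copy is needed unless the loop already copies single bytes, or
  // copies exactly one atomic element per iteration (an atomic memcpy's
  // length is guaranteed to be a multiple of the element size).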
  bool requiresResidual =
      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
  if (requiresResidual) {
    Type *ResLoopOpType = AtomicElementSize
                              ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
                              : Int8Type;
    unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
    assert(ResLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
           "Store size is expected to match type size");
    // Compute the number of residual bytes and the number of bytes copied by
    // the main loop.
    Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
    // Loop body for the residual copy.
    BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
                                               PreLoopBB->getParent(),
                                               PostLoopBB);
    // Residual loop header.
    BasicBlock *ResHeaderBB = BasicBlock::Create(
        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);

    // Need to update the pre-loop basic block to branch to the correct place:
    // branch to the main loop if the count is non-zero, branch to the residual
    // loop if the copy size is smaller than one iteration of the main loop but
    // non-zero, and finally branch past the residual loop if the memcpy size
    // is zero.
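    // Resulting control flow, sketched (blocks as created below):
    //
    //   PreLoopBB:   br (RuntimeLoopCount != 0), LoopBB, ResHeaderBB
    //   LoopBB:      br (NewIndex < RuntimeLoopCount), LoopBB, ResHeaderBB
    //   ResHeaderBB: br (RuntimeResidual != 0), ResLoopBB, PostLoopBB
    //   ResLoopBB:   br (ResNewIndex < RuntimeResidual), ResLoopBB, PostLoopBB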
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, ResHeaderBB);
    PreLoopBB->getTerminator()->eraseFromParent();

    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        ResHeaderBB);

    // Determine if we need to branch to the residual loop or bypass it.
    IRBuilder<> RHBuilder(ResHeaderBB);
    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
                           ResLoopBB, PostLoopBB);

    // Copy the residual with a loop that moves one ResLoopOpType operand per
    // iteration.
    IRBuilder<> ResBuilder(ResLoopBB);
    PHINode *ResidualIndex =
        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
    ResidualIndex->addIncoming(Zero, ResHeaderBB);

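    // ResidualIndex and RuntimeBytesCopied are both byte counts, so FullOffset
    // is a byte offset; in the common (non-atomic) case ResLoopOpType is i8,
    // and the GEPs below index bytes directly.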
    Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
    Value *SrcGEP =
        ResBuilder.CreateInBoundsGEP(ResLoopOpType, SrcAddr, FullOffset);
    LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
                                                  PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP =
        ResBuilder.CreateInBoundsGEP(ResLoopOpType, DstAddr, FullOffset);
    StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *ResNewIndex = ResBuilder.CreateAdd(
        ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);

    // Create the loop branch condition.
    ResBuilder.CreateCondBr(
        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
        PostLoopBB);
  } else {
    // In this case the loop operand type was a byte, and there is no need for a
    // residual loop to copy the remaining memory after the main loop.
    // We do however need to patch up the control flow by creating the
    // terminators for the preloop block and the memcpy loop.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, PostLoopBB);
    PreLoopBB->getTerminator()->eraseFromParent();
    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        PostLoopBB);
  }
}

// Lower memmove to IR. memmove is required to correctly copy overlapping memory
// regions; therefore, it has to check the relative positions of the source and
// destination pointers and choose the copy direction accordingly.
//
// The code below is an IR rendition of this C function:
//
// void* memmove(void* dst, const void* src, size_t n) {
//   unsigned char* d = dst;
//   const unsigned char* s = src;
//   if (s < d) {
//     // copy backwards
//     while (n--) {
//       d[n] = s[n];
//     }
//   } else {
//     // copy forward
//     for (size_t i = 0; i < n; ++i) {
//       d[i] = s[i];
//     }
//   }
//   return dst;
// }
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
                              Value *DstAddr, Value *CopyLen, Align SrcAlign,
                              Align DstAlign, bool SrcIsVolatile,
                              bool DstIsVolatile,
                              const TargetTransformInfo &TTI) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();
  // TODO: Use different element type if possible?
  Type *EltTy = Type::getInt8Ty(F->getContext());

  // Create a comparison of src and dst, based on which we jump to either
  // the forward-copy part of the function (if src >= dst) or the backwards-copy
  // part (if src < dst).
  // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
  // structure. Its block terminators (unconditional branches) are replaced by
  // the appropriate conditional branches when the loop is built.
  ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
                                      SrcAddr, DstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
                                &ElseTerm);

  // Each part of the function consists of two blocks:
  //   copy_backwards:      used to skip the loop when n == 0
  //   copy_backwards_loop: the actual backwards loop BB
  //   copy_forward:        used to skip the loop when n == 0
  //   copy_forward_loop:   the actual forward loop BB
  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  CopyBackwardsBB->setName("copy_backwards");
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  CopyForwardBB->setName("copy_forward");
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  unsigned PartSize = DL.getTypeStoreSize(EltTy);
  Align PartSrcAlign(commonAlignment(SrcAlign, PartSize));
  Align PartDstAlign(commonAlignment(DstAlign, PartSize));

  // Initial comparison of n == 0 that lets us skip the loops altogether. Shared
  // between both backwards and forward copy clauses.
  ICmpInst *CompareN =
      new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
                   ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");

  // Copying backwards.
  BasicBlock *LoopBB = BasicBlock::Create(F->getContext(),
                                          "copy_backwards_loop", F,
                                          CopyForwardBB);
  IRBuilder<> LoopBuilder(LoopBB);

  PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  Value *IndexPtr = LoopBuilder.CreateSub(
      LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
  Value *Element = LoopBuilder.CreateAlignedLoad(
      EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
      PartSrcAlign, "element");
  LoopBuilder.CreateAlignedStore(
      Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr),
      PartDstAlign);
  LoopBuilder.CreateCondBr(
      LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
      ExitBB, LoopBB);
  LoopPhi->addIncoming(IndexPtr, LoopBB);
  LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
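  // Replace the placeholder terminator of copy_backwards with a conditional
  // branch that skips the loop entirely when n == 0.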
  BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
  ThenTerm->eraseFromParent();

  // Copying forward.
  BasicBlock *FwdLoopBB =
      BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB);
  IRBuilder<> FwdLoopBuilder(FwdLoopBB);
  PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
  Value *SrcGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi);
  Value *FwdElement =
      FwdLoopBuilder.CreateAlignedLoad(EltTy, SrcGEP, PartSrcAlign, "element");
  Value *DstGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi);
  FwdLoopBuilder.CreateAlignedStore(FwdElement, DstGEP, PartDstAlign);
  Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
      FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
  FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
                              ExitBB, FwdLoopBB);
  FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
  FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);

  BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
  ElseTerm->eraseFromParent();
}

static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                             Value *CopyLen, Value *SetValue, Align DstAlign,
                             bool IsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();
  BasicBlock *NewBB =
      OrigBB->splitBasicBlock(InsertBefore, "split");
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);

  IRBuilder<> Builder(OrigBB->getTerminator());

  Builder.CreateCondBr(
      Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
      LoopBB);
  OrigBB->getTerminator()->eraseFromParent();

  unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
  Align PartAlign(commonAlignment(DstAlign, PartSize));

  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);

  LoopBuilder.CreateAlignedStore(
      SetValue,
      LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
      PartAlign, IsVolatile);

  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
                           NewBB);
}

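// Returns false only when SCEV proves that Memcpy's source and destination
// pointers are distinct. Because memcpy operands may not partially overlap,
// known-unequal pointers imply the accesses are disjoint, which justifies the
// no-alias metadata emitted by the loop expansions above.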
template <typename T>
static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
  if (SE) {
    auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
    auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
      return false;
  }
  return true;
}

void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
                              const TargetTransformInfo &TTI,
                              ScalarEvolution *SE) {
  bool CanOverlap = canOverlap(Memcpy, SE);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ Memcpy->getLength(),
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  }
}

bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
                               const TargetTransformInfo &TTI) {
  Value *CopyLen = Memmove->getLength();
  Value *SrcAddr = Memmove->getRawSource();
  Value *DstAddr = Memmove->getRawDest();
  Align SrcAlign = Memmove->getSourceAlign().valueOrOne();
  Align DstAlign = Memmove->getDestAlign().valueOrOne();
  bool SrcIsVolatile = Memmove->isVolatile();
  bool DstIsVolatile = SrcIsVolatile;
  IRBuilder<> CastBuilder(Memmove);

  unsigned SrcAS = SrcAddr->getType()->getPointerAddressSpace();
  unsigned DstAS = DstAddr->getType()->getPointerAddressSpace();
  if (SrcAS != DstAS) {
    if (!TTI.addrspacesMayAlias(SrcAS, DstAS)) {
      // We may not be able to emit a pointer comparison, but we don't have
      // to. Expand as memcpy.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
        createMemCpyLoopKnownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                  CI, SrcAlign, DstAlign, SrcIsVolatile,
                                  DstIsVolatile,
                                  /*CanOverlap=*/false, TTI);
      } else {
        createMemCpyLoopUnknownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                    CopyLen, SrcAlign, DstAlign, SrcIsVolatile,
                                    DstIsVolatile,
                                    /*CanOverlap=*/false, TTI);
      }

      return true;
    }
    if (TTI.isValidAddrSpaceCast(DstAS, SrcAS))
      DstAddr = CastBuilder.CreateAddrSpaceCast(DstAddr, SrcAddr->getType());
    else if (TTI.isValidAddrSpaceCast(SrcAS, DstAS))
      SrcAddr = CastBuilder.CreateAddrSpaceCast(SrcAddr, DstAddr->getType());
    else {
      // We don't know generically if it's legal to introduce an
      // addrspacecast. We need to know either if it's legal to insert an
      // addrspacecast, or if the address spaces cannot alias.
      LLVM_DEBUG(
          dbgs() << "Do not know how to expand memmove between different "
                    "address spaces\n");
      return false;
    }
  }

  createMemMoveLoop(
      /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign,
      SrcIsVolatile, DstIsVolatile, TTI);
  return true;
}

void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
  createMemSetLoop(/* InsertBefore */ Memset,
                   /* DstAddr */ Memset->getRawDest(),
                   /* CopyLen */ Memset->getLength(),
                   /* SetValue */ Memset->getValue(),
                   /* Alignment */ Memset->getDestAlign().valueOrOne(),
                   Memset->isVolatile());
}

void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
                                    const TargetTransformInfo &TTI,
                                    ScalarEvolution *SE) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ AtomicMemcpy->getLength(),
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  }
}