//===- AtomicExpandPass.cpp - Expand atomic instructions -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// __atomic_* library calls, or target specific instructions which implement
// the same semantics in a way which better fits the target backend. This can
// include the use of (intrinsic-based) load-linked/store-conditional loops,
// AtomicCmpXchg, or type coercions.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/AtomicExpand.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/LowerAtomic.h"
#include <cassert>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "atomic-expand"

namespace {

class AtomicExpandImpl {
  const TargetLowering *TLI = nullptr;
  const DataLayout *DL = nullptr;

private:
  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
  bool tryExpandAtomicLoad(LoadInst *LI);
  bool expandAtomicLoadToLL(LoadInst *LI);
  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
  bool tryExpandAtomicStore(StoreInst *SI);
  void expandAtomicStore(StoreInst *SI);
  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
  AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
  Value *
  insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
                    Align AddrAlign, AtomicOrdering MemOpOrder,
                    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
  void expandAtomicOpToLLSC(
      Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder,
      function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
  void expandPartwordAtomicRMW(
      AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
  bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
  void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);

  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
  static Value *insertRMWCmpXchgLoop(
      IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder, SyncScope::ID SSID,
      function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
      CreateCmpXchgInstFun CreateCmpXchg);
  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);

  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
  bool isIdempotentRMW(AtomicRMWInst *RMWI);
  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);

  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
                               Value *PointerOperand, Value *ValueOperand,
                               Value *CASExpected, AtomicOrdering Ordering,
                               AtomicOrdering Ordering2,
                               ArrayRef<RTLIB::Libcall> Libcalls);
  void expandAtomicLoadToLibcall(LoadInst *LI);
  void expandAtomicStoreToLibcall(StoreInst *LI);
  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);

  friend bool
  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                 CreateCmpXchgInstFun CreateCmpXchg);

public:
  bool run(Function &F, const TargetMachine *TM);
};

class AtomicExpandLegacy : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  AtomicExpandLegacy() : FunctionPass(ID) {
    initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;
};

// IRBuilder to be used for replacement atomic instructions.
struct ReplacementIRBuilder
    : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
  MDNode *MMRAMD = nullptr;

  // Preserves the DebugLoc from I, and preserves still valid metadata.
  // Enable StrictFP builder mode when appropriate.
  explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
      : IRBuilder(I->getContext(), DL,
                  IRBuilderCallbackInserter(
                      [this](Instruction *I) { addMMRAMD(I); })) {
    SetInsertPoint(I);
    this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
    if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
      this->setIsFPConstrained(true);

    MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
  }

  void addMMRAMD(Instruction *I) {
    if (canInstructionHaveMMRAs(*I))
      I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
  }
};

} // end anonymous namespace

char AtomicExpandLegacy::ID = 0;

char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;

INITIALIZE_PASS_BEGIN(AtomicExpandLegacy, DEBUG_TYPE,
                      "Expand Atomic instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
                    "Expand Atomic instructions", false, false)

// Helper functions to retrieve the size of atomic instructions.
static unsigned getAtomicOpSize(LoadInst *LI) {
  const DataLayout &DL = LI->getDataLayout();
  return DL.getTypeStoreSize(LI->getType());
}

static unsigned getAtomicOpSize(StoreInst *SI) {
  const DataLayout &DL = SI->getDataLayout();
  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
  const DataLayout &DL = RMWI->getDataLayout();
  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
  const DataLayout &DL = CASI->getDataLayout();
  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}

// Determine if a particular atomic operation has a supported size,
// and is of appropriate alignment, to be passed through for target
// lowering. (Versus turning into a __atomic libcall)
template <typename Inst>
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
  unsigned Size = getAtomicOpSize(I);
  Align Alignment = I->getAlign();
  return Alignment >= Size &&
         Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}

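// For example (illustrative numbers only; the real limit comes from the
// TargetLowering hook), on a target reporting a 64-bit maximum atomic size, a
// 16-byte cmpxchg or an i32 access with only 2-byte alignment fails this
// check and is routed to the __atomic_* libcalls below.
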
bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
  const auto *Subtarget = TM->getSubtargetImpl(F);
  if (!Subtarget->enableAtomicExpand())
    return false;
  TLI = Subtarget->getTargetLowering();
  DL = &F.getDataLayout();

  SmallVector<Instruction *, 1> AtomicInsts;

  // Changing control-flow while iterating through it is a bad idea, so gather
  // a list of all atomic instructions before we start.
  for (Instruction &I : instructions(F))
    if (I.isAtomic() && !isa<FenceInst>(&I))
      AtomicInsts.push_back(&I);

  bool MadeChange = false;
  for (auto *I : AtomicInsts) {
    auto LI = dyn_cast<LoadInst>(I);
    auto SI = dyn_cast<StoreInst>(I);
    auto RMWI = dyn_cast<AtomicRMWInst>(I);
    auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
    assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");

    // If the Size/Alignment is not supported, replace with a libcall.
    if (LI) {
      if (!atomicSizeSupported(TLI, LI)) {
        expandAtomicLoadToLibcall(LI);
        MadeChange = true;
        continue;
      }
    } else if (SI) {
      if (!atomicSizeSupported(TLI, SI)) {
        expandAtomicStoreToLibcall(SI);
        MadeChange = true;
        continue;
      }
    } else if (RMWI) {
      if (!atomicSizeSupported(TLI, RMWI)) {
        expandAtomicRMWToLibcall(RMWI);
        MadeChange = true;
        continue;
      }
    } else if (CASI) {
      if (!atomicSizeSupported(TLI, CASI)) {
        expandAtomicCASToLibcall(CASI);
        MadeChange = true;
        continue;
      }
    }

    if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
                  TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = LI = convertAtomicLoadToIntegerType(LI);
      MadeChange = true;
    } else if (SI &&
               TLI->shouldCastAtomicStoreInIR(SI) ==
                   TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = SI = convertAtomicStoreToIntegerType(SI);
      MadeChange = true;
    } else if (RMWI &&
               TLI->shouldCastAtomicRMWIInIR(RMWI) ==
                   TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = RMWI = convertAtomicXchgToIntegerType(RMWI);
      MadeChange = true;
    } else if (CASI) {
      // TODO: when we're ready to make the change at the IR level, we can
      // extend convertCmpXchgToInteger for floating point too.
      if (CASI->getCompareOperand()->getType()->isPointerTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one at a time.
        I = CASI = convertCmpXchgToIntegerType(CASI);
        MadeChange = true;
      }
    }

    if (TLI->shouldInsertFencesForAtomic(I)) {
      auto FenceOrdering = AtomicOrdering::Monotonic;
      if (LI && isAcquireOrStronger(LI->getOrdering())) {
        FenceOrdering = LI->getOrdering();
        LI->setOrdering(AtomicOrdering::Monotonic);
      } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
        FenceOrdering = SI->getOrdering();
        SI->setOrdering(AtomicOrdering::Monotonic);
      } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
                          isAcquireOrStronger(RMWI->getOrdering()))) {
        FenceOrdering = RMWI->getOrdering();
        RMWI->setOrdering(AtomicOrdering::Monotonic);
      } else if (CASI &&
                 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
                     TargetLoweringBase::AtomicExpansionKind::None &&
                 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getFailureOrdering()))) {
        // If a compare and swap is lowered to LL/SC, we can do smarter fence
        // insertion, with a stronger one on the success path than on the
        // failure path. As a result, fence insertion is directly done by
        // expandAtomicCmpXchg in that case.
        FenceOrdering = CASI->getMergedOrdering();
        CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
        CASI->setFailureOrdering(AtomicOrdering::Monotonic);
      }

      if (FenceOrdering != AtomicOrdering::Monotonic) {
        MadeChange |= bracketInstWithFences(I, FenceOrdering);
      }
    } else if (I->hasAtomicStore() &&
               TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
      auto FenceOrdering = AtomicOrdering::Monotonic;
      if (SI)
        FenceOrdering = SI->getOrdering();
      else if (RMWI)
        FenceOrdering = RMWI->getOrdering();
      else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
                           TargetLoweringBase::AtomicExpansionKind::LLSC)
        // LLSC is handled in expandAtomicCmpXchg().
        FenceOrdering = CASI->getSuccessOrdering();

      IRBuilder Builder(I);
      if (auto TrailingFence =
              TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
        TrailingFence->moveAfter(I);
        MadeChange = true;
      }
    }

    if (LI)
      MadeChange |= tryExpandAtomicLoad(LI);
    else if (SI)
      MadeChange |= tryExpandAtomicStore(SI);
    else if (RMWI) {
      // There are two different ways of expanding RMW instructions:
      //  - into a load if it is idempotent
      //  - into a Cmpxchg/LL-SC loop otherwise
      //  we try them in that order.

      if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
        MadeChange = true;
      } else {
        MadeChange |= tryExpandAtomicRMW(RMWI);
      }
    } else if (CASI)
      MadeChange |= tryExpandAtomicCmpXchg(CASI);
  }
  return MadeChange;
}

bool AtomicExpandLegacy::runOnFunction(Function &F) {

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;
  auto *TM = &TPC->getTM<TargetMachine>();
  AtomicExpandImpl AE;
  return AE.run(F, TM);
}

FunctionPass *llvm::createAtomicExpandLegacyPass() {
  return new AtomicExpandLegacy();
}

PreservedAnalyses AtomicExpandPass::run(Function &F,
                                        FunctionAnalysisManager &AM) {
  AtomicExpandImpl AE;

  bool Changed = AE.run(F, TM);
  if (!Changed)
    return PreservedAnalyses::all();

  return PreservedAnalyses::none();
}

bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
                                             AtomicOrdering Order) {
  ReplacementIRBuilder Builder(I, *DL);

  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);

  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
  // We have a guard here because not every atomic operation generates a
  // trailing fence.
  if (TrailingFence)
    TrailingFence->moveAfter(I);

  return (LeadingFence || TrailingFence);
}

/// Get the iX type with the same bitwidth as T.
IntegerType *
AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
  EVT VT = TLI->getMemValueType(DL, T);
  unsigned BitWidth = VT.getStoreSizeInBits();
  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
  return IntegerType::get(T->getContext(), BitWidth);
}

/// Convert an atomic load of a non-integral type to an integer load of the
/// equivalent bitwidth. See the function comment on
/// convertAtomicStoreToIntegerType for background.
LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
  auto *M = LI->getModule();
  Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(LI, *DL);

  Value *Addr = LI->getPointerOperand();

  auto *NewLI = Builder.CreateLoad(NewTy, Addr);
  NewLI->setAlignment(LI->getAlign());
  NewLI->setVolatile(LI->isVolatile());
  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");

  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
  LI->replaceAllUsesWith(NewVal);
  LI->eraseFromParent();
  return NewLI;
}

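// Illustrative example of the conversion above: "%v = load atomic float,
// ptr %p seq_cst, align 4" becomes "%i = load atomic i32, ptr %p seq_cst,
// align 4" followed by "%v = bitcast i32 %i to float".
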
AtomicRMWInst *
AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
  auto *M = RMWI->getModule();
  Type *NewTy =
      getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(RMWI, *DL);

  Value *Addr = RMWI->getPointerOperand();
  Value *Val = RMWI->getValOperand();
  Value *NewVal = Val->getType()->isPointerTy()
                      ? Builder.CreatePtrToInt(Val, NewTy)
                      : Builder.CreateBitCast(Val, NewTy);

  auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
                                          RMWI->getAlign(), RMWI->getOrdering(),
                                          RMWI->getSyncScopeID());
  NewRMWI->setVolatile(RMWI->isVolatile());
  LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");

  Value *NewRVal = RMWI->getType()->isPointerTy()
                       ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
                       : Builder.CreateBitCast(NewRMWI, RMWI->getType());
  RMWI->replaceAllUsesWith(NewRVal);
  RMWI->eraseFromParent();
  return NewRMWI;
}

bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    expandAtomicOpToLLSC(
        LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
        LI->getOrdering(),
        [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
    return true;
  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
    return expandAtomicLoadToLL(LI);
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
    return expandAtomicLoadToCmpXchg(LI);
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    LI->setAtomic(AtomicOrdering::NotAtomic);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
  }
}

bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
  switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::Expand:
    expandAtomicStore(SI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    SI->setAtomic(AtomicOrdering::NotAtomic);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicStore");
  }
}

bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);

  // On some architectures, load-linked instructions are atomic for larger
  // sizes than normal loads. For example, the only 64-bit load guaranteed
  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
  Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
                                   LI->getPointerOperand(), LI->getOrdering());
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();

  return true;
}

bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);
  AtomicOrdering Order = LI->getOrdering();
  if (Order == AtomicOrdering::Unordered)
    Order = AtomicOrdering::Monotonic;

  Value *Addr = LI->getPointerOperand();
  Type *Ty = LI->getType();
  Constant *DummyVal = Constant::getNullValue(Ty);

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, DummyVal, DummyVal, LI->getAlign(), Order,
      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");

  LI->replaceAllUsesWith(Loaded);
  LI->eraseFromParent();

  return true;
}

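// Note on the expansion above: the cmpxchg compares against a null dummy
// value and would also store that same null value, so memory is never
// visibly changed (it can only "succeed" by rewriting an existing zero);
// its first result is still the atomically loaded value, which is all we
// need here.
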
/// Convert an atomic store of a non-integral type to an integer store of the
/// equivalent bitwidth. We used to not support floating point or vector
/// atomics in the IR at all. The backends learned to deal with the bitcast
/// idiom because that was the only way of expressing the notion of an atomic
/// float or vector store. The long term plan is to teach each backend to
/// instruction select from the original atomic store, but as a migration
/// mechanism, we convert back to the old format which the backends understand.
/// Each backend will need individual work to recognize the new format.
StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
  ReplacementIRBuilder Builder(SI, *DL);
  auto *M = SI->getModule();
  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
                                            M->getDataLayout());
  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);

  Value *Addr = SI->getPointerOperand();

  StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
  NewSI->setAlignment(SI->getAlign());
  NewSI->setVolatile(SI->isVolatile());
  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
  SI->eraseFromParent();
  return NewSI;
}

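// Illustrative example of the conversion above: "store atomic float %f,
// ptr %p release, align 4" becomes "%i = bitcast float %f to i32" followed
// by "store atomic i32 %i, ptr %p release, align 4".
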
void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
  // This function is only called on atomic stores that are too large to be
  // atomic if implemented as a native store. So we replace them by an
  // atomic swap, that can be implemented for example as a ldrex/strex on ARM
  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
  // It is the responsibility of the target to only signal expansion via
  // shouldExpandAtomicRMW in cases where this is required and possible.
  ReplacementIRBuilder Builder(SI, *DL);
  AtomicOrdering Ordering = SI->getOrdering();
  assert(Ordering != AtomicOrdering::NotAtomic);
  AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
                                   ? AtomicOrdering::Monotonic
                                   : Ordering;
  AtomicRMWInst *AI = Builder.CreateAtomicRMW(
      AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
      SI->getAlign(), RMWOrdering);
  SI->eraseFromParent();

  // Now we have an appropriate swap instruction, lower it as usual.
  tryExpandAtomicRMW(AI);
}

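// Illustrative example of the expansion above (assuming a 32-bit ARM-style
// target that signals Expand for 64-bit stores): "store atomic i64 %v,
// ptr %p seq_cst" becomes "atomicrmw xchg ptr %p, i64 %v seq_cst" whose
// result is unused, and the swap is then lowered through the usual LL/SC
// loop.
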
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
                                 Value *Loaded, Value *NewVal, Align AddrAlign,
                                 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
                                 Value *&Success, Value *&NewLoaded) {
  Type *OrigTy = NewVal->getType();

  // This code can go away when cmpxchg supports FP and vector types.
  assert(!OrigTy->isPointerTy());
  bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
  if (NeedBitcast) {
    IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
    NewVal = Builder.CreateBitCast(NewVal, IntTy);
    Loaded = Builder.CreateBitCast(Loaded, IntTy);
  }

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
  Success = Builder.CreateExtractValue(Pair, 1, "success");
  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

  if (NeedBitcast)
    NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
}

bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
  LLVMContext &Ctx = AI->getModule()->getContext();
  TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
  switch (Kind) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::LLSC);
    } else {
      auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                   AI->getValOperand());
      };
      expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
                           AI->getAlign(), AI->getOrdering(), PerformOp);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::CmpXChg);
    } else {
      SmallVector<StringRef> SSNs;
      Ctx.getSyncScopeNames(SSNs);
      auto MemScope = SSNs[AI->getSyncScopeID()].empty()
                          ? "system"
                          : SSNs[AI->getSyncScopeID()];
      OptimizationRemarkEmitter ORE(AI->getFunction());
      ORE.emit([&]() {
        return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
               << "A compare and swap loop was generated for an atomic "
               << AI->getOperationName(AI->getOperation()) << " operation at "
               << MemScope << " memory scope";
      });
      expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      AtomicRMWInst::BinOp Op = AI->getOperation();
      // Widen And/Or/Xor and give the target another chance at expanding it.
      if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
          Op == AtomicRMWInst::And) {
        tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
        return true;
      }
    }
    expandAtomicRMWToMaskedIntrinsic(AI);
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
    TLI->emitBitTestAtomicRMWIntrinsic(AI);
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
    TLI->emitCmpArithAtomicRMWIntrinsic(AI);
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    return lowerAtomicRMWInst(AI);
  case TargetLoweringBase::AtomicExpansionKind::Expand:
    TLI->emitExpandAtomicRMW(AI);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
  }
}

namespace {

struct PartwordMaskValues {
  // These three fields are guaranteed to be set by createMaskInstrs.
  Type *WordType = nullptr;
  Type *ValueType = nullptr;
  Type *IntValueType = nullptr;
  Value *AlignedAddr = nullptr;
  Align AlignedAddrAlignment;
  // The remaining fields can be null.
  Value *ShiftAmt = nullptr;
  Value *Mask = nullptr;
  Value *Inv_Mask = nullptr;
};

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
  auto PrintObj = [&O](auto *V) {
    if (V)
      O << *V;
    else
      O << "nullptr";
    O << '\n';
  };
  O << "PartwordMaskValues {\n";
  O << "  WordType: ";
  PrintObj(PMV.WordType);
  O << "  ValueType: ";
  PrintObj(PMV.ValueType);
  O << "  AlignedAddr: ";
  PrintObj(PMV.AlignedAddr);
  O << "  AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
  O << "  ShiftAmt: ";
  PrintObj(PMV.ShiftAmt);
  O << "  Mask: ";
  PrintObj(PMV.Mask);
  O << "  Inv_Mask: ";
  PrintObj(PMV.Inv_Mask);
  O << "}\n";
  return O;
}

} // end anonymous namespace

/// This is a helper function which builds instructions to provide
/// values necessary for partword atomic operations. It takes an
/// incoming address, Addr, and ValueType, and constructs the address,
/// shift-amounts and masks needed to work with a larger value of size
/// WordSize.
///
/// AlignedAddr: Addr rounded down to a multiple of WordSize
///
/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
///           from AlignAddr for it to have the same value as if
///           ValueType was loaded from Addr.
///
/// Mask: Value to mask with the value loaded from AlignAddr to
///       include only the part that would've been loaded from Addr.
///
/// Inv_Mask: The inverse of Mask.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
                                           Instruction *I, Type *ValueType,
                                           Value *Addr, Align AddrAlign,
                                           unsigned MinWordSize) {
  PartwordMaskValues PMV;

  Module *M = I->getModule();
  LLVMContext &Ctx = M->getContext();
  const DataLayout &DL = M->getDataLayout();
  unsigned ValueSize = DL.getTypeStoreSize(ValueType);

  PMV.ValueType = PMV.IntValueType = ValueType;
  if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
    PMV.IntValueType =
        Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());

  PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
                                         : ValueType;
  if (PMV.ValueType == PMV.WordType) {
    PMV.AlignedAddr = Addr;
    PMV.AlignedAddrAlignment = AddrAlign;
    PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
    PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
    return PMV;
  }

  PMV.AlignedAddrAlignment = Align(MinWordSize);

  assert(ValueSize < MinWordSize);

  PointerType *PtrTy = cast<PointerType>(Addr->getType());
  IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
  Value *PtrLSB;

  if (AddrAlign < MinWordSize) {
    PMV.AlignedAddr = Builder.CreateIntrinsic(
        Intrinsic::ptrmask, {PtrTy, IntTy},
        {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
        "AlignedAddr");

    Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
    PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
  } else {
    // If the alignment is high enough, the LSB are known 0.
    PMV.AlignedAddr = Addr;
    PtrLSB = ConstantInt::getNullValue(IntTy);
  }

  if (DL.isLittleEndian()) {
    // turn bytes into bits
    PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
  } else {
    // turn bytes into bits, and count from the other side.
    PMV.ShiftAmt = Builder.CreateShl(
        Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
  }

  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
  PMV.Mask = Builder.CreateShl(
      ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
      "Mask");

  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");

  return PMV;
}

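// Worked example for the helper above (illustrative): an i8 located at byte
// offset 3 within a 4-byte word on a little-endian target gets
// AlignedAddr = Addr - 3, ShiftAmt = 24, Mask = 0xFF000000 and
// Inv_Mask = 0x00FFFFFF; on a big-endian target the same i8 gets
// ShiftAmt = 0.
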
static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                 const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return WideWord;

  Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
  Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
  return Builder.CreateBitCast(Trunc, PMV.ValueType);
}

static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                Value *Updated, const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return Updated;

  Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);

  Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
  Value *Shift =
      Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
  Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
  Value *Or = Builder.CreateOr(And, Shift, "inserted");
  return Or;
}

/// Emit IR to implement a masked version of a given atomicrmw
/// operation. (That is, only the bits under the Mask should be
/// affected by the operation)
static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
                                    IRBuilderBase &Builder, Value *Loaded,
                                    Value *Shifted_Inc, Value *Inc,
                                    const PartwordMaskValues &PMV) {
  // TODO: update to use
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
  // to merge bits from two values without requiring PMV.Inv_Mask.
  switch (Op) {
  case AtomicRMWInst::Xchg: {
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
    return FinalVal;
  }
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::And:
    llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Nand: {
    // The other arithmetic ops need to be masked into place.
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
    return FinalVal;
  }
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
  case AtomicRMWInst::FMin:
  case AtomicRMWInst::FMax:
  case AtomicRMWInst::UIncWrap:
  case AtomicRMWInst::UDecWrap: {
    // Finally, other ops will operate on the full value, so truncate down to
    // the original size, and expand out again after doing the
    // operation. Bitcasts will be inserted for FP values.
    Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
    Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
    return FinalVal;
  }
  default:
    llvm_unreachable("Unknown atomic op");
  }
}

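// For example (illustrative), a masked i8 "add": the shifted increment is
// added to the whole loaded word, the sum is AND'ed with Mask to keep only
// the target byte lane, and the remaining lanes of the original word are
// OR'ed back in, so neighbouring bytes are never modified.
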
/// Expand a sub-word atomicrmw operation into an appropriate
/// word-sized operation.
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside of the loop may operate upon only a
/// part of the value.
void AtomicExpandImpl::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  // Widen And/Or/Xor and give the target another chance at expanding it.
  AtomicRMWInst::BinOp Op = AI->getOperation();
  if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
      Op == AtomicRMWInst::And) {
    tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
    return;
  }
  AtomicOrdering MemOpOrder = AI->getOrdering();
  SyncScope::ID SSID = AI->getSyncScopeID();

  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted = nullptr;
  if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
      Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
    Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
    ValOperand_Shifted =
        Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
                          "ValOperand_Shifted");
  }

  auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
    return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
                                 AI->getValOperand(), PMV);
  };

  Value *OldResult;
  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
    OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                     PMV.AlignedAddrAlignment, MemOpOrder, SSID,
                                     PerformPartwordOp, createCmpXchgInstFun);
  } else {
    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                  PMV.AlignedAddrAlignment, MemOpOrder,
                                  PerformPartwordOp);
  }

  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

/// Copy metadata that's safe to preserve when widening atomics.
static void copyMetadataForAtomic(Instruction &Dest,
                                  const Instruction &Source) {
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  Source.getAllMetadata(MD);
  LLVMContext &Ctx = Dest.getContext();
  MDBuilder MDB(Ctx);

  for (auto [ID, N] : MD) {
    switch (ID) {
    case LLVMContext::MD_dbg:
    case LLVMContext::MD_tbaa:
    case LLVMContext::MD_tbaa_struct:
    case LLVMContext::MD_alias_scope:
    case LLVMContext::MD_noalias:
    case LLVMContext::MD_access_group:
    case LLVMContext::MD_mmra:
      Dest.setMetadata(ID, N);
      break;
    default:
      if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
        Dest.setMetadata(ID, N);
      else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
        Dest.setMetadata(ID, N);

      break;
    }
  }
}

// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);
  AtomicRMWInst::BinOp Op = AI->getOperation();

  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
          Op == AtomicRMWInst::And) &&
         "Unable to widen operation");

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  Value *NewOperand;

  if (Op == AtomicRMWInst::And)
    NewOperand =
        Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
  else
    NewOperand = ValOperand_Shifted;

  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
      Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
      AI->getOrdering(), AI->getSyncScopeID());

  copyMetadataForAtomic(*NewAI, *AI);

  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
  return NewAI;
}

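// Illustrative example of the widening above: an i8 "atomicrmw or" becomes an
// i32 "atomicrmw or" on the containing word with the operand shifted into the
// right byte lane (the untouched lanes are OR'ed with zero and thus
// unchanged); for "and", the inverse mask is OR'ed into the operand so the
// untouched lanes are AND'ed with all-ones.
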
bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // The basic idea here is that we're expanding a cmpxchg of a
  // smaller memory size up to a word-sized cmpxchg. To do this, we
  // need to add a retry-loop for strong cmpxchg, so that
  // modifications to other parts of the word don't cause a spurious
  // failure.

  // This generates code like the following:
  //     [[Setup mask values PMV.*]]
  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
  //     %InitLoaded = load i32* %addr
  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
  //     br partword.cmpxchg.loop
  // partword.cmpxchg.loop:
  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
  //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
  //        i32 %FullWord_NewVal success_ordering failure_ordering
  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
  //     %Success = extractvalue { i32, i1 } %NewCI, 1
  //     br i1 %Success, label %partword.cmpxchg.end,
  //        label %partword.cmpxchg.failure
  // partword.cmpxchg.failure:
  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
  //        label %partword.cmpxchg.end
  // partword.cmpxchg.end:
  //    %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
  //    %FinalOldVal = trunc i32 %tmp1 to i8
  //    %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
  //    %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1

  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  ReplacementIRBuilder Builder(CI, *DL);
  LLVMContext &Ctx = Builder.getContext();

  BasicBlock *EndBB =
      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);

  // The split call above "helpfully" added a branch at the end of BB
  // (to the wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the entire current word, and mask into place the expected and new
  // values.
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  InitLoaded->setVolatile(CI->isVolatile());
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  Builder.CreateBr(LoopBB);

  // partword.cmpxchg.loop:
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Mask/Or the expected and new values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
      CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  // When we're building a strong cmpxchg, we need a loop, so you
  // might think we could use a weak cmpxchg inside. But, using strong
  // allows the below comparison for ShouldContinue, and we're
  // expecting the underlying cmpxchg to be a machine instruction,
  // which is strong anyways.
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);

  if (CI->isWeak())
    Builder.CreateBr(EndBB);
  else
    Builder.CreateCondBr(Success, EndBB, FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, verify that the masked-out part of the loaded value
  // has been modified. If it didn't, abort the cmpxchg, since the
  // masked-in part must've.
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);

  // Add the second value to the phi from above
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);

  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return true;
}

void AtomicExpandImpl::expandAtomicOpToLLSC(
    Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  ReplacementIRBuilder Builder(I, *DL);
  Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
                                    MemOpOrder, PerformOp);

  I->replaceAllUsesWith(Loaded);
  I->eraseFromParent();
}

void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // The value operand must be sign-extended for signed min/max so that the
  // target's signed comparison instructions can be used. Otherwise, just
  // zero-extend.
  Instruction::CastOps CastOp = Instruction::ZExt;
  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
    CastOp = Instruction::SExt;

  Value *ValOperand_Shifted = Builder.CreateShl(
      Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
      PMV.ShiftAmt, "ValOperand_Shifted");
  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
      Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
      AI->getOrdering());
  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
    AtomicCmpXchgInst *CI) {
  ReplacementIRBuilder Builder(CI, *DL);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
      CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *CmpVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
      "CmpVal_Shifted");
  Value *NewVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
      "NewVal_Shifted");
  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
      Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
      CI->getMergedOrdering());
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Value *Success = Builder.CreateICmpEQ(
      CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}

Value *AtomicExpandImpl::insertRMWLLSCLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  assert(AddrAlign >=
             F->getDataLayout().getTypeStoreSize(ResultTy) &&
         "Expected at least natural alignment at this point.");

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp i32 ne %stored, 0
  //     br i1 %try_again, label %loop, label %atomicrmw.end
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}

/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
/// IR. As a migration step, we convert back to what used to be the standard
/// way to represent a pointer cmpxchg so that we can update backends one by
/// one.
AtomicCmpXchgInst *
AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
  auto *M = CI->getModule();
  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
                                            M->getDataLayout());

  ReplacementIRBuilder Builder(CI, *DL);

  Value *Addr = CI->getPointerOperand();

  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);

  auto *NewCI = Builder.CreateAtomicCmpXchg(
      Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
      CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  NewCI->setWeak(CI->isWeak());
  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Succ = Builder.CreateExtractValue(NewCI, 1);

  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());

  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, OldVal, 0);
  Res = Builder.CreateInsertValue(Res, Succ, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return NewCI;
}

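// Illustrative example of the conversion above, assuming 64-bit pointers:
// "cmpxchg ptr %addr, ptr %old, ptr %new acq_rel monotonic" becomes an i64
// cmpxchg of ptrtoint'd operands, with the loaded result converted back via
// inttoptr.
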
1288 bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst
*CI
) {
1289 AtomicOrdering SuccessOrder
= CI
->getSuccessOrdering();
1290 AtomicOrdering FailureOrder
= CI
->getFailureOrdering();
1291 Value
*Addr
= CI
->getPointerOperand();
1292 BasicBlock
*BB
= CI
->getParent();
1293 Function
*F
= BB
->getParent();
1294 LLVMContext
&Ctx
= F
->getContext();
1295 // If shouldInsertFencesForAtomic() returns true, then the target does not
1296 // want to deal with memory orders, and emitLeading/TrailingFence should take
1297 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1298 // should preserve the ordering.
1299 bool ShouldInsertFencesForAtomic
= TLI
->shouldInsertFencesForAtomic(CI
);
1300 AtomicOrdering MemOpOrder
= ShouldInsertFencesForAtomic
1301 ? AtomicOrdering::Monotonic
1302 : CI
->getMergedOrdering();
1304 // In implementations which use a barrier to achieve release semantics, we can
1305 // delay emitting this barrier until we know a store is actually going to be
1306 // attempted. The cost of this delay is that we need 2 copies of the block
1307 // emitting the load-linked, affecting code size.
1309 // Ideally, this logic would be unconditional except for the minsize check
1310 // since in other cases the extra blocks naturally collapse down to the
1311 // minimal loop. Unfortunately, this puts too much stress on later
1312 // optimisations so we avoid emitting the extra logic in those cases too.
1313 bool HasReleasedLoadBB
= !CI
->isWeak() && ShouldInsertFencesForAtomic
&&
1314 SuccessOrder
!= AtomicOrdering::Monotonic
&&
1315 SuccessOrder
!= AtomicOrdering::Acquire
&&
1318 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1319 // do it even on minsize.
1320 bool UseUnconditionalReleaseBarrier
= F
->hasMinSize() && !CI
->isWeak();
1322 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1324 // The full expansion we produce is:
1326 // %aligned.addr = ...
1328 // %unreleasedload = @load.linked(%aligned.addr)
1329 // %unreleasedload.extract = extract value from %unreleasedload
1330 // %should_store = icmp eq %unreleasedload.extract, %desired
1331 // br i1 %should_store, label %cmpxchg.releasingstore,
1332 // label %cmpxchg.nostore
1333 // cmpxchg.releasingstore:
1335 // br label cmpxchg.trystore
1336 // cmpxchg.trystore:
1337 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1338 // [%releasedload, %cmpxchg.releasedload]
1339 // %updated.new = insert %new into %loaded.trystore
1340 // %stored = @store_conditional(%updated.new, %aligned.addr)
1341 // %success = icmp eq i32 %stored, 0
1342 // br i1 %success, label %cmpxchg.success,
1343 // label %cmpxchg.releasedload/%cmpxchg.failure
1344 // cmpxchg.releasedload:
1345 // %releasedload = @load.linked(%aligned.addr)
1346 // %releasedload.extract = extract value from %releasedload
1347 // %should_store = icmp eq %releasedload.extract, %desired
1348 // br i1 %should_store, label %cmpxchg.trystore,
1349 // label %cmpxchg.failure
1352 // br label %cmpxchg.end
1354 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1356 // %cmpxchg.releasedload/%cmpxchg.trystore]
1357 // @load_linked_fail_balance()?
1358 // br label %cmpxchg.failure
1361 // br label %cmpxchg.end
1363 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1364 // [%loaded.trystore, %cmpxchg.trystore]
1365 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1366 // %loaded = extract value from %loaded.exit
1367 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1368 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1370 BasicBlock
*ExitBB
= BB
->splitBasicBlock(CI
->getIterator(), "cmpxchg.end");
1371 auto FailureBB
= BasicBlock::Create(Ctx
, "cmpxchg.failure", F
, ExitBB
);
1372 auto NoStoreBB
= BasicBlock::Create(Ctx
, "cmpxchg.nostore", F
, FailureBB
);
1373 auto SuccessBB
= BasicBlock::Create(Ctx
, "cmpxchg.success", F
, NoStoreBB
);
1374 auto ReleasedLoadBB
=
1375 BasicBlock::Create(Ctx
, "cmpxchg.releasedload", F
, SuccessBB
);
1377 BasicBlock::Create(Ctx
, "cmpxchg.trystore", F
, ReleasedLoadBB
);
1378 auto ReleasingStoreBB
=
1379 BasicBlock::Create(Ctx
, "cmpxchg.fencedstore", F
, TryStoreBB
);
1380 auto StartBB
= BasicBlock::Create(Ctx
, "cmpxchg.start", F
, ReleasingStoreBB
);
1382 ReplacementIRBuilder
Builder(CI
, *DL
);
1384 // The split call above "helpfully" added a branch at the end of BB (to the
1385 // wrong place), but we might want a fence too. It's easiest to just remove
1386 // the branch entirely.
1387 std::prev(BB
->end())->eraseFromParent();
1388 Builder
.SetInsertPoint(BB
);
1389 if (ShouldInsertFencesForAtomic
&& UseUnconditionalReleaseBarrier
)
1390 TLI
->emitLeadingFence(Builder
, CI
, SuccessOrder
);
1392 PartwordMaskValues PMV
=
1393 createMaskInstrs(Builder
, CI
, CI
->getCompareOperand()->getType(), Addr
,
1394 CI
->getAlign(), TLI
->getMinCmpXchgSizeInBits() / 8);
1395 Builder
.CreateBr(StartBB
);
1397 // Start the main loop block now that we've taken care of the preliminaries.
1398 Builder
.SetInsertPoint(StartBB
);
1399 Value
*UnreleasedLoad
=
1400 TLI
->emitLoadLinked(Builder
, PMV
.WordType
, PMV
.AlignedAddr
, MemOpOrder
);
1401 Value
*UnreleasedLoadExtract
=
1402 extractMaskedValue(Builder
, UnreleasedLoad
, PMV
);
1403 Value
*ShouldStore
= Builder
.CreateICmpEQ(
1404 UnreleasedLoadExtract
, CI
->getCompareOperand(), "should_store");
1406 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1407 // jump straight past that fence instruction (if it exists).
1408 Builder
.CreateCondBr(ShouldStore
, ReleasingStoreBB
, NoStoreBB
);
1410 Builder
.SetInsertPoint(ReleasingStoreBB
);
1411 if (ShouldInsertFencesForAtomic
&& !UseUnconditionalReleaseBarrier
)
1412 TLI
->emitLeadingFence(Builder
, CI
, SuccessOrder
);
1413 Builder
.CreateBr(TryStoreBB
);
1415 Builder
.SetInsertPoint(TryStoreBB
);
1416 PHINode
*LoadedTryStore
=
1417 Builder
.CreatePHI(PMV
.WordType
, 2, "loaded.trystore");
1418 LoadedTryStore
->addIncoming(UnreleasedLoad
, ReleasingStoreBB
);
1419 Value
*NewValueInsert
=
1420 insertMaskedValue(Builder
, LoadedTryStore
, CI
->getNewValOperand(), PMV
);
1421 Value
*StoreSuccess
= TLI
->emitStoreConditional(Builder
, NewValueInsert
,
1422 PMV
.AlignedAddr
, MemOpOrder
);
1423 StoreSuccess
= Builder
.CreateICmpEQ(
1424 StoreSuccess
, ConstantInt::get(Type::getInt32Ty(Ctx
), 0), "success");
1425 BasicBlock
*RetryBB
= HasReleasedLoadBB
? ReleasedLoadBB
: StartBB
;
1426 Builder
.CreateCondBr(StoreSuccess
, SuccessBB
,
1427 CI
->isWeak() ? FailureBB
: RetryBB
);
1429 Builder
.SetInsertPoint(ReleasedLoadBB
);
1431 if (HasReleasedLoadBB
) {
1433 TLI
->emitLoadLinked(Builder
, PMV
.WordType
, PMV
.AlignedAddr
, MemOpOrder
);
1434 Value
*SecondLoadExtract
= extractMaskedValue(Builder
, SecondLoad
, PMV
);
1435 ShouldStore
= Builder
.CreateICmpEQ(SecondLoadExtract
,
1436 CI
->getCompareOperand(), "should_store");
1438 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1439 // jump straight past that fence instruction (if it exists).
1440 Builder
.CreateCondBr(ShouldStore
, TryStoreBB
, NoStoreBB
);
1441 // Update PHI node in TryStoreBB.
1442 LoadedTryStore
->addIncoming(SecondLoad
, ReleasedLoadBB
);
1444 Builder
.CreateUnreachable();
1446 // Make sure later instructions don't get reordered with a fence if
1448 Builder
.SetInsertPoint(SuccessBB
);
1449 if (ShouldInsertFencesForAtomic
||
1450 TLI
->shouldInsertTrailingFenceForAtomicStore(CI
))
1451 TLI
->emitTrailingFence(Builder
, CI
, SuccessOrder
);
1452 Builder
.CreateBr(ExitBB
);
1454 Builder
.SetInsertPoint(NoStoreBB
);
1455 PHINode
*LoadedNoStore
=
1456 Builder
.CreatePHI(UnreleasedLoad
->getType(), 2, "loaded.nostore");
1457 LoadedNoStore
->addIncoming(UnreleasedLoad
, StartBB
);
1458 if (HasReleasedLoadBB
)
1459 LoadedNoStore
->addIncoming(SecondLoad
, ReleasedLoadBB
);
1461 // In the failing case, where we don't execute the store-conditional, the
1462 // target might want to balance out the load-linked with a dedicated
1463 // instruction (e.g., on ARM, clearing the exclusive monitor).
1464 TLI
->emitAtomicCmpXchgNoStoreLLBalance(Builder
);
1465 Builder
.CreateBr(FailureBB
);
1467 Builder
.SetInsertPoint(FailureBB
);
1468 PHINode
*LoadedFailure
=
1469 Builder
.CreatePHI(UnreleasedLoad
->getType(), 2, "loaded.failure");
1470 LoadedFailure
->addIncoming(LoadedNoStore
, NoStoreBB
);
1472 LoadedFailure
->addIncoming(LoadedTryStore
, TryStoreBB
);
1473 if (ShouldInsertFencesForAtomic
)
1474 TLI
->emitTrailingFence(Builder
, CI
, FailureOrder
);
1475 Builder
.CreateBr(ExitBB
);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
  // PHI.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *LoadedExit =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
  LoadedExit->addIncoming(LoadedFailure, FailureBB);
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // This is the "exit value" from the cmpxchg expansion. It may be of
  // a type wider than the one in the cmpxchg instruction.
  Value *LoadedFull = LoadedExit;

  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto *User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now we're no longer iterating through them.
  for (auto *EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has
    // happened, so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}
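
// Illustrative sketch (hypothetical IR, for exposition only): a front end that
// emitted
//   %ok = extractvalue { i32, i1 } %pair, 1
//   br i1 %ok, ...
// ends up branching directly on the i1 "success" PHI built above, so the
// extractvalue and the redundant comparison fold away with the expansion.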

bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;

  AtomicRMWInst::BinOp Op = RMWI->getOperation();
  switch (Op) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    return C->isZero();
  case AtomicRMWInst::And:
    return C->isMinusOne();
  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
  default:
    return false;
  }
}
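
// For example (following the checks above): "atomicrmw add ptr %p, i32 0",
// "atomicrmw or ptr %p, i32 0", and "atomicrmw and ptr %p, i32 -1" all leave
// the stored value unchanged, so only their ordering side effects matter.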

bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
    tryExpandAtomicLoad(ResultingLoad);
    return true;
  }
  return false;
}

Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder, SyncScope::ID SSID,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
                MemOpOrder == AtomicOrdering::Unordered
                    ? AtomicOrdering::Monotonic
                    : MemOpOrder,
                SSID, Success, NewLoaded);
  assert(Success && NewLoaded);

  Loaded->addIncoming(NewLoaded, LoopBB);

  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return NewLoaded;
}
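
// As a concrete instance of the loop above (sketch, not taken from a test):
// "atomicrmw nand ptr %p, i32 %v seq_cst", which most targets cannot lower
// directly, has PerformOp build roughly
//   %tmp = and i32 %loaded, %v
//   %new = xor i32 %tmp, -1
// inside the loop body before the cmpxchg retry.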

bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
  unsigned ValueSize = getAtomicOpSize(CI);

  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
  case TargetLoweringBase::AtomicExpansionKind::None:
    if (ValueSize < MinCASSize)
      return expandPartwordCmpXchg(CI);
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    return expandAtomicCmpXchg(CI);
  }
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
    expandAtomicCmpXchgToMaskedIntrinsic(CI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    return lowerAtomicCmpXchgInst(CI);
  }
}

// Note: This function is exposed externally by AtomicExpandUtils.h
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                    CreateCmpXchgInstFun CreateCmpXchg) {
  ReplacementIRBuilder Builder(AI, AI->getDataLayout());
  Builder.setIsFPConstrained(
      AI->getFunction()->hasFnAttribute(Attribute::StrictFP));

  // FIXME: If FP exceptions are observable, we should force them off for the
  // loop for the FP atomics.
  Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
      Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
      AI->getOrdering(), AI->getSyncScopeID(),
      [&](IRBuilderBase &Builder, Value *Loaded) {
        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                   AI->getValOperand());
      },
      CreateCmpXchg);

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return true;
}

// In order to use one of the sized library calls such as
// __atomic_fetch_add_4, the alignment must be sufficient, the size
// must be one of the potentially-specialized sizes, and the value
// type must actually exist in C on the target (otherwise, the
// function wouldn't actually be defined.)
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
                                  const DataLayout &DL) {
  // TODO: "LargestSize" is an approximation for "largest type that
  // you can express in C". It seems to be the case that int128 is
  // supported on all 64-bit platforms, otherwise only up to 64-bit
  // integers are supported. If we get this wrong, then we'll try to
  // call a sized libcall that doesn't actually exist. There should
  // really be some more reliable way in LLVM of determining integer
  // sizes which are valid in the target's C ABI...
  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
  return Alignment >= Size &&
         (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
         Size <= LargestSize;
}
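
// Worked example of the check above (illustrative): on a target whose largest
// legal integer type is at least 64 bits, LargestSize is 16, so a naturally
// aligned 8-byte operation qualifies for e.g. __atomic_fetch_add_8, whereas a
// 4-byte operation with only 2-byte alignment fails the Alignment >= Size test
// and must go through the generic (unsized) __atomic_* entry points.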

void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_LOAD,   RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
      RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
}

void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_STORE,   RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
      RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
      nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
}

void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_COMPARE_EXCHANGE,   RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
      I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
      Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
}

static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE,   RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL,   RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL,     RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};

  switch (Op) {
  case AtomicRMWInst::BAD_BINOP:
    llvm_unreachable("Should not have BAD_BINOP.");
  case AtomicRMWInst::Xchg:
    return ArrayRef(LibcallsXchg);
  case AtomicRMWInst::Add:
    return ArrayRef(LibcallsAdd);
  case AtomicRMWInst::Sub:
    return ArrayRef(LibcallsSub);
  case AtomicRMWInst::And:
    return ArrayRef(LibcallsAnd);
  case AtomicRMWInst::Or:
    return ArrayRef(LibcallsOr);
  case AtomicRMWInst::Xor:
    return ArrayRef(LibcallsXor);
  case AtomicRMWInst::Nand:
    return ArrayRef(LibcallsNand);
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FMax:
  case AtomicRMWInst::FMin:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
  case AtomicRMWInst::UIncWrap:
  case AtomicRMWInst::UDecWrap:
    // No atomic libcalls are available for max/min/umax/umin.
    return {};
  }
  llvm_unreachable("Unexpected AtomicRMW operation.");
}
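
// For example (sketch of the mapping): an "atomicrmw add" of 4 bytes selects
// the LibcallsAdd table here, and expandAtomicOpToLibcall below then picks
// entry 3, i.e. ATOMIC_FETCH_ADD_4 / __atomic_fetch_add_4. Entry 0 is
// UNKNOWN_LIBCALL because there is no generic __atomic_fetch_add, so sizes
// that cannot use a sized call fall back to the cmpxchg-based path in
// expandAtomicRMWToLibcall.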

void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());

  unsigned Size = getAtomicOpSize(I);

  bool Success = false;
  if (!Libcalls.empty())
    Success = expandAtomicOpToLibcall(
        I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
        nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);

  // The expansion failed: either there were no libcalls at all for
  // the operation (min/max), or there were only size-specialized
  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
  // CAS libcall, via a CAS loop, instead.
  if (!Success) {
    expandAtomicRMWToCmpXchg(
        I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
                  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
                  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
          // Create the CAS instruction normally...
          AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
              Addr, Loaded, NewVal, Alignment, MemOpOrder,
              AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
          Success = Builder.CreateExtractValue(Pair, 1, "success");
          NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

          // ...and then expand the CAS into a libcall.
          expandAtomicCASToLibcall(Pair);
        });
  }
}
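
// Concrete instance of the fallback (illustrative): "atomicrmw fadd" has no
// __atomic_fetch_* libcall at all, so GetRMWLibcall returns an empty list and
// the operation is rewritten as a cmpxchg loop whose compare-exchange is then
// itself lowered to a __atomic_compare_exchange{_N} call.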

// A helper routine for the above expandAtomic*ToLibcall functions.
//
// 'Libcalls' contains an array of enum values for the particular
// ATOMIC libcalls to be emitted. All of the other arguments besides
// 'I' are extracted from the Instruction subclass by the
// caller. Depending on the particular call, some will be null.
bool AtomicExpandImpl::expandAtomicOpToLibcall(
    Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
    Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
    AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
  assert(Libcalls.size() == 6);

  LLVMContext &Ctx = I->getContext();
  Module *M = I->getModule();
  const DataLayout &DL = M->getDataLayout();
  IRBuilder<> Builder(I);
  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());

  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);

  const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);

  // TODO: the "order" argument type is "int", not int32. So
  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
  Constant *OrderingVal =
      ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
  Constant *Ordering2Val = nullptr;
  if (CASExpected) {
    assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
    Ordering2Val =
        ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
  }
  bool HasResult = I->getType() != Type::getVoidTy(Ctx);

  RTLIB::Libcall RTLibType;
  if (UseSizedLibcall) {
    switch (Size) {
    case 1:
      RTLibType = Libcalls[1];
      break;
    case 2:
      RTLibType = Libcalls[2];
      break;
    case 4:
      RTLibType = Libcalls[3];
      break;
    case 8:
      RTLibType = Libcalls[4];
      break;
    case 16:
      RTLibType = Libcalls[5];
      break;
    }
  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
    RTLibType = Libcalls[0];
  } else {
    // Can't use sized function, and there's no generic for this
    // operation, so give up.
    return false;
  }

  if (!TLI->getLibcallName(RTLibType)) {
    // This target does not implement the requested atomic libcall so give up.
    return false;
  }

  // Build up the function call. There are two kinds. First, the sized
  // variants. These calls are going to be one of the following (with
  // N=1,2,4,8,16):
  //  iN    __atomic_load_N(iN *ptr, int ordering)
  //  void  __atomic_store_N(iN *ptr, iN val, int ordering)
  //  iN    __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
  //  bool  __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
  //                                    int success_order, int failure_order)
  //
  // Note that these functions can be used for non-integer atomic
  // operations, the values just need to be bitcast to integers on the
  // way in and out.
  //
  // And, then, the generic variants. They look like the following:
  //  void  __atomic_load(size_t size, void *ptr, void *ret, int ordering)
  //  void  __atomic_store(size_t size, void *ptr, void *val, int ordering)
  //  void  __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
  //                          int ordering)
  //  bool  __atomic_compare_exchange(size_t size, void *ptr, void *expected,
  //                                  void *desired, int success_order,
  //                                  int failure_order)
  //
  // The different signatures are built up depending on the
  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
  // variables.

  AllocaInst *AllocaCASExpected = nullptr;
  AllocaInst *AllocaValue = nullptr;
  AllocaInst *AllocaResult = nullptr;

  Type *ResultTy;
  SmallVector<Value *, 6> Args;
  AttributeList Attr;

  // 'size' argument.
  if (!UseSizedLibcall) {
    // Note, getIntPtrType is assumed equivalent to size_t.
    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
  }

  // 'ptr' argument.
  // note: This assumes all address spaces share a common libfunc
  // implementation and that addresses are convertable. For systems without
  // that property, we'd need to extend this mechanism to support AS-specific
  // families of atomic intrinsics.
  Value *PtrVal = PointerOperand;
  PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
  Args.push_back(PtrVal);

  // 'expected' argument, if present.
  if (CASExpected) {
    AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
    AllocaCASExpected->setAlignment(AllocaAlignment);
    Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64);
    Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
    Args.push_back(AllocaCASExpected);
  }

  // 'val' argument ('desired' for cas), if present.
  if (ValueOperand) {
    if (UseSizedLibcall) {
      Value *IntValue =
          Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
      Args.push_back(IntValue);
    } else {
      AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
      AllocaValue->setAlignment(AllocaAlignment);
      Builder.CreateLifetimeStart(AllocaValue, SizeVal64);
      Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
      Args.push_back(AllocaValue);
    }
  }

  // 'ret' argument.
  if (!CASExpected && HasResult && !UseSizedLibcall) {
    AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
    AllocaResult->setAlignment(AllocaAlignment);
    Builder.CreateLifetimeStart(AllocaResult, SizeVal64);
    Args.push_back(AllocaResult);
  }

  // 'ordering' ('success_order' for cas) argument.
  Args.push_back(OrderingVal);

  // 'failure_order' argument, if present.
  if (Ordering2Val)
    Args.push_back(Ordering2Val);

  // Now, the return type.
  if (CASExpected) {
    ResultTy = Type::getInt1Ty(Ctx);
    Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
  } else if (HasResult && UseSizedLibcall)
    ResultTy = SizedIntTy;
  else
    ResultTy = Type::getVoidTy(Ctx);

  // Done with setting up arguments and return types, create the call:
  SmallVector<Type *, 6> ArgTys;
  for (Value *Arg : Args)
    ArgTys.push_back(Arg->getType());
  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
  FunctionCallee LibcallFn =
      M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
  Call->setAttributes(Attr);
  Value *Result = Call;

  // And then, extract the results...
  if (ValueOperand && !UseSizedLibcall)
    Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);

  if (CASExpected) {
    // The final result from the CAS is {load of 'expected' alloca, bool result
    // from call}
    Type *FinalResultTy = I->getType();
    Value *V = PoisonValue::get(FinalResultTy);
    Value *ExpectedOut = Builder.CreateAlignedLoad(
        CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
    Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64);
    V = Builder.CreateInsertValue(V, ExpectedOut, 0);
    V = Builder.CreateInsertValue(V, Result, 1);
    I->replaceAllUsesWith(V);
  } else if (HasResult) {
    Value *V;
    if (UseSizedLibcall)
      V = Builder.CreateBitOrPointerCast(Result, I->getType());
    else {
      V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
                                    AllocaAlignment);
      Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
    }
    I->replaceAllUsesWith(V);
  }
  I->eraseFromParent();
  return true;
}
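
// End-to-end sketch of the sized path (illustrative, not from a test): a
// naturally aligned 4-byte "atomicrmw add ptr %p, i32 %v seq_cst" becomes
// roughly
//   %r = call i32 @__atomic_fetch_add_4(ptr %p, i32 %v, i32 5)
// where 5 is the C ABI encoding of seq_cst produced by toCABI().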