1 //===- LLVMInlining.cpp - LLVM inlining interface and logic -----*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Logic for inlining LLVM functions and the definition of the
10 // LLVMInliningInterface.
12 //===----------------------------------------------------------------------===//
#include "LLVMInlining.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Matchers.h"
#include "mlir/Interfaces/DataLayoutInterfaces.h"
#include "mlir/Transforms/InliningUtils.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "llvm-inliner"

using namespace mlir;
26 /// Check whether the given alloca is an input to a lifetime intrinsic,
27 /// optionally passing through one or more casts on the way. This is not
28 /// transitive through block arguments.
29 static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp
) {
30 SmallVector
<Operation
*> stack(allocaOp
->getUsers().begin(),
31 allocaOp
->getUsers().end());
32 while (!stack
.empty()) {
33 Operation
*op
= stack
.pop_back_val();
34 if (isa
<LLVM::LifetimeStartOp
, LLVM::LifetimeEndOp
>(op
))
36 if (isa
<LLVM::BitcastOp
>(op
))
37 stack
.append(op
->getUsers().begin(), op
->getUsers().end());
42 /// Handles alloca operations in the inlined blocks:
43 /// - Moves all alloca operations with a constant size in the former entry block
44 /// of the callee into the entry block of the caller, so they become part of
45 /// the function prologue/epilogue during code generation.
46 /// - Inserts lifetime intrinsics that limit the scope of inlined static allocas
47 /// to the inlined blocks.
48 /// - Inserts StackSave and StackRestore operations if dynamic allocas were
51 handleInlinedAllocas(Operation
*call
,
52 iterator_range
<Region::iterator
> inlinedBlocks
) {
53 Block
*calleeEntryBlock
= &(*inlinedBlocks
.begin());
54 Block
*callerEntryBlock
= &(*calleeEntryBlock
->getParent()->begin());
55 if (calleeEntryBlock
== callerEntryBlock
)
58 SmallVector
<std::tuple
<LLVM::AllocaOp
, IntegerAttr
, bool>> allocasToMove
;
59 bool shouldInsertLifetimes
= false;
60 bool hasDynamicAlloca
= false;
61 // Conservatively only move static alloca operations that are part of the
62 // entry block and do not inspect nested regions, since they may execute
63 // conditionally or have other unknown semantics.
64 for (auto allocaOp
: calleeEntryBlock
->getOps
<LLVM::AllocaOp
>()) {
65 IntegerAttr arraySize
;
66 if (!matchPattern(allocaOp
.getArraySize(), m_Constant(&arraySize
))) {
67 hasDynamicAlloca
= true;
70 bool shouldInsertLifetime
=
71 arraySize
.getValue() != 0 && !hasLifetimeMarkers(allocaOp
);
72 shouldInsertLifetimes
|= shouldInsertLifetime
;
73 allocasToMove
.emplace_back(allocaOp
, arraySize
, shouldInsertLifetime
);
75 // Check the remaining inlined blocks for dynamic allocas as well.
76 for (Block
&block
: llvm::drop_begin(inlinedBlocks
)) {
80 llvm::any_of(block
.getOps
<LLVM::AllocaOp
>(), [](auto allocaOp
) {
81 return !matchPattern(allocaOp
.getArraySize(), m_Constant());
84 if (allocasToMove
.empty() && !hasDynamicAlloca
)
86 OpBuilder
builder(calleeEntryBlock
, calleeEntryBlock
->begin());
88 if (hasDynamicAlloca
) {
89 // This may result in multiple stacksave/stackrestore intrinsics in the same
90 // scope if some are already present in the body of the caller. This is not
91 // invalid IR, but LLVM cleans these up in InstCombineCalls.cpp, along with
92 // other cases where the stacksave/stackrestore is redundant.
93 stackPtr
= builder
.create
<LLVM::StackSaveOp
>(
94 call
->getLoc(), LLVM::LLVMPointerType::get(call
->getContext()));
96 builder
.setInsertionPoint(callerEntryBlock
, callerEntryBlock
->begin());
97 for (auto &[allocaOp
, arraySize
, shouldInsertLifetime
] : allocasToMove
) {
98 auto newConstant
= builder
.create
<LLVM::ConstantOp
>(
99 allocaOp
->getLoc(), allocaOp
.getArraySize().getType(), arraySize
);
100 // Insert a lifetime start intrinsic where the alloca was before moving it.
101 if (shouldInsertLifetime
) {
102 OpBuilder::InsertionGuard
insertionGuard(builder
);
103 builder
.setInsertionPoint(allocaOp
);
104 builder
.create
<LLVM::LifetimeStartOp
>(
105 allocaOp
.getLoc(), arraySize
.getValue().getLimitedValue(),
106 allocaOp
.getResult());
108 allocaOp
->moveAfter(newConstant
);
109 allocaOp
.getArraySizeMutable().assign(newConstant
.getResult());
111 if (!shouldInsertLifetimes
&& !hasDynamicAlloca
)
113 // Insert a lifetime end intrinsic before each return in the callee function.
114 for (Block
&block
: inlinedBlocks
) {
115 if (!block
.getTerminator()->hasTrait
<OpTrait::ReturnLike
>())
117 builder
.setInsertionPoint(block
.getTerminator());
118 if (hasDynamicAlloca
)
119 builder
.create
<LLVM::StackRestoreOp
>(call
->getLoc(), stackPtr
);
120 for (auto &[allocaOp
, arraySize
, shouldInsertLifetime
] : allocasToMove
) {
121 if (shouldInsertLifetime
)
122 builder
.create
<LLVM::LifetimeEndOp
>(
123 allocaOp
.getLoc(), arraySize
.getValue().getLimitedValue(),
124 allocaOp
.getResult());
129 /// Maps all alias scopes in the inlined operations to deep clones of the scopes
130 /// and domain. This is required for code such as `foo(a, b); foo(a2, b2);` to
131 /// not incorrectly return `noalias` for e.g. operations on `a` and `a2`.
133 deepCloneAliasScopes(iterator_range
<Region::iterator
> inlinedBlocks
) {
134 DenseMap
<Attribute
, Attribute
> mapping
;
136 // Register handles in the walker to create the deep clones.
137 // The walker ensures that an attribute is only ever walked once and does a
138 // post-order walk, ensuring the domain is visited prior to the scope.
139 AttrTypeWalker walker
;
141 // Perform the deep clones while visiting. Builders create a distinct
142 // attribute to make sure that new instances are always created by the
144 walker
.addWalk([&](LLVM::AliasScopeDomainAttr domainAttr
) {
145 mapping
[domainAttr
] = LLVM::AliasScopeDomainAttr::get(
146 domainAttr
.getContext(), domainAttr
.getDescription());
149 walker
.addWalk([&](LLVM::AliasScopeAttr scopeAttr
) {
150 mapping
[scopeAttr
] = LLVM::AliasScopeAttr::get(
151 cast
<LLVM::AliasScopeDomainAttr
>(mapping
.lookup(scopeAttr
.getDomain())),
152 scopeAttr
.getDescription());
155 // Map an array of scopes to an array of deep clones.
156 auto convertScopeList
= [&](ArrayAttr arrayAttr
) -> ArrayAttr
{
160 // Create the deep clones if necessary.
161 walker
.walk(arrayAttr
);
163 return ArrayAttr::get(arrayAttr
.getContext(),
164 llvm::map_to_vector(arrayAttr
, [&](Attribute attr
) {
165 return mapping
.lookup(attr
);
169 for (Block
&block
: inlinedBlocks
) {
170 for (Operation
&op
: block
) {
171 if (auto aliasInterface
= dyn_cast
<LLVM::AliasAnalysisOpInterface
>(op
)) {
172 aliasInterface
.setAliasScopes(
173 convertScopeList(aliasInterface
.getAliasScopesOrNull()));
174 aliasInterface
.setNoAliasScopes(
175 convertScopeList(aliasInterface
.getNoAliasScopesOrNull()));
178 if (auto noAliasScope
= dyn_cast
<LLVM::NoAliasScopeDeclOp
>(op
)) {
179 // Create the deep clones if necessary.
180 walker
.walk(noAliasScope
.getScopeAttr());
182 noAliasScope
.setScopeAttr(cast
<LLVM::AliasScopeAttr
>(
183 mapping
.lookup(noAliasScope
.getScopeAttr())));
189 /// Creates a new ArrayAttr by concatenating `lhs` with `rhs`.
190 /// Returns null if both parameters are null. If only one attribute is null,
191 /// return the other.
192 static ArrayAttr
concatArrayAttr(ArrayAttr lhs
, ArrayAttr rhs
) {
198 SmallVector
<Attribute
> result
;
199 llvm::append_range(result
, lhs
);
200 llvm::append_range(result
, rhs
);
201 return ArrayAttr::get(lhs
.getContext(), result
);
204 /// Attempts to return the underlying pointer value that `pointerValue` is based
205 /// on. This traverses down the chain of operations to the last operation
206 /// producing the base pointer and returns it. If it encounters an operation it
207 /// cannot further traverse through, returns the operation's result.
208 static Value
getUnderlyingObject(Value pointerValue
) {
210 if (auto gepOp
= pointerValue
.getDefiningOp
<LLVM::GEPOp
>()) {
211 pointerValue
= gepOp
.getBase();
215 if (auto addrCast
= pointerValue
.getDefiningOp
<LLVM::AddrSpaceCastOp
>()) {
216 pointerValue
= addrCast
.getOperand();
226 /// Attempts to return the set of all underlying pointer values that
227 /// `pointerValue` is based on. This function traverses through select
228 /// operations and block arguments unlike getUnderlyingObject.
229 static SmallVector
<Value
> getUnderlyingObjectSet(Value pointerValue
) {
230 SmallVector
<Value
> result
;
232 SmallVector
<Value
> workList
{pointerValue
};
233 // Avoid dataflow loops.
234 SmallPtrSet
<Value
, 4> seen
;
236 Value current
= workList
.pop_back_val();
237 current
= getUnderlyingObject(current
);
239 if (!seen
.insert(current
).second
)
242 if (auto selectOp
= current
.getDefiningOp
<LLVM::SelectOp
>()) {
243 workList
.push_back(selectOp
.getTrueValue());
244 workList
.push_back(selectOp
.getFalseValue());
248 if (auto blockArg
= dyn_cast
<BlockArgument
>(current
)) {
249 Block
*parentBlock
= blockArg
.getParentBlock();
251 // Attempt to find all block argument operands for every predecessor.
252 // If any operand to the block argument wasn't found in a predecessor,
253 // conservatively add the block argument to the result set.
254 SmallVector
<Value
> operands
;
255 bool anyUnknown
= false;
256 for (auto iter
= parentBlock
->pred_begin();
257 iter
!= parentBlock
->pred_end(); iter
++) {
258 auto branch
= dyn_cast
<BranchOpInterface
>((*iter
)->getTerminator());
260 result
.push_back(blockArg
);
265 Value operand
= branch
.getSuccessorOperands(
266 iter
.getSuccessorIndex())[blockArg
.getArgNumber()];
268 result
.push_back(blockArg
);
273 operands
.push_back(operand
);
277 llvm::append_range(workList
, operands
);
282 result
.push_back(current
);
283 } while (!workList
.empty());
288 /// Creates a new AliasScopeAttr for every noalias parameter and attaches it to
289 /// the appropriate inlined memory operations in an attempt to preserve the
290 /// original semantics of the parameter attribute.
291 static void createNewAliasScopesFromNoAliasParameter(
292 Operation
*call
, iterator_range
<Region::iterator
> inlinedBlocks
) {
294 // First collect all noalias parameters. These have been specially marked by
295 // the `handleArgument` implementation by using the `ssa.copy` intrinsic and
296 // attaching a `noalias` attribute to it.
297 // These are only meant to be temporary and should therefore be deleted after
298 // we're done using them here.
299 SetVector
<LLVM::SSACopyOp
> noAliasParams
;
300 for (Value argument
: cast
<LLVM::CallOp
>(call
).getArgOperands()) {
301 for (Operation
*user
: argument
.getUsers()) {
302 auto ssaCopy
= llvm::dyn_cast
<LLVM::SSACopyOp
>(user
);
305 if (!ssaCopy
->hasAttr(LLVM::LLVMDialect::getNoAliasAttrName()))
308 noAliasParams
.insert(ssaCopy
);
312 // If there were none, we have nothing to do here.
313 if (noAliasParams
.empty())
316 // Scope exit block to make it impossible to forget to get rid of the
318 auto exit
= llvm::make_scope_exit([&] {
319 for (LLVM::SSACopyOp ssaCopyOp
: noAliasParams
) {
320 ssaCopyOp
.replaceAllUsesWith(ssaCopyOp
.getOperand());
325 // Create a new domain for this specific inlining and a new scope for every
326 // noalias parameter.
327 auto functionDomain
= LLVM::AliasScopeDomainAttr::get(
328 call
->getContext(), cast
<LLVM::CallOp
>(call
).getCalleeAttr().getAttr());
329 DenseMap
<Value
, LLVM::AliasScopeAttr
> pointerScopes
;
330 for (LLVM::SSACopyOp copyOp
: noAliasParams
) {
331 auto scope
= LLVM::AliasScopeAttr::get(functionDomain
);
332 pointerScopes
[copyOp
] = scope
;
334 OpBuilder(call
).create
<LLVM::NoAliasScopeDeclOp
>(call
->getLoc(), scope
);
337 // Go through every instruction and attempt to find which noalias parameters
338 // it is definitely based on and definitely not based on.
339 for (Block
&inlinedBlock
: inlinedBlocks
) {
340 for (auto aliasInterface
:
341 inlinedBlock
.getOps
<LLVM::AliasAnalysisOpInterface
>()) {
343 // Collect the pointer arguments affected by the alias scopes.
344 SmallVector
<Value
> pointerArgs
= aliasInterface
.getAccessedOperands();
346 // Find the set of underlying pointers that this pointer is based on.
347 SmallPtrSet
<Value
, 4> basedOnPointers
;
348 for (Value pointer
: pointerArgs
)
349 llvm::copy(getUnderlyingObjectSet(pointer
),
350 std::inserter(basedOnPointers
, basedOnPointers
.begin()));
352 bool aliasesOtherKnownObject
= false;
353 // Go through the based on pointers and check that they are either:
354 // * Constants that can be ignored (undef, poison, null pointer).
355 // * Based on a noalias parameter.
356 // * Other pointers that we know can't alias with our noalias parameter.
358 // Any other value might be a pointer based on any noalias parameter that
359 // hasn't been identified. In that case conservatively don't add any
360 // scopes to this operation indicating either aliasing or not aliasing
361 // with any parameter.
362 if (llvm::any_of(basedOnPointers
, [&](Value object
) {
363 if (matchPattern(object
, m_Constant()))
366 if (noAliasParams
.contains(object
.getDefiningOp
<LLVM::SSACopyOp
>()))
369 // TODO: This should include other arguments from the inlined
371 if (isa_and_nonnull
<LLVM::AllocaOp
, LLVM::AddressOfOp
>(
372 object
.getDefiningOp())) {
373 aliasesOtherKnownObject
= true;
380 // Add all noalias parameter scopes to the noalias scope list that we are
382 SmallVector
<Attribute
> noAliasScopes
;
383 for (LLVM::SSACopyOp noAlias
: noAliasParams
) {
384 if (basedOnPointers
.contains(noAlias
))
387 noAliasScopes
.push_back(pointerScopes
[noAlias
]);
390 if (!noAliasScopes
.empty())
391 aliasInterface
.setNoAliasScopes(
392 concatArrayAttr(aliasInterface
.getNoAliasScopesOrNull(),
393 ArrayAttr::get(call
->getContext(), noAliasScopes
)));
395 // Don't add alias scopes to call operations or operations that might
396 // operate on pointers not based on any noalias parameter.
397 // Since we add all scopes to an operation's noalias list that it
398 // definitely doesn't alias, we mustn't do the same for the alias.scope
399 // list if other objects are involved.
401 // Consider the following case:
403 // %1 = select %magic, %0, %noalias_param
404 // store 5, %1 (1) noalias=[scope(...)]
406 // store 3, %0 (2) noalias=[scope(noalias_param), scope(...)]
408 // We can add the scopes of any noalias parameters that aren't
409 // noalias_param's scope to (1) and add all of them to (2). We mustn't add
410 // the scope of noalias_param to the alias.scope list of (1) since
411 // that would mean (2) cannot alias with (1) which is wrong since both may
414 // In conclusion, only add scopes to the alias.scope list if all pointers
415 // have a corresponding scope.
416 // Call operations are included in this list since we do not know whether
417 // the callee accesses any memory besides the ones passed as its
419 if (aliasesOtherKnownObject
||
420 isa
<LLVM::CallOp
>(aliasInterface
.getOperation()))
423 SmallVector
<Attribute
> aliasScopes
;
424 for (LLVM::SSACopyOp noAlias
: noAliasParams
)
425 if (basedOnPointers
.contains(noAlias
))
426 aliasScopes
.push_back(pointerScopes
[noAlias
]);
428 if (!aliasScopes
.empty())
429 aliasInterface
.setAliasScopes(
430 concatArrayAttr(aliasInterface
.getAliasScopesOrNull(),
431 ArrayAttr::get(call
->getContext(), aliasScopes
)));
436 /// Appends any alias scopes of the call operation to any inlined memory
439 appendCallOpAliasScopes(Operation
*call
,
440 iterator_range
<Region::iterator
> inlinedBlocks
) {
441 auto callAliasInterface
= dyn_cast
<LLVM::AliasAnalysisOpInterface
>(call
);
442 if (!callAliasInterface
)
445 ArrayAttr aliasScopes
= callAliasInterface
.getAliasScopesOrNull();
446 ArrayAttr noAliasScopes
= callAliasInterface
.getNoAliasScopesOrNull();
447 // If the call has neither alias scopes or noalias scopes we have nothing to
449 if (!aliasScopes
&& !noAliasScopes
)
452 // Simply append the call op's alias and noalias scopes to any operation
453 // implementing AliasAnalysisOpInterface.
454 for (Block
&block
: inlinedBlocks
) {
455 for (auto aliasInterface
: block
.getOps
<LLVM::AliasAnalysisOpInterface
>()) {
457 aliasInterface
.setAliasScopes(concatArrayAttr(
458 aliasInterface
.getAliasScopesOrNull(), aliasScopes
));
461 aliasInterface
.setNoAliasScopes(concatArrayAttr(
462 aliasInterface
.getNoAliasScopesOrNull(), noAliasScopes
));
467 /// Handles all interactions with alias scopes during inlining.
468 static void handleAliasScopes(Operation
*call
,
469 iterator_range
<Region::iterator
> inlinedBlocks
) {
470 deepCloneAliasScopes(inlinedBlocks
);
471 createNewAliasScopesFromNoAliasParameter(call
, inlinedBlocks
);
472 appendCallOpAliasScopes(call
, inlinedBlocks
);
475 /// Appends any access groups of the call operation to any inlined memory
477 static void handleAccessGroups(Operation
*call
,
478 iterator_range
<Region::iterator
> inlinedBlocks
) {
479 auto callAccessGroupInterface
= dyn_cast
<LLVM::AccessGroupOpInterface
>(call
);
480 if (!callAccessGroupInterface
)
483 auto accessGroups
= callAccessGroupInterface
.getAccessGroupsOrNull();
487 // Simply append the call op's access groups to any operation implementing
488 // AccessGroupOpInterface.
489 for (Block
&block
: inlinedBlocks
)
490 for (auto accessGroupOpInterface
:
491 block
.getOps
<LLVM::AccessGroupOpInterface
>())
492 accessGroupOpInterface
.setAccessGroups(concatArrayAttr(
493 accessGroupOpInterface
.getAccessGroupsOrNull(), accessGroups
));
496 /// If `requestedAlignment` is higher than the alignment specified on `alloca`,
497 /// realigns `alloca` if this does not exceed the natural stack alignment.
498 /// Returns the post-alignment of `alloca`, whether it was realigned or not.
499 static unsigned tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca
,
500 unsigned requestedAlignment
,
501 DataLayout
const &dataLayout
) {
502 unsigned allocaAlignment
= alloca
.getAlignment().value_or(1);
503 if (requestedAlignment
<= allocaAlignment
)
504 // No realignment necessary.
505 return allocaAlignment
;
506 unsigned naturalStackAlignmentBits
= dataLayout
.getStackAlignment();
507 // If the natural stack alignment is not specified, the data layout returns
508 // zero. Optimistically allow realignment in this case.
509 if (naturalStackAlignmentBits
== 0 ||
510 // If the requested alignment exceeds the natural stack alignment, this
511 // will trigger a dynamic stack realignment, so we prefer to copy...
512 8 * requestedAlignment
<= naturalStackAlignmentBits
||
513 // ...unless the alloca already triggers dynamic stack realignment. Then
514 // we might as well further increase the alignment to avoid a copy.
515 8 * allocaAlignment
> naturalStackAlignmentBits
) {
516 alloca
.setAlignment(requestedAlignment
);
517 allocaAlignment
= requestedAlignment
;
519 return allocaAlignment
;
522 /// Tries to find and return the alignment of the pointer `value` by looking for
523 /// an alignment attribute on the defining allocation op or function argument.
524 /// If the found alignment is lower than `requestedAlignment`, tries to realign
525 /// the pointer, then returns the resulting post-alignment, regardless of
526 /// whether it was realigned or not. If no existing alignment attribute is
527 /// found, returns 1 (i.e., assume that no alignment is guaranteed).
528 static unsigned tryToEnforceAlignment(Value value
, unsigned requestedAlignment
,
529 DataLayout
const &dataLayout
) {
530 if (Operation
*definingOp
= value
.getDefiningOp()) {
531 if (auto alloca
= dyn_cast
<LLVM::AllocaOp
>(definingOp
))
532 return tryToEnforceAllocaAlignment(alloca
, requestedAlignment
,
534 if (auto addressOf
= dyn_cast
<LLVM::AddressOfOp
>(definingOp
))
535 if (auto global
= SymbolTable::lookupNearestSymbolFrom
<LLVM::GlobalOp
>(
536 definingOp
, addressOf
.getGlobalNameAttr()))
537 return global
.getAlignment().value_or(1);
538 // We don't currently handle this operation; assume no alignment.
541 // Since there is no defining op, this is a block argument. Probably this
542 // comes directly from a function argument, so check that this is the case.
543 Operation
*parentOp
= value
.getParentBlock()->getParentOp();
544 if (auto func
= dyn_cast
<LLVM::LLVMFuncOp
>(parentOp
)) {
545 // Use the alignment attribute set for this argument in the parent function
546 // if it has been set.
547 auto blockArg
= llvm::cast
<BlockArgument
>(value
);
548 if (Attribute alignAttr
= func
.getArgAttr(
549 blockArg
.getArgNumber(), LLVM::LLVMDialect::getAlignAttrName()))
550 return cast
<IntegerAttr
>(alignAttr
).getValue().getLimitedValue();
552 // We didn't find anything useful; assume no alignment.
556 /// Introduces a new alloca and copies the memory pointed to by `argument` to
557 /// the address of the new alloca, then returns the value of the new alloca.
558 static Value
handleByValArgumentInit(OpBuilder
&builder
, Location loc
,
559 Value argument
, Type elementType
,
560 unsigned elementTypeSize
,
561 unsigned targetAlignment
) {
562 // Allocate the new value on the stack.
565 // Since this is a static alloca, we can put it directly in the entry block,
566 // so they can be absorbed into the prologue/epilogue at code generation.
567 OpBuilder::InsertionGuard
insertionGuard(builder
);
568 Block
*entryBlock
= &(*argument
.getParentRegion()->begin());
569 builder
.setInsertionPointToStart(entryBlock
);
570 Value one
= builder
.create
<LLVM::ConstantOp
>(loc
, builder
.getI64Type(),
571 builder
.getI64IntegerAttr(1));
572 allocaOp
= builder
.create
<LLVM::AllocaOp
>(
573 loc
, argument
.getType(), elementType
, one
, targetAlignment
);
575 // Copy the pointee to the newly allocated value.
576 Value copySize
= builder
.create
<LLVM::ConstantOp
>(
577 loc
, builder
.getI64Type(), builder
.getI64IntegerAttr(elementTypeSize
));
578 builder
.create
<LLVM::MemcpyOp
>(loc
, allocaOp
, argument
, copySize
,
579 /*isVolatile=*/false);
583 /// Handles a function argument marked with the byval attribute by introducing a
584 /// memcpy or realigning the defining operation, if required either due to the
585 /// pointee being writeable in the callee, and/or due to an alignment mismatch.
586 /// `requestedAlignment` specifies the alignment set in the "align" argument
587 /// attribute (or 1 if no align attribute was set).
588 static Value
handleByValArgument(OpBuilder
&builder
, Operation
*callable
,
589 Value argument
, Type elementType
,
590 unsigned requestedAlignment
) {
591 auto func
= cast
<LLVM::LLVMFuncOp
>(callable
);
592 LLVM::MemoryEffectsAttr memoryEffects
= func
.getMemoryAttr();
593 // If there is no memory effects attribute, assume that the function is
595 bool isReadOnly
= memoryEffects
&&
596 memoryEffects
.getArgMem() != LLVM::ModRefInfo::ModRef
&&
597 memoryEffects
.getArgMem() != LLVM::ModRefInfo::Mod
;
598 // Check if there's an alignment mismatch requiring us to copy.
599 DataLayout dataLayout
= DataLayout::closest(callable
);
600 unsigned minimumAlignment
= dataLayout
.getTypeABIAlignment(elementType
);
602 if (requestedAlignment
<= minimumAlignment
)
604 unsigned currentAlignment
=
605 tryToEnforceAlignment(argument
, requestedAlignment
, dataLayout
);
606 if (currentAlignment
>= requestedAlignment
)
609 unsigned targetAlignment
= std::max(requestedAlignment
, minimumAlignment
);
610 return handleByValArgumentInit(builder
, func
.getLoc(), argument
, elementType
,
611 dataLayout
.getTypeSize(elementType
),
616 struct LLVMInlinerInterface
: public DialectInlinerInterface
{
617 using DialectInlinerInterface::DialectInlinerInterface
;
619 LLVMInlinerInterface(Dialect
*dialect
)
620 : DialectInlinerInterface(dialect
),
621 // Cache set of StringAttrs for fast lookup in `isLegalToInline`.
622 disallowedFunctionAttrs({
623 StringAttr::get(dialect
->getContext(), "noduplicate"),
624 StringAttr::get(dialect
->getContext(), "noinline"),
625 StringAttr::get(dialect
->getContext(), "optnone"),
626 StringAttr::get(dialect
->getContext(), "presplitcoroutine"),
627 StringAttr::get(dialect
->getContext(), "returns_twice"),
628 StringAttr::get(dialect
->getContext(), "strictfp"),
631 bool isLegalToInline(Operation
*call
, Operation
*callable
,
632 bool wouldBeCloned
) const final
{
635 auto callOp
= dyn_cast
<LLVM::CallOp
>(call
);
637 LLVM_DEBUG(llvm::dbgs()
638 << "Cannot inline: call is not an LLVM::CallOp\n");
641 auto funcOp
= dyn_cast
<LLVM::LLVMFuncOp
>(callable
);
643 LLVM_DEBUG(llvm::dbgs()
644 << "Cannot inline: callable is not an LLVM::LLVMFuncOp\n");
647 // TODO: Generate aliasing metadata from noalias argument/result attributes.
648 if (auto attrs
= funcOp
.getArgAttrs()) {
649 for (DictionaryAttr attrDict
: attrs
->getAsRange
<DictionaryAttr
>()) {
650 if (attrDict
.contains(LLVM::LLVMDialect::getInAllocaAttrName())) {
651 LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp
.getSymName()
652 << ": inalloca arguments not supported\n");
657 // TODO: Handle exceptions.
658 if (funcOp
.getPersonality()) {
659 LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp
.getSymName()
660 << ": unhandled function personality\n");
663 if (funcOp
.getPassthrough()) {
664 // TODO: Used attributes should not be passthrough.
665 if (llvm::any_of(*funcOp
.getPassthrough(), [&](Attribute attr
) {
666 auto stringAttr
= dyn_cast
<StringAttr
>(attr
);
669 if (disallowedFunctionAttrs
.contains(stringAttr
)) {
670 LLVM_DEBUG(llvm::dbgs()
671 << "Cannot inline " << funcOp
.getSymName()
672 << ": found disallowed function attribute "
673 << stringAttr
<< "\n");
683 bool isLegalToInline(Region
*, Region
*, bool, IRMapping
&) const final
{
687 /// Conservative allowlist of operations supported so far.
688 bool isLegalToInline(Operation
*op
, Region
*, bool, IRMapping
&) const final
{
692 if (isa
<LLVM::AllocaOp
,
695 LLVM::AtomicCmpXchgOp
,
703 LLVM::LifetimeStartOp
,
706 LLVM::MemcpyInlineOp
,
709 LLVM::NoAliasScopeDeclOp
,
710 LLVM::StackRestoreOp
,
713 LLVM::UnreachableOp
>(op
))
716 LLVM_DEBUG(llvm::dbgs()
717 << "Cannot inline: unhandled side effecting operation \""
718 << op
->getName() << "\"\n");
722 /// Handle the given inlined return by replacing it with a branch. This
723 /// overload is called when the inlined region has more than one block.
724 void handleTerminator(Operation
*op
, Block
*newDest
) const final
{
725 // Only return needs to be handled here.
726 auto returnOp
= dyn_cast
<LLVM::ReturnOp
>(op
);
730 // Replace the return with a branch to the dest.
731 OpBuilder
builder(op
);
732 builder
.create
<LLVM::BrOp
>(op
->getLoc(), returnOp
.getOperands(), newDest
);
736 /// Handle the given inlined return by replacing the uses of the call with the
737 /// operands of the return. This overload is called when the inlined region
738 /// only contains one block.
739 void handleTerminator(Operation
*op
,
740 ArrayRef
<Value
> valuesToRepl
) const final
{
741 // Return will be the only terminator present.
742 auto returnOp
= cast
<LLVM::ReturnOp
>(op
);
744 // Replace the values directly with the return operands.
745 assert(returnOp
.getNumOperands() == valuesToRepl
.size());
746 for (const auto &[dst
, src
] :
747 llvm::zip(valuesToRepl
, returnOp
.getOperands()))
748 dst
.replaceAllUsesWith(src
);
751 Value
handleArgument(OpBuilder
&builder
, Operation
*call
, Operation
*callable
,
753 DictionaryAttr argumentAttrs
) const final
{
754 if (std::optional
<NamedAttribute
> attr
=
755 argumentAttrs
.getNamed(LLVM::LLVMDialect::getByValAttrName())) {
756 Type elementType
= cast
<TypeAttr
>(attr
->getValue()).getValue();
757 unsigned requestedAlignment
= 1;
758 if (std::optional
<NamedAttribute
> alignAttr
=
759 argumentAttrs
.getNamed(LLVM::LLVMDialect::getAlignAttrName())) {
760 requestedAlignment
= cast
<IntegerAttr
>(alignAttr
->getValue())
764 return handleByValArgument(builder
, callable
, argument
, elementType
,
767 if ([[maybe_unused
]] std::optional
<NamedAttribute
> attr
=
768 argumentAttrs
.getNamed(LLVM::LLVMDialect::getNoAliasAttrName())) {
769 if (argument
.use_empty())
772 // This code is essentially a workaround for deficiencies in the
773 // inliner interface: We need to transform operations *after* inlined
774 // based on the argument attributes of the parameters *before* inlining.
775 // This method runs prior to actual inlining and thus cannot transform the
776 // post-inlining code, while `processInlinedCallBlocks` does not have
777 // access to pre-inlining function arguments. Additionally, it is required
778 // to distinguish which parameter an SSA value originally came from.
779 // As a workaround until this is changed: Create an ssa.copy intrinsic
780 // with the noalias attribute that can easily be found, and is extremely
781 // unlikely to exist in the code prior to inlining, using this to
782 // communicate between this method and `processInlinedCallBlocks`.
783 // TODO: Fix this by refactoring the inliner interface.
784 auto copyOp
= builder
.create
<LLVM::SSACopyOp
>(call
->getLoc(), argument
);
785 copyOp
->setDiscardableAttr(
786 builder
.getStringAttr(LLVM::LLVMDialect::getNoAliasAttrName()),
787 builder
.getUnitAttr());
793 void processInlinedCallBlocks(
795 iterator_range
<Region::iterator
> inlinedBlocks
) const override
{
796 handleInlinedAllocas(call
, inlinedBlocks
);
797 handleAliasScopes(call
, inlinedBlocks
);
798 handleAccessGroups(call
, inlinedBlocks
);
801 // Keeping this (immutable) state on the interface allows us to look up
802 // StringAttrs instead of looking up strings, since StringAttrs are bound to
803 // the current context and thus cannot be initialized as static fields.
804 const DenseSet
<StringAttr
> disallowedFunctionAttrs
;
807 } // end anonymous namespace
809 void LLVM::detail::addLLVMInlinerInterface(LLVM::LLVMDialect
*dialect
) {
810 dialect
->addInterfaces
<LLVMInlinerInterface
>();