//===- LLVMInlining.cpp - LLVM inlining interface and logic -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Logic for inlining LLVM functions and the definition of the
// LLVMInliningInterface.
//
//===----------------------------------------------------------------------===//

#include "LLVMInlining.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Matchers.h"
#include "mlir/Interfaces/DataLayoutInterfaces.h"
#include "mlir/Transforms/InliningUtils.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "llvm-inliner"

using namespace mlir;

/// Check whether the given alloca is an input to a lifetime intrinsic,
/// optionally passing through one or more casts on the way. This is not
/// transitive through block arguments.
static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp) {
  SmallVector<Operation *> stack(allocaOp->getUsers().begin(),
                                 allocaOp->getUsers().end());
  while (!stack.empty()) {
    Operation *op = stack.pop_back_val();
    if (isa<LLVM::LifetimeStartOp, LLVM::LifetimeEndOp>(op))
      return true;
    if (isa<LLVM::BitcastOp>(op))
      stack.append(op->getUsers().begin(), op->getUsers().end());
  }
  return false;
}

/// Handles alloca operations in the inlined blocks:
/// - Moves all alloca operations with a constant size in the former entry block
///   of the callee into the entry block of the caller, so they become part of
///   the function prologue/epilogue during code generation.
/// - Inserts lifetime intrinsics that limit the scope of inlined static allocas
///   to the inlined blocks.
/// - Inserts StackSave and StackRestore operations if dynamic allocas were
///   inlined.
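///
/// For illustration only (schematic MLIR; the exact textual form of the ops is
/// not guaranteed): a static alloca in the former callee entry block such as
///   %c1  = llvm.mlir.constant(1 : i64) : i64
///   %buf = llvm.alloca %c1 x i32 : (i64) -> !llvm.ptr
/// is moved into the caller's entry block (with a fresh size constant), while
///   llvm.intr.lifetime.start 1, %buf : !llvm.ptr
/// is left at the alloca's former position and
///   llvm.intr.lifetime.end 1, %buf : !llvm.ptr
/// is inserted before every return-like terminator of the inlined blocks. If a
/// dynamic alloca was inlined, the inlined range is additionally bracketed by
/// llvm.intr.stacksave and llvm.intr.stackrestore.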
static void
handleInlinedAllocas(Operation *call,
                     iterator_range<Region::iterator> inlinedBlocks) {
  Block *calleeEntryBlock = &(*inlinedBlocks.begin());
  Block *callerEntryBlock = &(*calleeEntryBlock->getParent()->begin());
  if (calleeEntryBlock == callerEntryBlock)
    // Nothing to do.
    return;
  SmallVector<std::tuple<LLVM::AllocaOp, IntegerAttr, bool>> allocasToMove;
  bool shouldInsertLifetimes = false;
  bool hasDynamicAlloca = false;
  // Conservatively only move static alloca operations that are part of the
  // entry block and do not inspect nested regions, since they may execute
  // conditionally or have other unknown semantics.
  for (auto allocaOp : calleeEntryBlock->getOps<LLVM::AllocaOp>()) {
    IntegerAttr arraySize;
    if (!matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize))) {
      hasDynamicAlloca = true;
      continue;
    }
    bool shouldInsertLifetime =
        arraySize.getValue() != 0 && !hasLifetimeMarkers(allocaOp);
    shouldInsertLifetimes |= shouldInsertLifetime;
    allocasToMove.emplace_back(allocaOp, arraySize, shouldInsertLifetime);
  }
  // Check the remaining inlined blocks for dynamic allocas as well.
  for (Block &block : llvm::drop_begin(inlinedBlocks)) {
    if (hasDynamicAlloca)
      break;
    hasDynamicAlloca =
        llvm::any_of(block.getOps<LLVM::AllocaOp>(), [](auto allocaOp) {
          return !matchPattern(allocaOp.getArraySize(), m_Constant());
        });
  }
  if (allocasToMove.empty() && !hasDynamicAlloca)
    return;
  OpBuilder builder(calleeEntryBlock, calleeEntryBlock->begin());
  Value stackPtr;
  if (hasDynamicAlloca) {
    // This may result in multiple stacksave/stackrestore intrinsics in the same
    // scope if some are already present in the body of the caller. This is not
    // invalid IR, but LLVM cleans these up in InstCombineCalls.cpp, along with
    // other cases where the stacksave/stackrestore is redundant.
    stackPtr = builder.create<LLVM::StackSaveOp>(
        call->getLoc(), LLVM::LLVMPointerType::get(call->getContext()));
  }
  builder.setInsertionPoint(callerEntryBlock, callerEntryBlock->begin());
  for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
    auto newConstant = builder.create<LLVM::ConstantOp>(
        allocaOp->getLoc(), allocaOp.getArraySize().getType(), arraySize);
    // Insert a lifetime start intrinsic where the alloca was before moving it.
    if (shouldInsertLifetime) {
      OpBuilder::InsertionGuard insertionGuard(builder);
      builder.setInsertionPoint(allocaOp);
      builder.create<LLVM::LifetimeStartOp>(
          allocaOp.getLoc(), arraySize.getValue().getLimitedValue(),
          allocaOp.getResult());
    }
    allocaOp->moveAfter(newConstant);
    allocaOp.getArraySizeMutable().assign(newConstant.getResult());
  }
  if (!shouldInsertLifetimes && !hasDynamicAlloca)
    return;
  // Insert a lifetime end intrinsic before each return in the callee function.
  for (Block &block : inlinedBlocks) {
    if (!block.getTerminator()->hasTrait<OpTrait::ReturnLike>())
      continue;
    builder.setInsertionPoint(block.getTerminator());
    if (hasDynamicAlloca)
      builder.create<LLVM::StackRestoreOp>(call->getLoc(), stackPtr);
    for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
      if (shouldInsertLifetime)
        builder.create<LLVM::LifetimeEndOp>(
            allocaOp.getLoc(), arraySize.getValue().getLimitedValue(),
            allocaOp.getResult());
    }
  }
}

/// Maps all alias scopes in the inlined operations to deep clones of the scopes
/// and domain. This is required for code such as `foo(a, b); foo(a2, b2);` to
/// not incorrectly return `noalias` for e.g. operations on `a` and `a2`.
static void
deepCloneAliasScopes(iterator_range<Region::iterator> inlinedBlocks) {
  DenseMap<Attribute, Attribute> mapping;

  // Register handles in the walker to create the deep clones.
  // The walker ensures that an attribute is only ever walked once and does a
  // post-order walk, ensuring the domain is visited prior to the scope.
  AttrTypeWalker walker;

  // Perform the deep clones while visiting. Builders create a distinct
  // attribute to make sure that new instances are always created by the
  // uniquer.
  walker.addWalk([&](LLVM::AliasScopeDomainAttr domainAttr) {
    mapping[domainAttr] = LLVM::AliasScopeDomainAttr::get(
        domainAttr.getContext(), domainAttr.getDescription());
  });

  walker.addWalk([&](LLVM::AliasScopeAttr scopeAttr) {
    mapping[scopeAttr] = LLVM::AliasScopeAttr::get(
        cast<LLVM::AliasScopeDomainAttr>(mapping.lookup(scopeAttr.getDomain())),
        scopeAttr.getDescription());
  });

  // Map an array of scopes to an array of deep clones.
  auto convertScopeList = [&](ArrayAttr arrayAttr) -> ArrayAttr {
    if (!arrayAttr)
      return nullptr;

    // Create the deep clones if necessary.
    walker.walk(arrayAttr);

    return ArrayAttr::get(arrayAttr.getContext(),
                          llvm::map_to_vector(arrayAttr, [&](Attribute attr) {
                            return mapping.lookup(attr);
                          }));
  };

  for (Block &block : inlinedBlocks) {
    for (Operation &op : block) {
      if (auto aliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(op)) {
        aliasInterface.setAliasScopes(
            convertScopeList(aliasInterface.getAliasScopesOrNull()));
        aliasInterface.setNoAliasScopes(
            convertScopeList(aliasInterface.getNoAliasScopesOrNull()));
      }

      if (auto noAliasScope = dyn_cast<LLVM::NoAliasScopeDeclOp>(op)) {
        // Create the deep clones if necessary.
        walker.walk(noAliasScope.getScopeAttr());

        noAliasScope.setScopeAttr(cast<LLVM::AliasScopeAttr>(
            mapping.lookup(noAliasScope.getScopeAttr())));
      }
    }
  }
}

/// Creates a new ArrayAttr by concatenating `lhs` with `rhs`.
/// Returns null if both parameters are null. If only one attribute is null,
/// return the other.
static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs) {
  if (!lhs)
    return rhs;
  if (!rhs)
    return lhs;

  SmallVector<Attribute> result;
  llvm::append_range(result, lhs);
  llvm::append_range(result, rhs);
  return ArrayAttr::get(lhs.getContext(), result);
}

/// Attempts to return the underlying pointer value that `pointerValue` is based
/// on. This traverses down the chain of operations to the last operation
/// producing the base pointer and returns it. If it encounters an operation it
/// cannot further traverse through, returns the operation's result.
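///
/// For example (schematic MLIR), given
///   %gep  = llvm.getelementptr %base[1] : (!llvm.ptr) -> !llvm.ptr, i8
///   %cast = llvm.addrspacecast %gep : !llvm.ptr to !llvm.ptr<1>
/// calling this on `%cast` returns `%base`.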
static Value getUnderlyingObject(Value pointerValue) {
  while (true) {
    if (auto gepOp = pointerValue.getDefiningOp<LLVM::GEPOp>()) {
      pointerValue = gepOp.getBase();
      continue;
    }

    if (auto addrCast = pointerValue.getDefiningOp<LLVM::AddrSpaceCastOp>()) {
      pointerValue = addrCast.getOperand();
      continue;
    }

    break;
  }

  return pointerValue;
}

/// Attempts to return the set of all underlying pointer values that
/// `pointerValue` is based on. This function traverses through select
/// operations and block arguments unlike getUnderlyingObject.
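///
/// For example (schematic MLIR), for
///   %sel = llvm.select %cond, %a, %b : i1, !llvm.ptr
/// the returned set contains the underlying objects of both `%a` and `%b`.
/// Block arguments are resolved through the successor operands of all
/// predecessors, or conservatively added to the set themselves if any
/// predecessor is unknown.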
static SmallVector<Value> getUnderlyingObjectSet(Value pointerValue) {
  SmallVector<Value> result;

  SmallVector<Value> workList{pointerValue};
  // Avoid dataflow loops.
  SmallPtrSet<Value, 4> seen;
  do {
    Value current = workList.pop_back_val();
    current = getUnderlyingObject(current);

    if (!seen.insert(current).second)
      continue;

    if (auto selectOp = current.getDefiningOp<LLVM::SelectOp>()) {
      workList.push_back(selectOp.getTrueValue());
      workList.push_back(selectOp.getFalseValue());
      continue;
    }

    if (auto blockArg = dyn_cast<BlockArgument>(current)) {
      Block *parentBlock = blockArg.getParentBlock();

      // Attempt to find all block argument operands for every predecessor.
      // If any operand to the block argument wasn't found in a predecessor,
      // conservatively add the block argument to the result set.
      SmallVector<Value> operands;
      bool anyUnknown = false;
      for (auto iter = parentBlock->pred_begin();
           iter != parentBlock->pred_end(); iter++) {
        auto branch = dyn_cast<BranchOpInterface>((*iter)->getTerminator());
        if (!branch) {
          result.push_back(blockArg);
          anyUnknown = true;
          break;
        }

        Value operand = branch.getSuccessorOperands(
            iter.getSuccessorIndex())[blockArg.getArgNumber()];
        if (!operand) {
          result.push_back(blockArg);
          anyUnknown = true;
          break;
        }

        operands.push_back(operand);
      }

      if (!anyUnknown)
        llvm::append_range(workList, operands);

      continue;
    }

    result.push_back(current);
  } while (!workList.empty());

  return result;
}

/// Creates a new AliasScopeAttr for every noalias parameter and attaches it to
/// the appropriate inlined memory operations in an attempt to preserve the
/// original semantics of the parameter attribute.
static void createNewAliasScopesFromNoAliasParameter(
    Operation *call, iterator_range<Region::iterator> inlinedBlocks) {

  // First collect all noalias parameters. These have been specially marked by
  // the `handleArgument` implementation by using the `ssa.copy` intrinsic and
  // attaching a `noalias` attribute to it.
  // These are only meant to be temporary and should therefore be deleted after
  // we're done using them here.
  SetVector<LLVM::SSACopyOp> noAliasParams;
  for (Value argument : cast<LLVM::CallOp>(call).getArgOperands()) {
    for (Operation *user : argument.getUsers()) {
      auto ssaCopy = llvm::dyn_cast<LLVM::SSACopyOp>(user);
      if (!ssaCopy)
        continue;
      if (!ssaCopy->hasAttr(LLVM::LLVMDialect::getNoAliasAttrName()))
        continue;

      noAliasParams.insert(ssaCopy);
    }
  }

  // If there were none, we have nothing to do here.
  if (noAliasParams.empty())
    return;

  // Scope exit block to make it impossible to forget to get rid of the
  // intrinsics.
  auto exit = llvm::make_scope_exit([&] {
    for (LLVM::SSACopyOp ssaCopyOp : noAliasParams) {
      ssaCopyOp.replaceAllUsesWith(ssaCopyOp.getOperand());
      ssaCopyOp->erase();
    }
  });

  // Create a new domain for this specific inlining and a new scope for every
  // noalias parameter.
  auto functionDomain = LLVM::AliasScopeDomainAttr::get(
      call->getContext(), cast<LLVM::CallOp>(call).getCalleeAttr().getAttr());
  DenseMap<Value, LLVM::AliasScopeAttr> pointerScopes;
  for (LLVM::SSACopyOp copyOp : noAliasParams) {
    auto scope = LLVM::AliasScopeAttr::get(functionDomain);
    pointerScopes[copyOp] = scope;

    OpBuilder(call).create<LLVM::NoAliasScopeDeclOp>(call->getLoc(), scope);
  }

  // Go through every instruction and attempt to find which noalias parameters
  // it is definitely based on and definitely not based on.
  for (Block &inlinedBlock : inlinedBlocks) {
    for (auto aliasInterface :
         inlinedBlock.getOps<LLVM::AliasAnalysisOpInterface>()) {

      // Collect the pointer arguments affected by the alias scopes.
      SmallVector<Value> pointerArgs = aliasInterface.getAccessedOperands();

      // Find the set of underlying pointers that this pointer is based on.
      SmallPtrSet<Value, 4> basedOnPointers;
      for (Value pointer : pointerArgs)
        llvm::copy(getUnderlyingObjectSet(pointer),
                   std::inserter(basedOnPointers, basedOnPointers.begin()));

      bool aliasesOtherKnownObject = false;
      // Go through the based on pointers and check that they are either:
      // * Constants that can be ignored (undef, poison, null pointer).
      // * Based on a noalias parameter.
      // * Other pointers that we know can't alias with our noalias parameter.
      //
      // Any other value might be a pointer based on any noalias parameter that
      // hasn't been identified. In that case conservatively don't add any
      // scopes to this operation indicating either aliasing or not aliasing
      // with any parameter.
      if (llvm::any_of(basedOnPointers, [&](Value object) {
            if (matchPattern(object, m_Constant()))
              return false;

            if (noAliasParams.contains(object.getDefiningOp<LLVM::SSACopyOp>()))
              return false;

            // TODO: This should include other arguments from the inlined
            //       callable.
            if (isa_and_nonnull<LLVM::AllocaOp, LLVM::AddressOfOp>(
                    object.getDefiningOp())) {
              aliasesOtherKnownObject = true;
              return false;
            }

            return true;
          }))
        continue;

      // Add all noalias parameter scopes to the noalias scope list that we are
      // not based on.
      SmallVector<Attribute> noAliasScopes;
      for (LLVM::SSACopyOp noAlias : noAliasParams) {
        if (basedOnPointers.contains(noAlias))
          continue;

        noAliasScopes.push_back(pointerScopes[noAlias]);
      }

      if (!noAliasScopes.empty())
        aliasInterface.setNoAliasScopes(
            concatArrayAttr(aliasInterface.getNoAliasScopesOrNull(),
                            ArrayAttr::get(call->getContext(), noAliasScopes)));

      // Don't add alias scopes to call operations or operations that might
      // operate on pointers not based on any noalias parameter.
      // Since we add all scopes to an operation's noalias list that it
      // definitely doesn't alias, we mustn't do the same for the alias.scope
      // list if other objects are involved.
      //
      // Consider the following case:
      // %0 = llvm.alloca
      // %1 = select %magic, %0, %noalias_param
      // store 5, %1  (1) noalias=[scope(...)]
      // ...
      // store 3, %0  (2) noalias=[scope(noalias_param), scope(...)]
      //
      // We can add the scopes of any noalias parameters that aren't
      // noalias_param's scope to (1) and add all of them to (2). We mustn't add
      // the scope of noalias_param to the alias.scope list of (1) since
      // that would mean (2) cannot alias with (1) which is wrong since both may
      // store to %0.
      //
      // In conclusion, only add scopes to the alias.scope list if all pointers
      // have a corresponding scope.
      // Call operations are included in this list since we do not know whether
      // the callee accesses any memory besides the ones passed as its
      // arguments.
      if (aliasesOtherKnownObject ||
          isa<LLVM::CallOp>(aliasInterface.getOperation()))
        continue;

      SmallVector<Attribute> aliasScopes;
      for (LLVM::SSACopyOp noAlias : noAliasParams)
        if (basedOnPointers.contains(noAlias))
          aliasScopes.push_back(pointerScopes[noAlias]);

      if (!aliasScopes.empty())
        aliasInterface.setAliasScopes(
            concatArrayAttr(aliasInterface.getAliasScopesOrNull(),
                            ArrayAttr::get(call->getContext(), aliasScopes)));
    }
  }
}

/// Appends any alias scopes of the call operation to any inlined memory
/// operation.
static void
appendCallOpAliasScopes(Operation *call,
                        iterator_range<Region::iterator> inlinedBlocks) {
  auto callAliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(call);
  if (!callAliasInterface)
    return;

  ArrayAttr aliasScopes = callAliasInterface.getAliasScopesOrNull();
  ArrayAttr noAliasScopes = callAliasInterface.getNoAliasScopesOrNull();
  // If the call has neither alias scopes nor noalias scopes, we have nothing to
  // do here.
  if (!aliasScopes && !noAliasScopes)
    return;

  // Simply append the call op's alias and noalias scopes to any operation
  // implementing AliasAnalysisOpInterface.
  for (Block &block : inlinedBlocks) {
    for (auto aliasInterface : block.getOps<LLVM::AliasAnalysisOpInterface>()) {
      if (aliasScopes)
        aliasInterface.setAliasScopes(concatArrayAttr(
            aliasInterface.getAliasScopesOrNull(), aliasScopes));

      if (noAliasScopes)
        aliasInterface.setNoAliasScopes(concatArrayAttr(
            aliasInterface.getNoAliasScopesOrNull(), noAliasScopes));
    }
  }
}

/// Handles all interactions with alias scopes during inlining.
static void handleAliasScopes(Operation *call,
                              iterator_range<Region::iterator> inlinedBlocks) {
  deepCloneAliasScopes(inlinedBlocks);
  createNewAliasScopesFromNoAliasParameter(call, inlinedBlocks);
  appendCallOpAliasScopes(call, inlinedBlocks);
}

/// Appends any access groups of the call operation to any inlined memory
/// operation.
static void handleAccessGroups(Operation *call,
                               iterator_range<Region::iterator> inlinedBlocks) {
  auto callAccessGroupInterface = dyn_cast<LLVM::AccessGroupOpInterface>(call);
  if (!callAccessGroupInterface)
    return;

  auto accessGroups = callAccessGroupInterface.getAccessGroupsOrNull();
  if (!accessGroups)
    return;

  // Simply append the call op's access groups to any operation implementing
  // AccessGroupOpInterface.
  for (Block &block : inlinedBlocks)
    for (auto accessGroupOpInterface :
         block.getOps<LLVM::AccessGroupOpInterface>())
      accessGroupOpInterface.setAccessGroups(concatArrayAttr(
          accessGroupOpInterface.getAccessGroupsOrNull(), accessGroups));
}

/// If `requestedAlignment` is higher than the alignment specified on `alloca`,
/// realigns `alloca` if this does not exceed the natural stack alignment.
/// Returns the post-alignment of `alloca`, whether it was realigned or not.
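///
/// As a worked example, assume a natural stack alignment of 128 bits (16
/// bytes) and an alloca currently aligned to 4 bytes: a requested alignment of
/// 8 is applied (8 * 8 = 64 bits <= 128) and 8 is returned, whereas a requested
/// alignment of 32 is not applied (8 * 32 = 256 bits > 128) and the existing 4
/// is returned. If the alloca were already aligned beyond 16 bytes, an even
/// larger request would still be applied, since the stack has to be dynamically
/// realigned for it either way.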
static unsigned tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca,
                                            unsigned requestedAlignment,
                                            DataLayout const &dataLayout) {
  unsigned allocaAlignment = alloca.getAlignment().value_or(1);
  if (requestedAlignment <= allocaAlignment)
    // No realignment necessary.
    return allocaAlignment;
  unsigned naturalStackAlignmentBits = dataLayout.getStackAlignment();
  // If the natural stack alignment is not specified, the data layout returns
  // zero. Optimistically allow realignment in this case.
  if (naturalStackAlignmentBits == 0 ||
      // If the requested alignment exceeds the natural stack alignment, this
      // will trigger a dynamic stack realignment, so we prefer to copy...
      8 * requestedAlignment <= naturalStackAlignmentBits ||
      // ...unless the alloca already triggers dynamic stack realignment. Then
      // we might as well further increase the alignment to avoid a copy.
      8 * allocaAlignment > naturalStackAlignmentBits) {
    alloca.setAlignment(requestedAlignment);
    allocaAlignment = requestedAlignment;
  }
  return allocaAlignment;
}

/// Tries to find and return the alignment of the pointer `value` by looking for
/// an alignment attribute on the defining allocation op or function argument.
/// If the found alignment is lower than `requestedAlignment`, tries to realign
/// the pointer, then returns the resulting post-alignment, regardless of
/// whether it was realigned or not. If no existing alignment attribute is
/// found, returns 1 (i.e., assume that no alignment is guaranteed).
static unsigned tryToEnforceAlignment(Value value, unsigned requestedAlignment,
                                      DataLayout const &dataLayout) {
  if (Operation *definingOp = value.getDefiningOp()) {
    if (auto alloca = dyn_cast<LLVM::AllocaOp>(definingOp))
      return tryToEnforceAllocaAlignment(alloca, requestedAlignment,
                                         dataLayout);
    if (auto addressOf = dyn_cast<LLVM::AddressOfOp>(definingOp))
      if (auto global = SymbolTable::lookupNearestSymbolFrom<LLVM::GlobalOp>(
              definingOp, addressOf.getGlobalNameAttr()))
        return global.getAlignment().value_or(1);
    // We don't currently handle this operation; assume no alignment.
    return 1;
  }
  // Since there is no defining op, this is a block argument. Probably this
  // comes directly from a function argument, so check that this is the case.
  Operation *parentOp = value.getParentBlock()->getParentOp();
  if (auto func = dyn_cast<LLVM::LLVMFuncOp>(parentOp)) {
    // Use the alignment attribute set for this argument in the parent function
    // if it has been set.
    auto blockArg = llvm::cast<BlockArgument>(value);
    if (Attribute alignAttr = func.getArgAttr(
            blockArg.getArgNumber(), LLVM::LLVMDialect::getAlignAttrName()))
      return cast<IntegerAttr>(alignAttr).getValue().getLimitedValue();
  }
  // We didn't find anything useful; assume no alignment.
  return 1;
}

/// Introduces a new alloca and copies the memory pointed to by `argument` to
/// the address of the new alloca, then returns the value of the new alloca.
static Value handleByValArgumentInit(OpBuilder &builder, Location loc,
                                     Value argument, Type elementType,
                                     unsigned elementTypeSize,
                                     unsigned targetAlignment) {
  // Allocate the new value on the stack.
  Value allocaOp;
  {
    // Since this is a static alloca, we can put it directly in the entry
    // block, so it can be absorbed into the prologue/epilogue at code
    // generation.
    OpBuilder::InsertionGuard insertionGuard(builder);
    Block *entryBlock = &(*argument.getParentRegion()->begin());
    builder.setInsertionPointToStart(entryBlock);
    Value one = builder.create<LLVM::ConstantOp>(loc, builder.getI64Type(),
                                                 builder.getI64IntegerAttr(1));
    allocaOp = builder.create<LLVM::AllocaOp>(
        loc, argument.getType(), elementType, one, targetAlignment);
  }
  // Copy the pointee to the newly allocated value.
  Value copySize = builder.create<LLVM::ConstantOp>(
      loc, builder.getI64Type(), builder.getI64IntegerAttr(elementTypeSize));
  builder.create<LLVM::MemcpyOp>(loc, allocaOp, argument, copySize,
                                 /*isVolatile=*/false);
  return allocaOp;
}

/// Handles a function argument marked with the byval attribute by introducing a
/// memcpy or realigning the defining operation, if required because the pointee
/// is writeable in the callee and/or because of an alignment mismatch.
/// `requestedAlignment` specifies the alignment set in the "align" argument
/// attribute (or 1 if no align attribute was set).
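///
/// For example, if the pointee type has a 4-byte ABI alignment: a callee that
/// only reads the pointee and requests an alignment of at most 4 receives the
/// original pointer unchanged, whereas a callee that may write the pointee
/// always receives a pointer to a fresh copy, aligned to at least 4 bytes (or
/// to the requested alignment, if that is larger).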
static Value handleByValArgument(OpBuilder &builder, Operation *callable,
                                 Value argument, Type elementType,
                                 unsigned requestedAlignment) {
  auto func = cast<LLVM::LLVMFuncOp>(callable);
  LLVM::MemoryEffectsAttr memoryEffects = func.getMemoryAttr();
  // If there is no memory effects attribute, assume that the function is
  // not read-only.
  bool isReadOnly = memoryEffects &&
                    memoryEffects.getArgMem() != LLVM::ModRefInfo::ModRef &&
                    memoryEffects.getArgMem() != LLVM::ModRefInfo::Mod;
  // Check if there's an alignment mismatch requiring us to copy.
  DataLayout dataLayout = DataLayout::closest(callable);
  unsigned minimumAlignment = dataLayout.getTypeABIAlignment(elementType);
  if (isReadOnly) {
    if (requestedAlignment <= minimumAlignment)
      return argument;
    unsigned currentAlignment =
        tryToEnforceAlignment(argument, requestedAlignment, dataLayout);
    if (currentAlignment >= requestedAlignment)
      return argument;
  }
  unsigned targetAlignment = std::max(requestedAlignment, minimumAlignment);
  return handleByValArgumentInit(builder, func.getLoc(), argument, elementType,
                                 dataLayout.getTypeSize(elementType),
                                 targetAlignment);
}

namespace {
struct LLVMInlinerInterface : public DialectInlinerInterface {
  using DialectInlinerInterface::DialectInlinerInterface;

  LLVMInlinerInterface(Dialect *dialect)
      : DialectInlinerInterface(dialect),
        // Cache set of StringAttrs for fast lookup in `isLegalToInline`.
        disallowedFunctionAttrs({
            StringAttr::get(dialect->getContext(), "noduplicate"),
            StringAttr::get(dialect->getContext(), "noinline"),
            StringAttr::get(dialect->getContext(), "optnone"),
            StringAttr::get(dialect->getContext(), "presplitcoroutine"),
            StringAttr::get(dialect->getContext(), "returns_twice"),
            StringAttr::get(dialect->getContext(), "strictfp"),
        }) {}

  bool isLegalToInline(Operation *call, Operation *callable,
                       bool wouldBeCloned) const final {
    if (!wouldBeCloned)
      return false;
    auto callOp = dyn_cast<LLVM::CallOp>(call);
    if (!callOp) {
      LLVM_DEBUG(llvm::dbgs()
                 << "Cannot inline: call is not an LLVM::CallOp\n");
      return false;
    }
    auto funcOp = dyn_cast<LLVM::LLVMFuncOp>(callable);
    if (!funcOp) {
      LLVM_DEBUG(llvm::dbgs()
                 << "Cannot inline: callable is not an LLVM::LLVMFuncOp\n");
      return false;
    }
    // TODO: Generate aliasing metadata from noalias argument/result attributes.
    if (auto attrs = funcOp.getArgAttrs()) {
      for (DictionaryAttr attrDict : attrs->getAsRange<DictionaryAttr>()) {
        if (attrDict.contains(LLVM::LLVMDialect::getInAllocaAttrName())) {
          LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName()
                                  << ": inalloca arguments not supported\n");
          return false;
        }
      }
    }
    // TODO: Handle exceptions.
    if (funcOp.getPersonality()) {
      LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName()
                              << ": unhandled function personality\n");
      return false;
    }
    if (funcOp.getPassthrough()) {
      // TODO: Used attributes should not be passthrough.
      if (llvm::any_of(*funcOp.getPassthrough(), [&](Attribute attr) {
            auto stringAttr = dyn_cast<StringAttr>(attr);
            if (!stringAttr)
              return false;
            if (disallowedFunctionAttrs.contains(stringAttr)) {
              LLVM_DEBUG(llvm::dbgs()
                         << "Cannot inline " << funcOp.getSymName()
                         << ": found disallowed function attribute "
                         << stringAttr << "\n");
              return true;
            }
            return false;
          }))
        return false;
    }
    return true;
  }

  bool isLegalToInline(Region *, Region *, bool, IRMapping &) const final {
    return true;
  }

  /// Conservative allowlist of operations supported so far.
  bool isLegalToInline(Operation *op, Region *, bool, IRMapping &) const final {
    if (isPure(op))
      return true;
    // clang-format off
    if (isa<LLVM::AllocaOp,
            LLVM::AssumeOp,
            LLVM::AtomicRMWOp,
            LLVM::AtomicCmpXchgOp,
            LLVM::CallOp,
            LLVM::DbgDeclareOp,
            LLVM::DbgLabelOp,
            LLVM::DbgValueOp,
            LLVM::FenceOp,
            LLVM::InlineAsmOp,
            LLVM::LifetimeEndOp,
            LLVM::LifetimeStartOp,
            LLVM::LoadOp,
            LLVM::MemcpyOp,
            LLVM::MemcpyInlineOp,
            LLVM::MemmoveOp,
            LLVM::MemsetOp,
            LLVM::NoAliasScopeDeclOp,
            LLVM::StackRestoreOp,
            LLVM::StackSaveOp,
            LLVM::StoreOp,
            LLVM::UnreachableOp>(op))
      return true;
    // clang-format on
    LLVM_DEBUG(llvm::dbgs()
               << "Cannot inline: unhandled side effecting operation \""
               << op->getName() << "\"\n");
    return false;
  }

  /// Handle the given inlined return by replacing it with a branch. This
  /// overload is called when the inlined region has more than one block.
  void handleTerminator(Operation *op, Block *newDest) const final {
    // Only return needs to be handled here.
    auto returnOp = dyn_cast<LLVM::ReturnOp>(op);
    if (!returnOp)
      return;

    // Replace the return with a branch to the dest.
    OpBuilder builder(op);
    builder.create<LLVM::BrOp>(op->getLoc(), returnOp.getOperands(), newDest);
    op->erase();
  }

  /// Handle the given inlined return by replacing the uses of the call with the
  /// operands of the return. This overload is called when the inlined region
  /// only contains one block.
  void handleTerminator(Operation *op,
                        ArrayRef<Value> valuesToRepl) const final {
    // Return will be the only terminator present.
    auto returnOp = cast<LLVM::ReturnOp>(op);

    // Replace the values directly with the return operands.
    assert(returnOp.getNumOperands() == valuesToRepl.size());
    for (const auto &[dst, src] :
         llvm::zip(valuesToRepl, returnOp.getOperands()))
      dst.replaceAllUsesWith(src);
  }

  Value handleArgument(OpBuilder &builder, Operation *call, Operation *callable,
                       Value argument,
                       DictionaryAttr argumentAttrs) const final {
    if (std::optional<NamedAttribute> attr =
            argumentAttrs.getNamed(LLVM::LLVMDialect::getByValAttrName())) {
      Type elementType = cast<TypeAttr>(attr->getValue()).getValue();
      unsigned requestedAlignment = 1;
      if (std::optional<NamedAttribute> alignAttr =
              argumentAttrs.getNamed(LLVM::LLVMDialect::getAlignAttrName())) {
        requestedAlignment = cast<IntegerAttr>(alignAttr->getValue())
                                 .getValue()
                                 .getLimitedValue();
      }
      return handleByValArgument(builder, callable, argument, elementType,
                                 requestedAlignment);
    }
    if ([[maybe_unused]] std::optional<NamedAttribute> attr =
            argumentAttrs.getNamed(LLVM::LLVMDialect::getNoAliasAttrName())) {
      if (argument.use_empty())
        return argument;

      // This code is essentially a workaround for deficiencies in the
      // inliner interface: We need to transform operations *after* inlining
      // based on the argument attributes of the parameters *before* inlining.
      // This method runs prior to actual inlining and thus cannot transform the
      // post-inlining code, while `processInlinedCallBlocks` does not have
      // access to pre-inlining function arguments. Additionally, it is required
      // to distinguish which parameter an SSA value originally came from.
      // As a workaround until this is changed: Create an ssa.copy intrinsic
      // with the noalias attribute that can easily be found, and is extremely
      // unlikely to exist in the code prior to inlining, using this to
      // communicate between this method and `processInlinedCallBlocks`.
      // TODO: Fix this by refactoring the inliner interface.
      auto copyOp = builder.create<LLVM::SSACopyOp>(call->getLoc(), argument);
      copyOp->setDiscardableAttr(
          builder.getStringAttr(LLVM::LLVMDialect::getNoAliasAttrName()),
          builder.getUnitAttr());
      return copyOp;
    }
    return argument;
  }

  void processInlinedCallBlocks(
      Operation *call,
      iterator_range<Region::iterator> inlinedBlocks) const override {
    handleInlinedAllocas(call, inlinedBlocks);
    handleAliasScopes(call, inlinedBlocks);
    handleAccessGroups(call, inlinedBlocks);
  }

  // Keeping this (immutable) state on the interface allows us to look up
  // StringAttrs instead of looking up strings, since StringAttrs are bound to
  // the current context and thus cannot be initialized as static fields.
  const DenseSet<StringAttr> disallowedFunctionAttrs;
};

} // end anonymous namespace

void LLVM::detail::addLLVMInlinerInterface(LLVM::LLVMDialect *dialect) {
  dialect->addInterfaces<LLVMInlinerInterface>();
}