[mlir][linalg] Add support for masked vectorization of `tensor.insert_slice` (1/N...
[llvm-project.git] / llvm / lib / Analysis / FunctionPropertiesAnalysis.cpp
blob0ffbc90d7ee22d646ae9e3d77b1b992adf55f792
1 //===- FunctionPropertiesAnalysis.cpp - Function Properties Analysis ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the FunctionPropertiesInfo and FunctionPropertiesAnalysis
10 // classes used to extract function properties.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Analysis/FunctionPropertiesAnalysis.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/Analysis/LoopInfo.h"
18 #include "llvm/IR/CFG.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/Dominators.h"
21 #include "llvm/IR/Instructions.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/Support/CommandLine.h"
24 #include <deque>
26 using namespace llvm;
28 namespace llvm {
29 cl::opt<bool> EnableDetailedFunctionProperties(
30 "enable-detailed-function-properties", cl::Hidden, cl::init(false),
31 cl::desc("Whether or not to compute detailed function properties."));
33 cl::opt<unsigned> BigBasicBlockInstructionThreshold(
34 "big-basic-block-instruction-threshold", cl::Hidden, cl::init(500),
35 cl::desc("The minimum number of instructions a basic block should contain "
36 "before being considered big."));
38 cl::opt<unsigned> MediumBasicBlockInstructionThreshold(
39 "medium-basic-block-instruction-threshold", cl::Hidden, cl::init(15),
40 cl::desc("The minimum number of instructions a basic block should contain "
41 "before being considered medium-sized."));
42 } // namespace llvm
44 static cl::opt<unsigned> CallWithManyArgumentsThreshold(
45 "call-with-many-arguments-threshold", cl::Hidden, cl::init(4),
46 cl::desc("The minimum number of arguments a function call must have before "
47 "it is considered having many arguments."));
49 namespace {
50 int64_t getNumBlocksFromCond(const BasicBlock &BB) {
51 int64_t Ret = 0;
52 if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) {
53 if (BI->isConditional())
54 Ret += BI->getNumSuccessors();
55 } else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator())) {
56 Ret += (SI->getNumCases() + (nullptr != SI->getDefaultDest()));
58 return Ret;
61 int64_t getUses(const Function &F) {
62 return ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses();
64 } // namespace
66 void FunctionPropertiesInfo::reIncludeBB(const BasicBlock &BB) {
67 updateForBB(BB, +1);
70 void FunctionPropertiesInfo::updateForBB(const BasicBlock &BB,
71 int64_t Direction) {
72 assert(Direction == 1 || Direction == -1);
73 BasicBlockCount += Direction;
74 BlocksReachedFromConditionalInstruction +=
75 (Direction * getNumBlocksFromCond(BB));
76 for (const auto &I : BB) {
77 if (auto *CS = dyn_cast<CallBase>(&I)) {
78 const auto *Callee = CS->getCalledFunction();
79 if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration())
80 DirectCallsToDefinedFunctions += Direction;
82 if (I.getOpcode() == Instruction::Load) {
83 LoadInstCount += Direction;
84 } else if (I.getOpcode() == Instruction::Store) {
85 StoreInstCount += Direction;
88 TotalInstructionCount += Direction * BB.sizeWithoutDebug();
90 if (EnableDetailedFunctionProperties) {
91 unsigned SuccessorCount = succ_size(&BB);
92 if (SuccessorCount == 1)
93 BasicBlocksWithSingleSuccessor += Direction;
94 else if (SuccessorCount == 2)
95 BasicBlocksWithTwoSuccessors += Direction;
96 else if (SuccessorCount > 2)
97 BasicBlocksWithMoreThanTwoSuccessors += Direction;
99 unsigned PredecessorCount = pred_size(&BB);
100 if (PredecessorCount == 1)
101 BasicBlocksWithSinglePredecessor += Direction;
102 else if (PredecessorCount == 2)
103 BasicBlocksWithTwoPredecessors += Direction;
104 else if (PredecessorCount > 2)
105 BasicBlocksWithMoreThanTwoPredecessors += Direction;
107 if (TotalInstructionCount > BigBasicBlockInstructionThreshold)
108 BigBasicBlocks += Direction;
109 else if (TotalInstructionCount > MediumBasicBlockInstructionThreshold)
110 MediumBasicBlocks += Direction;
111 else
112 SmallBasicBlocks += Direction;
114 // Calculate critical edges by looking through all successors of a basic
115 // block that has multiple successors and finding ones that have multiple
116 // predecessors, which represent critical edges.
117 if (SuccessorCount > 1) {
118 for (const auto *Successor : successors(&BB)) {
119 if (pred_size(Successor) > 1)
120 CriticalEdgeCount += Direction;
124 ControlFlowEdgeCount += Direction * SuccessorCount;
126 if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) {
127 if (!BI->isConditional())
128 UnconditionalBranchCount += Direction;
131 for (const Instruction &I : BB.instructionsWithoutDebug()) {
132 if (I.isCast())
133 CastInstructionCount += Direction;
135 if (I.getType()->isFloatTy())
136 FloatingPointInstructionCount += Direction;
137 else if (I.getType()->isIntegerTy())
138 IntegerInstructionCount += Direction;
140 if (isa<IntrinsicInst>(I))
141 ++IntrinsicCount;
143 if (const auto *Call = dyn_cast<CallInst>(&I)) {
144 if (Call->isIndirectCall())
145 IndirectCallCount += Direction;
146 else
147 DirectCallCount += Direction;
149 if (Call->getType()->isIntegerTy())
150 CallReturnsIntegerCount += Direction;
151 else if (Call->getType()->isFloatingPointTy())
152 CallReturnsFloatCount += Direction;
153 else if (Call->getType()->isPointerTy())
154 CallReturnsPointerCount += Direction;
155 else if (Call->getType()->isVectorTy()) {
156 if (Call->getType()->getScalarType()->isIntegerTy())
157 CallReturnsVectorIntCount += Direction;
158 else if (Call->getType()->getScalarType()->isFloatingPointTy())
159 CallReturnsVectorFloatCount += Direction;
160 else if (Call->getType()->getScalarType()->isPointerTy())
161 CallReturnsVectorPointerCount += Direction;
164 if (Call->arg_size() > CallWithManyArgumentsThreshold)
165 CallWithManyArgumentsCount += Direction;
167 for (const auto &Arg : Call->args()) {
168 if (Arg->getType()->isPointerTy()) {
169 CallWithPointerArgumentCount += Direction;
170 break;
175 #define COUNT_OPERAND(OPTYPE) \
176 if (isa<OPTYPE>(Operand)) { \
177 OPTYPE##OperandCount += Direction; \
178 continue; \
181 for (unsigned int OperandIndex = 0; OperandIndex < I.getNumOperands();
182 ++OperandIndex) {
183 Value *Operand = I.getOperand(OperandIndex);
184 COUNT_OPERAND(GlobalValue)
185 COUNT_OPERAND(ConstantInt)
186 COUNT_OPERAND(ConstantFP)
187 COUNT_OPERAND(Constant)
188 COUNT_OPERAND(Instruction)
189 COUNT_OPERAND(BasicBlock)
190 COUNT_OPERAND(InlineAsm)
191 COUNT_OPERAND(Argument)
193 // We only get to this point if we haven't matched any of the other
194 // operand types.
195 UnknownOperandCount += Direction;
198 #undef CHECK_OPERAND
203 void FunctionPropertiesInfo::updateAggregateStats(const Function &F,
204 const LoopInfo &LI) {
206 Uses = getUses(F);
207 TopLevelLoopCount = llvm::size(LI);
208 MaxLoopDepth = 0;
209 std::deque<const Loop *> Worklist;
210 llvm::append_range(Worklist, LI);
211 while (!Worklist.empty()) {
212 const auto *L = Worklist.front();
213 MaxLoopDepth =
214 std::max(MaxLoopDepth, static_cast<int64_t>(L->getLoopDepth()));
215 Worklist.pop_front();
216 llvm::append_range(Worklist, L->getSubLoops());
220 FunctionPropertiesInfo FunctionPropertiesInfo::getFunctionPropertiesInfo(
221 Function &F, FunctionAnalysisManager &FAM) {
222 return getFunctionPropertiesInfo(F, FAM.getResult<DominatorTreeAnalysis>(F),
223 FAM.getResult<LoopAnalysis>(F));
226 FunctionPropertiesInfo FunctionPropertiesInfo::getFunctionPropertiesInfo(
227 const Function &F, const DominatorTree &DT, const LoopInfo &LI) {
229 FunctionPropertiesInfo FPI;
230 for (const auto &BB : F)
231 if (DT.isReachableFromEntry(&BB))
232 FPI.reIncludeBB(BB);
233 FPI.updateAggregateStats(F, LI);
234 return FPI;
237 void FunctionPropertiesInfo::print(raw_ostream &OS) const {
238 #define PRINT_PROPERTY(PROP_NAME) OS << #PROP_NAME ": " << PROP_NAME << "\n";
240 PRINT_PROPERTY(BasicBlockCount)
241 PRINT_PROPERTY(BlocksReachedFromConditionalInstruction)
242 PRINT_PROPERTY(Uses)
243 PRINT_PROPERTY(DirectCallsToDefinedFunctions)
244 PRINT_PROPERTY(LoadInstCount)
245 PRINT_PROPERTY(StoreInstCount)
246 PRINT_PROPERTY(MaxLoopDepth)
247 PRINT_PROPERTY(TopLevelLoopCount)
248 PRINT_PROPERTY(TotalInstructionCount)
250 if (EnableDetailedFunctionProperties) {
251 PRINT_PROPERTY(BasicBlocksWithSingleSuccessor)
252 PRINT_PROPERTY(BasicBlocksWithTwoSuccessors)
253 PRINT_PROPERTY(BasicBlocksWithMoreThanTwoSuccessors)
254 PRINT_PROPERTY(BasicBlocksWithSinglePredecessor)
255 PRINT_PROPERTY(BasicBlocksWithTwoPredecessors)
256 PRINT_PROPERTY(BasicBlocksWithMoreThanTwoPredecessors)
257 PRINT_PROPERTY(BigBasicBlocks)
258 PRINT_PROPERTY(MediumBasicBlocks)
259 PRINT_PROPERTY(SmallBasicBlocks)
260 PRINT_PROPERTY(CastInstructionCount)
261 PRINT_PROPERTY(FloatingPointInstructionCount)
262 PRINT_PROPERTY(IntegerInstructionCount)
263 PRINT_PROPERTY(ConstantIntOperandCount)
264 PRINT_PROPERTY(ConstantFPOperandCount)
265 PRINT_PROPERTY(ConstantOperandCount)
266 PRINT_PROPERTY(InstructionOperandCount)
267 PRINT_PROPERTY(BasicBlockOperandCount)
268 PRINT_PROPERTY(GlobalValueOperandCount)
269 PRINT_PROPERTY(InlineAsmOperandCount)
270 PRINT_PROPERTY(ArgumentOperandCount)
271 PRINT_PROPERTY(UnknownOperandCount)
272 PRINT_PROPERTY(CriticalEdgeCount)
273 PRINT_PROPERTY(ControlFlowEdgeCount)
274 PRINT_PROPERTY(UnconditionalBranchCount)
275 PRINT_PROPERTY(IntrinsicCount)
276 PRINT_PROPERTY(DirectCallCount)
277 PRINT_PROPERTY(IndirectCallCount)
278 PRINT_PROPERTY(CallReturnsIntegerCount)
279 PRINT_PROPERTY(CallReturnsFloatCount)
280 PRINT_PROPERTY(CallReturnsPointerCount)
281 PRINT_PROPERTY(CallReturnsVectorIntCount)
282 PRINT_PROPERTY(CallReturnsVectorFloatCount)
283 PRINT_PROPERTY(CallReturnsVectorPointerCount)
284 PRINT_PROPERTY(CallWithManyArgumentsCount)
285 PRINT_PROPERTY(CallWithPointerArgumentCount)
288 #undef PRINT_PROPERTY
290 OS << "\n";
293 AnalysisKey FunctionPropertiesAnalysis::Key;
295 FunctionPropertiesInfo
296 FunctionPropertiesAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
297 return FunctionPropertiesInfo::getFunctionPropertiesInfo(F, FAM);
300 PreservedAnalyses
301 FunctionPropertiesPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
302 OS << "Printing analysis results of CFA for function "
303 << "'" << F.getName() << "':"
304 << "\n";
305 AM.getResult<FunctionPropertiesAnalysis>(F).print(OS);
306 return PreservedAnalyses::all();
309 FunctionPropertiesUpdater::FunctionPropertiesUpdater(
310 FunctionPropertiesInfo &FPI, CallBase &CB)
311 : FPI(FPI), CallSiteBB(*CB.getParent()), Caller(*CallSiteBB.getParent()) {
312 assert(isa<CallInst>(CB) || isa<InvokeInst>(CB));
313 // For BBs that are likely to change, we subtract from feature totals their
314 // contribution. Some features, like max loop counts or depths, are left
315 // invalid, as they will be updated post-inlining.
316 SmallPtrSet<const BasicBlock *, 4> LikelyToChangeBBs;
317 // The CB BB will change - it'll either be split or the callee's body (single
318 // BB) will be pasted in.
319 LikelyToChangeBBs.insert(&CallSiteBB);
321 // The caller's entry BB may change due to new alloca instructions.
322 LikelyToChangeBBs.insert(&*Caller.begin());
324 // The successors may become unreachable in the case of `invoke` inlining.
325 // We track successors separately, too, because they form a boundary, together
326 // with the CB BB ('Entry') between which the inlined callee will be pasted.
327 Successors.insert(succ_begin(&CallSiteBB), succ_end(&CallSiteBB));
329 // the outcome of the inlining may be that some edges get lost (DCEd BBs
330 // because inlining brought some constant, for example). We don't know which
331 // edges will be removed, so we list all of them as potentially removable.
332 // Some BBs have (at this point) duplicate edges. Remove duplicates, otherwise
333 // the DT updater will not apply changes correctly.
334 DenseSet<const BasicBlock *> Inserted;
335 for (auto *Succ : successors(&CallSiteBB))
336 if (Inserted.insert(Succ).second)
337 DomTreeUpdates.emplace_back(DominatorTree::UpdateKind::Delete,
338 const_cast<BasicBlock *>(&CallSiteBB),
339 const_cast<BasicBlock *>(Succ));
340 // Reuse Inserted (which has some allocated capacity at this point) below, if
341 // we have an invoke.
342 Inserted.clear();
343 // Inlining only handles invoke and calls. If this is an invoke, and inlining
344 // it pulls another invoke, the original landing pad may get split, so as to
345 // share its content with other potential users. So the edge up to which we
346 // need to invalidate and then re-account BB data is the successors of the
347 // current landing pad. We can leave the current lp, too - if it doesn't get
348 // split, then it will be the place traversal stops. Either way, the
349 // discounted BBs will be checked if reachable and re-added.
350 if (const auto *II = dyn_cast<InvokeInst>(&CB)) {
351 const auto *UnwindDest = II->getUnwindDest();
352 Successors.insert(succ_begin(UnwindDest), succ_end(UnwindDest));
353 // Same idea as above, we pretend we lose all these edges.
354 for (auto *Succ : successors(UnwindDest))
355 if (Inserted.insert(Succ).second)
356 DomTreeUpdates.emplace_back(DominatorTree::UpdateKind::Delete,
357 const_cast<BasicBlock *>(UnwindDest),
358 const_cast<BasicBlock *>(Succ));
361 // Exclude the CallSiteBB, if it happens to be its own successor (1-BB loop).
362 // We are only interested in BBs the graph moves past the callsite BB to
363 // define the frontier past which we don't want to re-process BBs. Including
364 // the callsite BB in this case would prematurely stop the traversal in
365 // finish().
366 Successors.erase(&CallSiteBB);
368 for (const auto *BB : Successors)
369 LikelyToChangeBBs.insert(BB);
371 // Commit the change. While some of the BBs accounted for above may play dual
372 // role - e.g. caller's entry BB may be the same as the callsite BB - set
373 // insertion semantics make sure we account them once. This needs to be
374 // followed in `finish`, too.
375 for (const auto *BB : LikelyToChangeBBs)
376 FPI.updateForBB(*BB, -1);
379 DominatorTree &FunctionPropertiesUpdater::getUpdatedDominatorTree(
380 FunctionAnalysisManager &FAM) const {
381 auto &DT =
382 FAM.getResult<DominatorTreeAnalysis>(const_cast<Function &>(Caller));
384 SmallVector<DominatorTree::UpdateType, 2> FinalDomTreeUpdates;
386 DenseSet<const BasicBlock *> Inserted;
387 for (auto *Succ : successors(&CallSiteBB))
388 if (Inserted.insert(Succ).second)
389 FinalDomTreeUpdates.push_back({DominatorTree::UpdateKind::Insert,
390 const_cast<BasicBlock *>(&CallSiteBB),
391 const_cast<BasicBlock *>(Succ)});
393 // Perform the deletes last, so that any new nodes connected to nodes
394 // participating in the edge deletion are known to the DT.
395 for (auto &Upd : DomTreeUpdates)
396 if (!llvm::is_contained(successors(Upd.getFrom()), Upd.getTo()))
397 FinalDomTreeUpdates.push_back(Upd);
399 DT.applyUpdates(FinalDomTreeUpdates);
400 #ifdef EXPENSIVE_CHECKS
401 assert(DT.verify(DominatorTree::VerificationLevel::Full));
402 #endif
403 return DT;
406 void FunctionPropertiesUpdater::finish(FunctionAnalysisManager &FAM) const {
407 // Update feature values from the BBs that were copied from the callee, or
408 // might have been modified because of inlining. The latter have been
409 // subtracted in the FunctionPropertiesUpdater ctor.
410 // There could be successors that were reached before but now are only
411 // reachable from elsewhere in the CFG.
412 // One example is the following diamond CFG (lines are arrows pointing down):
413 // A
414 // / \
415 // B C
416 // | |
417 // | D
418 // | |
419 // | E
420 // \ /
421 // F
422 // There's a call site in C that is inlined. Upon doing that, it turns out
423 // it expands to
424 // call void @llvm.trap()
425 // unreachable
426 // F isn't reachable from C anymore, but we did discount it when we set up
427 // FunctionPropertiesUpdater, so we need to re-include it here.
428 // At the same time, D and E were reachable before, but now are not anymore,
429 // so we need to leave D out (we discounted it at setup), and explicitly
430 // remove E.
431 SetVector<const BasicBlock *> Reinclude;
432 SetVector<const BasicBlock *> Unreachable;
433 auto &DT = getUpdatedDominatorTree(FAM);
435 if (&CallSiteBB != &*Caller.begin())
436 Reinclude.insert(&*Caller.begin());
438 // Distribute the successors to the 2 buckets.
439 for (const auto *Succ : Successors)
440 if (DT.isReachableFromEntry(Succ))
441 Reinclude.insert(Succ);
442 else
443 Unreachable.insert(Succ);
445 // For reinclusion, we want to stop at the reachable successors, who are at
446 // the beginning of the worklist; but, starting from the callsite bb and
447 // ending at those successors, we also want to perform a traversal.
448 // IncludeSuccessorsMark is the index after which we include successors.
449 const auto IncludeSuccessorsMark = Reinclude.size();
450 bool CSInsertion = Reinclude.insert(&CallSiteBB);
451 (void)CSInsertion;
452 assert(CSInsertion);
453 for (size_t I = 0; I < Reinclude.size(); ++I) {
454 const auto *BB = Reinclude[I];
455 FPI.reIncludeBB(*BB);
456 if (I >= IncludeSuccessorsMark)
457 Reinclude.insert(succ_begin(BB), succ_end(BB));
460 // For exclusion, we don't need to exclude the set of BBs that were successors
461 // before and are now unreachable, because we already did that at setup. For
462 // the rest, as long as a successor is unreachable, we want to explicitly
463 // exclude it.
464 const auto AlreadyExcludedMark = Unreachable.size();
465 for (size_t I = 0; I < Unreachable.size(); ++I) {
466 const auto *U = Unreachable[I];
467 if (I >= AlreadyExcludedMark)
468 FPI.updateForBB(*U, -1);
469 for (const auto *Succ : successors(U))
470 if (!DT.isReachableFromEntry(Succ))
471 Unreachable.insert(Succ);
474 const auto &LI = FAM.getResult<LoopAnalysis>(const_cast<Function &>(Caller));
475 FPI.updateAggregateStats(Caller, LI);
476 #ifdef EXPENSIVE_CHECKS
477 assert(isUpdateValid(Caller, FPI, FAM));
478 #endif
481 bool FunctionPropertiesUpdater::isUpdateValid(Function &F,
482 const FunctionPropertiesInfo &FPI,
483 FunctionAnalysisManager &FAM) {
484 if (!FAM.getResult<DominatorTreeAnalysis>(F).verify(
485 DominatorTree::VerificationLevel::Full))
486 return false;
487 DominatorTree DT(F);
488 LoopInfo LI(DT);
489 auto Fresh = FunctionPropertiesInfo::getFunctionPropertiesInfo(F, DT, LI);
490 return FPI == Fresh;