llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp

   1 //===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This implements feature and label extraction for offline supervised learning
  10 // of a IR to native size model.
  11 //
  12 //===----------------------------------------------------------------------===//
  13 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
  14
  15 #ifdef LLVM_HAVE_TFLITE
  16 #include "llvm/Analysis/Utils/TFUtils.h"
  17 #endif
  18 #include "llvm/IR/Function.h"
  19 #include "llvm/IR/PassManager.h"
  20 #include "llvm/Support/raw_ostream.h"
  21
  22 using namespace llvm;
  23
  24 AnalysisKey InlineSizeEstimatorAnalysis::Key;
  25
  26 #ifdef LLVM_HAVE_TFLITE
  27 #include "llvm/Analysis/LoopInfo.h"
  28 #include "llvm/Analysis/TargetLibraryInfo.h"
  29 #include "llvm/Analysis/TargetTransformInfo.h"
  30 #include "llvm/IR/BasicBlock.h"
  31 #include "llvm/IR/Dominators.h"
  32 #include "llvm/IR/Instructions.h"
  33 #include "llvm/Support/Casting.h"
  34 #include "llvm/Support/CommandLine.h"
  35 #include <algorithm>
  36 #include <deque>
  37 #include <optional>
  38
  39 cl::opt<std::string> TFIR2NativeModelPath(
  40     "ml-inliner-ir2native-model", cl::Hidden,
  41     cl::desc("Path to saved model evaluating native size from IR."));
  42
  43 #define DEBUG_TYPE "inline-size-estimator"
  44 namespace {
  45 unsigned getMaxInstructionID() {
  46 #define LAST_OTHER_INST(NR) return NR;
  47 #include "llvm/IR/Instruction.def"
  48 }
  49
  50 class IRToNativeSizeLearning {
  51 public:
  52   enum class NamedFeatureIndex : size_t {
  53     InitialSize,
  54     Blocks,
  55     Calls,
  56     IsLocal,
  57     IsLinkOnceODR,
  58     IsLinkOnce,
  59     Loops,
  60     MaxLoopDepth,
  61     MaxDomTreeLevel,
  62
  63     NumNamedFeatures
  64   };
  65   static const size_t NumNamedFeatures =
  66       static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
  67   struct FunctionFeatures {
  68     static const size_t FeatureCount;
  69
  70     std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
  71     std::vector<int32_t> InstructionHistogram;
  72     std::vector<int32_t> InstructionPairHistogram;
  73
  74     void fillTensor(int32_t *Ptr) const;
  75     int32_t &operator[](NamedFeatureIndex Pos) {
  76       return NamedFeatures[static_cast<size_t>(Pos)];
  77     }
  78   };
  79   IRToNativeSizeLearning() = default;
  80
  81   static FunctionFeatures getFunctionFeatures(Function &F,
  82                                               FunctionAnalysisManager &FAM);
  83 };
  84
  85 // This is a point in time - we determined including these pairs of
  86 // consecutive instructions (in the IR layout available at inline time) as
  87 // features improves the model performance. We want to move away from manual
  88 // feature selection.
  89 // The array is given in opcode pairs rather than labels because 1) labels
  90 // weren't readily available, and 2) the successions were hand - extracted.
  91 //
  92 // This array must be sorted.
  93 static const std::array<std::pair<size_t, size_t>, 137>
  94     ImportantInstructionSuccessions{
  95         {{1, 1},   {1, 4},   {1, 5},   {1, 7},   {1, 8},   {1, 9},   {1, 11},
  96          {1, 12},  {1, 13},  {1, 14},  {1, 18},  {1, 20},  {1, 22},  {1, 24},
  97          {1, 25},  {1, 26},  {1, 27},  {1, 28},  {1, 29},  {1, 30},  {1, 31},
  98          {1, 32},  {1, 33},  {1, 34},  {1, 39},  {1, 40},  {1, 42},  {1, 45},
  99          {2, 1},   {2, 2},   {2, 13},  {2, 28},  {2, 29},  {2, 32},  {2, 33},
 100          {2, 34},  {2, 38},  {2, 48},  {2, 49},  {2, 53},  {2, 55},  {2, 56},
 101          {13, 2},  {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27},
 102          {28, 2},  {28, 48}, {28, 53}, {29, 2},  {29, 33}, {29, 56}, {31, 31},
 103          {31, 33}, {31, 34}, {31, 49}, {32, 1},  {32, 2},  {32, 13}, {32, 15},
 104          {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40},
 105          {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1},  {33, 2},  {33, 32},
 106          {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1},  {34, 2},
 107          {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34},
 108          {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2},  {48, 34}, {48, 56},
 109          {49, 1},  {49, 2},  {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39},
 110          {49, 49}, {49, 56}, {53, 1},  {53, 2},  {53, 28}, {53, 34}, {53, 53},
 111          {53, 57}, {55, 1},  {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56},
 112          {56, 1},  {56, 2},  {56, 7},  {56, 13}, {56, 32}, {56, 33}, {56, 34},
 113          {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57},
 114          {64, 1},  {64, 64}, {65, 1},  {65, 65}}};
 115
 116 // We have: 9 calculated features (the features here); 1 feature for each
 117 // instruction opcode; and 1 feature for each manually-identified sequence.
 118 // For the latter 2, we build a histogram: we count the number of
 119 // occurrences of each instruction opcode or succession of instructions,
 120 // respectively.
 121 // Note that instruction opcodes start from 1. For convenience, we also have an
 122 // always 0 feature for the '0' opcode, hence the extra 1.
 123 const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
 124     ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 +
 125     IRToNativeSizeLearning::NumNamedFeatures;
 126
 127 size_t getSize(Function &F, TargetTransformInfo &TTI) {
 128   size_t Ret = 0;
 129   for (const auto &BB : F)
 130     for (const auto &I : BB)
 131       Ret += *(TTI.getInstructionCost(
 132           &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize).getValue());
 133   return Ret;
 134 }
 135
 136 size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
 137   auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
 138   return getSize(F, TTI);
 139 }
 140
 141 unsigned getMaxDominatorTreeDepth(const Function &F,
 142                                   const DominatorTree &Tree) {
 143   unsigned Ret = 0;
 144   for (const auto &BB : F)
 145     if (const auto *TN = Tree.getNode(&BB))
 146       Ret = std::max(Ret, TN->getLevel());
 147   return Ret;
 148 }
 149 } // namespace
 150
 151 IRToNativeSizeLearning::FunctionFeatures
 152 IRToNativeSizeLearning::getFunctionFeatures(Function &F,
 153                                             FunctionAnalysisManager &FAM) {
 154   assert(llvm::is_sorted(ImportantInstructionSuccessions) &&
 155          "expected function features are sorted");
 156
 157   auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
 158   FunctionFeatures FF;
 159   size_t InstrCount = getMaxInstructionID() + 1;
 160   FF.InstructionHistogram.resize(InstrCount);
 161
 162   FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size());
 163
 164   int StartID = 0;
 165   int LastID = StartID;
 166   auto getPairIndex = [](size_t a, size_t b) {
 167     auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b));
 168     if (I == ImportantInstructionSuccessions.end())
 169       return -1;
 170     return static_cast<int>(
 171         std::distance(ImportantInstructionSuccessions.begin(), I));
 172   };
 173
 174   // We don't want debug calls, because they'd just add noise.
 175   for (const auto &BB : F) {
 176     for (const auto &I : BB.instructionsWithoutDebug()) {
 177       auto ID = I.getOpcode();
 178
 179       ++FF.InstructionHistogram[ID];
 180       int PairIndex = getPairIndex(LastID, ID);
 181       if (PairIndex >= 0)
 182         ++FF.InstructionPairHistogram[PairIndex];
 183       LastID = ID;
 184       if (isa<CallBase>(I))
 185         ++FF[NamedFeatureIndex::Calls];
 186     }
 187   }
 188
 189   FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
 190   FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
 191   FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
 192   FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
 193   FF[NamedFeatureIndex::Blocks] = F.size();
 194   auto &LI = FAM.getResult<LoopAnalysis>(F);
 195   FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
 196   for (auto &L : LI)
 197     FF[NamedFeatureIndex::MaxLoopDepth] =
 198         std::max(FF[NamedFeatureIndex::MaxLoopDepth],
 199                  static_cast<int32_t>(L->getLoopDepth()));
 200   FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
 201   return FF;
 202 }
 203
 204 void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
 205   std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
 206   Ptr += NamedFeatures.size();
 207   std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);
 208   Ptr += InstructionHistogram.size();
 209   std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
 210             Ptr);
 211 }
 212
 213 bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
 214   return !TFIR2NativeModelPath.empty();
 215 }
 216
 217 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
 218   if (!isEvaluatorRequested()) {
 219     return;
 220   }
 221   std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>(
 222       "serving_default_input_1",
 223       {1, static_cast<int64_t>(
 224               IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})};
 225   std::vector<TensorSpec> OutputSpecs{
 226       TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})};
 227   Evaluator = std::make_unique<TFModelEvaluator>(
 228       TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs);
 229   if (!Evaluator || !Evaluator->isValid()) {
 230     Evaluator.reset();
 231     return;
 232   }
 233 }
 234
 235 InlineSizeEstimatorAnalysis::Result
 236 InlineSizeEstimatorAnalysis::run(const Function &F,
 237                                  FunctionAnalysisManager &FAM) {
 238   if (!Evaluator)
 239     return std::nullopt;
 240   auto Features = IRToNativeSizeLearning::getFunctionFeatures(
 241       const_cast<Function &>(F), FAM);
 242   int32_t *V = Evaluator->getInput<int32_t>(0);
 243   Features.fillTensor(V);
 244   auto ER = Evaluator->evaluate();
 245   if (!ER)
 246     return std::nullopt;
 247   float Ret = *ER->getTensorValue<float>(0);
 248   if (Ret < 0.0)
 249     Ret = 0.0;
 250   return static_cast<size_t>(Ret);
 251 }
 252
 253 InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
 254 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
 255     InlineSizeEstimatorAnalysis &&Other)
 256     : Evaluator(std::move(Other.Evaluator)) {}
 257
 258 #else
 259 namespace llvm {
 260 class TFModelEvaluator {};
 261 } // namespace llvm
 262 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() = default;
 263 InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis(
 264     InlineSizeEstimatorAnalysis &&) {}
 265 InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default;
 266 InlineSizeEstimatorAnalysis::Result
 267 InlineSizeEstimatorAnalysis::run(const Function &F,
 268                                  FunctionAnalysisManager &FAM) {
 269   return std::nullopt;
 270 }
 271 bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
 272 #endif
 273
 274 PreservedAnalyses
 275 InlineSizeEstimatorAnalysisPrinterPass::run(Function &F,
 276                                             FunctionAnalysisManager &AM) {
 277   OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName()
 278      << ": " << AM.getResult<InlineSizeEstimatorAnalysis>(F) << "\n";
 279   return PreservedAnalyses::all();
 280 }