1 //===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a model runner using TFLite, allowing the
10 // loading of a model from a command line option.
12 //===----------------------------------------------------------------------===//
13 #include "llvm/Analysis/TensorSpec.h"
14 #include "llvm/Config/config.h"
15 #if defined(LLVM_HAVE_TFLITE)
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/Analysis/CallGraph.h"
19 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
20 #include "llvm/Analysis/MLInlineAdvisor.h"
21 #include "llvm/Analysis/ModelUnderTrainingRunner.h"
22 #include "llvm/Analysis/NoInferenceModelRunner.h"
23 #include "llvm/Analysis/Utils/TFUtils.h"
24 #include "llvm/Analysis/Utils/TrainingLogger.h"
25 #include "llvm/IR/LLVMContext.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/ManagedStatic.h"
34 static cl::opt
<std::string
> TrainingLog(
35 "training-log", cl::Hidden
,
36 cl::desc("Path where the development - mode inlining log is saved."));
38 static cl::opt
<std::string
> TFModelUnderTrainingPath(
39 "ml-inliner-model-under-training", cl::Hidden
,
40 cl::desc(R
"(Path to SavedModel from the previous training iteration.
41 The directory is also expected to contain a JSON specification of the
42 outputs expected to be logged, where the first entry must be the
43 inlining decision. The file containing the specification should be
44 called output_spec.json. The expected JSON value is an array of
45 dictionaries. Each dictionary should have 2 keys:
47 - "tensor_spec
, followed by the TensorSpec description of the
49 - "logging_name", a string indicating the name to use when
50 logging the output values
.
55 "logging_name" : "some_name",
57 "name" : "model_name",
65 The first value must always correspond to the decision
.)"));
67 static cl::opt<std::string> TFOutputSpecOverride(
68 "ml
-inliner
-output
-spec
-override
", cl::Hidden,
69 cl::desc("Override the path to the output spec json file
. See
"
70 "-ml
-inliner
-model
-under
-training documentation
for the
"
71 "specification of that file
."));
73 static cl::opt<std::string> TFFeedPrefix("ml
-inliner
-trained
-model
-feed
-prefix
",
74 cl::Hidden, cl::init("action_
"),
75 cl::desc("Prefix
for feature names
."));
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  int64_t DefaultDecision = 0;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  int64_t AdvisedDecision = 0;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true, otherwise it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before
  int64_t Reward = 0;
};
96 /// Collect data we may use for training a model.
97 class TrainingLogger final {
99 TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);
101 /// Log one inlining event.
102 void logInlineEvent(const InlineEvent &Event,
103 const MLModelRunner &ModelRunner);
106 StringRef LogFileName;
107 const ModelUnderTrainingRunner *const MUTR;
108 std::unique_ptr<Logger> L;
110 /// Set these 2 clearly OOB, to make sure we set them later.
111 size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
112 size_t DecisionPos = std::numeric_limits<size_t>::max();
115 /// An extension of the MLInlineAdvisor for the 'development' mode, targeting
116 /// the offline training scenario. Note that training happens outside of the
117 /// compiler, this facility is concerned with producing training data ("logs
").
118 /// This InlineAdvisor can operate in the following modes:
120 /// 1) collect logs for the default policy. This is useful for bootstrapping
121 /// training, which will be considerably faster by starting from a reasonable
124 /// 2) collect logs for the ML policy, using a model from a previous
125 /// training. Potentially, that model uses internally some small random
126 /// perturbation of its weights, to induce exploration (setting this up is the
127 /// responsibility of the training algorithm). The logs would then be used to
128 /// retrain and improve on this model.
130 /// 3) use the provided model, with no logging. This is useful for end to end
131 /// validation - the model, in this case, is a release candidate and shouldn't
132 /// have random perturbations. It is a convenience feature: rather than needing
133 /// to take the release candidate model and compile it in 'release' mode,
134 /// validate it, then potentially discard it, it's easier to just pass the model
135 /// to the compiler, albeit compilation would be slower, as a one-off. Once the
136 /// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
137 /// release mode. The expectation is that a well-trained model provides a good
138 /// policy over a sufficiently diverse codebase, over many changes (i.e.
139 /// training happens seldom).
140 class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
142 DevelopmentModeMLInlineAdvisor(
143 Module &M, ModuleAnalysisManager &MAM,
144 std::unique_ptr<MLModelRunner> ModelRunner,
145 std::function<bool(CallBase &)> GetDefaultAdvice,
146 std::unique_ptr<TrainingLogger> Logger);
148 size_t getTotalSizeEstimate();
150 void updateNativeSizeEstimate(int64_t Change) {
151 *CurrentNativeSize += Change;
153 void resetNativeSize(Function *F) {
154 PreservedAnalyses PA = PreservedAnalyses::all();
155 PA.abandon<InlineSizeEstimatorAnalysis>();
156 FAM.invalidate(*F, PA);
159 std::unique_ptr<MLInlineAdvice>
160 getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
162 std::optional<size_t> getNativeSizeEstimate(const Function &F) const;
165 bool isLogging() const { return !!Logger; }
166 std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;
168 const bool IsDoingInference;
169 std::unique_ptr<TrainingLogger> Logger;
171 const std::optional<int32_t> InitialNativeSize;
172 std::optional<int32_t> CurrentNativeSize;
175 /// A variant of MLInlineAdvice that tracks all non-trivial inlining
176 /// decisions, for training/logging.
177 class LoggingMLInlineAdvice : public MLInlineAdvice {
179 LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
180 OptimizationRemarkEmitter &ORE, bool Recommendation,
181 TrainingLogger &Logger,
182 std::optional<size_t> CallerSizeEstimateBefore,
183 std::optional<size_t> CalleeSizeEstimateBefore,
184 bool DefaultDecision, bool Mandatory = false)
185 : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
186 CallerSizeEstimateBefore(CallerSizeEstimateBefore),
187 CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
188 DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}
190 virtual ~LoggingMLInlineAdvice() = default;
193 DevelopmentModeMLInlineAdvisor *getAdvisor() const {
194 return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
196 void recordInliningImpl() override {
197 MLInlineAdvice::recordInliningImpl();
198 getAdvisor()->resetNativeSize(Caller);
199 int Reward = std::numeric_limits<int>::max();
200 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
201 !getAdvisor()->isForcedToStop()) {
202 int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
203 *CalleeSizeEstimateBefore;
204 Reward = NativeSizeAfter -
205 (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
206 getAdvisor()->updateNativeSizeEstimate(Reward);
208 log(Reward, /*Success=*/true);
211 void recordInliningWithCalleeDeletedImpl() override {
212 MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
213 getAdvisor()->resetNativeSize(Caller);
214 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
215 !getAdvisor()->isForcedToStop()) {
216 int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
217 int Reward = NativeSizeAfter -
218 (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
219 getAdvisor()->updateNativeSizeEstimate(Reward);
220 log(Reward, /*Success=*/true);
222 log(NoReward, /*Success=*/true);
226 void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
227 MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
228 log(NoReward, /*Success=*/false);
231 void recordUnattemptedInliningImpl() override {
232 MLInlineAdvice::recordUnattemptedInliningImpl();
233 log(NoReward, /*Success=*/false);
236 void log(int64_t Reward, bool Success) {
240 Event.AdvisedDecision = isInliningRecommended();
241 Event.DefaultDecision = DefaultDecision;
242 Event.Effect = Success;
243 Event.Reward = Reward;
244 Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
247 static const int64_t NoReward = 0;
248 TrainingLogger &Logger;
249 const std::optional<size_t> CallerSizeEstimateBefore;
250 const std::optional<size_t> CalleeSizeEstimateBefore;
251 const int64_t DefaultDecision;
252 const int64_t Mandatory;
255 static const std::vector<TensorSpec> TrainingOnlyFeatures{
256 TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default
", {1}),
257 TensorSpec::createSpec<float>(TFFeedPrefix + "discount
", {1}),
258 TensorSpec::createSpec<float>(TFFeedPrefix + "reward
", {1}),
259 TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type
", {1})};
261 static const std::vector<TensorSpec> getInputFeatures() {
262 std::vector<TensorSpec> InputSpecs;
263 for (size_t I = 0; I < NumberOfFeatures; ++I)
264 InputSpecs.push_back(TensorSpec::createSpec<int64_t>(
265 TFFeedPrefix + FeatureMap[I].name(), FeatureMap[I].shape()));
266 append_range(InputSpecs, TrainingOnlyFeatures);
272 TrainingLogger::TrainingLogger(StringRef LogFileName,
273 const ModelUnderTrainingRunner *MUTR)
274 : LogFileName(LogFileName), MUTR(MUTR) {
275 // The first output is the inlining decision.
276 std::vector<TensorSpec> FT(FeatureMap.begin(), FeatureMap.end());
279 append_range(FT, MUTR->extraOutputsForLoggingSpecs());
281 DefaultDecisionPos = FT.size();
282 FT.push_back(DefaultDecisionSpec);
284 DecisionPos = FT.size();
285 FT.push_back(InlineDecisionSpec);
287 auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
289 dbgs() << (EC.message() + ":" + TrainingLog);
291 L = std::make_unique<Logger>(
292 std::move(OS), FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
293 InlineSizeEstimatorAnalysis::isEvaluatorRequested());
294 L->switchContext("");
297 /// Log one inlining event.
298 void TrainingLogger::logInlineEvent(const InlineEvent &Event,
299 const MLModelRunner &ModelRunner) {
300 L->startObservation();
301 size_t CurrentFeature = 0;
302 for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature)
303 L->logTensorValue(CurrentFeature,
304 reinterpret_cast<const char *>(
305 ModelRunner.getTensorUntyped(CurrentFeature)));
308 for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size(); ++I) {
309 const char *RawData =
310 reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I));
311 L->logTensorValue(CurrentFeature, RawData);
315 assert(CurrentFeature == DefaultDecisionPos);
316 L->logTensorValue(DefaultDecisionPos,
317 reinterpret_cast<const char *>(&Event.DefaultDecision));
318 L->logTensorValue(DecisionPos,
319 reinterpret_cast<const char *>(&Event.AdvisedDecision));
321 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
322 L->logReward(Event.Reward);
324 // For debugging / later use
325 Effects.push_back(Event.Effect);
328 DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
329 Module &M, ModuleAnalysisManager &MAM,
330 std::unique_ptr<MLModelRunner> ModelRunner,
331 std::function<bool(CallBase &)> GetDefaultAdvice,
332 std::unique_ptr<TrainingLogger> Logger)
333 : MLInlineAdvisor(M, MAM, std::move(ModelRunner), GetDefaultAdvice),
334 IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())),
335 Logger(std::move(Logger)),
336 InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
337 CurrentNativeSize(InitialNativeSize) {
338 // We cannot have the case of neither inference nor logging.
339 assert(IsDoingInference || isLogging());
342 std::optional<size_t>
343 DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
344 if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
347 FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
349 F.getParent()->getContext().emitError(
350 "Native size estimator is
not present
.");
356 std::unique_ptr<MLInlineAdvice>
357 DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
358 return std::make_unique<LoggingMLInlineAdvice>(
360 /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
362 /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
363 /*CalleeSizeEstimateBefore=*/
364 getNativeSizeEstimate(*CB.getCalledFunction()),
365 /*DefaultDecision=*/true, /*Mandatory*/ true);
368 std::unique_ptr<MLInlineAdvice>
369 DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
370 CallBase &CB, OptimizationRemarkEmitter &ORE) {
371 if (IsDoingInference && !isLogging())
372 return MLInlineAdvisor::getAdviceFromModel(CB, ORE);
374 bool DefaultAdvice = GetDefaultAdvice(CB);
375 auto Recommendation =
376 IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>())
378 return std::make_unique<LoggingMLInlineAdvice>(
380 /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
382 /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
383 /*CalleeSizeEstimateBefore=*/
384 getNativeSizeEstimate(*CB.getCalledFunction()),
385 /*DefaultDecision=*/DefaultAdvice);
388 size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
389 if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
393 if (F.isDeclaration())
395 Ret += *getNativeSizeEstimate(F);
400 std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
401 Module &M, ModuleAnalysisManager &MAM,
402 std::function<bool(CallBase &)> GetDefaultAdvice) {
403 auto &Ctx = M.getContext();
404 std::unique_ptr<MLModelRunner> Runner;
405 if (TFModelUnderTrainingPath.empty())
406 Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures()));
408 Runner = ModelUnderTrainingRunner::createAndEnsureValid(
409 Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(),
410 TFOutputSpecOverride);
413 std::unique_ptr<TrainingLogger> Logger;
414 if (!TrainingLog.empty())
415 Logger = std::make_unique<TrainingLogger>(
416 TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get()));
418 return std::make_unique<DevelopmentModeMLInlineAdvisor>(
419 M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger));
421 #endif // defined(LLVM_HAVE_TFLITE)