//===- MLRegAllocEvictAdvisor.cpp - ML eviction advisor -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implementation of the ML eviction advisor and reward injection pass
//
//===----------------------------------------------------------------------===//
#include "AllocationOrder.h"
#include "RegAllocEvictionAdvisor.h"
#include "RegAllocGreedy.h"
#include "llvm/Analysis/InteractiveModelRunner.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TFLITE)
#include "llvm/Analysis/ModelUnderTrainingRunner.h"
#include "llvm/Analysis/NoInferenceModelRunner.h"
#include "llvm/Analysis/Utils/TrainingLogger.h"
#endif
#include "MLRegAllocEvictAdvisor.h"
#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"

#include <array>
#include <bitset>
#include <memory>
#include <unordered_map>

using namespace llvm;

#define DEBUG_TYPE "ml-regalloc"
// Generated header in release (AOT) mode
#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
#include "RegAllocEvictModel.h"
using CompiledModelType = RegAllocEvictModel;
#else
using CompiledModelType = NoopSavedModelImpl;
#endif

static cl::opt<std::string> InteractiveChannelBaseName(
    "regalloc-evict-interactive-channel-base", cl::Hidden,
    cl::desc(
        "Base file path for the interactive mode. The incoming filename should "
        "have the name <regalloc-evict-interactive-channel-base>.in, while the "
        "outgoing name should be "
        "<regalloc-evict-interactive-channel-base>.out"));
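
// Illustrative usage (hypothetical path): with
// -regalloc-evict-interactive-channel-base=/tmp/rae, the advisor writes its
// feature tensors to /tmp/rae.out and reads the index-to-evict decision from
// /tmp/rae.in, via the InteractiveModelRunner constructed further below.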
static cl::opt<unsigned> MaxEvictionCount(
    "mlregalloc-max-eviction-count", cl::Hidden,
    cl::desc("The maximum number of times a live range can be "
             "evicted before preventing it from being evicted"));
// Options that only make sense in development mode
#ifdef LLVM_HAVE_TFLITE
#include "RegAllocScore.h"
#include "llvm/Analysis/Utils/TFUtils.h"

static cl::opt<std::string> TrainingLog(
    "regalloc-training-log", cl::Hidden,
    cl::desc("Training log for the register allocator eviction model"));

static cl::opt<std::string> ModelUnderTraining(
    "regalloc-model", cl::Hidden,
    cl::desc("The model being trained for register allocation eviction"));

static cl::opt<bool> EnableDevelopmentFeatures(
    "regalloc-enable-development-features", cl::Hidden,
    cl::desc("Whether or not to enable features under development for the ML "
             "regalloc advisor"));
#else
static const bool EnableDevelopmentFeatures = false;
#endif // #ifdef LLVM_HAVE_TFLITE
/// The score injection pass.
/// This pass calculates the score for a function and inserts it in the log,
/// but this happens only in development mode. It's a no-op otherwise.
namespace llvm {
extern cl::opt<unsigned> EvictInterferenceCutoff;
class RegAllocScoring : public MachineFunctionPass {
public:
  static char ID;

  RegAllocScoring() : MachineFunctionPass(ID) {
    initializeRegAllocScoringPass(*PassRegistry::getPassRegistry());
  }

  ~RegAllocScoring() override = default;

  StringRef getPassName() const override {
    return "Register Allocation Pass Scoring";
  }

  /// RegAllocReward analysis usage.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
    AU.addRequired<RegAllocPriorityAdvisorAnalysis>();
    AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  /// Performs this pass
  bool runOnMachineFunction(MachineFunction &) override;
};

char RegAllocScoring::ID = 0;
FunctionPass *createRegAllocScoringPass() { return new RegAllocScoring(); }

} // namespace llvm

INITIALIZE_PASS(RegAllocScoring, "regallocscoringpass",
                "Register Allocation Scoring Pass", false, false)
// ===================================
// Common ML Advisor declarations
// ===================================

// The model can only accept a specified number of opcodes and will error if
// fed an opcode it hasn't seen before. This constant sets the current cutoff.
static const int OpcodeValueCutoff = 17716;

// Most features are as described above, so we'll reuse this vector in defining
// them.
static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};
// For each interfering live range (incl. the candidate) we collect a number of
// features. However, because the features are of different types (and because
// of ML best practices), we organize the tensors per feature, not per
// candidate. Each such tensor has a scalar value corresponding to the
// interfering live range at that position, in the order in AllocationOrder.
// The last position corresponds to the virt reg seeking allocation.
// Exception to all that is the progression feature, which is just a scalar (see
// its documentation for details).
// Note on naming: the "_by_max" are normalized using the largest value of that
// tensor, as observed in the current decision making stage (i.e. for the
// current call to the advisor's tryFindEvictionCandidate)
//
// The feature list format: type, name, shape, documentation.
// Note: we can really just use int64 and float, hence the modeling of some
// bools as int64 values.
#define RA_EVICT_FEATURES_LIST(M)                                             \
  M(int64_t, mask, PerLiveRangeShape,                                         \
    "boolean values, 0 for unavailable candidates (i.e. if a position is 0, " \
    "it "                                                                     \
    "can't be evicted)")                                                      \
  M(int64_t, is_free, PerLiveRangeShape,                                      \
    "boolean values, 1 if this phys reg is actually free (no interferences)") \
  M(float, nr_urgent, PerLiveRangeShape,                                      \
    "number of 'urgent' intervals, normalized. Urgent are those that are OK " \
    "to break cascades")                                                      \
  M(float, nr_broken_hints, PerLiveRangeShape,                                \
    "if this position were evicted, how many broken hints would there be")    \
  M(int64_t, is_hint, PerLiveRangeShape,                                      \
    "is this a preferred phys reg for the candidate")                         \
  M(int64_t, is_local, PerLiveRangeShape,                                     \
    "is this live range local to a basic block")                              \
  M(float, nr_rematerializable, PerLiveRangeShape,                            \
    "nr rematerializable ranges")                                             \
  M(float, nr_defs_and_uses, PerLiveRangeShape,                               \
    "bb freq - weighed nr defs and uses")                                     \
  M(float, weighed_reads_by_max, PerLiveRangeShape,                           \
    "bb freq - weighed nr of reads, normalized")                              \
  M(float, weighed_writes_by_max, PerLiveRangeShape,                          \
    "bb freq - weighed nr of writes, normalized")                             \
  M(float, weighed_read_writes_by_max, PerLiveRangeShape,                     \
    "bb freq - weighed nr of uses that are both read and writes, normalized") \
  M(float, weighed_indvars_by_max, PerLiveRangeShape,                         \
    "bb freq - weighed nr of uses that are indvars, normalized")              \
  M(float, hint_weights_by_max, PerLiveRangeShape,                            \
    "bb freq - weighed nr of uses that are hints, normalized")                \
  M(float, start_bb_freq_by_max, PerLiveRangeShape,                           \
    "the freq in the start block, normalized")                                \
  M(float, end_bb_freq_by_max, PerLiveRangeShape,                             \
    "freq of end block, normalized")                                          \
  M(float, hottest_bb_freq_by_max, PerLiveRangeShape,                         \
    "hottest BB freq, normalized")                                            \
  M(float, liverange_size, PerLiveRangeShape,                                 \
    "size (instr index diff) of the LR")                                      \
  M(float, use_def_density, PerLiveRangeShape,                                \
    "the max weight, as computed by the manual heuristic")                    \
  M(int64_t, max_stage, PerLiveRangeShape,                                    \
    "largest stage of an interval in this LR")                                \
  M(int64_t, min_stage, PerLiveRangeShape,                                    \
    "lowest stage of an interval in this LR")                                 \
  M(float, progress, {1}, "ratio of current queue size to initial size")
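
// A layout sketch (illustrative, not part of the feature documentation above):
// each per-live-range feature is a [1 x NumberOfInterferences] tensor. Column
// I < CandidateVirtRegPos describes the interference found at position I of
// the AllocationOrder, and the last column describes the virt reg currently
// seeking allocation, e.g.
//   mask     = [1, 0, 1, ..., 1]   // 0 means that position can't be evicted
//   is_hint  = [0, 0, 1, ..., 0]
//   progress = [0.65]              // the only scalar feature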
#ifdef LLVM_HAVE_TFLITE
#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M)                                 \
  M(int64_t, instructions, InstructionsShape,                                 \
    "Opcodes of the instructions covered by the eviction problem")

#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M)                                 \
  M(int64_t, instructions_mapping, InstructionsMappingShape,                  \
    "A binary matrix mapping LRs to instruction opcodes")                     \
  M(float, mbb_frequencies, MBBFrequencyShape,                                \
    "A vector of machine basic block frequencies")                            \
  M(int64_t, mbb_mapping, InstructionsShape,                                  \
    "A vector of indices mapping instructions to MBBs")
#else
#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M)
#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M)
#endif
// The model learns to pick one of the mask == 1 interferences. This is the
// name of the output tensor. The contract with the model is that the output
// will be guaranteed to be a mask == 1 position. Using a macro here to
// avoid 'not used' warnings (and keep cond compilation to a minimum)
#define DecisionName "index_to_evict"
static const TensorSpec DecisionSpec =
    TensorSpec::createSpec<int64_t>(DecisionName, {1});
// Named features index.
enum FeatureIDs {
#define _FEATURE_IDX_SIMPLE(_, name, __, ___) name
#define _FEATURE_IDX(A, B, C, D) _FEATURE_IDX_SIMPLE(A, B, C, D),
  RA_EVICT_FEATURES_LIST(_FEATURE_IDX) FeatureCount,
#ifdef LLVM_HAVE_TFLITE
  RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX_SIMPLE) = FeatureCount,
#else
  RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX)
#endif // #ifdef LLVM_HAVE_TFLITE
  RA_EVICT_REST_DEVELOPMENT_FEATURES(_FEATURE_IDX) FeaturesWithDevelopmentCount
};
#undef _FEATURE_IDX_SIMPLE
#undef _FEATURE_IDX
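
// Rough expansion sketch of the enum above (illustrative only): in
// LLVM_HAVE_TFLITE builds it comes out roughly as
//   enum FeatureIDs { mask, is_free, ..., progress, FeatureCount,
//                     instructions = FeatureCount, instructions_mapping,
//                     mbb_frequencies, mbb_mapping,
//                     FeaturesWithDevelopmentCount };
// i.e. FeatureCount covers only the production features, and the development
// features alias onto the tail starting at FeatureCount.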
// The ML advisor will typically have a sparse input to the evaluator, because
// various phys regs won't be available. It's easier (maintenance-wise) to
// bulk-reset the state of the evaluator each time we are about to use it
// again.
template <typename T> size_t getTotalSize(const std::vector<int64_t> &Shape) {
  size_t Ret = sizeof(T);
  for (const auto V : Shape)
    Ret *= V;
  return Ret;
}
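
// For example (illustrative): getTotalSize<float>(PerLiveRangeShape) is
// sizeof(float) * 1 * NumberOfInterferences bytes, i.e. exactly the byte count
// std::memset needs below in order to zero one per-live-range feature tensor.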
void resetInputs(MLModelRunner &Runner) {
#define _RESET(TYPE, NAME, SHAPE, __)                                         \
  std::memset(Runner.getTensorUntyped(FeatureIDs::NAME), 0,                   \
              getTotalSize<TYPE>(SHAPE));
  RA_EVICT_FEATURES_LIST(_RESET)
  if (EnableDevelopmentFeatures) {
    RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_RESET)
    RA_EVICT_REST_DEVELOPMENT_FEATURES(_RESET)
  }
#undef _RESET
}
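
// One _RESET expansion, shown as a sketch for the 'mask' feature:
//   std::memset(Runner.getTensorUntyped(FeatureIDs::mask), 0,
//               getTotalSize<int64_t>(PerLiveRangeShape));
// so every feature buffer starts from all-zeros before a new eviction query.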
// Per-live interval components that get aggregated into the feature values
// that will be passed to the evaluator.
struct LIFeatureComponents {
  double R = 0;
  double W = 0;
  double RW = 0;
  double IndVarUpdates = 0;
  double HintWeights = 0.0;
  int64_t NumDefsAndUses = 0;
  float HottestBlockFreq = 0.0;
  bool IsRemat = false;
};

using CandidateRegList =
    std::array<std::pair<MCRegister, bool>, NumberOfInterferences>;
using FeaturesListNormalizer =
    llvm::SmallVector<float, FeatureIDs::FeatureCount>;
/// The ML evictor (commonalities between release and development mode)
class MLEvictAdvisor : public RegAllocEvictionAdvisor {
public:
  MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
                 MLModelRunner *Runner, const MachineBlockFrequencyInfo &MBFI,
                 const MachineLoopInfo &Loops);

protected:
  const RegAllocEvictionAdvisor &getDefaultAdvisor() const {
    return static_cast<const RegAllocEvictionAdvisor &>(DefaultAdvisor);
  }

  // The assumption is that if the Runner could not be constructed, we emitted
  // an error, and we shouldn't be asking for it here.
  const MLModelRunner &getRunner() const { return *Runner; }

  /// This just calls Evaluate on the Runner, but in the development mode
  /// case, if we're just capturing the log of the default advisor, it needs
  /// to call the latter instead, so we need to pass all the necessary
  /// parameters for it. In the development case, it will also log.
  virtual int64_t
  tryFindEvictionCandidatePosition(const LiveInterval &VirtReg,
                                   const AllocationOrder &Order,
                                   unsigned OrderLimit, uint8_t CostPerUseLimit,
                                   const SmallVirtRegSet &FixedRegisters) const;

  /// Load the features of the given VirtReg (allocated or not) at column Pos,
  /// but if that can't be evicted, return false instead.
  bool
  loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg,
                           bool IsHint, const SmallVirtRegSet &FixedRegisters,
                           llvm::SmallVectorImpl<float> &Largest, size_t Pos,
                           SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const;

private:
  static float getInitialQueueSize(const MachineFunction &MF);

  MCRegister tryFindEvictionCandidate(
      const LiveInterval &VirtReg, const AllocationOrder &Order,
      uint8_t CostPerUseLimit,
      const SmallVirtRegSet &FixedRegisters) const override;

  void extractFeatures(const SmallVectorImpl<const LiveInterval *> &Intervals,
                       llvm::SmallVectorImpl<float> &Largest, size_t Pos,
                       int64_t IsHint, int64_t LocalIntfsCount, float NumUrgent,
                       SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const;

  // Point-in-time: we didn't learn this, so we always delegate to the
  // default advisor.
  bool canEvictHintInterference(
      const LiveInterval &VirtReg, MCRegister PhysReg,
      const SmallVirtRegSet &FixedRegisters) const override {
    return getDefaultAdvisor().canEvictHintInterference(VirtReg, PhysReg,
                                                        FixedRegisters);
  }

  const LIFeatureComponents &
  getLIFeatureComponents(const LiveInterval &LI) const;

  // Hold on to a default advisor for:
  // 1) the implementation of canEvictHintInterference, because we didn't
  // learn that nuance yet; 2) for bootstrapping (logging) in the development
  // mode.
  const DefaultEvictionAdvisor DefaultAdvisor;
  MLModelRunner *const Runner;
  const MachineBlockFrequencyInfo &MBFI;
  const MachineLoopInfo &Loops;

  // Indices of those features we don't want to normalize.
  // This could be static and shared, but its initialization is non-trivial.
  std::bitset<FeatureIDs::FeatureCount> DoNotNormalize;
  const float InitialQSize;

  using RegID = unsigned;
  mutable DenseMap<RegID, LIFeatureComponents> CachedFeatures;

  mutable std::unordered_map<unsigned, unsigned> VirtRegEvictionCounts;

  void onEviction(Register RegBeingEvicted) const {
    // If we cannot find the virtual register in the map, we just assume it has
    // not been evicted before and thus has a value of zero (which is what the
    // subscript operator returns by default).
    ++VirtRegEvictionCounts[RegBeingEvicted.id()];
  }

  unsigned getEvictionCount(Register Reg) const {
    auto EvictionCountIt = VirtRegEvictionCounts.find(Reg.id());
    if (EvictionCountIt != VirtRegEvictionCounts.end())
      return EvictionCountIt->second;
    return 0;
  }
};
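
// Sketch of how the eviction-count throttle above is used (the call sites are
// in loadInterferenceFeatures / tryFindEvictionCandidate below): interferences
// that were already evicted too often are simply not offered to the model,
// roughly
//   if (getEvictionCount(Intf->reg()) > MaxEvictionCount && !Urgent)
//     return false; // mask this position out of the eviction problem
// which bounds how often an adversarial policy can re-evict the same range.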
#define _DECL_FEATURES(type, name, shape, _)                                  \
  TensorSpec::createSpec<type>(#name, shape),
// ===================================
// Release (AOT) - specifics
// ===================================
class ReleaseModeEvictionAdvisorAnalysis final
    : public RegAllocEvictionAdvisorAnalysis {
public:
  ReleaseModeEvictionAdvisorAnalysis()
      : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) {
    if (EnableDevelopmentFeatures) {
      InputFeatures = {RA_EVICT_FEATURES_LIST(
          _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES)
                              RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)};
    } else {
      InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
    }
  }
  // support for isa<> and dyn_cast.
  static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
    return R->getAdvisorMode() == AdvisorMode::Release;
  }

private:
  std::vector<TensorSpec> InputFeatures;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
    AU.addRequired<MachineLoopInfoWrapperPass>();
    RegAllocEvictionAdvisorAnalysis::getAnalysisUsage(AU);
  }

  std::unique_ptr<RegAllocEvictionAdvisor>
  getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
    if (!Runner) {
      if (InteractiveChannelBaseName.empty())
        Runner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
            MF.getFunction().getContext(), InputFeatures, DecisionName);
      else
        Runner = std::make_unique<InteractiveModelRunner>(
            MF.getFunction().getContext(), InputFeatures, DecisionSpec,
            InteractiveChannelBaseName + ".out",
            InteractiveChannelBaseName + ".in");
    }
    return std::make_unique<MLEvictAdvisor>(
        MF, RA, Runner.get(),
        getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(),
        getAnalysis<MachineLoopInfoWrapperPass>().getLI());
  }
  std::unique_ptr<MLModelRunner> Runner;
};
// ===================================
// Development mode-specifics
// ===================================

#ifdef LLVM_HAVE_TFLITE
static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
// Features we bind on the model. The tensor names have a prefix, and we also
// need to include some tensors that are expected to be present by the
// training framework.
// TODO: can we just get rid of these?
#define _DECL_TRAIN_FEATURES(type, name, shape, _)                            \
  TensorSpec::createSpec<type>(std::string("action_") + #name, shape),
class DevelopmentModeEvictAdvisor : public MLEvictAdvisor {
public:
  DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
                              MLModelRunner *Runner,
                              const MachineBlockFrequencyInfo &MBFI,
                              const MachineLoopInfo &Loops, Logger *Log)
      : MLEvictAdvisor(MF, RA, Runner, MBFI, Loops), Log(Log) {}

private:
  int64_t tryFindEvictionCandidatePosition(
      const LiveInterval &VirtReg, const AllocationOrder &Order,
      unsigned OrderLimit, uint8_t CostPerUseLimit,
      const SmallVirtRegSet &FixedRegisters) const override;

  Logger *const Log;
};
class DevelopmentModeEvictionAdvisorAnalysis final
    : public RegAllocEvictionAdvisorAnalysis {
public:
  DevelopmentModeEvictionAdvisorAnalysis()
      : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) {
    if (EnableDevelopmentFeatures) {
      InputFeatures = {RA_EVICT_FEATURES_LIST(
          _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES)
                              RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)};
      TrainingInputFeatures = {
          RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
              RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_TRAIN_FEATURES)
                  RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_TRAIN_FEATURES)
                      TensorSpec::createSpec<float>("action_discount", {1}),
          TensorSpec::createSpec<int32_t>("action_step_type", {1}),
          TensorSpec::createSpec<float>("action_reward", {1})};
    } else {
      InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
      TrainingInputFeatures = {
          RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
              TensorSpec::createSpec<float>("action_discount", {1}),
          TensorSpec::createSpec<int32_t>("action_step_type", {1}),
          TensorSpec::createSpec<float>("action_reward", {1})};
    }
  }
  // support for isa<> and dyn_cast.
  static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
    return R->getAdvisorMode() == AdvisorMode::Development;
  }
  void logRewardIfNeeded(const MachineFunction &MF,
                         llvm::function_ref<float()> GetReward) override {
    if (!Log || !Log->hasAnyObservationForContext(MF.getName()))
      return;
    // The function pass manager would run all the function passes for a
    // function, so we assume the last context belongs to this function. If
    // this invariant ever changes, we can implement at that time switching
    // contexts. At this point, it'd be an error
    if (Log->currentContext() != MF.getName()) {
      MF.getFunction().getContext().emitError(
          "The training log context shouldn't have changed.");
    }
    if (Log->hasObservationInProgress())
      Log->logReward<float>(GetReward());
  }

private:
  std::vector<TensorSpec> InputFeatures;
  std::vector<TensorSpec> TrainingInputFeatures;
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
    AU.addRequired<MachineLoopInfoWrapperPass>();
    RegAllocEvictionAdvisorAnalysis::getAnalysisUsage(AU);
  }
  bool doInitialization(Module &M) override {
    LLVMContext &Ctx = M.getContext();
    if (ModelUnderTraining.empty() && TrainingLog.empty()) {
      Ctx.emitError("Regalloc development mode should be requested with at "
                    "least logging enabled and/or a training model");
      return false;
    }
    if (ModelUnderTraining.empty())
      Runner = std::make_unique<NoInferenceModelRunner>(Ctx, InputFeatures);
    else
      Runner = ModelUnderTrainingRunner::createAndEnsureValid(
          Ctx, ModelUnderTraining, DecisionName, TrainingInputFeatures);
    if (!Runner) {
      Ctx.emitError("Regalloc: could not set up the model runner");
      return false;
    }
    if (TrainingLog.empty())
      return false;
    std::error_code EC;
    auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
    if (EC) {
      M.getContext().emitError(EC.message() + ":" + TrainingLog);
      return false;
    }
    std::vector<TensorSpec> LFS = InputFeatures;
    if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(Runner.get()))
      append_range(LFS, MUTR->extraOutputsForLoggingSpecs());
    // We always log the output; in particular, if we're not evaluating, we
    // don't have an output spec json file. That's why we handle the
    // 'normal' output separately.
    LFS.push_back(DecisionSpec);

    Log = std::make_unique<Logger>(std::move(OS), LFS, Reward,
                                   /*IncludeReward*/ true);
    return false;
  }
  std::unique_ptr<RegAllocEvictionAdvisor>
  getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
    if (!Runner)
      return nullptr;
    if (Log)
      Log->switchContext(MF.getName());
    return std::make_unique<DevelopmentModeEvictAdvisor>(
        MF, RA, Runner.get(),
        getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(),
        getAnalysis<MachineLoopInfoWrapperPass>().getLI(), Log.get());
  }

  std::unique_ptr<MLModelRunner> Runner;
  std::unique_ptr<Logger> Log;
};
#endif // #ifdef LLVM_HAVE_TFLITE
float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) {
  auto &MRI = MF.getRegInfo();
  unsigned NumUsedRegs = 0;
  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!MRI.reg_nodbg_empty(Reg))
      ++NumUsedRegs;
  }
  return static_cast<float>(NumUsedRegs);
}
MLEvictAdvisor::MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
                               MLModelRunner *Runner,
                               const MachineBlockFrequencyInfo &MBFI,
                               const MachineLoopInfo &Loops)
    : RegAllocEvictionAdvisor(MF, RA), DefaultAdvisor(MF, RA),
      Runner(std::move(Runner)), MBFI(MBFI), Loops(Loops),
      InitialQSize(MLEvictAdvisor::getInitialQueueSize(MF)) {
  assert(this->Runner);
  Runner->switchContext(MF.getName());
  DoNotNormalize.set(FeatureIDs::mask);
  DoNotNormalize.set(FeatureIDs::is_free);
  DoNotNormalize.set(FeatureIDs::is_hint);
  DoNotNormalize.set(FeatureIDs::is_local);
  DoNotNormalize.set(FeatureIDs::min_stage);
  DoNotNormalize.set(FeatureIDs::max_stage);
  DoNotNormalize.set(FeatureIDs::progress);
}
int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition(
    const LiveInterval &, const AllocationOrder &, unsigned, uint8_t,
    const SmallVirtRegSet &) const {
  int64_t Ret = Runner->evaluate<int64_t>();
  assert(Ret <= CandidateVirtRegPos);
  return Ret;
}
bool MLEvictAdvisor::loadInterferenceFeatures(
    const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
    const SmallVirtRegSet &FixedRegisters,
    llvm::SmallVectorImpl<float> &Largest, size_t Pos,
    llvm::SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const {
  // It is only possible to evict virtual register interference.
  if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) {
    return false;
  }

  const bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);
  int64_t LocalIntfs = 0;
  float NumUrgent = 0.0f;

  // The cascade tracking is the same as in the default advisor
  unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());

  SmallVector<const LiveInterval *, MaxInterferences> InterferingIntervals;
  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
    // Different from the default heuristic, we don't make any assumptions
    // about what having more than 10 results in the query may mean.
    const auto &IFIntervals = Q.interferingVRegs(EvictInterferenceCutoff);
    if (IFIntervals.empty() && InterferingIntervals.empty())
      continue;
    if (IFIntervals.size() >= EvictInterferenceCutoff)
      return false;
    InterferingIntervals.append(IFIntervals.begin(), IFIntervals.end());
    for (const LiveInterval *Intf : reverse(IFIntervals)) {
      assert(Intf->reg().isVirtual() &&
             "Only expecting virtual register interference from query");
      // This is the same set of legality checks as in the default case: don't
      // try to evict fixed regs or 'done' ones. Also don't break cascades,
      // except in the urgent case, with the same nuances used in the default
      // case.
      // We could try sharing this between the advisors, but it may end up
      // more complex than it is right now.
      if (FixedRegisters.count(Intf->reg()))
        return false;
      if (RA.getExtraInfo().getStage(*Intf) == RS_Done)
        return false;
      const bool Urgent =
          !VirtReg.isSpillable() &&
          (Intf->isSpillable() ||
           RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) <
               RegClassInfo.getNumAllocatableRegs(
                   MRI->getRegClass(Intf->reg())));

      unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg());
      // There is a potential that the model could be adversarial and
      // continually evict live ranges over and over again, leading to a
      // large amount of compile time being spent in regalloc. If we hit the
      // threshold, prevent the range from being evicted. We still let the
      // range through if it is urgent as we are required to produce an
      // eviction if the candidate is not spillable.
      if (getEvictionCount(Intf->reg()) > MaxEvictionCount && !Urgent)
        return false;

      // Only evict older cascades or live ranges without a cascade.
      if (Cascade <= IntfCascade) {
        if (!Urgent)
          return false;
        NumUrgent += 1.0f;
      }

      LocalIntfs += (IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
                     (!EnableLocalReassign || !canReassign(*Intf, PhysReg)));
    }
  }
  // OK, so if we made it this far, this LR is an eviction candidate, load its
  // features.
  extractFeatures(InterferingIntervals, Largest, Pos, IsHint, LocalIntfs,
                  NumUrgent, LRPosInfo);
  return true;
}
MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
    const LiveInterval &VirtReg, const AllocationOrder &Order,
    uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
  auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit);
  if (!MaybeOrderLimit)
    return MCRegister::NoRegister;
  unsigned OrderLimit = *MaybeOrderLimit;

  // The heuristic sets initial costs such that, if CostPerUseLimit is
  // max<uint8_t>, then any of the costs of the legally-evictable intervals
  // would be lower. When that happens, one of those will be selected.
  // Therefore, we allow the candidate to be selected, unless the candidate is
  // unspillable, in which case it would be incorrect to not find a register
  // for it.
  const bool MustFindEviction =
      (!VirtReg.isSpillable() && CostPerUseLimit == static_cast<uint8_t>(~0u));
  // Number of available candidates - if 0, no need to continue.
  size_t Available = 0;
  // Make sure we don't have leftover partial state from an attempt where we
  // had no available candidates and bailed out early.
  resetInputs(*Runner);

  // Track the index->register mapping because AllocationOrder doesn't do that
  // and we'd have to scan it.
  // Also track their mask, to write asserts/debug.
  CandidateRegList Regs;
  Regs.fill({0, false});

  // Track the largest value of features seen during this eviction session. We
  // only normalize (some of) the float features, but it's just simpler to
  // dimension 'Largest' to all the features, especially since we have the
  // 'DoNotNormalize' list.
  FeaturesListNormalizer Largest(FeatureIDs::FeatureCount, 0.0);

  // Same overall idea as in the default eviction policy - we visit the values
  // of AllocationOrder one at a time. If it's not legally available, we mask
  // off the corresponding feature column (==do nothing because we already
  // reset all the features to 0). Use Pos to capture the column we load
  // features at - in AllocationOrder order.
  size_t Pos = 0;
  SmallVector<LRStartEndInfo, NumberOfInterferences> LRPosInfo;
  for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
       ++I, ++Pos) {
    MCRegister PhysReg = *I;
    assert(!Regs[Pos].second);
    if (!canAllocatePhysReg(CostPerUseLimit, PhysReg)) {
      continue;
    }
    if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters,
                                 Largest, Pos, LRPosInfo)) {
      ++Available;
      Regs[Pos] = std::make_pair(PhysReg, true);
    }
  }
  if (Available == 0) {
    // Nothing to decide, nothing to learn.
    assert(!MustFindEviction);
    return MCRegister::NoRegister;
  }
  const size_t ValidPosLimit = Pos;
  // If we must find eviction, the candidate should be masked out of the
  // decision making process.
  Regs[CandidateVirtRegPos].second = !MustFindEviction;
  if (!MustFindEviction)
    extractFeatures(SmallVector<const LiveInterval *, 1>(1, &VirtReg), Largest,
                    CandidateVirtRegPos, /*IsHint*/ 0,
                    /*LocalIntfsCount*/ 0,
                    /*NumUrgent*/ 0.0, LRPosInfo);
  assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had "
                               "nothing to allocate initially.");
#ifdef LLVM_HAVE_TFLITE
  if (EnableDevelopmentFeatures) {
    extractInstructionFeatures(
        LRPosInfo, Runner,
        [this](SlotIndex InputIndex) -> int {
          auto *CurrentMachineInstruction =
              LIS->getInstructionFromIndex(InputIndex);
          if (!CurrentMachineInstruction) {
            return -1;
          }
          return CurrentMachineInstruction->getOpcode();
        },
        [this](SlotIndex InputIndex) -> float {
          auto *CurrentMachineInstruction =
              LIS->getInstructionFromIndex(InputIndex);
          return MBFI.getBlockFreqRelativeToEntryBlock(
              CurrentMachineInstruction->getParent());
        },
        [this](SlotIndex InputIndex) -> MachineBasicBlock * {
          auto *CurrentMachineInstruction =
              LIS->getInstructionFromIndex(InputIndex);
          return CurrentMachineInstruction->getParent();
        },
        FeatureIDs::instructions, FeatureIDs::instructions_mapping,
        FeatureIDs::mbb_frequencies, FeatureIDs::mbb_mapping,
        LIS->getSlotIndexes()->getLastIndex());
  }
#endif // #ifdef LLVM_HAVE_TFLITE
  // Normalize the features.
  for (auto &V : Largest)
    V = V ? V : 1.0;
  for (size_t FeatureIndex = 0; FeatureIndex < FeatureIDs::FeatureCount;
       ++FeatureIndex) {
    if (DoNotNormalize.test(FeatureIndex))
      continue;
    for (size_t Pos = 0; Pos < NumberOfInterferences; ++Pos) {
      Runner->getTensor<float>(FeatureIndex)[Pos] /= Largest[FeatureIndex];
    }
  }
  *Runner->getTensor<float>(FeatureIDs::progress) =
      static_cast<float>(RA.getQueueSize()) / InitialQSize;

  size_t CandidatePos = tryFindEvictionCandidatePosition(
      VirtReg, Order, OrderLimit, CostPerUseLimit, FixedRegisters);
  // The contract with the ML side is that CandidatePos is mask == 1 (i.e.
  // Regs[CandidatePos].second)
  assert(Regs[CandidatePos].second);
  if (CandidatePos == CandidateVirtRegPos) {
    assert(!MustFindEviction);
    return MCRegister::NoRegister;
  }
  assert(CandidatePos < ValidPosLimit);

  // Update information about how many times the virtual registers being
  // evicted have been evicted so that we can prevent the model from evicting
  // the same ranges continually and eating compile time.
  if (CandidatePos == CandidateVirtRegPos) {
    onEviction(VirtReg.reg());
  } else {
    for (MCRegUnit Unit : TRI->regunits(Regs[CandidatePos].first)) {
      LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
      const auto &IFIntervals = Q.interferingVRegs(EvictInterferenceCutoff);
      for (const LiveInterval *Intf : reverse(IFIntervals)) {
        onEviction(Intf->reg());
      }
    }
  }

  return Regs[CandidatePos].first;
}
const LIFeatureComponents &
MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const {
  RegID ID = LI.reg().id();
  LIFeatureComponents Empty;
  auto I = CachedFeatures.insert(std::make_pair(ID, Empty));
  LIFeatureComponents &Ret = I.first->getSecond();
  if (!I.second)
    return Ret;

  SmallPtrSet<MachineInstr *, 8> Visited;
  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();

  for (MachineRegisterInfo::reg_instr_nodbg_iterator
           I = MRI->reg_instr_nodbg_begin(LI.reg()),
           E = MRI->reg_instr_nodbg_end();
       I != E;) {
    MachineInstr *MI = &*(I++);

    ++Ret.NumDefsAndUses;
    if (!Visited.insert(MI).second)
      continue;

    if (MI->isIdentityCopy() || MI->isImplicitDef())
      continue;

    bool Reads, Writes;
    std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg());

    float Freq = MBFI.getBlockFreqRelativeToEntryBlock(MI->getParent());
    Ret.HottestBlockFreq = std::max(Freq, Ret.HottestBlockFreq);

    Ret.R += (Reads && !Writes) * Freq;
    Ret.W += (!Reads && Writes) * Freq;
    Ret.RW += (Reads && Writes) * Freq;

    auto *MBB = MI->getParent();
    auto *Loop = Loops.getLoopFor(MBB);
    bool IsExiting = Loop ? Loop->isLoopExiting(MBB) : false;

    if (Writes && IsExiting && LIS->isLiveOutOfMBB(LI, MBB))
      Ret.IndVarUpdates += Freq;

    if (MI->isCopy() && VirtRegAuxInfo::copyHint(MI, LI.reg(), TRI, *MRI))
      Ret.HintWeights += Freq;
  }
  Ret.IsRemat = VirtRegAuxInfo::isRematerializable(
      LI, *LIS, *VRM, *MF.getSubtarget().getInstrInfo());
  return Ret;
}
// Overall, this currently mimics what we do for weight calculation, but instead
// of accumulating the various features, we keep them separate.
void MLEvictAdvisor::extractFeatures(
    const SmallVectorImpl<const LiveInterval *> &Intervals,
    llvm::SmallVectorImpl<float> &Largest, size_t Pos, int64_t IsHint,
    int64_t LocalIntfsCount, float NumUrgent,
    SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const {
  int64_t NumDefsAndUses = 0;
  int64_t NumBrokenHints = 0;
  double R = 0.0;
  double W = 0.0;
  double RW = 0.0;
  double IndVarUpdates = 0.0;
  double HintWeights = 0.0;
  float StartBBFreq = 0.0;
  float EndBBFreq = 0.0;
  float HottestBlockFreq = 0.0;
  int32_t NumRematerializable = 0;
  float TotalWeight = 0.0;

  SlotIndex EndSI = LIS->getSlotIndexes()->getZeroIndex();
  SlotIndex StartSI = LIS->getSlotIndexes()->getLastIndex();
  int64_t MaxStage = 0;
  int64_t MinStage =
      Intervals.empty() ? 0 : std::numeric_limits<int64_t>::max();

  for (const auto *L : Intervals) {
    const LiveInterval &LI = *L;
    MaxStage = std::max<int64_t>(
        MaxStage, static_cast<int64_t>(RA.getExtraInfo().getStage(LI)));
    MinStage = std::min<int64_t>(
        MinStage, static_cast<int64_t>(RA.getExtraInfo().getStage(LI)));

    TotalWeight = std::max(TotalWeight, LI.weight());

    if (LI.beginIndex() < StartSI)
      StartSI = LI.beginIndex();

    if (LI.endIndex() > EndSI)
      EndSI = LI.endIndex();

    const LIFeatureComponents &LIFC = getLIFeatureComponents(LI);
    NumBrokenHints += VRM->hasPreferredPhys(LI.reg());

    NumDefsAndUses += LIFC.NumDefsAndUses;
    HottestBlockFreq = std::max(HottestBlockFreq, LIFC.HottestBlockFreq);
    R += LIFC.R;
    W += LIFC.W;
    RW += LIFC.RW;

    IndVarUpdates += LIFC.IndVarUpdates;

    HintWeights += LIFC.HintWeights;
    NumRematerializable += LIFC.IsRemat;

    if (EnableDevelopmentFeatures) {
      for (auto CurrentSegment : LI) {
        LRPosInfo.push_back(
            LRStartEndInfo{CurrentSegment.start, CurrentSegment.end, Pos});
      }
    }
  }
  size_t Size = 0;
  if (!Intervals.empty()) {
    StartBBFreq =
        MBFI.getBlockFreqRelativeToEntryBlock(LIS->getMBBFromIndex(StartSI));
    if (EndSI >= LIS->getSlotIndexes()->getLastIndex())
      EndSI = LIS->getSlotIndexes()->getLastIndex().getPrevIndex();
    EndBBFreq =
        MBFI.getBlockFreqRelativeToEntryBlock(LIS->getMBBFromIndex(EndSI));
    Size = StartSI.distance(EndSI);
  }
  // Set the features at the column 'Pos'.
#define SET(ID, TYPE, VAL)                                                    \
  do {                                                                        \
    Runner->getTensor<TYPE>(FeatureIDs::ID)[Pos] = static_cast<TYPE>(VAL);    \
    if (!DoNotNormalize.test(FeatureIDs::ID))                                 \
      Largest[FeatureIDs::ID] =                                               \
          std::max(Largest[FeatureIDs::ID], static_cast<float>(VAL));         \
  } while (false)
  SET(mask, int64_t, 1);
  SET(is_free, int64_t, Intervals.empty());
  SET(nr_urgent, float, NumUrgent);
  SET(nr_broken_hints, float, NumBrokenHints);
  SET(is_hint, int64_t, IsHint);
  SET(is_local, int64_t, LocalIntfsCount);
  SET(nr_rematerializable, float, NumRematerializable);
  SET(nr_defs_and_uses, float, NumDefsAndUses);
  SET(weighed_reads_by_max, float, R);
  SET(weighed_writes_by_max, float, W);
  SET(weighed_read_writes_by_max, float, RW);
  SET(weighed_indvars_by_max, float, IndVarUpdates);
  SET(hint_weights_by_max, float, HintWeights);
  SET(start_bb_freq_by_max, float, StartBBFreq);
  SET(end_bb_freq_by_max, float, EndBBFreq);
  SET(hottest_bb_freq_by_max, float, HottestBlockFreq);
  SET(liverange_size, float, Size);
  SET(use_def_density, float, TotalWeight);
  SET(max_stage, int64_t, MaxStage);
  SET(min_stage, int64_t, MinStage);
#undef SET
}
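
// Illustrative expansion of one SET invocation above: SET(nr_urgent, float,
// NumUrgent) becomes, roughly,
//   Runner->getTensor<float>(FeatureIDs::nr_urgent)[Pos] =
//       static_cast<float>(NumUrgent);
//   if (!DoNotNormalize.test(FeatureIDs::nr_urgent))
//     Largest[FeatureIDs::nr_urgent] =
//         std::max(Largest[FeatureIDs::nr_urgent],
//                  static_cast<float>(NumUrgent));
// so each call both writes the feature column and tracks the per-feature
// maximum used later for normalization.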
void llvm::extractInstructionFeatures(
    SmallVectorImpl<LRStartEndInfo> &LRPosInfo, MLModelRunner *RegallocRunner,
    function_ref<int(SlotIndex)> GetOpcode,
    function_ref<float(SlotIndex)> GetMBBFreq,
    function_ref<MachineBasicBlock *(SlotIndex)> GetMBBReference,
    const int InstructionsIndex, const int InstructionsMappingIndex,
    const int MBBFreqIndex, const int MBBMappingIndex,
    const SlotIndex LastIndex) {
  // This function extracts instruction based features relevant to the eviction
  // problem currently being solved. It ends up extracting the following
  // tensors:
  // 1 - A vector of size max instruction count. It contains the opcodes of the
  // instructions spanned by all the intervals in the current instance of the
  // eviction problem.
  // 2 - A binary mapping matrix of size (LR count * max
  // instruction count) which maps where the LRs are live to the actual opcodes
  // for which they are live.
  // 3 - A vector of size max supported MBB count storing MBB frequencies,
  // encompassing all of the MBBs covered by the eviction problem.
  // 4 - A vector of size max instruction count of indices to members of the MBB
  // frequency vector, mapping each instruction to its associated MBB.
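  // Layout sketch for the mapping matrix (sizes are illustrative): it is
  // stored row-major as a flat tensor, so the live range at eviction-problem
  // position P covering the I-th extracted instruction sets
  //   instructions_mapping[P * ModelMaxSupportedInstructionCount + I] = 1;
  // which is exactly the indexing used below.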
  // Start off by sorting the segments based on the beginning slot index.
  std::sort(
      LRPosInfo.begin(), LRPosInfo.end(),
      [](LRStartEndInfo A, LRStartEndInfo B) { return A.Begin < B.Begin; });
  size_t InstructionIndex = 0;
  size_t CurrentSegmentIndex = 0;
  SlotIndex CurrentIndex = LRPosInfo[0].Begin;
  std::map<MachineBasicBlock *, size_t> VisitedMBBs;
  size_t CurrentMBBIndex = 0;
  // This loop processes all the segments sequentially by starting at the
  // beginning slot index of the first segment, iterating through all the slot
  // indices before the end slot index of that segment (while checking for
  // overlaps with segments that start at greater slot indices). After hitting
  // that end index, the current segment being processed gets bumped until they
  // are all processed or the max instruction count is hit, where everything is
  // truncated.
  while (true) {
    // If the index that we are currently at is within the current segment and
    // we haven't hit the max instruction count, continue processing the current
    // segment.
    while (CurrentIndex <= LRPosInfo[CurrentSegmentIndex].End &&
           InstructionIndex < ModelMaxSupportedInstructionCount) {
      int CurrentOpcode = GetOpcode(CurrentIndex);
      // If the current machine instruction is null, skip it
      if (CurrentOpcode == -1) {
        // If we're currently at the last index in the SlotIndex analysis,
        // we can't go any further, so return from the function
        if (CurrentIndex >= LastIndex) {
          return;
        }
        CurrentIndex = CurrentIndex.getNextIndex();
        continue;
      }
      MachineBasicBlock *CurrentMBBReference = GetMBBReference(CurrentIndex);
      if (VisitedMBBs.count(CurrentMBBReference) == 0) {
        VisitedMBBs[CurrentMBBReference] = CurrentMBBIndex;
        ++CurrentMBBIndex;
      }
      extractMBBFrequency(CurrentIndex, InstructionIndex, VisitedMBBs,
                          GetMBBFreq, CurrentMBBReference, RegallocRunner,
                          MBBFreqIndex, MBBMappingIndex);
      // Current code assumes we're not going to get any disjointed segments
      assert(LRPosInfo[CurrentSegmentIndex].Begin <= CurrentIndex);
      RegallocRunner->getTensor<int64_t>(InstructionsIndex)[InstructionIndex] =
          CurrentOpcode < OpcodeValueCutoff ? CurrentOpcode : 0;
      // set value in the binary mapping matrix for the current instruction
      auto CurrentSegmentPosition = LRPosInfo[CurrentSegmentIndex].Pos;
      RegallocRunner->getTensor<int64_t>(
          InstructionsMappingIndex)[CurrentSegmentPosition *
                                        ModelMaxSupportedInstructionCount +
                                    InstructionIndex] = 1;
      // All of the segments are sorted based on the beginning slot index, but
      // this doesn't mean that the beginning slot index of the next segment is
      // after the end segment of the one being currently processed. This while
      // loop checks for overlapping segments and modifies the portion of the
      // column in the mapping matrix for the currently processed instruction
      // for the LR it is checking. Also make sure that the beginning of the
      // current segment we're checking for overlap in is less than the current
      // index, otherwise we're done checking overlaps.
      size_t OverlapCheckCurrentSegment = CurrentSegmentIndex + 1;
      while (OverlapCheckCurrentSegment < LRPosInfo.size() &&
             LRPosInfo[OverlapCheckCurrentSegment].Begin <= CurrentIndex) {
        auto OverlapCurrentSegmentPosition =
            LRPosInfo[OverlapCheckCurrentSegment].Pos;
        if (LRPosInfo[OverlapCheckCurrentSegment].End >= CurrentIndex) {
          RegallocRunner->getTensor<int64_t>(
              InstructionsMappingIndex)[OverlapCurrentSegmentPosition *
                                            ModelMaxSupportedInstructionCount +
                                        InstructionIndex] = 1;
        }
        ++OverlapCheckCurrentSegment;
      }
      ++InstructionIndex;
      if (CurrentIndex >= LastIndex) {
        return;
      }
      CurrentIndex = CurrentIndex.getNextIndex();
    }
    // if we've just finished processing through the last segment or if we've
    // hit the maximum number of instructions, break out of the loop.
    if (CurrentSegmentIndex == LRPosInfo.size() - 1 ||
        InstructionIndex >= ModelMaxSupportedInstructionCount) {
      break;
    }
    // If the segments are not overlapping, we need to move to the beginning
    // index of the next segment to avoid having instructions not attached to
    // any segment.
    if (LRPosInfo[CurrentSegmentIndex + 1].Begin >
        LRPosInfo[CurrentSegmentIndex].End) {
      CurrentIndex = LRPosInfo[CurrentSegmentIndex + 1].Begin;
    }
    ++CurrentSegmentIndex;
  }
}
void llvm::extractMBBFrequency(
    const SlotIndex CurrentIndex, const size_t CurrentInstructionIndex,
    std::map<MachineBasicBlock *, size_t> &VisitedMBBs,
    function_ref<float(SlotIndex)> GetMBBFreq,
    MachineBasicBlock *CurrentMBBReference, MLModelRunner *RegallocRunner,
    const int MBBFreqIndex, const int MBBMappingIndex) {
  size_t CurrentMBBIndex = VisitedMBBs[CurrentMBBReference];
  float CurrentMBBFreq = GetMBBFreq(CurrentIndex);
  if (CurrentMBBIndex < ModelMaxSupportedMBBCount) {
    RegallocRunner->getTensor<float>(MBBFreqIndex)[CurrentMBBIndex] =
        CurrentMBBFreq;
    RegallocRunner->getTensor<int64_t>(
        MBBMappingIndex)[CurrentInstructionIndex] = CurrentMBBIndex;
  }
}
// Development mode-specific implementations
#ifdef LLVM_HAVE_TFLITE

RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() {
  return new DevelopmentModeEvictionAdvisorAnalysis();
}

int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition(
    const LiveInterval &VirtReg, const AllocationOrder &Order,
    unsigned OrderLimit, uint8_t CostPerUseLimit,
    const SmallVirtRegSet &FixedRegisters) const {
  int64_t Ret = 0;
  if (isa<ModelUnderTrainingRunner>(getRunner())) {
    Ret = MLEvictAdvisor::tryFindEvictionCandidatePosition(
        VirtReg, Order, OrderLimit, CostPerUseLimit, FixedRegisters);
  } else {
    MCRegister PhysReg = getDefaultAdvisor().tryFindEvictionCandidate(
        VirtReg, Order, CostPerUseLimit, FixedRegisters);
    // Find the index of the selected PhysReg. We need it for logging,
    // otherwise this is wasted cycles (but so would starting development mode
    // without a model nor logging)
    if (!PhysReg)
      Ret = CandidateVirtRegPos;
    else
      for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit);
           I != E; ++I, ++Ret)
        if (*I == PhysReg)
          break;
  }
  if (TrainingLog.empty())
    return Ret;
  // TODO(mtrofin): when we support optional rewards, this can go away. In the
  // meantime, we log the "pretend" reward (0) for the previous observation
  // before starting a new one.
  if (Log->hasObservationInProgress())
    Log->logReward<float>(0.0);

  Log->startObservation();
  size_t CurrentFeature = 0;
  size_t FeatureCount = EnableDevelopmentFeatures
                            ? FeatureIDs::FeaturesWithDevelopmentCount
                            : FeatureIDs::FeatureCount;
  for (; CurrentFeature < FeatureCount; ++CurrentFeature) {
    Log->logTensorValue(CurrentFeature,
                        reinterpret_cast<const char *>(
                            getRunner().getTensorUntyped(CurrentFeature)));
  }
  if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(&getRunner()))
    for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size();
         ++I, ++CurrentFeature)
      Log->logTensorValue(
          CurrentFeature,
          reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I)));
  // The output is right after the features and the extra outputs
  Log->logTensorValue(CurrentFeature, reinterpret_cast<const char *>(&Ret));
  Log->endObservation();
  return Ret;
}
bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) {
  std::optional<float> CachedReward;
  auto GetReward = [&]() {
    if (!CachedReward)
      CachedReward = static_cast<float>(
          calculateRegAllocScore(
              MF, getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI())
              .getScore());
    return *CachedReward;
  };

  getAnalysis<RegAllocEvictionAdvisorAnalysis>().logRewardIfNeeded(MF,
                                                                   GetReward);
  getAnalysis<RegAllocPriorityAdvisorAnalysis>().logRewardIfNeeded(MF,
                                                                   GetReward);
  return false;
}
#endif // #ifdef LLVM_HAVE_TFLITE
RegAllocEvictionAdvisorAnalysis *llvm::createReleaseModeAdvisor() {
  return llvm::isEmbeddedModelEvaluatorValid<CompiledModelType>() ||
                 !InteractiveChannelBaseName.empty()
             ? new ReleaseModeEvictionAdvisorAnalysis()
             : nullptr;
}

// In all cases except development mode, we don't need scoring.
#if !defined(LLVM_HAVE_TFLITE)
bool RegAllocScoring::runOnMachineFunction(MachineFunction &) { return false; }
#endif