tools/llvm-exegesis/lib/Latency.cpp

   1 //===-- Latency.cpp ---------------------------------------------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "Latency.h"
  11
  12 #include "Assembler.h"
  13 #include "BenchmarkRunner.h"
  14 #include "MCInstrDescView.h"
  15 #include "llvm/ADT/STLExtras.h"
  16 #include "llvm/MC/MCInst.h"
  17 #include "llvm/MC/MCInstBuilder.h"
  18 #include "llvm/Support/FormatVariadic.h"
  19
  20 namespace exegesis {
  21
  22 struct ExecutionClass {
  23   ExecutionMode Mask;
  24   const char *Description;
  25 } static const kExecutionClasses[] = {
  26     {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS |
  27          ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS,
  28      "Repeating a single implicitly serial instruction"},
  29     {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS,
  30      "Repeating a single explicitly serial instruction"},
  31     {ExecutionMode::SERIAL_VIA_MEMORY_INSTR |
  32          ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR,
  33      "Repeating two instructions"},
  34 };
  35
  // Cap handed to computeAliasingInstructions() by appendCodeTemplates(): at
  // most this many partner instructions are collected for the
  // SERIAL_VIA_NON_MEMORY_INSTR strategy.
  36 static constexpr size_t kMaxAliasingInstructions = 10;
  37
  38 static std::vector<Instruction>
  39 computeAliasingInstructions(const LLVMState &State, const Instruction &Instr,
  40                             size_t MaxAliasingInstructions) {
  41   // Randomly iterate the set of instructions.
  42   std::vector<unsigned> Opcodes;
  43   Opcodes.resize(State.getInstrInfo().getNumOpcodes());
  44   std::iota(Opcodes.begin(), Opcodes.end(), 0U);
  45   std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator());
  46
  47   std::vector<Instruction> AliasingInstructions;
  48   for (const unsigned OtherOpcode : Opcodes) {
  49     if (OtherOpcode == Instr.Description->getOpcode())
  50       continue;
  51     const Instruction OtherInstr(State, OtherOpcode);
  52     if (OtherInstr.hasMemoryOperands())
  53       continue;
  54     if (Instr.hasAliasingRegistersThrough(OtherInstr))
  55       AliasingInstructions.push_back(std::move(OtherInstr));
  56     if (AliasingInstructions.size() >= MaxAliasingInstructions)
  57       break;
  58   }
  59   return AliasingInstructions;
  60 }
  61
  62 static ExecutionMode getExecutionModes(const Instruction &Instr) {
  63   ExecutionMode EM = ExecutionMode::UNKNOWN;
  64   if (Instr.hasAliasingImplicitRegisters())
  65     EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS;
  66   if (Instr.hasTiedRegisters())
  67     EM |= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS;
  68   if (Instr.hasMemoryOperands())
  69     EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR;
  70   else {
  71     if (Instr.hasAliasingRegisters())
  72       EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS;
  73     if (Instr.hasOneUseOrOneDef())
  74       EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR;
  75   }
  76   return EM;
  77 }
  78
  79 static void appendCodeTemplates(const LLVMState &State,
  80                                 const Instruction &Instr,
  81                                 ExecutionMode ExecutionModeBit,
  82                                 llvm::StringRef ExecutionClassDescription,
  83                                 std::vector<CodeTemplate> &CodeTemplates) {
  84   assert(isEnumValue(ExecutionModeBit) && "Bit must be a power of two");
  85   switch (ExecutionModeBit) {
  86   case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS:
  87     // Nothing to do, the instruction is always serial.
  88     LLVM_FALLTHROUGH;
  89   case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS: {
  90     // Picking whatever value for the tied variable will make the instruction
  91     // serial.
  92     CodeTemplate CT;
  93     CT.Execution = ExecutionModeBit;
  94     CT.Info = ExecutionClassDescription;
  95     CT.Instructions.push_back(Instr);
  96     CodeTemplates.push_back(std::move(CT));
  97     return;
  98   }
  99   case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: {
 100     // Select back-to-back memory instruction.
 101     // TODO: Implement me.
 102     return;
 103   }
 104   case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: {
 105     // Making the execution of this instruction serial by selecting one def
 106     // register to alias with one use register.
 107     const AliasingConfigurations SelfAliasing(Instr, Instr);
 108     assert(!SelfAliasing.empty() && !SelfAliasing.hasImplicitAliasing() &&
 109            "Instr must alias itself explicitly");
 110     InstructionTemplate IT(Instr);
 111     // This is a self aliasing instruction so defs and uses are from the same
 112     // instance, hence twice IT in the following call.
 113     setRandomAliasing(SelfAliasing, IT, IT);
 114     CodeTemplate CT;
 115     CT.Execution = ExecutionModeBit;
 116     CT.Info = ExecutionClassDescription;
 117     CT.Instructions.push_back(std::move(IT));
 118     CodeTemplates.push_back(std::move(CT));
 119     return;
 120   }
 121   case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: {
 122     // Select back-to-back non-memory instruction.
 123     for (const auto OtherInstr :
 124          computeAliasingInstructions(State, Instr, kMaxAliasingInstructions)) {
 125       const AliasingConfigurations Forward(Instr, OtherInstr);
 126       const AliasingConfigurations Back(OtherInstr, Instr);
 127       InstructionTemplate ThisIT(Instr);
 128       InstructionTemplate OtherIT(OtherInstr);
 129       if (!Forward.hasImplicitAliasing())
 130         setRandomAliasing(Forward, ThisIT, OtherIT);
 131       if (!Back.hasImplicitAliasing())
 132         setRandomAliasing(Back, OtherIT, ThisIT);
 133       CodeTemplate CT;
 134       CT.Execution = ExecutionModeBit;
 135       CT.Info = ExecutionClassDescription;
 136       CT.Instructions.push_back(std::move(ThisIT));
 137       CT.Instructions.push_back(std::move(OtherIT));
 138       CodeTemplates.push_back(std::move(CT));
 139     }
 140     return;
 141   }
 142   default:
 143     llvm_unreachable("Unhandled enum value");
 144   }
 145 }
 146
 // Out-of-line definition of the defaulted destructor.
 // NOTE(review): presumably kept in this file to give the class a home
 // translation unit -- confirm against the declaration in Latency.h.
 147 LatencySnippetGenerator::~LatencySnippetGenerator() = default;
 148
 149 llvm::Expected<std::vector<CodeTemplate>>
 150 LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
 151   std::vector<CodeTemplate> Results;
 152   const ExecutionMode EM = getExecutionModes(Instr);
 153   for (const auto EC : kExecutionClasses) {
 154     for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask))
 155       appendCodeTemplates(State, Instr, ExecutionModeBit, EC.Description,
 156                           Results);
 157     if (!Results.empty())
 158       break;
 159   }
 160   if (Results.empty())
 161     return llvm::make_error<BenchmarkFailure>(
 162         "No strategy found to make the execution serial");
 163   return std::move(Results);
 164 }
 165
 166 const char *LatencyBenchmarkRunner::getCounterName() const {
 167   if (!State.getSubtargetInfo().getSchedModel().hasExtraProcessorInfo())
 168     llvm::report_fatal_error("sched model is missing extra processor info!");
 169   const char *CounterName = State.getSubtargetInfo()
 170                                 .getSchedModel()
 171                                 .getExtraProcessorInfo()
 172                                 .PfmCounters.CycleCounter;
 173   if (!CounterName)
 174     llvm::report_fatal_error("sched model does not define a cycle counter");
 175   return CounterName;
 176 }
 177
 // Out-of-line definition of the defaulted destructor.
 // NOTE(review): presumably kept in this file to give the class a home
 // translation unit -- confirm against the declaration in Latency.h.
 178 LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
 179
 180 llvm::Expected<std::vector<BenchmarkMeasure>>
 181 LatencyBenchmarkRunner::runMeasurements(
 182     const FunctionExecutor &Executor) const {
 183   // Cycle measurements include some overhead from the kernel. Repeat the
 184   // measure several times and take the minimum value.
 185   constexpr const int NumMeasurements = 30;
 186   int64_t MinValue = std::numeric_limits<int64_t>::max();
 187   const char *CounterName = getCounterName();
 188   if (!CounterName)
 189     llvm::report_fatal_error("could not determine cycle counter name");
 190   for (size_t I = 0; I < NumMeasurements; ++I) {
 191     auto ExpectedCounterValue = Executor.runAndMeasure(CounterName);
 192     if (!ExpectedCounterValue)
 193       return ExpectedCounterValue.takeError();
 194     if (*ExpectedCounterValue < MinValue)
 195       MinValue = *ExpectedCounterValue;
 196   }
 197   std::vector<BenchmarkMeasure> Result = {
 198       BenchmarkMeasure::Create("latency", MinValue)};
 199   return std::move(Result);
 200 }
 201
 202 } // namespace exegesis