//===- llvm/Target/TargetSchedule.cpp - Sched Machine Model ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a wrapper around MCSchedModel that allows the interface
// to benefit from information currently only available in TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>

using namespace llvm;

static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
  cl::desc("Use TargetSchedModel for latency lookup"));

static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
  cl::desc("Use InstrItineraryData for latency lookup"));
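// Both flags default to true. The queries below generally consult itinerary
// data first when the target provides it; e.g. passing -schedmodel=false
// (standard cl::opt boolean syntax) disables the per-operand machine model,
// so latency lookups fall back to itineraries or the default latencies.
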
bool TargetSchedModel::hasInstrSchedModel() const {
  return EnableSchedModel && SchedModel.hasInstrSchedModel();
}

bool TargetSchedModel::hasInstrItineraries() const {
  return EnableSchedItins && !InstrItins.isEmpty();
}

static unsigned gcd(unsigned Dividend, unsigned Divisor) {
  // Dividend and Divisor will be naturally swapped as needed.
  while (Divisor) {
    unsigned Rem = Dividend % Divisor;
    Dividend = Divisor;
    Divisor = Rem;
  }
  return Dividend;
}

static unsigned lcm(unsigned A, unsigned B) {
  unsigned LCM = (uint64_t(A) * B) / gcd(A, B);
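  // For example, lcm(12, 8) == (12 * 8) / gcd(12, 8) == 96 / 4 == 24. The
  // product is formed in 64 bits, so only the final narrowing back to
  // unsigned can overflow, which the assert below catches.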
  assert((LCM >= A && LCM >= B) && "LCM overflow");
  return LCM;
}

void TargetSchedModel::init(const MCSchedModel &sm,
                            const TargetSubtargetInfo *sti,
                            const TargetInstrInfo *tii) {
  SchedModel = sm;
  STI = sti;
  TII = tii;
  STI->initInstrItins(InstrItins);

  unsigned NumRes = SchedModel.getNumProcResourceKinds();
  ResourceFactors.resize(NumRes);
  ResourceLCM = SchedModel.IssueWidth;
  for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
    unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
    if (NumUnits > 0)
      ResourceLCM = lcm(ResourceLCM, NumUnits);
  }
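  // ResourceLCM is now a common multiple of the issue width and every
  // resource's unit count, so dividing it by each count yields an integer
  // scale factor. Illustrative numbers only: IssueWidth = 4 with unit counts
  // {2, 3} gives ResourceLCM = 12, MicroOpFactor = 3, and resource factors
  // {6, 4}, putting micro-op and resource counts on a common denominator.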
  MicroOpFactor = ResourceLCM / SchedModel.IssueWidth;
  for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
    unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
    ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0;
  }
}

/// Returns true only if instruction is specified as single issue.
bool TargetSchedModel::mustBeginGroup(const MachineInstr *MI,
                                      const MCSchedClassDesc *SC) const {
  if (hasInstrSchedModel()) {
    if (!SC)
      SC = resolveSchedClass(MI);
    if (SC->isValid())
      return SC->BeginGroup;
  }
  return false;
}

bool TargetSchedModel::mustEndGroup(const MachineInstr *MI,
                                    const MCSchedClassDesc *SC) const {
  if (hasInstrSchedModel()) {
    if (!SC)
      SC = resolveSchedClass(MI);
    if (SC->isValid())
      return SC->EndGroup;
  }
  return false;
}

unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
                                          const MCSchedClassDesc *SC) const {
  if (hasInstrItineraries()) {
    int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass());
    return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, *MI);
  }
  if (hasInstrSchedModel()) {
    if (!SC)
      SC = resolveSchedClass(MI);
    if (SC->isValid())
      return SC->NumMicroOps;
  }
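  // Transient instructions (COPY-like instructions expected to disappear
  // during register allocation, debug values, and similar) are assumed to
  // consume no issue resources, hence zero micro-ops; anything else defaults
  // to a single micro-op.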
  return MI->isTransient() ? 0 : 1;
}

// The machine model may explicitly specify an invalid latency, which
// effectively means infinite latency. Since users of the TargetSchedule API
// don't know how to handle this, we convert it to a very large latency that is
// easy to distinguish when debugging the DAG but won't induce overflow.
static unsigned capLatency(int Cycles) {
  return Cycles >= 0 ? Cycles : 1000;
}

/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require
/// evaluation of predicates that depend on instruction operands or flags.
const MCSchedClassDesc *TargetSchedModel::
resolveSchedClass(const MachineInstr *MI) const {
  // Get the definition's scheduling class descriptor from this machine model.
  unsigned SchedClass = MI->getDesc().getSchedClass();
  const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
  if (!SCDesc->isValid())
    return SCDesc;

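  // A variant scheduling class defers the choice of a concrete class to the
  // subtarget, which may inspect the instruction's operands or flags. The
  // loop below follows that resolution chain until it reaches a non-variant
  // class; the assert's magic bound of 6 only guards against runaway nesting.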
#ifndef NDEBUG
  unsigned NIter = 0;
#endif
  while (SCDesc->isVariant()) {
    assert(++NIter < 6 && "Variants are nested deeper than the magic number");
    SchedClass = STI->resolveSchedClass(SchedClass, MI, this);
    SCDesc = SchedModel.getSchedClassDesc(SchedClass);
  }
  return SCDesc;
}

/// Find the def index of this operand. This index maps to the machine model
/// and is independent of use operands. Def operands may be reordered with
/// uses or merged with uses without affecting the def index (e.g. before/after
/// regalloc). However, an instruction's def operands must never be reordered
/// with respect to each other.
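/// For example (illustrative operand list): given operands
/// (def %r0, use %r1, def %r2), DefOperIdx == 2 maps to def index 1, because
/// exactly one earlier operand is a register def. findUseIdx below applies
/// the same counting to readsReg() operands.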
static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) {
  unsigned DefIdx = 0;
  for (unsigned i = 0; i != DefOperIdx; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isDef())
      ++DefIdx;
  }
  return DefIdx;
}

/// Find the use index of this operand. This is independent of the
/// instruction's def operands.
///
/// Note that uses are not determined by the operand's isUse property, which
/// is simply the inverse of isDef. Here we consider any readsReg operand to be
/// a "use". The machine model allows an operand to be both a Def and Use.
static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
  unsigned UseIdx = 0;
  for (unsigned i = 0; i != UseOperIdx; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.readsReg() && !MO.isDef())
      ++UseIdx;
  }
  return UseIdx;
}

// Top-level API for clients that know the operand indices.
unsigned TargetSchedModel::computeOperandLatency(
  const MachineInstr *DefMI, unsigned DefOperIdx,
  const MachineInstr *UseMI, unsigned UseOperIdx) const {

  if (!hasInstrSchedModel() && !hasInstrItineraries())
    return TII->defaultDefLatency(SchedModel, *DefMI);

  if (hasInstrItineraries()) {
    int OperLatency = 0;
    if (UseMI) {
      OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx,
                                           *UseMI, UseOperIdx);
    }
    else {
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx);
    }
    if (OperLatency >= 0)
      return OperLatency;

    // No operand latency was found.
    unsigned InstrLatency = TII->getInstrLatency(&InstrItins, *DefMI);

    // Expected latency is the max of the stage latency and itinerary props.
    // Rather than directly querying InstrItins stage latency, we call a TII
    // hook to allow subtargets to specialize latency. This hook is only
    // applicable to the InstrItins model. InstrSchedModel should model all
    // special cases without TII hooks.
    InstrLatency =
        std::max(InstrLatency, TII->defaultDefLatency(SchedModel, *DefMI));
    return InstrLatency;
  }
  // hasInstrSchedModel()
  const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
  unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
  if (DefIdx < SCDesc->NumWriteLatencyEntries) {
    // Lookup the definition's write latency in SubtargetInfo.
    const MCWriteLatencyEntry *WLEntry =
      STI->getWriteLatencyEntry(SCDesc, DefIdx);
    unsigned WriteID = WLEntry->WriteResourceID;
    unsigned Latency = capLatency(WLEntry->Cycles);
    if (!UseMI)
      return Latency;

    // Lookup the use's latency adjustment in SubtargetInfo.
    const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI);
    if (UseDesc->NumReadAdvanceEntries == 0)
      return Latency;
    unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
    int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
    if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap
      return 0;
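    // Illustrative example (numbers not taken from any real target's tables):
    // a write latency of 4 cycles combined with a ReadAdvance of 1 for this
    // WriteResourceID yields an operand latency of 4 - 1 = 3 cycles. The
    // check above clamps the result to 0 when the advance exceeds the
    // latency, so the unsigned subtraction below cannot wrap.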
    return Latency - Advance;
  }
  // If DefIdx does not exist in the model (e.g. implicit defs), then return
  // unit latency (defaultDefLatency may be too conservative).
#ifndef NDEBUG
  if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit()
      && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()
      && SchedModel.isComplete()) {
    errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
           << *DefMI << " (Try with MCSchedModel.CompleteModel set to false)";
    llvm_unreachable("incomplete machine model");
  }
#endif
  // FIXME: Automatically giving all implicit defs defaultDefLatency is
  // undesirable. We should only do it for defs that are known to the MC
  // desc like flags. Truly implicit defs should get 1 cycle latency.
  return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, *DefMI);
}

unsigned
TargetSchedModel::computeInstrLatency(const MCSchedClassDesc &SCDesc) const {
  unsigned Latency = 0;
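  // An instruction's latency is the maximum over its defs' write latencies;
  // e.g. (illustrative) write latencies {1, 3} for two defs give an
  // instruction latency of 3, since the slowest result gates completion.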
  for (unsigned DefIdx = 0, DefEnd = SCDesc.NumWriteLatencyEntries;
       DefIdx != DefEnd; ++DefIdx) {
    // Lookup the definition's write latency in SubtargetInfo.
    const MCWriteLatencyEntry *WLEntry =
      STI->getWriteLatencyEntry(&SCDesc, DefIdx);
    Latency = std::max(Latency, capLatency(WLEntry->Cycles));
  }
  return Latency;
}

unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
  assert(hasInstrSchedModel() && "Only call this function with a SchedModel");

  unsigned SCIdx = TII->get(Opcode).getSchedClass();
  const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCIdx);

  if (SCDesc->isValid() && !SCDesc->isVariant())
    return computeInstrLatency(*SCDesc);

  llvm_unreachable("No MI sched latency");
}

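// Unlike the Opcode-based overload above, this query can resolve variant
// scheduling classes, since resolveSchedClass may inspect the concrete
// instruction's operands and flags.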
unsigned
TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
                                      bool UseDefaultDefLatency) const {
  // For the itinerary model, fall back to the old subtarget hook.
  // Allow subtargets to compute Bundle latencies outside the machine model.
  if (hasInstrItineraries() || MI->isBundle() ||
      (!hasInstrSchedModel() && !UseDefaultDefLatency))
    return TII->getInstrLatency(&InstrItins, *MI);

  if (hasInstrSchedModel()) {
    const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
    if (SCDesc->isValid())
      return computeInstrLatency(*SCDesc);
  }
  return TII->defaultDefLatency(SchedModel, *MI);
}

unsigned TargetSchedModel::
computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
                     const MachineInstr *DepMI) const {
  if (!SchedModel.isOutOfOrder())
    return 1;

  // Out-of-order processor can dispatch WAW dependencies in the same cycle.

  // Treat predication as a data dependency for out-of-order cpus. In-order
  // cpus do not need to treat predicated writes specially.
  //
  // TODO: The following hack exists because predication passes do not
  // correctly append imp-use operands, and readsReg() strangely returns false
  // for predicated defs.
  unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
  const MachineFunction &MF = *DefMI->getMF();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI))
    return computeInstrLatency(DefMI);

  // If we have a per operand scheduling model, check if this def is writing
  // an unbuffered resource. If so, it is treated like an in-order cpu.
  if (hasInstrSchedModel()) {
    const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
    if (SCDesc->isValid()) {
      for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
             *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
        if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->BufferSize)
          return 1;
      }
    }
  }
  return 0;
}

static Optional<double>
getRThroughputFromItineraries(unsigned schedClass,
                              const InstrItineraryData *IID) {
  Optional<double> Throughput;

  for (const InstrStage *IS = IID->beginStage(schedClass),
                        *E = IID->endStage(schedClass);
       IS != E; ++IS) {
    if (IS->getCycles()) {
      double Temp = countPopulation(IS->getUnits()) * 1.0 / IS->getCycles();
      Throughput = Throughput.hasValue()
                       ? std::min(Throughput.getValue(), Temp)
                       : Temp;
    }
  }
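  // Illustrative example: a stage that may run on either of two units
  // (countPopulation(Units) == 2) for one cycle sustains 2 instructions per
  // cycle; if that is the bottleneck stage, the reciprocal throughput
  // returned below is 1 / 2 = 0.5 cycles per instruction.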
  if (Throughput.hasValue())
    // We need the reciprocal throughput, so invert the best rate we found.
    return 1 / Throughput.getValue();
  return Throughput;
}

static Optional<double>
getRThroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc,
                                  const TargetSubtargetInfo *STI,
                                  const MCSchedModel &SchedModel) {
  Optional<double> Throughput;

  for (const MCWriteProcResEntry *WPR = STI->getWriteProcResBegin(SCDesc),
                                 *WEnd = STI->getWriteProcResEnd(SCDesc);
       WPR != WEnd; ++WPR) {
    if (WPR->Cycles) {
      unsigned NumUnits =
          SchedModel.getProcResource(WPR->ProcResourceIdx)->NumUnits;
      double Temp = NumUnits * 1.0 / WPR->Cycles;
      Throughput = Throughput.hasValue()
                       ? std::min(Throughput.getValue(), Temp)
                       : Temp;
    }
  }
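  // Illustrative example: an entry holding one unit of a 2-unit resource for
  // 1 cycle has a rate of 2 per cycle; if another entry holds a 1-unit
  // resource for 2 cycles (rate 0.5), the bottleneck rate is 0.5 and the
  // reciprocal throughput returned below is 1 / 0.5 = 2 cycles per
  // instruction.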
  if (Throughput.hasValue())
    // We need the reciprocal throughput, so invert the best rate we found.
    return 1 / Throughput.getValue();
  return Throughput;
}

Optional<double>
TargetSchedModel::computeInstrRThroughput(const MachineInstr *MI) const {
  if (hasInstrItineraries())
    return getRThroughputFromItineraries(MI->getDesc().getSchedClass(),
                                         getInstrItineraries());
  if (hasInstrSchedModel())
    return getRThroughputFromInstrSchedModel(resolveSchedClass(MI), STI,
                                             SchedModel);
  return Optional<double>();
}

Optional<double>
TargetSchedModel::computeInstrRThroughput(unsigned Opcode) const {
  unsigned SchedClass = TII->get(Opcode).getSchedClass();
  if (hasInstrItineraries())
    return getRThroughputFromItineraries(SchedClass, getInstrItineraries());
  if (hasInstrSchedModel()) {
    const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
    if (SCDesc->isValid() && !SCDesc->isVariant())
      return getRThroughputFromInstrSchedModel(SCDesc, STI, SchedModel);
  }
  return Optional<double>();
}