1 //=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines a hazard recognizer for the SystemZ scheduler.
11 // This class is used by the SystemZ scheduling strategy to maintain
12 // the state during scheduling, and provide cost functions for
13 // scheduling candidates. This includes:
15 // * Decoder grouping. A decoder group can hold at most 3 uops, and
16 // instructions that always begin a new group should be scheduled when
17 // the current decoder group is empty.
18 // * Processor resource usage. It is beneficial to balance the use of processor resources.
21 // A goal is to consider all instructions, also those outside of any
22 // scheduling region. Such instructions are "advanced" past and include
23 // single instructions before a scheduling region, branches etc.
25 // A block that has only one predecessor continues scheduling with that
26 // predecessor's state (which may be updated by emitting branches).
28 // ===---------------------------------------------------------------------===//
30 #include "SystemZHazardRecognizer.h"
31 #include "llvm/ADT/Statistic.h"
35 #define DEBUG_TYPE "machine-scheduler"
37 // This is the limit of processor resource usage at which the
38 // scheduler should try to look for other instructions (not using the
39 // critical resource).
// Hidden integer command-line option "procres-cost-lim". EmitInstruction()
// compares a resource's usage counter against this value when deciding
// whether that resource becomes the critical one.
40 static cl::opt
<int> ProcResCostLim("procres-cost-lim", cl::Hidden
,
41 cl::desc("The OOO window for processor "
42 "resources during scheduling."),
// Returns the number of decoder slots that SU occupies, which is the
// NumMicroOps value of SU's scheduling class. The early `return 0` path
// covers IMPLICIT_DEF / KILL, which do not show up in the output.
45 unsigned SystemZHazardRecognizer::
46 getNumDecoderSlots(SUnit
*SU
) const {
47 const MCSchedClassDesc
*SC
= getSchedClass(SU
);
49 return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
// Sanity checks on the scheduling class:
//  - exactly 2 uops implies BeginGroup and not EndGroup (cracked),
//  - 3 or more uops implies both BeginGroup and EndGroup (expanded),
//  - 3 or more uops implies a uop count that is a multiple of 3.
51 assert((SC
->NumMicroOps
!= 2 || (SC
->BeginGroup
&& !SC
->EndGroup
)) &&
52 "Only cracked instruction can have 2 uops.");
53 assert((SC
->NumMicroOps
< 3 || (SC
->BeginGroup
&& SC
->EndGroup
)) &&
54 "Expanded instructions always group alone.");
55 assert((SC
->NumMicroOps
< 3 || (SC
->NumMicroOps
% 3 == 0)) &&
56 "Expanded instructions fill the group(s).");
58 return SC
->NumMicroOps
;
// Returns a cycle index for the current scheduling position. The index
// starts from CurrGroupSize, the number of slots used in the current
// decoder group. SU may be null; dumpState() calls this without an argument.
61 unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit
*SU
) const {
62 unsigned Idx
= CurrGroupSize
;
// If SU is given and does not fit into the current group, an index that
// points into a partially filled group (1/2 or 4/5) is adjusted --
// presumably to the start of the group SU would begin; TODO confirm.
66 if (SU
!= nullptr && !fitsIntoCurrentGroup(SU
)) {
67 if (Idx
== 1 || Idx
== 2)
69 else if (Idx
== 4 || Idx
== 5)
// Reports NoHazard when SU fits into the current decoder group and Hazard
// otherwise. The Stalls argument is not used by the expression below.
76 ScheduleHazardRecognizer::HazardType
SystemZHazardRecognizer::
77 getHazardType(SUnit
*SU
, int Stalls
) {
78 return (fitsIntoCurrentGroup(SU
) ? NoHazard
: Hazard
);
// Returns the recognizer to a clean state: clears the 4-register-operand
// flag of the current group, zeroes the processor resource counters (which
// also clears the critical resource), forgets the last FPd cycle index and
// the last emitted instruction, and empties the debug string for the group.
81 void SystemZHazardRecognizer::Reset() {
83 CurrGroupHas4RegOps
= false;
84 clearProcResCounters();
// UINT_MAX is the "no FPd op seen yet" marker tested elsewhere in this file.
86 LastFPdOpCycleIdx
= UINT_MAX
;
87 LastEmittedMI
= nullptr;
88 LLVM_DEBUG(CurGroupDbg
= "";);
// Decides whether SU can be placed into the current decoder group. Used by
// getHazardType(), getCurrCycleIdx() and EmitInstruction().
92 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit
*SU
) const {
93 const MCSchedClassDesc
*SC
= getSchedClass(SU
);
97 // A cracked instruction only fits into schedule if the current
// This path requires an empty current group (no slots used yet).
100 return (CurrGroupSize
== 0);
102 // An instruction with 4 register operands will not fit in last slot.
// A group that already holds a 4-register-operand instruction must not have
// reached two used slots by the time a new candidate is evaluated.
103 assert ((CurrGroupSize
< 2 || !CurrGroupHas4RegOps
) &&
104 "Current decoder group is already full!");
105 if (CurrGroupSize
== 2 && has4RegOps(SU
->getInstr()))
108 // Since a full group is handled immediately in EmitInstruction(),
109 // SU should fit into current group. NumSlots should be 1 or 0,
110 // since it is not a cracked or expanded instruction.
111 assert ((getNumDecoderSlots(SU
) <= 1) && (CurrGroupSize
< 3) &&
112 "Expected normal instruction to fit in non-full group!");
// Inspects the operands declared in MI's instruction description, looking up
// the register class of each one. Callers use the boolean result to tell
// whether MI is an instruction with 4 register operands, which does not fit
// in the last slot of a decoder group.
117 bool SystemZHazardRecognizer::has4RegOps(const MachineInstr
*MI
) const {
118 const MachineFunction
&MF
= *MI
->getParent()->getParent();
119 const TargetRegisterInfo
*TRI
= &TII
->getRegisterInfo();
120 const MCInstrDesc
&MID
= MI
->getDesc();
122 for (unsigned OpIdx
= 0; OpIdx
< MID
.getNumOperands(); OpIdx
++) {
123 const TargetRegisterClass
*RC
= TII
->getRegClass(MID
, OpIdx
, TRI
, MF
);
// Matches operands located at or after the defs (i.e. uses) that carry a
// TIED_TO constraint -- presumably so that a register tied to a def is not
// counted twice; TODO confirm.
126 if (OpIdx
>= MID
.getNumDefs() &&
127 MID
.getOperandConstraint(OpIdx
, MCOI::TIED_TO
) != -1)
// Finishes the current decoder group and prepares the state for the next
// one: GrpCount is advanced by the number of groups just completed, each
// processor resource counter is lowered accordingly, and the critical
// resource is cleared once its counter has dropped to the threshold.
134 void SystemZHazardRecognizer::nextGroup() {
135 if (CurrGroupSize
== 0)
138 LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
139 LLVM_DEBUG(CurGroupDbg
= "";);
// A group size above 3 must be a multiple of 3 (asserted below); such a
// size counts as CurrGroupSize / 3 completed groups, anything else as one.
141 int NumGroups
= ((CurrGroupSize
> 3) ? (CurrGroupSize
/ 3) : 1);
142 assert((CurrGroupSize
<= 3 || CurrGroupSize
% 3 == 0) &&
143 "Current decoder group bad.");
145 // Reset counter for next group.
147 CurrGroupHas4RegOps
= false;
149 GrpCount
+= ((unsigned) NumGroups
);
151 // Decrease counters for execution units.
// A counter greater than NumGroups is reduced by NumGroups.
152 for (unsigned i
= 0; i
< SchedModel
->getNumProcResourceKinds(); ++i
)
153 ProcResourceCounters
[i
] = ((ProcResourceCounters
[i
] > NumGroups
)
154 ? (ProcResourceCounters
[i
] - NumGroups
)
157 // Clear CriticalResourceIdx if it is now below the threshold.
158 if (CriticalResourceIdx
!= UINT_MAX
&&
159 (ProcResourceCounters
[CriticalResourceIdx
] <=
161 CriticalResourceIdx
= UINT_MAX
;
163 LLVM_DEBUG(dumpState(););
166 #ifndef NDEBUG // Debug output
// Debug helper: writes a one-line description of SU to OS. The output starts
// with "SU(<NodeNum>):" and the opcode name, followed by information about
// the processor resources of SU's scheduling class and by grouping
// attributes such as "/<N>uops", "/GroupsAlone" or "/BeginsGroup".
167 void SystemZHazardRecognizer::dumpSU(SUnit
*SU
, raw_ostream
&OS
) const {
168 OS
<< "SU(" << SU
->NodeNum
<< "):";
169 OS
<< TII
->getName(SU
->getInstr()->getOpcode());
171 const MCSchedClassDesc
*SC
= getSchedClass(SU
);
// Walk the processor resources written by this scheduling class and derive
// a short unit name from each resource name.
175 for (TargetSchedModel::ProcResIter
176 PI
= SchedModel
->getWriteProcResBegin(SC
),
177 PE
= SchedModel
->getWriteProcResEnd(SC
); PI
!= PE
; ++PI
) {
178 const MCProcResourceDesc
&PRD
=
179 *SchedModel
->getProcResource(PI
->ProcResourceIdx
);
180 std::string
FU(PRD
.Name
);
181 // trim e.g. Z13_FXaUnit -> FXa
// Drop everything up to and including the first '_'. If no '_' is present,
// find() returns npos, npos + 1 wraps to 0, and the whole name is kept.
182 FU
= FU
.substr(FU
.find('_') + 1);
183 size_t Pos
= FU
.find("Unit");
184 if (Pos
!= std::string::npos
)
186 if (FU
== "LS") // LSUnit -> LSU
// Only resources held for more than one cycle get a "(<N>cyc)" suffix.
190 if (PI
->ReleaseAtCycle
> 1)
191 OS
<< "(" << PI
->ReleaseAtCycle
<< "cyc)";
194 if (SC
->NumMicroOps
> 1)
195 OS
<< "/" << SC
->NumMicroOps
<< "uops";
196 if (SC
->BeginGroup
&& SC
->EndGroup
)
197 OS
<< "/GroupsAlone";
198 else if (SC
->BeginGroup
)
199 OS
<< "/BeginsGroup";
200 else if (SC
->EndGroup
)
202 if (SU
->isUnbuffered
)
204 if (has4RegOps(SU
->getInstr()))
// Debug helper: prints "++ " followed by Msg to dbgs(), then either
// " <empty>" when no instruction text has been recorded for the current
// decoder group, or the recorded text in braces together with the number of
// decoder slots used and a ", 4RegOps" marker when that flag is set.
208 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg
) const {
209 dbgs() << "++ " << Msg
;
212 if (CurGroupDbg
.empty())
213 dbgs() << " <empty>\n";
215 dbgs() << "{ " << CurGroupDbg
<< " }";
// "slot" is pluralised only when more than one slot is in use.
216 dbgs() << " (" << CurrGroupSize
<< " decoder slot"
217 << (CurrGroupSize
> 1 ? "s":"")
218 << (CurrGroupHas4RegOps
? ", 4RegOps" : "")
// Debug helper: prints every processor resource whose counter is non-zero as
// "<Name>:<count>", and names the critical resource when one is set.
223 void SystemZHazardRecognizer::dumpProcResourceCounters() const {
// First pass over the counters, looking for any non-zero entry.
226 for (unsigned i
= 0; i
< SchedModel
->getNumProcResourceKinds(); ++i
)
227 if (ProcResourceCounters
[i
] > 0) {
235 dbgs() << "++ | Resource counters: ";
// Second pass: print the non-zero counters.
236 for (unsigned i
= 0; i
< SchedModel
->getNumProcResourceKinds(); ++i
)
237 if (ProcResourceCounters
[i
] > 0)
238 dbgs() << SchedModel
->getProcResource(i
)->Name
239 << ":" << ProcResourceCounters
[i
] << " ";
// UINT_MAX means that no critical resource is currently selected.
242 if (CriticalResourceIdx
!= UINT_MAX
)
243 dbgs() << "++ | Critical resource: "
244 << SchedModel
->getProcResource(CriticalResourceIdx
)->Name
// Debug helper: prints the current decoder group, the current cycle index,
// the processor resource counters and, if an FPd op has been recorded
// (LastFPdOpCycleIdx != UINT_MAX), the cycle index of the last FPd op.
248 void SystemZHazardRecognizer::dumpState() const {
249 dumpCurrGroup("| Current decoder group");
250 dbgs() << "++ | Current cycle index: "
251 << getCurrCycleIdx() << "\n";
252 dumpProcResourceCounters();
253 if (LastFPdOpCycleIdx
!= UINT_MAX
)
254 dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx
<< "\n";
// Resizes ProcResourceCounters to one entry per processor resource kind of
// the scheduling model, sets every entry to 0, and marks that no critical
// resource is selected (CriticalResourceIdx = UINT_MAX).
259 void SystemZHazardRecognizer::clearProcResCounters() {
260 ProcResourceCounters
.assign(SchedModel
->getNumProcResourceKinds(), 0);
261 CriticalResourceIdx
= UINT_MAX
;
// Returns true if MI is a branch, a return, or has the SystemZ::CondTrap
// opcode.
264 static inline bool isBranchRetTrap(MachineInstr
*MI
) {
265 return (MI
->isBranch() || MI
->isReturn() ||
266 MI
->getOpcode() == SystemZ::CondTrap
);
269 // Update state with SU as the next scheduled unit.
// This records SU as the last emitted instruction, adds the cycles SU uses
// to the processor resource counters (possibly selecting a new critical
// resource), remembers the cycle index of an FPd op, and adds SU's decoder
// slots to the current group.
270 void SystemZHazardRecognizer::
271 EmitInstruction(SUnit
*SU
) {
272 const MCSchedClassDesc
*SC
= getSchedClass(SU
);
273 LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU
, dbgs());
275 LLVM_DEBUG(dumpCurrGroup("Decode group before emission"););
277 // If scheduling an SU that must begin a new decoder group, move on
279 if (!fitsIntoCurrentGroup(SU
))
// Debug builds append a description of SU to the text kept for the current
// group, separated by ", " from any earlier entries.
282 LLVM_DEBUG(raw_string_ostream
cgd(CurGroupDbg
);
283 if (CurGroupDbg
.length()) cgd
<< ", "; dumpSU(SU
, cgd
););
285 LastEmittedMI
= SU
->getInstr();
287 // After returning from a call, we don't know much about the state.
289 LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";);
291 LastEmittedMI
= SU
->getInstr();
295 // Increase counter for execution unit(s).
296 for (TargetSchedModel::ProcResIter
297 PI
= SchedModel
->getWriteProcResBegin(SC
),
298 PE
= SchedModel
->getWriteProcResEnd(SC
); PI
!= PE
; ++PI
) {
299 // Don't handle FPd together with the other resources.
// Resources with BufferSize == 1 are the ones singled out by this check.
300 if (SchedModel
->getProcResource(PI
->ProcResourceIdx
)->BufferSize
== 1)
303 ProcResourceCounters
[PI
->ProcResourceIdx
];
304 CurrCounter
+= PI
->ReleaseAtCycle
;
305 // Check if this is now the new critical resource.
// The counter must exceed ProcResCostLim; in addition either no critical
// resource is selected yet, or this is a different resource whose counter
// is compared against that of the current critical resource.
306 if ((CurrCounter
> ProcResCostLim
) &&
307 (CriticalResourceIdx
== UINT_MAX
||
308 (PI
->ProcResourceIdx
!= CriticalResourceIdx
&&
310 ProcResourceCounters
[CriticalResourceIdx
]))) {
312 dbgs() << "++ New critical resource: "
313 << SchedModel
->getProcResource(PI
->ProcResourceIdx
)->Name
315 CriticalResourceIdx
= PI
->ProcResourceIdx
;
319 // Make note of an instruction that uses a blocking resource (FPd).
320 if (SU
->isUnbuffered
) {
321 LastFPdOpCycleIdx
= getCurrCycleIdx(SU
);
322 LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx
326 // Insert SU into current group by increasing number of slots used
328 CurrGroupSize
+= getNumDecoderSlots(SU
);
329 CurrGroupHas4RegOps
|= has4RegOps(SU
->getInstr());
// A group containing a 4-register-operand instruction is limited to 2 slots
// instead of 3.
330 unsigned GroupLim
= (CurrGroupHas4RegOps
? 2 : 3);
// Exceeding the limit is only accepted when SU alone makes up the whole
// group size.
331 assert((CurrGroupSize
<= GroupLim
|| CurrGroupSize
== getNumDecoderSlots(SU
))
332 && "SU does not fit into decoder group!");
334 // Check if current group is now full/ended. If so, move on to next
335 // group to be ready to evaluate more candidates.
336 if (CurrGroupSize
>= GroupLim
|| SC
->EndGroup
)
// Returns a cost for scheduling SU next with respect to decoder grouping.
// Per the comments below, a negative cost means SU fits the current group
// naturally; the positive values computed here equal the number of decoder
// slots (out of 3) that would be left unused.
340 int SystemZHazardRecognizer::groupingCost(SUnit
*SU
) const {
341 const MCSchedClassDesc
*SC
= getSchedClass(SU
);
345 // If SU begins new group, it can either break a current group early
346 // or fit naturally if current group is empty (negative cost).
347 if (SC
->BeginGroup
) {
// Cost is the number of slots still free in the current group.
349 return 3 - CurrGroupSize
;
353 // Similarly, a group-ending SU may either fit well (last in group), or
354 // end the group prematurely.
356 unsigned resultingGroupSize
=
357 (CurrGroupSize
+ getNumDecoderSlots(SU
));
// Fewer than 3 slots used after adding SU: the remainder is the cost.
358 if (resultingGroupSize
< 3)
359 return (3 - resultingGroupSize
);
363 // An instruction with 4 register operands will not fit in last slot.
364 if (CurrGroupSize
== 2 && has4RegOps(SU
->getInstr()))
367 // Most instructions can be placed in any decoder slot.
// Tells whether the FPd op SU would be placed at the preferred distance from
// the previously emitted FPd op. SU must be unbuffered (asserted). When a
// previous FPd op is recorded, the result is true exactly if the cycle index
// SU would get differs from LastFPdOpCycleIdx by 3, in either direction.
371 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit
*SU
) const {
372 assert (SU
->isUnbuffered
);
373 // If this is the first FPd op, it should be scheduled high.
374 if (LastFPdOpCycleIdx
== UINT_MAX
)
376 // If this is not the first FPd op, it should go into the other side
377 // of the processor to use the other FPd unit there. This should
378 // generally happen if two FPd ops are placed with 2 other
379 // instructions between them (modulo 6).
380 unsigned SUCycleIdx
= getCurrCycleIdx(SU
);
// Both indices are unsigned, so subtract the smaller from the larger.
381 if (LastFPdOpCycleIdx
> SUCycleIdx
)
382 return ((LastFPdOpCycleIdx
- SUCycleIdx
) == 3);
383 return ((SUCycleIdx
- LastFPdOpCycleIdx
) == 3);
// Computes a cost for SU with respect to processor resource usage. For an
// unbuffered (FPd) op the cost is INT_MIN when SU is at the preferred
// distance from the previous FPd op and INT_MAX otherwise. For other
// instructions, when a critical resource is selected, the cost is taken
// from the ReleaseAtCycle of SU's entry for that resource.
386 int SystemZHazardRecognizer::
387 resourcesCost(SUnit
*SU
) {
390 const MCSchedClassDesc
*SC
= getSchedClass(SU
);
394 // For a FPd op, either return min or max value as indicated by the
395 // distance to any prior FPd op.
396 if (SU
->isUnbuffered
)
397 Cost
= (isFPdOpPreferred_distance(SU
) ? INT_MIN
: INT_MAX
);
398 // For other instructions, give a cost to the use of the critical resource.
399 else if (CriticalResourceIdx
!= UINT_MAX
) {
// Look for the critical resource among those written by SU's class.
400 for (TargetSchedModel::ProcResIter
401 PI
= SchedModel
->getWriteProcResBegin(SC
),
402 PE
= SchedModel
->getWriteProcResEnd(SC
); PI
!= PE
; ++PI
)
403 if (PI
->ProcResourceIdx
== CriticalResourceIdx
)
404 Cost
= PI
->ReleaseAtCycle
;
// Updates the recognizer state for a single MachineInstr by wrapping it in a
// temporary SUnit and passing that to EmitInstruction(). The SUnit flags are
// derived from MI and from the BufferSize of the processor resources used by
// MI's scheduling class. Afterwards the decoder group is adjusted depending
// on TakenBranch and on whether MI is a branch, return or trap.
410 void SystemZHazardRecognizer::emitInstruction(MachineInstr
*MI
,
412 // Make a temporary SUnit.
415 // Set interesting flags.
416 SU
.isCall
= MI
->isCall();
418 const MCSchedClassDesc
*SC
= SchedModel
->resolveSchedClass(MI
);
419 for (const MCWriteProcResEntry
&PRE
:
420 make_range(SchedModel
->getWriteProcResBegin(SC
),
421 SchedModel
->getWriteProcResEnd(SC
))) {
// The BufferSize of each resource selects which SUnit flag gets set.
422 switch (SchedModel
->getProcResource(PRE
.ProcResourceIdx
)->BufferSize
) {
424 SU
.hasReservedResource
= true;
427 SU
.isUnbuffered
= true;
// Remember the group size before emission; the not-taken branch handling
// below needs it.
434 unsigned GroupSizeBeforeEmit
= CurrGroupSize
;
435 EmitInstruction(&SU
);
437 if (!TakenBranch
&& isBranchRetTrap(MI
)) {
438 // NT Branch on second slot ends group.
439 if (GroupSizeBeforeEmit
== 1)
443 if (TakenBranch
&& CurrGroupSize
> 0)
// The only terminators expected here are branches, returns and traps.
446 assert ((!MI
->isTerminator() || isBranchRetTrap(MI
)) &&
447 "Scheduler: unhandled terminator!");
// Copies scheduling state from Incoming into this recognizer. The
// assignments below cover the current decoder group size (and, in debug
// builds, its debug text), the processor resource counters and critical
// resource index, the last FPd cycle index, and the group count.
450 void SystemZHazardRecognizer::
451 copyState(SystemZHazardRecognizer
*Incoming
) {
452 // Current decoder group
453 CurrGroupSize
= Incoming
->CurrGroupSize
;
454 LLVM_DEBUG(CurGroupDbg
= Incoming
->CurGroupDbg
;);
456 // Processor resources
457 ProcResourceCounters
= Incoming
->ProcResourceCounters
;
458 CriticalResourceIdx
= Incoming
->CriticalResourceIdx
;
// Cycle index of the last FPd op, and the running group count.
461 LastFPdOpCycleIdx
= Incoming
->LastFPdOpCycleIdx
;
462 GrpCount
= Incoming
->GrpCount
;