1 //===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the itinerary class data for the POWER8 processor.
11 //===----------------------------------------------------------------------===//
13 // Scheduling for the P8 involves tracking two types of resources:
14 // 1. The dispatch bundle slots
15 // 2. The functional unit resources
18 def P8_DU1 : FuncUnit; // Dispatch slots DU1-DU6 take the non-branch instructions
19 def P8_DU2 : FuncUnit;
20 def P8_DU3 : FuncUnit;
21 def P8_DU4 : FuncUnit;
22 def P8_DU5 : FuncUnit;
23 def P8_DU6 : FuncUnit;
24 def P8_DU7 : FuncUnit; // Only branch instructions will use DU7 and DU8
25 def P8_DU8 : FuncUnit;
27 // Issue up to 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
29 def P8_LU1 : FuncUnit; // Load unit 1: loads or fixed-point operations
30 def P8_LU2 : FuncUnit; // Load unit 2: loads or fixed-point operations
32 // Load/Store pipelines can handle Stores, fixed-point loads, and simple
33 // fixed-point operations.
34 def P8_LSU1 : FuncUnit; // Load/Store pipeline 1
35 def P8_LSU2 : FuncUnit; // Load/Store pipeline 2
38 def P8_FXU1 : FuncUnit; // Fixed-point (FX) pipeline 1
39 def P8_FXU2 : FuncUnit; // Fixed-point (FX) pipeline 2
41 // The Floating-Point Unit (FPU) and Vector Media Extension (VMX) units
42 // are combined on P7 and newer into a Vector Scalar Unit (VSU).
43 // The P8 instruction latency documents still refer to the unit as the
44 // FPU, so keep in mind that FPU==VSU.
45 // In contrast to the P7, the VMX units on P8 are symmetric, so no need to
46 // split vector integer ops or 128-bit load/store/perms to the specific units.
47 def P8_FPU1 : FuncUnit; // Vector Scalar (VS) pipeline 1; named FPU to match the P8 docs
48 def P8_FPU2 : FuncUnit; // Vector Scalar (VS) pipeline 2; named FPU to match the P8 docs
50 def P8_CRU : FuncUnit; // Condition-register (CR) unit: CR logicals and move-from-SPRs
51 def P8_BRU : FuncUnit; // Branch (BR) unit
53 def P8Itineraries : ProcessorItineraries<
54 [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6, P8_DU7, P8_DU8,
55 P8_LU1, P8_LU2, P8_LSU1, P8_LSU2, P8_FXU1, P8_FXU2,
56 P8_FPU1, P8_FPU2, P8_CRU, P8_BRU], [], [
57 InstrItinData<IIC_IntSimple , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
58 P8_DU4, P8_DU5, P8_DU6], 0>,
59 InstrStage<1, [P8_FXU1, P8_FXU2,
63 InstrItinData<IIC_IntGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
64 P8_DU4, P8_DU5, P8_DU6], 0>,
65 InstrStage<1, [P8_FXU1, P8_FXU2, P8_LU1,
66 P8_LU2, P8_LSU1, P8_LSU2]>],
68 InstrItinData<IIC_IntISEL, [InstrStage<1, [P8_DU1], 0>,
69 InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
70 InstrStage<1, [P8_BRU]>],
72 InstrItinData<IIC_IntCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
73 P8_DU4, P8_DU5, P8_DU6], 0>,
74 InstrStage<1, [P8_FXU1, P8_FXU2]>],
76 InstrItinData<IIC_IntDivW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
77 P8_DU4, P8_DU5, P8_DU6], 0>,
78 InstrStage<15, [P8_FXU1, P8_FXU2]>],
80 InstrItinData<IIC_IntDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
81 P8_DU4, P8_DU5, P8_DU6], 0>,
82 InstrStage<23, [P8_FXU1, P8_FXU2]>],
84 InstrItinData<IIC_IntMulHW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
85 P8_DU4, P8_DU5, P8_DU6], 0>,
86 InstrStage<1, [P8_FXU1, P8_FXU2]>],
88 InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
89 P8_DU4, P8_DU5, P8_DU6], 0>,
90 InstrStage<1, [P8_FXU1, P8_FXU2]>],
92 InstrItinData<IIC_IntMulHD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
93 P8_DU4, P8_DU5, P8_DU6], 0>,
94 InstrStage<1, [P8_FXU1, P8_FXU2]>],
96 InstrItinData<IIC_IntMulLI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
97 P8_DU4, P8_DU5, P8_DU6], 0>,
98 InstrStage<1, [P8_FXU1, P8_FXU2]>],
100 InstrItinData<IIC_IntRotate , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
101 P8_DU4, P8_DU5, P8_DU6], 0>,
102 InstrStage<1, [P8_FXU1, P8_FXU2]>],
104 InstrItinData<IIC_IntRotateD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
105 P8_DU4, P8_DU5, P8_DU6], 0>,
106 InstrStage<1, [P8_FXU1, P8_FXU2]>],
108 InstrItinData<IIC_IntRotateDI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
109 P8_DU4, P8_DU5, P8_DU6], 0>,
110 InstrStage<1, [P8_FXU1, P8_FXU2]>],
112 InstrItinData<IIC_IntShift , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
113 P8_DU4, P8_DU5, P8_DU6], 0>,
114 InstrStage<1, [P8_FXU1, P8_FXU2]>],
116 InstrItinData<IIC_IntTrapW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
117 P8_DU4, P8_DU5, P8_DU6], 0>,
118 InstrStage<1, [P8_FXU1, P8_FXU2]>],
120 InstrItinData<IIC_IntTrapD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
121 P8_DU4, P8_DU5, P8_DU6], 0>,
122 InstrStage<1, [P8_FXU1, P8_FXU2]>],
124 InstrItinData<IIC_BrB , [InstrStage<1, [P8_DU7, P8_DU8], 0>,
125 InstrStage<1, [P8_BRU]>],
127 // FIXME - the Br* groups below are not branch related, so should probably
128 // be renamed.
129 // IIC_BrCR consists of the cr* instructions (crand, crnor, creqv, etc.),
130 // which should be 'First' in dispatch.
131 InstrItinData<IIC_BrCR , [InstrStage<1, [P8_DU1], 0>,
132 InstrStage<1, [P8_CRU]>],
134 // IIC_BrMCR consists of the mcrf instruction.
135 InstrItinData<IIC_BrMCR , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
136 P8_DU4, P8_DU5, P8_DU6], 0>,
137 InstrStage<1, [P8_CRU]>],
139 // IIC_BrMCRX consists of mcrxr (obsolete instruction) and mtcrf, which
140 // should be first in the dispatch group.
141 InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>,
142 InstrStage<1, [P8_FXU1, P8_FXU2]>],
144 InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>,
145 InstrStage<1, [P8_FXU1, P8_FXU2]>],
147 InstrItinData<IIC_LdStLoad , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
148 P8_DU4, P8_DU5, P8_DU6], 0>,
149 InstrStage<1, [P8_LSU1, P8_LSU2,
152 InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P8_DU1], 0>,
153 InstrStage<1, [P8_DU2], 0>,
154 InstrStage<1, [P8_LSU1, P8_LSU2,
155 P8_LU1, P8_LU2 ], 0>,
156 InstrStage<1, [P8_FXU1, P8_FXU2]>],
158 // Update-Indexed form loads/stores are no longer first and last in the
159 // dispatch group. They are simply cracked, so require DU1,DU2.
160 InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P8_DU1], 0>,
161 InstrStage<1, [P8_DU2], 0>,
162 InstrStage<1, [P8_LSU1, P8_LSU2,
164 InstrStage<1, [P8_FXU1, P8_FXU2]>],
166 InstrItinData<IIC_LdStLD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
167 P8_DU4, P8_DU5, P8_DU6], 0>,
168 InstrStage<1, [P8_LSU1, P8_LSU2,
171 InstrItinData<IIC_LdStLDU , [InstrStage<1, [P8_DU1], 0>,
172 InstrStage<1, [P8_DU2], 0>,
173 InstrStage<1, [P8_LSU1, P8_LSU2,
175 InstrStage<1, [P8_FXU1, P8_FXU2]>],
177 InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P8_DU1], 0>,
178 InstrStage<1, [P8_DU2], 0>,
179 InstrStage<1, [P8_LSU1, P8_LSU2,
181 InstrStage<1, [P8_FXU1, P8_FXU2]>],
183 InstrItinData<IIC_LdStLFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
184 P8_DU4, P8_DU5, P8_DU6], 0>,
185 InstrStage<1, [P8_LU1, P8_LU2]>],
187 InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
188 P8_DU4, P8_DU5, P8_DU6], 0>,
189 InstrStage<1, [P8_LU1, P8_LU2]>],
191 InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P8_DU1], 0>,
192 InstrStage<1, [P8_DU2], 0>,
193 InstrStage<1, [P8_LU1, P8_LU2], 0>,
194 InstrStage<1, [P8_FXU1, P8_FXU2]>],
196 InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P8_DU1], 0>,
197 InstrStage<1, [P8_DU2], 0>,
198 InstrStage<1, [P8_LU1, P8_LU2], 0>,
199 InstrStage<1, [P8_FXU1, P8_FXU2]>],
201 InstrItinData<IIC_LdStLHA , [InstrStage<1, [P8_DU1], 0>,
202 InstrStage<1, [P8_DU2], 0>,
203 InstrStage<1, [P8_LSU1, P8_LSU2,
205 InstrStage<1, [P8_FXU1, P8_FXU2,
208 InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P8_DU1], 0>,
209 InstrStage<1, [P8_DU2], 0>,
210 InstrStage<1, [P8_LSU1, P8_LSU2,
212 InstrStage<1, [P8_FXU1, P8_FXU2]>,
213 InstrStage<1, [P8_FXU1, P8_FXU2]>],
215 // IIC_LdStLHAUX is first+last in its dispatch group.
216 InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P8_DU1], 0>,
217 InstrStage<1, [P8_DU2], 0>,
218 InstrStage<1, [P8_DU3], 0>,
219 InstrStage<1, [P8_DU4], 0>,
220 InstrStage<1, [P8_DU5], 0>,
221 InstrStage<1, [P8_DU6], 0>,
222 InstrStage<1, [P8_LSU1, P8_LSU2,
224 InstrStage<1, [P8_FXU1, P8_FXU2]>,
225 InstrStage<1, [P8_FXU1, P8_FXU2]>],
227 InstrItinData<IIC_LdStLWA , [InstrStage<1, [P8_DU1], 0>,
228 InstrStage<1, [P8_DU2], 0>,
229 InstrStage<1, [P8_LSU1, P8_LSU2,
231 InstrStage<1, [P8_FXU1, P8_FXU2]>],
233 InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P8_DU1], 0>,
234 InstrStage<1, [P8_DU2], 0>,
235 InstrStage<1, [P8_DU3], 0>,
236 InstrStage<1, [P8_DU4], 0>,
237 InstrStage<1, [P8_LSU1, P8_LSU2,
241 InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P8_DU1], 0>,
242 InstrStage<1, [P8_DU2], 0>,
243 InstrStage<1, [P8_DU3], 0>,
244 InstrStage<1, [P8_DU4], 0>,
245 InstrStage<1, [P8_DU5], 0>,
246 InstrStage<1, [P8_DU6], 0>,
247 InstrStage<1, [P8_LSU1, P8_LSU2,
250 InstrItinData<IIC_LdStLMW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
251 P8_DU4, P8_DU5, P8_DU6], 0>,
252 InstrStage<1, [P8_LSU1, P8_LSU2,
255 // Stores are dual-issued from the issue queue, so may only take up one
256 // dispatch slot. The instruction will be broken into two IOPS. The agen
257 // op is issued to the LSU, and the data op (register fetch) is issued
258 // to either the LU (GPR store) or the VSU (FPR store).
259 InstrItinData<IIC_LdStStore , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
260 P8_DU4, P8_DU5, P8_DU6], 0>,
261 InstrStage<1, [P8_LSU1, P8_LSU2]>,
262 InstrStage<1, [P8_LU1, P8_LU2]>],
264 InstrItinData<IIC_LdStSTD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
265 P8_DU4, P8_DU5, P8_DU6], 0>,
266 InstrStage<1, [P8_LU1, P8_LU2,
269 InstrItinData<IIC_LdStSTU , [InstrStage<1, [P8_DU1], 0>,
270 InstrStage<1, [P8_DU2], 0>,
271 InstrStage<1, [P8_LU1, P8_LU2,
272 P8_LSU1, P8_LSU2], 0>,
273 InstrStage<1, [P8_FXU1, P8_FXU2]>],
276 InstrItinData<IIC_LdStSTUX , [InstrStage<1, [P8_DU1], 0>,
277 InstrStage<1, [P8_DU2], 0>,
278 InstrStage<1, [P8_DU3], 0>,
279 InstrStage<1, [P8_DU4], 0>,
280 InstrStage<1, [P8_DU5], 0>,
281 InstrStage<1, [P8_DU6], 0>,
282 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
283 InstrStage<1, [P8_FXU1, P8_FXU2]>,
284 InstrStage<1, [P8_FXU1, P8_FXU2]>],
286 InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
287 P8_DU4, P8_DU5, P8_DU6], 0>,
288 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
289 InstrStage<1, [P8_FPU1, P8_FPU2]>],
291 InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P8_DU1], 0>,
292 InstrStage<1, [P8_DU2], 0>,
293 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
294 InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
295 InstrStage<1, [P8_FPU1, P8_FPU2]>],
297 InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
298 P8_DU4, P8_DU5, P8_DU6], 0>,
299 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
300 InstrStage<1, [P8_FPU1, P8_FPU2]>],
302 InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P8_DU1], 0>,
303 InstrStage<1, [P8_DU2], 0>,
304 InstrStage<1, [P8_DU3], 0>,
305 InstrStage<1, [P8_DU4], 0>,
306 InstrStage<1, [P8_DU5], 0>,
307 InstrStage<1, [P8_DU6], 0>,
308 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
309 InstrStage<1, [P8_LU1, P8_LU2]>],
311 InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P8_DU1], 0>,
312 InstrStage<1, [P8_DU2], 0>,
313 InstrStage<1, [P8_DU3], 0>,
314 InstrStage<1, [P8_DU4], 0>,
315 InstrStage<1, [P8_DU5], 0>,
316 InstrStage<1, [P8_DU6], 0>,
317 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
318 InstrStage<1, [P8_LU1, P8_LU2]>],
320 InstrItinData<IIC_SprMFCR , [InstrStage<1, [P8_DU1], 0>,
321 InstrStage<1, [P8_CRU]>],
323 InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P8_DU1], 0>,
324 InstrStage<1, [P8_CRU]>],
326 InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P8_DU1], 0>,
327 InstrStage<1, [P8_FXU1, P8_FXU2]>],
329 InstrItinData<IIC_FPGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
330 P8_DU4, P8_DU5, P8_DU6], 0>,
331 InstrStage<1, [P8_FPU1, P8_FPU2]>],
333 InstrItinData<IIC_FPAddSub , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
334 P8_DU4, P8_DU5, P8_DU6], 0>,
335 InstrStage<1, [P8_FPU1, P8_FPU2]>],
337 InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
338 P8_DU4, P8_DU5, P8_DU6], 0>,
339 InstrStage<1, [P8_FPU1, P8_FPU2]>],
341 InstrItinData<IIC_FPDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
342 P8_DU4, P8_DU5, P8_DU6], 0>,
343 InstrStage<1, [P8_FPU1, P8_FPU2]>],
345 InstrItinData<IIC_FPDivS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
346 P8_DU4, P8_DU5, P8_DU6], 0>,
347 InstrStage<1, [P8_FPU1, P8_FPU2]>],
349 InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
350 P8_DU4, P8_DU5, P8_DU6], 0>,
351 InstrStage<1, [P8_FPU1, P8_FPU2]>],
353 InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
354 P8_DU4, P8_DU5, P8_DU6], 0>,
355 InstrStage<1, [P8_FPU1, P8_FPU2]>],
357 InstrItinData<IIC_FPFused , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
358 P8_DU4, P8_DU5, P8_DU6], 0>,
359 InstrStage<1, [P8_FPU1, P8_FPU2]>],
361 InstrItinData<IIC_FPRes , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
362 P8_DU4, P8_DU5, P8_DU6], 0>,
363 InstrStage<1, [P8_FPU1, P8_FPU2]>],
365 InstrItinData<IIC_VecGeneral , [InstrStage<1, [P8_DU1], 0>,
366 InstrStage<1, [P8_FPU1, P8_FPU2]>],
368 InstrItinData<IIC_VecVSL , [InstrStage<1, [P8_DU1], 0>,
369 InstrStage<1, [P8_FPU1, P8_FPU2]>],
371 InstrItinData<IIC_VecVSR , [InstrStage<1, [P8_DU1], 0>,
372 InstrStage<1, [P8_FPU1, P8_FPU2]>],
374 InstrItinData<IIC_VecFP , [InstrStage<1, [P8_DU1], 0>,
375 InstrStage<1, [P8_FPU1, P8_FPU2]>],
377 InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P8_DU1], 0>,
378 InstrStage<1, [P8_FPU1, P8_FPU2]>],
380 InstrItinData<IIC_VecFPRound , [InstrStage<1, [P8_DU1], 0>,
381 InstrStage<1, [P8_FPU1, P8_FPU2]>],
383 InstrItinData<IIC_VecComplex , [InstrStage<1, [P8_DU1], 0>,
384 InstrStage<1, [P8_FPU1, P8_FPU2]>],
386 InstrItinData<IIC_VecPerm , [InstrStage<1, [P8_DU1, P8_DU2], 0>,
387 InstrStage<1, [P8_FPU1, P8_FPU2]>],
391 // ===---------------------------------------------------------------------===//
392 // P8 machine model for scheduling and other instruction cost heuristics.
393 // P8 has an 8 insn dispatch group (6 non-branch, 2 branch) and can issue up
394 // to 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
396 def P8Model : SchedMachineModel {
397 let IssueWidth = 8; // up to 8 instructions dispatched per cycle.
398 // up to six non-branch instructions.
399 // up to two branches in a dispatch group.
401 let LoadLatency = 3; // Optimistic load latency assuming bypass.
402 // This is overridden by OperandCycles if the
403 // Itineraries are queried instead.
404 let MispredictPenalty = 16;
406 // Try to make sure we have at least 10 dispatch groups in a loop.
407 let LoopMicroOpBufferSize = 60;
409 let CompleteModel = 0;
411 let Itineraries = P8Itineraries;