1 //===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the itinerary class data for the POWER8 processor.
12 //===----------------------------------------------------------------------===//
14 // Scheduling for the P8 involves tracking two types of resources:
15 // 1. The dispatch bundle slots
16 // 2. The functional unit resources
// Dispatch-slot resources (the "dispatch bundle slots" listed above).
// The itineraries in this file draw non-branch classes from P8_DU1..P8_DU6;
// P8_DU7 and P8_DU8 are referenced only by the branch class IIC_BrB.
19 def P8_DU1 : FuncUnit;
20 def P8_DU2 : FuncUnit;
21 def P8_DU3 : FuncUnit;
22 def P8_DU4 : FuncUnit;
23 def P8_DU5 : FuncUnit;
24 def P8_DU6 : FuncUnit;
25 def P8_DU7 : FuncUnit; // Only branch instructions will use DU7,DU8
26 def P8_DU8 : FuncUnit;
// Functional-unit (execution pipeline) resources.
28 // Up to 10 insns can be issued per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
30 def P8_LU1 : FuncUnit; // Loads or fixed-point operations 1
31 def P8_LU2 : FuncUnit; // Loads or fixed-point operations 2
33 // Load/Store pipelines can handle stores, fixed-point loads, and simple
34 // fixed-point operations.
35 def P8_LSU1 : FuncUnit; // Load/Store pipeline 1
36 def P8_LSU2 : FuncUnit; // Load/Store pipeline 2
39 def P8_FXU1 : FuncUnit; // FX pipeline 1
40 def P8_FXU2 : FuncUnit; // FX pipeline 2
42 // The Floating-Point Unit (FPU) and Vector Media Extension (VMX) units
43 // are combined on P7 and newer into a Vector Scalar Unit (VSU).
44 // The P8 instruction latency documents still refer to the unit as the
45 // FPU, so keep in mind that FPU==VSU.
46 // In contrast to the P7, the VMX units on P8 are symmetric, so there is no
47 // need to split vector integer ops or 128-bit load/store/perms to specific units.
48 def P8_FPU1 : FuncUnit; // VS pipeline 1
49 def P8_FPU2 : FuncUnit; // VS pipeline 2
51 def P8_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
52 def P8_BRU : FuncUnit; // BR unit
// Itinerary table for the POWER8. ProcessorItineraries takes the list of
// functional units, a list of bypasses (empty here), and the per-class
// InstrItinData entries (see LLVM's TargetItinerary.td).
//
// Each entry starts with one or more dispatch-slot stages (P8_DU*) followed
// by the execution-pipeline stage(s). A trailing 0 on an InstrStage is its
// time-increment argument: the next stage is reserved starting in the same
// cycle instead of after this stage completes.
54 def P8Itineraries : ProcessorItineraries<
55 [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6, P8_DU7, P8_DU8,
56 P8_LU1, P8_LU2, P8_LSU1, P8_LSU2, P8_FXU1, P8_FXU2,
57 P8_FPU1, P8_FPU2, P8_CRU, P8_BRU], [], [
// ---- Fixed-point (integer) classes ----
58 InstrItinData<IIC_IntSimple , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
59 P8_DU4, P8_DU5, P8_DU6], 0>,
60 InstrStage<1, [P8_FXU1, P8_FXU2,
// General integer ops may execute on any FXU, LU or LSU pipeline.
64 InstrItinData<IIC_IntGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
65 P8_DU4, P8_DU5, P8_DU6], 0>,
66 InstrStage<1, [P8_FXU1, P8_FXU2, P8_LU1,
67 P8_LU2, P8_LSU1, P8_LSU2]>],
// IIC_IntISEL dispatches only from DU1 and reserves both an FXU and the BRU.
69 InstrItinData<IIC_IntISEL, [InstrStage<1, [P8_DU1], 0>,
70 InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
71 InstrStage<1, [P8_BRU]>],
73 InstrItinData<IIC_IntCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
74 P8_DU4, P8_DU5, P8_DU6], 0>,
75 InstrStage<1, [P8_FXU1, P8_FXU2]>],
// Divides hold an FXU pipeline for several cycles: 15 for IIC_IntDivW and
// 23 for IIC_IntDivD.
77 InstrItinData<IIC_IntDivW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
78 P8_DU4, P8_DU5, P8_DU6], 0>,
79 InstrStage<15, [P8_FXU1, P8_FXU2]>],
81 InstrItinData<IIC_IntDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
82 P8_DU4, P8_DU5, P8_DU6], 0>,
83 InstrStage<23, [P8_FXU1, P8_FXU2]>],
// Multiplies, rotates, shifts and traps each occupy one FXU for one cycle.
85 InstrItinData<IIC_IntMulHW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
86 P8_DU4, P8_DU5, P8_DU6], 0>,
87 InstrStage<1, [P8_FXU1, P8_FXU2]>],
89 InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
90 P8_DU4, P8_DU5, P8_DU6], 0>,
91 InstrStage<1, [P8_FXU1, P8_FXU2]>],
93 InstrItinData<IIC_IntMulLI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
94 P8_DU4, P8_DU5, P8_DU6], 0>,
95 InstrStage<1, [P8_FXU1, P8_FXU2]>],
97 InstrItinData<IIC_IntRotate , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
98 P8_DU4, P8_DU5, P8_DU6], 0>,
99 InstrStage<1, [P8_FXU1, P8_FXU2]>],
101 InstrItinData<IIC_IntRotateD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
102 P8_DU4, P8_DU5, P8_DU6], 0>,
103 InstrStage<1, [P8_FXU1, P8_FXU2]>],
105 InstrItinData<IIC_IntShift , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
106 P8_DU4, P8_DU5, P8_DU6], 0>,
107 InstrStage<1, [P8_FXU1, P8_FXU2]>],
109 InstrItinData<IIC_IntTrapW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
110 P8_DU4, P8_DU5, P8_DU6], 0>,
111 InstrStage<1, [P8_FXU1, P8_FXU2]>],
113 InstrItinData<IIC_IntTrapD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
114 P8_DU4, P8_DU5, P8_DU6], 0>,
115 InstrStage<1, [P8_FXU1, P8_FXU2]>],
// ---- Branch and CR classes ----
// Branches are the only class using the DU7/DU8 dispatch slots and the BRU.
117 InstrItinData<IIC_BrB , [InstrStage<1, [P8_DU7, P8_DU8], 0>,
118 InstrStage<1, [P8_BRU]>],
120 // FIXME - the Br* groups below are not branch related, so should probably
122 // IIC_BrCR consists of the cr* instructions. (crand,crnor,creqv, etc).
123 // and should be 'First' in dispatch.
124 InstrItinData<IIC_BrCR , [InstrStage<1, [P8_DU1], 0>,
125 InstrStage<1, [P8_CRU]>],
127 // IIC_BrMCR consists of the mcrf instruction.
128 InstrItinData<IIC_BrMCR , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
129 P8_DU4, P8_DU5, P8_DU6], 0>,
130 InstrStage<1, [P8_CRU]>],
132 // IIC_BrMCRX consists of mcrxr (obsolete instruction) and mtcrf, which
133 // should be first in the dispatch group.
134 InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>,
135 InstrStage<1, [P8_FXU1, P8_FXU2]>],
// NOTE(review): IIC_BrMCRX is listed a second time here with the same stages
// as the entry directly above -- confirm whether the duplicate is intentional.
137 InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>,
138 InstrStage<1, [P8_FXU1, P8_FXU2]>],
// ---- Load classes ----
140 InstrItinData<IIC_LdStLoad , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
141 P8_DU4, P8_DU5, P8_DU6], 0>,
142 InstrStage<1, [P8_LSU1, P8_LSU2,
// Update-form loads reserve DU1 and DU2, a load-capable pipeline and an FXU.
145 InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P8_DU1], 0>,
146 InstrStage<1, [P8_DU2], 0>,
147 InstrStage<1, [P8_LSU1, P8_LSU2,
148 P8_LU1, P8_LU2 ], 0>,
149 InstrStage<1, [P8_FXU1, P8_FXU2]>],
151 // Update-Indexed form loads/stores are no longer first and last in the
152 // dispatch group. They are simply cracked, so require DU1,DU2.
153 InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P8_DU1], 0>,
154 InstrStage<1, [P8_DU2], 0>,
155 InstrStage<1, [P8_LSU1, P8_LSU2,
157 InstrStage<1, [P8_FXU1, P8_FXU2]>],
159 InstrItinData<IIC_LdStLD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
160 P8_DU4, P8_DU5, P8_DU6], 0>,
161 InstrStage<1, [P8_LSU1, P8_LSU2,
164 InstrItinData<IIC_LdStLDU , [InstrStage<1, [P8_DU1], 0>,
165 InstrStage<1, [P8_DU2], 0>,
166 InstrStage<1, [P8_LSU1, P8_LSU2,
168 InstrStage<1, [P8_FXU1, P8_FXU2]>],
170 InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P8_DU1], 0>,
171 InstrStage<1, [P8_DU2], 0>,
172 InstrStage<1, [P8_LSU1, P8_LSU2,
174 InstrStage<1, [P8_FXU1, P8_FXU2]>],
// Floating-point and vector loads are listed on the LU pipelines only.
176 InstrItinData<IIC_LdStLFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
177 P8_DU4, P8_DU5, P8_DU6], 0>,
178 InstrStage<1, [P8_LU1, P8_LU2]>],
180 InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
181 P8_DU4, P8_DU5, P8_DU6], 0>,
182 InstrStage<1, [P8_LU1, P8_LU2]>],
184 InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P8_DU1], 0>,
185 InstrStage<1, [P8_DU2], 0>,
186 InstrStage<1, [P8_LU1, P8_LU2], 0>,
187 InstrStage<1, [P8_FXU1, P8_FXU2]>],
189 InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P8_DU1], 0>,
190 InstrStage<1, [P8_DU2], 0>,
191 InstrStage<1, [P8_LU1, P8_LU2], 0>,
192 InstrStage<1, [P8_FXU1, P8_FXU2]>],
194 InstrItinData<IIC_LdStLHA , [InstrStage<1, [P8_DU1], 0>,
195 InstrStage<1, [P8_DU2], 0>,
196 InstrStage<1, [P8_LSU1, P8_LSU2,
198 InstrStage<1, [P8_FXU1, P8_FXU2,
201 InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P8_DU1], 0>,
202 InstrStage<1, [P8_DU2], 0>,
203 InstrStage<1, [P8_LSU1, P8_LSU2,
205 InstrStage<1, [P8_FXU1, P8_FXU2]>,
206 InstrStage<1, [P8_FXU1, P8_FXU2]>],
208 // first+last in dispatch group.
// (Modelled by reserving each of DU1..DU6 as its own stage.)
209 InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P8_DU1], 0>,
210 InstrStage<1, [P8_DU2], 0>,
211 InstrStage<1, [P8_DU3], 0>,
212 InstrStage<1, [P8_DU4], 0>,
213 InstrStage<1, [P8_DU5], 0>,
214 InstrStage<1, [P8_DU6], 0>,
215 InstrStage<1, [P8_LSU1, P8_LSU2,
217 InstrStage<1, [P8_FXU1, P8_FXU2]>,
218 InstrStage<1, [P8_FXU1, P8_FXU2]>],
220 InstrItinData<IIC_LdStLWA , [InstrStage<1, [P8_DU1], 0>,
221 InstrStage<1, [P8_DU2], 0>,
222 InstrStage<1, [P8_LSU1, P8_LSU2,
224 InstrStage<1, [P8_FXU1, P8_FXU2]>],
// IIC_LdStLWARX reserves DU1..DU4; IIC_LdStLDARX reserves DU1..DU6.
226 InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P8_DU1], 0>,
227 InstrStage<1, [P8_DU2], 0>,
228 InstrStage<1, [P8_DU3], 0>,
229 InstrStage<1, [P8_DU4], 0>,
230 InstrStage<1, [P8_LSU1, P8_LSU2,
234 InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P8_DU1], 0>,
235 InstrStage<1, [P8_DU2], 0>,
236 InstrStage<1, [P8_DU3], 0>,
237 InstrStage<1, [P8_DU4], 0>,
238 InstrStage<1, [P8_DU5], 0>,
239 InstrStage<1, [P8_DU6], 0>,
240 InstrStage<1, [P8_LSU1, P8_LSU2,
243 InstrItinData<IIC_LdStLMW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
244 P8_DU4, P8_DU5, P8_DU6], 0>,
245 InstrStage<1, [P8_LSU1, P8_LSU2,
// ---- Store classes ----
248 // Stores are dual-issued from the issue queue, so may only take up one
249 // dispatch slot. The instruction will be broken into two IOPS. The agen
250 // op is issued to the LSU, and the data op (register fetch) is issued
251 // to either the LU (GPR store) or the VSU (FPR store).
252 InstrItinData<IIC_LdStStore , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
253 P8_DU4, P8_DU5, P8_DU6], 0>,
254 InstrStage<1, [P8_LSU1, P8_LSU2]>,
255 InstrStage<1, [P8_LU1, P8_LU2]>],
257 InstrItinData<IIC_LdStSTD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
258 P8_DU4, P8_DU5, P8_DU6], 0>,
259 InstrStage<1, [P8_LU1, P8_LU2,
262 InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P8_DU1], 0>,
263 InstrStage<1, [P8_DU2], 0>,
264 InstrStage<1, [P8_LU1, P8_LU2,
265 P8_LSU1, P8_LSU2], 0>,
266 InstrStage<1, [P8_FXU1, P8_FXU2]>],
// IIC_LdStSTDUX reserves every non-branch dispatch slot (DU1..DU6).
269 InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P8_DU1], 0>,
270 InstrStage<1, [P8_DU2], 0>,
271 InstrStage<1, [P8_DU3], 0>,
272 InstrStage<1, [P8_DU4], 0>,
273 InstrStage<1, [P8_DU5], 0>,
274 InstrStage<1, [P8_DU6], 0>,
275 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
276 InstrStage<1, [P8_FXU1, P8_FXU2]>,
277 InstrStage<1, [P8_FXU1, P8_FXU2]>],
// FP and vector stores pair an LSU stage with an FPU (VSU) stage.
279 InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
280 P8_DU4, P8_DU5, P8_DU6], 0>,
281 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
282 InstrStage<1, [P8_FPU1, P8_FPU2]>],
284 InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P8_DU1], 0>,
285 InstrStage<1, [P8_DU2], 0>,
286 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
287 InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
288 InstrStage<1, [P8_FPU1, P8_FPU2]>],
290 InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
291 P8_DU4, P8_DU5, P8_DU6], 0>,
292 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
293 InstrStage<1, [P8_FPU1, P8_FPU2]>],
// IIC_LdStSTDCX and IIC_LdStSTWCX reserve every non-branch dispatch slot
// (DU1..DU6) plus an LSU and an LU.
295 InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P8_DU1], 0>,
296 InstrStage<1, [P8_DU2], 0>,
297 InstrStage<1, [P8_DU3], 0>,
298 InstrStage<1, [P8_DU4], 0>,
299 InstrStage<1, [P8_DU5], 0>,
300 InstrStage<1, [P8_DU6], 0>,
301 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
302 InstrStage<1, [P8_LU1, P8_LU2]>],
304 InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P8_DU1], 0>,
305 InstrStage<1, [P8_DU2], 0>,
306 InstrStage<1, [P8_DU3], 0>,
307 InstrStage<1, [P8_DU4], 0>,
308 InstrStage<1, [P8_DU5], 0>,
309 InstrStage<1, [P8_DU6], 0>,
310 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
311 InstrStage<1, [P8_LU1, P8_LU2]>],
// ---- SPR / CR move classes (all dispatch from DU1 only) ----
313 InstrItinData<IIC_SprMFCR , [InstrStage<1, [P8_DU1], 0>,
314 InstrStage<1, [P8_CRU]>],
316 InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P8_DU1], 0>,
317 InstrStage<1, [P8_CRU]>],
319 InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P8_DU1], 0>,
320 InstrStage<1, [P8_FXU1, P8_FXU2]>],
// ---- Floating-point classes ----
// Every FP class below uses any of DU1..DU6 and one FPU (VSU) pipeline for a
// single stage cycle.
322 InstrItinData<IIC_FPGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
323 P8_DU4, P8_DU5, P8_DU6], 0>,
324 InstrStage<1, [P8_FPU1, P8_FPU2]>],
326 InstrItinData<IIC_FPAddSub , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
327 P8_DU4, P8_DU5, P8_DU6], 0>,
328 InstrStage<1, [P8_FPU1, P8_FPU2]>],
330 InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
331 P8_DU4, P8_DU5, P8_DU6], 0>,
332 InstrStage<1, [P8_FPU1, P8_FPU2]>],
334 InstrItinData<IIC_FPDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
335 P8_DU4, P8_DU5, P8_DU6], 0>,
336 InstrStage<1, [P8_FPU1, P8_FPU2]>],
338 InstrItinData<IIC_FPDivS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
339 P8_DU4, P8_DU5, P8_DU6], 0>,
340 InstrStage<1, [P8_FPU1, P8_FPU2]>],
342 InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
343 P8_DU4, P8_DU5, P8_DU6], 0>,
344 InstrStage<1, [P8_FPU1, P8_FPU2]>],
346 InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
347 P8_DU4, P8_DU5, P8_DU6], 0>,
348 InstrStage<1, [P8_FPU1, P8_FPU2]>],
350 InstrItinData<IIC_FPFused , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
351 P8_DU4, P8_DU5, P8_DU6], 0>,
352 InstrStage<1, [P8_FPU1, P8_FPU2]>],
354 InstrItinData<IIC_FPRes , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
355 P8_DU4, P8_DU5, P8_DU6], 0>,
356 InstrStage<1, [P8_FPU1, P8_FPU2]>],
// ---- Vector classes ----
// Vector classes dispatch from DU1 only (IIC_VecPerm: DU1 or DU2) and run on
// either FPU (VSU) pipeline.
358 InstrItinData<IIC_VecGeneral , [InstrStage<1, [P8_DU1], 0>,
359 InstrStage<1, [P8_FPU1, P8_FPU2]>],
361 InstrItinData<IIC_VecVSL , [InstrStage<1, [P8_DU1], 0>,
362 InstrStage<1, [P8_FPU1, P8_FPU2]>],
364 InstrItinData<IIC_VecVSR , [InstrStage<1, [P8_DU1], 0>,
365 InstrStage<1, [P8_FPU1, P8_FPU2]>],
367 InstrItinData<IIC_VecFP , [InstrStage<1, [P8_DU1], 0>,
368 InstrStage<1, [P8_FPU1, P8_FPU2]>],
370 InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P8_DU1], 0>,
371 InstrStage<1, [P8_FPU1, P8_FPU2]>],
373 InstrItinData<IIC_VecFPRound , [InstrStage<1, [P8_DU1], 0>,
374 InstrStage<1, [P8_FPU1, P8_FPU2]>],
376 InstrItinData<IIC_VecComplex , [InstrStage<1, [P8_DU1], 0>,
377 InstrStage<1, [P8_FPU1, P8_FPU2]>],
379 InstrItinData<IIC_VecPerm , [InstrStage<1, [P8_DU1, P8_DU2], 0>,
380 InstrStage<1, [P8_FPU1, P8_FPU2]>],
384 // ===---------------------------------------------------------------------===//
385 // P8 machine model for scheduling and other instruction cost heuristics.
386 // P8 has an 8 insn dispatch group (6 non-branch, 2 branch) and can issue up
387 // to 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
389 def P8Model : SchedMachineModel {
390 let IssueWidth = 8; // up to 8 instructions dispatched per cycle.
391 // up to six non-branch instructions.
392 // up to two branches in a dispatch group.
394 let LoadLatency = 3; // Optimistic load latency assuming bypass.
395 // This is overridden by OperandCycles if the
396 // Itineraries are queried instead.
397 let MispredictPenalty = 16;
399 // Try to make sure we have at least 10 dispatch groups in a loop.
400 let LoopMicroOpBufferSize = 60;
402 let CompleteModel = 0;
404 let Itineraries = P8Itineraries;