1 //===-- X86Schedule.td - X86 Scheduling Definitions --------*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 //===----------------------------------------------------------------------===//
10 // InstrSchedModel annotations for out-of-order CPUs.
12 // Instructions with folded loads need to read the memory operand immediately,
13 // but other register operands don't have to be read until the load is ready.
14 // These operands are marked with ReadAfterLd.
// Default ReadAfter argument of X86SchedWritePair; used by write pairs that do
// not pass an explicit SchedRead (e.g. WriteALU).
15 def ReadAfterLd : SchedRead;
// Passed by the width-unsuffixed vector/floating-point pairs (e.g. WriteFAdd).
16 def ReadAfterVecLd : SchedRead;
// Passed by the XMM pairs (e.g. WriteFAddX).
17 def ReadAfterVecXLd : SchedRead;
// Passed by both the YMM and ZMM pairs (e.g. WriteFAddY, WriteFAddZ).
18 def ReadAfterVecYLd : SchedRead;
20 // Instructions that move data between general purpose registers and vector
21 // registers may be subject to extra latency due to data bypass delays.
22 // This SchedRead describes a bypass delay caused by data being moved from the
23 // integer unit to the floating point unit.
24 def ReadInt2Fpu : SchedRead;
26 // Instructions with both a load and a store folded are modeled as a folded
// load followed by WriteRMW.
28 def WriteRMW : SchedWrite;
30 // Helper to set SchedWrite ExePorts/Latency/ResourceCycles/NumMicroOps.
// Parameters:
//   SchedRW  - the SchedWrite being described.
//   ExePorts - processor resources forwarded to WriteRes.
//   Lat      - latency of the write.
//   Res      - assigned to ResourceCycles.
//   UOps     - assigned to NumMicroOps.
31 multiclass X86WriteRes<SchedWrite SchedRW,
32 list<ProcResourceKind> ExePorts,
33 int Lat, list<int> Res, int UOps> {
34 def : WriteRes<SchedRW, ExePorts> {
36 let ResourceCycles = Res;
37 let NumMicroOps = UOps;
41 // Most instructions can fold loads, so almost every SchedWrite comes in two
42 // variants: With and without a folded load.
43 // An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite
44 // with a folded load.
45 class X86FoldableSchedWrite : SchedWrite {
46 // The SchedWrite to use when a load is folded into the instruction.
48 // The SchedRead to tag register operands that don't need to be ready
49 // until the folded load has completed.
50 SchedRead ReadAfterFold;
53 // Multiclass that produces a linked pair of SchedWrites.
54 multiclass X86SchedWritePair<SchedRead ReadAfter = ReadAfterLd> {
55 // Register-Memory operation.
57 // Register-Register operation.
58 def NAME : X86FoldableSchedWrite {
// Resolve the sibling record named NAME with an "Ld" suffix and link it as
// the folded-load variant.
59 let Folded = !cast<SchedWrite>(NAME#"Ld");
// Forward the multiclass argument (ReadAfterLd unless the caller overrides it).
60 let ReadAfterFold = ReadAfter;
64 // Helpers to mark SchedWrites as unsupported.
65 multiclass X86WriteResUnsupported<SchedWrite SchedRW> {
66 let Unsupported = 1 in {
67 def : WriteRes<SchedRW, []>;
// Marks both SchedRW and its folded-load counterpart (SchedRW.Folded) as
// unsupported, each with an empty resource list.
70 multiclass X86WriteResPairUnsupported<X86FoldableSchedWrite SchedRW> {
71 let Unsupported = 1 in {
72 def : WriteRes<SchedRW, []>;
73 def : WriteRes<SchedRW.Folded, []>;
77 // Class that wraps X86FoldableSchedWrite for each vector width.
// sScl, s128, s256 and s512 populate the Scl/MMX, XMM, YMM and ZMM fields
// respectively; MMX has no parameter of its own and shares sScl.
78 class X86SchedWriteWidths<X86FoldableSchedWrite sScl,
79 X86FoldableSchedWrite s128,
80 X86FoldableSchedWrite s256,
81 X86FoldableSchedWrite s512> {
82 X86FoldableSchedWrite Scl = sScl; // Scalar float/double operations.
83 X86FoldableSchedWrite MMX = sScl; // MMX operations (shares the scalar argument).
84 X86FoldableSchedWrite XMM = s128; // XMM operations.
85 X86FoldableSchedWrite YMM = s256; // YMM operations.
86 X86FoldableSchedWrite ZMM = s512; // ZMM operations.
89 // Class that wraps X86SchedWriteWidths for each fp vector type.
90 class X86SchedWriteSizes<X86SchedWriteWidths sPS,
91 X86SchedWriteWidths sPD> {
92 X86SchedWriteWidths PS = sPS;
93 X86SchedWriteWidths PD = sPD;
96 // Class that wraps move/load/store triple for a vector width.
97 class X86SchedWriteMoveLS<SchedWrite MoveRR,
100 SchedWrite RR = MoveRR;
101 SchedWrite RM = LoadRM;
102 SchedWrite MR = StoreMR;
105 // Class that wraps X86SchedWriteMoveLS for each vector width.
// sScl, s128, s256 and s512 populate the Scl/MMX, XMM, YMM and ZMM fields
// respectively; MMX has no parameter of its own and shares sScl.
106 class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl,
107 X86SchedWriteMoveLS s128,
108 X86SchedWriteMoveLS s256,
109 X86SchedWriteMoveLS s512> {
110 X86SchedWriteMoveLS Scl = sScl; // Scalar float/double operations.
111 X86SchedWriteMoveLS MMX = sScl; // MMX operations (shares the scalar argument).
112 X86SchedWriteMoveLS XMM = s128; // XMM operations.
113 X86SchedWriteMoveLS YMM = s256; // YMM operations.
114 X86SchedWriteMoveLS ZMM = s512; // ZMM operations.
117 // Loads, stores, and moves, not folded with other operations.
118 def WriteLoad : SchedWrite;
119 def WriteStore : SchedWrite;
120 def WriteStoreNT : SchedWrite;
121 def WriteMove : SchedWrite;
122 def WriteCopy : WriteSequence<[WriteLoad, WriteStore]>; // mem->mem copy
125 defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
126 defm WriteADC : X86SchedWritePair; // Integer ALU + flags op.
// Read-modify-write forms: the folded-load write followed by WriteRMW.
127 def WriteALURMW : WriteSequence<[WriteALULd, WriteRMW]>;
128 def WriteADCRMW : WriteSequence<[WriteADCLd, WriteRMW]>;
129 def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
131 // Integer multiplication
132 defm WriteIMul8 : X86SchedWritePair; // Integer 8-bit multiplication.
133 defm WriteIMul16 : X86SchedWritePair; // Integer 16-bit multiplication.
134 defm WriteIMul16Imm : X86SchedWritePair; // Integer 16-bit multiplication by immediate.
135 defm WriteIMul16Reg : X86SchedWritePair; // Integer 16-bit multiplication by register.
136 defm WriteIMul32 : X86SchedWritePair; // Integer 32-bit multiplication.
137 defm WriteIMul32Imm : X86SchedWritePair; // Integer 32-bit multiplication by immediate.
138 defm WriteIMul32Reg : X86SchedWritePair; // Integer 32-bit multiplication by register.
139 defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
140 defm WriteIMul64Imm : X86SchedWritePair; // Integer 64-bit multiplication by immediate.
141 defm WriteIMul64Reg : X86SchedWritePair; // Integer 64-bit multiplication by register.
142 def WriteIMulH : SchedWrite; // Integer multiplication, high part.
144 def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
145 def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
146 defm WriteCMPXCHG : X86SchedWritePair; // Compare and set, compare and swap.
147 def WriteCMPXCHGRMW : SchedWrite; // Compare and set, compare and swap.
148 def WriteXCHG : SchedWrite; // Compare+Exchange - TODO RMW support.
151 defm WriteDiv8 : X86SchedWritePair;
152 defm WriteDiv16 : X86SchedWritePair;
153 defm WriteDiv32 : X86SchedWritePair;
154 defm WriteDiv64 : X86SchedWritePair;
155 defm WriteIDiv8 : X86SchedWritePair;
156 defm WriteIDiv16 : X86SchedWritePair;
157 defm WriteIDiv32 : X86SchedWritePair;
158 defm WriteIDiv64 : X86SchedWritePair;
160 defm WriteBSF : X86SchedWritePair; // Bit scan forward.
161 defm WriteBSR : X86SchedWritePair; // Bit scan reverse.
162 defm WritePOPCNT : X86SchedWritePair; // Bit population count.
163 defm WriteLZCNT : X86SchedWritePair; // Leading zero count.
164 defm WriteTZCNT : X86SchedWritePair; // Trailing zero count.
165 defm WriteCMOV : X86SchedWritePair; // Conditional move.
166 def WriteFCMOV : SchedWrite; // X87 conditional move.
167 def WriteSETCC : SchedWrite; // Set register based on condition code.
168 def WriteSETCCStore : SchedWrite;
169 def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
171 def WriteBitTest : SchedWrite; // Bit Test
172 def WriteBitTestImmLd : SchedWrite;
173 def WriteBitTestRegLd : SchedWrite;
175 def WriteBitTestSet : SchedWrite; // Bit Test + Set
176 def WriteBitTestSetImmLd : SchedWrite;
177 def WriteBitTestSetRegLd : SchedWrite;
// Read-modify-write forms: the matching *Ld write followed by WriteRMW.
178 def WriteBitTestSetImmRMW : WriteSequence<[WriteBitTestSetImmLd, WriteRMW]>;
179 def WriteBitTestSetRegRMW : WriteSequence<[WriteBitTestSetRegLd, WriteRMW]>;
181 // Integer shifts and rotates.
182 defm WriteShift : X86SchedWritePair;
183 defm WriteShiftCL : X86SchedWritePair;
184 defm WriteRotate : X86SchedWritePair;
185 defm WriteRotateCL : X86SchedWritePair;
187 // Double shift instructions.
188 def WriteSHDrri : SchedWrite;
189 def WriteSHDrrcl : SchedWrite;
190 def WriteSHDmri : SchedWrite;
191 def WriteSHDmrcl : SchedWrite;
193 // BMI1 BEXTR/BLS, BMI2 BZHI
194 defm WriteBEXTR : X86SchedWritePair;
195 defm WriteBLS : X86SchedWritePair;
196 defm WriteBZHI : X86SchedWritePair;
198 // Idioms that clear a register, like xorps %xmm0, %xmm0.
199 // These can often bypass execution ports completely.
200 def WriteZero : SchedWrite;
202 // Branches don't produce values, so they have no latency, but they still
203 // consume resources. Indirect branches can fold loads.
204 defm WriteJump : X86SchedWritePair;
206 // Floating point. This covers both scalar and vector operations.
207 def WriteFLD0 : SchedWrite;
208 def WriteFLD1 : SchedWrite;
209 def WriteFLDC : SchedWrite;
210 def WriteFLoad : SchedWrite;
211 def WriteFLoadX : SchedWrite;
212 def WriteFLoadY : SchedWrite;
213 def WriteFMaskedLoad : SchedWrite;
214 def WriteFMaskedLoadY : SchedWrite;
215 def WriteFStore : SchedWrite;
216 def WriteFStoreX : SchedWrite;
217 def WriteFStoreY : SchedWrite;
218 def WriteFStoreNT : SchedWrite;
219 def WriteFStoreNTX : SchedWrite;
220 def WriteFStoreNTY : SchedWrite;
221 def WriteFMaskedStore : SchedWrite;
222 def WriteFMaskedStoreY : SchedWrite;
223 def WriteFMove : SchedWrite;
224 def WriteFMoveX : SchedWrite;
225 def WriteFMoveY : SchedWrite;
227 defm WriteFAdd : X86SchedWritePair<ReadAfterVecLd>; // Floating point add/sub.
228 defm WriteFAddX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point add/sub (XMM).
229 defm WriteFAddY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point add/sub (YMM).
230 defm WriteFAddZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point add/sub (ZMM).
231 defm WriteFAdd64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double add/sub.
232 defm WriteFAdd64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double add/sub (XMM).
233 defm WriteFAdd64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double add/sub (YMM).
234 defm WriteFAdd64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double add/sub (ZMM).
235 defm WriteFCmp : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare.
236 defm WriteFCmpX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point compare (XMM).
237 defm WriteFCmpY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point compare (YMM).
238 defm WriteFCmpZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point compare (ZMM).
239 defm WriteFCmp64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double compare.
240 defm WriteFCmp64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double compare (XMM).
241 defm WriteFCmp64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (YMM).
242 defm WriteFCmp64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (ZMM).
243 defm WriteFCom : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare to flags.
244 defm WriteFMul : X86SchedWritePair<ReadAfterVecLd>; // Floating point multiplication.
245 defm WriteFMulX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point multiplication (XMM).
246 defm WriteFMulY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (YMM).
247 defm WriteFMulZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (ZMM).
248 defm WriteFMul64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double multiplication.
249 defm WriteFMul64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double multiplication (XMM).
250 defm WriteFMul64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double multiplication (YMM).
251 defm WriteFMul64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double multiplication (ZMM).
252 defm WriteFDiv : X86SchedWritePair<ReadAfterVecLd>; // Floating point division.
253 defm WriteFDivX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point division (XMM).
254 defm WriteFDivY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point division (YMM).
255 defm WriteFDivZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point division (ZMM).
256 defm WriteFDiv64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double division.
257 defm WriteFDiv64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double division (XMM).
258 defm WriteFDiv64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double division (YMM).
259 defm WriteFDiv64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double division (ZMM).
260 defm WriteFSqrt : X86SchedWritePair<ReadAfterVecLd>; // Floating point square root.
261 defm WriteFSqrtX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point square root (XMM).
262 defm WriteFSqrtY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point square root (YMM).
263 defm WriteFSqrtZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point square root (ZMM).
264 defm WriteFSqrt64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double square root.
265 defm WriteFSqrt64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double square root (XMM).
266 defm WriteFSqrt64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double square root (YMM).
267 defm WriteFSqrt64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double square root (ZMM).
268 defm WriteFSqrt80 : X86SchedWritePair<ReadAfterVecLd>; // Floating point long double square root.
269 defm WriteFRcp : X86SchedWritePair<ReadAfterVecLd>; // Floating point reciprocal estimate.
270 defm WriteFRcpX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point reciprocal estimate (XMM).
271 defm WriteFRcpY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal estimate (YMM).
272 defm WriteFRcpZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal estimate (ZMM).
273 defm WriteFRsqrt : X86SchedWritePair<ReadAfterVecLd>; // Floating point reciprocal square root estimate.
274 defm WriteFRsqrtX: X86SchedWritePair<ReadAfterVecXLd>; // Floating point reciprocal square root estimate (XMM).
275 defm WriteFRsqrtY: X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal square root estimate (YMM).
276 defm WriteFRsqrtZ: X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal square root estimate (ZMM).
277 defm WriteFMA : X86SchedWritePair<ReadAfterVecLd>; // Fused Multiply Add.
278 defm WriteFMAX : X86SchedWritePair<ReadAfterVecXLd>; // Fused Multiply Add (XMM).
279 defm WriteFMAY : X86SchedWritePair<ReadAfterVecYLd>; // Fused Multiply Add (YMM).
280 defm WriteFMAZ : X86SchedWritePair<ReadAfterVecYLd>; // Fused Multiply Add (ZMM).
281 defm WriteDPPD : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double dot product.
282 defm WriteDPPS : X86SchedWritePair<ReadAfterVecXLd>; // Floating point single dot product.
283 defm WriteDPPSY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point single dot product (YMM).
284 defm WriteDPPSZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point single dot product (ZMM).
285 defm WriteFSign : X86SchedWritePair<ReadAfterVecLd>; // Floating point fabs/fchs.
286 defm WriteFRnd : X86SchedWritePair<ReadAfterVecXLd>; // Floating point rounding.
287 defm WriteFRndY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point rounding (YMM).
288 defm WriteFRndZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point rounding (ZMM).
289 defm WriteFLogic : X86SchedWritePair<ReadAfterVecXLd>; // Floating point and/or/xor logicals.
290 defm WriteFLogicY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point and/or/xor logicals (YMM).
291 defm WriteFLogicZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point and/or/xor logicals (ZMM).
292 defm WriteFTest : X86SchedWritePair<ReadAfterVecXLd>; // Floating point TEST instructions.
293 defm WriteFTestY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point TEST instructions (YMM).
294 defm WriteFTestZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point TEST instructions (ZMM).
295 defm WriteFShuffle : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector shuffles.
296 defm WriteFShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector shuffles (YMM).
297 defm WriteFShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector shuffles (ZMM).
298 defm WriteFVarShuffle : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector variable shuffles.
299 defm WriteFVarShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector variable shuffles (YMM).
300 defm WriteFVarShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector variable shuffles (ZMM).
301 defm WriteFBlend : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector blends.
302 defm WriteFBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector blends (YMM).
303 defm WriteFBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector blends (ZMM).
304 defm WriteFVarBlend : X86SchedWritePair<ReadAfterVecXLd>; // Fp vector variable blends.
305 defm WriteFVarBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Fp vector variable blends (YMM).
306 defm WriteFVarBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Fp vector variable blends (ZMM).
308 // FMA Scheduling helper class.
// Holds a single Sched field, initialized to WriteFAdd.
309 class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
311 // Horizontal Add/Sub (float and integer)
312 defm WriteFHAdd : X86SchedWritePair<ReadAfterVecXLd>;
313 defm WriteFHAddY : X86SchedWritePair<ReadAfterVecYLd>;
314 defm WriteFHAddZ : X86SchedWritePair<ReadAfterVecYLd>;
315 defm WritePHAdd : X86SchedWritePair<ReadAfterVecLd>;
316 defm WritePHAddX : X86SchedWritePair<ReadAfterVecXLd>;
317 defm WritePHAddY : X86SchedWritePair<ReadAfterVecYLd>;
318 defm WritePHAddZ : X86SchedWritePair<ReadAfterVecYLd>;
320 // Vector integer operations.
321 def WriteVecLoad : SchedWrite;
322 def WriteVecLoadX : SchedWrite;
323 def WriteVecLoadY : SchedWrite;
324 def WriteVecLoadNT : SchedWrite;
325 def WriteVecLoadNTY : SchedWrite;
326 def WriteVecMaskedLoad : SchedWrite;
327 def WriteVecMaskedLoadY : SchedWrite;
328 def WriteVecStore : SchedWrite;
329 def WriteVecStoreX : SchedWrite;
330 def WriteVecStoreY : SchedWrite;
331 def WriteVecStoreNT : SchedWrite;
332 def WriteVecStoreNTY : SchedWrite;
333 def WriteVecMaskedStore : SchedWrite;
334 def WriteVecMaskedStoreY : SchedWrite;
335 def WriteVecMove : SchedWrite;
336 def WriteVecMoveX : SchedWrite;
337 def WriteVecMoveY : SchedWrite;
338 def WriteVecMoveToGpr : SchedWrite;
339 def WriteVecMoveFromGpr : SchedWrite;
341 defm WriteVecALU : X86SchedWritePair<ReadAfterVecLd>; // Vector integer ALU op, no logicals.
342 defm WriteVecALUX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer ALU op, no logicals (XMM).
343 defm WriteVecALUY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer ALU op, no logicals (YMM).
344 defm WriteVecALUZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer ALU op, no logicals (ZMM).
345 defm WriteVecLogic : X86SchedWritePair<ReadAfterVecLd>; // Vector integer and/or/xor logicals.
346 defm WriteVecLogicX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer and/or/xor logicals (XMM).
347 defm WriteVecLogicY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer and/or/xor logicals (YMM).
348 defm WriteVecLogicZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer and/or/xor logicals (ZMM).
349 defm WriteVecTest : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer TEST instructions.
350 defm WriteVecTestY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer TEST instructions (YMM).
351 defm WriteVecTestZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer TEST instructions (ZMM).
352 defm WriteVecShift : X86SchedWritePair<ReadAfterVecLd>; // Vector integer shifts (default).
353 defm WriteVecShiftX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer shifts (XMM).
354 defm WriteVecShiftY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer shifts (YMM).
355 defm WriteVecShiftZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer shifts (ZMM).
356 defm WriteVecShiftImm : X86SchedWritePair<ReadAfterVecLd>; // Vector integer immediate shifts (default).
357 defm WriteVecShiftImmX: X86SchedWritePair<ReadAfterVecXLd>; // Vector integer immediate shifts (XMM).
358 defm WriteVecShiftImmY: X86SchedWritePair<ReadAfterVecYLd>; // Vector integer immediate shifts (YMM).
359 defm WriteVecShiftImmZ: X86SchedWritePair<ReadAfterVecYLd>; // Vector integer immediate shifts (ZMM).
360 defm WriteVecIMul : X86SchedWritePair<ReadAfterVecLd>; // Vector integer multiply (default).
361 defm WriteVecIMulX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer multiply (XMM).
362 defm WriteVecIMulY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer multiply (YMM).
363 defm WriteVecIMulZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer multiply (ZMM).
364 defm WritePMULLD : X86SchedWritePair<ReadAfterVecXLd>; // Vector PMULLD.
365 defm WritePMULLDY : X86SchedWritePair<ReadAfterVecYLd>; // Vector PMULLD (YMM).
366 defm WritePMULLDZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector PMULLD (ZMM).
367 defm WriteShuffle : X86SchedWritePair<ReadAfterVecLd>; // Vector shuffles.
368 defm WriteShuffleX : X86SchedWritePair<ReadAfterVecXLd>; // Vector shuffles (XMM).
369 defm WriteShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Vector shuffles (YMM).
370 defm WriteShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector shuffles (ZMM).
371 defm WriteVarShuffle : X86SchedWritePair<ReadAfterVecLd>; // Vector variable shuffles.
372 defm WriteVarShuffleX : X86SchedWritePair<ReadAfterVecXLd>; // Vector variable shuffles (XMM).
373 defm WriteVarShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable shuffles (YMM).
374 defm WriteVarShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable shuffles (ZMM).
375 defm WriteBlend : X86SchedWritePair<ReadAfterVecXLd>; // Vector blends.
376 defm WriteBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Vector blends (YMM).
377 defm WriteBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector blends (ZMM).
378 defm WriteVarBlend : X86SchedWritePair<ReadAfterVecXLd>; // Vector variable blends.
379 defm WriteVarBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable blends (YMM).
380 defm WriteVarBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable blends (ZMM).
381 defm WritePSADBW : X86SchedWritePair<ReadAfterVecLd>; // Vector PSADBW.
382 defm WritePSADBWX : X86SchedWritePair<ReadAfterVecXLd>; // Vector PSADBW (XMM).
383 defm WritePSADBWY : X86SchedWritePair<ReadAfterVecYLd>; // Vector PSADBW (YMM).
384 defm WritePSADBWZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector PSADBW (ZMM).
385 defm WriteMPSAD : X86SchedWritePair<ReadAfterVecXLd>; // Vector MPSAD.
386 defm WriteMPSADY : X86SchedWritePair<ReadAfterVecYLd>; // Vector MPSAD (YMM).
387 defm WriteMPSADZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector MPSAD (ZMM).
388 defm WritePHMINPOS : X86SchedWritePair<ReadAfterVecXLd>; // Vector PHMINPOS.
390 // Vector insert/extract operations.
391 defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element.
392 def WriteVecExtract : SchedWrite; // Extract vector element to gpr.
393 def WriteVecExtractSt : SchedWrite; // Extract vector element and store.
395 // MOVMSK operations.
396 def WriteFMOVMSK : SchedWrite;
397 def WriteVecMOVMSK : SchedWrite;
398 def WriteVecMOVMSKY : SchedWrite;
399 def WriteMMXMOVMSK : SchedWrite;
401 // Conversion between integer and float.
402 defm WriteCvtSD2I : X86SchedWritePair<ReadAfterVecLd>; // Double -> Integer.
403 defm WriteCvtPD2I : X86SchedWritePair<ReadAfterVecXLd>; // Double -> Integer (XMM).
404 defm WriteCvtPD2IY : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Integer (YMM).
405 defm WriteCvtPD2IZ : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Integer (ZMM).
407 defm WriteCvtSS2I : X86SchedWritePair<ReadAfterVecLd>; // Float -> Integer.
408 defm WriteCvtPS2I : X86SchedWritePair<ReadAfterVecXLd>; // Float -> Integer (XMM).
409 defm WriteCvtPS2IY : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Integer (YMM).
410 defm WriteCvtPS2IZ : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Integer (ZMM).
412 defm WriteCvtI2SD : X86SchedWritePair<ReadAfterVecLd>; // Integer -> Double.
413 defm WriteCvtI2PD : X86SchedWritePair<ReadAfterVecXLd>; // Integer -> Double (XMM).
414 defm WriteCvtI2PDY : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Double (YMM).
415 defm WriteCvtI2PDZ : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Double (ZMM).
417 defm WriteCvtI2SS : X86SchedWritePair<ReadAfterVecLd>; // Integer -> Float.
418 defm WriteCvtI2PS : X86SchedWritePair<ReadAfterVecXLd>; // Integer -> Float (XMM).
419 defm WriteCvtI2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Float (YMM).
420 defm WriteCvtI2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Float (ZMM).
422 defm WriteCvtSS2SD : X86SchedWritePair<ReadAfterVecLd>; // Float -> Double size conversion.
423 defm WriteCvtPS2PD : X86SchedWritePair<ReadAfterVecXLd>; // Float -> Double size conversion (XMM).
424 defm WriteCvtPS2PDY : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Double size conversion (YMM).
425 defm WriteCvtPS2PDZ : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Double size conversion (ZMM).
427 defm WriteCvtSD2SS : X86SchedWritePair<ReadAfterVecLd>; // Double -> Float size conversion.
428 defm WriteCvtPD2PS : X86SchedWritePair<ReadAfterVecXLd>; // Double -> Float size conversion (XMM).
429 defm WriteCvtPD2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Float size conversion (YMM).
430 defm WriteCvtPD2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Float size conversion (ZMM).
432 defm WriteCvtPH2PS : X86SchedWritePair<ReadAfterVecXLd>; // Half -> Float size conversion.
433 defm WriteCvtPH2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Half -> Float size conversion (YMM).
434 defm WriteCvtPH2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Half -> Float size conversion (ZMM).
436 def WriteCvtPS2PH : SchedWrite; // Float -> Half size conversion.
437 def WriteCvtPS2PHY : SchedWrite; // Float -> Half size conversion (YMM).
438 def WriteCvtPS2PHZ : SchedWrite; // Float -> Half size conversion (ZMM).
439 def WriteCvtPS2PHSt : SchedWrite; // Float -> Half + store size conversion.
440 def WriteCvtPS2PHYSt : SchedWrite; // Float -> Half + store size conversion (YMM).
441 def WriteCvtPS2PHZSt : SchedWrite; // Float -> Half + store size conversion (ZMM).
443 // CRC32 instruction.
444 defm WriteCRC32 : X86SchedWritePair<ReadAfterLd>;
446 // Strings instructions.
447 // Packed Compare Implicit Length Strings, Return Mask
448 defm WritePCmpIStrM : X86SchedWritePair<ReadAfterVecXLd>;
449 // Packed Compare Explicit Length Strings, Return Mask
450 defm WritePCmpEStrM : X86SchedWritePair<ReadAfterVecXLd>;
451 // Packed Compare Implicit Length Strings, Return Index
452 defm WritePCmpIStrI : X86SchedWritePair<ReadAfterVecXLd>;
453 // Packed Compare Explicit Length Strings, Return Index
454 defm WritePCmpEStrI : X86SchedWritePair<ReadAfterVecXLd>;
457 defm WriteAESDecEnc : X86SchedWritePair<ReadAfterVecXLd>; // Decryption, encryption.
458 defm WriteAESIMC : X86SchedWritePair<ReadAfterVecXLd>; // InvMixColumn.
459 defm WriteAESKeyGen : X86SchedWritePair<ReadAfterVecXLd>; // Key Generation.
461 // Carry-less multiplication instructions.
462 defm WriteCLMul : X86SchedWritePair<ReadAfterVecXLd>;
465 def WriteEMMS : SchedWrite;
468 def WriteLDMXCSR : SchedWrite;
469 def WriteSTMXCSR : SchedWrite;
471 // Catch-all for expensive system instructions.
472 def WriteSystem : SchedWrite;
475 defm WriteFShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // Fp 256-bit width vector shuffles.
476 defm WriteFVarShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // Fp 256-bit width variable shuffles.
477 defm WriteShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // 256-bit width vector shuffles.
478 defm WriteVarShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // 256-bit width vector variable shuffles.
479 defm WriteVarVecShift : X86SchedWritePair<ReadAfterVecXLd>; // Variable vector shifts.
480 defm WriteVarVecShiftY : X86SchedWritePair<ReadAfterVecYLd>; // Variable vector shifts (YMM).
481 defm WriteVarVecShiftZ : X86SchedWritePair<ReadAfterVecYLd>; // Variable vector shifts (ZMM).
483 // Old microcoded instructions that nobody uses.
484 def WriteMicrocoded : SchedWrite;
486 // Fence instructions.
487 def WriteFence : SchedWrite;
489 // Nop, not very useful except that it provides a model for nops!
490 def WriteNop : SchedWrite;
492 // Move/Load/Store wrappers.
494 : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStore>;
496 : X86SchedWriteMoveLS<WriteFMoveX, WriteFLoadX, WriteFStoreX>;
498 : X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreY>;
// The 512-bit (last) slot reuses the 256-bit triple, WriteFMoveLSY.
499 def SchedWriteFMoveLS
500 : X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX,
501 WriteFMoveLSY, WriteFMoveLSY>;
504 : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNT>;
506 : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNTX>;
508 : X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreNTY>;
// The 512-bit (last) slot reuses the 256-bit triple, WriteFMoveLSNTY.
509 def SchedWriteFMoveLSNT
510 : X86SchedWriteMoveLSWidths<WriteFMoveLSNT, WriteFMoveLSNTX,
511 WriteFMoveLSNTY, WriteFMoveLSNTY>;
514 : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
516 : X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadX, WriteVecStoreX>;
518 : X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadY, WriteVecStoreY>;
// The 512-bit (last) slot reuses the 256-bit triple, WriteVecMoveLSY.
519 def SchedWriteVecMoveLS
520 : X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
521 WriteVecMoveLSY, WriteVecMoveLSY>;
524 : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoadNT, WriteVecStoreNT>;
525 def WriteVecMoveLSNTX
526 : X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadNT, WriteVecStoreNT>;
527 def WriteVecMoveLSNTY
528 : X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadNTY, WriteVecStoreNTY>;
// The 512-bit (last) slot reuses the 256-bit triple, WriteVecMoveLSNTY.
529 def SchedWriteVecMoveLSNT
530 : X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX,
531 WriteVecMoveLSNTY, WriteVecMoveLSNTY>;
533 // Vector width wrappers.
535 : X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddZ>;
537 : X86SchedWriteWidths<WriteFAdd64, WriteFAdd64X, WriteFAdd64Y, WriteFAdd64Z>;
539 : X86SchedWriteWidths<WriteFHAdd, WriteFHAdd, WriteFHAddY, WriteFHAddZ>;
541 : X86SchedWriteWidths<WriteFCmp, WriteFCmpX, WriteFCmpY, WriteFCmpZ>;
543 : X86SchedWriteWidths<WriteFCmp64, WriteFCmp64X, WriteFCmp64Y, WriteFCmp64Z>;
545 : X86SchedWriteWidths<WriteFMul, WriteFMulX, WriteFMulY, WriteFMulZ>;
547 : X86SchedWriteWidths<WriteFMul64, WriteFMul64X, WriteFMul64Y, WriteFMul64Z>;
549 : X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAZ>;
551 : X86SchedWriteWidths<WriteDPPD, WriteDPPD, WriteDPPD, WriteDPPD>;
553 : X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSZ>;
555 : X86SchedWriteWidths<WriteFDiv, WriteFDivX, WriteFDivY, WriteFDivZ>;
557 : X86SchedWriteWidths<WriteFDiv64, WriteFDiv64X, WriteFDiv64Y, WriteFDiv64Z>;
559 : X86SchedWriteWidths<WriteFSqrt, WriteFSqrtX,
560 WriteFSqrtY, WriteFSqrtZ>;
561 def SchedWriteFSqrt64
562 : X86SchedWriteWidths<WriteFSqrt64, WriteFSqrt64X,
563 WriteFSqrt64Y, WriteFSqrt64Z>;
565 : X86SchedWriteWidths<WriteFRcp, WriteFRcpX, WriteFRcpY, WriteFRcpZ>;
567 : X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrtX, WriteFRsqrtY, WriteFRsqrtZ>;
569 : X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndZ>;
571 : X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicZ>;
573 : X86SchedWriteWidths<WriteFTest, WriteFTest, WriteFTestY, WriteFTestZ>;
575 def SchedWriteFShuffle
576 : X86SchedWriteWidths<WriteFShuffle, WriteFShuffle,
577 WriteFShuffleY, WriteFShuffleZ>;
578 def SchedWriteFVarShuffle
579 : X86SchedWriteWidths<WriteFVarShuffle, WriteFVarShuffle,
580 WriteFVarShuffleY, WriteFVarShuffleZ>;
582 : X86SchedWriteWidths<WriteFBlend, WriteFBlend, WriteFBlendY, WriteFBlendZ>;
583 def SchedWriteFVarBlend
584 : X86SchedWriteWidths<WriteFVarBlend, WriteFVarBlend,
585 WriteFVarBlendY, WriteFVarBlendZ>;
587 def SchedWriteCvtDQ2PD
588 : X86SchedWriteWidths<WriteCvtI2SD, WriteCvtI2PD,
589 WriteCvtI2PDY, WriteCvtI2PDZ>;
590 def SchedWriteCvtDQ2PS
591 : X86SchedWriteWidths<WriteCvtI2SS, WriteCvtI2PS,
592 WriteCvtI2PSY, WriteCvtI2PSZ>;
593 def SchedWriteCvtPD2DQ
594 : X86SchedWriteWidths<WriteCvtSD2I, WriteCvtPD2I,
595 WriteCvtPD2IY, WriteCvtPD2IZ>;
596 def SchedWriteCvtPS2DQ
597 : X86SchedWriteWidths<WriteCvtSS2I, WriteCvtPS2I,
598 WriteCvtPS2IY, WriteCvtPS2IZ>;
599 def SchedWriteCvtPS2PD
600 : X86SchedWriteWidths<WriteCvtSS2SD, WriteCvtPS2PD,
601 WriteCvtPS2PDY, WriteCvtPS2PDZ>;
602 def SchedWriteCvtPD2PS
603 : X86SchedWriteWidths<WriteCvtSD2SS, WriteCvtPD2PS,
604 WriteCvtPD2PSY, WriteCvtPD2PSZ>;
607 : X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUZ>;
609 : X86SchedWriteWidths<WritePHAdd, WritePHAddX, WritePHAddY, WritePHAddZ>;
610 def SchedWriteVecLogic
611 : X86SchedWriteWidths<WriteVecLogic, WriteVecLogicX,
612 WriteVecLogicY, WriteVecLogicZ>;
613 def SchedWriteVecTest
614 : X86SchedWriteWidths<WriteVecTest, WriteVecTest,
615 WriteVecTestY, WriteVecTestZ>;
616 def SchedWriteVecShift
617 : X86SchedWriteWidths<WriteVecShift, WriteVecShiftX,
618 WriteVecShiftY, WriteVecShiftZ>;
619 def SchedWriteVecShiftImm
620 : X86SchedWriteWidths<WriteVecShiftImm, WriteVecShiftImmX,
621 WriteVecShiftImmY, WriteVecShiftImmZ>;
622 def SchedWriteVarVecShift
623 : X86SchedWriteWidths<WriteVarVecShift, WriteVarVecShift,
624 WriteVarVecShiftY, WriteVarVecShiftZ>;
625 def SchedWriteVecIMul
626 : X86SchedWriteWidths<WriteVecIMul, WriteVecIMulX,
627 WriteVecIMulY, WriteVecIMulZ>;
629 : X86SchedWriteWidths<WritePMULLD, WritePMULLD,
630 WritePMULLDY, WritePMULLDZ>;
632 : X86SchedWriteWidths<WriteMPSAD, WriteMPSAD,
633 WriteMPSADY, WriteMPSADZ>;
635 : X86SchedWriteWidths<WritePSADBW, WritePSADBWX,
636 WritePSADBWY, WritePSADBWZ>;
638 def SchedWriteShuffle
639 : X86SchedWriteWidths<WriteShuffle, WriteShuffleX,
640 WriteShuffleY, WriteShuffleZ>;
641 def SchedWriteVarShuffle
642 : X86SchedWriteWidths<WriteVarShuffle, WriteVarShuffleX,
643 WriteVarShuffleY, WriteVarShuffleZ>;
645 : X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlendY, WriteBlendZ>;
646 def SchedWriteVarBlend
647 : X86SchedWriteWidths<WriteVarBlend, WriteVarBlend,
648 WriteVarBlendY, WriteVarBlendZ>;
650 // Vector size wrappers.
651 def SchedWriteFAddSizes
652 : X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd64>;
653 def SchedWriteFCmpSizes
654 : X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp64>;
655 def SchedWriteFMulSizes
656 : X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul64>;
657 def SchedWriteFDivSizes
658 : X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv64>;
659 def SchedWriteFSqrtSizes
660 : X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt64>;
661 def SchedWriteFLogicSizes
662 : X86SchedWriteSizes<SchedWriteFLogic, SchedWriteFLogic>;
663 def SchedWriteFShuffleSizes
664 : X86SchedWriteSizes<SchedWriteFShuffle, SchedWriteFShuffle>;
666 //===----------------------------------------------------------------------===//
667 // Generic Processor Scheduler Models.
669 // IssueWidth is analogous to the number of decode units. Core and its
670 // descendants, including Nehalem and SandyBridge have 4 decoders.
671 // Resources beyond the decoder operate on micro-ops and are buffered
672 // so adjacent micro-ops don't directly compete.
674 // MicroOpBufferSize > 1 indicates that RAW dependencies can be
675 // decoded in the same cycle. The value 32 is a reasonably arbitrary
676 // number of in-flight instructions.
678 // HighLatency=10 is optimistic. X86InstrInfo::isHighLatencyDef
679 // indicates high latency opcodes. Alternatively, InstrItinData
680 // entries may be included here to define specific operand
681 // latencies. Since these latencies are not used for pipeline hazards,
682 // they do not need to be exact.
684 // The GenericX86Model contains no instruction schedules
685 // and disables PostRAScheduler.
686 class GenericX86Model : SchedMachineModel {
// Number of in-flight instructions assumed; see the note on the value 32 above.
688 let MicroOpBufferSize = 32;
// Described above as optimistic.
690 let HighLatency = 10;
// Off by default; GenericPostRAModel overrides this to 1.
691 let PostRAScheduler = 0;
// The model contains no instruction schedules (see above), hence 0 here.
692 let CompleteModel = 0;
695 def GenericModel : GenericX86Model;
697 // Define a model with the PostRAScheduler enabled.
698 def GenericPostRAModel : GenericX86Model {
699 let PostRAScheduler = 1;