1 //=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the machine model for Sandy Bridge to support instruction
10 // scheduling and other instruction cost heuristics.
12 // Note that we define some instructions here that are not supported by SNB,
13 // but we still have to define them because SNB is the default subtarget for
14 // X86. These instructions are tagged with a comment `Unsupported = 1`.
16 //===----------------------------------------------------------------------===//
18 def SandyBridgeModel : SchedMachineModel {
19 // All x86 instructions are modeled as a single micro-op, and SB can decode 4
20 // instructions per cycle.
21 // FIXME: Identify instructions that aren't a single fused micro-op.
23 let MicroOpBufferSize = 168; // Based on the reorder buffer.
25 let MispredictPenalty = 16;
27 // Based on the LSD (loop-stream detector) queue size.
28 let LoopMicroOpBufferSize = 28;
30 // This flag is set to allow the scheduler to assign
31 // a default model to unrecognized opcodes.
32 let CompleteModel = 0;
35 let SchedModel = SandyBridgeModel in {
37 // Sandy Bridge can issue micro-ops to 6 different ports in one cycle.
39 // Ports 0, 1, and 5 handle all computation.
// Each port is declared as its own single-unit resource so that a write can
// name one specific port (e.g. [SBPort1]) when it cannot issue anywhere else.
40 def SBPort0 : ProcResource<1>;
41 def SBPort1 : ProcResource<1>;
42 def SBPort5 : ProcResource<1>;
44 // Ports 2 and 3 are identical. They handle loads and the address half of
// stores, so they are modeled as one resource with two units.
46 def SBPort23 : ProcResource<2>;
48 // Port 4 gets the data half of stores. Store data can be available later than
49 // the store address, but since we don't model the latency of stores, we can
// ignore that difference here.
51 def SBPort4 : ProcResource<1>;
53 // Many micro-ops are capable of issuing on multiple ports.
// Each group lists the ports such a micro-op may choose from; the digits in the
// group name are the member port numbers.
54 def SBPort01 : ProcResGroup<[SBPort0, SBPort1]>;
55 def SBPort05 : ProcResGroup<[SBPort0, SBPort5]>;
56 def SBPort15 : ProcResGroup<[SBPort1, SBPort5]>;
57 def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>;
59 // 54 Entry Unified Scheduler
60 def SBPortAny : ProcResGroup<[SBPort0, SBPort1, SBPort23, SBPort4, SBPort5]> {
64 // Integer division issued on port 0.
// The divider is a resource of its own so that a divide can hold it for more
// cycles than it holds port 0 (see the WriteDiv*/WriteIDiv* entries, which use
// [SBPort0, SBDivider] with resource cycles [1, 10]).
65 def SBDivider : ProcResource<1>;
66 // FP division and sqrt on port 0.
// Separate from SBDivider; used by the WriteFDiv*/WriteFSqrt* entries.
67 def SBFPDivider : ProcResource<1>;
69 // Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
70 // cycles after the memory operand.
71 def : ReadAdvance<ReadAfterLd, 5>;
73 // Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
74 // until 5/6/7 cycles after the memory operand.
75 def : ReadAdvance<ReadAfterVecLd, 5>; // Matches the 5-cycle WriteVecLoad latency.
76 def : ReadAdvance<ReadAfterVecXLd, 6>; // Matches the 6-cycle WriteVecLoadX latency.
77 def : ReadAdvance<ReadAfterVecYLd, 7>; // Matches the 7-cycle WriteVecLoadY latency.
79 def : ReadAdvance<ReadInt2Fpu, 0>; // Zero cycles of read advance.
81 // Many SchedWrites are defined in pairs with and without a folded load.
82 // Instructions with folded loads are usually micro-fused, so they only appear
83 // as two micro-ops when queued in the reservation station.
84 // This multiclass defines the resource usage for variants with and without folded loads.
86 multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
87 list<ProcResourceKind> ExePorts,
88 int Lat, list<int> Res = [1], int UOps = 1,
90 // Register variant uses Res cycles on each of ExePorts (a single cycle by default).
91 def : WriteRes<SchedRW, ExePorts> {
93 let ResourceCycles = Res;
94 let NumMicroOps = UOps;
97 // Memory variant also uses a cycle on port 2/3 and adds LoadLat cycles to
98 // the latency (default = 5).
99 def : WriteRes<SchedRW.Folded, !listconcat([SBPort23], ExePorts)> {
100 let Latency = !add(Lat, LoadLat);
101 let ResourceCycles = !listconcat([1], Res);
102 let NumMicroOps = !add(UOps, 1);
106 // A folded store needs a cycle on port 4 for the store data, and an extra port
107 // 2/3 cycle to recompute the address.
108 def : WriteRes<WriteRMW, [SBPort23,SBPort4]>;
// Plain and non-temporal stores use the same two resources: port 2/3 for the
// address and port 4 for the data.
110 def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
111 def : WriteRes<WriteStoreNT, [SBPort23, SBPort4]>;
// Loads use one of the two load ports and have a 5-cycle latency.
112 def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; }
// Moves may issue on any of the three computation ports.
113 def : WriteRes<WriteMove, [SBPort015]>;
// WriteZero consumes no execution resources.
114 def : WriteRes<WriteZero, []>;
// No resources and no micro-ops; only a 5-cycle latency is recorded.
115 def : WriteRes<WriteVecMaskedGatherWriteback, []> { let Latency = 5; let NumMicroOps = 0; }
// Integer ALU and multiply writes.
// SBWriteResPair arguments after the write are: execution ports, latency,
// per-port resource cycles (default [1]) and micro-op count (default 1). Each
// SBWriteResPair also defines the folded-load variant, which adds a port 2/3
// cycle, one micro-op and the load latency.
// NOTE(review): X86WriteRes is defined outside this file; its arguments appear
// to follow the same (ports, latency, cycles, micro-ops) order and define a
// single write only, so the *Ld variants are listed explicitly - confirm.
118 defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
119 defm : SBWriteResPair<WriteADC, [SBPort05,SBPort015], 2, [1,1], 2>;
121 defm : SBWriteResPair<WriteIMul8, [SBPort1], 3>;
122 defm : SBWriteResPair<WriteIMul16, [SBPort1,SBPort05,SBPort015], 4, [1,1,2], 4>;
123 defm : X86WriteRes<WriteIMul16Imm, [SBPort1,SBPort015], 4, [1,1], 2>;
124 defm : X86WriteRes<WriteIMul16ImmLd, [SBPort1,SBPort015,SBPort23], 8, [1,1,1], 3>;
125 defm : SBWriteResPair<WriteIMul16Reg, [SBPort1], 3>;
126 defm : SBWriteResPair<WriteIMul32, [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
127 defm : SBWriteResPair<WriteMULX32, [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
128 defm : SBWriteResPair<WriteIMul32Imm, [SBPort1], 3>;
129 defm : SBWriteResPair<WriteIMul32Reg, [SBPort1], 3>;
130 defm : SBWriteResPair<WriteIMul64, [SBPort1,SBPort0], 4, [1,1], 2>;
131 defm : SBWriteResPair<WriteMULX64, [SBPort1,SBPort0], 4, [1,1], 2>;
132 defm : SBWriteResPair<WriteIMul64Imm, [SBPort1], 3>;
133 defm : SBWriteResPair<WriteIMul64Reg, [SBPort1], 3>;
134 def : WriteRes<WriteIMulH, []> { let Latency = 3; } // No resources; latency only.
136 defm : X86WriteRes<WriteXCHG, [SBPort015], 2, [3], 3>;
137 defm : X86WriteRes<WriteBSWAP32, [SBPort1], 1, [1], 1>;
138 defm : X86WriteRes<WriteBSWAP64, [SBPort1, SBPort05], 2, [1,1], 2>;
139 defm : X86WriteRes<WriteCMPXCHG, [SBPort05, SBPort015], 5, [1,3], 4>;
140 defm : X86WriteRes<WriteCMPXCHGRMW,[SBPort015, SBPort5, SBPort23, SBPort4], 8, [1, 2, 2, 1], 6>;
// Integer division. Every width, signed and unsigned, uses the same numbers:
// 25-cycle latency, 1 cycle on port 0 and 10 cycles on the integer divider.
142 defm : SBWriteResPair<WriteDiv8, [SBPort0, SBDivider], 25, [1, 10]>;
143 defm : SBWriteResPair<WriteDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
144 defm : SBWriteResPair<WriteDiv32, [SBPort0, SBDivider], 25, [1, 10]>;
145 defm : SBWriteResPair<WriteDiv64, [SBPort0, SBDivider], 25, [1, 10]>;
146 defm : SBWriteResPair<WriteIDiv8, [SBPort0, SBDivider], 25, [1, 10]>;
147 defm : SBWriteResPair<WriteIDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
148 defm : SBWriteResPair<WriteIDiv32, [SBPort0, SBDivider], 25, [1, 10]>;
149 defm : SBWriteResPair<WriteIDiv64, [SBPort0, SBDivider], 25, [1, 10]>;
152 defm : X86WriteRes<WriteSHDrri, [SBPort05, SBPort015], 2, [1, 1], 2>;
153 defm : X86WriteRes<WriteSHDrrcl,[SBPort05, SBPort015], 4, [3, 1], 4>;
154 defm : X86WriteRes<WriteSHDmri, [SBPort4,SBPort23,SBPort05,SBPort015], 8, [1, 2, 1, 1], 5>;
155 defm : X86WriteRes<WriteSHDmrcl,[SBPort4,SBPort23,SBPort05,SBPort015], 10, [1, 2, 3, 1], 7>;
157 defm : SBWriteResPair<WriteShift, [SBPort05], 1>;
158 defm : SBWriteResPair<WriteShiftCL, [SBPort05], 3, [3], 3>;
159 defm : SBWriteResPair<WriteRotate, [SBPort05], 2, [2], 2>;
160 defm : SBWriteResPair<WriteRotateCL, [SBPort05], 3, [3], 3>;
162 defm : SBWriteResPair<WriteJump, [SBPort5], 1>;
163 defm : SBWriteResPair<WriteCRC32, [SBPort1], 3, [1], 1, 5>;
165 defm : SBWriteResPair<WriteCMOV, [SBPort05,SBPort015], 2, [1,1], 2>; // Conditional move.
166 defm : X86WriteRes<WriteFCMOV, [SBPort5,SBPort05], 3, [2,1], 3>; // x87 conditional move.
167 def : WriteRes<WriteSETCC, [SBPort05]>; // Setcc.
168 def : WriteRes<WriteSETCCStore, [SBPort05,SBPort4,SBPort23]> {
173 defm : X86WriteRes<WriteLAHFSAHF, [SBPort05], 1, [1], 1>;
174 defm : X86WriteRes<WriteBitTest, [SBPort05], 1, [1], 1>;
175 defm : X86WriteRes<WriteBitTestImmLd, [SBPort05,SBPort23], 6, [1,1], 2>;
176 //defm : X86WriteRes<WriteBitTestRegLd, [SBPort05,SBPort23], 6, [1,1], 2>;
177 defm : X86WriteRes<WriteBitTestSet, [SBPort05], 1, [1], 1>;
178 defm : X86WriteRes<WriteBitTestSetImmLd, [SBPort05,SBPort23], 6, [1,1], 3>;
179 defm : X86WriteRes<WriteBitTestSetRegLd, [SBPort05,SBPort23,SBPort5,SBPort015], 8, [1,1,1,1], 5>;
181 // This is for simple LEAs with one or two input operands.
182 // The complex ones can only execute on port 1, and they require two cycles on
183 // the port to read all inputs. We don't model that.
184 def : WriteRes<WriteLEA, [SBPort01]>;
// Bit scan and bit count writes: one micro-op on port 1 with a 3-cycle latency.
// The trailing argument is the load latency added for the folded-load variant
// (LoadLat in SBWriteResPair): 5 here, except 6 for POPCNT.
187 defm : SBWriteResPair<WriteBSF, [SBPort1], 3, [1], 1, 5>;
188 defm : SBWriteResPair<WriteBSR, [SBPort1], 3, [1], 1, 5>;
189 defm : SBWriteResPair<WriteLZCNT, [SBPort1], 3, [1], 1, 5>;
190 defm : SBWriteResPair<WriteTZCNT, [SBPort1], 3, [1], 1, 5>;
191 defm : SBWriteResPair<WritePOPCNT, [SBPort1], 3, [1], 1, 6>;
193 // BMI1 BEXTR/BLS, BMI2 BZHI
194 // NOTE: These don't exist on Sandy Bridge. Ports are guesses.
195 defm : SBWriteResPair<WriteBEXTR, [SBPort05,SBPort1], 2, [1,1], 2>;
196 defm : SBWriteResPair<WriteBLS, [SBPort015], 1>;
197 defm : SBWriteResPair<WriteBZHI, [SBPort1], 1>;
199 // Scalar and vector floating point.
200 defm : X86WriteRes<WriteFLD0, [SBPort5], 1, [1], 1>;
201 defm : X86WriteRes<WriteFLD1, [SBPort0,SBPort5], 1, [1,1], 2>;
202 defm : X86WriteRes<WriteFLDC, [SBPort0,SBPort1], 1, [1,1], 2>;
203 defm : X86WriteRes<WriteFLoad, [SBPort23], 5, [1], 1>;
204 defm : X86WriteRes<WriteFLoadX, [SBPort23], 6, [1], 1>;
205 defm : X86WriteRes<WriteFLoadY, [SBPort23], 7, [1], 1>;
206 defm : X86WriteRes<WriteFMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>;
207 defm : X86WriteRes<WriteFMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
208 defm : X86WriteRes<WriteFStore, [SBPort23,SBPort4], 1, [1,1], 1>;
209 defm : X86WriteRes<WriteFStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
210 defm : X86WriteRes<WriteFStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
211 defm : X86WriteRes<WriteFStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
212 defm : X86WriteRes<WriteFStoreNTX, [SBPort23,SBPort4], 1, [1,1], 1>;
213 defm : X86WriteRes<WriteFStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
215 defm : X86WriteRes<WriteFMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
216 defm : X86WriteRes<WriteFMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
217 defm : X86WriteRes<WriteFMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
218 defm : X86WriteRes<WriteFMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
220 defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>;
221 defm : X86WriteRes<WriteFMoveX, [SBPort5], 1, [1], 1>;
222 defm : X86WriteRes<WriteFMoveY, [SBPort5], 1, [1], 1>;
223 defm : X86WriteRes<WriteEMMS, [SBPort015], 31, [31], 31>;
// FP add writes: one micro-op on port 1 with a 3-cycle latency. The trailing
// argument is the load latency used for the folded-load variant: 6 for the
// unsuffixed and X writes, 7 for the Y and Z writes.
225 defm : SBWriteResPair<WriteFAdd, [SBPort1], 3, [1], 1, 6>;
226 defm : SBWriteResPair<WriteFAddX, [SBPort1], 3, [1], 1, 6>;
227 defm : SBWriteResPair<WriteFAddY, [SBPort1], 3, [1], 1, 7>;
228 defm : SBWriteResPair<WriteFAddZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
229 defm : SBWriteResPair<WriteFAdd64, [SBPort1], 3, [1], 1, 6>;
230 defm : SBWriteResPair<WriteFAdd64X, [SBPort1], 3, [1], 1, 6>;
231 defm : SBWriteResPair<WriteFAdd64Y, [SBPort1], 3, [1], 1, 7>;
232 defm : SBWriteResPair<WriteFAdd64Z, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
234 defm : SBWriteResPair<WriteFCmp, [SBPort1], 3, [1], 1, 6>;
235 defm : SBWriteResPair<WriteFCmpX, [SBPort1], 3, [1], 1, 6>;
236 defm : SBWriteResPair<WriteFCmpY, [SBPort1], 3, [1], 1, 7>;
237 defm : SBWriteResPair<WriteFCmpZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
238 defm : SBWriteResPair<WriteFCmp64, [SBPort1], 3, [1], 1, 6>;
239 defm : SBWriteResPair<WriteFCmp64X, [SBPort1], 3, [1], 1, 6>;
240 defm : SBWriteResPair<WriteFCmp64Y, [SBPort1], 3, [1], 1, 7>;
241 defm : SBWriteResPair<WriteFCmp64Z, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
243 defm : SBWriteResPair<WriteFCom, [SBPort1], 3>;
244 defm : SBWriteResPair<WriteFComX, [SBPort1], 3>;
246 defm : SBWriteResPair<WriteFMul, [SBPort0], 5, [1], 1, 6>;
247 defm : SBWriteResPair<WriteFMulX, [SBPort0], 5, [1], 1, 6>;
248 defm : SBWriteResPair<WriteFMulY, [SBPort0], 5, [1], 1, 7>;
249 defm : SBWriteResPair<WriteFMulZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
250 defm : SBWriteResPair<WriteFMul64, [SBPort0], 5, [1], 1, 6>;
251 defm : SBWriteResPair<WriteFMul64X, [SBPort0], 5, [1], 1, 6>;
252 defm : SBWriteResPair<WriteFMul64Y, [SBPort0], 5, [1], 1, 7>;
253 defm : SBWriteResPair<WriteFMul64Z, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
255 defm : SBWriteResPair<WriteFDiv, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
256 defm : SBWriteResPair<WriteFDivX, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
257 defm : SBWriteResPair<WriteFDivY, [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>;
258 defm : SBWriteResPair<WriteFDivZ, [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>; // Unsupported = 1
259 defm : SBWriteResPair<WriteFDiv64, [SBPort0,SBFPDivider], 22, [1,22], 1, 6>;
260 defm : SBWriteResPair<WriteFDiv64X, [SBPort0,SBFPDivider], 22, [1,22], 1, 6>;
261 defm : SBWriteResPair<WriteFDiv64Y, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>;
262 defm : SBWriteResPair<WriteFDiv64Z, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>; // Unsupported = 1
264 defm : SBWriteResPair<WriteFRcp, [SBPort0], 5, [1], 1, 6>;
265 defm : SBWriteResPair<WriteFRcpX, [SBPort0], 5, [1], 1, 6>;
266 defm : SBWriteResPair<WriteFRcpY, [SBPort0,SBPort05], 7, [2,1], 3, 7>;
267 defm : SBWriteResPair<WriteFRcpZ, [SBPort0,SBPort05], 7, [2,1], 3, 7>; // Unsupported = 1
269 defm : SBWriteResPair<WriteFRsqrt, [SBPort0], 5, [1], 1, 6>;
270 defm : SBWriteResPair<WriteFRsqrtX,[SBPort0], 5, [1], 1, 6>;
271 defm : SBWriteResPair<WriteFRsqrtY,[SBPort0,SBPort05], 7, [2,1], 3, 7>;
272 defm : SBWriteResPair<WriteFRsqrtZ,[SBPort0,SBPort05], 7, [2,1], 3, 7>; // Unsupported = 1
274 defm : SBWriteResPair<WriteFSqrt, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
275 defm : SBWriteResPair<WriteFSqrtX, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
276 defm : SBWriteResPair<WriteFSqrtY, [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>;
277 defm : SBWriteResPair<WriteFSqrtZ, [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>; // Unsupported = 1
278 defm : SBWriteResPair<WriteFSqrt64, [SBPort0,SBFPDivider], 21, [1,21], 1, 6>;
279 defm : SBWriteResPair<WriteFSqrt64X, [SBPort0,SBFPDivider], 21, [1,21], 1, 6>;
280 defm : SBWriteResPair<WriteFSqrt64Y, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>;
281 defm : SBWriteResPair<WriteFSqrt64Z, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>; // Unsupported = 1
282 defm : SBWriteResPair<WriteFSqrt80, [SBPort0,SBFPDivider], 24, [1,24], 1, 6>;
284 defm : SBWriteResPair<WriteDPPD, [SBPort0,SBPort1,SBPort5], 9, [1,1,1], 3, 6>;
285 defm : SBWriteResPair<WriteDPPS, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 6>;
286 defm : SBWriteResPair<WriteDPPSY, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 7>;
287 defm : SBWriteResPair<WriteDPPSZ, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 7>; // Unsupported = 1
288 defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
289 defm : SBWriteResPair<WriteFRnd, [SBPort1], 3, [1], 1, 6>;
290 defm : SBWriteResPair<WriteFRndY, [SBPort1], 3, [1], 1, 7>;
291 defm : SBWriteResPair<WriteFRndZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
292 defm : SBWriteResPair<WriteFLogic, [SBPort5], 1, [1], 1, 6>;
293 defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>;
294 defm : SBWriteResPair<WriteFLogicZ, [SBPort5], 1, [1], 1, 7>; // Unsupported = 1
295 defm : SBWriteResPair<WriteFTest, [SBPort0], 1, [1], 1, 6>;
296 defm : SBWriteResPair<WriteFTestY, [SBPort0], 1, [1], 1, 7>;
297 defm : SBWriteResPair<WriteFTestZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1
298 defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1, [1], 1, 6>;
299 defm : SBWriteResPair<WriteFShuffleY,[SBPort5], 1, [1], 1, 7>;
300 defm : SBWriteResPair<WriteFShuffleZ,[SBPort5], 1, [1], 1, 7>; // Unsupported = 1
301 defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1, [1], 1, 6>;
302 defm : SBWriteResPair<WriteFVarShuffleY,[SBPort5], 1, [1], 1, 7>;
303 defm : SBWriteResPair<WriteFVarShuffleZ,[SBPort5], 1, [1], 1, 7>; // Unsupported = 1
304 defm : SBWriteResPair<WriteFBlend, [SBPort05], 1, [1], 1, 6>;
305 defm : SBWriteResPair<WriteFBlendY, [SBPort05], 1, [1], 1, 7>;
306 defm : SBWriteResPair<WriteFBlendZ, [SBPort05], 1, [1], 1, 7>; // Unsupported = 1
307 defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
308 defm : SBWriteResPair<WriteFVarBlendY,[SBPort05], 2, [2], 2, 7>;
309 defm : SBWriteResPair<WriteFVarBlendZ,[SBPort05], 2, [2], 2, 7>; // Unsupported = 1
311 // Conversion between integer and float.
312 defm : SBWriteResPair<WriteCvtSS2I, [SBPort0,SBPort1], 5, [1,1], 2>;
313 defm : SBWriteResPair<WriteCvtPS2I, [SBPort1], 3, [1], 1, 6>;
314 defm : SBWriteResPair<WriteCvtPS2IY, [SBPort1], 3, [1], 1, 7>;
315 defm : SBWriteResPair<WriteCvtPS2IZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
316 defm : SBWriteResPair<WriteCvtSD2I, [SBPort0,SBPort1], 5, [1,1], 2>;
317 defm : SBWriteResPair<WriteCvtPD2I, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
318 defm : X86WriteRes<WriteCvtPD2IY, [SBPort1,SBPort5], 4, [1,1], 2>;
319 defm : X86WriteRes<WriteCvtPD2IZ, [SBPort1,SBPort5], 4, [1,1], 2>; // Unsupported = 1
320 defm : X86WriteRes<WriteCvtPD2IYLd, [SBPort1,SBPort5,SBPort23], 11, [1,1,1], 3>;
321 defm : X86WriteRes<WriteCvtPD2IZLd, [SBPort1,SBPort5,SBPort23], 11, [1,1,1], 3>; // Unsupported = 1
323 defm : X86WriteRes<WriteCvtI2SS, [SBPort1,SBPort5], 5, [1,2], 3>;
324 defm : X86WriteRes<WriteCvtI2SSLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
325 defm : SBWriteResPair<WriteCvtI2PS, [SBPort1], 3, [1], 1, 6>;
326 defm : SBWriteResPair<WriteCvtI2PSY, [SBPort1], 3, [1], 1, 7>;
327 defm : SBWriteResPair<WriteCvtI2PSZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
328 defm : X86WriteRes<WriteCvtI2SD, [SBPort1,SBPort5], 4, [1,1], 2>;
329 defm : X86WriteRes<WriteCvtI2PD, [SBPort1,SBPort5], 4, [1,1], 2>;
330 defm : X86WriteRes<WriteCvtI2PDY, [SBPort1,SBPort5], 4, [1,1], 2>;
331 defm : X86WriteRes<WriteCvtI2PDZ, [SBPort1,SBPort5], 4, [1,1], 2>; // Unsupported = 1
332 defm : X86WriteRes<WriteCvtI2SDLd, [SBPort1,SBPort23], 9, [1,1], 2>;
333 defm : X86WriteRes<WriteCvtI2PDLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
334 defm : X86WriteRes<WriteCvtI2PDYLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
335 defm : X86WriteRes<WriteCvtI2PDZLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>; // Unsupported = 1
337 defm : SBWriteResPair<WriteCvtSS2SD, [SBPort0], 1, [1], 1, 6>;
338 defm : X86WriteRes<WriteCvtPS2PD, [SBPort0,SBPort5], 2, [1,1], 2>;
339 defm : X86WriteRes<WriteCvtPS2PDY, [SBPort0,SBPort5], 2, [1,1], 2>;
340 defm : X86WriteRes<WriteCvtPS2PDZ, [SBPort0,SBPort5], 2, [1,1], 2>; // Unsupported = 1
341 defm : X86WriteRes<WriteCvtPS2PDLd, [SBPort0,SBPort23], 7, [1,1], 2>;
342 defm : X86WriteRes<WriteCvtPS2PDYLd, [SBPort0,SBPort23], 7, [1,1], 2>;
343 defm : X86WriteRes<WriteCvtPS2PDZLd, [SBPort0,SBPort23], 7, [1,1], 2>; // Unsupported = 1
344 defm : SBWriteResPair<WriteCvtSD2SS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
345 defm : SBWriteResPair<WriteCvtPD2PS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
346 defm : SBWriteResPair<WriteCvtPD2PSY, [SBPort1,SBPort5], 4, [1,1], 2, 7>;
347 defm : SBWriteResPair<WriteCvtPD2PSZ, [SBPort1,SBPort5], 4, [1,1], 2, 7>; // Unsupported = 1
349 defm : SBWriteResPair<WriteCvtPH2PS, [SBPort1], 3>;
350 defm : SBWriteResPair<WriteCvtPH2PSY, [SBPort1], 3>;
351 defm : SBWriteResPair<WriteCvtPH2PSZ, [SBPort1], 3>; // Unsupported = 1
353 defm : X86WriteRes<WriteCvtPS2PH, [SBPort1], 3, [1], 1>;
354 defm : X86WriteRes<WriteCvtPS2PHY, [SBPort1], 3, [1], 1>;
355 defm : X86WriteRes<WriteCvtPS2PHZ, [SBPort1], 3, [1], 1>; // Unsupported = 1
356 defm : X86WriteRes<WriteCvtPS2PHSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
357 defm : X86WriteRes<WriteCvtPS2PHYSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
358 defm : X86WriteRes<WriteCvtPS2PHZSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>; // Unsupported = 1
360 // Vector integer operations.
361 defm : X86WriteRes<WriteVecLoad, [SBPort23], 5, [1], 1>;
362 defm : X86WriteRes<WriteVecLoadX, [SBPort23], 6, [1], 1>;
363 defm : X86WriteRes<WriteVecLoadY, [SBPort23], 7, [1], 1>;
364 defm : X86WriteRes<WriteVecLoadNT, [SBPort23], 6, [1], 1>;
365 defm : X86WriteRes<WriteVecLoadNTY, [SBPort23], 7, [1], 1>;
366 defm : X86WriteRes<WriteVecMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>;
367 defm : X86WriteRes<WriteVecMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
368 defm : X86WriteRes<WriteVecStore, [SBPort23,SBPort4], 1, [1,1], 1>;
369 defm : X86WriteRes<WriteVecStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
370 defm : X86WriteRes<WriteVecStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
371 defm : X86WriteRes<WriteVecStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
372 defm : X86WriteRes<WriteVecStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
373 defm : X86WriteRes<WriteVecMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
374 defm : X86WriteRes<WriteVecMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
375 defm : X86WriteRes<WriteVecMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
376 defm : X86WriteRes<WriteVecMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
377 defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
378 defm : X86WriteRes<WriteVecMoveX, [SBPort015], 1, [1], 1>;
379 defm : X86WriteRes<WriteVecMoveY, [SBPort05], 1, [1], 1>;
380 defm : X86WriteRes<WriteVecMoveToGpr, [SBPort0], 2, [1], 1>;
381 defm : X86WriteRes<WriteVecMoveFromGpr, [SBPort5], 1, [1], 1>;
383 defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 5>;
384 defm : SBWriteResPair<WriteVecLogicX,[SBPort015], 1, [1], 1, 6>;
385 defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
386 defm : SBWriteResPair<WriteVecLogicZ,[SBPort015], 1, [1], 1, 7>; // Unsupported = 1
387 defm : SBWriteResPair<WriteVecTest, [SBPort0,SBPort5], 2, [1,1], 2, 6>;
388 defm : SBWriteResPair<WriteVecTestY, [SBPort0,SBPort5], 2, [1,1], 2, 7>;
389 defm : SBWriteResPair<WriteVecTestZ, [SBPort0,SBPort5], 2, [1,1], 2, 7>; // Unsupported = 1
390 defm : SBWriteResPair<WriteVecALU, [SBPort1], 3, [1], 1, 5>;
391 defm : SBWriteResPair<WriteVecALUX, [SBPort15], 1, [1], 1, 6>;
392 defm : SBWriteResPair<WriteVecALUY, [SBPort15], 1, [1], 1, 7>;
393 defm : SBWriteResPair<WriteVecALUZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1
394 defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5, [1], 1, 5>;
395 defm : SBWriteResPair<WriteVecIMulX, [SBPort0], 5, [1], 1, 6>;
396 defm : SBWriteResPair<WriteVecIMulY, [SBPort0], 5, [1], 1, 7>;
397 defm : SBWriteResPair<WriteVecIMulZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
398 defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>;
399 defm : SBWriteResPair<WritePMULLDY, [SBPort0], 5, [1], 1, 7>; // TODO this is probably wrong for 256/512-bit for the "generic" model
400 defm : SBWriteResPair<WritePMULLDZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
401 defm : SBWriteResPair<WriteShuffle, [SBPort5], 1, [1], 1, 5>;
402 defm : SBWriteResPair<WriteShuffleX, [SBPort15], 1, [1], 1, 6>;
403 defm : SBWriteResPair<WriteShuffleY, [SBPort5], 1, [1], 1, 7>;
404 defm : SBWriteResPair<WriteShuffleZ, [SBPort5], 1, [1], 1, 7>; // Unsupported = 1
405 defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1, [1], 1, 5>;
406 defm : SBWriteResPair<WriteVarShuffleX, [SBPort15], 1, [1], 1, 6>;
407 defm : SBWriteResPair<WriteVarShuffleY, [SBPort15], 1, [1], 1, 7>;
408 defm : SBWriteResPair<WriteVarShuffleZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1
409 defm : SBWriteResPair<WriteBlend, [SBPort15], 1, [1], 1, 6>;
410 defm : SBWriteResPair<WriteBlendY, [SBPort15], 1, [1], 1, 7>;
411 defm : SBWriteResPair<WriteBlendZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1
412 defm : SBWriteResPair<WriteVarBlend, [SBPort15], 2, [2], 2, 6>;
413 defm : SBWriteResPair<WriteVarBlendY,[SBPort15], 2, [2], 2, 7>;
414 defm : SBWriteResPair<WriteVarBlendZ,[SBPort15], 2, [2], 2, 7>; // Unsupported = 1
415 defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>;
416 defm : SBWriteResPair<WriteMPSADY, [SBPort0, SBPort15], 7, [1,2], 3, 7>;
417 defm : SBWriteResPair<WriteMPSADZ, [SBPort0, SBPort15], 7, [1,2], 3, 7>; // Unsupported = 1
418 defm : SBWriteResPair<WritePSADBW, [SBPort0], 5, [1], 1, 5>;
419 defm : SBWriteResPair<WritePSADBWX, [SBPort0], 5, [1], 1, 6>;
420 defm : SBWriteResPair<WritePSADBWY, [SBPort0], 5, [1], 1, 7>;
421 defm : SBWriteResPair<WritePSADBWZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
422 defm : SBWriteResPair<WritePHMINPOS, [SBPort0], 5, [1], 1, 6>;
424 // Vector integer shifts.
425 defm : SBWriteResPair<WriteVecShift, [SBPort5], 1, [1], 1, 5>;
426 defm : SBWriteResPair<WriteVecShiftX, [SBPort0,SBPort15], 2, [1,1], 2, 6>;
427 defm : SBWriteResPair<WriteVecShiftY, [SBPort0,SBPort15], 4, [1,1], 2, 7>;
428 defm : SBWriteResPair<WriteVecShiftZ, [SBPort0,SBPort15], 4, [1,1], 2, 7>; // Unsupported = 1
429 defm : SBWriteResPair<WriteVecShiftImm, [SBPort5], 1, [1], 1, 5>;
430 defm : SBWriteResPair<WriteVecShiftImmX, [SBPort0], 1, [1], 1, 6>;
431 defm : SBWriteResPair<WriteVecShiftImmY, [SBPort0], 1, [1], 1, 7>;
432 defm : SBWriteResPair<WriteVecShiftImmZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1
433 defm : SBWriteResPair<WriteVarVecShift, [SBPort0], 1, [1], 1, 6>;
434 defm : SBWriteResPair<WriteVarVecShiftY, [SBPort0], 1, [1], 1, 7>;
435 defm : SBWriteResPair<WriteVarVecShiftZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1
437 // Vector insert/extract operations.
438 def : WriteRes<WriteVecInsert, [SBPort5,SBPort15]> {
442 def : WriteRes<WriteVecInsertLd, [SBPort23,SBPort15]> {
447 def : WriteRes<WriteVecExtract, [SBPort0,SBPort15]> {
451 def : WriteRes<WriteVecExtractSt, [SBPort4,SBPort23,SBPort15]> {
456 ////////////////////////////////////////////////////////////////////////////////
457 // Horizontal add/sub instructions.
458 ////////////////////////////////////////////////////////////////////////////////
460 defm : SBWriteResPair<WriteFHAdd, [SBPort1,SBPort5], 5, [1,2], 3, 6>;
461 defm : SBWriteResPair<WriteFHAddY, [SBPort1,SBPort5], 5, [1,2], 3, 7>;
462 defm : SBWriteResPair<WriteFHAddZ, [SBPort1,SBPort5], 5, [1,2], 3, 7>; // Unsupported = 1
463 defm : SBWriteResPair<WritePHAdd, [SBPort15], 3, [3], 3, 5>;
464 defm : SBWriteResPair<WritePHAddX, [SBPort15], 3, [3], 3, 6>;
465 defm : SBWriteResPair<WritePHAddY, [SBPort15], 3, [3], 3, 7>;
466 defm : SBWriteResPair<WritePHAddZ, [SBPort15], 3, [3], 3, 7>; // Unsupported = 1
468 ////////////////////////////////////////////////////////////////////////////////
469 // String instructions.
470 ////////////////////////////////////////////////////////////////////////////////
472 // Packed Compare Implicit Length Strings, Return Mask
473 def : WriteRes<WritePCmpIStrM, [SBPort0]> {
476 let ResourceCycles = [3];
478 def : WriteRes<WritePCmpIStrMLd, [SBPort0, SBPort23]> {
481 let ResourceCycles = [3,1];
484 // Packed Compare Explicit Length Strings, Return Mask
485 def : WriteRes<WritePCmpEStrM, [SBPort015]> {
487 let ResourceCycles = [8];
489 def : WriteRes<WritePCmpEStrMLd, [SBPort015, SBPort23]> {
491 let ResourceCycles = [7, 1];
494 // Packed Compare Implicit Length Strings, Return Index
495 def : WriteRes<WritePCmpIStrI, [SBPort0]> {
498 let ResourceCycles = [3];
500 def : WriteRes<WritePCmpIStrILd, [SBPort0,SBPort23]> {
503 let ResourceCycles = [3,1];
506 // Packed Compare Explicit Length Strings, Return Index
507 def : WriteRes<WritePCmpEStrI, [SBPort015]> {
509 let ResourceCycles = [8];
511 def : WriteRes<WritePCmpEStrILd, [SBPort015, SBPort23]> {
513 let ResourceCycles = [7, 1];
516 // MOVMSK Instructions.
// All four writes issue on port 0. The FP and vector forms have a 2-cycle
// latency; the MMX form has a 1-cycle latency.
517 def : WriteRes<WriteFMOVMSK, [SBPort0]> { let Latency = 2; }
518 def : WriteRes<WriteVecMOVMSK, [SBPort0]> { let Latency = 2; }
519 def : WriteRes<WriteVecMOVMSKY, [SBPort0]> { let Latency = 2; }
520 def : WriteRes<WriteMMXMOVMSK, [SBPort0]> { let Latency = 1; }
523 def : WriteRes<WriteAESDecEnc, [SBPort5,SBPort015]> {
526 let ResourceCycles = [1,1];
528 def : WriteRes<WriteAESDecEncLd, [SBPort5,SBPort23,SBPort015]> {
531 let ResourceCycles = [1,1,1];
534 def : WriteRes<WriteAESIMC, [SBPort5]> {
537 let ResourceCycles = [2];
539 def : WriteRes<WriteAESIMCLd, [SBPort5,SBPort23]> {
542 let ResourceCycles = [2,1];
545 def : WriteRes<WriteAESKeyGen, [SBPort015]> {
547 let ResourceCycles = [11];
549 def : WriteRes<WriteAESKeyGenLd, [SBPort015, SBPort23]> {
551 let ResourceCycles = [10, 1];
554 // Carry-less multiplication instructions.
555 def : WriteRes<WriteCLMul, [SBPort015]> {
557 let ResourceCycles = [18];
559 def : WriteRes<WriteCLMulLd, [SBPort015, SBPort23]> {
561 let ResourceCycles = [17, 1];
565 // FIXME: This is probably wrong. Only STMXCSR should require Port4.
// Both MXCSR writes are currently modeled identically: 4 micro-ops, 5-cycle
// latency, one cycle each on port 0, port 4, port 5 and port 2/3.
566 def : WriteRes<WriteLDMXCSR, [SBPort0,SBPort4,SBPort5,SBPort23]> { let Latency = 5; let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; }
567 def : WriteRes<WriteSTMXCSR, [SBPort0,SBPort4,SBPort5,SBPort23]> { let Latency = 5; let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; }
// NOTE(review): the 100-cycle latency below looks like a placeholder for long,
// unmodeled instructions rather than a measured value - confirm.
569 def : WriteRes<WriteSystem, [SBPort015]> { let Latency = 100; }
570 def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; }
// Fences use the same resources as a store: port 2/3 and port 4.
571 def : WriteRes<WriteFence, [SBPort23, SBPort4]>;
// Nops consume no execution resources.
572 def : WriteRes<WriteNop, []>;
574 // AVX2 and FMA are not supported on Sandy Bridge, but the basic scheduling
575 // resources still have to be defined (see the note at the top of this file).
// The 256-bit shuffle writes are one micro-op on port 5 with a 1-cycle latency
// and a load latency of 7; the FMA writes use the port 0/1 group with a
// 5-cycle latency and the multiclass defaults for everything else.
576 defm : SBWriteResPair<WriteFShuffle256, [SBPort5], 1, [1], 1, 7>;
577 defm : SBWriteResPair<WriteFVarShuffle256, [SBPort5], 1, [1], 1, 7>;
578 defm : SBWriteResPair<WriteShuffle256, [SBPort5], 1, [1], 1, 7>;
579 defm : SBWriteResPair<WriteVPMOV256, [SBPort5], 1, [1], 1, 7>;
580 defm : SBWriteResPair<WriteVarShuffle256, [SBPort5], 1, [1], 1, 7>;
581 defm : SBWriteResPair<WriteFMA, [SBPort01], 5>;
582 defm : SBWriteResPair<WriteFMAX, [SBPort01], 5>;
583 defm : SBWriteResPair<WriteFMAY, [SBPort01], 5>;
584 defm : SBWriteResPair<WriteFMAZ, [SBPort01], 5>; // Unsupported = 1
586 // Remaining SNB instrs.
588 def SBWriteResGroup1 : SchedWriteRes<[SBPort1]> {
591 let ResourceCycles = [1];
593 def: InstRW<[SBWriteResGroup1], (instrs COMP_FST0r,
598 def SBWriteResGroup2 : SchedWriteRes<[SBPort5]> {
601 let ResourceCycles = [1];
603 def: InstRW<[SBWriteResGroup2], (instrs FDECSTP, FINCSTP, FFREE, FFREEP, FNOP,
604 LD_Frr, ST_Frr, ST_FPrr)>;
605 def: InstRW<[SBWriteResGroup2], (instrs LOOP, LOOPE, LOOPNE)>; // FIXME: This seems wrong compared to other Intel CPUs.
606 def: InstRW<[SBWriteResGroup2], (instrs RETQ)>;
608 def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> {
611 let ResourceCycles = [1];
613 def: InstRW<[SBWriteResGroup4], (instrs CDQ, CQO)>;
615 def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
618 let ResourceCycles = [1];
620 def: InstRW<[SBWriteResGroup5], (instrs MMX_PABSBrr,
629 def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> {
632 let ResourceCycles = [2];
634 def: InstRW<[SBWriteResGroup11], (instrs SCASB,
639 def SBWriteResGroup12 : SchedWriteRes<[SBPort0,SBPort1]> {
642 let ResourceCycles = [1,1];
644 def: InstRW<[SBWriteResGroup12], (instregex "(V?)(U?)COMI(SD|SS)rr")>;
646 def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> {
649 let ResourceCycles = [1,1];
651 def: InstRW<[SBWriteResGroup15], (instrs CWD,
654 def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> {
657 let ResourceCycles = [1,1];
659 def: InstRW<[SBWriteResGroup18], (instrs JCXZ, JECXZ, JRCXZ,
662 def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> {
665 let ResourceCycles = [1];
667 def: InstRW<[SBWriteResGroup21], (instrs PUSHFS64)>;
669 def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
672 let ResourceCycles = [1,1];
674 def: InstRW<[SBWriteResGroup22], (instregex "(V?)EXTRACTPSrr")>;
676 def SBWriteResGroup23 : SchedWriteRes<[SBPort05]> {
679 let ResourceCycles = [3];
681 def: InstRW<[SBWriteResGroup23], (instregex "RCL(8|16|32|64)r1",
682 "RCR(8|16|32|64)r1")>;
// SBWriteResGroup25_1: SBPort23 for 1 cycle, SBPort015 for 2; LEAVE, LEAVE64.
684 def SBWriteResGroup25_1 : SchedWriteRes<[SBPort23,SBPort015]> {
687 let ResourceCycles = [1,2];
689 def: InstRW<[SBWriteResGroup25_1], (instrs LEAVE, LEAVE64)>;
// SBWriteResGroup26_2: SBPort0, SBPort1 and SBPort5, 1 cycle each; applied to
// COM_FIPr, COM_FIr, UCOM_FIPr and UCOM_FIr.
691 def SBWriteResGroup26_2 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
694 let ResourceCycles = [1,1,1];
696 def: InstRW<[SBWriteResGroup26_2], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>;
// SBWriteResGroup29: SBPort1 and SBPort015, 1 cycle each; applied to MOV64sr.
698 def SBWriteResGroup29 : SchedWriteRes<[SBPort1,SBPort015]> {
701 let ResourceCycles = [1,1];
703 def: InstRW<[SBWriteResGroup29], (instrs MOV64sr)>;
// SBWriteResGroup29_2: SBPort5 for 1 cycle, SBPort015 for 3; applied to PAUSE.
705 def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> {
708 let ResourceCycles = [1,3];
710 def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>;
// SBWriteResGroup31: SBPort23 for 1 cycle; applied to the memory-source
// ("rm") forms of MOVSX and MOVZX matched by the regexes below.
712 def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> {
715 let ResourceCycles = [1];
717 def: InstRW<[SBWriteResGroup31], (instregex "MOVSX(16|32|64)rm(8|16|32)",
718 "MOVZX(16|32|64)rm(8|16)")>;
// SBWriteResGroup76: SBPort05 for 8 cycles; applied to RCL/RCR register forms
// with an immediate ("ri") or CL ("rCL") count.
720 def SBWriteResGroup76 : SchedWriteRes<[SBPort05]> {
723 let ResourceCycles = [8];
725 def: InstRW<[SBWriteResGroup76], (instregex "RCL(8|16|32|64)r(i|CL)",
726 "RCR(8|16|32|64)r(i|CL)")>;
// Write classes that include SBPort4 and/or SBPort23 (the store-data and
// load/store-address ports per the port comments at the top of the file).
//
// SBWriteResGroup33: SBPort4 and SBPort23, 1 cycle each; PUSH of a 16/32/64-bit
// register or a 64-bit imm8 form.
728 def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> {
731 let ResourceCycles = [1,1];
733 def: InstRW<[SBWriteResGroup33], (instregex "PUSH(16r|32r|64r|64i8)")>;
// SBWriteResGroup35: SBPort1 for 1 cycle, SBPort5 for 2; applied to CLI.
735 def SBWriteResGroup35 : SchedWriteRes<[SBPort1,SBPort5]> {
738 let ResourceCycles = [1,2];
740 def: InstRW<[SBWriteResGroup35], (instrs CLI)>;
// SBWriteResGroup35_2: SBPort1, SBPort4 and SBPort23, 1 cycle each; applied to
// PUSHGS64 and the ISTT_FP memory forms.
742 def SBWriteResGroup35_2 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> {
745 let ResourceCycles = [1,1,1];
747 def: InstRW<[SBWriteResGroup35_2], (instrs PUSHGS64)>;
748 def: InstRW<[SBWriteResGroup35_2], (instregex "ISTT_FP(16|32|64)m")>;
// SBWriteResGroup36: SBPort4, SBPort5 and SBPort23, 1 cycle each; applied to
// CALL64pcrel32 and register-operand CALLs. The regex list continues on lines
// not visible in this excerpt.
750 def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
753 let ResourceCycles = [1,1,1];
755 def: InstRW<[SBWriteResGroup36], (instrs CALL64pcrel32)>;
756 def: InstRW<[SBWriteResGroup36], (instregex "CALL(16|32|64)r",
// SBWriteResGroup40: SBPort4, SBPort23 and SBPort015, 1 cycle each; applied to
// the four STOS string-store variants.
759 def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
762 let ResourceCycles = [1,1,1];
764 def: InstRW<[SBWriteResGroup40], (instrs STOSB, STOSL, STOSQ, STOSW)>;
// SBWriteResGroup41: SBPort5 for 1 cycle, SBPort015 for 3; applied to FNINIT.
// NOTE(review): same resources/cycles as SBWriteResGroup29_2 in what is visible;
// the hidden fields (e.g. latency) may differ.
766 def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> {
769 let ResourceCycles = [1,3];
771 def: InstRW<[SBWriteResGroup41], (instrs FNINIT)>;
// Four-resource write classes.
//
// SBWriteResGroup45: SBPort0, SBPort4, SBPort23 and SBPort15, 1 cycle each;
// applied to (V)PEXTRD/(V)PEXTRQ "mr" forms. The regex list continues on
// lines not visible in this excerpt.
773 def SBWriteResGroup45 : SchedWriteRes<[SBPort0,SBPort4,SBPort23,SBPort15]> {
776 let ResourceCycles = [1,1,1,1];
778 def: InstRW<[SBWriteResGroup45], (instregex "(V?)PEXTR(D|Q)mr",
// SBWriteResGroup46: SBPort4, SBPort5, SBPort01 and SBPort23, 1 cycle each;
// applied to opcodes matching "CLFLUSH".
781 def SBWriteResGroup46 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
784 let ResourceCycles = [1,1,1,1];
786 def: InstRW<[SBWriteResGroup46], (instregex "CLFLUSH")>;
// SBWriteResGroup47: same resources as SBWriteResGroup46 but SBPort5 is charged
// 2 cycles; applied to opcodes matching "FXRSTOR".
788 def SBWriteResGroup47 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
791 let ResourceCycles = [1,2,1,1];
793 def: InstRW<[SBWriteResGroup47], (instregex "FXRSTOR")>;
// SBWriteResGroup48: SBPort23 for 1 cycle. Bound to an instrs list starting
// with MMX_MOVD64from64rm and a regex list starting with POP of a register;
// both lists continue on lines not visible in this excerpt.
795 def SBWriteResGroup48 : SchedWriteRes<[SBPort23]> {
798 let ResourceCycles = [1];
800 def: InstRW<[SBWriteResGroup48], (instrs MMX_MOVD64from64rm,
802 def: InstRW<[SBWriteResGroup48], (instregex "POP(16|32|64)r",
// SBWriteResGroup49: SBPort5 and SBPort23, 1 cycle each; applied to MOV16sm.
812 def SBWriteResGroup49 : SchedWriteRes<[SBPort5,SBPort23]> {
815 let ResourceCycles = [1,1];
817 def: InstRW<[SBWriteResGroup49], (instrs MOV16sm)>;
// SBWriteResGroup51: SBPort23 and SBPort15, 1 cycle each. Instruction list
// starts with MMX_PABSBrm; the remainder is not visible here.
819 def SBWriteResGroup51 : SchedWriteRes<[SBPort23,SBPort15]> {
822 let ResourceCycles = [1,1];
824 def: InstRW<[SBWriteResGroup51], (instrs MMX_PABSBrm,
// SBWriteResGroup52: SBPort23 and SBPort015, 1 cycle each; LODSL and LODSQ.
832 def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> {
835 let ResourceCycles = [1,1];
837 def: InstRW<[SBWriteResGroup52], (instrs LODSL, LODSQ)>;
// SBWriteResGroup53: SBPort4 for 1 cycle, SBPort23 for 2; applied to the
// ST_F / ST_FP memory forms matched below.
839 def SBWriteResGroup53 : SchedWriteRes<[SBPort4,SBPort23]> {
842 let ResourceCycles = [1,2];
844 def: InstRW<[SBWriteResGroup53], (instregex "ST_F(32|64)m",
845 "ST_FP(32|64|80)m")>;
// SBWriteResGroup54: SBPort23 for 1 cycle. Instruction list starts with
// VBROADCASTSDYrm; the remainder is not visible here.
// NOTE(review): visible fields match SBWriteResGroup48; hidden fields
// (e.g. latency) presumably distinguish the two -- confirm in the full file.
847 def SBWriteResGroup54 : SchedWriteRes<[SBPort23]> {
850 let ResourceCycles = [1];
852 def: InstRW<[SBWriteResGroup54], (instrs VBROADCASTSDYrm,
// SBWriteResGroup58: SBPort23 and SBPort05, 1 cycle each; VINSERTF128rm.
858 def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort05]> {
861 let ResourceCycles = [1,1];
863 def: InstRW<[SBWriteResGroup58], (instrs VINSERTF128rm)>;
// SBWriteResGroup59: SBPort23 and SBPort15, 1 cycle each; MMX_PADDQirm.
865 def SBWriteResGroup59 : SchedWriteRes<[SBPort23,SBPort15]> {
868 let ResourceCycles = [1,1];
870 def: InstRW<[SBWriteResGroup59], (instrs MMX_PADDQirm)>;
// SBWriteResGroup62: SBPort5 for 2 cycles, SBPort23 for 1; VERRm and VERWm.
872 def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> {
875 let ResourceCycles = [2,1];
877 def: InstRW<[SBWriteResGroup62], (instrs VERRm, VERWm)>;
// SBWriteResGroup63: SBPort23 for 1 cycle, SBPort015 for 2; LODSB and LODSW.
879 def SBWriteResGroup63 : SchedWriteRes<[SBPort23,SBPort015]> {
882 let ResourceCycles = [1,2];
884 def: InstRW<[SBWriteResGroup63], (instrs LODSB, LODSW)>;
// SBWriteResGroup64: SBPort5, SBPort01 and SBPort23, 1 cycle each; FARJMP64m.
886 def SBWriteResGroup64 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
889 let ResourceCycles = [1,1,1];
891 def: InstRW<[SBWriteResGroup64], (instrs FARJMP64m)>;
// SBWriteResGroup66: SBPort0 and SBPort4 for 1 cycle each, SBPort23 for 2;
// applied to FNSTSWm.
893 def SBWriteResGroup66 : SchedWriteRes<[SBPort0,SBPort4,SBPort23]> {
896 let ResourceCycles = [1,1,2];
898 def: InstRW<[SBWriteResGroup66], (instrs FNSTSWm)>;
// SBWriteResGroup67: SBPort1 for 1 cycle, SBPort5 for 2, SBPort015 for 1.
// Regex list starts with the SLDT register forms; the remainder is not
// visible in this excerpt.
900 def SBWriteResGroup67 : SchedWriteRes<[SBPort1,SBPort5,SBPort015]> {
903 let ResourceCycles = [1,2,1];
905 def: InstRW<[SBWriteResGroup67], (instregex "SLDT(16|32|64)r",
// SBWriteResGroup68: SBPort4 and SBPort5 for 1 cycle each, SBPort23 for 2;
// applied to FNSTCW16m and memory-operand CALLs.
908 def SBWriteResGroup68 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
911 let ResourceCycles = [1,1,2];
913 def: InstRW<[SBWriteResGroup68], (instrs FNSTCW16m)>;
914 def: InstRW<[SBWriteResGroup68], (instregex "CALL(16|32|64)m")>;
// SBWriteResGroup69: SBPort4 for 1 cycle, SBPort23 for 2, SBPort05 for 1;
// applied to memory-destination SAR/SHL/SHR with a count of 1 ("m1") or an
// immediate ("mi").
916 def SBWriteResGroup69 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
919 let ResourceCycles = [1,2,1];
921 def: InstRW<[SBWriteResGroup69], (instregex "SAR(8|16|32|64)m(1|i)",
922 "SHL(8|16|32|64)m(1|i)",
923 "SHR(8|16|32|64)m(1|i)")>;
// SBWriteResGroup77: SBPort0, SBPort1 and SBPort23, 1 cycle each; the
// memory-source ("rm") counterpart of the (V)(U)COMISD/SS register forms.
925 def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
928 let ResourceCycles = [1,1,1];
930 def: InstRW<[SBWriteResGroup77], (instregex "(V?)(U?)COMI(SD|SS)rm")>;
// SBWriteResGroup81: SBPort4 for 1 cycle, SBPort23 for 2, SBPort015 for 1;
// applied to opcodes matching CMPXCHG8B / CMPXCHG16B.
932 def SBWriteResGroup81 : SchedWriteRes<[SBPort4, SBPort23, SBPort015]> {
935 let ResourceCycles = [1, 2, 1];
937 def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG(8|16)B")>;
// SBWriteResGroup83: SBPort23 for 2 cycles, SBPort015 for 3. Instruction list
// starts with CMPSB; the remainder is not visible in this excerpt.
939 def SBWriteResGroup83 : SchedWriteRes<[SBPort23,SBPort015]> {
942 let ResourceCycles = [2,3];
944 def: InstRW<[SBWriteResGroup83], (instrs CMPSB,
// SBWriteResGroup84: SBPort4 for 1 cycle, SBPort5 and SBPort23 for 2 each;
// applied to FLDCW16m.
949 def SBWriteResGroup84 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
952 let ResourceCycles = [1,2,2];
954 def: InstRW<[SBWriteResGroup84], (instrs FLDCW16m)>;
// SBWriteResGroup85: SBPort4 for 1 cycle, SBPort23 and SBPort05 for 2 each;
// applied to memory-destination ROL/ROR with a count of 1 or an immediate.
956 def SBWriteResGroup85 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
959 let ResourceCycles = [1,2,2];
961 def: InstRW<[SBWriteResGroup85], (instregex "ROL(8|16|32|64)m(1|i)",
962 "ROR(8|16|32|64)m(1|i)")>;
// SBWriteResGroup86: SBPort4 for 1 cycle, SBPort23 and SBPort015 for 2 each;
// applied to the MOVS string moves and the XADD "rm" forms.
964 def SBWriteResGroup86 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
967 let ResourceCycles = [1,2,2];
969 def: InstRW<[SBWriteResGroup86], (instrs MOVSB, MOVSL, MOVSQ, MOVSW)>;
970 def: InstRW<[SBWriteResGroup86], (instregex "XADD(8|16|32|64)rm")>;
// SBWriteResGroup87: SBPort4, SBPort5 and SBPort01 for 1 cycle each, SBPort23
// for 2; applied to FARCALL64m.
972 def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
975 let ResourceCycles = [1,1,1,2];
977 def: InstRW<[SBWriteResGroup87], (instrs FARCALL64m)>;
// SBWriteResGroup93: SBPort0, SBPort1 and SBPort23, 1 cycle each; applied to
// memory-source CVT(T)SD2SI / CVT(T)SS2SI, with or without the 64 suffix.
979 def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
982 let ResourceCycles = [1,1,1];
984 def: InstRW<[SBWriteResGroup93], (instregex "CVT(T?)(SD|SS)2SI(64)?rm")>;
// SBWriteResGroup95: SBPort5, SBPort01 and SBPort23, 1 cycle each; applied to
// the LD_F memory forms.
986 def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
989 let ResourceCycles = [1,1,1];
991 def: InstRW<[SBWriteResGroup95], (instregex "LD_F(32|64|80)m")>;
// SBWriteResGroup97: SBPort1 and SBPort4 for 1 cycle each, SBPort23 for 2;
// applied to the IST_F / IST_FP memory forms.
993 def SBWriteResGroup97 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> {
996 let ResourceCycles = [1,1,2];
998 def: InstRW<[SBWriteResGroup97], (instregex "IST_F(16|32)m",
999 "IST_FP(16|32|64)m")>;
// SBWriteResGroup97_2: 6 micro-ops; SBPort4 for 1 cycle, SBPort23 for 2 and
// SBPort05 for 3. Applied to memory-destination rotates and shifts whose
// count comes from CL ("mCL") at all four operand widths.
// NOTE(review): the Latency line is not visible in this excerpt.
1001 def SBWriteResGroup97_2 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
1003 let NumMicroOps = 6;
1004 let ResourceCycles = [1,2,3];
1006 def: InstRW<[SBWriteResGroup97_2], (instregex "ROL(8|16|32|64)mCL",
1007 "ROR(8|16|32|64)mCL",
1008 "SAR(8|16|32|64)mCL",
1009 "SHL(8|16|32|64)mCL",
1010 "SHR(8|16|32|64)mCL")>;
// Three 6-micro-op write classes. Latency lines are not visible in this
// excerpt.
//
// SBWriteResGroup98: SBPort4 for 1 cycle, SBPort23 for 2, SBPort015 for 3.
// Used as this model's implementation of the generic WriteADCRMW class.
1012 def SBWriteResGroup98 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
1014 let NumMicroOps = 6;
1015 let ResourceCycles = [1,2,3];
1017 def: SchedAlias<WriteADCRMW, SBWriteResGroup98>;
// SBWriteResGroup99: SBPort4 for 1 cycle, SBPort23 and SBPort05 for 2 each,
// SBPort015 for 1. Bound, together with ReadAfterLd, to the "mr" forms of
// ADC and SBB.
1019 def SBWriteResGroup99 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> {
1021 let NumMicroOps = 6;
1022 let ResourceCycles = [1,2,2,1];
1024 def: InstRW<[SBWriteResGroup99, ReadAfterLd], (instrs ADC8mr, ADC16mr, ADC32mr, ADC64mr,
1025 SBB8mr, SBB16mr, SBB32mr, SBB64mr)>;
// SBWriteResGroup100: SBPort4, SBPort5, SBPort05 and SBPort015 for 1 cycle
// each, SBPort23 for 2. Aliased to WriteBitTestRegLd; the existing TODO below
// flags this mapping as wrong because that class is not read-modify-write.
1027 def SBWriteResGroup100 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort05,SBPort015]> {
1029 let NumMicroOps = 6;
1030 let ResourceCycles = [1,1,2,1,1];
1032 def : SchedAlias<WriteBitTestRegLd, SBWriteResGroup100>; // TODO - this is incorrect - no RMW
// Two-resource write classes combining one compute port with SBPort23.
// Latency lines are not visible in this excerpt.
//
// SBWriteResGroup101: 2 micro-ops; SBPort1 and SBPort23, 1 cycle each.
// Applied to ADD/SUB/SUBR_F memory forms and the ILD_F memory forms.
1034 def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> {
1036 let NumMicroOps = 2;
1037 let ResourceCycles = [1,1];
1039 def: InstRW<[SBWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
1040 "ILD_F(16|32|64)m")>;
// SBWriteResGroup104: 2 micro-ops; SBPort0 and SBPort23, 1 cycle each.
// Applied to the memory-source form of (V)PCMPGTQ.
1042 def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> {
1044 let NumMicroOps = 2;
1045 let ResourceCycles = [1,1];
1047 def: InstRW<[SBWriteResGroup104], (instregex "(V?)PCMPGTQrm")>;
// SBWriteResGroup106: 3 micro-ops; SBPort1 for 2 cycles, SBPort23 for 1.
// Applied to FICOM / FICOMP with a 16- or 32-bit memory operand.
1049 def SBWriteResGroup106 : SchedWriteRes<[SBPort1,SBPort23]> {
1051 let NumMicroOps = 3;
1052 let ResourceCycles = [2,1];
1054 def: InstRW<[SBWriteResGroup106], (instregex "FICOM(P?)(16|32)m")>;
// SBWriteResGroup108: 11 micro-ops; SBPort05 for 7 cycles, SBPort23 for 4.
// Applied to every opcode matching RCL/RCR with a memory operand.
// NOTE(review): the Latency lines of the groups below are not visible here.
1056 def SBWriteResGroup108 : SchedWriteRes<[SBPort05,SBPort23]> {
1058 let NumMicroOps = 11;
1059 let ResourceCycles = [7,4];
1061 def: InstRW<[SBWriteResGroup108], (instregex "RCL(8|16|32|64)m",
1062 "RCR(8|16|32|64)m")>;
// SBWriteResGroup111: 2 micro-ops; SBPort0 and SBPort23, 1 cycle each.
// Applied to MUL_F with a 32- or 64-bit memory operand.
1064 def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> {
1066 let NumMicroOps = 2;
1067 let ResourceCycles = [1,1];
1069 def: InstRW<[SBWriteResGroup111], (instregex "MUL_F(32|64)m")>;
// SBWriteResGroup114: 3 micro-ops; SBPort1 for 2 cycles, SBPort23 for 1.
// Applied to ADD/SUB/SUBR_FI with a 16- or 32-bit memory operand.
1071 def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> {
1073 let NumMicroOps = 3;
1074 let ResourceCycles = [2,1];
1076 def: InstRW<[SBWriteResGroup114], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
// x87 multiply/divide with a memory operand. Latency lines are not visible in
// this excerpt.
//
// SBWriteResGroup119: 3 micro-ops; SBPort0, SBPort1 and SBPort23, 1 cycle
// each. Applied to MUL_FI with a 16- or 32-bit memory operand.
1078 def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
1080 let NumMicroOps = 3;
1081 let ResourceCycles = [1,1,1];
1083 def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI(16|32)m")>;
// SBWriteResGroup130: 2 micro-ops; SBPort0 and SBPort23, 1 cycle each.
// Applied to DIV_F / DIVR_F with a 32- or 64-bit memory operand.
1085 def SBWriteResGroup130 : SchedWriteRes<[SBPort0,SBPort23]> {
1087 let NumMicroOps = 2;
1088 let ResourceCycles = [1,1];
1090 def: InstRW<[SBWriteResGroup130], (instregex "DIV(R?)_F(32|64)m")>;
// SBWriteResGroup131: 3 micro-ops; SBPort0, SBPort1 and SBPort23, 1 cycle
// each. Applied to DIV_FI / DIVR_FI with a 16- or 32-bit memory operand.
1092 def SBWriteResGroup131 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
1094 let NumMicroOps = 3;
1095 let ResourceCycles = [1,1,1];
1097 def: InstRW<[SBWriteResGroup131], (instregex "DIV(R?)_FI(16|32)m")>;
// SBWriteResGroupVzeroall: 20 micro-ops, but only SBPort5 is charged, and
// only for 2 cycles. Applied to VZEROALL.
// NOTE(review): the Latency line is not visible in this excerpt.
1099 def SBWriteResGroupVzeroall : SchedWriteRes<[SBPort5]> {
1101 let NumMicroOps = 20;
1102 let ResourceCycles = [2];
1104 def: InstRW<[SBWriteResGroupVzeroall], (instrs VZEROALL)>;
// SBWriteResGroupVzeroupper: 4 micro-ops with an empty resource list, so no
// processor resource is charged. Applied to VZEROUPPER.
1106 def SBWriteResGroupVzeroupper : SchedWriteRes<[]> {
1108 let NumMicroOps = 4;
1109 let ResourceCycles = [];
1111 def: InstRW<[SBWriteResGroupVzeroupper], (instrs VZEROUPPER)>;
// CLC uses the WriteZero class, which is defined outside this excerpt.
1113 def: InstRW<[WriteZero], (instrs CLC)>;
1115 // Instruction variants handled by the renamer. These might not need execution
1116 // ports in certain conditions.
1117 // See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
1118 // section "Sandy Bridge and Ivy Bridge Pipeline" > "Register allocation and
1120 // These can be investigated with llvm-exegesis, e.g.
1121 // echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
1122 // echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
// SBWriteZeroLatency: write class with an empty resource list.
// NOTE(review): its body is not visible in this excerpt; going by the name it
// presumably sets a zero latency -- confirm in the full file.
1124 def SBWriteZeroLatency : SchedWriteRes<[]> {
// SBWriteZeroIdiom: per-instruction variant. When ZeroIdiomPredicate holds,
// the instruction is costed as SBWriteZeroLatency; otherwise as WriteALU.
1128 def SBWriteZeroIdiom : SchedWriteVariant<[
1129 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
1130 SchedVar<NoSchedPred, [WriteALU]>
// Instruction list starts with SUB32rr, SUB64rr; the remainder is not visible
// in this excerpt.
1132 def : InstRW<[SBWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
// SBWriteFZeroIdiom: costed as SBWriteZeroLatency when ZeroIdiomPredicate
// holds, otherwise as WriteFLogic. The instruction list starts with XORPSrr,
// VXORPSrr, XORPDrr; the remainder is not visible in this excerpt.
1135 def SBWriteFZeroIdiom : SchedWriteVariant<[
1136 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
1137 SchedVar<NoSchedPred, [WriteFLogic]>
1139 def : InstRW<[SBWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr,
// SBWriteFZeroIdiomY: same selection, but the fallback is WriteFLogicY.
// Applied to VXORPSYrr and VXORPDYrr.
1142 def SBWriteFZeroIdiomY : SchedWriteVariant<[
1143 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
1144 SchedVar<NoSchedPred, [WriteFLogicY]>
1146 def : InstRW<[SBWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>;
// SBWriteVZeroIdiomLogicX: costed as SBWriteZeroLatency when
// ZeroIdiomPredicate holds, otherwise as WriteVecLogicX. Applied to PXORrr
// and VPXORrr.
1148 def SBWriteVZeroIdiomLogicX : SchedWriteVariant<[
1149 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
1150 SchedVar<NoSchedPred, [WriteVecLogicX]>
1152 def : InstRW<[SBWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>;
// SBWriteVZeroIdiomALUX: same selection with WriteVecALUX as the fallback.
// The list begins with PSUBBrr/VPSUBBrr and ends with the PCMPGT{B,D,W}
// register forms; some middle lines are not visible in this excerpt.
1154 def SBWriteVZeroIdiomALUX : SchedWriteVariant<[
1155 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
1156 SchedVar<NoSchedPred, [WriteVecALUX]>
1158 def : InstRW<[SBWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
1162 PCMPGTBrr, VPCMPGTBrr,
1163 PCMPGTDrr, VPCMPGTDrr,
1164 PCMPGTWrr, VPCMPGTWrr)>;
// SBWritePCMPGTQ: 1 micro-op; SBPort0 for 1 cycle. This is the non-idiom cost
// used by the variant below.
// NOTE(review): the Latency line is not visible in this excerpt.
1166 def SBWritePCMPGTQ : SchedWriteRes<[SBPort0]> {
1168 let NumMicroOps = 1;
1169 let ResourceCycles = [1];
// SBWriteVZeroIdiomPCMPGTQ: costed as SBWriteZeroLatency when
// ZeroIdiomPredicate holds, otherwise as SBWritePCMPGTQ. Applied to the
// register forms PCMPGTQrr and VPCMPGTQrr.
1172 def SBWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
1173 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
1174 SchedVar<NoSchedPred, [SBWritePCMPGTQ]>
1176 def : InstRW<[SBWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr)>;
1178 // CMOVs that use both the Z and C flags require an extra uop.
// Register form: 3 micro-ops; SBPort05 for 2 cycles, SBPort015 for 1.
// NOTE(review): Latency lines are not visible in this excerpt.
1179 def SBWriteCMOVA_CMOVBErr : SchedWriteRes<[SBPort05,SBPort015]> {
1181 let ResourceCycles = [2,1];
1182 let NumMicroOps = 3;
// Memory form: 4 micro-ops; adds SBPort23 for 1 cycle to the register cost.
1185 def SBWriteCMOVA_CMOVBErm : SchedWriteRes<[SBPort23,SBPort05,SBPort015]> {
1187 let ResourceCycles = [1,2,1];
1188 let NumMicroOps = 4;
// Register CMOV variant: the extra-uop cost applies when
// IsCMOVArr_Or_CMOVBErr matches; every other CMOV uses WriteCMOV.
1191 def SBCMOVA_CMOVBErr : SchedWriteVariant<[
1192 SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [SBWriteCMOVA_CMOVBErr]>,
1193 SchedVar<NoSchedPred, [WriteCMOV]>
// Memory CMOV variant: the extra-uop cost applies when
// IsCMOVArm_Or_CMOVBErm matches; otherwise WriteCMOV.Folded is used.
1196 def SBCMOVA_CMOVBErm : SchedWriteVariant<[
1197 SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [SBWriteCMOVA_CMOVBErm]>,
1198 SchedVar<NoSchedPred, [WriteCMOV.Folded]>
// Attach the variants to the 16/32/64-bit CMOV register and memory opcodes.
1201 def : InstRW<[SBCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
1202 def : InstRW<[SBCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
1204 // SETCCs that use both the Z and C flags require an extra uop.
// Register form: 2 micro-ops; SBPort05 for 2 cycles.
// NOTE(review): Latency lines are not visible in this excerpt.
1205 def SBWriteSETA_SETBEr : SchedWriteRes<[SBPort05]> {
1207 let ResourceCycles = [2];
1208 let NumMicroOps = 2;
// Memory form: 4 micro-ops; SBPort4 and SBPort23 for 1 cycle each, SBPort05
// for 2.
1211 def SBWriteSETA_SETBEm : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
1213 let ResourceCycles = [1,1,2];
1214 let NumMicroOps = 4;
// Register SETCC variant: the extra-uop cost applies when IsSETAr_Or_SETBEr
// matches; every other condition uses WriteSETCC.
1217 def SBSETA_SETBErr : SchedWriteVariant<[
1218 SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SBWriteSETA_SETBEr]>,
1219 SchedVar<NoSchedPred, [WriteSETCC]>
// Memory SETCC variant: the extra-uop cost applies when IsSETAm_Or_SETBEm
// matches; otherwise WriteSETCCStore is used.
1222 def SBSETA_SETBErm : SchedWriteVariant<[
1223 SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SBWriteSETA_SETBEm]>,
1224 SchedVar<NoSchedPred, [WriteSETCCStore]>
// Attach the variants to the SETCC register and memory opcodes.
1227 def : InstRW<[SBSETA_SETBErr], (instrs SETCCr)>;
1228 def : InstRW<[SBSETA_SETBErm], (instrs SETCCm)>;