1 //=- X86SchedIceLake.td - X86 Ice Lake Scheduling ------------*- tablegen -*-=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the machine model for Ice Lake to support
10 // instruction scheduling and other instruction cost heuristics.
12 // TODO: This is mainly a copy of X86SchedSkylakeServer.td, but allows us to
13 // iteratively improve scheduling handling toward better modelling the
14 // Ice Lake (Sunny/Cypress Cove) microarchitecture.
16 //===----------------------------------------------------------------------===//
18 def IceLakeModel : SchedMachineModel {
19 // All x86 instructions are modeled as a single micro-op, and Ice Lake can
20 // decode 6 instructions per cycle.
22 let MicroOpBufferSize = 352; // Based on the reorder buffer.
24 let MispredictPenalty = 14;
26 // Based on the LSD (loop-stream detector) queue size and benchmarking data.
27 let LoopMicroOpBufferSize = 50;
29 // This flag is set to allow the scheduler to assign a default model to
30 // unrecognized opcodes.
31 let CompleteModel = 0;
34 let SchedModel = IceLakeModel in {
36 // Ice Lake can issue micro-ops to 10 different ports in one cycle.
38 // Ports 0, 1, 5, and 6 handle all computation.
39 // Ports 4 and 9 get the data half of stores. Store data can be available later
40 // than the store address, but since we don't model the latency of stores, we
42 // Ports 2 and 3 are identical. They handle loads and address calculations.
43 // Ports 7 and 8 are identical. They handle store address calculations.
44 def ICXPort0 : ProcResource<1>; // Computation.
45 def ICXPort1 : ProcResource<1>; // Computation.
46 def ICXPort2 : ProcResource<1>; // Loads and address calculations.
47 def ICXPort3 : ProcResource<1>; // Loads and address calculations.
48 def ICXPort4 : ProcResource<1>; // Store data.
49 def ICXPort5 : ProcResource<1>; // Computation.
50 def ICXPort6 : ProcResource<1>; // Computation.
51 def ICXPort7 : ProcResource<1>; // Store address calculations.
52 def ICXPort8 : ProcResource<1>; // Store address calculations.
53 def ICXPort9 : ProcResource<1>; // Store data.
55 // Many micro-ops are capable of issuing on multiple ports.
56 def ICXPort01 : ProcResGroup<[ICXPort0, ICXPort1]>;
57 def ICXPort23 : ProcResGroup<[ICXPort2, ICXPort3]>;
58 def ICXPort04 : ProcResGroup<[ICXPort0, ICXPort4]>;
59 def ICXPort05 : ProcResGroup<[ICXPort0, ICXPort5]>;
60 def ICXPort06 : ProcResGroup<[ICXPort0, ICXPort6]>;
61 def ICXPort15 : ProcResGroup<[ICXPort1, ICXPort5]>;
62 def ICXPort16 : ProcResGroup<[ICXPort1, ICXPort6]>;
63 def ICXPort49 : ProcResGroup<[ICXPort4, ICXPort9]>;
64 def ICXPort56 : ProcResGroup<[ICXPort5, ICXPort6]>;
65 def ICXPort78 : ProcResGroup<[ICXPort7, ICXPort8]>;
66 def ICXPort015 : ProcResGroup<[ICXPort0, ICXPort1, ICXPort5]>;
67 def ICXPort056 : ProcResGroup<[ICXPort0, ICXPort5, ICXPort6]>;
68 def ICXPort0156: ProcResGroup<[ICXPort0, ICXPort1, ICXPort5, ICXPort6]>;
70 def ICXDivider : ProcResource<1>; // Integer division issued on port 0.
71 // FP division and sqrt on port 0.
72 def ICXFPDivider : ProcResource<1>;
74 // 60 Entry Unified Scheduler
75 def ICXPortAny : ProcResGroup<[ICXPort0, ICXPort1, ICXPort2, ICXPort3, ICXPort4,
76 ICXPort5, ICXPort6, ICXPort7, ICXPort8, ICXPort9]> {
80 // Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
81 // cycles after the memory operand.
82 def : ReadAdvance<ReadAfterLd, 5>;
84 // Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
85 // until 5/6/7 cycles after the memory operand.
86 def : ReadAdvance<ReadAfterVecLd, 5>;
87 def : ReadAdvance<ReadAfterVecXLd, 6>;
88 def : ReadAdvance<ReadAfterVecYLd, 7>;
90 def : ReadAdvance<ReadInt2Fpu, 0>;
92 // Many SchedWrites are defined in pairs with and without a folded load.
93 // Instructions with folded loads are usually micro-fused, so they only appear
94 // as two micro-ops when queued in the reservation station.
95 // This multiclass defines the resource usage for variants with and without
97 multiclass ICXWriteResPair<X86FoldableSchedWrite SchedRW,
98 list<ProcResourceKind> ExePorts,
99 int Lat, list<int> Res = [1], int UOps = 1,
100 int LoadLat = 5, int LoadUOps = 1> {
101 // Register variant is using a single cycle on ExePort.
102 def : WriteRes<SchedRW, ExePorts> {
104 let ReleaseAtCycles = Res;
105 let NumMicroOps = UOps;
108 // Memory variant also uses a cycle on port 2/3 and adds LoadLat cycles to
109 // the latency (default = 5).
110 def : WriteRes<SchedRW.Folded, !listconcat([ICXPort23], ExePorts)> {
111 let Latency = !add(Lat, LoadLat);
112 let ReleaseAtCycles = !listconcat([1], Res);
113 let NumMicroOps = !add(UOps, LoadUOps);
117 // A folded store needs a cycle on port 4/9 for the store data, and an extra port
118 // 7/8 cycle to recompute the address.
119 def : WriteRes<WriteRMW, [ICXPort78,ICXPort49]>;
122 defm : ICXWriteResPair<WriteALU, [ICXPort0156], 1>; // Simple integer ALU op.
123 defm : ICXWriteResPair<WriteADC, [ICXPort06], 1>; // Integer ALU + flags op.
125 // Integer multiplication.
126 defm : ICXWriteResPair<WriteIMul8, [ICXPort1], 3>;
127 defm : ICXWriteResPair<WriteIMul16, [ICXPort1,ICXPort06,ICXPort0156], 4, [1,1,2], 4>;
128 defm : X86WriteRes<WriteIMul16Imm, [ICXPort1,ICXPort0156], 4, [1,1], 2>;
129 defm : X86WriteRes<WriteIMul16ImmLd, [ICXPort1,ICXPort0156,ICXPort23], 8, [1,1,1], 3>;
130 defm : X86WriteRes<WriteIMul16Reg, [ICXPort1], 3, [1], 1>;
131 defm : X86WriteRes<WriteIMul16RegLd, [ICXPort1,ICXPort0156,ICXPort23], 8, [1,1,1], 3>;
132 defm : ICXWriteResPair<WriteIMul32, [ICXPort1,ICXPort06,ICXPort0156], 4, [1,1,1], 3>;
133 defm : ICXWriteResPair<WriteMULX32, [ICXPort1,ICXPort06,ICXPort0156], 3, [1,1,1], 3>;
134 defm : ICXWriteResPair<WriteIMul32Imm, [ICXPort1], 3>;
135 defm : ICXWriteResPair<WriteIMul32Reg, [ICXPort1], 3>;
136 defm : ICXWriteResPair<WriteIMul64, [ICXPort1,ICXPort5], 4, [1,1], 2>;
137 defm : ICXWriteResPair<WriteMULX64, [ICXPort1,ICXPort5], 3, [1,1], 2>;
138 defm : ICXWriteResPair<WriteIMul64Imm, [ICXPort1], 3>;
139 defm : ICXWriteResPair<WriteIMul64Reg, [ICXPort1], 3>;
140 def ICXWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
// Folded-load form of WriteIMulH: the register-form latency plus the load
// latency of this model. Reads IceLakeModel rather than SkylakeServerModel so
// the Ice Lake model does not depend on values from another CPU's model.
// NOTE(review): assumes IceLakeModel sets LoadLatency (expected 5, matching
// ReadAfterLd) - confirm.
141 def : WriteRes<WriteIMulHLd, []> {
142 let Latency = !add(ICXWriteIMulH.Latency, IceLakeModel.LoadLatency);
145 defm : X86WriteRes<WriteBSWAP32, [ICXPort15], 1, [1], 1>;
146 defm : X86WriteRes<WriteBSWAP64, [ICXPort06, ICXPort15], 2, [1,1], 2>;
147 defm : X86WriteRes<WriteCMPXCHG,[ICXPort06, ICXPort0156], 5, [2,3], 5>;
148 defm : X86WriteRes<WriteCMPXCHGRMW,[ICXPort23,ICXPort06,ICXPort0156,ICXPort78,ICXPort49], 8, [1,2,1,1,1], 6>;
149 defm : X86WriteRes<WriteXCHG, [ICXPort0156], 2, [3], 3>;
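151 // TODO: Why isn't the ICXDivider used by every division write (e.g. the register forms of WriteDiv16/32/64 and WriteIDiv16/32/64, and the WriteIDiv*Ld forms)?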
152 defm : ICXWriteResPair<WriteDiv8, [ICXPort0, ICXDivider], 25, [1,10], 1, 4>;
153 defm : X86WriteRes<WriteDiv16, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156], 76, [7,2,8,3,1,11], 32>;
154 defm : X86WriteRes<WriteDiv32, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156], 76, [7,2,8,3,1,11], 32>;
155 defm : X86WriteRes<WriteDiv64, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156], 76, [7,2,8,3,1,11], 32>;
156 defm : X86WriteRes<WriteDiv16Ld, [ICXPort0,ICXPort23,ICXDivider], 29, [1,1,10], 2>;
157 defm : X86WriteRes<WriteDiv32Ld, [ICXPort0,ICXPort23,ICXDivider], 29, [1,1,10], 2>;
158 defm : X86WriteRes<WriteDiv64Ld, [ICXPort0,ICXPort23,ICXDivider], 29, [1,1,10], 2>;
160 defm : X86WriteRes<WriteIDiv8, [ICXPort0, ICXDivider], 25, [1,10], 1>;
161 defm : X86WriteRes<WriteIDiv16, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort06,ICXPort0156], 102, [4,2,4,8,14,34], 66>;
162 defm : X86WriteRes<WriteIDiv32, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort06,ICXPort0156], 102, [4,2,4,8,14,34], 66>;
163 defm : X86WriteRes<WriteIDiv64, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort06,ICXPort0156], 102, [4,2,4,8,14,34], 66>;
164 defm : X86WriteRes<WriteIDiv8Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>;
165 defm : X86WriteRes<WriteIDiv16Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>;
166 defm : X86WriteRes<WriteIDiv32Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>;
167 defm : X86WriteRes<WriteIDiv64Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>;
169 defm : ICXWriteResPair<WriteCRC32, [ICXPort1], 3>;
171 def : WriteRes<WriteLEA, [ICXPort15]>; // LEA instructions can't fold loads.
173 defm : ICXWriteResPair<WriteCMOV, [ICXPort06], 1, [1], 1>; // Conditional move.
174 defm : X86WriteRes<WriteFCMOV, [ICXPort1], 3, [1], 1>; // x87 conditional move.
175 def : WriteRes<WriteSETCC, [ICXPort06]>; // Setcc.
176 def : WriteRes<WriteSETCCStore, [ICXPort06,ICXPort49,ICXPort78]> {
180 defm : X86WriteRes<WriteLAHFSAHF, [ICXPort06], 1, [1], 1>;
181 defm : X86WriteRes<WriteBitTest, [ICXPort06], 1, [1], 1>;
182 defm : X86WriteRes<WriteBitTestImmLd, [ICXPort06,ICXPort23], 6, [1,1], 2>;
183 defm : X86WriteRes<WriteBitTestRegLd, [ICXPort0156,ICXPort23], 6, [1,1], 2>;
184 defm : X86WriteRes<WriteBitTestSet, [ICXPort06], 1, [1], 1>;
185 defm : X86WriteRes<WriteBitTestSetImmLd, [ICXPort06,ICXPort23], 5, [1,1], 3>;
186 defm : X86WriteRes<WriteBitTestSetRegLd, [ICXPort0156,ICXPort23], 5, [1,1], 2>;
188 // Integer shifts and rotates.
189 defm : ICXWriteResPair<WriteShift, [ICXPort06], 1>;
190 defm : ICXWriteResPair<WriteShiftCL, [ICXPort06], 3, [3], 3>;
191 defm : ICXWriteResPair<WriteRotate, [ICXPort06], 1, [1], 1>;
192 defm : ICXWriteResPair<WriteRotateCL, [ICXPort06], 3, [3], 3>;
195 defm : X86WriteRes<WriteSHDrri, [ICXPort1], 3, [1], 1>;
196 defm : X86WriteRes<WriteSHDrrcl,[ICXPort1,ICXPort06,ICXPort0156], 6, [1, 2, 1], 4>;
197 defm : X86WriteRes<WriteSHDmri, [ICXPort1,ICXPort23,ICXPort78,ICXPort0156], 9, [1, 1, 1, 1], 4>;
198 defm : X86WriteRes<WriteSHDmrcl,[ICXPort1,ICXPort23,ICXPort78,ICXPort06,ICXPort0156], 11, [1, 1, 1, 2, 1], 6>;
201 defm : ICXWriteResPair<WriteBSF, [ICXPort1], 3>;
202 defm : ICXWriteResPair<WriteBSR, [ICXPort1], 3>;
203 defm : ICXWriteResPair<WriteLZCNT, [ICXPort1], 3>;
204 defm : ICXWriteResPair<WriteTZCNT, [ICXPort1], 3>;
205 defm : ICXWriteResPair<WritePOPCNT, [ICXPort1], 3>;
207 // BMI1 BEXTR/BLS, BMI2 BZHI
208 defm : ICXWriteResPair<WriteBEXTR, [ICXPort06,ICXPort15], 2, [1,1], 2>;
209 defm : ICXWriteResPair<WriteBLS, [ICXPort15], 1>;
210 defm : ICXWriteResPair<WriteBZHI, [ICXPort15], 1>;
212 // Loads, stores, and moves, not folded with other operations.
213 defm : X86WriteRes<WriteLoad, [ICXPort23], 5, [1], 1>;
214 defm : X86WriteRes<WriteStore, [ICXPort78, ICXPort49], 1, [1,1], 1>;
215 defm : X86WriteRes<WriteStoreNT, [ICXPort78, ICXPort49], 1, [1,1], 2>;
216 defm : X86WriteRes<WriteMove, [ICXPort0156], 1, [1], 1>;
218 // Model the effect of clobbering the read-write mask operand of the GATHER operation.
219 // Does not cost anything by itself, only has latency, matching that of the WriteLoad.
220 defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;
222 // Idioms that clear a register, like xorps %xmm0, %xmm0.
223 // These can often bypass execution ports completely.
224 def : WriteRes<WriteZero, []>;
226 // Branches don't produce values, so they have no latency, but they still
227 // consume resources. Indirect branches can fold loads.
228 defm : ICXWriteResPair<WriteJump, [ICXPort06], 1>;
230 // Floating point. This covers both scalar and vector operations.
231 defm : X86WriteRes<WriteFLD0, [ICXPort05], 1, [1], 1>;
232 defm : X86WriteRes<WriteFLD1, [ICXPort05], 1, [2], 2>;
233 defm : X86WriteRes<WriteFLDC, [ICXPort05], 1, [2], 2>;
234 defm : X86WriteRes<WriteFLoad, [ICXPort23], 5, [1], 1>;
235 defm : X86WriteRes<WriteFLoadX, [ICXPort23], 6, [1], 1>;
236 defm : X86WriteRes<WriteFLoadY, [ICXPort23], 7, [1], 1>;
237 defm : X86WriteRes<WriteFMaskedLoad, [ICXPort23,ICXPort015], 7, [1,1], 2>;
238 defm : X86WriteRes<WriteFMaskedLoadY, [ICXPort23,ICXPort015], 8, [1,1], 2>;
239 defm : X86WriteRes<WriteFStore, [ICXPort78,ICXPort49], 1, [1,1], 2>;
240 defm : X86WriteRes<WriteFStoreX, [ICXPort78,ICXPort49], 1, [1,1], 2>;
241 defm : X86WriteRes<WriteFStoreY, [ICXPort78,ICXPort49], 1, [1,1], 2>;
242 defm : X86WriteRes<WriteFStoreNT, [ICXPort78,ICXPort49], 1, [1,1], 2>;
243 defm : X86WriteRes<WriteFStoreNTX, [ICXPort78,ICXPort49], 1, [1,1], 2>;
244 defm : X86WriteRes<WriteFStoreNTY, [ICXPort78,ICXPort49], 1, [1,1], 2>;
246 defm : X86WriteRes<WriteFMaskedStore32, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>;
247 defm : X86WriteRes<WriteFMaskedStore32Y, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>;
248 defm : X86WriteRes<WriteFMaskedStore64, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>;
249 defm : X86WriteRes<WriteFMaskedStore64Y, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>;
251 defm : X86WriteRes<WriteFMove, [ICXPort015], 1, [1], 1>;
252 defm : X86WriteRes<WriteFMoveX, [ICXPort015], 1, [1], 1>;
253 defm : X86WriteRes<WriteFMoveY, [ICXPort015], 1, [1], 1>;
254 defm : X86WriteRes<WriteFMoveZ, [ICXPort05], 1, [1], 1>;
255 defm : X86WriteRes<WriteEMMS, [ICXPort05,ICXPort0156], 10, [9,1], 10>;
257 defm : ICXWriteResPair<WriteFAdd, [ICXPort01], 4, [1], 1, 5>; // Floating point add/sub.
258 defm : ICXWriteResPair<WriteFAddX, [ICXPort01], 4, [1], 1, 6>;
259 defm : ICXWriteResPair<WriteFAddY, [ICXPort01], 4, [1], 1, 7>;
260 defm : ICXWriteResPair<WriteFAddZ, [ICXPort05], 4, [1], 1, 7>;
261 defm : ICXWriteResPair<WriteFAdd64, [ICXPort01], 4, [1], 1, 5>; // Floating point double add/sub.
262 defm : ICXWriteResPair<WriteFAdd64X, [ICXPort01], 4, [1], 1, 6>;
263 defm : ICXWriteResPair<WriteFAdd64Y, [ICXPort01], 4, [1], 1, 7>;
264 defm : ICXWriteResPair<WriteFAdd64Z, [ICXPort05], 4, [1], 1, 7>;
266 defm : ICXWriteResPair<WriteFCmp, [ICXPort01], 4, [1], 1, 5>; // Floating point compare.
267 defm : ICXWriteResPair<WriteFCmpX, [ICXPort01], 4, [1], 1, 6>;
268 defm : ICXWriteResPair<WriteFCmpY, [ICXPort01], 4, [1], 1, 7>;
269 defm : ICXWriteResPair<WriteFCmpZ, [ICXPort05], 4, [1], 1, 7>;
270 defm : ICXWriteResPair<WriteFCmp64, [ICXPort01], 4, [1], 1, 5>; // Floating point double compare.
271 defm : ICXWriteResPair<WriteFCmp64X, [ICXPort01], 4, [1], 1, 6>;
272 defm : ICXWriteResPair<WriteFCmp64Y, [ICXPort01], 4, [1], 1, 7>;
273 defm : ICXWriteResPair<WriteFCmp64Z, [ICXPort05], 4, [1], 1, 7>;
275 defm : ICXWriteResPair<WriteFCom, [ICXPort0], 2>; // Floating point compare to flags (X87).
276 defm : ICXWriteResPair<WriteFComX, [ICXPort0], 2>; // Floating point compare to flags (SSE).
278 defm : ICXWriteResPair<WriteFMul, [ICXPort01], 4, [1], 1, 5>; // Floating point multiplication.
279 defm : ICXWriteResPair<WriteFMulX, [ICXPort01], 4, [1], 1, 6>;
280 defm : ICXWriteResPair<WriteFMulY, [ICXPort01], 4, [1], 1, 7>;
281 defm : ICXWriteResPair<WriteFMulZ, [ICXPort05], 4, [1], 1, 7>;
282 defm : ICXWriteResPair<WriteFMul64, [ICXPort01], 4, [1], 1, 5>; // Floating point double multiplication.
283 defm : ICXWriteResPair<WriteFMul64X, [ICXPort01], 4, [1], 1, 6>;
284 defm : ICXWriteResPair<WriteFMul64Y, [ICXPort01], 4, [1], 1, 7>;
285 defm : ICXWriteResPair<WriteFMul64Z, [ICXPort05], 4, [1], 1, 7>;
287 defm : ICXWriteResPair<WriteFDiv, [ICXPort0,ICXFPDivider], 11, [1,3], 1, 5>; // Floating point division. 10-14 cycles.
288 defm : ICXWriteResPair<WriteFDivX, [ICXPort0,ICXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles.
289 defm : ICXWriteResPair<WriteFDivY, [ICXPort0,ICXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles.
290 defm : ICXWriteResPair<WriteFDivZ, [ICXPort0,ICXPort5,ICXFPDivider], 18, [2,1,10], 3, 7>; // NOTE(review): modeled latency is 18; the previous "10-14 cycles" note did not match - confirm.
291 defm : ICXWriteResPair<WriteFDiv64, [ICXPort0,ICXFPDivider], 14, [1,4], 1, 5>; // Floating point double division. 10-14 cycles.
292 defm : ICXWriteResPair<WriteFDiv64X, [ICXPort0,ICXFPDivider], 14, [1,4], 1, 6>; // 10-14 cycles.
293 defm : ICXWriteResPair<WriteFDiv64Y, [ICXPort0,ICXFPDivider], 14, [1,8], 1, 7>; // 10-14 cycles.
294 defm : ICXWriteResPair<WriteFDiv64Z, [ICXPort0,ICXPort5,ICXFPDivider], 23, [2,1,16], 3, 7>; // NOTE(review): modeled latency is 23; the previous "10-14 cycles" note did not match - confirm.
296 defm : ICXWriteResPair<WriteFSqrt, [ICXPort0,ICXFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
297 defm : ICXWriteResPair<WriteFSqrtX, [ICXPort0,ICXFPDivider], 12, [1,3], 1, 6>;
298 defm : ICXWriteResPair<WriteFSqrtY, [ICXPort0,ICXFPDivider], 12, [1,6], 1, 7>;
299 defm : ICXWriteResPair<WriteFSqrtZ, [ICXPort0,ICXPort5,ICXFPDivider], 20, [2,1,12], 3, 7>;
300 defm : ICXWriteResPair<WriteFSqrt64, [ICXPort0,ICXFPDivider], 18, [1,6], 1, 5>; // Floating point double square root.
301 defm : ICXWriteResPair<WriteFSqrt64X, [ICXPort0,ICXFPDivider], 18, [1,6], 1, 6>;
302 defm : ICXWriteResPair<WriteFSqrt64Y, [ICXPort0,ICXFPDivider], 18, [1,12],1, 7>;
303 defm : ICXWriteResPair<WriteFSqrt64Z, [ICXPort0,ICXPort5,ICXFPDivider], 32, [2,1,24], 3, 7>;
304 defm : ICXWriteResPair<WriteFSqrt80, [ICXPort0,ICXFPDivider], 21, [1,7]>; // Floating point long double square root.
306 defm : ICXWriteResPair<WriteFRcp, [ICXPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate.
307 defm : ICXWriteResPair<WriteFRcpX, [ICXPort0], 4, [1], 1, 6>;
308 defm : ICXWriteResPair<WriteFRcpY, [ICXPort0], 4, [1], 1, 7>;
309 defm : ICXWriteResPair<WriteFRcpZ, [ICXPort0,ICXPort5], 4, [2,1], 3, 7>;
311 defm : ICXWriteResPair<WriteFRsqrt, [ICXPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate.
312 defm : ICXWriteResPair<WriteFRsqrtX,[ICXPort0], 4, [1], 1, 6>;
313 defm : ICXWriteResPair<WriteFRsqrtY,[ICXPort0], 4, [1], 1, 7>;
314 defm : ICXWriteResPair<WriteFRsqrtZ,[ICXPort0,ICXPort5], 9, [2,1], 3, 7>;
316 defm : ICXWriteResPair<WriteFMA, [ICXPort01], 4, [1], 1, 5>; // Fused Multiply Add.
317 defm : ICXWriteResPair<WriteFMAX, [ICXPort01], 4, [1], 1, 6>;
318 defm : ICXWriteResPair<WriteFMAY, [ICXPort01], 4, [1], 1, 7>;
319 defm : ICXWriteResPair<WriteFMAZ, [ICXPort0], 4, [1], 1, 7>;
320 defm : ICXWriteResPair<WriteDPPD, [ICXPort5,ICXPort015], 9, [1,2], 3, 6>; // Floating point double dot product.
321 defm : ICXWriteResPair<WriteDPPS, [ICXPort5,ICXPort015], 13, [1,3], 4, 6>;
322 defm : ICXWriteResPair<WriteDPPSY,[ICXPort5,ICXPort015], 13, [1,3], 4, 7>;
323 defm : ICXWriteResPair<WriteFSign, [ICXPort0], 1>; // Floating point fabs/fchs.
324 defm : ICXWriteResPair<WriteFRnd, [ICXPort01], 8, [2], 2, 6>; // Floating point rounding.
325 defm : ICXWriteResPair<WriteFRndY, [ICXPort01], 8, [2], 2, 7>;
326 defm : ICXWriteResPair<WriteFRndZ, [ICXPort05], 8, [2], 2, 7>;
327 defm : ICXWriteResPair<WriteFLogic, [ICXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
328 defm : ICXWriteResPair<WriteFLogicY, [ICXPort015], 1, [1], 1, 7>;
329 defm : ICXWriteResPair<WriteFLogicZ, [ICXPort05], 1, [1], 1, 7>;
330 defm : ICXWriteResPair<WriteFTest, [ICXPort0], 2, [1], 1, 6>; // Floating point TEST instructions.
331 defm : ICXWriteResPair<WriteFTestY, [ICXPort0], 2, [1], 1, 7>;
332 defm : ICXWriteResPair<WriteFTestZ, [ICXPort0], 2, [1], 1, 7>;
333 defm : ICXWriteResPair<WriteFShuffle, [ICXPort15], 1, [1], 1, 6>; // Floating point vector shuffles.
334 defm : ICXWriteResPair<WriteFShuffleY, [ICXPort15], 1, [1], 1, 7>;
335 defm : ICXWriteResPair<WriteFShuffleZ, [ICXPort5], 1, [1], 1, 7>;
336 defm : ICXWriteResPair<WriteFVarShuffle, [ICXPort15], 1, [1], 1, 6>; // Floating point vector variable shuffles.
337 defm : ICXWriteResPair<WriteFVarShuffleY, [ICXPort15], 1, [1], 1, 7>;
338 defm : ICXWriteResPair<WriteFVarShuffleZ, [ICXPort5], 1, [1], 1, 7>;
339 defm : ICXWriteResPair<WriteFBlend, [ICXPort015], 1, [1], 1, 6>; // Floating point vector blends.
340 defm : ICXWriteResPair<WriteFBlendY,[ICXPort015], 1, [1], 1, 7>;
341 defm : ICXWriteResPair<WriteFBlendZ,[ICXPort015], 1, [1], 1, 7>;
342 defm : ICXWriteResPair<WriteFVarBlend, [ICXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
343 defm : ICXWriteResPair<WriteFVarBlendY,[ICXPort015], 2, [2], 2, 7>;
344 defm : ICXWriteResPair<WriteFVarBlendZ,[ICXPort015], 2, [2], 2, 7>;
346 // FMA Scheduling helper class.
347 // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
349 // Vector integer operations.
350 defm : X86WriteRes<WriteVecLoad, [ICXPort23], 5, [1], 1>;
351 defm : X86WriteRes<WriteVecLoadX, [ICXPort23], 6, [1], 1>;
352 defm : X86WriteRes<WriteVecLoadY, [ICXPort23], 7, [1], 1>;
353 defm : X86WriteRes<WriteVecLoadNT, [ICXPort23], 6, [1], 1>;
354 defm : X86WriteRes<WriteVecLoadNTY, [ICXPort23], 7, [1], 1>;
355 defm : X86WriteRes<WriteVecMaskedLoad, [ICXPort23,ICXPort015], 7, [1,1], 2>;
356 defm : X86WriteRes<WriteVecMaskedLoadY, [ICXPort23,ICXPort015], 8, [1,1], 2>;
357 defm : X86WriteRes<WriteVecStore, [ICXPort78,ICXPort49], 1, [1,1], 2>;
358 defm : X86WriteRes<WriteVecStoreX, [ICXPort78,ICXPort49], 1, [1,1], 2>;
359 defm : X86WriteRes<WriteVecStoreY, [ICXPort78,ICXPort49], 1, [1,1], 2>;
360 defm : X86WriteRes<WriteVecStoreNT, [ICXPort78,ICXPort49], 1, [1,1], 2>;
361 defm : X86WriteRes<WriteVecStoreNTY, [ICXPort78,ICXPort49], 1, [1,1], 2>;
362 defm : X86WriteRes<WriteVecMaskedStore32, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>;
363 defm : X86WriteRes<WriteVecMaskedStore32Y, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>;
364 defm : X86WriteRes<WriteVecMaskedStore64, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>;
365 defm : X86WriteRes<WriteVecMaskedStore64Y, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>;
366 defm : X86WriteRes<WriteVecMove, [ICXPort05], 1, [1], 1>;
367 defm : X86WriteRes<WriteVecMoveX, [ICXPort015], 1, [1], 1>;
368 defm : X86WriteRes<WriteVecMoveY, [ICXPort015], 1, [1], 1>;
369 defm : X86WriteRes<WriteVecMoveZ, [ICXPort05], 1, [1], 1>;
370 defm : X86WriteRes<WriteVecMoveToGpr, [ICXPort0], 2, [1], 1>;
371 defm : X86WriteRes<WriteVecMoveFromGpr, [ICXPort5], 1, [1], 1>;
373 defm : ICXWriteResPair<WriteVecALU, [ICXPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
374 defm : ICXWriteResPair<WriteVecALUX, [ICXPort01], 1, [1], 1, 6>;
375 defm : ICXWriteResPair<WriteVecALUY, [ICXPort01], 1, [1], 1, 7>;
376 defm : ICXWriteResPair<WriteVecALUZ, [ICXPort0], 1, [1], 1, 7>;
377 defm : ICXWriteResPair<WriteVecLogic, [ICXPort05], 1, [1], 1, 5>; // Vector integer and/or/xor.
378 defm : ICXWriteResPair<WriteVecLogicX,[ICXPort015], 1, [1], 1, 6>;
379 defm : ICXWriteResPair<WriteVecLogicY,[ICXPort015], 1, [1], 1, 7>;
380 defm : ICXWriteResPair<WriteVecLogicZ,[ICXPort05], 1, [1], 1, 7>;
381 defm : ICXWriteResPair<WriteVecTest, [ICXPort0,ICXPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
382 defm : ICXWriteResPair<WriteVecTestY, [ICXPort0,ICXPort5], 3, [1,1], 2, 7>;
383 defm : ICXWriteResPair<WriteVecTestZ, [ICXPort0,ICXPort5], 3, [1,1], 2, 7>;
384 defm : ICXWriteResPair<WriteVecIMul, [ICXPort0], 5, [1], 1, 5>; // Vector integer multiply.
385 defm : ICXWriteResPair<WriteVecIMulX, [ICXPort01], 5, [1], 1, 6>;
386 defm : ICXWriteResPair<WriteVecIMulY, [ICXPort01], 5, [1], 1, 7>;
387 defm : ICXWriteResPair<WriteVecIMulZ, [ICXPort05], 5, [1], 1, 7>;
388 defm : ICXWriteResPair<WritePMULLD, [ICXPort01], 10, [2], 2, 6>; // Vector PMULLD.
389 defm : ICXWriteResPair<WritePMULLDY, [ICXPort01], 10, [2], 2, 7>;
390 defm : ICXWriteResPair<WritePMULLDZ, [ICXPort05], 10, [2], 2, 7>;
391 defm : ICXWriteResPair<WriteShuffle, [ICXPort5], 1, [1], 1, 5>; // Vector shuffles.
392 defm : ICXWriteResPair<WriteShuffleX, [ICXPort15], 1, [1], 1, 6>;
393 defm : ICXWriteResPair<WriteShuffleY, [ICXPort15], 1, [1], 1, 7>;
394 defm : ICXWriteResPair<WriteShuffleZ, [ICXPort5], 1, [1], 1, 7>;
395 defm : ICXWriteResPair<WriteVarShuffle, [ICXPort5], 1, [1], 1, 5>; // Vector variable shuffles.
396 defm : ICXWriteResPair<WriteVarShuffleX, [ICXPort15], 1, [1], 1, 6>;
397 defm : ICXWriteResPair<WriteVarShuffleY, [ICXPort15], 1, [1], 1, 7>;
398 defm : ICXWriteResPair<WriteVarShuffleZ, [ICXPort5], 1, [1], 1, 7>;
399 defm : ICXWriteResPair<WriteBlend, [ICXPort5], 1, [1], 1, 6>; // Vector blends.
400 defm : ICXWriteResPair<WriteBlendY,[ICXPort5], 1, [1], 1, 7>;
401 defm : ICXWriteResPair<WriteBlendZ,[ICXPort5], 1, [1], 1, 7>;
402 defm : ICXWriteResPair<WriteVarBlend, [ICXPort015], 2, [2], 2, 6>; // Vector variable blends.
403 defm : ICXWriteResPair<WriteVarBlendY,[ICXPort015], 2, [2], 2, 6>; // NOTE(review): LoadLat is 6 here, while WriteFVarBlendY and WriteBlendY use 7 - confirm.
404 defm : ICXWriteResPair<WriteVarBlendZ,[ICXPort05], 2, [1], 1, 6>; // NOTE(review): differs from WriteFVarBlendZ ([2] cycles, 2 uops, LoadLat 7) - confirm.
405 defm : ICXWriteResPair<WriteMPSAD, [ICXPort5], 4, [2], 2, 6>; // Vector MPSAD.
406 defm : ICXWriteResPair<WriteMPSADY, [ICXPort5], 4, [2], 2, 7>;
407 defm : ICXWriteResPair<WriteMPSADZ, [ICXPort5], 4, [2], 2, 7>;
408 defm : ICXWriteResPair<WritePSADBW, [ICXPort5], 3, [1], 1, 5>; // Vector PSADBW.
409 defm : ICXWriteResPair<WritePSADBWX, [ICXPort5], 3, [1], 1, 6>;
410 defm : ICXWriteResPair<WritePSADBWY, [ICXPort5], 3, [1], 1, 7>;
411 defm : ICXWriteResPair<WritePSADBWZ, [ICXPort5], 3, [1], 1, 7>; // TODO: 512-bit ops require ports 0/1 to be joined.
412 defm : ICXWriteResPair<WritePHMINPOS, [ICXPort0], 4, [1], 1, 6>; // Vector PHMINPOS.
414 // Vector integer shifts.
415 defm : ICXWriteResPair<WriteVecShift, [ICXPort0], 1, [1], 1, 5>;
416 defm : X86WriteRes<WriteVecShiftX, [ICXPort5,ICXPort01], 2, [1,1], 2>;
417 defm : X86WriteRes<WriteVecShiftY, [ICXPort5,ICXPort01], 4, [1,1], 2>;
418 defm : X86WriteRes<WriteVecShiftZ, [ICXPort5,ICXPort0], 4, [1,1], 2>;
419 defm : X86WriteRes<WriteVecShiftXLd, [ICXPort01,ICXPort23], 7, [1,1], 2>;
420 defm : X86WriteRes<WriteVecShiftYLd, [ICXPort01,ICXPort23], 8, [1,1], 2>;
421 defm : X86WriteRes<WriteVecShiftZLd, [ICXPort0,ICXPort23], 8, [1,1], 2>;
423 defm : ICXWriteResPair<WriteVecShiftImm, [ICXPort0], 1, [1], 1, 5>; // Vector integer immediate shifts.
424 defm : ICXWriteResPair<WriteVecShiftImmX, [ICXPort01], 1, [1], 1, 6>;
425 defm : ICXWriteResPair<WriteVecShiftImmY, [ICXPort01], 1, [1], 1, 7>;
426 defm : ICXWriteResPair<WriteVecShiftImmZ, [ICXPort0], 1, [1], 1, 7>;
427 defm : ICXWriteResPair<WriteVarVecShift, [ICXPort01], 1, [1], 1, 6>; // Variable vector shifts.
428 defm : ICXWriteResPair<WriteVarVecShiftY, [ICXPort01], 1, [1], 1, 7>;
429 defm : ICXWriteResPair<WriteVarVecShiftZ, [ICXPort0], 1, [1], 1, 7>;
431 // Vector insert/extract operations.
432 def : WriteRes<WriteVecInsert, [ICXPort5]> {
435 let ReleaseAtCycles = [2];
437 def : WriteRes<WriteVecInsertLd, [ICXPort5,ICXPort23]> {
441 def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;
443 def : WriteRes<WriteVecExtract, [ICXPort0,ICXPort5]> {
447 def : WriteRes<WriteVecExtractSt, [ICXPort49,ICXPort5,ICXPort78]> {
452 // Conversion between integer and float.
453 defm : ICXWriteResPair<WriteCvtSS2I, [ICXPort01], 6, [2], 2>; // Needs more work: DD vs DQ.
454 defm : ICXWriteResPair<WriteCvtPS2I, [ICXPort01], 3>;
455 defm : ICXWriteResPair<WriteCvtPS2IY, [ICXPort01], 3>;
456 defm : ICXWriteResPair<WriteCvtPS2IZ, [ICXPort05], 3>;
457 defm : ICXWriteResPair<WriteCvtSD2I, [ICXPort01], 6, [2], 2>;
458 defm : ICXWriteResPair<WriteCvtPD2I, [ICXPort01], 3>;
459 defm : ICXWriteResPair<WriteCvtPD2IY, [ICXPort01], 3>;
460 defm : ICXWriteResPair<WriteCvtPD2IZ, [ICXPort05], 3>;
462 defm : ICXWriteResPair<WriteCvtI2SS, [ICXPort1], 4>;
463 defm : ICXWriteResPair<WriteCvtI2PS, [ICXPort01], 4>;
464 defm : ICXWriteResPair<WriteCvtI2PSY, [ICXPort01], 4>;
465 defm : ICXWriteResPair<WriteCvtI2PSZ, [ICXPort05], 4>; // Needs more work: DD vs DQ.
466 defm : ICXWriteResPair<WriteCvtI2SD, [ICXPort1], 4>;
467 defm : ICXWriteResPair<WriteCvtI2PD, [ICXPort01], 4>;
468 defm : ICXWriteResPair<WriteCvtI2PDY, [ICXPort01], 4>;
469 defm : ICXWriteResPair<WriteCvtI2PDZ, [ICXPort05], 4>;
471 defm : ICXWriteResPair<WriteCvtSS2SD, [ICXPort1], 3>;
472 defm : ICXWriteResPair<WriteCvtPS2PD, [ICXPort1], 3>;
473 defm : ICXWriteResPair<WriteCvtPS2PDY, [ICXPort5,ICXPort01], 3, [1,1], 2>;
474 defm : ICXWriteResPair<WriteCvtPS2PDZ, [ICXPort05], 3, [2], 2>;
475 defm : ICXWriteResPair<WriteCvtSD2SS, [ICXPort5,ICXPort01], 5, [1,1], 2, 5>;
476 defm : ICXWriteResPair<WriteCvtPD2PS, [ICXPort5,ICXPort01], 5, [1,1], 2, 6>;
477 defm : ICXWriteResPair<WriteCvtPD2PSY, [ICXPort5,ICXPort01], 7, [1,1], 2, 7>;
478 defm : ICXWriteResPair<WriteCvtPD2PSZ, [ICXPort5,ICXPort0], 7, [1,1], 2, 7>;
480 defm : X86WriteRes<WriteCvtPH2PS, [ICXPort5,ICXPort01], 5, [1,1], 2>;
481 defm : X86WriteRes<WriteCvtPH2PSY, [ICXPort5,ICXPort01], 7, [1,1], 2>;
482 defm : X86WriteRes<WriteCvtPH2PSZ, [ICXPort5,ICXPort0], 7, [1,1], 2>;
483 defm : X86WriteRes<WriteCvtPH2PSLd, [ICXPort23,ICXPort01], 9, [1,1], 2>;
484 defm : X86WriteRes<WriteCvtPH2PSYLd, [ICXPort23,ICXPort01], 10, [1,1], 2>;
485 defm : X86WriteRes<WriteCvtPH2PSZLd, [ICXPort23,ICXPort05], 10, [1,1], 2>;
487 defm : X86WriteRes<WriteCvtPS2PH, [ICXPort5,ICXPort01], 5, [1,1], 2>;
488 defm : X86WriteRes<WriteCvtPS2PHY, [ICXPort5,ICXPort01], 7, [1,1], 2>;
489 defm : X86WriteRes<WriteCvtPS2PHZ, [ICXPort5,ICXPort05], 7, [1,1], 2>;
490 defm : X86WriteRes<WriteCvtPS2PHSt, [ICXPort49,ICXPort5,ICXPort78,ICXPort01], 6, [1,1,1,1], 4>;
491 defm : X86WriteRes<WriteCvtPS2PHYSt, [ICXPort49,ICXPort5,ICXPort78,ICXPort01], 8, [1,1,1,1], 4>;
492 defm : X86WriteRes<WriteCvtPS2PHZSt, [ICXPort49,ICXPort5,ICXPort78,ICXPort05], 8, [1,1,1,1], 4>;
494 // String instructions.
496 // Packed Compare Implicit Length Strings, Return Mask
497 def : WriteRes<WritePCmpIStrM, [ICXPort0]> {
500 let ReleaseAtCycles = [3];
502 def : WriteRes<WritePCmpIStrMLd, [ICXPort0, ICXPort23]> {
505 let ReleaseAtCycles = [3,1];
508 // Packed Compare Explicit Length Strings, Return Mask
509 def : WriteRes<WritePCmpEStrM, [ICXPort0, ICXPort5, ICXPort015, ICXPort0156]> {
512 let ReleaseAtCycles = [4,3,1,1];
514 def : WriteRes<WritePCmpEStrMLd, [ICXPort0, ICXPort5, ICXPort23, ICXPort015, ICXPort0156]> {
516 let NumMicroOps = 10;
517 let ReleaseAtCycles = [4,3,1,1,1];
520 // Packed Compare Implicit Length Strings, Return Index
521 def : WriteRes<WritePCmpIStrI, [ICXPort0]> {
524 let ReleaseAtCycles = [3];
526 def : WriteRes<WritePCmpIStrILd, [ICXPort0, ICXPort23]> {
529 let ReleaseAtCycles = [3,1];
532 // Packed Compare Explicit Length Strings, Return Index
533 def : WriteRes<WritePCmpEStrI, [ICXPort0,ICXPort5,ICXPort0156]> {
536 let ReleaseAtCycles = [4,3,1];
538 def : WriteRes<WritePCmpEStrILd, [ICXPort0, ICXPort5, ICXPort23, ICXPort0156]> {
541 let ReleaseAtCycles = [4,3,1,1];
544 // MOVMSK Instructions.
545 def : WriteRes<WriteFMOVMSK, [ICXPort0]> { let Latency = 2; }
546 def : WriteRes<WriteVecMOVMSK, [ICXPort0]> { let Latency = 2; }
547 def : WriteRes<WriteVecMOVMSKY, [ICXPort0]> { let Latency = 2; }
548 def : WriteRes<WriteMMXMOVMSK, [ICXPort0]> { let Latency = 2; }
551 def : WriteRes<WriteAESDecEnc, [ICXPort0]> { // Decryption, encryption.
554 let ReleaseAtCycles = [1];
556 def : WriteRes<WriteAESDecEncLd, [ICXPort0, ICXPort23]> {
559 let ReleaseAtCycles = [1,1];
562 def : WriteRes<WriteAESIMC, [ICXPort0]> { // InvMixColumn.
565 let ReleaseAtCycles = [2];
567 def : WriteRes<WriteAESIMCLd, [ICXPort0, ICXPort23]> {
570 let ReleaseAtCycles = [2,1];
573 def : WriteRes<WriteAESKeyGen, [ICXPort0,ICXPort5,ICXPort015]> { // Key Generation.
575 let NumMicroOps = 11;
576 let ReleaseAtCycles = [3,6,2];
578 def : WriteRes<WriteAESKeyGenLd, [ICXPort0,ICXPort5,ICXPort23,ICXPort015]> {
580 let NumMicroOps = 11;
581 let ReleaseAtCycles = [3,6,1,1];
584 // Carry-less multiplication instructions.
585 def : WriteRes<WriteCLMul, [ICXPort5]> {
588 let ReleaseAtCycles = [1];
590 def : WriteRes<WriteCLMulLd, [ICXPort5, ICXPort23]> {
593 let ReleaseAtCycles = [1,1];
596 // Catch-all for expensive system instructions.
597 def : WriteRes<WriteSystem, [ICXPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
600 defm : ICXWriteResPair<WriteFShuffle256, [ICXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles.
601 defm : ICXWriteResPair<WriteFVarShuffle256, [ICXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
602 defm : ICXWriteResPair<WriteShuffle256, [ICXPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
603 defm : ICXWriteResPair<WriteVPMOV256, [ICXPort5], 3, [1], 1, 7>; // 256-bit width packed vector width-changing move.
604 defm : ICXWriteResPair<WriteVarShuffle256, [ICXPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.
606 // Old microcoded instructions that nobody uses.
607 def : WriteRes<WriteMicrocoded, [ICXPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;
// Fence instructions: bound to ICXPort78 and ICXPort49; no latency or
// micro-op overrides are given here.
610 def : WriteRes<WriteFence, [ICXPort78, ICXPort49]>;
// MXCSR load/store. Both are 3 micro-ops with one ReleaseAtCycles entry per
// listed resource: the load has Latency 7 over ICXPort0/ICXPort23/ICXPort0156,
// the store has Latency 2 over ICXPort49/ICXPort5/ICXPort78.
613 def : WriteRes<WriteLDMXCSR, [ICXPort0,ICXPort23,ICXPort0156]> { let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; }
614 def : WriteRes<WriteSTMXCSR, [ICXPort49,ICXPort5,ICXPort78]> { let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; }
// Nop: not very useful, except that it provides a model for nops. It is bound
// to an empty resource list.
617 def : WriteRes<WriteNop, []>;
619 ////////////////////////////////////////////////////////////////////////////////
620 // Horizontal add/sub instructions.
621 ////////////////////////////////////////////////////////////////////////////////
// All five pairs list ICXPort5 first with ReleaseAtCycles [2,1]; the second
// resource is ICXPort015 except for WritePHAdd, which uses ICXPort05.
// NOTE(review): the scalar arguments are presumably (latency, ..., micro-ops,
// load latency) -- confirm against the ICXWriteResPair multiclass.
623 defm : ICXWriteResPair<WriteFHAdd, [ICXPort5,ICXPort015], 6, [2,1], 3, 6>;
624 defm : ICXWriteResPair<WriteFHAddY, [ICXPort5,ICXPort015], 6, [2,1], 3, 7>;
625 defm : ICXWriteResPair<WritePHAdd, [ICXPort5,ICXPort05], 3, [2,1], 3, 5>;
626 defm : ICXWriteResPair<WritePHAddX, [ICXPort5,ICXPort015], 3, [2,1], 3, 6>;
627 defm : ICXWriteResPair<WritePHAddY, [ICXPort5,ICXPort015], 3, [2,1], 3, 7>;
// ICXWriteResGroup1: ICXPort0 only. Bound below to KAND/KSET0/KSET1 mask ops,
// MMX PCMPEQ/PCMPGT/PMAX/PMIN register forms and the VPMOV{B,D,Q,W}2M
// register forms.
631 def ICXWriteResGroup1 : SchedWriteRes<[ICXPort0]> {
634 let ReleaseAtCycles = [1];
636 def: InstRW<[ICXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr",
643 "KSET0(B|D|Q|W)", // Same as KXOR
644 "KSET1(B|D|Q|W)", // Same as KXNOR
648 "MMX_PCMPEQ(B|D|W)rr",
649 "MMX_PCMPGT(B|D|W)rr",
650 "MMX_P(MAX|MIN)SWrr",
651 "MMX_P(MAX|MIN)UBrr",
654 "VPMOVB2M(Z|Z128|Z256)rr",
655 "VPMOVD2M(Z|Z128|Z256)rr",
656 "VPMOVQ2M(Z|Z128|Z256)rr",
657 "VPMOVW2M(Z|Z128|Z256)rr")>;
// ICXWriteResGroup3: ICXPort5 only. Bound below to COM(P)_FST0r and to the
// register forms of VPBROADCAST(D|Q), INSERTPS, MOVHLPS/MOVLHPS, MOVDDUP,
// PALIGNR, PERMILPD/PS and UNPCK(L|H)PD/PS.
659 def ICXWriteResGroup3 : SchedWriteRes<[ICXPort5]> {
662 let ReleaseAtCycles = [1];
664 def: InstRW<[ICXWriteResGroup3], (instregex "COM(P?)_FST0r",
667 "VPBROADCAST(D|Q)rr",
668 "(V?)INSERTPS(Z?)rr",
669 "(V?)MOV(HL|LH)PS(Z?)rr",
670 "(V?)MOVDDUP(Y|Z128|Z256)?rr",
671 "(V?)PALIGNR(Y|Z128|Z256)?rri",
672 "(V?)PERMIL(PD|PS)(Y|Z128|Z256)?ri",
673 "(V?)PERMIL(PD|PS)(Y|Z128|Z256)?rr",
674 "(V?)UNPCK(L|H)(PD|PS)(Y|Z128|Z256)?rr")>;
// ICXWriteResGroup4: ICXPort6 -- register-indirect JMP.
676 def ICXWriteResGroup4 : SchedWriteRes<[ICXPort6]> {
679 let ReleaseAtCycles = [1];
681 def: InstRW<[ICXWriteResGroup4], (instregex "JMP(16|32|64)r")>;
// ICXWriteResGroup6: ICXPort05 -- FINCSTP and FNOP.
683 def ICXWriteResGroup6 : SchedWriteRes<[ICXPort05]> {
686 let ReleaseAtCycles = [1];
688 def: InstRW<[ICXWriteResGroup6], (instrs FINCSTP, FNOP)>;
// ICXWriteResGroup7: ICXPort06 -- CDQ, CQO, CLAC and STAC.
690 def ICXWriteResGroup7 : SchedWriteRes<[ICXPort06]> {
693 let ReleaseAtCycles = [1];
695 def: InstRW<[ICXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
// ICXWriteResGroup8: ICXPort15 -- register-register ANDN.
697 def ICXWriteResGroup8 : SchedWriteRes<[ICXPort15]> {
700 let ReleaseAtCycles = [1];
702 def: InstRW<[ICXWriteResGroup8], (instregex "ANDN(32|64)rr")>;
// ICXWriteResGroup9: ICXPort015. Bound below to register forms of
// VBLENDMPD/PS, (V)PADD, (V)MOVSD/MOVSS, VPBLENDM{B,D,Q,W}, VPSUB (the "rrk"
// names only) and VPTERNLOGD/Q.
704 def ICXWriteResGroup9 : SchedWriteRes<[ICXPort015]> {
707 let ReleaseAtCycles = [1];
709 def: InstRW<[ICXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr",
710 "VBLENDMPS(Z128|Z256)rr",
711 "VPADD(B|D|Q|W)(Y|Z|Z128|Z256)rr",
712 "(V?)PADD(B|D|Q|W)rr",
713 "(V?)MOV(SD|SS)(Z?)rr",
715 "VPBLENDMB(Z128|Z256)rr",
716 "VPBLENDMD(Z128|Z256)rr",
717 "VPBLENDMQ(Z128|Z256)rr",
718 "VPBLENDMW(Z128|Z256)rr",
719 "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rrk",
720 "VPTERNLOGD(Z|Z128|Z256)rri",
721 "VPTERNLOGQ(Z|Z128|Z256)rri")>;
// ICXWriteResGroup10: ICXPort0156; its instruction list starts with SGDT64m.
723 def ICXWriteResGroup10 : SchedWriteRes<[ICXPort0156]> {
726 let ReleaseAtCycles = [1];
728 def: InstRW<[ICXWriteResGroup10], (instrs SGDT64m,
// ICXWriteResGroup11: ICXPort49 + ICXPort78 -- FBSTPm, VMPTRSTm, KMOV*mk and
// ST_FP*m.
734 def ICXWriteResGroup11 : SchedWriteRes<[ICXPort49,ICXPort78]> {
737 let ReleaseAtCycles = [1,1];
739 def: InstRW<[ICXWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
740 def: InstRW<[ICXWriteResGroup11], (instregex "KMOV(B|D|Q|W)mk",
741 "ST_FP(32|64|80)m")>;
// ICXWriteResGroup13: ICXPort5 with ReleaseAtCycles [2] -- MMX_MOVQ2DQrr.
743 def ICXWriteResGroup13 : SchedWriteRes<[ICXPort5]> {
746 let ReleaseAtCycles = [2];
748 def: InstRW<[ICXWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;
// ICXWriteResGroup14: ICXPort05 with ReleaseAtCycles [2]; its instruction
// list starts with FDECSTP.
750 def ICXWriteResGroup14 : SchedWriteRes<[ICXPort05]> {
753 let ReleaseAtCycles = [2];
755 def: InstRW<[ICXWriteResGroup14], (instrs FDECSTP,
// ICXWriteResGroup17: ICXPort0156 with ReleaseAtCycles [2]; its instruction
// list starts with LFENCE.
758 def ICXWriteResGroup17 : SchedWriteRes<[ICXPort0156]> {
761 let ReleaseAtCycles = [2];
763 def: InstRW<[ICXWriteResGroup17], (instrs LFENCE,
// ICXWriteResGroup20: ICXPort6 + ICXPort0156 -- names matching "CLFLUSH".
767 def ICXWriteResGroup20 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
770 let ReleaseAtCycles = [1,1];
772 def: InstRW<[ICXWriteResGroup20], (instregex "CLFLUSH")>;
// ICXWriteResGroup21: ICXPort49 + ICXPort78 -- SFENCE.
774 def ICXWriteResGroup21 : SchedWriteRes<[ICXPort49,ICXPort78]> {
777 let ReleaseAtCycles = [1,1];
779 def: InstRW<[ICXWriteResGroup21], (instrs SFENCE)>;
// ICXWriteResGroup23: ICXPort06 + ICXPort0156 -- CWD through ADC64i32 and
// SBB64i32.
781 def ICXWriteResGroup23 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
784 let ReleaseAtCycles = [1,1];
786 def: InstRW<[ICXWriteResGroup23], (instrs CWD,
791 ADC64i32, SBB64i32)>;
// ICXWriteResGroup25: ICXPort49 + ICXPort6 + ICXPort78 -- FNSTCW16m.
793 def ICXWriteResGroup25 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort78]> {
796 let ReleaseAtCycles = [1,1,1];
798 def: InstRW<[ICXWriteResGroup25], (instrs FNSTCW16m)>;
// ICXWriteResGroup27: ICXPort49 + ICXPort78 + ICXPort15 -- MOVBE store forms.
800 def ICXWriteResGroup27 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort15]> {
803 let ReleaseAtCycles = [1,1,1];
805 def: InstRW<[ICXWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>;
// ICXWriteResGroup28: ICXPort49 + ICXPort78 + ICXPort0156 -- register and
// imm8 PUSH forms, the STOS* instructions and PUSH*rmr.
807 def ICXWriteResGroup28 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort0156]> {
810 let ReleaseAtCycles = [1,1,1];
812 def: InstRW<[ICXWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
813 STOSB, STOSL, STOSQ, STOSW)>;
814 def: InstRW<[ICXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;
// ICXWriteResGroup29: same resource list as ICXWriteResGroup27 but with
// ReleaseAtCycles [2,2,1] -- VMOVDQU8Zmr store forms.
816 def ICXWriteResGroup29 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort15]> {
819 let ReleaseAtCycles = [2,2,1];
821 def: InstRW<[ICXWriteResGroup29], (instregex "VMOVDQU8Zmr(b?)")>;
// ICXWriteResGroup30: ICXPort0 -- KMOV*rk, KORTEST and KTEST.
823 def ICXWriteResGroup30 : SchedWriteRes<[ICXPort0]> {
826 let ReleaseAtCycles = [1];
828 def: InstRW<[ICXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk",
829 "KORTEST(B|D|Q|W)rr",
830 "KTEST(B|D|Q|W)rr")>;
// ICXWriteResGroup31: ICXPort1; its pattern list starts with PDEP(32|64)rr.
832 def ICXWriteResGroup31 : SchedWriteRes<[ICXPort1]> {
835 let ReleaseAtCycles = [1];
837 def: InstRW<[ICXWriteResGroup31], (instregex "PDEP(32|64)rr",
// ICXWriteResGroup32 and ICXWriteResGroup33 show the same resource list
// (ICXPort5) and ReleaseAtCycles ([1]) in these lines.
// NOTE(review): presumably they differ in a field not visible here -- confirm.
//
// ICXWriteResGroup32: x87 ADD/SUB/SUBR register forms, VALIGND/Q,
// VPBROADCAST(B|W)rr, PACK(U|S)S* and VP(MAX|MIN)(S|U)Q register forms.
840 def ICXWriteResGroup32 : SchedWriteRes<[ICXPort5]> {
843 let ReleaseAtCycles = [1];
845 def: InstRW<[ICXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
846 "VALIGND(Z|Z128|Z256)rri",
847 "VALIGNQ(Z|Z128|Z256)rri",
848 "VPBROADCAST(B|W)rr",
849 "(V?)PACK(U|S)S(DW|WB)(Y|Z|Z128|Z256)?rr",
850 "VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr")>;
// ICXWriteResGroup33: KADD/KSHIFTL/KSHIFTR/KUNPCK mask ops and the register
// forms of VCMPPD/PS, VFPCLASS, VPCMP*, VPCMPEQ/GT and VPTEST(N)M.
852 def ICXWriteResGroup33 : SchedWriteRes<[ICXPort5]> {
855 let ReleaseAtCycles = [1];
857 def: InstRW<[ICXWriteResGroup33], (instregex "KADD(B|D|Q|W)rr",
858 "KSHIFTL(B|D|Q|W)ri",
859 "KSHIFTR(B|D|Q|W)ri",
860 "KUNPCK(BW|DQ|WD)rr",
861 "VCMPPD(Z|Z128|Z256)rri",
862 "VCMPPS(Z|Z128|Z256)rri",
864 "VFPCLASS(PD|PS)(Z|Z128|Z256)rr",
865 "VFPCLASS(SD|SS)Zrr",
866 "VPCMPB(Z|Z128|Z256)rri",
867 "VPCMPD(Z|Z128|Z256)rri",
868 "VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
869 "VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr",
870 "VPCMPQ(Z|Z128|Z256)rri",
871 "VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
872 "VPCMPW(Z|Z128|Z256)rri",
873 "VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>;
// ICXWriteResGroup34: ICXPort0 + ICXPort0156 -- FNSTSW16r.
875 def ICXWriteResGroup34 : SchedWriteRes<[ICXPort0,ICXPort0156]> {
878 let ReleaseAtCycles = [1,1];
880 def: InstRW<[ICXWriteResGroup34], (instrs FNSTSW16r)>;
// ICXWriteResGroup37: ICXPort0 + ICXPort5 with ReleaseAtCycles [1,2] -- MMX
// PHADDSW/PHSUBSW register forms.
882 def ICXWriteResGroup37 : SchedWriteRes<[ICXPort0,ICXPort5]> {
885 let ReleaseAtCycles = [1,2];
887 def: InstRW<[ICXWriteResGroup37], (instregex "MMX_PH(ADD|SUB)SWrr")>;
// ICXWriteResGroup38: ICXPort5 + ICXPort01 with ReleaseAtCycles [2,1] --
// (V)PHADDSW/PHSUBSW register forms, including the Y variants.
889 def ICXWriteResGroup38 : SchedWriteRes<[ICXPort5,ICXPort01]> {
892 let ReleaseAtCycles = [2,1];
894 def: InstRW<[ICXWriteResGroup38], (instregex "(V?)PH(ADD|SUB)SW(Y?)rr")>;
// ICXWriteResGroup41: ICXPort5 + ICXPort0156 with ReleaseAtCycles [2,1]; its
// instruction list starts with MMX_PACKSSDWrr.
896 def ICXWriteResGroup41 : SchedWriteRes<[ICXPort5,ICXPort0156]> {
899 let ReleaseAtCycles = [2,1];
901 def: InstRW<[ICXWriteResGroup41], (instrs MMX_PACKSSDWrr,
// ICXWriteResGroup42: ICXPort6 + ICXPort0156 with ReleaseAtCycles [1,2] -- CLD.
905 def ICXWriteResGroup42 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
908 let ReleaseAtCycles = [1,2];
910 def: InstRW<[ICXWriteResGroup42], (instregex "CLD")>;
// ICXWriteResGroup43: ICXPort49 + ICXPort78 with ReleaseAtCycles [1,2] --
// MFENCE.
912 def ICXWriteResGroup43 : SchedWriteRes<[ICXPort49,ICXPort78]> {
915 let ReleaseAtCycles = [1,2];
917 def: InstRW<[ICXWriteResGroup43], (instrs MFENCE)>;
// Rotate-through-carry, register forms.
// ICXWriteResGroup44: rotate-by-1 RCL/RCR on ICXPort06 + ICXPort0156.
919 def ICXWriteResGroup44 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
922 let ReleaseAtCycles = [1,2];
924 def: InstRW<[ICXWriteResGroup44], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
925 RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;
// ICXWriteResGroup44b (RCR by immediate) and ICXWriteResGroup44c (RCL by
// immediate) show identical resource lists and ReleaseAtCycles in these lines.
// NOTE(review): presumably they differ in a field not visible here -- confirm.
927 def ICXWriteResGroup44b : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> {
930 let ReleaseAtCycles = [2,3,2];
932 def: InstRW<[ICXWriteResGroup44b], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
934 def ICXWriteResGroup44c : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> {
937 let ReleaseAtCycles = [2,3,2];
939 def: InstRW<[ICXWriteResGroup44c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
// ICXWriteResGroup45: ICXPort0 + ICXPort49 + ICXPort78 -- FNSTSWm.
941 def ICXWriteResGroup45 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78]> {
944 let ReleaseAtCycles = [1,1,1];
946 def: InstRW<[ICXWriteResGroup45], (instrs FNSTSWm)>;
// ICXWriteResGroup47: register-indirect CALL; lists ICXPort6 alongside
// ICXPort49, ICXPort78 and ICXPort0156.
948 def ICXWriteResGroup47 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort78,ICXPort0156]> {
951 let ReleaseAtCycles = [1,1,1,1];
953 def: InstRW<[ICXWriteResGroup47], (instregex "CALL(16|32|64)r")>;
// ICXWriteResGroup48: CALL64pcrel32; lists ICXPort06 where the group above
// lists ICXPort6.
955 def ICXWriteResGroup48 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort06,ICXPort0156]> {
958 let ReleaseAtCycles = [1,1,1,1];
960 def: InstRW<[ICXWriteResGroup48], (instrs CALL64pcrel32)>;
// ICXWriteResGroup49: ICXPort0 -- x87 MUL register forms.
962 def ICXWriteResGroup49 : SchedWriteRes<[ICXPort0]> {
965 let ReleaseAtCycles = [1];
967 def: InstRW<[ICXWriteResGroup49], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
// ICXWriteResGroup50: ICXPort01 -- Z128/Z256 (and one Y) register forms of the
// VCVT(T)PD2(U)QQ and VCVT(T)PS2(U)DQ conversions listed below.
969 def ICXWriteResGroup50 : SchedWriteRes<[ICXPort01]> {
972 let ReleaseAtCycles = [1];
974 def: InstRW<[ICXWriteResGroup50], (instregex "VCVTPD2QQ(Z128|Z256)rr",
975 "VCVTPD2UQQ(Z128|Z256)rr",
976 "VCVTPS2DQ(Y|Z128|Z256)rr",
978 "VCVTPS2UDQ(Z128|Z256)rr",
979 "VCVTTPD2QQ(Z128|Z256)rr",
980 "VCVTTPD2UQQ(Z128|Z256)rr",
981 "VCVTTPS2DQ(Z128|Z256)rr",
983 "VCVTTPS2UDQ(Z128|Z256)rr")>;
// ICXWriteResGroup50z: ICXPort05; its instruction list starts with
// VCVTPD2QQZrr.
985 def ICXWriteResGroup50z : SchedWriteRes<[ICXPort05]> {
988 let ReleaseAtCycles = [1];
990 def: InstRW<[ICXWriteResGroup50z], (instrs VCVTPD2QQZrr,
// ICXWriteResGroup51: 2 micro-ops on ICXPort5 (ReleaseAtCycles [2]). Bound
// below to the register forms of VEXPANDPD/PS, VPEXPANDD/Q and the VPMOV*,
// VPMOVS* and VPMOVUS* instructions listed.
999 def ICXWriteResGroup51 : SchedWriteRes<[ICXPort5]> {
1001 let NumMicroOps = 2;
1002 let ReleaseAtCycles = [2];
1004 def: InstRW<[ICXWriteResGroup51], (instregex "VEXPANDPD(Z|Z128|Z256)rr",
1005 "VEXPANDPS(Z|Z128|Z256)rr",
1006 "VPEXPANDD(Z|Z128|Z256)rr",
1007 "VPEXPANDQ(Z|Z128|Z256)rr",
1008 "VPMOVDB(Z|Z128|Z256)rr",
1009 "VPMOVDW(Z|Z128|Z256)rr",
1010 "VPMOVQB(Z|Z128|Z256)rr",
1011 "VPMOVQW(Z|Z128|Z256)rr",
1012 "VPMOVSDB(Z|Z128|Z256)rr",
1013 "VPMOVSDW(Z|Z128|Z256)rr",
1014 "VPMOVSQB(Z|Z128|Z256)rr",
1015 "VPMOVSQD(Z|Z128|Z256)rr",
1016 "VPMOVSQW(Z|Z128|Z256)rr",
1017 "VPMOVSWB(Z|Z128|Z256)rr",
1018 "VPMOVUSDB(Z|Z128|Z256)rr",
1019 "VPMOVUSDW(Z|Z128|Z256)rr",
1020 "VPMOVUSQB(Z|Z128|Z256)rr",
1021 "VPMOVUSQD(Z|Z128|Z256)rr",
1022 "VPMOVUSWB(Z|Z128|Z256)rr",
1023 "VPMOVWB(Z|Z128|Z256)rr")>;
// ICXWriteResGroup54: 3 micro-ops over ICXPort49 + ICXPort5 + ICXPort78 --
// IST(T)_FP*m and VPMOVQD*mr store forms.
1025 def ICXWriteResGroup54 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort78]> {
1027 let NumMicroOps = 3;
1028 let ReleaseAtCycles = [1,1,1];
1030 def: InstRW<[ICXWriteResGroup54], (instregex "IST(T?)_FP(16|32|64)m",
1032 "VPMOVQD(Z|Z128|Z256)mr(b?)")>;
// ICXWriteResGroup55: 4 micro-ops on ICXPort0156 -- FNCLEX.
1034 def ICXWriteResGroup55 : SchedWriteRes<[ICXPort0156]> {
1036 let NumMicroOps = 4;
1037 let ReleaseAtCycles = [4];
1039 def: InstRW<[ICXWriteResGroup55], (instrs FNCLEX)>;
// ICXWriteResGroup56: 4 micro-ops with an empty resource list (and therefore
// an empty ReleaseAtCycles list) -- VZEROUPPER.
1041 def ICXWriteResGroup56 : SchedWriteRes<[]> {
1043 let NumMicroOps = 4;
1044 let ReleaseAtCycles = [];
1046 def: InstRW<[ICXWriteResGroup56], (instrs VZEROUPPER)>;
// ICXWriteResGroup57: 4 micro-ops over ICXPort1 + ICXPort6 + ICXPort0156 --
// LAR register forms.
1048 def ICXWriteResGroup57 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort0156]> {
1050 let NumMicroOps = 4;
1051 let ReleaseAtCycles = [1,1,2];
1053 def: InstRW<[ICXWriteResGroup57], (instregex "LAR(16|32|64)rr")>;
// ICXWriteResGroup61: 2 micro-ops over ICXPort5 + ICXPort01 -- the register
// forms of the scalar/128-bit conversions listed below.
1055 def ICXWriteResGroup61 : SchedWriteRes<[ICXPort5,ICXPort01]> {
1057 let NumMicroOps = 2;
1058 let ReleaseAtCycles = [1,1];
1060 def: InstRW<[ICXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIrr",
1061 "MMX_CVT(T?)PS2PIrr",
1064 "(V?)CVT(T?)PD2DQrr",
1071 "(V?)CVTSI(64)?2SDrr",
1074 "VCVTSI(64)?2SDZrr",
1078 "VCVTTPD2UDQZ128rr",
1080 "VCVTTPS2UQQZ128rr",
1084 "VCVTUSI(64)?2SDZrr")>;
// ICXWriteResGroup62: 3 micro-ops over ICXPort5 + ICXPort015 with
// ReleaseAtCycles [2,1] -- VPCONFLICTQZ128rr.
1086 def ICXWriteResGroup62 : SchedWriteRes<[ICXPort5,ICXPort015]> {
1088 let NumMicroOps = 3;
1089 let ReleaseAtCycles = [2,1];
1091 def: InstRW<[ICXWriteResGroup62], (instregex "VPCONFLICTQZ128rr")>;
// ICXWriteResGroup63: 3 micro-ops over ICXPort1 + ICXPort6 + ICXPort06 -- STR
// register forms.
1093 def ICXWriteResGroup63 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort06]> {
1095 let NumMicroOps = 3;
1096 let ReleaseAtCycles = [1,1,1];
1098 def: InstRW<[ICXWriteResGroup63], (instregex "STR(16|32|64)r")>;
// ICXWriteResGroup65: 3 micro-ops over ICXPort49 + ICXPort78 + ICXPort01 --
// VCVTPS2PH store forms (Z128, Z256 and Z).
1100 def ICXWriteResGroup65 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort01]> {
1102 let NumMicroOps = 3;
1103 let ReleaseAtCycles = [1,1,1];
1105 def: InstRW<[ICXWriteResGroup65], (instregex "VCVTPS2PHZ128mr(b?)",
1106 "VCVTPS2PHZ256mr(b?)",
1107 "VCVTPS2PHZmr(b?)")>;
// ICXWriteResGroup66: 4 micro-ops over ICXPort49 + ICXPort5 + ICXPort78 with
// ReleaseAtCycles [1,2,1] -- store ("mr") forms of the VPMOV*, VPMOVS* and
// VPMOVUS* instructions listed below.
1109 def ICXWriteResGroup66 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort78]> {
1111 let NumMicroOps = 4;
1112 let ReleaseAtCycles = [1,2,1];
1114 def: InstRW<[ICXWriteResGroup66], (instregex "VPMOVDB(Z|Z128|Z256)mr(b?)",
1115 "VPMOVDW(Z|Z128|Z256)mr(b?)",
1116 "VPMOVQB(Z|Z128|Z256)mr(b?)",
1117 "VPMOVQW(Z|Z128|Z256)mr(b?)",
1118 "VPMOVSDB(Z|Z128|Z256)mr(b?)",
1119 "VPMOVSDW(Z|Z128|Z256)mr(b?)",
1120 "VPMOVSQB(Z|Z128|Z256)mr(b?)",
1121 "VPMOVSQD(Z|Z128|Z256)mr(b?)",
1122 "VPMOVSQW(Z|Z128|Z256)mr(b?)",
1123 "VPMOVSWB(Z|Z128|Z256)mr(b?)",
1124 "VPMOVUSDB(Z|Z128|Z256)mr(b?)",
1125 "VPMOVUSDW(Z|Z128|Z256)mr(b?)",
1126 "VPMOVUSQB(Z|Z128|Z256)mr(b?)",
1127 "VPMOVUSQD(Z|Z128|Z256)mr(b?)",
1128 "VPMOVUSQW(Z|Z128|Z256)mr(b?)",
1129 "VPMOVUSWB(Z|Z128|Z256)mr(b?)",
1130 "VPMOVWB(Z|Z128|Z256)mr(b?)")>;
// ICXWriteResGroup67: 5 micro-ops over ICXPort06 + ICXPort0156 with
// ReleaseAtCycles [1,4] -- XSETBV.
1132 def ICXWriteResGroup67 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
1134 let NumMicroOps = 5;
1135 let ReleaseAtCycles = [1,4];
1137 def: InstRW<[ICXWriteResGroup67], (instrs XSETBV)>;
// ICXWriteResGroup69: 6 micro-ops over ICXPort49 + ICXPort78 + ICXPort0156
// with ReleaseAtCycles [1,1,4] -- PUSHF16/PUSHF64.
1139 def ICXWriteResGroup69 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort0156]> {
1141 let NumMicroOps = 6;
1142 let ReleaseAtCycles = [1,1,4];
1144 def: InstRW<[ICXWriteResGroup69], (instregex "PUSHF(16|64)")>;
// ICXWriteResGroup71: 1 micro-op on ICXPort23; its instruction list starts
// with VBROADCASTSSrm.
1146 def ICXWriteResGroup71 : SchedWriteRes<[ICXPort23]> {
1148 let NumMicroOps = 1;
1149 let ReleaseAtCycles = [1];
1151 def: InstRW<[ICXWriteResGroup71], (instrs VBROADCASTSSrm,
// ICXWriteResGroup72: 2 micro-ops on ICXPort5 (ReleaseAtCycles [2]) --
// MMX_CVTPI2PSrr plus the register forms of VCOMPRESSPD/PS, VPCOMPRESSD/Q and
// VPERMW.
1161 def ICXWriteResGroup72 : SchedWriteRes<[ICXPort5]> {
1163 let NumMicroOps = 2;
1164 let ReleaseAtCycles = [2];
1166 def: InstRW<[ICXWriteResGroup72], (instrs MMX_CVTPI2PSrr)>;
1167 def: InstRW<[ICXWriteResGroup72], (instregex "VCOMPRESSPD(Z|Z128|Z256)rr",
1168 "VCOMPRESSPS(Z|Z128|Z256)rr",
1169 "VPCOMPRESSD(Z|Z128|Z256)rr",
1170 "VPCOMPRESSQ(Z|Z128|Z256)rr",
1171 "VPERMW(Z|Z128|Z256)rr")>;
// ICXWriteResGroup73: 2 micro-ops over ICXPort0 + ICXPort23; its instruction
// list starts with MMX_PADDSBrm.
1173 def ICXWriteResGroup73 : SchedWriteRes<[ICXPort0,ICXPort23]> {
1175 let NumMicroOps = 2;
1176 let ReleaseAtCycles = [1,1];
1178 def: InstRW<[ICXWriteResGroup73], (instrs MMX_PADDSBrm,
// Two-micro-op groups that pair ICXPort23 with one other resource.
// ICXWriteResGroup76: ICXPort6 + ICXPort23 -- FARJMP64m and JMP*m.
1199 def ICXWriteResGroup76 : SchedWriteRes<[ICXPort6,ICXPort23]> {
1201 let NumMicroOps = 2;
1202 let ReleaseAtCycles = [1,1];
1204 def: InstRW<[ICXWriteResGroup76], (instrs FARJMP64m)>;
1205 def: InstRW<[ICXWriteResGroup76], (instregex "JMP(16|32|64)m")>;
// ICXWriteResGroup79: ICXPort23 + ICXPort15 -- ANDN and MOVBE "rm" forms.
1207 def ICXWriteResGroup79 : SchedWriteRes<[ICXPort23,ICXPort15]> {
1209 let NumMicroOps = 2;
1210 let ReleaseAtCycles = [1,1];
1212 def: InstRW<[ICXWriteResGroup79], (instregex "ANDN(32|64)rm",
1213 "MOVBE(16|32|64)rm")>;
// ICXWriteResGroup80: ICXPort23 + ICXPort015 -- VMOV(64to|QI2)PQIZrm and
// VMOVDI2PDIZrm.
1215 def ICXWriteResGroup80 : SchedWriteRes<[ICXPort23,ICXPort015]> {
1217 let NumMicroOps = 2;
1218 let ReleaseAtCycles = [1,1];
1220 def: InstRW<[ICXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)")>;
1221 def: InstRW<[ICXWriteResGroup80], (instrs VMOVDI2PDIZrm)>;
// ICXWriteResGroup81: ICXPort23 + ICXPort0156 -- POP register forms and
// POP*rmr.
1223 def ICXWriteResGroup81 : SchedWriteRes<[ICXPort23,ICXPort0156]> {
1225 let NumMicroOps = 2;
1226 let ReleaseAtCycles = [1,1];
1228 def: InstRW<[ICXWriteResGroup81], (instrs POP16r, POP32r, POP64r)>;
1229 def: InstRW<[ICXWriteResGroup81], (instregex "POP(16|32|64)rmr")>;
// ICXWriteResGroup82: 3 micro-ops over ICXPort5 + ICXPort01 with
// ReleaseAtCycles [2,1] -- (V)CVTSI642SSrr and VCVTUSI642SSZrr.
1231 def ICXWriteResGroup82 : SchedWriteRes<[ICXPort5,ICXPort01]> {
1233 let NumMicroOps = 3;
1234 let ReleaseAtCycles = [2,1];
1236 def: InstRW<[ICXWriteResGroup82], (instregex "(V?)CVTSI642SSrr",
1238 "VCVTUSI642SSZrr")>;
// ICXWriteResGroup84: 4 micro-ops, one each on ICXPort1, ICXPort6, ICXPort06
// and ICXPort0156 -- SLDT register forms.
1240 def ICXWriteResGroup84 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort06,ICXPort0156]> {
1242 let NumMicroOps = 4;
1243 let ReleaseAtCycles = [1,1,1,1];
1245 def: InstRW<[ICXWriteResGroup84], (instregex "SLDT(16|32|64)r")>;
// ICXWriteResGroup86: 4 micro-ops over ICXPort49 + ICXPort23 + ICXPort78 +
// ICXPort06 -- SAR/SHL/SHR on memory by 1 or by immediate.
1247 def ICXWriteResGroup86 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06]> {
1249 let NumMicroOps = 4;
1250 let ReleaseAtCycles = [1,1,1,1];
1252 def: InstRW<[ICXWriteResGroup86], (instregex "SAR(8|16|32|64)m(1|i)",
1253 "SHL(8|16|32|64)m(1|i)",
1254 "SHR(8|16|32|64)m(1|i)")>;
// ICXWriteResGroup87: as above but with ICXPort0156 in place of ICXPort06 --
// POP*rmm and PUSH*rmm.
1256 def ICXWriteResGroup87 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort0156]> {
1258 let NumMicroOps = 4;
1259 let ReleaseAtCycles = [1,1,1,1];
1261 def: InstRW<[ICXWriteResGroup87], (instregex "POP(16|32|64)rmm",
1262 "PUSH(16|32|64)rmm")>;
// ICXWriteResGroup88: 6 micro-ops over ICXPort6 + ICXPort0156 with
// ReleaseAtCycles [1,5] -- STD.
1264 def ICXWriteResGroup88 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
1266 let NumMicroOps = 6;
1267 let ReleaseAtCycles = [1,5];
1269 def: InstRW<[ICXWriteResGroup88], (instrs STD)>;
// ICXWriteResGroup89: 1 micro-op on ICXPort23 -- LD_F*m, plus an instruction
// list that starts with VBROADCASTF128rm.
1271 def ICXWriteResGroup89 : SchedWriteRes<[ICXPort23]> {
1273 let NumMicroOps = 1;
1274 let ReleaseAtCycles = [1];
1276 def: InstRW<[ICXWriteResGroup89], (instregex "LD_F(32|64|80)m")>;
1277 def: InstRW<[ICXWriteResGroup89], (instrs VBROADCASTF128rm,
// ICXWriteResGroup90: 2 micro-ops over ICXPort01 + ICXPort5 -- VCVTDQ2PDYrr.
1287 def ICXWriteResGroup90 : SchedWriteRes<[ICXPort01,ICXPort5]> {
1289 let NumMicroOps = 2;
1290 let ReleaseAtCycles = [1,1];
1292 def: InstRW<[ICXWriteResGroup90], (instrs VCVTDQ2PDYrr)>;
// ICXWriteResGroup92: 2 micro-ops over ICXPort5 + ICXPort23 -- load forms of
// VMOVSD/SS (Z), VPBROADCAST(B|W), INSERTPS, PALIGNR, PERMILPD/PS and UNPCK
// with no width suffix or a Z128 suffix.
1294 def ICXWriteResGroup92 : SchedWriteRes<[ICXPort5,ICXPort23]> {
1296 let NumMicroOps = 2;
1297 let ReleaseAtCycles = [1,1];
1299 def: InstRW<[ICXWriteResGroup92], (instregex "VMOV(SD|SS)Zrm(b?)",
1300 "VPBROADCAST(B|W)(Z128)?rm",
1301 "(V?)INSERTPS(Z?)rm",
1302 "(V?)PALIGNR(Z128)?rmi",
1303 "(V?)PERMIL(PD|PS)(Z128)?m(b?)i",
1304 "(V?)PERMIL(PD|PS)(Z128)?rm",
1305 "(V?)UNPCK(L|H)(PD|PS)(Z128)?rm")>;
// ICXWriteResGroup93: 2 micro-ops over ICXPort5 + ICXPort01 -- Y/Z256 register
// forms of the conversions listed below.
1307 def ICXWriteResGroup93 : SchedWriteRes<[ICXPort5,ICXPort01]> {
1309 let NumMicroOps = 2;
1310 let ReleaseAtCycles = [1,1];
1312 def: InstRW<[ICXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr",
1313 "VCVTPD2DQ(Y|Z256)rr",
1315 "VCVTPS2PD(Y|Z256)rr",
1319 "VCVTTPD2DQ(Y|Z256)rr",
1320 "VCVTTPD2UDQZ256rr",
1322 "VCVTTPS2UQQZ256rr",
1324 "VCVTUQQ2PSZ256rr")>;
// ICXWriteResGroup93z: as ICXWriteResGroup93 but with ICXPort05 in place of
// ICXPort01; its instruction list starts with VCVTDQ2PDZrr.
1326 def ICXWriteResGroup93z : SchedWriteRes<[ICXPort5,ICXPort05]> {
1328 let NumMicroOps = 2;
1329 let ReleaseAtCycles = [1,1];
1331 def: InstRW<[ICXWriteResGroup93z], (instrs VCVTDQ2PDZrr,
// ICXWriteResGroup95: 2 micro-ops over ICXPort23 + ICXPort015. Used for an
// instruction list starting with VMOVNTDQAZ128rm and, paired with
// ReadAfterVecXLd, for the Z128 (and unsuffixed SSE/AVX) load forms matched
// by the patterns below.
1345 def ICXWriteResGroup95 : SchedWriteRes<[ICXPort23,ICXPort015]> {
1347 let NumMicroOps = 2;
1348 let ReleaseAtCycles = [1,1];
1350 def: InstRW<[ICXWriteResGroup95], (instrs VMOVNTDQAZ128rm,
1352 def: InstRW<[ICXWriteResGroup95, ReadAfterVecXLd],
1353 (instregex "VBLENDMPDZ128rm(b?)",
1354 "VBLENDMPSZ128rm(b?)",
1355 "VBROADCASTI32X2Z128rm(b?)",
1356 "VBROADCASTSSZ128rm(b?)",
1357 "VINSERT(F|I)128rm",
1358 "VMOVAPDZ128rm(b?)",
1359 "VMOVAPSZ128rm(b?)",
1360 "VMOVDDUPZ128rm(b?)",
1361 "VMOVDQA32Z128rm(b?)",
1362 "VMOVDQA64Z128rm(b?)",
1363 "VMOVDQU16Z128rm(b?)",
1364 "VMOVDQU32Z128rm(b?)",
1365 "VMOVDQU64Z128rm(b?)",
1366 "VMOVDQU8Z128rm(b?)",
1367 "VMOVSHDUPZ128rm(b?)",
1368 "VMOVSLDUPZ128rm(b?)",
1369 "VMOVUPDZ128rm(b?)",
1370 "VMOVUPSZ128rm(b?)",
1371 "VPADD(B|D|Q|W)Z128rm(b?)",
1372 "(V?)PADD(B|D|Q|W)rm",
1373 "VPBLENDM(B|D|Q|W)Z128rm(b?)",
1374 "VPBROADCASTDZ128rm(b?)",
1375 "VPBROADCASTQZ128rm(b?)",
1376 "VPSUB(B|D|Q|W)Z128rm(b?)",
1377 "(V?)PSUB(B|D|Q|W)rm",
1378 "VPTERNLOGDZ128rm(b?)i",
1379 "VPTERNLOGQZ128rm(b?)i")>;
// Three-micro-op groups.
// ICXWriteResGroup96: ICXPort5 + ICXPort23 with ReleaseAtCycles [2,1]; its
// instruction list starts with MMX_PACKSSDWrm.
1381 def ICXWriteResGroup96 : SchedWriteRes<[ICXPort5,ICXPort23]> {
1383 let NumMicroOps = 3;
1384 let ReleaseAtCycles = [2,1];
1386 def: InstRW<[ICXWriteResGroup96], (instrs MMX_PACKSSDWrm,
// ICXWriteResGroup97: ICXPort5 + ICXPort015 with ReleaseAtCycles [2,1]; its
// pattern list starts with VPERMI2WZ128rr.
1390 def ICXWriteResGroup97 : SchedWriteRes<[ICXPort5,ICXPort015]> {
1392 let NumMicroOps = 3;
1393 let ReleaseAtCycles = [2,1];
1395 def: InstRW<[ICXWriteResGroup97], (instregex "VPERMI2WZ128rr",
// ICXWriteResGroup99: ICXPort23 + ICXPort0156 with ReleaseAtCycles [1,2] --
// LEAVE, LEAVE64 and the SCAS* instructions.
1402 def ICXWriteResGroup99 : SchedWriteRes<[ICXPort23,ICXPort0156]> {
1404 let NumMicroOps = 3;
1405 let ReleaseAtCycles = [1,2];
1407 def: InstRW<[ICXWriteResGroup99], (instrs LEAVE, LEAVE64,
1408 SCASB, SCASL, SCASQ, SCASW)>;
// ICXWriteResGroup100: ICXPort0 + ICXPort5 + ICXPort01 -- (V)CVT(T)SS2SI64 and
// VCVT(T)SS2USI64Z register forms.
1410 def ICXWriteResGroup100 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort01]> {
1412 let NumMicroOps = 3;
1413 let ReleaseAtCycles = [1,1,1];
1415 def: InstRW<[ICXWriteResGroup100], (instregex "(V?)CVT(T?)SS2SI64(Z?)rr",
1416 "VCVT(T?)SS2USI64Zrr")>;
// ICXWriteResGroup101: ICXPort0 + ICXPort23 + ICXPort05 -- FLDCW16m.
1418 def ICXWriteResGroup101 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort05]> {
1420 let NumMicroOps = 3;
1421 let ReleaseAtCycles = [1,1,1];
1423 def: InstRW<[ICXWriteResGroup101], (instrs FLDCW16m)>;
// ICXWriteResGroup103: ICXPort5 + ICXPort23 + ICXPort0156 -- KMOV*km.
1425 def ICXWriteResGroup103 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort0156]> {
1427 let NumMicroOps = 3;
1428 let ReleaseAtCycles = [1,1,1];
1430 def: InstRW<[ICXWriteResGroup103], (instregex "KMOV(B|D|Q|W)km")>;
// ICXWriteResGroup104: ICXPort6 + ICXPort23 + ICXPort0156 -- LRET64 and RET64.
1432 def ICXWriteResGroup104 : SchedWriteRes<[ICXPort6,ICXPort23,ICXPort0156]> {
1434 let NumMicroOps = 3;
1435 let ReleaseAtCycles = [1,1,1];
1437 def: InstRW<[ICXWriteResGroup104], (instrs LRET64, RET64)>;
// ICXWriteResGroup106: 4 micro-ops over ICXPort49 + ICXPort5 + ICXPort78 with
// ReleaseAtCycles [1,2,1] -- VCOMPRESSPD/PS and VPCOMPRESSD/Q store forms.
1439 def ICXWriteResGroup106 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort78]> {
1441 let NumMicroOps = 4;
1442 let ReleaseAtCycles = [1,2,1];
1444 def: InstRW<[ICXWriteResGroup106], (instregex "VCOMPRESSPD(Z|Z128|Z256)mr(b?)",
1445 "VCOMPRESSPS(Z|Z128|Z256)mr(b?)",
1446 "VPCOMPRESSD(Z|Z128|Z256)mr(b?)",
1447 "VPCOMPRESSQ(Z|Z128|Z256)mr(b?)")>;
// ICXWriteResGroup107: 5 micro-ops over ICXPort49 + ICXPort23 + ICXPort78 +
// ICXPort06 -- ROL/ROR on memory by 1 or by immediate.
1449 def ICXWriteResGroup107 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06]> {
1451 let NumMicroOps = 5;
1452 let ReleaseAtCycles = [1,1,1,2];
1454 def: InstRW<[ICXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)",
1455 "ROR(8|16|32|64)m(1|i)")>;
// ICXWriteResGroup107_1: 2 micro-ops on ICXPort06 -- ROL/ROR register by 1.
1457 def ICXWriteResGroup107_1 : SchedWriteRes<[ICXPort06]> {
1459 let NumMicroOps = 2;
1460 let ReleaseAtCycles = [2];
1462 def: InstRW<[ICXWriteResGroup107_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
1463 ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
// ICXWriteResGroup108: 5 micro-ops over ICXPort49 + ICXPort23 + ICXPort78 +
// ICXPort0156 -- XADD*rm.
1465 def ICXWriteResGroup108 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort0156]> {
1467 let NumMicroOps = 5;
1468 let ReleaseAtCycles = [1,1,1,2];
1470 def: InstRW<[ICXWriteResGroup108], (instregex "XADD(8|16|32|64)rm")>;
// ICXWriteResGroup109: 5 micro-ops, one per listed resource -- memory-indirect
// CALL and FARCALL64m.
1472 def ICXWriteResGroup109 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort0156]> {
1474 let NumMicroOps = 5;
1475 let ReleaseAtCycles = [1,1,1,1,1];
1477 def: InstRW<[ICXWriteResGroup109], (instregex "CALL(16|32|64)m")>;
1478 def: InstRW<[ICXWriteResGroup109], (instrs FARCALL64m)>;
// Scatter groups (plus LOOP). Groups 110/112/113 share one resource list and
// differ in the ICXPort49/ICXPort78 entries (2, 4, 8); in each of these groups
// NumMicroOps equals the sum of the ReleaseAtCycles entries.
// ICXWriteResGroup110: 7 micro-ops -- Z128 scatters, VPSCATTERDQZ128mr through
// VSCATTERQPDZ128mr.
1480 def ICXWriteResGroup110 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort0156]> {
1482 let NumMicroOps = 7;
1483 let ReleaseAtCycles = [1,2,2,2];
1485 def: InstRW<[ICXWriteResGroup110], (instrs VPSCATTERDQZ128mr,
1488 VSCATTERQPDZ128mr)>;
// ICXWriteResGroup111: 7 micro-ops over ICXPort6 + ICXPort06 + ICXPort15 +
// ICXPort0156 -- LOOP.
1490 def ICXWriteResGroup111 : SchedWriteRes<[ICXPort6,ICXPort06,ICXPort15,ICXPort0156]> {
1492 let NumMicroOps = 7;
1493 let ReleaseAtCycles = [1,3,1,2];
1495 def: InstRW<[ICXWriteResGroup111], (instrs LOOP)>;
// ICXWriteResGroup112: 11 micro-ops -- Z256 scatters, VPSCATTERDQZ256mr
// through VSCATTERQPDZ256mr.
1497 def ICXWriteResGroup112 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort0156]> {
1499 let NumMicroOps = 11;
1500 let ReleaseAtCycles = [1,4,4,2];
1502 def: InstRW<[ICXWriteResGroup112], (instrs VPSCATTERDQZ256mr,
1505 VSCATTERQPDZ256mr)>;
// ICXWriteResGroup113: 19 micro-ops; its instruction list starts with
// VPSCATTERDQZmr.
1507 def ICXWriteResGroup113 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort0156]> {
1509 let NumMicroOps = 19;
1510 let ReleaseAtCycles = [1,8,8,2];
1512 def: InstRW<[ICXWriteResGroup113], (instrs VPSCATTERDQZmr,
// ICXWriteResGroup114: 36 micro-ops; also lists ICXPort5 -- VSCATTERDPSZmr.
1517 def ICXWriteResGroup114 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
1519 let NumMicroOps = 36;
1520 let ReleaseAtCycles = [1,16,1,16,2];
1522 def: InstRW<[ICXWriteResGroup114], (instrs VSCATTERDPSZmr)>;
// ICXWriteResGroup118: 2 micro-ops over ICXPort1 + ICXPort23; its pattern list
// starts with PDEP(32|64)rm.
1524 def ICXWriteResGroup118 : SchedWriteRes<[ICXPort1,ICXPort23]> {
1526 let NumMicroOps = 2;
1527 let ReleaseAtCycles = [1,1];
1529 def: InstRW<[ICXWriteResGroup118], (instregex "PDEP(32|64)rm",
// ICXWriteResGroup119: 2 micro-ops over ICXPort5 + ICXPort23 -- FCOM(P)*m and
// the Y/Z/Z256 load forms of VPBROADCASTB/W, PALIGNR, PERMILPD/PS and UNPCK,
// plus an instruction list that starts with VPBROADCASTBYrm.
1532 def ICXWriteResGroup119 : SchedWriteRes<[ICXPort5,ICXPort23]> {
1534 let NumMicroOps = 2;
1535 let ReleaseAtCycles = [1,1];
1537 def: InstRW<[ICXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
1538 "VPBROADCASTB(Z|Z256)rm(b?)",
1539 "VPBROADCASTW(Z|Z256)rm(b?)",
1540 "(V?)PALIGNR(Y|Z256)rmi",
1541 "(V?)PERMIL(PD|PS)(Y|Z256)m(b?)i",
1542 "(V?)PERMIL(PD|PS)(Y|Z256)rm",
1543 "(V?)UNPCK(L|H)(PD|PS)(Y|Z256)rm")>;
1544 def: InstRW<[ICXWriteResGroup119], (instrs VPBROADCASTBYrm,
// ICXWriteResGroup121: 2 micro-ops over ICXPort23 + ICXPort015 (the same
// resource list as ICXWriteResGroup95). Used for an instruction list starting
// with VMOVNTDQAZ256rm and, paired with ReadAfterVecYLd, for the wider load
// forms matched below: broadcasts, VINSERT*, moves, VPADD/VPSUB, VPBLENDM and
// VPTERNLOG.
1550 def ICXWriteResGroup121 : SchedWriteRes<[ICXPort23,ICXPort015]> {
1552 let NumMicroOps = 2;
1553 let ReleaseAtCycles = [1,1];
1555 def: InstRW<[ICXWriteResGroup121], (instrs VMOVNTDQAZ256rm,
1557 def: InstRW<[ICXWriteResGroup121, ReadAfterVecYLd],
1558 (instregex "VBLENDMPD(Z|Z256)rm(b?)",
1559 "VBLENDMPS(Z|Z256)rm(b?)",
1560 "VBROADCASTF32X2Z256rm(b?)",
1561 "VBROADCASTF32X2Zrm(b?)",
1562 "VBROADCASTF32X4Z256rm(b?)",
1563 "VBROADCASTF32X4rm(b?)",
1564 "VBROADCASTF32X8rm(b?)",
1565 "VBROADCASTF64X2Z128rm(b?)",
1566 "VBROADCASTF64X2rm(b?)",
1567 "VBROADCASTF64X4rm(b?)",
1568 "VBROADCASTI32X2Z256rm(b?)",
1569 "VBROADCASTI32X2Zrm(b?)",
1570 "VBROADCASTI32X4Z256rm(b?)",
1571 "VBROADCASTI32X4rm(b?)",
1572 "VBROADCASTI32X8rm(b?)",
1573 "VBROADCASTI64X2Z128rm(b?)",
1574 "VBROADCASTI64X2rm(b?)",
1575 "VBROADCASTI64X4rm(b?)",
1576 "VBROADCASTSD(Z|Z256)rm(b?)",
1577 "VBROADCASTSS(Z|Z256)rm(b?)",
1578 "VINSERTF32x4(Z|Z256)rm(b?)",
1579 "VINSERTF32x8Zrm(b?)",
1580 "VINSERTF64x2(Z|Z256)rm(b?)",
1581 "VINSERTF64x4Zrm(b?)",
1582 "VINSERTI32x4(Z|Z256)rm(b?)",
1583 "VINSERTI32x8Zrm(b?)",
1584 "VINSERTI64x2(Z|Z256)rm(b?)",
1585 "VINSERTI64x4Zrm(b?)",
1586 "VMOVAPD(Z|Z256)rm(b?)",
1587 "VMOVAPS(Z|Z256)rm(b?)",
1588 "VMOVDDUP(Z|Z256)rm(b?)",
1589 "VMOVDQA32(Z|Z256)rm(b?)",
1590 "VMOVDQA64(Z|Z256)rm(b?)",
1591 "VMOVDQU16(Z|Z256)rm(b?)",
1592 "VMOVDQU32(Z|Z256)rm(b?)",
1593 "VMOVDQU64(Z|Z256)rm(b?)",
1594 "VMOVDQU8(Z|Z256)rm(b?)",
1595 "VMOVSHDUP(Z|Z256)rm(b?)",
1596 "VMOVSLDUP(Z|Z256)rm(b?)",
1597 "VMOVUPD(Z|Z256)rm(b?)",
1598 "VMOVUPS(Z|Z256)rm(b?)",
1599 "VPADD(B|D|Q|W)Yrm",
1600 "VPADD(B|D|Q|W)(Z|Z256)rm(b?)",
1601 "VPBLENDM(B|D|Q|W)(Z|Z256)rm(b?)",
1602 "VPBROADCASTD(Z|Z256)rm(b?)",
1603 "VPBROADCASTQ(Z|Z256)rm(b?)",
1604 "VPSUB(B|D|Q|W)Yrm",
1605 "VPSUB(B|D|Q|W)(Z|Z256)rm(b?)",
1606 "VPTERNLOGD(Z|Z256)rm(b?)i",
1607 "VPTERNLOGQ(Z|Z256)rm(b?)i")>;
// ICXWriteResGroup123: 4 micro-ops over ICXPort0 + ICXPort5 + ICXPort23 with
// ReleaseAtCycles [1,2,1] -- MMX PHADDSW/PHSUBSW load forms.
1609 def ICXWriteResGroup123 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
1611 let NumMicroOps = 4;
1612 let ReleaseAtCycles = [1,2,1];
1614 def: InstRW<[ICXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;
// ICXWriteResGroup127: 5 micro-ops -- RCL/RCR on memory by 1 or by immediate.
// NOTE(review): unlike the other memory shift/rotate groups in this file
// (e.g. ICXWriteResGroup107 and ICXWriteResGroup128), this resource list has
// no ICXPort49 entry -- confirm that this is intended.
1616 def ICXWriteResGroup127 : SchedWriteRes<[ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
1618 let NumMicroOps = 5;
1619 let ReleaseAtCycles = [1,1,1,2];
1621 def: InstRW<[ICXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)",
1622 "RCR(8|16|32|64)m(1|i)")>;
// ICXWriteResGroup128: 6 micro-ops over ICXPort49 + ICXPort23 + ICXPort78 +
// ICXPort06 -- ROL/ROR/SAR/SHL/SHR on memory by CL.
1624 def ICXWriteResGroup128 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06]> {
1626 let NumMicroOps = 6;
1627 let ReleaseAtCycles = [1,1,1,3];
1629 def: InstRW<[ICXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL",
1630 "ROR(8|16|32|64)mCL",
1631 "SAR(8|16|32|64)mCL",
1632 "SHL(8|16|32|64)mCL",
1633 "SHR(8|16|32|64)mCL")>;
// ICXWriteResGroup130: 6 micro-ops; bound to the WriteADCRMW scheduling class
// through a SchedAlias rather than to individual instructions.
1635 def ICXWriteResGroup130 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
1637 let NumMicroOps = 6;
1638 let ReleaseAtCycles = [1,1,1,2,1];
1640 def: SchedAlias<WriteADCRMW, ICXWriteResGroup130>;
// Scatter groups that also list ICXPort5. All four share one resource list;
// the ICXPort49/ICXPort78 entries double from group to group (2, 4, 8, 16)
// and NumMicroOps equals the sum of the ReleaseAtCycles entries.
// ICXWriteResGroup131: 8 micro-ops -- VPSCATTERQDZ128mr through
// VSCATTERQPSZ256mr.
1642 def ICXWriteResGroup131 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
1644 let NumMicroOps = 8;
1645 let ReleaseAtCycles = [1,2,1,2,2];
1647 def: InstRW<[ICXWriteResGroup131], (instrs VPSCATTERQDZ128mr,
1650 VSCATTERQPSZ256mr)>;
// ICXWriteResGroup132: 12 micro-ops -- VPSCATTERDDZ128mr, VSCATTERDPSZ128mr.
1652 def ICXWriteResGroup132 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
1654 let NumMicroOps = 12;
1655 let ReleaseAtCycles = [1,4,1,4,2];
1657 def: InstRW<[ICXWriteResGroup132], (instrs VPSCATTERDDZ128mr,
1658 VSCATTERDPSZ128mr)>;
// ICXWriteResGroup133: 20 micro-ops -- VPSCATTERDDZ256mr, VSCATTERDPSZ256mr.
1660 def ICXWriteResGroup133 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
1662 let NumMicroOps = 20;
1663 let ReleaseAtCycles = [1,8,1,8,2];
1665 def: InstRW<[ICXWriteResGroup133], (instrs VPSCATTERDDZ256mr,
1666 VSCATTERDPSZ256mr)>;
// ICXWriteResGroup134: 36 micro-ops -- VPSCATTERDDZmr.
1668 def ICXWriteResGroup134 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
1670 let NumMicroOps = 36;
1671 let ReleaseAtCycles = [1,16,1,16,2];
1673 def: InstRW<[ICXWriteResGroup134], (instrs VPSCATTERDDZmr)>;
// ICXWriteResGroup135: 2 micro-ops over ICXPort0 + ICXPort23 --
// MMX_CVTPI2PSrm.
1675 def ICXWriteResGroup135 : SchedWriteRes<[ICXPort0,ICXPort23]> {
1677 let NumMicroOps = 2;
1678 let ReleaseAtCycles = [1,1];
1680 def: InstRW<[ICXWriteResGroup135], (instrs MMX_CVTPI2PSrm)>;
// ICXWriteResGroup136 and ICXWriteResGroup136_2 show the same resource list
// (ICXPort5 + ICXPort23), NumMicroOps and ReleaseAtCycles in these lines.
// NOTE(review): presumably they differ in a field not visible here -- confirm.
//
// ICXWriteResGroup136: an instruction list starting with VPMOVSXBWYrm, plus
// Z128 load forms of VALIGN, VPERMI2*/VPERMT2* and VPMAX/VPMIN(S|U)Q, and the
// scalar VFPCLASSSD/SS load forms.
1682 def ICXWriteResGroup136 : SchedWriteRes<[ICXPort5,ICXPort23]> {
1684 let NumMicroOps = 2;
1685 let ReleaseAtCycles = [1,1];
1687 def: InstRW<[ICXWriteResGroup136], (instrs VPMOVSXBWYrm,
1691 def: InstRW<[ICXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
1692 "VFPCLASSSDZrm(b?)",
1693 "VFPCLASSSSZrm(b?)",
1695 "VPERMI2DZ128rm(b?)",
1696 "VPERMI2PDZ128rm(b?)",
1697 "VPERMI2PSZ128rm(b?)",
1698 "VPERMI2QZ128rm(b?)",
1699 "VPERMT2DZ128rm(b?)",
1700 "VPERMT2PDZ128rm(b?)",
1701 "VPERMT2PSZ128rm(b?)",
1702 "VPERMT2QZ128rm(b?)",
1703 "VPMAXSQZ128rm(b?)",
1704 "VPMAXUQZ128rm(b?)",
1705 "VPMINSQZ128rm(b?)",
1706 "VPMINUQZ128rm(b?)")>;
// ICXWriteResGroup136_2: Z128 load forms of VCMPPD/PS, VFPCLASSPD/PS, VPCMP*,
// VPCMPEQ/GT and VPTEST(N)M, plus PACK(U|S)S* load forms.
1708 def ICXWriteResGroup136_2 : SchedWriteRes<[ICXPort5,ICXPort23]> {
1710 let NumMicroOps = 2;
1711 let ReleaseAtCycles = [1,1];
1713 def: InstRW<[ICXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i",
1715 "VFPCLASSPDZ128rm(b?)",
1716 "VFPCLASSPSZ128rm(b?)",
1717 "VPCMPBZ128rmi(b?)",
1718 "VPCMPDZ128rmi(b?)",
1719 "VPCMPEQ(B|D|Q|W)Z128rm(b?)",
1720 "VPCMPGT(B|D|Q|W)Z128rm(b?)",
1721 "VPCMPQZ128rmi(b?)",
1722 "VPCMPU(B|D|Q|W)Z128rmi(b?)",
1723 "VPCMPWZ128rmi(b?)",
1724 "(V?)PACK(U|S)S(DW|WB)(Z128)?rm",
1725 "VPTESTMBZ128rm(b?)",
1726 "VPTESTMDZ128rm(b?)",
1727 "VPTESTMQZ128rm(b?)",
1728 "VPTESTMWZ128rm(b?)",
1729 "VPTESTNMBZ128rm(b?)",
1730 "VPTESTNMDZ128rm(b?)",
1731 "VPTESTNMQZ128rm(b?)",
1732 "VPTESTNMWZ128rm(b?)")>;
// ICXWriteResGroup137: 2 micro-ops over ICXPort23 + ICXPort01; its pattern
// list starts with MMX_CVT(T?)PS2PIrm.
1734 def ICXWriteResGroup137 : SchedWriteRes<[ICXPort23,ICXPort01]> {
1736 let NumMicroOps = 2;
1737 let ReleaseAtCycles = [1,1];
1739 def: InstRW<[ICXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIrm",
// ICXWriteResGroup143: 4 micro-ops over ICXPort5 + ICXPort01 + ICXPort23 with
// ReleaseAtCycles [2,1,1]; its pattern list starts with (V?)PHADDSWrm.
1742 def ICXWriteResGroup143 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> {
1744 let NumMicroOps = 4;
1745 let ReleaseAtCycles = [2,1,1];
1747 def: InstRW<[ICXWriteResGroup143], (instregex "(V?)PHADDSWrm",
// ICXWriteResGroup146: 5 micro-ops over ICXPort1 + ICXPort6 + ICXPort23 +
// ICXPort0156 with ReleaseAtCycles [1,2,1,1] -- LAR and LSL load forms.
1750 def ICXWriteResGroup146 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort23,ICXPort0156]> {
1752 let NumMicroOps = 5;
1753 let ReleaseAtCycles = [1,2,1,1];
1755 def: InstRW<[ICXWriteResGroup146], (instregex "LAR(16|32|64)rm",
1756 "LSL(16|32|64)rm")>;
// ICXWriteResGroup148 and ICXWriteResGroup148_2 show the same resource list
// (ICXPort5 + ICXPort23), NumMicroOps and ReleaseAtCycles in these lines.
// NOTE(review): presumably they differ in a field not visible here -- confirm.
//
// ICXWriteResGroup148: VPCMPGTQYrm, x87 ADD/SUB/SUBR memory forms and the
// Z/Z256 load forms of VALIGND/Q and VPMAX/VPMIN(S|U)Q.
1758 def ICXWriteResGroup148 : SchedWriteRes<[ICXPort5,ICXPort23]> {
1760 let NumMicroOps = 2;
1761 let ReleaseAtCycles = [1,1];
1763 def: InstRW<[ICXWriteResGroup148], (instrs VPCMPGTQYrm)>;
1764 def: InstRW<[ICXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
1766 "VALIGND(Z|Z256)rm(b?)i",
1767 "VALIGNQ(Z|Z256)rm(b?)i",
1768 "VPMAXSQ(Z|Z256)rm(b?)",
1769 "VPMAXUQ(Z|Z256)rm(b?)",
1770 "VPMINSQ(Z|Z256)rm(b?)",
1771 "VPMINUQ(Z|Z256)rm(b?)")>;
// ICXWriteResGroup148_2: Z/Z256 load forms of VCMPPD/PS, VFPCLASSPD/PS,
// VPCMP*, VPCMPEQ/GT and VPTEST(N)M, plus Y/Z/Z256 PACK(U|S)S* load forms.
1773 def ICXWriteResGroup148_2 : SchedWriteRes<[ICXPort5,ICXPort23]> {
1775 let NumMicroOps = 2;
1776 let ReleaseAtCycles = [1,1];
1778 def: InstRW<[ICXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i",
1779 "VCMPPS(Z|Z256)rm(b?)i",
1780 "VFPCLASSPD(Z|Z256)rm(b?)",
1781 "VFPCLASSPS(Z|Z256)rm(b?)",
1782 "VPCMPB(Z|Z256)rmi(b?)",
1783 "VPCMPD(Z|Z256)rmi(b?)",
1784 "VPCMPEQB(Z|Z256)rm(b?)",
1785 "VPCMPEQD(Z|Z256)rm(b?)",
1786 "VPCMPEQQ(Z|Z256)rm(b?)",
1787 "VPCMPEQW(Z|Z256)rm(b?)",
1788 "VPCMPGTB(Z|Z256)rm(b?)",
1789 "VPCMPGTD(Z|Z256)rm(b?)",
1790 "VPCMPGTQ(Z|Z256)rm(b?)",
1791 "VPCMPGTW(Z|Z256)rm(b?)",
1792 "VPCMPQ(Z|Z256)rmi(b?)",
1793 "VPCMPU(B|D|Q|W)Z256rmi(b?)",
1794 "VPCMPU(B|D|Q|W)Zrmi(b?)",
1795 "VPCMPW(Z|Z256)rmi(b?)",
1796 "(V?)PACK(U|S)S(DW|WB)(Y|Z|Z256)rm",
1797 "VPTESTM(B|D|Q|W)Z256rm(b?)",
1798 "VPTESTM(B|D|Q|W)Zrm(b?)",
1799 "VPTESTNM(B|D|Q|W)Z256rm(b?)",
1800 "VPTESTNM(B|D|Q|W)Zrm(b?)")>;
// ICXWriteResGroup149: 2 micro-ops over ICXPort23 + ICXPort01 -- Z128 load
// forms of the VCVT* conversions listed below.
1802 def ICXWriteResGroup149 : SchedWriteRes<[ICXPort23,ICXPort01]> {
1804 let NumMicroOps = 2;
1805 let ReleaseAtCycles = [1,1];
1807 def: InstRW<[ICXWriteResGroup149], (instregex "VCVTDQ2PDZ128rm(b?)",
1808 "VCVTDQ2PSZ128rm(b?)",
1810 "VCVTPD2QQZ128rm(b?)",
1811 "VCVTPD2UQQZ128rm(b?)",
1812 "VCVTPH2PSZ128rm(b?)",
1813 "VCVTPS2DQZ128rm(b?)",
1815 "VCVTPS2PDZ128rm(b?)",
1816 "VCVTPS2QQZ128rm(b?)",
1817 "VCVTPS2UDQZ128rm(b?)",
1818 "VCVTPS2UQQZ128rm(b?)",
1819 "VCVTQQ2PDZ128rm(b?)",
1820 "VCVTQQ2PSZ128rm(b?)",
1823 "VCVTTPD2QQZ128rm(b?)",
1824 "VCVTTPD2UQQZ128rm(b?)",
1825 "VCVTTPS2DQZ128rm(b?)",
1827 "VCVTTPS2QQZ128rm(b?)",
1828 "VCVTTPS2UDQZ128rm(b?)",
1829 "VCVTTPS2UQQZ128rm(b?)",
1830 "VCVTUDQ2PDZ128rm(b?)",
1831 "VCVTUDQ2PSZ128rm(b?)",
1832 "VCVTUQQ2PDZ128rm(b?)",
1833 "VCVTUQQ2PSZ128rm(b?)")>;
// ICXWriteResGroup151: 3 micro-ops over ICXPort5 + ICXPort23 with
// ReleaseAtCycles [2,1] -- Z128 load forms of VEXPANDPD/PS and VPEXPANDD/Q.
1835 def ICXWriteResGroup151 : SchedWriteRes<[ICXPort5,ICXPort23]> {
1837 let NumMicroOps = 3;
1838 let ReleaseAtCycles = [2,1];
1840 def: InstRW<[ICXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)",
1841 "VEXPANDPSZ128rm(b?)",
1842 "VPEXPANDDZ128rm(b?)",
1843 "VPEXPANDQZ128rm(b?)")>;
// ICXWriteResGroup154: four micro-ops, two cycles on ICXPort5 and one cycle
// each on ICXPort01 and ICXPort23. Bound below to an explicit instruction
// list that begins with VPHADDSWYrm.
1845 def ICXWriteResGroup154 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> {
1847 let NumMicroOps = 4;
1848 let ReleaseAtCycles = [2,1,1];
1850 def: InstRW<[ICXWriteResGroup154], (instrs VPHADDSWYrm,
// ICXWriteResGroup157: eight micro-ops spread over six port groups (three of
// them on ICXPort0156). Bound below to the register/memory XCHG forms.
1853 def ICXWriteResGroup157 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
1855 let NumMicroOps = 8;
1856 let ReleaseAtCycles = [1,1,1,1,1,3];
1858 def: InstRW<[ICXWriteResGroup157], (instregex "XCHG(8|16|32|64)rm")>;
// ICXWriteResGroup160: two micro-ops, one cycle on ICXPort0 and one cycle on
// ICXPort23. Bound below to MUL_F32m / MUL_F64m.
1860 def ICXWriteResGroup160 : SchedWriteRes<[ICXPort0,ICXPort23]> {
1862 let NumMicroOps = 2;
1863 let ReleaseAtCycles = [1,1];
1865 def: InstRW<[ICXWriteResGroup160], (instregex "MUL_F(32|64)m")>;
// ICXWriteResGroup161: two micro-ops, one cycle on ICXPort23 and one cycle
// on ICXPort01. Bound below to an explicit list beginning with VCVTDQ2PSYrm
// and to the Z/Z256 memory-operand conversion forms matched by the regexes.
1867 def ICXWriteResGroup161 : SchedWriteRes<[ICXPort23,ICXPort01]> {
1869 let NumMicroOps = 2;
1870 let ReleaseAtCycles = [1,1];
1872 def: InstRW<[ICXWriteResGroup161], (instrs VCVTDQ2PSYrm,
1874 def: InstRW<[ICXWriteResGroup161], (instregex "VCVTDQ2(PD|PS)(Z|Z256)rm(b?)",
1875 "VCVTPH2PS(Z|Z256)rm(b?)",
1876 "VCVTPS2PD(Z|Z256)rm(b?)",
1877 "VCVTQQ2PD(Z|Z256)rm(b?)",
1878 "VCVTQQ2PSZ256rm(b?)",
1879 "VCVT(T?)PD2QQ(Z|Z256)rm(b?)",
1880 "VCVT(T?)PD2UQQ(Z|Z256)rm(b?)",
1882 "VCVT(T?)PS2DQ(Z|Z256)rm(b?)",
1883 "VCVT(T?)PS2QQZ256rm(b?)",
1884 "VCVT(T?)PS2UDQ(Z|Z256)rm(b?)",
1885 "VCVT(T?)PS2UQQZ256rm(b?)",
1886 "VCVTUDQ2(PD|PS)(Z|Z256)rm(b?)",
1887 "VCVTUQQ2PD(Z|Z256)rm(b?)",
1888 "VCVTUQQ2PSZ256rm(b?)")>;
// ICXWriteResGroup162: three micro-ops, two cycles on ICXPort5 and one cycle
// on ICXPort23. Bound below to FICOM(P) with 16/32-bit memory operands and
// to the Z/Z256 memory-operand expand instructions.
1890 def ICXWriteResGroup162 : SchedWriteRes<[ICXPort5,ICXPort23]> {
1892 let NumMicroOps = 3;
1893 let ReleaseAtCycles = [2,1];
1895 def: InstRW<[ICXWriteResGroup162], (instregex "FICOM(P?)(16|32)m",
1896 "VEXPANDPD(Z|Z256)rm(b?)",
1897 "VEXPANDPS(Z|Z256)rm(b?)",
1898 "VPEXPANDD(Z|Z256)rm(b?)",
1899 "VPEXPANDQ(Z|Z256)rm(b?)")>;
// ICXWriteResGroup164: three micro-ops, one cycle each on ICXPort0, ICXPort5
// and ICXPort23. Bound below to CVTDQ2PDrm and its V-prefixed form.
1901 def ICXWriteResGroup164 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
1903 let NumMicroOps = 3;
1904 let ReleaseAtCycles = [1,1,1];
1906 def: InstRW<[ICXWriteResGroup164], (instregex "(V?)CVTDQ2PDrm")>;
// ICXWriteResGroup166: three micro-ops, one cycle each on ICXPort5,
// ICXPort23 and ICXPort01. Bound below to an explicit instruction list that
// begins with CVTPD2DQrm.
1908 def ICXWriteResGroup166 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort01]> {
1910 let NumMicroOps = 3;
1911 let ReleaseAtCycles = [1,1,1];
1913 def: InstRW<[ICXWriteResGroup166], (instrs CVTPD2DQrm,
// ICXWriteResGroup167: four micro-ops, two cycles on ICXPort5 and one cycle
// each on ICXPort23 and ICXPort015. Bound below to VPCONFLICTQZ128 with a
// memory (optionally broadcast) operand.
1918 def ICXWriteResGroup167 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
1920 let NumMicroOps = 4;
1921 let ReleaseAtCycles = [2,1,1];
1923 def: InstRW<[ICXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>;
// ICXWriteResGroup169: seven micro-ops, occupying ICXPort1 for 2 cycles,
// ICXPort06 for 3 and ICXPort0156 for 2. Bound below to the 16/32/64-bit
// register RCL/RCR forms with the count in CL.
1925 def ICXWriteResGroup169 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> {
1927 let NumMicroOps = 7;
1928 let ReleaseAtCycles = [2,3,2];
1930 def: InstRW<[ICXWriteResGroup169], (instregex "RCL(16|32|64)rCL",
1931 "RCR(16|32|64)rCL")>;
// ICXWriteResGroup170: nine micro-ops, occupying ICXPort1 for 1 cycle,
// ICXPort06 for 5, ICXPort15 for 1 and ICXPort0156 for 2. Bound to RCL8rCL.
1933 def ICXWriteResGroup170 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort15,ICXPort0156]> {
1935 let NumMicroOps = 9;
1936 let ReleaseAtCycles = [1,5,1,2];
1938 def: InstRW<[ICXWriteResGroup170], (instrs RCL8rCL)>;
// ICXWriteResGroup171: eleven micro-ops, occupying ICXPort06 for 2 cycles
// and ICXPort0156 for 9. Bound to LOOPE and LOOPNE.
1940 def ICXWriteResGroup171 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
1942 let NumMicroOps = 11;
1943 let ReleaseAtCycles = [2,9];
1945 def: InstRW<[ICXWriteResGroup171], (instrs LOOPE, LOOPNE)>;
// ICXWriteResGroup174: three micro-ops, all on ICXPort01 (3 cycles). Bound
// to the register forms VPMULLQZ128rr / VPMULLQZ256rr.
1947 def ICXWriteResGroup174 : SchedWriteRes<[ICXPort01]> {
1949 let NumMicroOps = 3;
1950 let ReleaseAtCycles = [3];
1952 def: InstRW<[ICXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>;
// ICXWriteResGroup174z: three micro-ops, all on ICXPort0 (3 cycles). Bound
// to VPMULLQZrr; unlike ICXWriteResGroup174 it is restricted to ICXPort0.
1954 def ICXWriteResGroup174z : SchedWriteRes<[ICXPort0]> {
1956 let NumMicroOps = 3;
1957 let ReleaseAtCycles = [3];
1959 def: InstRW<[ICXWriteResGroup174z], (instregex "VPMULLQZrr")>;
// ICXWriteResGroup175: three micro-ops, two cycles on ICXPort5 and one cycle
// on ICXPort23. Bound to VPERMWZ128 with a memory operand.
1961 def ICXWriteResGroup175 : SchedWriteRes<[ICXPort5,ICXPort23]> {
1963 let NumMicroOps = 3;
1964 let ReleaseAtCycles = [2,1];
1966 def: InstRW<[ICXWriteResGroup175], (instregex "VPERMWZ128rm(b?)")>;
// ICXWriteResGroup176: three micro-ops, one cycle each on ICXPort0,
// ICXPort23 and ICXPort01. Bound to the memory-operand scalar FP to unsigned
// integer conversions matched below (with or without truncation).
1968 def ICXWriteResGroup176 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort01]> {
1970 let NumMicroOps = 3;
1971 let ReleaseAtCycles = [1,1,1];
1973 def: InstRW<[ICXWriteResGroup176], (instregex "VCVT(T?)SD2USIZrm(b?)",
1974 "VCVT(T?)SS2USI64Zrm(b?)")>;
// ICXWriteResGroup177: three micro-ops, one cycle each on ICXPort5,
// ICXPort23 and ICXPort01. Bound to the Z memory-operand forms of
// VCVT(T)PS2QQ and VCVT(T)PS2UQQ.
1976 def ICXWriteResGroup177 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort01]> {
1978 let NumMicroOps = 3;
1979 let ReleaseAtCycles = [1,1,1];
1981 def: InstRW<[ICXWriteResGroup177], (instregex "VCVT(T?)PS2QQZrm(b?)",
1982 "VCVT(T?)PS2UQQZrm(b?)")>;
// ICXWriteResGroup180: three micro-ops, two cycles on ICXPort5 and one cycle
// on ICXPort23. Bound below to a regex list that begins with the x87
// integer-memory ADD/SUB/SUBR forms.
1984 def ICXWriteResGroup180 : SchedWriteRes<[ICXPort5,ICXPort23]> {
1986 let NumMicroOps = 3;
1987 let ReleaseAtCycles = [2,1];
1989 def: InstRW<[ICXWriteResGroup180], (instregex "(ADD|SUB|SUBR)_FI(16|32)m",
// ICXWriteResGroup181: three micro-ops, one cycle each on ICXPort0, ICXPort5
// and ICXPort23. Bound to VCVTDQ2PDYrm.
1993 def ICXWriteResGroup181 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
1995 let NumMicroOps = 3;
1996 let ReleaseAtCycles = [1,1,1];
1998 def: InstRW<[ICXWriteResGroup181], (instrs VCVTDQ2PDYrm)>;
// ICXWriteResGroup183: four micro-ops, two cycles on ICXPort5 and one cycle
// each on ICXPort23 and ICXPort015. Bound to the Z128 memory-operand forms
// of VPERMI2W and VPERMT2W.
2000 def ICXWriteResGroup183 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
2002 let NumMicroOps = 4;
2003 let ReleaseAtCycles = [2,1,1];
2005 def: InstRW<[ICXWriteResGroup183], (instregex "VPERMI2WZ128rm(b?)",
2006 "VPERMT2WZ128rm(b?)")>;
// ICXWriteResGroup187: three micro-ops, one cycle each on ICXPort0, ICXPort5
// and ICXPort23. Bound to MUL_FI16m / MUL_FI32m.
2008 def ICXWriteResGroup187 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
2010 let NumMicroOps = 3;
2011 let ReleaseAtCycles = [1,1,1];
2013 def: InstRW<[ICXWriteResGroup187], (instregex "MUL_FI(16|32)m")>;
// ICXWriteResGroup188: three micro-ops, one cycle each on ICXPort5,
// ICXPort23 and ICXPort01. Bound to the Z memory-operand conversion forms
// matched by the regexes below.
2015 def ICXWriteResGroup188 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort01]> {
2017 let NumMicroOps = 3;
2018 let ReleaseAtCycles = [1,1,1];
2020 def: InstRW<[ICXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)",
2021 "VCVTPD2UDQZrm(b?)",
2023 "VCVTTPD2DQZrm(b?)",
2024 "VCVTTPD2UDQZrm(b?)",
2025 "VCVTUQQ2PSZrm(b?)")>;
// ICXWriteResGroup189: four micro-ops, two cycles on ICXPort5 and one cycle
// each on ICXPort23 and ICXPort015. Bound to the Z256/Z memory-operand
// VPERMI2W / VPERMT2W forms matched below.
2027 def ICXWriteResGroup189 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
2029 let NumMicroOps = 4;
2030 let ReleaseAtCycles = [2,1,1];
2032 def: InstRW<[ICXWriteResGroup189], (instregex "VPERMI2WZ256rm(b?)",
2034 "VPERMT2WZ256rm(b?)",
2035 "VPERMT2WZrm(b?)")>;
// ICXWriteResGroup190: ten micro-ops, occupying ICXPort1 for 2 cycles,
// ICXPort06 for 4, ICXPort15 for 1 and ICXPort0156 for 3. Bound to RCR8rCL.
2037 def ICXWriteResGroup190 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort15,ICXPort0156]> {
2039 let NumMicroOps = 10;
2040 let ReleaseAtCycles = [2,4,1,3];
2042 def: InstRW<[ICXWriteResGroup190], (instrs RCR8rCL)>;
// ICXWriteResGroup191: a single micro-op on ICXPort0. Bound to the
// register-stack DIVR_ forms (FPrST0, FST0r, FrST0).
2044 def ICXWriteResGroup191 : SchedWriteRes<[ICXPort0]> {
2046 let NumMicroOps = 1;
2047 let ReleaseAtCycles = [1];
2049 def: InstRW<[ICXWriteResGroup191], (instregex "DIVR_(FPrST0|FST0r|FrST0)")>;
// ICXWriteResGroup194: eight micro-ops across ICXPort1, ICXPort5, ICXPort01,
// ICXPort23 and ICXPort015 (cycles 1,2,2,1,2). Bound to VPCONFLICTDZ128 with
// a memory operand.
2051 def ICXWriteResGroup194 : SchedWriteRes<[ICXPort1,ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
2053 let NumMicroOps = 8;
2054 let ReleaseAtCycles = [1,2,2,1,2];
2056 def: InstRW<[ICXWriteResGroup194], (instregex "VPCONFLICTDZ128rm(b?)")>;
// ICXWriteResGroup195: ten micro-ops across six port groups, five of them on
// ICXPort06. Bound to the memory-destination RCL forms with the count in CL.
2058 def ICXWriteResGroup195 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort78,ICXPort06,ICXPort15,ICXPort0156]> {
2060 let NumMicroOps = 10;
2061 let ReleaseAtCycles = [1,1,1,5,1,1];
2063 def: InstRW<[ICXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>;
// ICXWriteResGroup199: fourteen micro-ops across six port groups
// (cycles 1,1,1,4,2,5). Bound to CMPXCHG8B.
2065 def ICXWriteResGroup199 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06,ICXPort15,ICXPort0156]> {
2067 let NumMicroOps = 14;
2068 let ReleaseAtCycles = [1,1,1,4,2,5];
2070 def: InstRW<[ICXWriteResGroup199], (instrs CMPXCHG8B)>;
// ICXWriteResGroup200: ten micro-ops, occupying ICXPort1 for 1 cycle,
// ICXPort05 for 4 and ICXPort6 for 5. Bound to VZEROALL.
// NumMicroOps is kept equal to the sum of ReleaseAtCycles (1 + 4 + 5); the
// previous value of 34 did not match the resource usage declared here.
2072 def ICXWriteResGroup200 : SchedWriteRes<[ICXPort1, ICXPort05, ICXPort6]> {
2074 let NumMicroOps = 10;
2075 let ReleaseAtCycles = [1, 4, 5];
2077 def: InstRW<[ICXWriteResGroup200], (instrs VZEROALL)>;
// ICXWriteResGroup202: fifteen micro-ops across six port groups
// (cycles 2,1,2,4,2,4). Bound to XCH_F.
2079 def ICXWriteResGroup202 : SchedWriteRes<[ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156]> {
2081 let NumMicroOps = 15;
2082 let ReleaseAtCycles = [2,1,2,4,2,4];
2084 def: InstRW<[ICXWriteResGroup202], (instrs XCH_F)>;
// ICXWriteResGroup205: four micro-ops, one cycle on ICXPort23 and three
// cycles on ICXPort01. Bound to VPMULLQZ128 with a memory operand.
2086 def ICXWriteResGroup205 : SchedWriteRes<[ICXPort23,ICXPort01]> {
2088 let NumMicroOps = 4;
2089 let ReleaseAtCycles = [1,3];
2091 def: InstRW<[ICXWriteResGroup205], (instregex "VPMULLQZ128rm(b?)")>;
// ICXWriteResGroup207: eight micro-ops, one cycle each on ICXPort5, ICXPort6
// and ICXPort06 plus five cycles on ICXPort0156. Bound to CPUID and RDTSC.
2093 def ICXWriteResGroup207 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort06,ICXPort0156]> {
2095 let NumMicroOps = 8;
2096 let ReleaseAtCycles = [1,1,1,5];
2098 def: InstRW<[ICXWriteResGroup207], (instrs CPUID, RDTSC)>;
// ICXWriteResGroup208: eleven micro-ops across six port groups
// (cycles 2,1,1,4,1,2). Bound to the memory-destination RCR forms with the
// count in CL.
2100 def ICXWriteResGroup208 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort78,ICXPort06,ICXPort15,ICXPort0156]> {
2102 let NumMicroOps = 11;
2103 let ReleaseAtCycles = [2,1,1,4,1,2];
2105 def: InstRW<[ICXWriteResGroup208], (instregex "RCR(8|16|32|64)mCL")>;
// ICXWriteResGroup211: four micro-ops, one cycle on ICXPort23 and three
// cycles on ICXPort01. Bound to VPMULLQZ256 with a memory operand.
2107 def ICXWriteResGroup211 : SchedWriteRes<[ICXPort23,ICXPort01]> {
2109 let NumMicroOps = 4;
2110 let ReleaseAtCycles = [1,3];
2112 def: InstRW<[ICXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)")>;
// ICXWriteResGroup211_1: four micro-ops, one cycle on ICXPort23 and three
// cycles on ICXPort0. Bound to VPMULLQZ with a memory operand; unlike
// ICXWriteResGroup211 the multiply micro-ops are restricted to ICXPort0.
2114 def ICXWriteResGroup211_1 : SchedWriteRes<[ICXPort23,ICXPort0]> {
2116 let NumMicroOps = 4;
2117 let ReleaseAtCycles = [1,3];
2119 def: InstRW<[ICXWriteResGroup211_1], (instregex "VPMULLQZrm(b?)")>;
// ICXWriteResGroup215: a single micro-op on ICXPort0. Bound to the
// register-stack DIV_ forms (FPrST0, FST0r, FrST0).
2121 def ICXWriteResGroup215 : SchedWriteRes<[ICXPort0]> {
2123 let NumMicroOps = 1;
2124 let ReleaseAtCycles = [1];
2126 def: InstRW<[ICXWriteResGroup215], (instregex "DIV_(FPrST0|FST0r|FrST0)")>;
// ICXWriteGatherEVEX2: EVEX gather write that holds ICXPort23 for 2 cycles.
// NumMicroOps stays at 5 even though the cycles sum to more elsewhere in
// this family, because two of the micro-ops perform multiple loads.
// Bound to the Z128 gathers listed below.
2128 def ICXWriteGatherEVEX2 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> {
2130 let NumMicroOps = 5; // 2 uops perform multiple loads
2131 let ReleaseAtCycles = [1,2,1,1];
2133 def: InstRW<[ICXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm,
2134 VGATHERDPDZ128rm, VPGATHERDQZ128rm,
2135 VGATHERQPDZ128rm, VPGATHERQQZ128rm)>;
// ICXWriteGatherEVEX4: EVEX gather write that holds ICXPort23 for 4 cycles
// while still counting 5 micro-ops (two of them perform multiple loads).
// Bound to the Z128/Z256 gathers listed below.
2137 def ICXWriteGatherEVEX4 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> {
2139 let NumMicroOps = 5; // 2 uops perform multiple loads
2140 let ReleaseAtCycles = [1,4,1,1];
2142 def: InstRW<[ICXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm,
2143 VGATHERQPDZ256rm, VPGATHERQQZ256rm,
2144 VGATHERDPSZ128rm, VPGATHERDDZ128rm,
2145 VGATHERDPDZ256rm, VPGATHERDQZ256rm)>;
// ICXWriteGatherEVEX8: EVEX gather write that holds ICXPort23 for 8 cycles
// while still counting 5 micro-ops (two of them perform multiple loads).
// Bound to the Z256/Z gathers listed below.
2147 def ICXWriteGatherEVEX8 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> {
2149 let NumMicroOps = 5; // 2 uops perform multiple loads
2150 let ReleaseAtCycles = [1,8,1,1];
2152 def: InstRW<[ICXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm,
2153 VGATHERDPDZrm, VPGATHERDQZrm,
2154 VGATHERQPDZrm, VPGATHERQQZrm,
2155 VGATHERQPSZrm, VPGATHERQDZrm)>;
// ICXWriteGatherEVEX16: EVEX gather write that holds ICXPort23 for 16 cycles
// while still counting 5 micro-ops (two of them perform multiple loads).
// Bound to VGATHERDPSZrm and VPGATHERDDZrm.
2157 def ICXWriteGatherEVEX16 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> {
2159 let NumMicroOps = 5; // 2 uops perform multiple loads
2160 let ReleaseAtCycles = [1,16,1,1];
2162 def: InstRW<[ICXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>;
// ICXWriteResGroup219: eight micro-ops across seven port groups, two of them
// on ICXPort0156. Bound to INSB, INSL and INSW.
2164 def ICXWriteResGroup219 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort6,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
2166 let NumMicroOps = 8;
2167 let ReleaseAtCycles = [1,1,1,1,1,1,2];
2169 def: InstRW<[ICXWriteResGroup219], (instrs INSB, INSL, INSW)>;
// ICXWriteResGroup220: ten micro-ops, occupying ICXPort5 for 1 cycle,
// ICXPort6 for 2 and ICXPort0156 for 7. Bound to MWAITrr.
2171 def ICXWriteResGroup220 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort0156]> {
2173 let NumMicroOps = 10;
2174 let ReleaseAtCycles = [1,2,7];
2176 def: InstRW<[ICXWriteResGroup220], (instrs MWAITrr)>;
// ICXWriteResGroup223: two micro-ops, one cycle on ICXPort0 and one cycle on
// ICXPort23. Bound to DIV_F32m / DIV_F64m.
2178 def ICXWriteResGroup223 : SchedWriteRes<[ICXPort0,ICXPort23]> {
2180 let NumMicroOps = 2;
2181 let ReleaseAtCycles = [1,1];
2183 def: InstRW<[ICXWriteResGroup223], (instregex "DIV_F(32|64)m")>;
// ICXWriteResGroupVEX2: VEX gather write that holds ICXPort23 for 2 cycles
// and counts 5 micro-ops (two of them perform multiple loads).
// Bound to the gathers listed below.
2185 def ICXWriteResGroupVEX2 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> {
2187 let NumMicroOps = 5; // 2 uops perform multiple loads
2188 let ReleaseAtCycles = [1,2,1,1];
2190 def: InstRW<[ICXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
2191 VGATHERQPDrm, VPGATHERQQrm,
2192 VGATHERQPSrm, VPGATHERQDrm)>;
// ICXWriteResGroupVEX4: VEX gather write that holds ICXPort23 for 4 cycles
// and counts 5 micro-ops (two of them perform multiple loads).
// Bound to the gathers listed below.
2194 def ICXWriteResGroupVEX4 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> {
2196 let NumMicroOps = 5; // 2 uops perform multiple loads
2197 let ReleaseAtCycles = [1,4,1,1];
2199 def: InstRW<[ICXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
2200 VGATHERDPSrm, VPGATHERDDrm,
2201 VGATHERQPDYrm, VPGATHERQQYrm,
2202 VGATHERQPSYrm, VPGATHERQDYrm)>;
// ICXWriteResGroupVEX8: VEX gather write that holds ICXPort23 for 8 cycles
// and counts 5 micro-ops (two of them perform multiple loads).
// Bound to VGATHERDPSYrm and VPGATHERDDYrm.
2204 def ICXWriteResGroupVEX8 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> {
2206 let NumMicroOps = 5; // 2 uops perform multiple loads
2207 let ReleaseAtCycles = [1,8,1,1];
2209 def: InstRW<[ICXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
// ICXWriteResGroup225: fourteen micro-ops, occupying ICXPort5 for 5 cycles,
// ICXPort01 for 5 and ICXPort015 for 4. Bound to the register forms
// VPCONFLICTDZ128rr and VPCONFLICTQZ256rr.
2211 def ICXWriteResGroup225 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> {
2213 let NumMicroOps = 14;
2214 let ReleaseAtCycles = [5,5,4];
2216 def: InstRW<[ICXWriteResGroup225], (instregex "VPCONFLICTDZ128rr",
2217 "VPCONFLICTQZ256rr")>;
// ICXWriteResGroup228: nineteen micro-ops across seven port groups
// (cycles 2,1,4,1,1,4,6). Bound to CMPXCHG16B.
2219 def ICXWriteResGroup228 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
2221 let NumMicroOps = 19;
2222 let ReleaseAtCycles = [2,1,4,1,1,4,6];
2224 def: InstRW<[ICXWriteResGroup228], (instrs CMPXCHG16B)>;
// ICXWriteResGroup233: three micro-ops, one cycle each on ICXPort0, ICXPort5
// and ICXPort23. Bound to DIV_FI16m / DIV_FI32m.
2226 def ICXWriteResGroup233 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
2228 let NumMicroOps = 3;
2229 let ReleaseAtCycles = [1,1,1];
2231 def: InstRW<[ICXWriteResGroup233], (instregex "DIV_FI(16|32)m")>;
// ICXWriteResGroup239: two micro-ops, one cycle on ICXPort0 and one cycle on
// ICXPort23. Bound to DIVR_F32m / DIVR_F64m.
2233 def ICXWriteResGroup239 : SchedWriteRes<[ICXPort0,ICXPort23]> {
2235 let NumMicroOps = 2;
2236 let ReleaseAtCycles = [1,1];
2238 def: InstRW<[ICXWriteResGroup239], (instregex "DIVR_F(32|64)m")>;
// ICXWriteResGroup242: fifteen micro-ops, occupying ICXPort5 for 5 cycles,
// ICXPort01 for 5, ICXPort23 for 1 and ICXPort015 for 4. Bound to
// VPCONFLICTQZ256 with a memory operand.
2240 def ICXWriteResGroup242 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
2242 let NumMicroOps = 15;
2243 let ReleaseAtCycles = [5,5,1,4];
2245 def: InstRW<[ICXWriteResGroup242], (instregex "VPCONFLICTQZ256rm(b?)")>;
// ICXWriteResGroup243: three micro-ops, one cycle each on ICXPort0, ICXPort5
// and ICXPort23. Bound to DIVR_FI16m / DIVR_FI32m.
2247 def ICXWriteResGroup243 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
2249 let NumMicroOps = 3;
2250 let ReleaseAtCycles = [1,1,1];
2252 def: InstRW<[ICXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>;
// ICXWriteResGroup247: twenty-three micro-ops across five port groups
// (cycles 1,5,3,4,10). Bound below to a regex list that begins with the
// immediate-port IN forms.
2254 def ICXWriteResGroup247 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort23,ICXPort06,ICXPort0156]> {
2256 let NumMicroOps = 23;
2257 let ReleaseAtCycles = [1,5,3,4,10];
2259 def: InstRW<[ICXWriteResGroup247], (instregex "IN(8|16|32)ri",
// ICXWriteResGroup248: twenty-three micro-ops across six port groups
// (cycles 1,5,2,1,4,10). Bound below to a regex list that begins with the
// immediate-port OUT forms.
2262 def ICXWriteResGroup248 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
2264 let NumMicroOps = 23;
2265 let ReleaseAtCycles = [1,5,2,1,4,10];
2267 def: InstRW<[ICXWriteResGroup248], (instregex "OUT(8|16|32)ir",
// ICXWriteResGroup249: twenty-one micro-ops, occupying ICXPort5 for 9
// cycles, ICXPort01 for 7 and ICXPort015 for 5. Bound below to a regex list
// that begins with VPCONFLICTDZ256rr.
2270 def ICXWriteResGroup249 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> {
2272 let NumMicroOps = 21;
2273 let ReleaseAtCycles = [9,7,5];
2275 def: InstRW<[ICXWriteResGroup249], (instregex "VPCONFLICTDZ256rr",
// ICXWriteResGroup250: thirty-one micro-ops, occupying ICXPort1 for 1 cycle,
// ICXPort6 for 8, ICXPort23 for 1 and ICXPort0156 for 21. Bound to opcodes
// matching "XRSTOR(64)?".
2278 def ICXWriteResGroup250 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort23,ICXPort0156]> {
2280 let NumMicroOps = 31;
2281 let ReleaseAtCycles = [1,8,1,21];
2283 def: InstRW<[ICXWriteResGroup250], (instregex "XRSTOR(64)?")>;
// ICXWriteResGroup252: eighteen micro-ops across eight port groups
// (cycles 1,1,2,3,1,1,1,8). Bound to VMCLEARm.
2285 def ICXWriteResGroup252 : SchedWriteRes<[ICXPort1,ICXPort49,ICXPort5,ICXPort6,ICXPort23,ICXPort78,ICXPort15,ICXPort0156]> {
2287 let NumMicroOps = 18;
2288 let ReleaseAtCycles = [1,1,2,3,1,1,1,8];
2290 def: InstRW<[ICXWriteResGroup252], (instrs VMCLEARm)>;
// ICXWriteResGroup253: thirty-nine micro-ops across five port groups
// (cycles 1,10,1,1,26). Bound to XSAVE64.
2292 def ICXWriteResGroup253 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort0156]> {
2294 let NumMicroOps = 39;
2295 let ReleaseAtCycles = [1,10,1,1,26];
2297 def: InstRW<[ICXWriteResGroup253], (instrs XSAVE64)>;
// ICXWriteResGroup254: twenty-two micro-ops, occupying ICXPort5 for 2 cycles
// and ICXPort0156 for 20. Bound to RDTSCP.
2299 def ICXWriteResGroup254 : SchedWriteRes<[ICXPort5,ICXPort0156]> {
2301 let NumMicroOps = 22;
2302 let ReleaseAtCycles = [2,20];
2304 def: InstRW<[ICXWriteResGroup254], (instrs RDTSCP)>;
// ICXWriteResGroup255: forty micro-ops across five port groups
// (cycles 1,11,1,1,26). Bound to XSAVE and to opcodes matching XSAVEC,
// XSAVES and XSAVEOPT.
2306 def ICXWriteResGroup255 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort0156]> {
2308 let NumMicroOps = 40;
2309 let ReleaseAtCycles = [1,11,1,1,26];
2311 def: InstRW<[ICXWriteResGroup255], (instrs XSAVE)>;
2312 def: InstRW<[ICXWriteResGroup255], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;
// ICXWriteResGroup256: twenty-two micro-ops, occupying ICXPort5 for 9
// cycles, ICXPort01 for 7, ICXPort23 for 1 and ICXPort015 for 5. Bound to
// the memory-operand forms VPCONFLICTDZ256rm and VPCONFLICTQZrm.
2314 def ICXWriteResGroup256 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
2316 let NumMicroOps = 22;
2317 let ReleaseAtCycles = [9,7,1,5];
2319 def: InstRW<[ICXWriteResGroup256], (instregex "VPCONFLICTDZ256rm(b?)",
2320 "VPCONFLICTQZrm(b?)")>;
// ICXWriteResGroup258: sixty-four micro-ops across five port groups
// (cycles 2,8,5,10,39). Bound to FLDENVm.
2322 def ICXWriteResGroup258 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort05,ICXPort06,ICXPort0156]> {
2324 let NumMicroOps = 64;
2325 let ReleaseAtCycles = [2,8,5,10,39];
2327 def: InstRW<[ICXWriteResGroup258], (instrs FLDENVm)>;
// ICXWriteResGroup259: eighty-eight micro-ops across seven port groups
// (cycles 4,4,31,1,2,1,45). Bound to FXRSTOR64.
2329 def ICXWriteResGroup259 : SchedWriteRes<[ICXPort0,ICXPort6,ICXPort23,ICXPort05,ICXPort06,ICXPort15,ICXPort0156]> {
2331 let NumMicroOps = 88;
2332 let ReleaseAtCycles = [4,4,31,1,2,1,45];
2334 def: InstRW<[ICXWriteResGroup259], (instrs FXRSTOR64)>;
// ICXWriteResGroup260: ninety micro-ops across seven port groups
// (cycles 4,2,33,1,2,1,47). Bound to FXRSTOR.
2336 def ICXWriteResGroup260 : SchedWriteRes<[ICXPort0,ICXPort6,ICXPort23,ICXPort05,ICXPort06,ICXPort15,ICXPort0156]> {
2338 let NumMicroOps = 90;
2339 let ReleaseAtCycles = [4,2,33,1,2,1,47];
2341 def: InstRW<[ICXWriteResGroup260], (instrs FXRSTOR)>;
// ICXWriteResGroup261: thirty-five micro-ops, occupying ICXPort5 for 17
// cycles, ICXPort01 for 11 and ICXPort015 for 7. Bound to VPCONFLICTDZrr.
2343 def ICXWriteResGroup261 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> {
2345 let NumMicroOps = 35;
2346 let ReleaseAtCycles = [17,11,7];
2348 def: InstRW<[ICXWriteResGroup261], (instregex "VPCONFLICTDZrr")>;
// ICXWriteResGroup262: thirty-six micro-ops; the same port usage as
// ICXWriteResGroup261 plus one cycle on ICXPort23. Bound to VPCONFLICTDZ
// with a memory operand.
2350 def ICXWriteResGroup262 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
2352 let NumMicroOps = 36;
2353 let ReleaseAtCycles = [17,11,1,7];
2355 def: InstRW<[ICXWriteResGroup262], (instregex "VPCONFLICTDZrm(b?)")>;
// ICXWriteResGroup263: fifteen micro-ops, occupying ICXPort5 for 6 cycles,
// ICXPort05 for 3 and ICXPort0156 for 6. Bound to FNINIT.
2357 def ICXWriteResGroup263 : SchedWriteRes<[ICXPort5,ICXPort05,ICXPort0156]> {
2359 let NumMicroOps = 15;
2360 let ReleaseAtCycles = [6,3,6];
2362 def: InstRW<[ICXWriteResGroup263], (instrs FNINIT)>;
// ICXWriteResGroup266: one hundred micro-ops across eight port groups
// (cycles 9,1,11,16,1,11,21,30). Bound to FSTENVm.
2364 def ICXWriteResGroup266 : SchedWriteRes<[ICXPort0,ICXPort1,ICXPort49,ICXPort5,ICXPort6,ICXPort78,ICXPort06,ICXPort0156]> {
2366 let NumMicroOps = 100;
2367 let ReleaseAtCycles = [9,1,11,16,1,11,21,30];
2369 def: InstRW<[ICXWriteResGroup266], (instrs FSTENVm)>;
// ICXWriteResGroup267: four micro-ops, one cycle on ICXPort6 and three
// cycles on ICXPort0156. Bound to PAUSE.
2371 def ICXWriteResGroup267 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
2373 let NumMicroOps = 4;
2374 let ReleaseAtCycles = [1,3];
2376 def: InstRW<[ICXWriteResGroup267], (instrs PAUSE)>;
// CLC is scheduled with the generic WriteZero write rather than a
// port-specific resource group.
2378 def: InstRW<[WriteZero], (instrs CLC)>;
2381 // Instruction variants handled by the renamer. These might not need execution
2382 // ports in certain conditions.
2383 // See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
2384 // section "Skylake Pipeline" > "Register allocation and renaming".
2385 // These can be investigated with llvm-exegesis, e.g.
2386 // echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
2387 // echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
// ICXWriteZeroLatency: write with an empty processor-resource list. It is
// the variant selected by the zero-idiom SchedWriteVariants that follow.
2389 def ICXWriteZeroLatency : SchedWriteRes<[]> {
// ICXWriteZeroIdiom: resolves to ICXWriteZeroLatency when ZeroIdiomPredicate
// holds for the instruction, otherwise to the generic WriteALU. Bound below
// to an instruction list that begins with SUB32rr and SUB64rr.
2393 def ICXWriteZeroIdiom : SchedWriteVariant<[
2394 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
2395 SchedVar<NoSchedPred, [WriteALU]>
2397 def : InstRW<[ICXWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
// ICXWriteFZeroIdiom: resolves to ICXWriteZeroLatency when
// ZeroIdiomPredicate holds, otherwise to WriteFLogic. Bound below to an
// instruction list that begins with XORPSrr and VXORPSrr.
2400 def ICXWriteFZeroIdiom : SchedWriteVariant<[
2401 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
2402 SchedVar<NoSchedPred, [WriteFLogic]>
2404 def : InstRW<[ICXWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
// ICXWriteFZeroIdiomY: resolves to ICXWriteZeroLatency when
// ZeroIdiomPredicate holds, otherwise to WriteFLogicY. Bound to the Y and
// Z256 register forms of VXORPS / VXORPD.
2409 def ICXWriteFZeroIdiomY : SchedWriteVariant<[
2410 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
2411 SchedVar<NoSchedPred, [WriteFLogicY]>
2413 def : InstRW<[ICXWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
2414 VXORPSZ256rr, VXORPDZ256rr)>;
// ICXWriteFZeroIdiomZ: resolves to ICXWriteZeroLatency when
// ZeroIdiomPredicate holds, otherwise to WriteFLogicZ. Bound to VXORPSZrr
// and VXORPDZrr.
2416 def ICXWriteFZeroIdiomZ : SchedWriteVariant<[
2417 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
2418 SchedVar<NoSchedPred, [WriteFLogicZ]>
2420 def : InstRW<[ICXWriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr)>;
// ICXWriteVZeroIdiomLogicX: resolves to ICXWriteZeroLatency when
// ZeroIdiomPredicate holds, otherwise to WriteVecLogicX. Bound to PXORrr,
// VPXORrr and the Z128 register forms of VPXORD / VPXORQ.
2422 def ICXWriteVZeroIdiomLogicX : SchedWriteVariant<[
2423 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
2424 SchedVar<NoSchedPred, [WriteVecLogicX]>
2426 def : InstRW<[ICXWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
2427 VPXORDZ128rr, VPXORQZ128rr)>;
// ICXWriteVZeroIdiomLogicY: resolves to ICXWriteZeroLatency when
// ZeroIdiomPredicate holds, otherwise to WriteVecLogicY. Bound to VPXORYrr
// and the Z256 register forms of VPXORD / VPXORQ.
2429 def ICXWriteVZeroIdiomLogicY : SchedWriteVariant<[
2430 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
2431 SchedVar<NoSchedPred, [WriteVecLogicY]>
2433 def : InstRW<[ICXWriteVZeroIdiomLogicY], (instrs VPXORYrr,
2434 VPXORDZ256rr, VPXORQZ256rr)>;
// ICXWriteVZeroIdiomLogicZ: resolves to ICXWriteZeroLatency when
// ZeroIdiomPredicate holds, otherwise to WriteVecLogicZ. Bound to VPXORDZrr
// and VPXORQZrr.
2436 def ICXWriteVZeroIdiomLogicZ : SchedWriteVariant<[
2437 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
2438 SchedVar<NoSchedPred, [WriteVecLogicZ]>
2440 def : InstRW<[ICXWriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr)>;
// ICXWriteVZeroIdiomALUX: resolves to ICXWriteZeroLatency when
// ZeroIdiomPredicate holds, otherwise to WriteVecALUX. Bound to the
// register forms of (V)PCMPGTB, (V)PCMPGTD and (V)PCMPGTW.
2442 def ICXWriteVZeroIdiomALUX : SchedWriteVariant<[
2443 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
2444 SchedVar<NoSchedPred, [WriteVecALUX]>
2446 def : InstRW<[ICXWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr,
2447 PCMPGTDrr, VPCMPGTDrr,
2448 PCMPGTWrr, VPCMPGTWrr)>;
// ICXWriteVZeroIdiomALUY: resolves to ICXWriteZeroLatency when
// ZeroIdiomPredicate holds, otherwise to WriteVecALUY. Bound below to an
// instruction list that begins with VPCMPGTBYrr.
2450 def ICXWriteVZeroIdiomALUY : SchedWriteVariant<[
2451 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
2452 SchedVar<NoSchedPred, [WriteVecALUY]>
2454 def : InstRW<[ICXWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr,
// ICXWritePSUB: a single micro-op on ICXPort015; the non-idiom fallback used
// by ICXWriteVZeroIdiomPSUB.
2458 def ICXWritePSUB : SchedWriteRes<[ICXPort015]> {
2460 let NumMicroOps = 1;
2461 let ReleaseAtCycles = [1];
// ICXWriteVZeroIdiomPSUB: resolves to ICXWriteZeroLatency when
// ZeroIdiomPredicate holds, otherwise to ICXWritePSUB. Bound to the register
// forms of the packed-subtract instructions listed below.
2464 def ICXWriteVZeroIdiomPSUB : SchedWriteVariant<[
2465 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
2466 SchedVar<NoSchedPred, [ICXWritePSUB]>
2469 def : InstRW<[ICXWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, VPSUBBZ128rr,
2470 PSUBDrr, VPSUBDrr, VPSUBDZ128rr,
2471 PSUBQrr, VPSUBQrr, VPSUBQZ128rr,
2472 PSUBWrr, VPSUBWrr, VPSUBWZ128rr,
2473 VPSUBBYrr, VPSUBBZ256rr,
2474 VPSUBDYrr, VPSUBDZ256rr,
2475 VPSUBQYrr, VPSUBQZ256rr,
2476 VPSUBWYrr, VPSUBWZ256rr,
// ICXWritePCMPGTQ: a single micro-op on ICXPort5; the non-idiom fallback
// used by ICXWriteVZeroIdiomPCMPGTQ.
2481 def ICXWritePCMPGTQ : SchedWriteRes<[ICXPort5]> {
2483 let NumMicroOps = 1;
2484 let ReleaseAtCycles = [1];
// ICXWriteVZeroIdiomPCMPGTQ: resolves to ICXWriteZeroLatency when
// ZeroIdiomPredicate holds, otherwise to ICXWritePCMPGTQ. Bound below to an
// instruction list that begins with PCMPGTQrr and VPCMPGTQrr.
2487 def ICXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
2488 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
2489 SchedVar<NoSchedPred, [ICXWritePCMPGTQ]>
2491 def : InstRW<[ICXWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
2495 // CMOVs that use both the Z and C flags require an extra uop.
// Register form: two micro-ops, both on ICXPort06.
2496 def ICXWriteCMOVA_CMOVBErr : SchedWriteRes<[ICXPort06]> {
2498 let ReleaseAtCycles = [2];
2499 let NumMicroOps = 2;
// Memory form: three micro-ops, one cycle on ICXPort23 plus the two
// ICXPort06 cycles of the register form.
2502 def ICXWriteCMOVA_CMOVBErm : SchedWriteRes<[ICXPort23,ICXPort06]> {
2504 let ReleaseAtCycles = [1,2];
2505 let NumMicroOps = 3;
// Variant for register CMOVs: the two-uop write when IsCMOVArr_Or_CMOVBErr
// holds, otherwise the generic WriteCMOV.
2508 def ICXCMOVA_CMOVBErr : SchedWriteVariant<[
2509 SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [ICXWriteCMOVA_CMOVBErr]>,
2510 SchedVar<NoSchedPred, [WriteCMOV]>
// Variant for memory CMOVs: the three-uop write when IsCMOVArm_Or_CMOVBErm
// holds, otherwise the folded-load form WriteCMOV.Folded.
2513 def ICXCMOVA_CMOVBErm : SchedWriteVariant<[
2514 SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [ICXWriteCMOVA_CMOVBErm]>,
2515 SchedVar<NoSchedPred, [WriteCMOV.Folded]>
// Attach the variants to the 16/32/64-bit CMOV register and memory opcodes.
2518 def : InstRW<[ICXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
2519 def : InstRW<[ICXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
2521 // SETCCs that use both the Z and C flags require an extra uop.
// Register form: two micro-ops, both on ICXPort06.
2522 def ICXWriteSETA_SETBEr : SchedWriteRes<[ICXPort06]> {
2524 let ReleaseAtCycles = [2];
2525 let NumMicroOps = 2;
// Memory form: four micro-ops, one cycle each on ICXPort49 and ICXPort78
// plus the two ICXPort06 cycles of the register form.
2528 def ICXWriteSETA_SETBEm : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort06]> {
2530 let ReleaseAtCycles = [1,1,2];
2531 let NumMicroOps = 4;
// Variant for register SETcc: the two-uop write when IsSETAr_Or_SETBEr
// holds, otherwise the generic WriteSETCC.
2534 def ICXSETA_SETBErr : SchedWriteVariant<[
2535 SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [ICXWriteSETA_SETBEr]>,
2536 SchedVar<NoSchedPred, [WriteSETCC]>
// Variant for memory SETcc: the four-uop write when IsSETAm_Or_SETBEm holds,
// otherwise the generic WriteSETCCStore.
2539 def ICXSETA_SETBErm : SchedWriteVariant<[
2540 SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [ICXWriteSETA_SETBEm]>,
2541 SchedVar<NoSchedPred, [WriteSETCCStore]>
// Attach the variants to the SETCCr and SETCCm opcodes.
2544 def : InstRW<[ICXSETA_SETBErr], (instrs SETCCr)>;
2545 def : InstRW<[ICXSETA_SETBErm], (instrs SETCCm)>;
2547 ///////////////////////////////////////////////////////////////////////////////
2548 // Dependency breaking instructions.
2549 ///////////////////////////////////////////////////////////////////////////////
2551 def : IsZeroIdiomFunction<[
2553 DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
2562 PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
2563 PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
2564 ], ZeroIdiomPredicate>,
2571 // xmm int variants.
2573 VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
2574 VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
2577 VXORPSYrr, VXORPDYrr, VPXORYrr,
2578 VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
2579 VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr,
2582 VXORPSZrr, VXORPDZrr, VPXORDZrr, VPXORQZrr,
2583 VXORPSZ128rr, VXORPDZ128rr, VPXORDZ128rr, VPXORQZ128rr,
2584 VXORPSZ256rr, VXORPDZ256rr, VPXORDZ256rr, VPXORQZ256rr,
2585 VPSUBBZrr, VPSUBWZrr, VPSUBDZrr, VPSUBQZrr,
2586 VPSUBBZ128rr, VPSUBWZ128rr, VPSUBDZ128rr, VPSUBQZ128rr,
2587 VPSUBBZ256rr, VPSUBWZ256rr, VPSUBDZ256rr, VPSUBQZ256rr,
2588 ], ZeroIdiomPredicate>,