1 //=- X86ScheduleBdVer2.td - X86 BdVer2 (Piledriver) Scheduling * tablegen -*-=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the machine model for AMD bdver2 (Piledriver) to support
10 // instruction scheduling and other instruction cost heuristics.
12 // * AMD Software Optimization Guide for AMD Family 15h Processors.
13 // https://support.amd.com/TechDocs/47414_15h_sw_opt_guide.pdf
14 // * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog
15 // http://www.agner.org/optimize/microarchitecture.pdf
16 // * https://www.realworldtech.com/bulldozer/
17 // Yes, that is for Bulldozer aka bdver1, not Piledriver aka bdver2.
19 //===----------------------------------------------------------------------===//
// Top-level machine model record for AMD bdver2 (Piledriver). Every field is
// a constant override; the trailing comment on each line records the
// rationale. MicroOpBufferSize uses the same 128 figure that is passed to the
// RetireControlUnit record (PdRCU) further down in this file.
21 def BdVer2Model : SchedMachineModel {
22 let IssueWidth = 4; // Up to 4 IPC can be decoded, issued, retired.
23 let MicroOpBufferSize = 128; // RCU reorder buffer size, which is unconfirmed.
24 let LoopMicroOpBufferSize = -1; // There does not seem to be a loop buffer.
25 let LoadLatency = 4; // L1 data cache has a 4-cycle load-to-use latency.
26 let HighLatency = 25; // FIXME: any better choice?
27 let MispredictPenalty = 20; // Minimum branch misdirection penalty.
29 let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.
31 // FIXME: Incomplete. This flag is set to allow the scheduler to assign
32 // a default model to unrecognized opcodes.
33 let CompleteModel = 0;
34 } // SchedMachineModel
36 let SchedModel = BdVer2Model in {
39 //===----------------------------------------------------------------------===//
41 //===----------------------------------------------------------------------===//
43 // There are a total of eight pipes.
45 //===----------------------------------------------------------------------===//
46 // Integer execution pipes
49 // Two EX (ALU) pipes.
50 def PdEX0 : ProcResource<1>; // ALU, Integer Pipe0
51 def PdEX1 : ProcResource<1>; // ALU, Integer Pipe1
// Group over both ALU pipes, used by writes that name PdEX01 instead of one
// specific pipe.
52 def PdEX01 : ProcResGroup<[PdEX0, PdEX1]>;
54 // Two AGLU pipes, identical.
// Declared as one resource with two units rather than as two named pipes.
55 def PdAGLU01 : ProcResource<2>; // AGU, Integer Pipe[23]
57 //===----------------------------------------------------------------------===//
58 // Floating point execution pipes
// Four vector/FPU pipes, each declared as its own single-unit resource.
63 def PdFPU0 : ProcResource<1>; // Vector/FPU Pipe0
64 def PdFPU1 : ProcResource<1>; // Vector/FPU Pipe1
65 def PdFPU2 : ProcResource<1>; // Vector/FPU Pipe2
66 def PdFPU3 : ProcResource<1>; // Vector/FPU Pipe3
// Pipes 0-1 and pipes 2-3 are additionally exposed as two-pipe groups.
69 def PdFPU01 : ProcResGroup<[PdFPU0, PdFPU1]>;
70 def PdFPU23 : ProcResGroup<[PdFPU2, PdFPU3]>;
73 //===----------------------------------------------------------------------===//
75 //===----------------------------------------------------------------------===//
77 // The Retire Control Unit on Piledriver can retire up to 4 macro-ops per cycle.
78 // On the other hand, the RCU reorder buffer size for Piledriver does not
79 // seem to be specified in any trustworthy source.
80 // But as per https://www.realworldtech.com/bulldozer/6/ the Bulldozer had
81 // RCU reorder buffer size of 128. So that is a good guess for now.
82 def PdRCU : RetireControlUnit<128, 4>; // 128 = guessed reorder buffer size, 4 = macro-ops retired per cycle.
85 //===----------------------------------------------------------------------===//
87 //===----------------------------------------------------------------------===//
89 // There are a total of two pipelines, each one with its own scheduler.
91 //===----------------------------------------------------------------------===//
92 // Integer Pipeline Scheduling
95 // There is one Integer Scheduler per core.
97 // Integer physical register file has 96 registers of 64-bit.
98 def PdIntegerPRF : RegisterFile<96, [GR64, CCR]>; // 96 physical registers; lists the GR64 and CCR register classes.
100 // Unified Integer, Memory Scheduler has 40 entries.
101 def PdEX : ProcResGroup<[PdEX0, PdEX1, PdAGLU01]> {
102 // Up to 4 IPC can be decoded, issued, retired.
107 //===----------------------------------------------------------------------===//
108 // FPU Pipeline Scheduling
111 // The FPU unit is shared between the two cores.
113 // FP physical register file has 160 registers of 128-bit.
114 // Operations on 256-bit data types are cracked into two COPs.
115 def PdFpuPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>; // 160 physical registers; the [1, 1, 2] list lines up with the class list, so VR256 is costed at 2.
117 // Unified FP Scheduler has 64 entries.
118 def PdFPU : ProcResGroup<[PdFPU0, PdFPU1, PdFPU2, PdFPU3]> {
119 // Up to 4 IPC can be decoded, issued, retired.
124 //===----------------------------------------------------------------------===//
126 //===----------------------------------------------------------------------===//
128 //===----------------------------------------------------------------------===//
132 let Super = PdAGLU01 in
133 def PdLoad : ProcResource<2> {
134 // For Piledriver, the load queue is 40 entries deep.
138 def PdLoadQueue : LoadQueue<PdLoad>;
140 let Super = PdAGLU01 in
141 def PdStore : ProcResource<1> {
142 // For Piledriver, the store queue is 24 entries deep.
146 def PdStoreQueue : StoreQueue<PdStore>;
148 //===----------------------------------------------------------------------===//
149 // Integer Execution Units
// Single-unit resources for specialised integer work. The trailing comment on
// each def names the EX pipe the unit is associated with and what it handles.
152 def PdDiv : ProcResource<1>; // PdEX0; unpipelined integer division
153 def PdCount : ProcResource<1>; // PdEX0; POPCNT, LZCNT
155 def PdMul : ProcResource<1>; // PdEX1; integer multiplication
156 def PdBranch : ProcResource<1>; // PdEX1; JMP, fused branches
158 //===----------------------------------------------------------------------===//
159 // Floating-Point Units
// Functional units inside the FPU. The number in ProcResource<N> is the unit
// count, and the trailing comment on each def names the FPU pipe(s) the unit
// is associated with.
162 // Two FMAC/FPFMA units.
163 def PdFPFMA : ProcResource<2>; // PdFPU0, PdFPU1
165 // One 128-bit integer multiply-accumulate unit.
166 def PdFPMMA : ProcResource<1>; // PdFPU0
168 // One fp conversion unit.
169 def PdFPCVT : ProcResource<1>; // PdFPU0
171 // One unit for shuffles, packs, permutes, shifts.
172 def PdFPXBR : ProcResource<1>; // PdFPU1
174 // Two 128-bit packed integer units.
175 def PdFPMAL : ProcResource<2>; // PdFPU2, PdFPU3
177 // One FP store unit.
178 def PdFPSTO : ProcResource<1>; // PdFPU3
181 //===----------------------------------------------------------------------===//
182 // Basic helper classes.
183 //===----------------------------------------------------------------------===//
185 // Many SchedWrites are defined in pairs with and without a folded load.
186 // Instructions with folded loads are usually micro-fused, so they only appear
187 // as two micro-ops when dispatched by the schedulers.
188 // This multiclass defines the resource usage for variants with and without
// a folded load.
190 multiclass PdWriteRes<SchedWrite SchedRW,
191 list<ProcResourceKind> ExePorts, int Lat = 1,
192 list<int> Res = [], int UOps = 1> {
193 def : WriteRes<SchedRW, ExePorts> {
195 let ResourceCycles = Res;
196 let NumMicroOps = UOps;
200 multiclass __pdWriteResPair<X86FoldableSchedWrite SchedRW,
201 list<ProcResourceKind> ExePorts, int Lat,
202 list<int> Res, int UOps,
203 int LoadLat, int LoadRes, int LoadUOps> {
204 defm : PdWriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
206 defm : PdWriteRes<SchedRW.Folded,
207 !listconcat([PdLoad], ExePorts),
209 !if(!and(!empty(Res), !eq(LoadRes, 1)),
211 !listconcat([LoadRes],
213 !listsplat(1, !size(ExePorts)),
215 !add(UOps, LoadUOps)>;
218 multiclass PdWriteResExPair<X86FoldableSchedWrite SchedRW,
219 list<ProcResourceKind> ExePorts, int Lat = 1,
220 list<int> Res = [], int UOps = 1,
222 defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
223 /*LoadLat*/4, /*LoadRes*/3, LoadUOps>;
226 multiclass PdWriteResXMMPair<X86FoldableSchedWrite SchedRW,
227 list<ProcResourceKind> ExePorts, int Lat = 1,
228 list<int> Res = [], int UOps = 1,
230 defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
231 /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
234 multiclass PdWriteResYMMPair<X86FoldableSchedWrite SchedRW,
235 list<ProcResourceKind> ExePorts, int Lat,
236 list<int> Res = [], int UOps = 2,
238 defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
239 /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
242 //===----------------------------------------------------------------------===//
244 //===----------------------------------------------------------------------===//
246 // L1 data cache has a 4-cycle load-to-use latency, so ReadAfterLd registers
247 // needn't be available until 4 cycles after the memory operand.
248 def : ReadAdvance<ReadAfterLd, 4>;
250 // Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available
251 // until 5 cycles after the memory operand.
252 def : ReadAdvance<ReadAfterVecLd, 5>;
253 def : ReadAdvance<ReadAfterVecXLd, 5>;
254 def : ReadAdvance<ReadAfterVecYLd, 5>;
256 // Transfer from int domain to ivec domain incurs additional latency of 8..10cy
257 // Reference: Agner, Microarchitecture, "AMD Bulldozer, Piledriver, Steamroller
258 // and Excavator pipeline", "Data delay between different execution domains"
// The value used below is the upper bound (10) of that range, written as a
// negative advance.
259 def : ReadAdvance<ReadInt2Fpu, -10>;
261 // A folded store needs a cycle on the PdStore for the store data.
262 def : WriteRes<WriteRMW, [PdStore]>;
264 ////////////////////////////////////////////////////////////////////////////////
265 // Loads, stores, and moves, not folded with other operations.
266 ////////////////////////////////////////////////////////////////////////////////
// Plain load: latency 5, occupying PdLoad for 2 resource cycles.
268 def : WriteRes<WriteLoad, [PdLoad]> { let Latency = 5; let ResourceCycles = [2]; }
// Regular and non-temporal stores go to PdStore with no field overrides.
269 def : WriteRes<WriteStore, [PdStore]>;
270 def : WriteRes<WriteStoreNT, [PdStore]>;
// Register move: PdEX01 (either ALU pipe) for 2 resource cycles.
271 def : WriteRes<WriteMove, [PdEX01]> { let ResourceCycles = [2]; }
// This write is declared unsupported in this model.
272 defm : X86WriteResUnsupported<WriteVecMaskedGatherWriteback>;
275 // FIXME: These are copy and pasted from WriteLoad/Store.
// LDMXCSR: PdLoad, latency 5. STMXCSR: PdStore for 18 resource cycles, 2 uops.
276 def : WriteRes<WriteLDMXCSR, [PdLoad]> { let Latency = 5; }
277 def : WriteRes<WriteSTMXCSR, [PdStore]> { let NumMicroOps = 2; let ResourceCycles = [18]; }
279 // Treat misc copies as a move.
280 def : InstRW<[WriteMove], (instrs COPY)>;
282 ////////////////////////////////////////////////////////////////////////////////
283 // Idioms that clear a register, like xorps %xmm0, %xmm0.
284 // These can often bypass execution ports completely.
285 ////////////////////////////////////////////////////////////////////////////////
// WriteZero is given an empty resource list.
287 def : WriteRes<WriteZero, [/*No ExePorts*/]>;
289 ////////////////////////////////////////////////////////////////////////////////
290 // Branches don't produce values, so they have no latency, but they still
291 // consume resources. Indirect branches can fold loads.
292 ////////////////////////////////////////////////////////////////////////////////
// Jumps use PdEX1 plus the PdBranch unit; Lat, Res and UOps are left at the
// PdWriteResExPair defaults.
294 defm : PdWriteResExPair<WriteJump, [PdEX1, PdBranch]>;
296 ////////////////////////////////////////////////////////////////////////////////
297 // Special case scheduling classes.
298 ////////////////////////////////////////////////////////////////////////////////
// System and microcoded writes both get a flat latency of 100 on PdEX01.
300 def : WriteRes<WriteSystem, [PdEX01]> { let Latency = 100; }
301 def : WriteRes<WriteMicrocoded, [PdEX01]> { let Latency = 100; }
// Fences are charged to PdStore with no field overrides.
302 def : WriteRes<WriteFence, [PdStore]>;
304 def PdWriteXLAT : SchedWriteRes<[PdEX01]> {
307 def : InstRW<[PdWriteXLAT], (instrs XLAT)>;
309 def PdWriteLARrr : SchedWriteRes<[PdEX01]> {
311 let ResourceCycles = [375];
312 let NumMicroOps = 45;
314 def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
317 // Nops don't have dependencies, so there's no actual latency, but we set this
318 // to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
// NOTE(review): only ResourceCycles (2 on PdEX01) is overridden below; no
// Latency override is visible, so the '1' above presumably refers to the
// WriteRes default latency - confirm.
319 def : WriteRes<WriteNop, [PdEX01]> { let ResourceCycles = [2]; }
321 ////////////////////////////////////////////////////////////////////////////////
323 ////////////////////////////////////////////////////////////////////////////////
325 defm : PdWriteResExPair<WriteALU, [PdEX01], 1, [2]>;
327 def PdWriteALURMW : SchedWriteRes<[PdLoad, PdEX01, PdStore]> {
329 let ResourceCycles = [3, 2, 1];
332 def : SchedAlias<WriteALURMW, PdWriteALURMW>;
334 def PdWriteLXADD : SchedWriteRes<[PdEX01]> {
336 let ResourceCycles = [88];
339 def : InstRW<[PdWriteLXADD], (instrs LXADD8, LXADD16, LXADD32, LXADD64)>;
341 def PdWriteBMI1 : SchedWriteRes<[PdEX01]> {
343 let ResourceCycles = [2];
346 def : InstRW<[PdWriteBMI1],
347 (instrs BLCFILL32rr, BLCFILL64rr, BLCI32rr, BLCI64rr,
348 BLCIC32rr, BLCIC64rr, BLCMSK32rr, BLCMSK64rr,
349 BLCS32rr, BLCS64rr, BLSFILL32rr, BLSFILL64rr,
350 BLSIC32rr, BLSIC64rr, T1MSKC32rr, T1MSKC64rr,
351 TZMSK32rr, TZMSK64rr)>;
353 def PdWriteBMI1m : SchedWriteRes<[PdLoad, PdEX01]> {
355 let ResourceCycles = [3, 3];
358 def : InstRW<[PdWriteBMI1m],
359 (instrs BLCFILL32rm, BLCFILL64rm, BLCI32rm, BLCI64rm,
360 BLCIC32rm, BLCIC64rm, BLCMSK32rm, BLCMSK64rm,
361 BLCS32rm, BLCS64rm, BLSFILL32rm, BLSFILL64rm,
362 BLSIC32rm, BLSIC64rm, T1MSKC32rm, T1MSKC64rm,
363 TZMSK32rm, TZMSK64rm)>;
365 defm : PdWriteResExPair<WriteADC, [PdEX01], 1, [2]>;
367 def PdWriteADCSBB64ri32 : SchedWriteRes<[PdEX01]> {
368 let ResourceCycles = [3];
370 def : InstRW<[PdWriteADCSBB64ri32], (instrs ADC64ri32, SBB64ri32)>;
// PdWriteRes argument order: write, ports, Lat, Res, UOps.
// Byte swaps: PdEX01 with every optional argument left at its default.
372 defm : PdWriteRes<WriteBSWAP32, [PdEX01]>;
373 defm : PdWriteRes<WriteBSWAP64, [PdEX01]>;
// WriteCMPXCHG: PdEX1 only, Lat 3, 3 resource cycles, 5 uops.
374 defm : PdWriteRes<WriteCMPXCHG, [PdEX1], 3, [3], 5>;
// WriteCMPXCHGRMW: 44 resource cycles on PdEX1, 1 each on PdStore and PdLoad,
// 2 uops.
375 defm : PdWriteRes<WriteCMPXCHGRMW, [PdEX1, PdStore, PdLoad], 3, [44, 1, 1], 2>;
// WriteXCHG: PdEX1, Lat 1, empty Res list, 2 uops.
376 defm : PdWriteRes<WriteXCHG, [PdEX1], 1, [], 2>;
378 def PdWriteCMPXCHG8rr : SchedWriteRes<[PdEX1]> {
380 let ResourceCycles = [3];
383 def : InstRW<[PdWriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;
385 def PdWriteCMPXCHG8rm : SchedWriteRes<[PdEX1]> {
387 let ResourceCycles = [23];
390 def : InstRW<[PdWriteCMPXCHG8rm], (instrs CMPXCHG8rm)>;
392 def PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm : SchedWriteRes<[PdEX1]> {
394 let ResourceCycles = [21];
397 def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
398 (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;
400 def PdWriteCMPXCHG8B : SchedWriteRes<[PdEX1]> {
402 let ResourceCycles = [26];
403 let NumMicroOps = 18;
405 def : InstRW<[PdWriteCMPXCHG8B], (instrs CMPXCHG8B)>;
407 def PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> {
409 let ResourceCycles = [69];
410 let NumMicroOps = 22;
412 def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>;
414 def PdWriteXADD : SchedWriteRes<[PdEX1]> {
416 let ResourceCycles = [1];
419 def : InstRW<[PdWriteXADD], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr)>;
421 def PdWriteXADDm : SchedWriteRes<[PdEX1]> {
423 let ResourceCycles = [20];
426 def : InstRW<[PdWriteXADDm], (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm)>;
// PdWriteResExPair argument order: write, ports, Lat, Res, UOps, LoadUOps.
// Integer multiplies all use PdEX1 together with the PdMul unit; the Res list
// gives resource cycles for [PdEX1, PdMul] in that order.
428 defm : PdWriteResExPair<WriteIMul8, [PdEX1, PdMul], 4, [1, 4]>;
429 defm : PdWriteResExPair<WriteIMul16, [PdEX1, PdMul], 4, [1, 5], 2>;
430 defm : PdWriteResExPair<WriteIMul16Imm, [PdEX1, PdMul], 5, [1, 5], 2>;
431 defm : PdWriteResExPair<WriteIMul16Reg, [PdEX1, PdMul], 4, [1, 2]>;
432 defm : PdWriteResExPair<WriteIMul32, [PdEX1, PdMul], 4, [1, 4]>;
433 defm : PdWriteResExPair<WriteIMul32Imm, [PdEX1, PdMul], 4, [1, 2], 1, 1>;
434 defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul], 4, [1, 2]>;
435 defm : PdWriteResExPair<WriteIMul64, [PdEX1, PdMul], 6, [1, 6]>;
436 defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul], 6, [1, 4],1, 1>;
437 defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul], 6, [1, 4]>;
// These multiply writes are declared unsupported in this model.
440 defm : X86WriteResUnsupported<WriteIMulH>;
441 defm : X86WriteResPairUnsupported<WriteMULX32>;
442 defm : X86WriteResPairUnsupported<WriteMULX64>;
// Unsigned divides: PdEX1 together with the PdDiv unit.
// NOTE(review): the comment on PdDiv's declaration associates it with PdEX0,
// while the divide writes here pair it with PdEX1 - confirm which is intended.
444 defm : PdWriteResExPair<WriteDiv8, [PdEX1, PdDiv], 12, [1, 12]>;
445 defm : PdWriteResExPair<WriteDiv16, [PdEX1, PdDiv], 15, [1, 15], 2>;
446 defm : PdWriteResExPair<WriteDiv32, [PdEX1, PdDiv], 14, [1, 14], 2>;
447 defm : PdWriteResExPair<WriteDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;
// Signed divides: same ports; for the 16- and 32-bit forms the PdDiv resource
// cycles (17, 25) exceed the Lat argument (15, 14).
449 defm : PdWriteResExPair<WriteIDiv8, [PdEX1, PdDiv], 12, [1, 12]>;
450 defm : PdWriteResExPair<WriteIDiv16, [PdEX1, PdDiv], 15, [1, 17], 2>;
451 defm : PdWriteResExPair<WriteIDiv32, [PdEX1, PdDiv], 14, [1, 25], 2>;
452 defm : PdWriteResExPair<WriteIDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;
// CRC32: PdEX01, Lat 2, 4 resource cycles, 3 uops.
454 defm : PdWriteResExPair<WriteCRC32, [PdEX01], 2, [4], 3>;
456 def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> {
458 let ResourceCycles = [10];
461 def : InstRW<[PdWriteCRC32r32r16], (instrs CRC32r32r16)>;
463 def PdWriteCRC32r32r32 : SchedWriteRes<[PdEX01]> {
465 let ResourceCycles = [12];
468 def : InstRW<[PdWriteCRC32r32r32], (instrs CRC32r32r32)>;
470 def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> {
472 let ResourceCycles = [17];
473 let NumMicroOps = 11;
475 def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>;
477 defm : PdWriteResExPair<WriteCMOV, [PdEX01]>; // Conditional move.
479 def PdWriteCMOVm : SchedWriteRes<[PdLoad, PdEX01]> {
481 let ResourceCycles = [3, 3];
485 def PdWriteCMOVmVar : SchedWriteVariant<[
486 SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_BE">>, [PdWriteCMOVm]>,
487 SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_A">>, [PdWriteCMOVm]>,
488 SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_L">>, [PdWriteCMOVm]>,
489 SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_GE">>, [PdWriteCMOVm]>,
490 SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_LE">>, [PdWriteCMOVm]>,
491 SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_G">>, [PdWriteCMOVm]>,
492 SchedVar<NoSchedPred, [WriteCMOV.Folded]>
495 def : InstRW<[PdWriteCMOVmVar], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
// FCMOV runs on PdFPU0 plus the PdFPFMA unit, with all PdWriteRes defaults.
497 defm : PdWriteRes<WriteFCMOV, [PdFPU0, PdFPFMA]>; // x87 conditional move.
// SETcc uses PdEX01; the store form additionally uses PdStore.
499 def : WriteRes<WriteSETCC, [PdEX01]>; // Setcc.
500 def : WriteRes<WriteSETCCStore, [PdEX01, PdStore]>;
502 def PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> {
503 let ResourceCycles = [2];
507 def PdSETGEmSETGmSETLEmSETLm : SchedWriteVariant<[
508 SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_GE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
509 SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_G">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
510 SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_LE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
511 SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_L">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
512 SchedVar<NoSchedPred, [WriteSETCCStore]>
514 def : InstRW<[PdSETGEmSETGmSETLEmSETLm], (instrs SETCCm)>;
516 defm : PdWriteRes<WriteLAHFSAHF, [PdEX01], 2, [4], 2>;
518 def PdWriteLAHF : SchedWriteRes<[PdEX01]> {
520 let ResourceCycles = [4];
523 def : InstRW<[PdWriteLAHF], (instrs LAHF)>;
525 def PdWriteSAHF : SchedWriteRes<[PdEX01]> {
527 let ResourceCycles = [2];
530 def : InstRW<[PdWriteSAHF], (instrs SAHF)>;
// PdWriteRes argument order: write, ports, Lat, Res, UOps.
// Register forms use PdEX01 only; the *Ld forms add PdLoad as a second port,
// so their Res lists are [PdEX01 cycles, PdLoad cycles].
532 defm : PdWriteRes<WriteBitTest, [PdEX01], 1, [2], 1>;
533 defm : PdWriteRes<WriteBitTestImmLd, [PdEX01, PdLoad], 5, [2, 3], 1>;
534 defm : PdWriteRes<WriteBitTestRegLd, [PdEX01, PdLoad], 5, [7, 2], 7>;
535 defm : PdWriteRes<WriteBitTestSet, [PdEX01], 2, [2], 2>;
536 defm : PdWriteRes<WriteBitTestSetImmLd, [PdEX01, PdLoad], 6, [1, 1], 4>;
537 defm : PdWriteRes<WriteBitTestSetRegLd, [PdEX01, PdLoad], 6, [1, 1], 10>;
539 def PdWriteBTSIm : SchedWriteRes<[PdEX01, PdLoad]> {
541 let ResourceCycles = [42, 1];
544 def : SchedAlias<WriteBitTestSetImmRMW, PdWriteBTSIm>;
545 def PdWriteBTSRm : SchedWriteRes<[PdEX01, PdLoad]> {
547 let ResourceCycles = [44, 1];
548 let NumMicroOps = 10;
550 def : SchedAlias<WriteBitTestSetRegRMW, PdWriteBTSRm>;
552 // This is for simple LEAs with one or two input operands.
553 def : WriteRes<WriteLEA, [PdEX01]> { let ResourceCycles = [2]; }
555 // This write is used for slow LEA instructions.
556 def PdWrite3OpsLEA : SchedWriteRes<[PdEX01]> {
558 let ResourceCycles = [2];
561 // On Piledriver, a slow LEA is either a 3Ops LEA (base, index, offset),
562 // or an LEA with a `Scale` value different than 1.
563 def PdSlowLEAPredicate : MCSchedPredicate<
565 // A 3-operand LEA (base, index, offset).
566 IsThreeOperandsLEAFn,
567 // An LEA with a "Scale" different than 1.
569 CheckIsImmOperand<2>,
570 CheckNot<CheckImmOperand<2, 1>>
575 def PdWriteLEA : SchedWriteVariant<[
576 SchedVar<PdSlowLEAPredicate, [PdWrite3OpsLEA]>,
577 SchedVar<NoSchedPred, [WriteLEA]>
580 def : InstRW<[PdWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
582 def PdWriteLEA16r : SchedWriteRes<[PdEX01]> {
583 let ResourceCycles = [3];
586 def : InstRW<[PdWriteLEA16r], (instrs LEA16r)>;
// PdWriteResExPair argument order: write, ports, Lat, Res, UOps, LoadUOps.
// Bit scans and population count may use either ALU pipe (PdEX01).
589 defm : PdWriteResExPair<WriteBSF, [PdEX01], 3, [6], 6, 2>;
590 defm : PdWriteResExPair<WriteBSR, [PdEX01], 4, [8], 7, 2>;
591 defm : PdWriteResExPair<WritePOPCNT, [PdEX01], 4, [4]>;
// LZCNT/TZCNT are restricted to PdEX0.
// NOTE(review): the PdCount resource declared earlier in this file is not
// referenced by any of these writes - confirm that is intentional.
592 defm : PdWriteResExPair<WriteLZCNT, [PdEX0], 2, [2], 2>;
593 defm : PdWriteResExPair<WriteTZCNT, [PdEX0], 2, [2], 2>;
595 // BMI1 BEXTR, BMI2 BZHI
596 defm : PdWriteResExPair<WriteBEXTR, [PdEX01], 2, [2], 2>;
597 defm : PdWriteResExPair<WriteBLS, [PdEX01], 2, [2], 2>;
// BZHI is left at the multiclass defaults for Lat, Res and UOps.
598 defm : PdWriteResExPair<WriteBZHI, [PdEX01]>;
600 def PdWriteBEXTRI : SchedWriteRes<[PdEX01]> {
602 let ResourceCycles = [4];
605 def : InstRW<[PdWriteBEXTRI], (instrs BEXTRI32ri, BEXTRI64ri)>;
607 def PdWriteBEXTRIm : SchedWriteRes<[PdEX01]> {
609 let ResourceCycles = [5];
612 def : InstRW<[PdWriteBEXTRIm], (instrs BEXTRI32mi, BEXTRI64mi)>;
614 ////////////////////////////////////////////////////////////////////////////////
615 // Integer shifts and rotates.
616 ////////////////////////////////////////////////////////////////////////////////
// Shifts and rotates on PdEX01. The immediate forms pass Lat 1 with 2 resource
// cycles; the CL forms leave Lat, Res and UOps at the multiclass defaults.
618 defm : PdWriteResExPair<WriteShift, [PdEX01], 1, [2]>;
619 defm : PdWriteResExPair<WriteShiftCL, [PdEX01]>;
620 defm : PdWriteResExPair<WriteRotate, [PdEX01], 1, [2]>;
621 defm : PdWriteResExPair<WriteRotateCL, [PdEX01]>;
623 def PdWriteRCL8rCL : SchedWriteRes<[PdEX01]> {
625 let ResourceCycles = [24];
626 let NumMicroOps = 26;
628 def : InstRW<[PdWriteRCL8rCL], (instrs RCL8rCL)>;
630 def PdWriteRCR8ri : SchedWriteRes<[PdEX01]> {
632 let ResourceCycles = [23];
633 let NumMicroOps = 23;
635 def : InstRW<[PdWriteRCR8ri], (instrs RCR8ri)>;
637 def PdWriteRCR8rCL : SchedWriteRes<[PdEX01]> {
639 let ResourceCycles = [22];
640 let NumMicroOps = 24;
642 def : InstRW<[PdWriteRCR8rCL], (instrs RCR8rCL)>;
644 def PdWriteRCL16rCL : SchedWriteRes<[PdEX01]> {
646 let ResourceCycles = [20];
647 let NumMicroOps = 22;
649 def : InstRW<[PdWriteRCL16rCL], (instrs RCL16rCL)>;
651 def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> {
653 let ResourceCycles = [19];
654 let NumMicroOps = 19;
656 def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>;
658 def PdWriteRCL3264rCL : SchedWriteRes<[PdEX01]> {
660 let ResourceCycles = [14];
661 let NumMicroOps = 17;
663 def : InstRW<[PdWriteRCL3264rCL], (instrs RCL32rCL, RCL64rCL)>;
665 def PdWriteRCR3264rCL : SchedWriteRes<[PdEX01]> {
667 let ResourceCycles = [13];
668 let NumMicroOps = 16;
670 def : InstRW<[PdWriteRCR3264rCL], (instrs RCR32rCL, RCR64rCL)>;
672 def PdWriteRCR32riRCR64ri : SchedWriteRes<[PdEX01]> {
674 let ResourceCycles = [14];
675 let NumMicroOps = 15;
677 def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;
680 def PdWriteRCR16rCL : SchedWriteRes<[PdEX01]> {
682 let ResourceCycles = [18];
683 let NumMicroOps = 20;
685 def : InstRW<[PdWriteRCR16rCL], (instrs RCR16rCL)>;
687 def PdWriteRCL16ri : SchedWriteRes<[PdEX01]> {
689 let ResourceCycles = [21];
690 let NumMicroOps = 21;
692 def : InstRW<[PdWriteRCL16ri], (instrs RCL16ri)>;
694 def PdWriteRCL3264ri : SchedWriteRes<[PdEX01]> {
696 let ResourceCycles = [15];
697 let NumMicroOps = 16;
699 def : InstRW<[PdWriteRCL3264ri], (instrs RCL32ri, RCL64ri)>;
701 def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> {
703 let ResourceCycles = [25];
704 let NumMicroOps = 25;
706 def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>;
// Register double shifts on PdEX01 (arguments: write, ports, Lat, Res, UOps).
// Both pass Lat 3; the CL form uses 8 resource cycles / 7 uops versus 6 / 6.
709 defm : PdWriteRes<WriteSHDrri, [PdEX01], 3, [6], 6>;
710 defm : PdWriteRes<WriteSHDrrcl, [PdEX01], 3, [8], 7>;
712 def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> {
714 let ResourceCycles = [6];
717 def : InstRW<[PdWriteSHLD32rri8SHRD16rri8 ], (instrs SHLD32rri8, SHRD16rri8)>;
719 def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> {
721 let ResourceCycles = [6];
724 def : InstRW<[PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL], (instrs SHLD16rrCL,
// Memory double shifts: PdLoad for 1 resource cycle, PdEX01 for 22, Lat 4,
// 8 uops. The immediate and CL forms are modelled identically.
728 defm : PdWriteRes<WriteSHDmri, [PdLoad, PdEX01], 4, [1, 22], 8>;
729 defm : PdWriteRes<WriteSHDmrcl, [PdLoad, PdEX01], 4, [1, 22], 8>;
731 ////////////////////////////////////////////////////////////////////////////////
732 // Floating point. This covers both scalar and vector operations.
733 ////////////////////////////////////////////////////////////////////////////////
// PdWriteRes argument order: write, ports, Lat, Res, UOps.
// x87 constant loads: PdFPU1 plus the FP store unit, Lat 3.
735 defm : PdWriteRes<WriteFLD0, [PdFPU1, PdFPSTO], 3>;
736 defm : PdWriteRes<WriteFLD1, [PdFPU1, PdFPSTO], 3>;
737 defm : PdWriteRes<WriteFLDC, [PdFPU1, PdFPSTO], 3>;
// FP loads: Res lists are [PdLoad, PdFPU01, PdFPFMA] cycles; the Y form
// differs from the others only in using 2 uops.
739 defm : PdWriteRes<WriteFLoad, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
740 defm : PdWriteRes<WriteFLoadX, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
741 defm : PdWriteRes<WriteFLoadY, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3], 2>;
// Masked loads: same ports, Lat 6.
743 defm : PdWriteRes<WriteFMaskedLoad, [PdLoad, PdFPU01, PdFPFMA], 6, [3, 1, 4]>;
744 defm : PdWriteRes<WriteFMaskedLoadY, [PdLoad, PdFPU01, PdFPFMA], 6, [3, 2, 4], 2>;
// FP stores: Res lists are [PdStore, PdFPU23, PdFPSTO] cycles. The Y form
// holds PdFPU23 for 36 cycles and uses 4 uops.
746 defm : PdWriteRes<WriteFStore, [PdStore, PdFPU23, PdFPSTO], 2, [1, 3, 1]>;
747 defm : PdWriteRes<WriteFStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1, 3, 1]>;
748 defm : PdWriteRes<WriteFStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [1, 36, 2], 4>;
750 def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU23, PdFPSTO]> {
752 let ResourceCycles = [1, 3, 1];
755 def : InstRW<[PdWriteMOVHPm], (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>;
757 def PdWriteVMOVUPDYmrVMOVUPSYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
760 def : InstRW<[PdWriteVMOVUPDYmrVMOVUPSYmr], (instrs VMOVUPDYmr, VMOVUPSYmr)>;
// PdWriteRes argument order: write, ports, Lat, Res, UOps.
// Non-temporal FP stores: PdStore, PdFPU1 and PdFPSTO, Lat 3.
762 defm : PdWriteRes<WriteFStoreNT, [PdStore, PdFPU1, PdFPSTO], 3>;
763 defm : PdWriteRes<WriteFStoreNTX, [PdStore, PdFPU1, PdFPSTO], 3>;
764 defm : PdWriteRes<WriteFStoreNTY, [PdStore, PdFPU1, PdFPSTO], 3, [2, 2, 2], 4>;
// Masked FP stores: the third Res entry is the PdFPFMA occupancy (188 cycles,
// 376 for the Y forms); uop counts are 18 and 34 respectively.
766 defm : PdWriteRes<WriteFMaskedStore32, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
767 defm : PdWriteRes<WriteFMaskedStore64, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
768 defm : PdWriteRes<WriteFMaskedStore32Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;
769 defm : PdWriteRes<WriteFMaskedStore64Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;
// FP register moves on PdFPU01 plus PdFPFMA.
771 defm : PdWriteRes<WriteFMove, [PdFPU01, PdFPFMA]>;
772 defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA], 1, [1, 2]>;
773 defm : PdWriteRes<WriteFMoveY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
775 defm : PdWriteRes<WriteEMMS, [PdFPU01, PdFPFMA], 2>;
// FP add: PdFPU0 plus PdFPFMA, Lat 5. The XMM/YMM pair multiclasses also
// generate the folded-load variant of each write; the Z form is unsupported.
777 defm : PdWriteResXMMPair<WriteFAdd, [PdFPU0, PdFPFMA], 5>;
778 defm : PdWriteResXMMPair<WriteFAddX, [PdFPU0, PdFPFMA], 5>;
779 defm : PdWriteResYMMPair<WriteFAddY, [PdFPU0, PdFPFMA], 5, [1, 2]>;
780 defm : X86WriteResPairUnsupported<WriteFAddZ>;
782 def PdWriteX87Add: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
784 let ResourceCycles = [3, 1, 10];
786 def : InstRW<[PdWriteX87Add], (instrs ADD_FI16m, ADD_FI32m, ADD_F32m, ADD_F64m,
787 SUB_FI16m, SUB_FI32m, SUB_F32m, SUB_F64m,
788 SUBR_FI16m, SUBR_FI32m, SUBR_F32m, SUBR_F64m)>;
// Pair multiclass argument order: write, ports, Lat, Res, UOps, LoadUOps.
// All writes below use PdFPU0 plus PdFPFMA; every Z form is unsupported.
// 64-bit FP add: Lat 5.
790 defm : PdWriteResXMMPair<WriteFAdd64, [PdFPU0, PdFPFMA], 5>;
791 defm : PdWriteResXMMPair<WriteFAdd64X, [PdFPU0, PdFPFMA], 5>;
792 defm : PdWriteResYMMPair<WriteFAdd64Y, [PdFPU0, PdFPFMA], 5, [1, 2]>;
793 defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
// FP compare: Lat 2.
795 defm : PdWriteResXMMPair<WriteFCmp, [PdFPU0, PdFPFMA], 2>;
796 defm : PdWriteResXMMPair<WriteFCmpX, [PdFPU0, PdFPFMA], 2>;
797 defm : PdWriteResYMMPair<WriteFCmpY, [PdFPU0, PdFPFMA], 2, [1, 2]>;
798 defm : X86WriteResPairUnsupported<WriteFCmpZ>;
800 defm : PdWriteResXMMPair<WriteFCmp64, [PdFPU0, PdFPFMA], 2>;
801 defm : PdWriteResXMMPair<WriteFCmp64X, [PdFPU0, PdFPFMA], 2>;
802 defm : PdWriteResYMMPair<WriteFCmp64Y, [PdFPU0, PdFPFMA], 2, [1, 2]>;
803 defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
// FCom writes additionally list the integer pipe PdEX0; Lat 1, empty Res
// list, 2 uops.
805 defm : PdWriteResXMMPair<WriteFCom, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
806 defm : PdWriteResXMMPair<WriteFComX, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
808 def PdWriteFCOMPm : SchedWriteRes<[PdFPU1, PdFPFMA]> {
811 def : InstRW<[PdWriteFCOMPm], (instrs FCOM32m, FCOM64m, FCOMP32m, FCOMP64m)>;
// TST_F and UCOM_FPPr are bound to a body-less write on PdFPU1 plus PdFPFMA,
// so no SchedWriteRes field is overridden for them.
813 def PdWriteTST_F_UCOM_FPPr : SchedWriteRes<[PdFPU1, PdFPFMA]>;
814 def : InstRW<[PdWriteTST_F_UCOM_FPPr], (instrs TST_F, UCOM_FPPr)>;
// FP multiply: PdFPU1 plus PdFPFMA, Lat 5; the Z form is unsupported.
816 defm : PdWriteResXMMPair<WriteFMul, [PdFPU1, PdFPFMA], 5>;
817 defm : PdWriteResXMMPair<WriteFMulX, [PdFPU1, PdFPFMA], 5>;
818 defm : PdWriteResYMMPair<WriteFMulY, [PdFPU1, PdFPFMA], 5, [1, 2]>;
819 defm : X86WriteResPairUnsupported<WriteFMulZ>;
821 def PdWriteX87Mul: SchedWriteRes<[PdLoad, PdFPU1, PdFPFMA]> {
823 let ResourceCycles = [3, 1, 10];
825 def : InstRW<[PdWriteX87Mul], (instrs MUL_FI16m, MUL_FI32m, MUL_F32m, MUL_F64m)>;
// Pair multiclass argument order: write, ports, Lat, Res, UOps, LoadUOps.
// 64-bit FP multiply: PdFPU1 plus PdFPFMA, Lat 5.
827 defm : PdWriteResXMMPair<WriteFMul64, [PdFPU1, PdFPFMA], 5>;
828 defm : PdWriteResXMMPair<WriteFMul64X, [PdFPU1, PdFPFMA], 5>;
829 defm : PdWriteResYMMPair<WriteFMul64Y, [PdFPU1, PdFPFMA], 5, [1, 2]>;
830 defm : X86WriteResPairUnsupported<WriteFMul64Z>;
// FMA names the whole-FPU group PdFPU (any of the four pipes) rather than one
// pipe, and holds PdFPFMA for 3 cycles.
832 defm : PdWriteResXMMPair<WriteFMA, [PdFPU, PdFPFMA], 5, [1, 3]>;
833 defm : PdWriteResXMMPair<WriteFMAX, [PdFPU, PdFPFMA], 5, [1, 3]>;
834 defm : PdWriteResYMMPair<WriteFMAY, [PdFPU, PdFPFMA], 5, [1, 3]>;
835 defm : X86WriteResPairUnsupported<WriteFMAZ>;
// Dot products: PdFPU1 plus PdFPFMA with long PdFPFMA occupancy and high uop
// counts; these pass an explicit LoadUOps (2, 2 and 4).
838 defm : PdWriteResXMMPair<WriteDPPD, [PdFPU1, PdFPFMA], 15, [1, 10], 15, 2>;
840 defm : PdWriteResXMMPair<WriteDPPS, [PdFPU1, PdFPFMA], 25, [1, 14], 16, 2>;
841 defm : PdWriteResYMMPair<WriteDPPSY, [PdFPU1, PdFPFMA], 27, [2, 25], /*or 29*/ 25, 4>;
842 defm : X86WriteResPairUnsupported<WriteDPPSZ>;
844 def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> {
846 let ResourceCycles = [1, 14];
847 let NumMicroOps = 17;
849 def : InstRW<[PdWriteVDPPSrri], (instrs VDPPSrri)>;
// Pair multiclass argument order: write, ports, Lat, Res, UOps, LoadUOps.
// All writes below use PdFPU1 plus PdFPFMA; every Z form is unsupported.
// Reciprocal estimate: Lat 5.
851 defm : PdWriteResXMMPair<WriteFRcp, [PdFPU1, PdFPFMA], 5>;
852 defm : PdWriteResXMMPair<WriteFRcpX, [PdFPU1, PdFPFMA], 5>;
853 defm : PdWriteResYMMPair<WriteFRcpY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
854 defm : X86WriteResPairUnsupported<WriteFRcpZ>;
// Reciprocal square-root estimate: Lat 5. The scalar form passes an explicit
// Res of [1, 2] while the X form leaves Res at its default.
856 defm : PdWriteResXMMPair<WriteFRsqrt, [PdFPU1, PdFPFMA], 5, [1, 2]>;
857 defm : PdWriteResXMMPair<WriteFRsqrtX, [PdFPU1, PdFPFMA], 5>;
858 defm : PdWriteResYMMPair<WriteFRsqrtY, [PdFPU1, PdFPFMA], 5, [2, 2]>;
859 defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
// Divide: Lat 9 with PdFPFMA held for 9 cycles; the Y form doubles both Res
// entries to [2, 18].
861 defm : PdWriteResXMMPair<WriteFDiv, [PdFPU1, PdFPFMA], 9, [1, 9]>;
862 defm : PdWriteResXMMPair<WriteFDivX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
863 defm : PdWriteResYMMPair<WriteFDivY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
864 defm : X86WriteResPairUnsupported<WriteFDivZ>;
866 def PdWriteX87Div: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
868 let ResourceCycles = [3, 1, 18];
870 def : InstRW<[PdWriteX87Div], (instrs DIV_FI16m, DIV_FI32m,
871 DIVR_FI16m, DIVR_FI32m,
873 DIVR_F32m, DIVR_F64m)>;
// Pair multiclass argument order: write, ports, Lat, Res, UOps, LoadUOps.
// 64-bit divide and both square-root widths share one shape: PdFPU1 plus
// PdFPFMA, Lat 9, Res [1, 9] (Y forms [2, 18]); every Z form is unsupported.
875 defm : PdWriteResXMMPair<WriteFDiv64, [PdFPU1, PdFPFMA], 9, [1, 9]>;
876 defm : PdWriteResXMMPair<WriteFDiv64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
877 defm : PdWriteResYMMPair<WriteFDiv64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
878 defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
880 defm : PdWriteResXMMPair<WriteFSqrt, [PdFPU1, PdFPFMA], 9, [1, 9]>;
881 defm : PdWriteResXMMPair<WriteFSqrtX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
882 defm : PdWriteResYMMPair<WriteFSqrtY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
883 defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
885 defm : PdWriteResXMMPair<WriteFSqrt64, [PdFPU1, PdFPFMA], 9, [1, 9]>;
886 defm : PdWriteResXMMPair<WriteFSqrt64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
887 defm : PdWriteResYMMPair<WriteFSqrt64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
888 defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
// 80-bit square root and sign manipulation pass Lat 1 while holding PdFPFMA
// for 18 and 4 cycles respectively.
890 defm : PdWriteResXMMPair<WriteFSqrt80, [PdFPU1, PdFPFMA], 1, [1, 18]>;
891 defm : PdWriteResXMMPair<WriteFSign, [PdFPU1, PdFPFMA], 1, [1, 4]>;
// Rounding uses PdFPU1 plus the FP store unit, Lat 4.
893 defm : PdWriteResXMMPair<WriteFRnd, [PdFPU1, PdFPSTO], 4, []>;
894 defm : PdWriteResYMMPair<WriteFRndY, [PdFPU1, PdFPSTO], 4, [2, 1], 2>;
895 defm : X86WriteResPairUnsupported<WriteFRndZ>;
// Per-instruction writes for the VFRCZ* opcodes. All five records use
// [PdFPU1, PdFPSTO]; each is attached by the InstRW that follows it:
//   PdWriteVFRCZP  -> VFRCZPDrr, VFRCZPSrr            ResourceCycles [2, 1]
//   PdWriteVFRCZS  -> VFRCZSDrr, VFRCZSSrr            ResourceCycles [10, 1]
//   PdWriteVFRCZm  -> the four xmm *rm forms          ResourceCycles [2, 1]
//   PdWriteVFRCZY  -> VFRCZPSYrr, VFRCZPDYrr          ResourceCycles [3, 1]
//   PdWriteVFRCZYm -> VFRCZPSYrm, VFRCZPDYrm          ResourceCycles [4, 1]
// NOTE(review): the embedded numbering skips lines inside every record here
// (e.g. 897 -> 899 -> 902), so the remaining fields and the closing braces
// are not visible in this excerpt - confirm against the complete file.
897 def PdWriteVFRCZP : SchedWriteRes<[PdFPU1, PdFPSTO]> {
899 let ResourceCycles = [2, 1];
902 def : InstRW<[PdWriteVFRCZP], (instrs VFRCZPDrr, VFRCZPSrr)>;
904 def PdWriteVFRCZS : SchedWriteRes<[PdFPU1, PdFPSTO]> {
906 let ResourceCycles = [10, 1];
909 def : InstRW<[PdWriteVFRCZS], (instrs VFRCZSDrr, VFRCZSSrr)>;
911 def PdWriteVFRCZm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
913 let ResourceCycles = [2, 1];
916 def : InstRW<[PdWriteVFRCZm], (instrs VFRCZPDrm, VFRCZPSrm,
917 VFRCZSDrm, VFRCZSSrm)>;
919 def PdWriteVFRCZY : SchedWriteRes<[PdFPU1, PdFPSTO]> {
921 let ResourceCycles = [3, 1];
924 def : InstRW<[PdWriteVFRCZY], (instrs VFRCZPSYrr, VFRCZPDYrr)>;
926 def PdWriteVFRCZYm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
928 let ResourceCycles = [4, 1];
931 def : InstRW<[PdWriteVFRCZYm], (instrs VFRCZPSYrm, VFRCZPDYrm)>;
// WriteFLogic family on PdFPU01 and PdFPFMA. The YMM entry raises only the
// first list value ([2, 2] versus [1, 2]); WriteFLogicZ is unsupported.
933 defm : PdWriteResXMMPair<WriteFLogic, [PdFPU01, PdFPFMA], 2, [1, 2]>;
934 defm : PdWriteResYMMPair<WriteFLogicY, [PdFPU01, PdFPFMA], 2, [2, 2]>;
935 defm : X86WriteResPairUnsupported<WriteFLogicZ>;
// WriteFTest family. These entries also list PdEX0 alongside the FP units.
// The XMM form is pinned to PdFPU0 with an empty list; the YMM form uses
// PdFPU01 with [4, 4, 1] and two trailing arguments (4, 2). WriteFTestZ is
// unsupported.
// NOTE(review): the meaning of the trailing arguments comes from the
// PdWriteRes*Pair multiclasses declared outside this excerpt.
937 defm : PdWriteResXMMPair<WriteFTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
938 defm : PdWriteResYMMPair<WriteFTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [4, 4, 1], 4, 2>;
939 defm : X86WriteResPairUnsupported<WriteFTestZ>;
// WriteFShuffle family on PdFPU01 and PdFPFMA. The YMM entry doubles both
// list values ([2, 4] versus [1, 2]) and adds a trailing 2; WriteFShuffleZ is
// unsupported.
941 defm : PdWriteResXMMPair<WriteFShuffle, [PdFPU01, PdFPFMA], 2, [1, 2]>;
942 defm : PdWriteResYMMPair<WriteFShuffleY, [PdFPU01, PdFPFMA], 2, [2, 4], 2>;
943 defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
// Dedicated write for VBROADCASTF128 on [PdFPU01, PdFPFMA] with
// ResourceCycles [1, 3], attached by the InstRW below.
// NOTE(review): the embedded numbering jumps 945 -> 947 -> 950, so the rest
// of the record body and its closing brace are not visible in this excerpt.
945 def PdWriteVBROADCASTF128 : SchedWriteRes<[PdFPU01, PdFPFMA]> {
947 let ResourceCycles = [1, 3];
950 def : InstRW<[PdWriteVBROADCASTF128], (instrs VBROADCASTF128)>;
// WriteFVarShuffle family. Same units and lists as the WriteFShuffle entries
// of this file, but with third argument 3 instead of 2; WriteFVarShuffleZ is
// unsupported.
952 defm : PdWriteResXMMPair<WriteFVarShuffle, [PdFPU01, PdFPFMA], 3, [1, 2]>;
953 defm : PdWriteResYMMPair<WriteFVarShuffleY, [PdFPU01, PdFPFMA], 3, [2, 4], 2>;
954 defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
// WriteFBlend and WriteFVarBlend families on PdFPU01 and PdFPFMA. The XMM
// entries are identical ([1, 3]); the YMM entries differ in the second list
// value (WriteFBlendY [2, 3], WriteFVarBlendY [2, 4]). Both Z variants are
// unsupported.
956 defm : PdWriteResXMMPair<WriteFBlend, [PdFPU01, PdFPFMA], 2, [1, 3]>;
957 defm : PdWriteResYMMPair<WriteFBlendY, [PdFPU01, PdFPFMA], 2, [2, 3], 2>;
958 defm : X86WriteResPairUnsupported<WriteFBlendZ>;
960 defm : PdWriteResXMMPair<WriteFVarBlend, [PdFPU01, PdFPFMA], 2, [1, 3]>;
961 defm : PdWriteResYMMPair<WriteFVarBlendY, [PdFPU01, PdFPFMA], 2, [2, 4], 2>;
962 defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
// WriteFShuffle256 is modelled through the XMM pair multiclass (not the YMM
// one) on PdFPU01 and PdFPFMA; WriteFVarShuffle256 is declared unsupported.
964 defm : PdWriteResXMMPair<WriteFShuffle256, [PdFPU01, PdFPFMA], 2, [1, 3], 2>;
965 defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
// Dedicated writes for VEXTRACTF128, one for the rr form and one for the mr
// form. Both use [PdFPU01, PdFPFMA]; the mr form has the larger second
// ResourceCycles value (4 versus 2). Neither resource list names PdStore.
// NOTE(review): the embedded numbering skips lines inside both records
// (967 -> 969 -> 971, 973 -> 975 -> 978), so the remaining fields and the
// closing braces are not visible in this excerpt.
967 def PdWriteVEXTRACTF128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
969 let ResourceCycles = [1, 2];
971 def : InstRW<[PdWriteVEXTRACTF128rr], (instrs VEXTRACTF128rr)>;
973 def PdWriteVEXTRACTF128mr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
975 let ResourceCycles = [1, 4];
978 def : InstRW<[PdWriteVEXTRACTF128mr], (instrs VEXTRACTF128mr)>;
// Dedicated writes for VPERM2F128 (rr and rm forms), both on
// [PdFPU01, PdFPFMA]. The rm record is the only one here whose Latency and
// NumMicroOps are visible: Latency 8 (annotated "4 + 4"), ResourceCycles
// [1, 8], NumMicroOps 10. The rr record shows ResourceCycles [1, 6].
// NOTE(review): the embedded numbering skips lines in the rr record
// (980 -> 982 -> 985) and after 990, so the rr Latency/NumMicroOps and both
// closing braces are not visible in this excerpt.
980 def PdWriteVPERM2F128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
982 let ResourceCycles = [1, 6];
985 def : InstRW<[PdWriteVPERM2F128rr], (instrs VPERM2F128rr)>;
987 def PdWriteVPERM2F128rm : SchedWriteRes<[PdFPU01, PdFPFMA]> {
988 let Latency = 8; // 4 + 4
989 let ResourceCycles = [1, 8];
990 let NumMicroOps = 10;
992 def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>;
994 ////////////////////////////////////////////////////////////////////////////////
// Conversion operations.
996 ////////////////////////////////////////////////////////////////////////////////
// WriteCvt{SS,PS,SD,PD}2I families. All of them list PdFPU0, PdFPCVT and
// PdFPSTO. The scalar entries (WriteCvtSS2I, WriteCvtSD2I) additionally list
// PdFPFMA and PdEX0 and use third argument 13; WriteCvtPD2IY additionally
// lists PdFPFMA. The Z variants are declared unsupported.
// NOTE(review): argument meanings come from the PdWriteRes*Pair multiclasses,
// which are declared outside this excerpt.
998 defm : PdWriteResXMMPair<WriteCvtSS2I, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;
1000 defm : PdWriteResXMMPair<WriteCvtPS2I, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
1001 defm : PdWriteResYMMPair<WriteCvtPS2IY, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
1002 defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
1004 defm : PdWriteResXMMPair<WriteCvtSD2I, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;
1006 defm : PdWriteResXMMPair<WriteCvtPD2I, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
1007 defm : PdWriteResYMMPair<WriteCvtPD2IY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
1008 defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
// Dedicated two-micro-op write for MMX_CVTTPD2PIirr on
// [PdFPU0, PdFPCVT, PdFPSTO], attached by the InstRW below.
// NOTE(review): the embedded numbering jumps 1010 -> 1012 -> 1014, so the
// rest of the record body and its closing brace are not visible here.
1010 def PdWriteMMX_CVTTPD2PIirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
1012 let NumMicroOps = 2;
1014 def : InstRW<[PdWriteMMX_CVTTPD2PIirr], (instrs MMX_CVTTPD2PIirr)>;
// WriteCvtI2SS, WriteCvtI2PS(Y) and WriteCvtI2SD, all on
// [PdFPU0, PdFPCVT, PdFPSTO] with third argument 4. WriteCvtI2PSZ is
// unsupported. The FIXMEs record known modelling gaps: the scalar entries'
// .Folded variants should have one micro-op fewer than what is modelled here.
1016 // FIXME: f+3 ST, LD+STC latency
1017 defm : PdWriteResXMMPair<WriteCvtI2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
1018 // FIXME: .Folded version is one NumMicroOp *less*..
1020 defm : PdWriteResXMMPair<WriteCvtI2PS, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
1021 defm : PdWriteResYMMPair<WriteCvtI2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
1022 defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
1024 defm : PdWriteResXMMPair<WriteCvtI2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
1025 // FIXME: .Folded version is one NumMicroOp *less*..
// Shared override for the register forms CVTSI642SDrr, CVTSI642SSrr,
// CVTSI2SDrr and CVTSI2SSrr: ResourceCycles [1, 3, 1] across
// [PdFPU0, PdFPCVT, PdFPSTO], two micro-ops.
// NOTE(review): the record name spells "CVTSI2SDr" while the instruction it
// covers is CVTSI2SDrr; the name is only an identifier, so this is cosmetic.
// The embedded numbering jumps 1027 -> 1029 and 1030 -> 1032, so the Latency
// line and closing brace are not visible in this excerpt.
1027 def PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
1029 let ResourceCycles = [1, 3, 1];
1030 let NumMicroOps = 2;
1032 def : InstRW<[PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr], (instrs CVTSI642SDrr, CVTSI642SSrr, CVTSI2SDrr, CVTSI2SSrr)>;
// WriteCvtI2PD plus the FP-to-FP conversion families (SS2SD, PS2PD, SD2SS,
// PD2PS). All list PdFPU0, PdFPCVT and PdFPSTO; WriteCvtPD2PSY additionally
// lists PdFPFMA. The scalar entries use third argument 4 with [1, 2, 1]; the
// packed entries use third argument 8. The Z variants are unsupported.
1034 defm : PdWriteResXMMPair<WriteCvtI2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
1035 defm : PdWriteResYMMPair<WriteCvtI2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
1036 defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
1038 defm : PdWriteResXMMPair<WriteCvtSS2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
1040 defm : PdWriteResXMMPair<WriteCvtPS2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
1041 defm : PdWriteResYMMPair<WriteCvtPS2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
1042 defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
1044 defm : PdWriteResXMMPair<WriteCvtSD2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
1046 defm : PdWriteResXMMPair<WriteCvtPD2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
1047 defm : PdWriteResYMMPair<WriteCvtPD2PSY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
1048 defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
// Two-micro-op overrides for MMX conversion register forms, both on
// [PdFPU0, PdFPCVT, PdFPSTO]: one shared record whose InstRW starts with
// MMX_CVTPD2PIirr, and one record for MMX_CVTPI2PSirr.
// NOTE(review): the first InstRW's instruction list is cut off after
// "MMX_CVTPD2PIirr," (embedded numbering 1054 -> 1057); going by the record
// name, the missing operand is presumably MMX_CVTPI2PDirr - confirm. Lines
// of both record bodies, including the closing braces, are likewise not
// visible in this excerpt.
1050 def PdWriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
1052 let NumMicroOps = 2;
1054 def : InstRW<[PdWriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr], (instrs MMX_CVTPD2PIirr,
1057 def PdWriteMMX_CVTPI2PSirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
1059 let NumMicroOps = 2;
1061 def : InstRW<[PdWriteMMX_CVTPI2PSirr], (instrs MMX_CVTPI2PSirr)>;
// Half-precision conversion families. WriteCvtPH2PS(Y) go through the pair
// multiclasses; WriteCvtPS2PH(Y) and the store forms WriteCvtPS2PH(Y)St use
// the non-pair PdWriteRes. The store forms append PdStore to the resource
// list and use third argument 4 instead of 8. The Y forms of WriteCvtPS2PH
// additionally list PdFPFMA. All Z variants are unsupported.
1063 defm : PdWriteResXMMPair<WriteCvtPH2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2, 1>;
1064 defm : PdWriteResYMMPair<WriteCvtPH2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 3>;
1065 defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
1067 defm : PdWriteRes<WriteCvtPS2PH, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2>;
1068 defm : PdWriteRes<WriteCvtPS2PHY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
1069 defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
1071 defm : PdWriteRes<WriteCvtPS2PHSt, [PdFPU0, PdFPCVT, PdFPSTO, PdStore], 4, [1, 2, 1, 1], 3>;
1072 defm : PdWriteRes<WriteCvtPS2PHYSt, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdStore], 4, [1, 2, 1, 1, 1], 4>;
1073 defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
1075 ////////////////////////////////////////////////////////////////////////////////
1076 // Vector integer operations.
1077 ////////////////////////////////////////////////////////////////////////////////
// Vector load write classes (plain, non-temporal and masked), all on
// [PdLoad, PdFPU01, PdFPMAL]. Every entry gives PdLoad the value 3; the Y
// forms raise the PdFPU01 value from 1 to 2. Plain and NT loads use third
// argument 5, masked loads 6.
// NOTE(review): WriteVecLoadNTY has no trailing argument while WriteVecLoadY
// and WriteVecMaskedLoadY pass 2 - confirm whether that is intentional.
1079 defm : PdWriteRes<WriteVecLoad, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
1080 defm : PdWriteRes<WriteVecLoadX, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
1081 defm : PdWriteRes<WriteVecLoadY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 3], 2>;
1083 defm : PdWriteRes<WriteVecLoadNT, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 4]>;
1084 defm : PdWriteRes<WriteVecLoadNTY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 4]>;
1086 defm : PdWriteRes<WriteVecMaskedLoad, [PdLoad, PdFPU01, PdFPMAL], 6, [3, 1, 2]>;
1087 defm : PdWriteRes<WriteVecMaskedLoadY, [PdLoad, PdFPU01, PdFPMAL], 6, [3, 2, 4], 2>;
// Vector store write classes. The plain stores use
// [PdStore, PdFPU23, PdFPSTO]; the non-temporal stores and the VMOVDQUYmr
// override use PdFPU1 instead of PdFPU23. VMOVDQUYmr is modelled as 8
// micro-ops.
// NOTE(review): WriteVecStoreY's middle list value is 36, far larger than any
// neighbouring entry - presumably deliberate, but worth confirming against
// the measurement source. The embedded numbering jumps 1094 -> 1096, so the
// closing brace of PdWriteVMOVDQUYmr is not visible in this excerpt.
1089 defm : PdWriteRes<WriteVecStore, [PdStore, PdFPU23, PdFPSTO], 2, [1, 3, 1]>;
1090 defm : PdWriteRes<WriteVecStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1, 3, 1]>;
1091 defm : PdWriteRes<WriteVecStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [2, 36, 2], 4>;
1093 def PdWriteVMOVDQUYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
1094 let NumMicroOps = 8;
1096 def : InstRW<[PdWriteVMOVDQUYmr], (instrs VMOVDQUYmr)>;
1098 defm : PdWriteRes<WriteVecStoreNT, [PdStore, PdFPU1, PdFPSTO], 2>;
1099 defm : PdWriteRes<WriteVecStoreNTY, [PdStore, PdFPU1, PdFPSTO], 2, [2, 2, 2], 4>;
// All four integer masked-store write classes (32/64-bit, XMM and Y forms)
// are declared unsupported in this model.
1101 defm : X86WriteResUnsupported<WriteVecMaskedStore32>;
1102 defm : X86WriteResUnsupported<WriteVecMaskedStore32Y>;
1103 defm : X86WriteResUnsupported<WriteVecMaskedStore64>;
1104 defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;
// Vector register-move write classes on [PdFPU01, PdFPMAL], followed by two
// per-instruction records (also on [PdFPU01, PdFPMAL]) for MOVDQArr and
// MMX_MOVQ2DQrr.
// NOTE(review): no field of PdWriteMOVDQArr or PdWriteMOVQ2DQrr is visible
// here (embedded numbering 1110 -> 1112 and 1114 -> 1117), so this excerpt
// does not show how they differ from WriteVecMoveX - confirm against the
// complete file.
1106 defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
1107 defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 1, [1, 2]>;
1108 defm : PdWriteRes<WriteVecMoveY, [PdFPU01, PdFPMAL], 2, [2, 2], 2>;
1110 def PdWriteMOVDQArr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
1112 def : InstRW<[PdWriteMOVDQArr], (instrs MOVDQArr)>;
1114 def PdWriteMOVQ2DQrr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
1117 def : InstRW<[PdWriteMOVQ2DQrr], (instrs MMX_MOVQ2DQrr)>;
// Moves between vector registers and GPRs. Both directions use third argument
// 11; the to-GPR direction is pinned to PdFPU0 and also lists PdEX0, while
// the from-GPR direction uses PdFPU01 and passes [1, 2] plus a trailing 2.
1119 defm : PdWriteRes<WriteVecMoveToGpr, [PdFPU0, PdFPFMA, PdEX0], 11>;
1120 defm : PdWriteRes<WriteVecMoveFromGpr, [PdFPU01, PdFPFMA], 11, [1, 2], 2>;
// Vector integer ALU, shift and shift-by-immediate families, all on
// [PdFPU01, PdFPMAL]. ALU and immediate shifts use third argument 2, shifts
// use 3. Only the base and X forms are modelled; every Y and Z variant is
// declared unsupported.
1122 defm : PdWriteResXMMPair<WriteVecALU, [PdFPU01, PdFPMAL], 2>;
1123 defm : PdWriteResXMMPair<WriteVecALUX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
1124 defm : X86WriteResPairUnsupported<WriteVecALUY>;
1125 defm : X86WriteResPairUnsupported<WriteVecALUZ>;
1127 defm : PdWriteResXMMPair<WriteVecShift, [PdFPU01, PdFPMAL], 3, [1, 2]>;
1128 defm : PdWriteResXMMPair<WriteVecShiftX, [PdFPU01, PdFPMAL], 3, [1, 2]>;
1129 defm : X86WriteResPairUnsupported<WriteVecShiftY>;
1130 defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
1132 defm : PdWriteResXMMPair<WriteVecShiftImm, [PdFPU01, PdFPMAL], 2, [1, 2]>;
1133 defm : PdWriteResXMMPair<WriteVecShiftImmX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
1134 defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
1135 defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
// Vector integer multiply families. WriteVecIMul(X) use [PdFPU0, PdFPMMA];
// WritePMULLD lists four units (PdFPU0, PdFPU01, PdFPMMA, PdFPMAL) with
// [2, 1, 2, 1]. The Y and Z variants are unsupported. PdWriteVPMACS is a
// per-instruction record on [PdFPU0, PdFPMMA, PdFPMAL] for the VPMACS*
// opcodes.
// NOTE(review): the body of PdWriteVPMACS and the tail of its InstRW
// instruction list are not visible in this excerpt (embedded numbering
// 1146 -> 1149 -> 1152) - confirm against the complete file.
1137 defm : PdWriteResXMMPair<WriteVecIMul, [PdFPU0, PdFPMMA], 4>;
1138 defm : PdWriteResXMMPair<WriteVecIMulX, [PdFPU0, PdFPMMA], 4>;
1139 defm : X86WriteResPairUnsupported<WriteVecIMulY>;
1140 defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
1142 defm : PdWriteResXMMPair<WritePMULLD, [PdFPU0, PdFPU01, PdFPMMA, PdFPMAL], 5, [2, 1, 2, 1]>;
1143 defm : X86WriteResPairUnsupported<WritePMULLDY>;
1144 defm : X86WriteResPairUnsupported<WritePMULLDZ>;
1146 def PdWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPMMA, PdFPMAL]> {
1149 def : InstRW<[PdWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr,
// WriteMPSAD on [PdFPU0, PdFPMMA] with [1, 4] and a trailing 8; the Y and Z
// variants are unsupported. VMPSADBWrri gets its own record with the same
// units and ResourceCycles but NumMicroOps = 10.
// NOTE(review): the embedded numbering jumps 1156 -> 1158 and 1159 -> 1161,
// so the Latency line and closing brace of PdWriteVMPSADBW are not visible.
1152 defm : PdWriteResXMMPair<WriteMPSAD, [PdFPU0, PdFPMMA], 9, [1, 4], 8>;
1153 defm : X86WriteResPairUnsupported<WriteMPSADY>;
1154 defm : X86WriteResPairUnsupported<WriteMPSADZ>;
1156 def PdWriteVMPSADBW : SchedWriteRes<[PdFPU0, PdFPMMA]> {
1158 let ResourceCycles = [1, 4];
1159 let NumMicroOps = 10;
1161 def : InstRW<[PdWriteVMPSADBW], (instrs VMPSADBWrri)>;
// WritePSADBW(X) on [PdFPU01, PdFPMAL] and WritePHMINPOS on
// [PdFPU0, PdFPMAL]; all three use third argument 4 and a trailing 2. The Y
// and Z variants of WritePSADBW are unsupported.
1163 defm : PdWriteResXMMPair<WritePSADBW, [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
1164 defm : PdWriteResXMMPair<WritePSADBWX, [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
1165 defm : X86WriteResPairUnsupported<WritePSADBWY>;
1166 defm : X86WriteResPairUnsupported<WritePSADBWZ>;
1168 defm : PdWriteResXMMPair<WritePHMINPOS, [PdFPU0, PdFPMAL], 4, [], 2>;
// Integer shuffle families on [PdFPU01, PdFPMAL]. Fixed shuffles use third
// argument 2, variable shuffles 3. WriteShuffleY is the only Y form modelled
// here; WriteShuffleZ and the Y/Z variable shuffles are unsupported. VPPERM
// (both operand orders) gets its own record with ResourceCycles [1, 3].
// NOTE(review): the embedded numbering jumps 1180 -> 1182 -> 1184, so the
// rest of PdWriteVPPERM and its closing brace are not visible here.
1170 defm : PdWriteResXMMPair<WriteShuffle, [PdFPU01, PdFPMAL], 2, [1, 2]>;
1171 defm : PdWriteResXMMPair<WriteShuffleX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
1172 defm : PdWriteResYMMPair<WriteShuffleY, [PdFPU01, PdFPMAL], 2, [1, 4]>;
1173 defm : X86WriteResPairUnsupported<WriteShuffleZ>;
1175 defm : PdWriteResXMMPair<WriteVarShuffle, [PdFPU01, PdFPMAL], 3, [1, 2]>;
1176 defm : PdWriteResXMMPair<WriteVarShuffleX, [PdFPU01, PdFPMAL], 3, [1, 3]>;
1177 defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
1178 defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
1180 def PdWriteVPPERM : SchedWriteRes<[PdFPU01, PdFPMAL]> {
1182 let ResourceCycles = [1, 3];
1184 def : InstRW<[PdWriteVPPERM], (instrs VPPERMrrr, VPPERMrrr_REV)>;
// Integer blend families on [PdFPU01, PdFPMAL] with third argument 2.
// WriteBlend relies on the multiclass default for the list, WriteVarBlend
// passes [1, 2] explicitly. The Y and Z variants of both are unsupported.
1186 defm : PdWriteResXMMPair<WriteBlend, [PdFPU01, PdFPMAL], 2>;
1187 defm : X86WriteResPairUnsupported<WriteBlendY>;
1188 defm : X86WriteResPairUnsupported<WriteBlendZ>;
1190 defm : PdWriteResXMMPair<WriteVarBlend, [PdFPU01, PdFPMAL], 2, [1, 2]>;
1191 defm : X86WriteResPairUnsupported<WriteVarBlendY>;
1192 defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
// Vector integer logic and test families. WriteVecLogic(X) use
// [PdFPU01, PdFPMAL]; their Y and Z variants are unsupported. WriteVecTest
// and WriteVecTestY list [.., PdFPFMA, PdEX0] like the WriteFTest entries of
// this file.
// NOTE(review): WriteVecTestY passes [2, 4, 1] whereas WriteFTestY passes
// [4, 4, 1]; the two entries are otherwise identical - confirm the
// difference is intended.
1194 defm : PdWriteResXMMPair<WriteVecLogic, [PdFPU01, PdFPMAL], 2>;
1195 defm : PdWriteResXMMPair<WriteVecLogicX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
1196 defm : X86WriteResPairUnsupported<WriteVecLogicY>;
1197 defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
1199 defm : PdWriteResXMMPair<WriteVecTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
1200 defm : PdWriteResYMMPair<WriteVecTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [2, 4, 1], 4, 2>;
1201 defm : X86WriteResPairUnsupported<WriteVecTestZ>;
// WriteShuffle256, WriteVPMOV256 and WriteVarShuffle256 are given only a
// resource list ([PdFPU01, PdFPMAL]); every other argument is left to the
// PdWriteResXMMPair defaults. WriteVarVecShift uses third argument 3 with
// [1, 2]; its Y and Z variants are unsupported.
// NOTE(review): the defaults are declared with PdWriteResXMMPair, outside
// this excerpt.
1203 defm : PdWriteResXMMPair<WriteShuffle256, [PdFPU01, PdFPMAL]>;
1204 defm : PdWriteResXMMPair<WriteVPMOV256, [PdFPU01, PdFPMAL]>;
1205 defm : PdWriteResXMMPair<WriteVarShuffle256, [PdFPU01, PdFPMAL]>;
1207 defm : PdWriteResXMMPair<WriteVarVecShift, [PdFPU01, PdFPMAL], 3, [1, 2]>;
1208 defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
1209 defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
1211 ////////////////////////////////////////////////////////////////////////////////
1212 // Vector insert/extract operations.
1213 ////////////////////////////////////////////////////////////////////////////////
// Element insert/extract write classes. The insert forms use
// [PdFPU01, PdFPMAL], with PdLoad appended for the load variant. The extract
// form uses [PdFPU0, PdFPFMA, PdEX0] and the extract-to-store form uses
// [PdFPU1, PdFPSTO, PdStore]. EXTRQ and EXTRQI share one record with
// ResourceCycles [1, 3].
// NOTE(review): the embedded numbering jumps 1221 -> 1223 -> 1225, so the
// rest of PdWriteEXTRQ and its closing brace are not visible here.
1215 defm : PdWriteRes<WriteVecInsert, [PdFPU01, PdFPMAL], 2, [1, 3], 2>;
1216 defm : PdWriteRes<WriteVecInsertLd, [PdFPU01, PdFPMAL, PdLoad], 6, [1, 4, 3], 2>;
1218 defm : PdWriteRes<WriteVecExtract, [PdFPU0, PdFPFMA, PdEX0], 12, [1, 3, 1], 2>;
1219 defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13, [2, 1, 1], 2>;
1221 def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
1223 let ResourceCycles = [1, 3];
1225 def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>;
1227 ////////////////////////////////////////////////////////////////////////////////
1228 // SSE42 String instructions.
1229 ////////////////////////////////////////////////////////////////////////////////
// String-compare write classes. The implicit-length forms (IStrI, IStrM) use
// [PdFPU1, PdFPFMA, PdEX0] with a trailing 7; the explicit-length forms
// (EStrI, EStrM) list six units, including PdStore and PdLoad, with a
// trailing 27. Within each pair the I and M entries differ in the third
// argument (11 vs 7, 14 vs 10); IStrI/IStrM also differ in the middle list
// value (6 vs 8) and the last argument (1 vs 2).
1231 defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 11, [1, 6, 1], 7, 1>;
1232 defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0], 7, [1, 8, 1], 7, 2>;
1234 defm : PdWriteResXMMPair<WritePCmpEStrI, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 14, [1, 10, 10, 10, 1, 1], 27, 1>;
1235 defm : PdWriteResXMMPair<WritePCmpEStrM, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 10, [1, 10, 10, 10, 1, 1], 27, 1>;
1237 ////////////////////////////////////////////////////////////////////////////////
1238 // MOVMSK Instructions.
1239 ////////////////////////////////////////////////////////////////////////////////
// MOVMSK write classes, all on [PdFPU0, PdFPFMA, PdEX0] with an empty list
// and a trailing 2. The FP and vector forms use third argument 12, the MMX
// form 10. WriteVecMOVMSKY is unsupported; the WriteVecMOVMSKZ line is
// commented out, so this file makes no statement about that class.
1241 defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;
1243 defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;
1244 defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
1245 // defm : X86WriteResUnsupported<WriteVecMOVMSKZ>;
1247 defm : PdWriteRes<WriteMMXMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 10, [], 2>;
1249 ////////////////////////////////////////////////////////////////////////////////
1250 // AES Instructions.
1251 ////////////////////////////////////////////////////////////////////////////////
// AES write classes on [PdFPU0, PdFPMMA]. WriteAESIMC and WriteAESKeyGen use
// third argument 5 with defaults; WriteAESDecEnc uses 9 with an empty list
// and a trailing 2.
1253 defm : PdWriteResXMMPair<WriteAESIMC, [PdFPU0, PdFPMMA], 5>;
1254 defm : PdWriteResXMMPair<WriteAESKeyGen, [PdFPU0, PdFPMMA], 5>;
1255 defm : PdWriteResXMMPair<WriteAESDecEnc, [PdFPU0, PdFPMMA], 9, [], 2>;
1257 ////////////////////////////////////////////////////////////////////////////////
1258 // Horizontal add/sub instructions.
1259 ////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub write classes. WriteFHAdd(Y) use [PdFPU0, PdFPFMA];
// WritePHAdd(X) use [PdFPU01, PdFPMAL]; the Z variants and WritePHAddY are
// unsupported. WritePHAdd (third argument 5, [1, 4]) is costlier than
// WritePHAddX (2, [1, 2]). The two InstRW records then assign WritePHAdd to
// the listed register (rr) PHADD/PHSUB opcodes and WritePHAdd.Folded to the
// corresponding memory (rm) opcodes.
// NOTE(review): one line of each instruction list is not visible here
// (embedded numbering 1270 -> 1272 and 1277 -> 1279); judging by the
// V-prefixed lines it is presumably the PHADDW/PHSUBW pair - confirm.
1261 defm : PdWriteResXMMPair<WriteFHAdd, [PdFPU0, PdFPFMA], 11, [1, 5], 3, 1>;
1262 defm : PdWriteResYMMPair<WriteFHAddY, [PdFPU0, PdFPFMA], 11, [1, 8], 8, 2>;
1263 defm : X86WriteResPairUnsupported<WriteFHAddZ>;
1265 defm : PdWriteResXMMPair<WritePHAdd, [PdFPU01, PdFPMAL], 5, [1, 4], 3, 1>;
1266 defm : PdWriteResXMMPair<WritePHAddX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
1267 defm : X86WriteResPairUnsupported<WritePHAddY>;
1268 defm : X86WriteResPairUnsupported<WritePHAddZ>;
1270 def : InstRW<[WritePHAdd], (instrs PHADDDrr, PHSUBDrr,
1272 PHADDSWrr, PHSUBSWrr,
1273 VPHADDDrr, VPHSUBDrr,
1274 VPHADDWrr, VPHSUBWrr,
1275 VPHADDSWrr, VPHSUBSWrr)>;
1277 def : InstRW<[WritePHAdd.Folded], (instrs PHADDDrm, PHSUBDrm,
1279 PHADDSWrm, PHSUBSWrm,
1280 VPHADDDrm, VPHSUBDrm,
1281 VPHADDWrm, VPHSUBWrm,
1282 VPHADDSWrm, VPHSUBSWrm)>;
1284 ////////////////////////////////////////////////////////////////////////////////
1285 // Carry-less multiplication instructions.
1286 ////////////////////////////////////////////////////////////////////////////////
// WriteCLMul on [PdFPU0, PdFPMMA] with [1, 7] and trailing arguments 5, 1.
// VPCLMULQDQrr gets its own record with the same units and ResourceCycles
// but NumMicroOps = 6.
// NOTE(review): the embedded numbering jumps 1290 -> 1292 and 1293 -> 1295,
// so the Latency line and closing brace of PdWriteVPCLMULQDQrr are not
// visible in this excerpt.
1288 defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [1, 7], 5, 1>;
1290 def PdWriteVPCLMULQDQrr : SchedWriteRes<[PdFPU0, PdFPMMA]> {
1292 let ResourceCycles = [1, 7];
1293 let NumMicroOps = 6;
1295 def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>;
1297 ////////////////////////////////////////////////////////////////////////////////
1298 // SSE4A instructions.
1299 ////////////////////////////////////////////////////////////////////////////////
// Per-instruction records for INSERTQ and INSERTQI, both on
// [PdFPU01, PdFPMAL]. They differ in the second ResourceCycles value:
// 2 for INSERTQ, 3 for INSERTQI.
// NOTE(review): the embedded numbering skips lines inside both records
// (1301 -> 1303 -> 1305, 1307 -> 1309 -> 1311), so the remaining fields and
// the closing braces are not visible in this excerpt.
1301 def PdWriteINSERTQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
1303 let ResourceCycles = [1, 2];
1305 def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ)>;
1307 def PdWriteINSERTQI : SchedWriteRes<[PdFPU01, PdFPMAL]> {
1309 let ResourceCycles = [1, 3];
1311 def : InstRW<[PdWriteINSERTQI], (instrs INSERTQI)>;
1313 ////////////////////////////////////////////////////////////////////////////////
1314 // AVX instructions.
1315 ////////////////////////////////////////////////////////////////////////////////
// AVX-specific overrides:
//  * PdWriteVBROADCASTYLd - [PdLoad, PdFPU01, PdFPFMA], ResourceCycles
//    [1, 2, 4], two micro-ops; its InstRW pairs it with ReadAfterLd and
//    starts its instruction list with VBROADCASTSDYrm.
//  * PdWriteVZEROALL / PdWriteVZEROUPPER - empty resource lists, modelled
//    purely by micro-op count (32 and 16 respectively).
// NOTE(review): the tail of the VBROADCAST instruction list is not visible
// (embedded numbering 1322 -> 1325), and neither are the Latency lines and
// closing braces of the three records - confirm against the complete file.
1317 def PdWriteVBROADCASTYLd : SchedWriteRes<[PdLoad, PdFPU01, PdFPFMA]> {
1319 let ResourceCycles = [1, 2, 4];
1320 let NumMicroOps = 2;
1322 def : InstRW<[PdWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
1325 def PdWriteVZEROALL : SchedWriteRes<[]> {
1327 let NumMicroOps = 32;
1329 def : InstRW<[PdWriteVZEROALL], (instrs VZEROALL)>;
1331 def PdWriteVZEROUPPER : SchedWriteRes<[]> {
1333 let NumMicroOps = 16;
1335 def : InstRW<[PdWriteVZEROUPPER], (instrs VZEROUPPER)>;
1337 ///////////////////////////////////////////////////////////////////////////////
1338 // SchedWriteVariant definitions.
1339 ///////////////////////////////////////////////////////////////////////////////
// PdWriteZeroLatency is a write that consumes no resources (empty list); it
// is the target of every zero-idiom variant in this file. PdWriteZeroIdiom
// picks it when ZeroIdiomPredicate matches and otherwise falls through to
// WriteALU via the always-true TruePred entry. The InstRW applies the variant
// to the GPR SUB forms shown.
// NOTE(review): the body of PdWriteZeroLatency, the "]>;" closing the variant
// and the tail of the instruction list are not visible here (embedded
// numbering 1341 -> 1345, 1347 -> 1349 -> 1352); the list presumably also
// names XOR32rr/XOR64rr - confirm against the complete file.
1341 def PdWriteZeroLatency : SchedWriteRes<[]> {
1345 def PdWriteZeroIdiom : SchedWriteVariant<[
1346 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
1347 SchedVar<MCSchedPredicate<TruePred>, [WriteALU]>
1349 def : InstRW<[PdWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
// FP-logic zero-idiom variant: PdWriteZeroLatency when ZeroIdiomPredicate
// matches, otherwise WriteFLogic. Applied to the xmm XORPS/ANDNPS/ANDNPD
// forms (legacy and V-prefixed) listed in the InstRW.
// NOTE(review): the "]>;" closing the variant and one line of the
// instruction list are not visible here (embedded numbering 1354 -> 1356 ->
// 1358); the missing operands are presumably XORPDrr/VXORPDrr - confirm.
1352 def PdWriteFZeroIdiom : SchedWriteVariant<[
1353 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
1354 SchedVar<MCSchedPredicate<TruePred>, [WriteFLogic]>
1356 def : InstRW<[PdWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
1358 ANDNPSrr, VANDNPSrr,
1359 ANDNPDrr, VANDNPDrr)>;
1361 // VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr "zero-idioms" have latency of 1.
// Integer-logic zero-idiom variants. Both select PdWriteZeroLatency when
// ZeroIdiomPredicate matches; the fallback is WriteVecLogic for the MMX
// PXOR/PANDN forms and WriteVecLogicX for the xmm PXOR/PANDN forms (legacy
// and V-prefixed).
// NOTE(review): the "]>;" closing each variant is not visible in this
// excerpt (embedded numbering 1365 -> 1367 and 1371 -> 1373).
1363 def PdWriteVZeroIdiomLogic : SchedWriteVariant<[
1364 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
1365 SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogic]>
1367 def : InstRW<[PdWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;
1369 def PdWriteVZeroIdiomLogicX : SchedWriteVariant<[
1370 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
1371 SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogicX]>
1373 def : InstRW<[PdWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
1374 PANDNrr, VPANDNrr)>;
// Integer-ALU zero-idiom variants. Both select PdWriteZeroLatency when
// ZeroIdiomPredicate matches; the fallback is WriteVecALU for the MMX
// PSUB* forms and WriteVecALUX for the xmm PSUB*/PCMPGT* forms (legacy and
// V-prefixed).
// NOTE(review): large parts of both instruction lists are not visible here
// (embedded numbering 1381 -> 1386 and 1390 -> 1394), nor is the "]>;"
// closing each variant - confirm the full opcode lists against the
// complete file before relying on this summary.
1376 def PdWriteVZeroIdiomALU : SchedWriteVariant<[
1377 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
1378 SchedVar<MCSchedPredicate<TruePred>, [WriteVecALU]>
1380 def : InstRW<[PdWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr,
1381 MMX_PSUBQirr, MMX_PSUBWirr,
1386 def PdWriteVZeroIdiomALUX : SchedWriteVariant<[
1387 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
1388 SchedVar<MCSchedPredicate<TruePred>, [WriteVecALUX]>
1390 def : InstRW<[PdWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
1394 PCMPGTBrr, VPCMPGTBrr,
1395 PCMPGTDrr, VPCMPGTDrr,
1396 PCMPGTWrr, VPCMPGTWrr)>;
1398 ///////////////////////////////////////////////////////////////////////////////
1399 // Dependency breaking instructions.
1400 ///////////////////////////////////////////////////////////////////////////////
1402 // VPCMPGTQ, but not PCMPGTQ!
// Opcode lists handed to IsZeroIdiomFunction. Each DepBreakingClass pairs a
// list of opcodes with ZeroIdiomPredicate. The visible groups are the GPR
// SUB/XOR forms, the MMX forms, the legacy xmm forms, and the V-prefixed
// xmm/ymm forms. VPCMPGTQrr appears in the V-prefixed list while the legacy
// list has no PCMPGTQrr.
// NOTE(review): many lines of this record are not visible in this excerpt
// (the embedded numbering skips repeatedly, e.g. 1406 -> 1410, 1414 -> 1419,
// 1426 -> 1431, and stops before the closing "]>;"), including the
// "DepBreakingClass<[" openers of the later groups. Treat the lists shown as
// partial and confirm against the complete file before editing.
1404 def : IsZeroIdiomFunction<[
1406 DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
1410 MMX_PXORirr, MMX_PANDNirr, MMX_PSUBBirr,
1411 MMX_PSUBDirr, MMX_PSUBQirr, MMX_PSUBWirr,
1412 MMX_PSUBSBirr, MMX_PSUBSWirr, MMX_PSUBUSBirr, MMX_PSUBUSWirr,
1413 MMX_PCMPGTBirr, MMX_PCMPGTDirr, MMX_PCMPGTWirr
1414 ], ZeroIdiomPredicate>,
1419 XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,
1423 PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
1424 PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr,
1425 PCMPGTBrr, PCMPGTDrr, PCMPGTWrr
1426 ], ZeroIdiomPredicate>,
1431 VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,
1433 // xmm int variants.
1435 VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
1436 VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr,
1437 VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
1440 VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
1441 ], ZeroIdiomPredicate>
1444 def : IsDepBreakingFunction<[
1446 DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
1447 DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,
1451 MMX_PCMPEQBirr, MMX_PCMPEQDirr, MMX_PCMPEQWirr
1452 ], ZeroIdiomPredicate>,
1456 PCMPEQBrr, PCMPEQWrr, PCMPEQDrr
1457 // But not PCMPEQQrr.
1458 ], ZeroIdiomPredicate>,
1462 VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr
1463 // But not VPCMPEQQrr.
1464 ], ZeroIdiomPredicate>