1 //==- RISCVSchedSiFive7.td - SiFive7 Scheduling Definitions --*- tablegen -*-=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 //===----------------------------------------------------------------------===//
11 /// c is true if mx has the worst case behavior compared to LMULs in MxList.
12 /// On the SiFive7, the worst case LMUL is the Largest LMUL
13 /// and the worst case sew is the smallest SEW for that LMUL.
14 class SiFive7IsWorstCaseMX<string mx, list<string> MxList> {
15 defvar LLMUL = LargestLMUL<MxList>.r;
16 bit c = !eq(mx, LLMUL);
19 /// c is true if mx and sew have the worst case behavior compared to LMULs in
20 /// MxList. On the SiFive7, the worst case LMUL is the Largest LMUL
21 /// and the worst case sew is the smallest SEW for that LMUL.
22 class SiFive7IsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
24 defvar LLMUL = LargestLMUL<MxList>.r;
25 defvar SSEW = SmallestSEW<mx, isF>.r;
26 bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
29 /// Number of DLEN parts = (LMUL * VLEN) / DLEN.
30 /// Since DLEN = VLEN / 2, Num DLEN parts = 2 * LMUL.
31 class SiFive7GetCyclesDefault<string mx> {
43 class SiFive7GetCyclesNarrowing<string mx> {
54 class SiFive7GetCyclesVMask<string mx> {
66 /// VLDM and VSTM can't read/write more than 2 DLENs of data.
67 /// 2 DLENs when LMUL=8. 1 DLEN for all other LMULs.
68 class SiFive7GetMaskLoadStoreCycles<string mx> {
75 // Cycles for nf=2 segmented loads and stores are calculated using the
76 // formula (2 * VLEN * LMUL) / DLEN = 4 * LMUL
77 class SiFive7GetCyclesSegmentedSeg2<string mx> {
89 // Cycles for segmented loads and stores are calculated using the
90 // formula vl * ceil((SEW * nf) / DLEN), where SEW * nf is the segment size.
91 class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
94 // (VLEN * LMUL) / SEW
95 defvar VLUpperBound = !cond(
96 !eq(mx, "M1") : !div(VLEN, sew),
97 !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
98 !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
99 !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
100 !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
101 !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
102 !eq(mx, "MF8") : !div(!div(VLEN, 8), sew),
104 // We can calculate ceil(a/b) using (a + b - 1) / b.
105 defvar a = !mul(sew, nf);
107 int c = !mul(VLUpperBound, !div(!sub(!add(a, b), 1), b));
110 class SiFive7GetCyclesOnePerElement<string mx, int sew> {
111 // FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler
112 // to use a different VLEN, this model will not make scheduling decisions
113 // based on the user specified VLEN.
114 // c = ceil(VLEN / SEW) * LMUL
115 // Note: c >= 1 since the smallest VLEN is 512 / 64 = 8, and the
116 // largest division performed on VLEN is in MF8 case with division
117 // by 8. Therefore, there is no need to ceil the result.
118 int VLEN = !div(512, sew);
120 !eq(mx, "M1") : VLEN,
121 !eq(mx, "M2") : !mul(VLEN, 2),
122 !eq(mx, "M4") : !mul(VLEN, 4),
123 !eq(mx, "M8") : !mul(VLEN, 8),
124 !eq(mx, "MF2") : !div(VLEN, 2),
125 !eq(mx, "MF4") : !div(VLEN, 4),
126 !eq(mx, "MF8") : !div(VLEN, 8)
130 class SiFive7GetDivOrSqrtFactor<int sew> {
132 // TODO: Add SchedSEWSetFP upstream and remove the SEW=8 case.
140 /// Cycles for reductions take approximately VL*SEW/DLEN + 5*(4 + log2(DLEN/SEW))
142 class SiFive7GetReductionCycles<string mx, int sew> {
143 // VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
144 // VLUpperBound=(VLEN*LMUL)/SEW.
146 defvar DLEN = !div(VLEN, 2);
147 defvar TwoTimesLMUL = !cond(
158 !mul(5, !add(4, !logtwo(!div(DLEN, sew))))
162 /// Cycles for ordered reductions take approximately 6*VL cycles
163 class SiFive7GetOrderedReductionCycles<string mx, int sew> {
165 // (VLEN * LMUL) / SEW
166 defvar VLUpperBound = !cond(
167 !eq(mx, "M1") : !div(VLEN, sew),
168 !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
169 !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
170 !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
171 !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
172 !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
173 !eq(mx, "MF8") : !div(!div(VLEN, 8), sew),
175 int c = !mul(6, VLUpperBound);
// Bypass helper: a ReadAdvance on `read` of `cycles` cycles (2 by default).
// The write list passed to ReadAdvance names the scalar producers it covers:
// integer ALU, shifts, shNadd, rotates, single-bit ops, bext, clz/ctz, cpop,
// rev8, orc.b, short forward branch, integer multiply/divide and the integer
// loads (LDB/LDH/LDW/LDD).
// NOTE(review): presumably the advance applies only to the listed writes
// (ReadAdvance's ValidWrites semantics) - confirm against TargetSchedule.td.
178 class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
179 : ReadAdvance<read, cycles, [WriteIALU, WriteIALU32,
180 WriteShiftImm, WriteShiftImm32,
181 WriteShiftReg, WriteShiftReg32,
182 WriteSHXADD, WriteSHXADD32,
183 WriteRotateImm, WriteRotateImm32,
184 WriteRotateReg, WriteRotateReg32,
185 WriteSingleBit, WriteSingleBitImm,
186 WriteBEXT, WriteBEXTI,
187 WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
188 WriteCPOP, WriteCPOP32,
189 WriteREV8, WriteORCB, WriteSFB,
190 WriteIMul, WriteIMul32,
191 WriteIDiv, WriteIDiv32,
192 WriteLDB, WriteLDH, WriteLDW, WriteLDD]>;
194 // SiFive7 machine model for scheduling and other instruction cost heuristics.
195 def SiFive7Model : SchedMachineModel {
196 let MicroOpBufferSize = 0; // Explicitly set to zero since SiFive7 is in-order.
197 let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
199 let MispredictPenalty = 3;
200 let CompleteModel = 0;
201 let EnableIntervals = true;
202 let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
203 HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
204 HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
208 // The SiFive7 microarchitecture has three pipelines: A, B, V.
209 // Pipe A can handle memory, integer alu and vector operations.
210 // Pipe B can handle integer alu, control flow, integer multiply and divide,
211 // and floating point computation.
212 // The V pipeline is modeled by the VCQ, VA, VL, and VS resources.
213 let SchedModel = SiFive7Model in {
214 let BufferSize = 0 in {
215 def SiFive7PipeA : ProcResource<1>;
216 def SiFive7PipeB : ProcResource<1>;
217 def SiFive7IDiv : ProcResource<1>; // Int Division
218 def SiFive7FDiv : ProcResource<1>; // FP Division/Sqrt
219 def SiFive7VA : ProcResource<1>; // Arithmetic sequencer
220 def SiFive7VL : ProcResource<1>; // Load sequencer
221 def SiFive7VS : ProcResource<1>; // Store sequencer
222 // The VCQ accepts instructions from the A Pipe and holds them until the
223 // vector unit is ready to dequeue them. The unit dequeues up to one instruction
224 // per cycle, in order, as soon as the sequencer for that type of instruction is
225 // available. This resource is meant to be used for 1 cycle by all vector
226 // instructions, to model that only one vector instruction may be dequeued at a
227 // time. The actual dequeueing into the sequencer is modeled by the VA, VL, and
228 // VS sequencer resources below. Each of them will only accept a single
229 // instruction at a time and remain busy for the number of cycles associated
230 // with that instruction.
231 def SiFive7VCQ : ProcResource<1>; // Vector Command Queue
234 def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;
238 def : WriteRes<WriteJmp, [SiFive7PipeB]>;
239 def : WriteRes<WriteJal, [SiFive7PipeB]>;
240 def : WriteRes<WriteJalr, [SiFive7PipeB]>;
243 // Short forward branch
244 def : WriteRes<WriteSFB, [SiFive7PipeA, SiFive7PipeB]> {
249 // Integer arithmetic and logic
251 def : WriteRes<WriteIALU, [SiFive7PipeAB]>;
252 def : WriteRes<WriteIALU32, [SiFive7PipeAB]>;
253 def : WriteRes<WriteShiftImm, [SiFive7PipeAB]>;
254 def : WriteRes<WriteShiftImm32, [SiFive7PipeAB]>;
255 def : WriteRes<WriteShiftReg, [SiFive7PipeAB]>;
256 def : WriteRes<WriteShiftReg32, [SiFive7PipeAB]>;
259 // Integer multiplication
261 def : WriteRes<WriteIMul, [SiFive7PipeB]>;
262 def : WriteRes<WriteIMul32, [SiFive7PipeB]>;
266 def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]> {
268 let ReleaseAtCycles = [1, 65];
270 def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]> {
272 let ReleaseAtCycles = [1, 33];
277 // Rotates are in the late-B ALU.
278 def : WriteRes<WriteRotateImm, [SiFive7PipeB]>;
279 def : WriteRes<WriteRotateImm32, [SiFive7PipeB]>;
280 def : WriteRes<WriteRotateReg, [SiFive7PipeB]>;
281 def : WriteRes<WriteRotateReg32, [SiFive7PipeB]>;
283 // clz[w]/ctz[w] are in the late-B ALU.
284 def : WriteRes<WriteCLZ, [SiFive7PipeB]>;
285 def : WriteRes<WriteCLZ32, [SiFive7PipeB]>;
286 def : WriteRes<WriteCTZ, [SiFive7PipeB]>;
287 def : WriteRes<WriteCTZ32, [SiFive7PipeB]>;
289 // cpop[w] look exactly like multiply.
290 def : WriteRes<WriteCPOP, [SiFive7PipeB]>;
291 def : WriteRes<WriteCPOP32, [SiFive7PipeB]>;
293 // orc.b is in the late-B ALU.
294 def : WriteRes<WriteORCB, [SiFive7PipeB]>;
296 // rev8 is in the late-A and late-B ALUs.
297 def : WriteRes<WriteREV8, [SiFive7PipeAB]>;
299 // shNadd[.uw] is on the early-B and late-B ALUs.
300 def : WriteRes<WriteSHXADD, [SiFive7PipeB]>;
301 def : WriteRes<WriteSHXADD32, [SiFive7PipeB]>;
304 // Single-bit instructions
305 // BEXT[I] instruction is available on all ALUs and the other instructions
306 // are only available on the SiFive7B pipe.
308 def : WriteRes<WriteSingleBit, [SiFive7PipeB]>;
309 def : WriteRes<WriteSingleBitImm, [SiFive7PipeB]>;
310 def : WriteRes<WriteBEXT, [SiFive7PipeAB]>;
311 def : WriteRes<WriteBEXTI, [SiFive7PipeAB]>;
315 def : WriteRes<WriteSTB, [SiFive7PipeA]>;
316 def : WriteRes<WriteSTH, [SiFive7PipeA]>;
317 def : WriteRes<WriteSTW, [SiFive7PipeA]>;
318 def : WriteRes<WriteSTD, [SiFive7PipeA]>;
319 def : WriteRes<WriteFST16, [SiFive7PipeA]>;
320 def : WriteRes<WriteFST32, [SiFive7PipeA]>;
321 def : WriteRes<WriteFST64, [SiFive7PipeA]>;
324 def : WriteRes<WriteLDB, [SiFive7PipeA]>;
325 def : WriteRes<WriteLDH, [SiFive7PipeA]>;
326 def : WriteRes<WriteLDW, [SiFive7PipeA]>;
327 def : WriteRes<WriteLDD, [SiFive7PipeA]>;
331 def : WriteRes<WriteFLD16, [SiFive7PipeA]>;
332 def : WriteRes<WriteFLD32, [SiFive7PipeA]>;
333 def : WriteRes<WriteFLD64, [SiFive7PipeA]>;
337 def : WriteRes<WriteAtomicSTW, [SiFive7PipeA]>;
338 def : WriteRes<WriteAtomicSTD, [SiFive7PipeA]>;
341 def : WriteRes<WriteAtomicW, [SiFive7PipeA]>;
342 def : WriteRes<WriteAtomicD, [SiFive7PipeA]>;
343 def : WriteRes<WriteAtomicLDW, [SiFive7PipeA]>;
344 def : WriteRes<WriteAtomicLDD, [SiFive7PipeA]>;
349 def : WriteRes<WriteFAdd16, [SiFive7PipeB]>;
350 def : WriteRes<WriteFMul16, [SiFive7PipeB]>;
351 def : WriteRes<WriteFMA16, [SiFive7PipeB]>;
354 def : WriteRes<WriteFSGNJ16, [SiFive7PipeB]>;
355 def : WriteRes<WriteFMinMax16, [SiFive7PipeB]>;
358 let Latency = 14, ReleaseAtCycles = [1, 13] in {
359 def : WriteRes<WriteFDiv16, [SiFive7PipeB, SiFive7FDiv]>;
360 def : WriteRes<WriteFSqrt16, [SiFive7PipeB, SiFive7FDiv]>;
365 def : WriteRes<WriteFAdd32, [SiFive7PipeB]>;
366 def : WriteRes<WriteFMul32, [SiFive7PipeB]>;
367 def : WriteRes<WriteFMA32, [SiFive7PipeB]>;
370 def : WriteRes<WriteFSGNJ32, [SiFive7PipeB]>;
371 def : WriteRes<WriteFMinMax32, [SiFive7PipeB]>;
// 32-bit FP divide: uses PipeB and the FDiv unit with ReleaseAtCycles of 1 and
// 26 respectively, and a result latency of 27 cycles.
374 def : WriteRes<WriteFDiv32, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 27;
375 let ReleaseAtCycles = [1, 26]; }
// 32-bit FP square root: same resources, release cycles and latency as the
// 32-bit divide above.
376 def : WriteRes<WriteFSqrt32, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 27;
377 let ReleaseAtCycles = [1, 26]; }
381 def : WriteRes<WriteFAdd64, [SiFive7PipeB]>;
382 def : WriteRes<WriteFMul64, [SiFive7PipeB]>;
383 def : WriteRes<WriteFMA64, [SiFive7PipeB]>;
386 def : WriteRes<WriteFSGNJ64, [SiFive7PipeB]>;
387 def : WriteRes<WriteFMinMax64, [SiFive7PipeB]>;
// 64-bit FP divide: uses PipeB and the FDiv unit with ReleaseAtCycles of 1 and
// 55 respectively, and a result latency of 56 cycles.
390 def : WriteRes<WriteFDiv64, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 56;
391 let ReleaseAtCycles = [1, 55]; }
// 64-bit FP square root: same resources, release cycles and latency as the
// 64-bit divide above.
392 def : WriteRes<WriteFSqrt64, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 56;
393 let ReleaseAtCycles = [1, 55]; }
397 def : WriteRes<WriteFCvtI32ToF16, [SiFive7PipeB]>;
398 def : WriteRes<WriteFCvtI32ToF32, [SiFive7PipeB]>;
399 def : WriteRes<WriteFCvtI32ToF64, [SiFive7PipeB]>;
400 def : WriteRes<WriteFCvtI64ToF16, [SiFive7PipeB]>;
401 def : WriteRes<WriteFCvtI64ToF32, [SiFive7PipeB]>;
402 def : WriteRes<WriteFCvtI64ToF64, [SiFive7PipeB]>;
403 def : WriteRes<WriteFCvtF16ToI32, [SiFive7PipeB]>;
404 def : WriteRes<WriteFCvtF16ToI64, [SiFive7PipeB]>;
405 def : WriteRes<WriteFCvtF16ToF32, [SiFive7PipeB]>;
406 def : WriteRes<WriteFCvtF16ToF64, [SiFive7PipeB]>;
407 def : WriteRes<WriteFCvtF32ToI32, [SiFive7PipeB]>;
408 def : WriteRes<WriteFCvtF32ToI64, [SiFive7PipeB]>;
409 def : WriteRes<WriteFCvtF32ToF16, [SiFive7PipeB]>;
410 def : WriteRes<WriteFCvtF32ToF64, [SiFive7PipeB]>;
411 def : WriteRes<WriteFCvtF64ToI32, [SiFive7PipeB]>;
412 def : WriteRes<WriteFCvtF64ToI64, [SiFive7PipeB]>;
413 def : WriteRes<WriteFCvtF64ToF16, [SiFive7PipeB]>;
414 def : WriteRes<WriteFCvtF64ToF32, [SiFive7PipeB]>;
416 def : WriteRes<WriteFClass16, [SiFive7PipeB]>;
417 def : WriteRes<WriteFClass32, [SiFive7PipeB]>;
418 def : WriteRes<WriteFClass64, [SiFive7PipeB]>;
419 def : WriteRes<WriteFCmp16, [SiFive7PipeB]>;
420 def : WriteRes<WriteFCmp32, [SiFive7PipeB]>;
421 def : WriteRes<WriteFCmp64, [SiFive7PipeB]>;
422 def : WriteRes<WriteFMovI16ToF16, [SiFive7PipeB]>;
423 def : WriteRes<WriteFMovF16ToI16, [SiFive7PipeB]>;
424 def : WriteRes<WriteFMovI32ToF32, [SiFive7PipeB]>;
425 def : WriteRes<WriteFMovF32ToI32, [SiFive7PipeB]>;
426 def : WriteRes<WriteFMovI64ToF64, [SiFive7PipeB]>;
427 def : WriteRes<WriteFMovF64ToI64, [SiFive7PipeB]>;
430 // 6. Configuration-Setting Instructions
432 def : WriteRes<WriteVSETVLI, [SiFive7PipeA]>;
433 def : WriteRes<WriteVSETIVLI, [SiFive7PipeA]>;
434 def : WriteRes<WriteVSETVL, [SiFive7PipeA]>;
437 // 7. Vector Loads and Stores
438 // Unit-stride loads and stores can operate at the full bandwidth of the memory
439 // pipe. The memory pipe is DLEN bits wide on x280.
440 foreach mx = SchedMxList in {
441 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
442 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
443 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
444 defm "" : LMULWriteResMX<"WriteVLDE", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
445 defm "" : LMULWriteResMX<"WriteVLDFF", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
447 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
448 defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
451 foreach mx = SchedMxList in {
452 defvar Cycles = SiFive7GetMaskLoadStoreCycles<mx>.c;
453 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
454 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
455 defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
456 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
457 defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
460 // Strided loads and stores operate at one element per cycle and should be
461 // scheduled accordingly. Indexed loads and stores operate at one element per
462 // cycle, and they stall the machine until all addresses have been generated,
463 // so they cannot be scheduled. Indexed and strided loads and stores have LMUL
464 // specific suffixes, but since SEW is already encoded in the name of the
465 // resource, we do not need to use LMULSEWXXX constructors. However, we do
466 // use the SEW from the name to determine the number of Cycles.
468 // This predicate is true when the rs2 operand of vlse or vsse is x0, false
470 def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>;
472 foreach mx = SchedMxList in {
473 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
474 defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8>.c;
475 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
476 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
477 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
478 [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
479 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
480 defm "" : LMULWriteResMX<"WriteVLDUX8", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
481 defm "" : LMULWriteResMX<"WriteVLDOX8", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
483 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
484 defm "" : LMULWriteResMX<"WriteVSTS8", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
485 defm "" : LMULWriteResMX<"WriteVSTUX8", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
486 defm "" : LMULWriteResMX<"WriteVSTOX8", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
489 // TODO: The MxLists need to be filtered by EEW. We only need to support
490 // LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
491 // since LMUL >= 16/64.
492 foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
493 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
494 defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16>.c;
495 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
496 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
497 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
498 [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
499 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
500 defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
501 defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
503 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
504 defm "" : LMULWriteResMX<"WriteVSTS16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
505 defm "" : LMULWriteResMX<"WriteVSTUX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
506 defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
509 foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
510 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
511 defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32>.c;
512 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
513 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
514 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
515 [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
516 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
517 defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
518 defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
520 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
521 defm "" : LMULWriteResMX<"WriteVSTS32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
522 defm "" : LMULWriteResMX<"WriteVSTUX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
523 defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
526 foreach mx = ["M1", "M2", "M4", "M8"] in {
527 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
528 defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64>.c;
529 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
530 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
531 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
532 [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
533 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
534 defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
535 defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
537 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
538 defm "" : LMULWriteResMX<"WriteVSTS64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
539 defm "" : LMULWriteResMX<"WriteVSTUX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
540 defm "" : LMULWriteResMX<"WriteVSTOX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
544 // VLD*R is LMUL aware
// Each VLD<n>R write uses the VCQ (acquired at cycle 0, released at 1) and the
// VL load sequencer (acquired at cycle 1, released at 1 + N), with a fixed
// latency of 4. N is 2, 4, 8, 16 for n = 1, 2, 4, 8, i.e. N = 2 * n.
545 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in
546 def : WriteRes<WriteVLD1R, [SiFive7VCQ, SiFive7VL]>;
547 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in
548 def : WriteRes<WriteVLD2R, [SiFive7VCQ, SiFive7VL]>;
549 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in
550 def : WriteRes<WriteVLD4R, [SiFive7VCQ, SiFive7VL]>;
551 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in
552 def : WriteRes<WriteVLD8R, [SiFive7VCQ, SiFive7VL]>;
553 // VST*R is LMUL aware
// Each VST<n>R write uses the VCQ (acquired at cycle 0, released at 1) and the
// VS store sequencer (acquired at cycle 1, released at 1 + N), with a fixed
// latency of 1. N is 2, 4, 8, 16 for n = 1, 2, 4, 8, i.e. N = 2 * n.
554 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in
555 def : WriteRes<WriteVST1R, [SiFive7VCQ, SiFive7VS]>;
556 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in
557 def : WriteRes<WriteVST2R, [SiFive7VCQ, SiFive7VS]>;
558 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in
559 def : WriteRes<WriteVST4R, [SiFive7VCQ, SiFive7VS]>;
560 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in
561 def : WriteRes<WriteVST8R, [SiFive7VCQ, SiFive7VS]>;
563 // Segmented Loads and Stores
564 // Unit-stride segmented loads and stores are effectively converted into strided
565 // segment loads and stores. Strided segment loads and stores operate at up to
566 // one segment per cycle if the segment fits within one aligned memory beat.
567 // Indexed segment loads and stores operate at the same rate as strided ones,
568 // but they stall the machine until all addresses have been generated.
569 foreach mx = SchedMxList in {
570 foreach eew = [8, 16, 32, 64] in {
571 defvar Cycles = SiFive7GetCyclesSegmentedSeg2<mx>.c;
572 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
573 // Does not chain so set latency high
574 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
575 defm "" : LMULWriteResMX<"WriteVLSEG2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
576 defm "" : LMULWriteResMX<"WriteVLSEGFF2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
578 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
579 defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
581 defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
582 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
583 // Does not chain so set latency high
584 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
585 defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
586 defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
588 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
589 defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
593 foreach mx = SchedMxList in {
595 foreach eew = [8, 16, 32, 64] in {
596 defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
597 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
598 // Does not chain so set latency high
599 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
600 defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
601 defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
602 defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
604 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
605 defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
606 defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
607 defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
613 // 11. Vector Integer Arithmetic Instructions
614 foreach mx = SchedMxList in {
615 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
616 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
617 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
618 defm "" : LMULWriteResMX<"WriteVIALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
619 defm "" : LMULWriteResMX<"WriteVIALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
620 defm "" : LMULWriteResMX<"WriteVIALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
621 defm "" : LMULWriteResMX<"WriteVICALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
622 defm "" : LMULWriteResMX<"WriteVICALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
623 defm "" : LMULWriteResMX<"WriteVICALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
624 defm "" : LMULWriteResMX<"WriteVShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
625 defm "" : LMULWriteResMX<"WriteVShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
626 defm "" : LMULWriteResMX<"WriteVShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
627 defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
628 defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
629 defm "" : LMULWriteResMX<"WriteVIMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
630 defm "" : LMULWriteResMX<"WriteVIMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
631 defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
632 defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
633 defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
634 defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
635 defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
636 defm "" : LMULWriteResMX<"WriteVIMovV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
637 defm "" : LMULWriteResMX<"WriteVIMovX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
638 defm "" : LMULWriteResMX<"WriteVIMovI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
640 // Mask results can't chain.
641 let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
642 defm "" : LMULWriteResMX<"WriteVICmpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
643 defm "" : LMULWriteResMX<"WriteVICmpX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
644 defm "" : LMULWriteResMX<"WriteVICmpI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
647 foreach mx = SchedMxList in {
648 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
649 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
650 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
651 defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
654 foreach mx = SchedMxList in {
655 foreach sew = SchedSEWSet<mx>.val in {
656 defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
657 !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
658 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
659 let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
660 defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
661 defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
667 foreach mx = SchedMxListW in {
668 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
669 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
670 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
671 defm "" : LMULWriteResMX<"WriteVIWALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
672 defm "" : LMULWriteResMX<"WriteVIWALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
673 defm "" : LMULWriteResMX<"WriteVIWALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
674 defm "" : LMULWriteResMX<"WriteVIWMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
675 defm "" : LMULWriteResMX<"WriteVIWMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
676 defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
677 defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
681 foreach mx = SchedMxListW in {
682 defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
683 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
684 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
685 defm "" : LMULWriteResMX<"WriteVNShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
686 defm "" : LMULWriteResMX<"WriteVNShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
687 defm "" : LMULWriteResMX<"WriteVNShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
691 // 12. Vector Fixed-Point Arithmetic Instructions
692 foreach mx = SchedMxList in {
693 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
694 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
695 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
696 defm "" : LMULWriteResMX<"WriteVSALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
697 defm "" : LMULWriteResMX<"WriteVSALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
698 defm "" : LMULWriteResMX<"WriteVSALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
699 defm "" : LMULWriteResMX<"WriteVAALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
700 defm "" : LMULWriteResMX<"WriteVAALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
701 defm "" : LMULWriteResMX<"WriteVSMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
702 defm "" : LMULWriteResMX<"WriteVSMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
703 defm "" : LMULWriteResMX<"WriteVSShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
704 defm "" : LMULWriteResMX<"WriteVSShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
705 defm "" : LMULWriteResMX<"WriteVSShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
709 foreach mx = SchedMxListW in {
710 defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
711 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
712 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
713 defm "" : LMULWriteResMX<"WriteVNClipV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
714 defm "" : LMULWriteResMX<"WriteVNClipX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
715 defm "" : LMULWriteResMX<"WriteVNClipI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
719 // 13. Vector Floating-Point Instructions
// Floating-point ALU, multiply, FMA, reciprocal, conversion, sign-injection,
// min/max, classify, merge/move and compare writes: one WriteRes per LMUL in
// SchedMxList. Each write uses the resources [SiFive7VCQ, SiFive7VA]; the
// two-entry AcquireAtCycles/ReleaseAtCycles lists pair positionally with that
// resource list.
foreach mx = SchedMxList in {
  // Number of DLEN parts for this LMUL.
  defvar VAOccupancy = SiFive7GetCyclesDefault<mx>.c;
  // True when mx is the largest LMUL in SchedMxList.
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;

  // Fixed 8-cycle latency group.
  let Latency = 8, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VAOccupancy)] in {
    foreach wrName = ["WriteVFALUV", "WriteVFALUF",
                      "WriteVFMulV", "WriteVFMulF",
                      "WriteVFMulAddV", "WriteVFMulAddF",
                      "WriteVFRecpV",
                      "WriteVFCvtIToFV", "WriteVFCvtFToIV"] in
      defm "" : LMULWriteResMX<wrName, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }

  // Fixed 4-cycle latency group.
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VAOccupancy)] in {
    foreach wrName = ["WriteVFSgnjV", "WriteVFSgnjF",
                      "WriteVFMinMaxV", "WriteVFMinMaxF",
                      "WriteVFClassV", "WriteVFMergeV", "WriteVFMovV"] in
      defm "" : LMULWriteResMX<wrName, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }

  // Mask results can't chain, so latency scales with the occupancy.
  let Latency = !add(VAOccupancy, 3), AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VAOccupancy)] in {
    foreach wrName = ["WriteVFCmpV", "WriteVFCmpF"] in
      defm "" : LMULWriteResMX<wrName, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
// Floating-point square root and divide, one WriteRes per (LMUL, SEW) pair.
// The cycle count is the div/sqrt factor for the SEW multiplied by a quarter
// of the one-cycle-per-element count; it is used both as the latency and as
// the SiFive7VA occupancy.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar PerElementCycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
    defvar DivSqrtCycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
                                !div(PerElementCycles, 4));
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    let Latency = DivSqrtCycles, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, DivSqrtCycles)] in {
      foreach wrName = ["WriteVFSqrtV", "WriteVFDivV", "WriteVFDivF"] in
        defm "" : LMULSEWWriteResMXSEW<wrName, [SiFive7VCQ, SiFive7VA],
                                       mx, sew, WorstCase>;
    }
  }
}
// Widening integer-to-float convert. It iterates SchedMxListW rather than
// SchedMxListFW, unlike the other widening FP writes.
foreach mx = SchedMxListW in {
  defvar VAOccupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VAOccupancy)] in
  defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA],
                           mx, WorstCase>;
}
// Widening floating-point ALU, multiply, FMA and convert writes, one WriteRes
// per LMUL in SchedMxListFW, all with a fixed 8-cycle latency.
foreach mx = SchedMxListFW in {
  defvar VAOccupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VAOccupancy)] in {
    foreach wrName = ["WriteVFWALUV", "WriteVFWMulV", "WriteVFWMulAddV",
                      "WriteVFWCvtFToIV", "WriteVFWCvtFToFV",
                      "WriteVFWMulAddF", "WriteVFWMulF", "WriteVFWALUF"] in
      defm "" : LMULWriteResMX<wrName, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
// Narrowing float-to-integer convert. Occupancy comes from the narrowing
// cycle table; the LMUL list is SchedMxListW.
foreach mx = SchedMxListW in {
  defvar NarrowOccupancy = SiFive7GetCyclesNarrowing<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NarrowOccupancy)] in
  defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VCQ, SiFive7VA],
                           mx, WorstCase>;
}
// Narrowing integer-to-float and float-to-float converts, one WriteRes per
// LMUL in SchedMxListFW. Occupancy comes from the narrowing cycle table.
foreach mx = SchedMxListFW in {
  defvar NarrowOccupancy = SiFive7GetCyclesNarrowing<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NarrowOccupancy)] in {
    foreach wrName = ["WriteVFNCvtIToFV", "WriteVFNCvtFToFV"] in
      defm "" : LMULWriteResMX<wrName, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
801 // 14. Vector Reduction Operations
// Integer reductions, one WriteRes per (LMUL, SEW) pair. The reduction cycle
// count is used both as the latency and as the SiFive7VA occupancy.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = RedCycles, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, RedCycles)] in {
      foreach wrName = ["WriteVIRedV_From", "WriteVIRedMinMaxV_From"] in
        defm "" : LMULSEWWriteResMXSEW<wrName, [SiFive7VCQ, SiFive7VA],
                                       mx, sew, WorstCase>;
    }
  }
}
// Widening integer reduction, one WriteRes per (LMUL, SEW) pair drawn from
// SchedMxListWRed and SchedSEWSet<mx, 0, 1>.
foreach mx = SchedMxListWRed in {
  foreach sew = SchedSEWSet<mx, 0, 1>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
    let Latency = RedCycles, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, RedCycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From",
                                     [SiFive7VCQ, SiFive7VA],
                                     mx, sew, WorstCase>;
    }
  }
}
// Floating-point reductions, one WriteRes per (LMUL, SEW) pair. Unordered
// sum and min/max use the plain reduction cycle count; the ordered sum uses
// the ordered-reduction cycle count. In both cases the count is the latency
// and the SiFive7VA occupancy.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, 1>.val in {
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;

    // Unordered reductions.
    defvar UnorderedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    let Latency = UnorderedCycles, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, UnorderedCycles)] in {
      foreach wrName = ["WriteVFRedV_From", "WriteVFRedMinMaxV_From"] in
        defm "" : LMULSEWWriteResMXSEW<wrName, [SiFive7VCQ, SiFive7VA],
                                       mx, sew, WorstCase>;
    }

    // Ordered reduction.
    defvar OrderedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    let Latency = OrderedCycles, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, OrderedCycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From",
                                     [SiFive7VCQ, SiFive7VA],
                                     mx, sew, WorstCase>;
    }
  }
}
// Widening floating-point reductions, one WriteRes per (LMUL, SEW) pair drawn
// from SchedMxListFWRed and SchedSEWSet<mx, 1, 1>. The unordered form uses the
// plain reduction cycle count, the ordered form the ordered-reduction count.
foreach mx = SchedMxListFWRed in {
  foreach sew = SchedSEWSet<mx, 1, 1>.val in {
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;

    // Unordered widening reduction.
    defvar UnorderedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    let Latency = UnorderedCycles, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, UnorderedCycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From",
                                     [SiFive7VCQ, SiFive7VA],
                                     mx, sew, WorstCase>;
    }

    // Ordered widening reduction.
    defvar OrderedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    let Latency = OrderedCycles, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, OrderedCycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From",
                                     [SiFive7VCQ, SiFive7VA],
                                     mx, sew, WorstCase>;
    }
  }
}
856 // 15. Vector Mask Instructions
// Mask-register logical, population-count, find-first-set and set-before/
// including/only-first writes. Occupancy comes from the VMask cycle table;
// latency is a fixed 4 cycles.
foreach mx = SchedMxList in {
  defvar MaskOccupancy = SiFive7GetCyclesVMask<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, MaskOccupancy)] in {
    foreach wrName = ["WriteVMALUV", "WriteVMPopV",
                      "WriteVMFFSV", "WriteVMSFSV"] in
      defm "" : LMULWriteResMX<wrName, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
// WriteVMIotV and WriteVMIdxV use the default (number of DLEN parts)
// occupancy rather than the VMask table, with a fixed 4-cycle latency.
foreach mx = SchedMxList in {
  defvar VAOccupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VAOccupancy)] in {
    foreach wrName = ["WriteVMIotV", "WriteVMIdxV"] in
      defm "" : LMULWriteResMX<wrName, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
876 // 16. Vector Permutation Instructions
// Moves between a scalar register and a vector element. These are not
// LMUL-expanded: each has a 4-cycle latency and a release cycle of 1 + 1 = 2
// on the second resource (SiFive7VA).
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, 2] in {
  foreach wrDef = [WriteVIMovVX, WriteVIMovXV, WriteVFMovVF, WriteVFMovFV] in
    def : WriteRes<wrDef, [SiFive7VCQ, SiFive7VA]>;
}
// Register gather with a scalar (VX) or immediate (VI) index: default
// occupancy and a fixed 8-cycle latency, one WriteRes per LMUL.
foreach mx = SchedMxList in {
  defvar VAOccupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VAOccupancy)] in {
    foreach wrName = ["WriteVRGatherVX", "WriteVRGatherVI"] in
      defm "" : LMULWriteResMX<wrName, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
// Register gather with a vector index and vcompress, one WriteRes per
// (LMUL, SEW) pair. Occupancy is the one-cycle-per-element count and the
// latency is that count plus 3.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar PerElementCycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = !add(PerElementCycles, 3), AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, PerElementCycles)] in {
      foreach wrName = ["WriteVRGatherVV", "WriteVCompressV"] in
        defm "" : LMULSEWWriteResMXSEW<wrName, [SiFive7VCQ, SiFive7VA],
                                       mx, sew, WorstCase>;
    }
  }
}
// Slide-up/down and slide1 writes: default occupancy and a fixed 4-cycle
// latency, one WriteRes per LMUL.
foreach mx = SchedMxList in {
  defvar VAOccupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VAOccupancy)] in {
    foreach wrName = ["WriteVISlideX", "WriteVISlideI",
                      "WriteVISlide1X", "WriteVFSlide1F"] in
      defm "" : LMULWriteResMX<wrName, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
914 // VMov*V is LMUL Aware
// WriteVMov<N>V for N = 1, 2, 4, 8. Latency is a fixed 4 cycles and the
// second resource (SiFive7VA) is released at cycle 1 + 2 * N, giving release
// cycles of 3, 5, 9 and 17 respectively.
foreach nregs = [1, 2, 4, 8] in {
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, !mul(2, nregs))] in
  def : WriteRes<!cast<SchedWrite>("WriteVMov" # nregs # "V"),
                 [SiFive7VCQ, SiFive7VA]>;
}
// CSR accesses and VLENB reads are issued on SiFive7PipeB.
foreach wrDef = [WriteCSR, WriteRdVLENB] in
  def : WriteRes<wrDef, [SiFive7PipeB]>;

// A nop consumes no processor resource.
def : WriteRes<WriteNop, []>;

// The COPY pseudo instruction is scheduled as an integer ALU operation.
def : InstRW<[WriteIALU], (instrs COPY)>;
932 //===----------------------------------------------------------------------===//
934 // Bypass and advance
// Scalar integer reads. Reads listed under SiFive7AnyToGPRBypass use that
// class with its default cycle argument (class defined outside this section);
// the remaining reads get a plain ReadAdvance of 0 cycles.
foreach rdDef = [ReadJmp, ReadJalr] in
  def : SiFive7AnyToGPRBypass<rdDef>;

foreach rdDef = [ReadCSR, ReadStoreData, ReadMemBase] in
  def : ReadAdvance<rdDef, 0>;

foreach rdDef = [ReadIALU, ReadIALU32,
                 ReadShiftImm, ReadShiftImm32,
                 ReadShiftReg, ReadShiftReg32] in
  def : SiFive7AnyToGPRBypass<rdDef>;

// Divide, multiply and atomic operands.
foreach rdDef = [ReadIDiv, ReadIDiv32, ReadIMul, ReadIMul32,
                 ReadAtomicWA, ReadAtomicWD, ReadAtomicDA, ReadAtomicDD,
                 ReadAtomicLDW, ReadAtomicLDD,
                 ReadAtomicSTW, ReadAtomicSTD] in
  def : ReadAdvance<rdDef, 0>;
// Scalar floating-point reads: every operand gets a ReadAdvance of 0 cycles.
foreach rdDef = [ReadFStoreData, ReadFMemBase,
                 // Add, multiply and fused multiply-add.
                 ReadFAdd16, ReadFAdd32, ReadFAdd64,
                 ReadFMul16, ReadFMA16, ReadFMA16Addend,
                 ReadFMul32, ReadFMul64,
                 ReadFMA32, ReadFMA32Addend,
                 ReadFMA64, ReadFMA64Addend,
                 // Divide and square root.
                 ReadFDiv16, ReadFDiv32, ReadFDiv64,
                 ReadFSqrt16, ReadFSqrt32, ReadFSqrt64,
                 // Compare, sign-injection and min/max.
                 ReadFCmp16, ReadFCmp32, ReadFCmp64,
                 ReadFSGNJ16, ReadFSGNJ32, ReadFSGNJ64,
                 ReadFMinMax16, ReadFMinMax32, ReadFMinMax64,
                 // Float to integer conversions.
                 ReadFCvtF16ToI32, ReadFCvtF16ToI64,
                 ReadFCvtF32ToI32, ReadFCvtF32ToI64,
                 ReadFCvtF64ToI32, ReadFCvtF64ToI64,
                 // Integer to float conversions.
                 ReadFCvtI32ToF16, ReadFCvtI32ToF32, ReadFCvtI32ToF64,
                 ReadFCvtI64ToF16, ReadFCvtI64ToF32, ReadFCvtI64ToF64,
                 // Float to float conversions.
                 ReadFCvtF32ToF64, ReadFCvtF64ToF32,
                 ReadFCvtF16ToF32, ReadFCvtF32ToF16,
                 ReadFCvtF16ToF64, ReadFCvtF64ToF16,
                 // Moves between FP and integer registers.
                 ReadFMovF16ToI16, ReadFMovI16ToF16,
                 ReadFMovF32ToI32, ReadFMovI32ToF32,
                 ReadFMovF64ToI64, ReadFMovI64ToF64,
                 // Classify.
                 ReadFClass16, ReadFClass32, ReadFClass64] in
  def : ReadAdvance<rdDef, 0>;
// Unlike the other SiFive7AnyToGPRBypass uses nearby, the SFB reads pass an
// explicit second template argument of 0.
foreach rdDef = [ReadSFBJmp, ReadSFBALU] in
  def : SiFive7AnyToGPRBypass<rdDef, 0>;
// Bit-manipulation reads. Rotates, count-leading/trailing-zeros, orc.b, rev8
// and shXadd use SiFive7AnyToGPRBypass; population count gets a plain
// ReadAdvance of 0 cycles.
foreach rdDef = [ReadRotateImm, ReadRotateImm32,
                 ReadRotateReg, ReadRotateReg32,
                 ReadCLZ, ReadCLZ32, ReadCTZ, ReadCTZ32] in
  def : SiFive7AnyToGPRBypass<rdDef>;

foreach rdDef = [ReadCPOP, ReadCPOP32] in
  def : ReadAdvance<rdDef, 0>;

foreach rdDef = [ReadORCB, ReadREV8, ReadSHXADD, ReadSHXADD32] in
  def : SiFive7AnyToGPRBypass<rdDef>;

// Single-bit instructions
foreach rdDef = [ReadSingleBit, ReadSingleBitImm] in
  def : SiFive7AnyToGPRBypass<rdDef>;
1037 // 6. Configuration-Setting Instructions
// Both configuration-setting reads get a ReadAdvance of 2 cycles.
foreach rdDef = [ReadVSETVLI, ReadVSETVL] in
  def : ReadAdvance<rdDef, 2>;
1041 // 7. Vector Loads and Stores
// Reads that are single SchedRead defs: ReadAdvance of 0 cycles each.
foreach rdDef = [ReadVLDX, ReadVSTX, ReadVLDSX, ReadVSTSX,
                 ReadVST1R, ReadVST2R, ReadVST4R, ReadVST8R] in
  def : ReadAdvance<rdDef, 0>;

// Reads named by string prefix and expanded by the LMULReadAdvance
// multiclass (defined outside this section), all with an advance of 0.
foreach rdName = ["ReadVSTEV", "ReadVSTM",
                  // Strided store data.
                  "ReadVSTS8V", "ReadVSTS16V", "ReadVSTS32V", "ReadVSTS64V",
                  // Indexed load index operands.
                  "ReadVLDUXV", "ReadVLDOXV",
                  // Unordered indexed stores.
                  "ReadVSTUX8", "ReadVSTUX16", "ReadVSTUX32", "ReadVSTUX64",
                  "ReadVSTUXV",
                  "ReadVSTUX8V", "ReadVSTUX16V", "ReadVSTUX32V",
                  "ReadVSTUX64V",
                  // Ordered indexed stores.
                  "ReadVSTOX8", "ReadVSTOX16", "ReadVSTOX32", "ReadVSTOX64",
                  "ReadVSTOXV",
                  "ReadVSTOX8V", "ReadVSTOX16V", "ReadVSTOX32V",
                  "ReadVSTOX64V"] in
  defm "" : LMULReadAdvance<rdName, 0>;
// 11. Vector Integer Arithmetic Instructions
// Vector integer arithmetic reads, all with a ReadAdvance of 0 cycles. They
// are grouped by the multiclass that expands them (the multiclasses are
// defined outside this section).

// Expanded by LMULReadAdvance.
foreach rdName = ["ReadVIALUV", "ReadVIALUX",
                  "ReadVExtV",
                  "ReadVICALUV", "ReadVICALUX",
                  "ReadVShiftV", "ReadVShiftX",
                  "ReadVICmpV", "ReadVICmpX",
                  "ReadVIMinMaxV", "ReadVIMinMaxX",
                  "ReadVIMulV", "ReadVIMulX",
                  "ReadVIMulAddV", "ReadVIMulAddX",
                  "ReadVIMergeV", "ReadVIMergeX",
                  "ReadVIMovV", "ReadVIMovX"] in
  defm : LMULReadAdvance<rdName, 0>;

// Widening and narrowing operands, expanded by LMULReadAdvanceW.
foreach rdName = ["ReadVIWALUV", "ReadVIWALUX",
                  "ReadVNShiftV", "ReadVNShiftX",
                  "ReadVIWMulV", "ReadVIWMulX",
                  "ReadVIWMulAddV", "ReadVIWMulAddX"] in
  defm : LMULReadAdvanceW<rdName, 0>;

// Divide operands, expanded by LMULSEWReadAdvance.
foreach rdName = ["ReadVIDivV", "ReadVIDivX"] in
  defm : LMULSEWReadAdvance<rdName, 0>;
// 12. Vector Fixed-Point Arithmetic Instructions
// Vector fixed-point reads, all with a ReadAdvance of 0 cycles.

// Saturating and averaging ALU, multiply and scaling shift operands.
foreach rdName = ["ReadVSALUV", "ReadVSALUX",
                  "ReadVAALUV", "ReadVAALUX",
                  "ReadVSMulV", "ReadVSMulX",
                  "ReadVSShiftV", "ReadVSShiftX"] in
  defm "" : LMULReadAdvance<rdName, 0>;

// Narrowing clip operands, expanded by LMULReadAdvanceW.
foreach rdName = ["ReadVNClipV", "ReadVNClipX"] in
  defm "" : LMULReadAdvanceW<rdName, 0>;
// 13. Vector Floating-Point Instructions
// Vector floating-point reads, all with a ReadAdvance of 0 cycles. They are
// grouped by the multiclass that expands them (the multiclasses are defined
// outside this section).

// Expanded by LMULReadAdvance.
foreach rdName = ["ReadVFALUV", "ReadVFALUF",
                  "ReadVFMulV", "ReadVFMulF",
                  "ReadVFMulAddV", "ReadVFMulAddF",
                  "ReadVFRecpV",
                  "ReadVFMinMaxV", "ReadVFMinMaxF",
                  "ReadVFSgnjV", "ReadVFSgnjF",
                  "ReadVFCmpV", "ReadVFCmpF",
                  "ReadVFClassV",
                  "ReadVFMergeV", "ReadVFMergeF",
                  "ReadVFMovF",
                  "ReadVFCvtIToFV", "ReadVFCvtFToIV"] in
  defm "" : LMULReadAdvance<rdName, 0>;

// Expanded by LMULReadAdvanceFW.
foreach rdName = ["ReadVFWALUV", "ReadVFWALUF",
                  "ReadVFWMulV", "ReadVFWMulF",
                  "ReadVFWMulAddV", "ReadVFWMulAddF",
                  "ReadVFWCvtFToIV", "ReadVFWCvtFToFV",
                  "ReadVFNCvtIToFV", "ReadVFNCvtFToFV"] in
  defm "" : LMULReadAdvanceFW<rdName, 0>;

// Divide and square-root operands, expanded by LMULSEWReadAdvanceF.
foreach rdName = ["ReadVFDivV", "ReadVFDivF", "ReadVFSqrtV"] in
  defm "" : LMULSEWReadAdvanceF<rdName, 0>;

// Expanded by LMULReadAdvanceW rather than the FW variant.
foreach rdName = ["ReadVFWCvtIToFV", "ReadVFNCvtFToIV"] in
  defm "" : LMULReadAdvanceW<rdName, 0>;
// 14. Vector Reduction Operations
// Reduction source reads, each paired with its "0"-suffixed companion read,
// all with a ReadAdvance of 0 cycles.
foreach rdDef = [ReadVIRedV, ReadVIRedV0,
                 ReadVIWRedV, ReadVIWRedV0,
                 ReadVFRedV, ReadVFRedV0,
                 ReadVFRedOV, ReadVFRedOV0,
                 ReadVFWRedV, ReadVFWRedV0,
                 ReadVFWRedOV, ReadVFWRedOV0] in
  def : ReadAdvance<rdDef, 0>;
// 15. Vector Mask Instructions
// Mask instruction reads, expanded by LMULReadAdvance with an advance of 0.
foreach rdName = ["ReadVMALUV", "ReadVMPopV", "ReadVMFFSV",
                  "ReadVMSFSV", "ReadVMIotV"] in
  defm "" : LMULReadAdvance<rdName, 0>;
// 16. Vector Permutation Instructions
// Vector permutation reads, all with a ReadAdvance of 0 cycles.

// Reads that are single SchedRead defs: scalar/element moves and the
// ReadVMov<N>V reads.
foreach rdDef = [ReadVIMovVX, ReadVIMovXV, ReadVIMovXX,
                 ReadVFMovVF, ReadVFMovFV, ReadVFMovFX,
                 ReadVMov1V, ReadVMov2V, ReadVMov4V, ReadVMov8V] in
  def : ReadAdvance<rdDef, 0>;

// Slide and scalar/immediate-indexed gather operands, expanded by
// LMULReadAdvance.
foreach rdName = ["ReadVISlideV", "ReadVISlideX",
                  "ReadVFSlideV", "ReadVFSlideF",
                  "ReadVRGatherVX_data", "ReadVRGatherVX_index",
                  "ReadVRGatherVI_data"] in
  defm "" : LMULReadAdvance<rdName, 0>;

// Vector-indexed gather and compress operands, expanded by
// LMULSEWReadAdvance.
foreach rdName = ["ReadVRGatherVV_data", "ReadVRGatherVV_index",
                  "ReadVCompressV"] in
  defm "" : LMULSEWReadAdvance<rdName, 0>;
// Mask operand and merge operand reads, all with a ReadAdvance of 0 cycles.
foreach rdDef = [ReadVMask, ReadVMergeOp_WorstCase] in
  def : ReadAdvance<rdDef, 0>;

// ReadVMergeOp_<mx> for every LMUL, plus ReadVMergeOp_<mx>_E<sew> for every
// SEW in SchedSEWSet<mx>; the SchedRead records are looked up by name.
foreach mx = SchedMxList in {
  defvar MergeOpName = "ReadVMergeOp_" # mx;
  def : ReadAdvance<!cast<SchedRead>(MergeOpName), 0>;
  foreach sew = SchedSEWSet<mx>.val in {
    def : ReadAdvance<!cast<SchedRead>(MergeOpName # "_E" # sew), 0>;
  }
}
1210 //===----------------------------------------------------------------------===//
1211 // Unsupported extensions
// Each UnsupportedSched* multiclass is defined outside this section.
// NOTE(review): presumably each one marks the scheduling classes of that
// extension as unsupported for this model -- confirm against its definition.
defm : UnsupportedSchedZbc;   // Zbc
defm : UnsupportedSchedZbkb;  // Zbkb
defm : UnsupportedSchedZbkx;  // Zbkx
defm : UnsupportedSchedZfa;   // Zfa