1 //==- RISCVSchedSiFive7.td - SiFive7 Scheduling Definitions --*- tablegen -*-=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 //===----------------------------------------------------------------------===//
11 /// c is true if mx has the worst case behavior compared to LMULs in MxList.
12 /// On the SiFive7, the worst case LMUL is the largest LMUL
13 /// and the worst case SEW is the smallest SEW for that LMUL.
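/// Illustrative example (assuming the standard SchedMxList used later in this
/// file, whose largest LMUL is M8): SiFive7IsWorstCaseMX<"M8", SchedMxList>.c
/// is 1, and any other mx yields 0.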
14 class SiFive7IsWorstCaseMX<string mx, list<string> MxList> {
15 defvar LLMUL = LargestLMUL<MxList>.r;
16 bit c = !eq(mx, LLMUL);
19 /// c is true if mx and sew have the worst case behavior compared to LMULs in
20 /// MxList. On the SiFive7, the worst case LMUL is the largest LMUL
21 /// and the worst case SEW is the smallest SEW for that LMUL.
22 class SiFive7IsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
24 defvar LLMUL = LargestLMUL<MxList>.r;
25 defvar SSEW = SmallestSEW<mx, isF>.r;
26 bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
29 /// Number of DLEN parts = (LMUL * VLEN) / DLEN.
30 /// Since DLEN = VLEN / 2, Num DLEN parts = 2 * LMUL.
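/// Worked example of the formula above: mx = "M4" (LMUL = 4) covers
/// 2 * 4 = 8 DLEN-sized parts, so the sequencer is busy for 8 cycles;
/// mx = "M1" gives 2 cycles. (Illustrative only; the exact mapping is in the
/// class body.)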
31 class SiFive7GetCyclesDefault<string mx> {
43 class SiFive7GetCyclesNarrowing<string mx> {
54 class SiFive7GetCyclesVMask<string mx> {
66 /// VLDM and VSTM can't read/write more than 2 DLENs of data.
67 /// 2 DLENs when LMUL=8. 1 DLEN for all other LMULs.
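/// For example, a whole-mask load/store (WriteVLDM/WriteVSTM) at LMUL=8 holds
/// the sequencer for 2 cycles, while every smaller LMUL holds it for a single
/// cycle (illustrative reading of the rule above).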
68 class SiFive7GetMaskLoadStoreCycles<string mx> {
75 // Cycles for nf=2 segmented loads and stores are calculated using the
76 // formula (2 * VLEN * LMUL) / DLEN = 4 * LMUL
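// E.g. mx = "M2" gives 4 * 2 = 8 cycles on the sequencer, independent of EEW
// (illustrative application of the formula above).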
77 class SiFive7GetCyclesSegmentedSeg2<string mx> {
89 // Cycles for segmented loads and stores are calculated using the
90 // formula vl * ceil((SEW * nf) / DLEN), where SEW * nf is the segment size.
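// Worked example (assuming VLEN = 512 and DLEN = VLEN / 2 = 256, as noted
// elsewhere in this model): for mx = "M2", sew = 16, nf = 4, the VL upper
// bound is (512 * 2) / 16 = 64 and the segment size is 16 * 4 = 64 bits, so
// c = 64 * ceil(64 / 256) = 64 cycles.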
91 class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
94 // (VLEN * LMUL) / SEW
95 defvar VLUpperBound = !cond(
96 !eq(mx, "M1") : !div(VLEN, sew),
97 !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
98 !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
99 !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
100 !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
101 !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
102 !eq(mx, "MF8") : !div(!div(VLEN, 8), sew),
104 // We can calculate ceil(a/b) using (a + b - 1) / b.
105 defvar a = !mul(sew, nf);
107 int c = !mul(VLUpperBound, !div(!sub(!add(a, b), 1), b));
110 class SiFive7GetCyclesOnePerElement<string mx, int sew> {
111 // FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler
112 // to use a different VLEN, this model will not make scheduling decisions
113 // based on the user specified VLEN.
114 // c = ceil(VLEN / SEW) * LMUL
115 // Note: c >= 1 since the smallest value VLEN takes here (512 / SEW) is
116 // 512 / 64 = 8, and the largest division performed on it is in the MF8
117 // case with division by 8. Therefore, there is no need to ceil the result.
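// Worked example: for sew = 32 and mx = "M2", VLEN (here 512 / SEW) is
// 512 / 32 = 16, so c = 16 * 2 = 32 cycles; for sew = 64 and mx = "MF8",
// c = (512 / 64) / 8 = 1.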
118 int VLEN = !div(512, sew);
120 !eq(mx, "M1") : VLEN,
121 !eq(mx, "M2") : !mul(VLEN, 2),
122 !eq(mx, "M4") : !mul(VLEN, 4),
123 !eq(mx, "M8") : !mul(VLEN, 8),
124 !eq(mx, "MF2") : !div(VLEN, 2),
125 !eq(mx, "MF4") : !div(VLEN, 4),
126 !eq(mx, "MF8") : !div(VLEN, 8)
130 class SiFive7GetDivOrSqrtFactor<int sew> {
132 // TODO: Add SchedSEWSetFP upstream and remove the SEW=8 case.
140 /// Cycles for reductions take approximately VL*SEW/DLEN + 5(4 + log2(DLEN/SEW)) cycles.
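/// Worked example (assuming VLEN = 512, DLEN = 256): for mx = "M1" and
/// sew = 32, VL*SEW/DLEN = 2*LMUL = 2 and 5*(4 + log2(256/32)) = 35, so the
/// estimate is roughly 37 cycles.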
142 class SiFive7GetReductionCycles<string mx, int sew> {
143 // VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
144 // VLUpperBound=(VLEN*LMUL)/SEW.
146 defvar DLEN = !div(VLEN, 2);
147 defvar TwoTimesLMUL = !cond(
158 !mul(5, !add(4, !logtwo(!div(DLEN, sew))))
162 /// Cycles for ordered reductions take approximately 6*VL cycles.
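/// E.g. for mx = "M1" and sew = 64 (VLEN = 512), VL is at most 512 / 64 = 8,
/// so c = 6 * 8 = 48 cycles.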
163 class SiFive7GetOrderedReductionCycles<string mx, int sew> {
165 // (VLEN * LMUL) / SEW
166 defvar VLUpperBound = !cond(
167 !eq(mx, "M1") : !div(VLEN, sew),
168 !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
169 !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
170 !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
171 !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
172 !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
173 !eq(mx, "MF8") : !div(!div(VLEN, 8), sew),
175 int c = !mul(6, VLUpperBound);
178 class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
179 : ReadAdvance<read, cycles, [WriteIALU, WriteIALU32,
180 WriteShiftImm, WriteShiftImm32,
181 WriteShiftReg, WriteShiftReg32,
182 WriteSHXADD, WriteSHXADD32,
183 WriteRotateImm, WriteRotateImm32,
184 WriteRotateReg, WriteRotateReg32,
185 WriteSingleBit, WriteSingleBitImm,
186 WriteBEXT, WriteBEXTI,
187 WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
188 WriteCPOP, WriteCPOP32,
189 WriteREV8, WriteORCB, WriteIMinMax, WriteSFB,
190 WriteIMul, WriteIMul32,
191 WriteIDiv, WriteIDiv32,
192 WriteIRem, WriteIRem32,
193 WriteLDB, WriteLDH, WriteLDW, WriteLDD]>;
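// Typical use (as done in the bypass section near the end of this file):
//   def : SiFive7AnyToGPRBypass<ReadIALU>;
// This expands to a ReadAdvance of 2 cycles on ReadIALU whenever the operand
// is produced by one of the simple integer/load writes listed above.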
195 // SiFive7 machine model for scheduling and other instruction cost heuristics.
196 def SiFive7Model : SchedMachineModel {
197 let MicroOpBufferSize = 0; // Explicitly set to zero since SiFive7 is in-order.
198 let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
200 let MispredictPenalty = 3;
201 let CompleteModel = 0;
202 let EnableIntervals = true;
203 let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
204 HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
205 HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
209 // The SiFive7 microarchitecture has three pipelines: A, B, V.
210 // Pipe A can handle memory, integer ALU, and vector operations.
211 // Pipe B can handle integer ALU, control flow, integer multiply and divide,
212 // and floating point computation.
213 // The V pipeline is modeled by the VCQ, VA, VL, and VS resources.
214 let SchedModel = SiFive7Model in {
215 let BufferSize = 0 in {
216 def SiFive7PipeA : ProcResource<1>;
217 def SiFive7PipeB : ProcResource<1>;
218 def SiFive7IDiv : ProcResource<1>; // Int Division
219 def SiFive7FDiv : ProcResource<1>; // FP Division/Sqrt
220 def SiFive7VA : ProcResource<1>; // Arithmetic sequencer
221 def SiFive7VL : ProcResource<1>; // Load sequencer
222 def SiFive7VS : ProcResource<1>; // Store sequencer
223 // The VCQ accepts instructions from the A pipe and holds them until the
224 // vector unit is ready to dequeue them. The unit dequeues up to one instruction
225 // per cycle, in order, as soon as the sequencer for that type of instruction is
226 // available. This resource is meant to be used for 1 cycle by all vector
227 // instructions, to model that only one vector instruction may be dequeued at a
228 // time. The actual dequeueing into the sequencer is modeled by the VA, VL, and
229 // VS sequencer resources below. Each of them will only accept a single
230 // instruction at a time and remain busy for the number of cycles associated
231 // with that instruction.
232 def SiFive7VCQ : ProcResource<1>; // Vector Command Queue
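// In the vector write-resource definitions below this shows up as the pattern
//   AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)]
// over [SiFive7VCQ, <sequencer>]: the VCQ is held from cycle 0 to cycle 1
// (one cycle for every vector instruction), while the sequencer is held from
// cycle 1 to cycle 1 + Cycles, i.e. for that instruction's Cycles count.
// (Informal reading of the interval-based resource model enabled above.)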
235 def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;
239 def : WriteRes<WriteJmp, [SiFive7PipeB]>;
240 def : WriteRes<WriteJal, [SiFive7PipeB]>;
241 def : WriteRes<WriteJalr, [SiFive7PipeB]>;
244 // Short forward branch
245 def : WriteRes<WriteSFB, [SiFive7PipeA, SiFive7PipeB]> {
250 // Integer arithmetic and logic
252 def : WriteRes<WriteIALU, [SiFive7PipeAB]>;
253 def : WriteRes<WriteIALU32, [SiFive7PipeAB]>;
254 def : WriteRes<WriteShiftImm, [SiFive7PipeAB]>;
255 def : WriteRes<WriteShiftImm32, [SiFive7PipeAB]>;
256 def : WriteRes<WriteShiftReg, [SiFive7PipeAB]>;
257 def : WriteRes<WriteShiftReg32, [SiFive7PipeAB]>;
260 // Integer multiplication
262 def : WriteRes<WriteIMul, [SiFive7PipeB]>;
263 def : WriteRes<WriteIMul32, [SiFive7PipeB]>;
267 def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]> {
269 let ReleaseAtCycles = [1, 65];
271 def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]> {
273 let ReleaseAtCycles = [1, 33];
277 def : WriteRes<WriteIRem, [SiFive7PipeB, SiFive7IDiv]> {
279 let ReleaseAtCycles = [1, 65];
281 def : WriteRes<WriteIRem32, [SiFive7PipeB, SiFive7IDiv]> {
283 let ReleaseAtCycles = [1, 33];
288 // Rotates are in the late-B ALU.
289 def : WriteRes<WriteRotateImm, [SiFive7PipeB]>;
290 def : WriteRes<WriteRotateImm32, [SiFive7PipeB]>;
291 def : WriteRes<WriteRotateReg, [SiFive7PipeB]>;
292 def : WriteRes<WriteRotateReg32, [SiFive7PipeB]>;
294 // clz[w]/ctz[w] are in the late-B ALU.
295 def : WriteRes<WriteCLZ, [SiFive7PipeB]>;
296 def : WriteRes<WriteCLZ32, [SiFive7PipeB]>;
297 def : WriteRes<WriteCTZ, [SiFive7PipeB]>;
298 def : WriteRes<WriteCTZ32, [SiFive7PipeB]>;
300 // cpop[w] look exactly like multiply.
301 def : WriteRes<WriteCPOP, [SiFive7PipeB]>;
302 def : WriteRes<WriteCPOP32, [SiFive7PipeB]>;
304 // orc.b is in the late-B ALU.
305 def : WriteRes<WriteORCB, [SiFive7PipeB]>;
307 // min/max are in the late-B ALU
308 def : WriteRes<WriteIMinMax, [SiFive7PipeB]>;
310 // rev8 is in the late-A and late-B ALUs.
311 def : WriteRes<WriteREV8, [SiFive7PipeAB]>;
313 // shNadd[.uw] is on the early-B and late-B ALUs.
314 def : WriteRes<WriteSHXADD, [SiFive7PipeB]>;
315 def : WriteRes<WriteSHXADD32, [SiFive7PipeB]>;
318 // Single-bit instructions
319 // The BEXT[I] instructions are available on all ALUs; the other single-bit
320 // instructions are only available on the B pipe (SiFive7PipeB).
322 def : WriteRes<WriteSingleBit, [SiFive7PipeB]>;
323 def : WriteRes<WriteSingleBitImm, [SiFive7PipeB]>;
324 def : WriteRes<WriteBEXT, [SiFive7PipeAB]>;
325 def : WriteRes<WriteBEXTI, [SiFive7PipeAB]>;
329 def : WriteRes<WriteSTB, [SiFive7PipeA]>;
330 def : WriteRes<WriteSTH, [SiFive7PipeA]>;
331 def : WriteRes<WriteSTW, [SiFive7PipeA]>;
332 def : WriteRes<WriteSTD, [SiFive7PipeA]>;
333 def : WriteRes<WriteFST16, [SiFive7PipeA]>;
334 def : WriteRes<WriteFST32, [SiFive7PipeA]>;
335 def : WriteRes<WriteFST64, [SiFive7PipeA]>;
338 def : WriteRes<WriteLDB, [SiFive7PipeA]>;
339 def : WriteRes<WriteLDH, [SiFive7PipeA]>;
340 def : WriteRes<WriteLDW, [SiFive7PipeA]>;
341 def : WriteRes<WriteLDD, [SiFive7PipeA]>;
345 def : WriteRes<WriteFLD16, [SiFive7PipeA]>;
346 def : WriteRes<WriteFLD32, [SiFive7PipeA]>;
347 def : WriteRes<WriteFLD64, [SiFive7PipeA]>;
351 def : WriteRes<WriteAtomicSTW, [SiFive7PipeA]>;
352 def : WriteRes<WriteAtomicSTD, [SiFive7PipeA]>;
355 def : WriteRes<WriteAtomicW, [SiFive7PipeA]>;
356 def : WriteRes<WriteAtomicD, [SiFive7PipeA]>;
357 def : WriteRes<WriteAtomicLDW, [SiFive7PipeA]>;
358 def : WriteRes<WriteAtomicLDD, [SiFive7PipeA]>;
363 def : WriteRes<WriteFAdd16, [SiFive7PipeB]>;
364 def : WriteRes<WriteFMul16, [SiFive7PipeB]>;
365 def : WriteRes<WriteFMA16, [SiFive7PipeB]>;
368 def : WriteRes<WriteFSGNJ16, [SiFive7PipeB]>;
369 def : WriteRes<WriteFMinMax16, [SiFive7PipeB]>;
372 let Latency = 14, ReleaseAtCycles = [1, 13] in {
373 def : WriteRes<WriteFDiv16, [SiFive7PipeB, SiFive7FDiv]>;
374 def : WriteRes<WriteFSqrt16, [SiFive7PipeB, SiFive7FDiv]>;
379 def : WriteRes<WriteFAdd32, [SiFive7PipeB]>;
380 def : WriteRes<WriteFMul32, [SiFive7PipeB]>;
381 def : WriteRes<WriteFMA32, [SiFive7PipeB]>;
384 def : WriteRes<WriteFSGNJ32, [SiFive7PipeB]>;
385 def : WriteRes<WriteFMinMax32, [SiFive7PipeB]>;
388 def : WriteRes<WriteFDiv32, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 27;
389 let ReleaseAtCycles = [1, 26]; }
390 def : WriteRes<WriteFSqrt32, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 27;
391 let ReleaseAtCycles = [1, 26]; }
395 def : WriteRes<WriteFAdd64, [SiFive7PipeB]>;
396 def : WriteRes<WriteFMul64, [SiFive7PipeB]>;
397 def : WriteRes<WriteFMA64, [SiFive7PipeB]>;
400 def : WriteRes<WriteFSGNJ64, [SiFive7PipeB]>;
401 def : WriteRes<WriteFMinMax64, [SiFive7PipeB]>;
404 def : WriteRes<WriteFDiv64, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 56;
405 let ReleaseAtCycles = [1, 55]; }
406 def : WriteRes<WriteFSqrt64, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 56;
407 let ReleaseAtCycles = [1, 55]; }
411 def : WriteRes<WriteFCvtI32ToF16, [SiFive7PipeB]>;
412 def : WriteRes<WriteFCvtI32ToF32, [SiFive7PipeB]>;
413 def : WriteRes<WriteFCvtI32ToF64, [SiFive7PipeB]>;
414 def : WriteRes<WriteFCvtI64ToF16, [SiFive7PipeB]>;
415 def : WriteRes<WriteFCvtI64ToF32, [SiFive7PipeB]>;
416 def : WriteRes<WriteFCvtI64ToF64, [SiFive7PipeB]>;
417 def : WriteRes<WriteFCvtF16ToI32, [SiFive7PipeB]>;
418 def : WriteRes<WriteFCvtF16ToI64, [SiFive7PipeB]>;
419 def : WriteRes<WriteFCvtF16ToF32, [SiFive7PipeB]>;
420 def : WriteRes<WriteFCvtF16ToF64, [SiFive7PipeB]>;
421 def : WriteRes<WriteFCvtF32ToI32, [SiFive7PipeB]>;
422 def : WriteRes<WriteFCvtF32ToI64, [SiFive7PipeB]>;
423 def : WriteRes<WriteFCvtF32ToF16, [SiFive7PipeB]>;
424 def : WriteRes<WriteFCvtF32ToF64, [SiFive7PipeB]>;
425 def : WriteRes<WriteFCvtF64ToI32, [SiFive7PipeB]>;
426 def : WriteRes<WriteFCvtF64ToI64, [SiFive7PipeB]>;
427 def : WriteRes<WriteFCvtF64ToF16, [SiFive7PipeB]>;
428 def : WriteRes<WriteFCvtF64ToF32, [SiFive7PipeB]>;
430 def : WriteRes<WriteFClass16, [SiFive7PipeB]>;
431 def : WriteRes<WriteFClass32, [SiFive7PipeB]>;
432 def : WriteRes<WriteFClass64, [SiFive7PipeB]>;
433 def : WriteRes<WriteFCmp16, [SiFive7PipeB]>;
434 def : WriteRes<WriteFCmp32, [SiFive7PipeB]>;
435 def : WriteRes<WriteFCmp64, [SiFive7PipeB]>;
436 def : WriteRes<WriteFMovI16ToF16, [SiFive7PipeB]>;
437 def : WriteRes<WriteFMovF16ToI16, [SiFive7PipeB]>;
438 def : WriteRes<WriteFMovI32ToF32, [SiFive7PipeB]>;
439 def : WriteRes<WriteFMovF32ToI32, [SiFive7PipeB]>;
440 def : WriteRes<WriteFMovI64ToF64, [SiFive7PipeB]>;
441 def : WriteRes<WriteFMovF64ToI64, [SiFive7PipeB]>;
444 // 6. Configuration-Setting Instructions
446 def : WriteRes<WriteVSETVLI, [SiFive7PipeA]>;
447 def : WriteRes<WriteVSETIVLI, [SiFive7PipeA]>;
448 def : WriteRes<WriteVSETVL, [SiFive7PipeA]>;
451 // 7. Vector Loads and Stores
452 // Unit-stride loads and stores can operate at the full bandwidth of the memory
453 // pipe. The memory pipe is DLEN bits wide on x280.
454 foreach mx = SchedMxList in {
455 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
456 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
457 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
458 defm "" : LMULWriteResMX<"WriteVLDE", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
459 defm "" : LMULWriteResMX<"WriteVLDFF", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
461 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
462 defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
465 foreach mx = SchedMxList in {
466 defvar Cycles = SiFive7GetMaskLoadStoreCycles<mx>.c;
467 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
468 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
469 defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
470 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
471 defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
474 // Strided loads and stores operate at one element per cycle and should be
475 // scheduled accordingly. Indexed loads and stores operate at one element per
476 // cycle, and they stall the machine until all addresses have been generated,
477 // so they cannot be scheduled. Indexed and strided loads and stores have LMUL
478 // specific suffixes, but since SEW is already encoded in the name of the
479 // resource, we do not need to use LMULSEWXXX constructors. However, we do
480 // use the SEW from the name to determine the number of Cycles.
482 foreach mx = SchedMxList in {
483 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
484 defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8>.c;
485 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
486 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
487 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
488 [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
489 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
490 defm "" : LMULWriteResMX<"WriteVLDUX8", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
491 defm "" : LMULWriteResMX<"WriteVLDOX8", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
493 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
494 defm "" : LMULWriteResMX<"WriteVSTS8", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
495 defm "" : LMULWriteResMX<"WriteVSTUX8", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
496 defm "" : LMULWriteResMX<"WriteVSTOX8", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
499 // TODO: The MxLists need to be filtered by EEW. We only need to support
500 // LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
501 // since LMUL >= 16/64.
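// E.g. for EEW = 16, LMUL >= 16 / 64 = 1/4, which is why the list below starts
// at MF4; likewise the EEW = 32 and EEW = 64 lists further down start at MF2
// and M1 respectively.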
502 foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
503 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
504 defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16>.c;
505 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
506 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
507 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
508 [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
509 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
510 defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
511 defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
513 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
514 defm "" : LMULWriteResMX<"WriteVSTS16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
515 defm "" : LMULWriteResMX<"WriteVSTUX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
516 defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
519 foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
520 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
521 defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32>.c;
522 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
523 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
524 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
525 [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
526 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
527 defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
528 defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
530 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
531 defm "" : LMULWriteResMX<"WriteVSTS32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
532 defm "" : LMULWriteResMX<"WriteVSTUX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
533 defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
536 foreach mx = ["M1", "M2", "M4", "M8"] in {
537 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
538 defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64>.c;
539 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
540 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
541 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
542 [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
543 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
544 defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
545 defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
547 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
548 defm "" : LMULWriteResMX<"WriteVSTS64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
549 defm "" : LMULWriteResMX<"WriteVSTUX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
550 defm "" : LMULWriteResMX<"WriteVSTOX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
554 // VLD*R is LMUL aware
555 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in
556 def : WriteRes<WriteVLD1R, [SiFive7VCQ, SiFive7VL]>;
557 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in
558 def : WriteRes<WriteVLD2R, [SiFive7VCQ, SiFive7VL]>;
559 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in
560 def : WriteRes<WriteVLD4R, [SiFive7VCQ, SiFive7VL]>;
561 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in
562 def : WriteRes<WriteVLD8R, [SiFive7VCQ, SiFive7VL]>;
563 // VST*R is LMUL aware
564 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in
565 def : WriteRes<WriteVST1R, [SiFive7VCQ, SiFive7VS]>;
566 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in
567 def : WriteRes<WriteVST2R, [SiFive7VCQ, SiFive7VS]>;
568 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in
569 def : WriteRes<WriteVST4R, [SiFive7VCQ, SiFive7VS]>;
570 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in
571 def : WriteRes<WriteVST8R, [SiFive7VCQ, SiFive7VS]>;
573 // Segmented Loads and Stores
574 // Unit-stride segmented loads and stores are effectively converted into strided
575 // segment loads and stores. Strided segment loads and stores operate at up to
576 // one segment per cycle if the segment fits within one aligned memory beat.
577 // Indexed segment loads and stores operate at the same rate as strided ones,
578 // but they stall the machine until all addresses have been generated.
579 foreach mx = SchedMxList in {
580 foreach eew = [8, 16, 32, 64] in {
581 defvar Cycles = SiFive7GetCyclesSegmentedSeg2<mx>.c;
582 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
583 // Does not chain so set latency high
584 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
585 defm "" : LMULWriteResMX<"WriteVLSEG2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
586 defm "" : LMULWriteResMX<"WriteVLSEGFF2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
588 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
589 defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
591 defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
592 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
593 // Does not chain so set latency high
594 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
595 defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
596 defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
598 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
599 defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
603 foreach mx = SchedMxList in {
605 foreach eew = [8, 16, 32, 64] in {
606 defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
607 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
608 // Does not chain so set latency high
609 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
610 defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
611 defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
612 defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
614 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
615 defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
616 defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
617 defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
623 // 11. Vector Integer Arithmetic Instructions
624 foreach mx = SchedMxList in {
625 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
626 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
627 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
628 defm "" : LMULWriteResMX<"WriteVIALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
629 defm "" : LMULWriteResMX<"WriteVIALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
630 defm "" : LMULWriteResMX<"WriteVIALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
631 defm "" : LMULWriteResMX<"WriteVICALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
632 defm "" : LMULWriteResMX<"WriteVICALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
633 defm "" : LMULWriteResMX<"WriteVICALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
634 defm "" : LMULWriteResMX<"WriteVICALUMV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
635 defm "" : LMULWriteResMX<"WriteVICALUMX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
636 defm "" : LMULWriteResMX<"WriteVICALUMI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
637 defm "" : LMULWriteResMX<"WriteVShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
638 defm "" : LMULWriteResMX<"WriteVShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
639 defm "" : LMULWriteResMX<"WriteVShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
640 defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
641 defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
642 defm "" : LMULWriteResMX<"WriteVIMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
643 defm "" : LMULWriteResMX<"WriteVIMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
644 defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
645 defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
646 defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
647 defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
648 defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
649 defm "" : LMULWriteResMX<"WriteVIMovV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
650 defm "" : LMULWriteResMX<"WriteVIMovX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
651 defm "" : LMULWriteResMX<"WriteVIMovI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
653 // Mask results can't chain.
654 let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
655 defm "" : LMULWriteResMX<"WriteVICmpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
656 defm "" : LMULWriteResMX<"WriteVICmpX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
657 defm "" : LMULWriteResMX<"WriteVICmpI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
660 foreach mx = SchedMxList in {
661 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
662 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
663 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
664 defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
667 foreach mx = SchedMxList in {
668 foreach sew = SchedSEWSet<mx>.val in {
669 defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
670 !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
671 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
672 let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
673 defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
674 defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
680 foreach mx = SchedMxListW in {
681 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
682 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
683 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
684 defm "" : LMULWriteResMX<"WriteVIWALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
685 defm "" : LMULWriteResMX<"WriteVIWALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
686 defm "" : LMULWriteResMX<"WriteVIWALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
687 defm "" : LMULWriteResMX<"WriteVIWMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
688 defm "" : LMULWriteResMX<"WriteVIWMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
689 defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
690 defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
694 foreach mx = SchedMxListW in {
695 defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
696 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
697 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
698 defm "" : LMULWriteResMX<"WriteVNShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
699 defm "" : LMULWriteResMX<"WriteVNShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
700 defm "" : LMULWriteResMX<"WriteVNShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
704 // 12. Vector Fixed-Point Arithmetic Instructions
705 foreach mx = SchedMxList in {
706 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
707 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
708 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
709 defm "" : LMULWriteResMX<"WriteVSALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
710 defm "" : LMULWriteResMX<"WriteVSALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
711 defm "" : LMULWriteResMX<"WriteVSALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
712 defm "" : LMULWriteResMX<"WriteVAALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
713 defm "" : LMULWriteResMX<"WriteVAALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
714 defm "" : LMULWriteResMX<"WriteVSMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
715 defm "" : LMULWriteResMX<"WriteVSMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
716 defm "" : LMULWriteResMX<"WriteVSShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
717 defm "" : LMULWriteResMX<"WriteVSShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
718 defm "" : LMULWriteResMX<"WriteVSShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
722 foreach mx = SchedMxListW in {
723 defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
724 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
725 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
726 defm "" : LMULWriteResMX<"WriteVNClipV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
727 defm "" : LMULWriteResMX<"WriteVNClipX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
728 defm "" : LMULWriteResMX<"WriteVNClipI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
732 // 13. Vector Floating-Point Instructions
733 foreach mx = SchedMxListF in {
734 foreach sew = SchedSEWSet<mx, isF=1>.val in {
735 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
736 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
737 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
738 defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
739 defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
740 defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
741 defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
742 defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
743 defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
744 defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
745 defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
747 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
748 defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
749 defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
750 defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
751 defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
755 foreach mx = SchedMxList in {
756 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
757 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
758 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
759 defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
761 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
762 defm "" : LMULWriteResMX<"WriteVFClassV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
763 defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
764 defm "" : LMULWriteResMX<"WriteVFMovV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
766 // Mask results can't chain.
767 let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
768 defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
769 defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
772 foreach mx = SchedMxListF in {
773 foreach sew = SchedSEWSet<mx, isF=1>.val in {
774 defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
775 !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
776 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
777 let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
778 defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
779 defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
780 defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
786 foreach mx = SchedMxListW in {
787 foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
788 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
789 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
790 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
791 defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
794 foreach mx = SchedMxListFW in {
795 foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
796 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
797 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
798 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
799 defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
800 defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
801 defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
802 defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
803 defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
804 defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
805 defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
808 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
809 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
810 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
811 defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
814 foreach mx = SchedMxListW in {
815 defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
816 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
817 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
818 defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
821 foreach mx = SchedMxListFW in {
822 foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
823 defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
824 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
825 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
826 defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
827 defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
832 // 14. Vector Reduction Operations
833 foreach mx = SchedMxList in {
834 foreach sew = SchedSEWSet<mx>.val in {
835 defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
836 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
837 let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
838 defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VCQ, SiFive7VA],
839 mx, sew, IsWorstCase>;
840 defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SiFive7VCQ, SiFive7VA],
841 mx, sew, IsWorstCase>;
846 foreach mx = SchedMxListWRed in {
847 foreach sew = SchedSEWSet<mx, 0, 1>.val in {
848 defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
849 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
850 let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
851 defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VCQ, SiFive7VA],
852 mx, sew, IsWorstCase>;
856 foreach mx = SchedMxListF in {
857 foreach sew = SchedSEWSet<mx, 1>.val in {
858 defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
859 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
860 let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in {
861 defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VCQ, SiFive7VA],
862 mx, sew, IsWorstCase>;
863 defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VCQ, SiFive7VA],
864 mx, sew, IsWorstCase>;
866 defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
867 let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
868 defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VCQ, SiFive7VA],
869 mx, sew, IsWorstCase>;
873 foreach mx = SchedMxListFWRed in {
874 foreach sew = SchedSEWSet<mx, 1, 1>.val in {
875 defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
876 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
877 let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in
878 defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VCQ, SiFive7VA],
879 mx, sew, IsWorstCase>;
880 defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
881 let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
882 defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VCQ, SiFive7VA],
883 mx, sew, IsWorstCase>;
887 // 15. Vector Mask Instructions
888 foreach mx = SchedMxList in {
889 defvar Cycles = SiFive7GetCyclesVMask<mx>.c;
890 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
891 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
892 defm "" : LMULWriteResMX<"WriteVMALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
893 defm "" : LMULWriteResMX<"WriteVMPopV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
894 defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
895 defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
898 foreach mx = SchedMxList in {
899 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
900 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
901 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
902 defm "" : LMULWriteResMX<"WriteVIotaV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
903 defm "" : LMULWriteResMX<"WriteVIdxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
907 // 16. Vector Permutation Instructions
908 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 1)] in {
909 def : WriteRes<WriteVMovSX, [SiFive7VCQ, SiFive7VA]>;
910 def : WriteRes<WriteVMovXS, [SiFive7VCQ, SiFive7VA]>;
911 def : WriteRes<WriteVMovSF, [SiFive7VCQ, SiFive7VA]>;
912 def : WriteRes<WriteVMovFS, [SiFive7VCQ, SiFive7VA]>;
914 foreach mx = SchedMxList in {
915 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
916 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
917 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
918 defm "" : LMULWriteResMX<"WriteVRGatherVX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
919 defm "" : LMULWriteResMX<"WriteVRGatherVI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
923 foreach mx = SchedMxList in {
924 foreach sew = SchedSEWSet<mx>.val in {
925 defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
926 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
927 let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
928 defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
929 defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
930 defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
935 foreach mx = SchedMxList in {
936 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
937 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
938 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
939 defm "" : LMULWriteResMX<"WriteVSlideUpX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
940 defm "" : LMULWriteResMX<"WriteVSlideDownX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
941 defm "" : LMULWriteResMX<"WriteVSlideI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
942 defm "" : LMULWriteResMX<"WriteVISlide1X", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
943 defm "" : LMULWriteResMX<"WriteVFSlide1F", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
947 // VMov*V is LMUL aware
948 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in
949 def : WriteRes<WriteVMov1V, [SiFive7VCQ, SiFive7VA]>;
950 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in
951 def : WriteRes<WriteVMov2V, [SiFive7VCQ, SiFive7VA]>;
952 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in
953 def : WriteRes<WriteVMov4V, [SiFive7VCQ, SiFive7VA]>;
954 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in
955 def : WriteRes<WriteVMov8V, [SiFive7VCQ, SiFive7VA]>;
958 def : WriteRes<WriteCSR, [SiFive7PipeB]>;
959 def : WriteRes<WriteNop, []>;
961 def : WriteRes<WriteRdVLENB, [SiFive7PipeB]>;
963 def : InstRW<[WriteIALU], (instrs COPY)>;
967 // In principle we don't know the latency of any VCIX instructions (they
968 // depend on a particular coprocessor implementation). However, the default
969 // latency of 1 can lead to issues [1]. So instead we set the latency to the
970 // default provided by `SiFive7GetCyclesDefault`. This is still not accurate
971 // and can lead to suboptimal codegen, but should hopefully be a better approximation than the default.
974 // [1] https://github.com/llvm/llvm-project/issues/83391
975 foreach mx = SchedMxList in {
976 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
977 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
978 let Latency = Cycles,
979 AcquireAtCycles = [0, 1],
980 ReleaseAtCycles = [1, !add(1, Cycles)] in {
981 defm "" : LMULWriteResMX<"WriteVC_V_I", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
982 defm "" : LMULWriteResMX<"WriteVC_V_X", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
983 defm "" : LMULWriteResMX<"WriteVC_V_IV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
984 defm "" : LMULWriteResMX<"WriteVC_V_VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
985 defm "" : LMULWriteResMX<"WriteVC_V_XV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
986 defm "" : LMULWriteResMX<"WriteVC_V_IVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
987 defm "" : LMULWriteResMX<"WriteVC_V_IVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
988 defm "" : LMULWriteResMX<"WriteVC_V_VVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
989 defm "" : LMULWriteResMX<"WriteVC_V_VVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
990 defm "" : LMULWriteResMX<"WriteVC_V_XVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
991 defm "" : LMULWriteResMX<"WriteVC_V_XVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
992 foreach f = ["FPR16", "FPR32", "FPR64"] in {
993 defm "" : LMULWriteResMX<"WriteVC_V_" # f # "V", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
994 defm "" : LMULWriteResMX<"WriteVC_V_" # f # "VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
995 defm "" : LMULWriteResMX<"WriteVC_V_" # f # "VW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
997 defm "" : LMULWriteResMX<"WriteVC_I", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
998 defm "" : LMULWriteResMX<"WriteVC_X", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
999 defm "" : LMULWriteResMX<"WriteVC_IV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
1000 defm "" : LMULWriteResMX<"WriteVC_VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
1001 defm "" : LMULWriteResMX<"WriteVC_XV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
1002 defm "" : LMULWriteResMX<"WriteVC_IVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
1003 defm "" : LMULWriteResMX<"WriteVC_IVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
1004 defm "" : LMULWriteResMX<"WriteVC_VVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
1005 defm "" : LMULWriteResMX<"WriteVC_VVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
1006 defm "" : LMULWriteResMX<"WriteVC_XVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
1007 defm "" : LMULWriteResMX<"WriteVC_XVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
1008 foreach f = ["FPR16", "FPR32", "FPR64"] in {
1009 defm "" : LMULWriteResMX<"WriteVC_" # f # "V", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
1010 defm "" : LMULWriteResMX<"WriteVC_" # f # "VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
1011 defm "" : LMULWriteResMX<"WriteVC_" # f # "VW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
1016 //===----------------------------------------------------------------------===//
1018 // Bypass and advance
1019 def : SiFive7AnyToGPRBypass<ReadJmp>;
1020 def : SiFive7AnyToGPRBypass<ReadJalr>;
1021 def : ReadAdvance<ReadCSR, 0>;
1022 def : SiFive7AnyToGPRBypass<ReadStoreData>;
1023 def : ReadAdvance<ReadMemBase, 0>;
1024 def : SiFive7AnyToGPRBypass<ReadIALU>;
1025 def : SiFive7AnyToGPRBypass<ReadIALU32>;
1026 def : SiFive7AnyToGPRBypass<ReadShiftImm>;
1027 def : SiFive7AnyToGPRBypass<ReadShiftImm32>;
1028 def : SiFive7AnyToGPRBypass<ReadShiftReg>;
1029 def : SiFive7AnyToGPRBypass<ReadShiftReg32>;
1030 def : ReadAdvance<ReadIDiv, 0>;
1031 def : ReadAdvance<ReadIDiv32, 0>;
1032 def : ReadAdvance<ReadIRem, 0>;
1033 def : ReadAdvance<ReadIRem32, 0>;
1034 def : ReadAdvance<ReadIMul, 0>;
1035 def : ReadAdvance<ReadIMul32, 0>;
1036 def : ReadAdvance<ReadAtomicWA, 0>;
1037 def : ReadAdvance<ReadAtomicWD, 0>;
1038 def : ReadAdvance<ReadAtomicDA, 0>;
1039 def : ReadAdvance<ReadAtomicDD, 0>;
1040 def : ReadAdvance<ReadAtomicLDW, 0>;
1041 def : ReadAdvance<ReadAtomicLDD, 0>;
1042 def : ReadAdvance<ReadAtomicSTW, 0>;
1043 def : ReadAdvance<ReadAtomicSTD, 0>;
1044 def : ReadAdvance<ReadFStoreData, 0>;
1045 def : ReadAdvance<ReadFMemBase, 0>;
1046 def : ReadAdvance<ReadFAdd16, 0>;
1047 def : ReadAdvance<ReadFAdd32, 0>;
1048 def : ReadAdvance<ReadFAdd64, 0>;
1049 def : ReadAdvance<ReadFMul16, 0>;
1050 def : ReadAdvance<ReadFMA16, 0>;
1051 def : ReadAdvance<ReadFMA16Addend, 0>;
1052 def : ReadAdvance<ReadFMul32, 0>;
1053 def : ReadAdvance<ReadFMul64, 0>;
1054 def : ReadAdvance<ReadFMA32, 0>;
1055 def : ReadAdvance<ReadFMA32Addend, 0>;
1056 def : ReadAdvance<ReadFMA64, 0>;
1057 def : ReadAdvance<ReadFMA64Addend, 0>;
1058 def : ReadAdvance<ReadFDiv16, 0>;
1059 def : ReadAdvance<ReadFDiv32, 0>;
1060 def : ReadAdvance<ReadFDiv64, 0>;
1061 def : ReadAdvance<ReadFSqrt16, 0>;
1062 def : ReadAdvance<ReadFSqrt32, 0>;
1063 def : ReadAdvance<ReadFSqrt64, 0>;
1064 def : ReadAdvance<ReadFCmp16, 0>;
1065 def : ReadAdvance<ReadFCmp32, 0>;
1066 def : ReadAdvance<ReadFCmp64, 0>;
1067 def : ReadAdvance<ReadFSGNJ16, 0>;
1068 def : ReadAdvance<ReadFSGNJ32, 0>;
1069 def : ReadAdvance<ReadFSGNJ64, 0>;
1070 def : ReadAdvance<ReadFMinMax16, 0>;
1071 def : ReadAdvance<ReadFMinMax32, 0>;
1072 def : ReadAdvance<ReadFMinMax64, 0>;
1073 def : ReadAdvance<ReadFCvtF16ToI32, 0>;
1074 def : ReadAdvance<ReadFCvtF16ToI64, 0>;
1075 def : ReadAdvance<ReadFCvtF32ToI32, 0>;
1076 def : ReadAdvance<ReadFCvtF32ToI64, 0>;
1077 def : ReadAdvance<ReadFCvtF64ToI32, 0>;
1078 def : ReadAdvance<ReadFCvtF64ToI64, 0>;
1079 def : ReadAdvance<ReadFCvtI32ToF16, 0>;
1080 def : ReadAdvance<ReadFCvtI32ToF32, 0>;
1081 def : ReadAdvance<ReadFCvtI32ToF64, 0>;
1082 def : ReadAdvance<ReadFCvtI64ToF16, 0>;
1083 def : ReadAdvance<ReadFCvtI64ToF32, 0>;
1084 def : ReadAdvance<ReadFCvtI64ToF64, 0>;
1085 def : ReadAdvance<ReadFCvtF32ToF64, 0>;
1086 def : ReadAdvance<ReadFCvtF64ToF32, 0>;
1087 def : ReadAdvance<ReadFCvtF16ToF32, 0>;
1088 def : ReadAdvance<ReadFCvtF32ToF16, 0>;
1089 def : ReadAdvance<ReadFCvtF16ToF64, 0>;
1090 def : ReadAdvance<ReadFCvtF64ToF16, 0>;
1091 def : ReadAdvance<ReadFMovF16ToI16, 0>;
1092 def : ReadAdvance<ReadFMovI16ToF16, 0>;
1093 def : ReadAdvance<ReadFMovF32ToI32, 0>;
1094 def : ReadAdvance<ReadFMovI32ToF32, 0>;
1095 def : ReadAdvance<ReadFMovF64ToI64, 0>;
1096 def : ReadAdvance<ReadFMovI64ToF64, 0>;
1097 def : ReadAdvance<ReadFClass16, 0>;
1098 def : ReadAdvance<ReadFClass32, 0>;
1099 def : ReadAdvance<ReadFClass64, 0>;
1101 def : SiFive7AnyToGPRBypass<ReadSFBJmp, 0>;
1102 def : SiFive7AnyToGPRBypass<ReadSFBALU, 0>;
1105 def : SiFive7AnyToGPRBypass<ReadRotateImm>;
1106 def : SiFive7AnyToGPRBypass<ReadRotateImm32>;
1107 def : SiFive7AnyToGPRBypass<ReadRotateReg>;
1108 def : SiFive7AnyToGPRBypass<ReadRotateReg32>;
1109 def : SiFive7AnyToGPRBypass<ReadCLZ>;
1110 def : SiFive7AnyToGPRBypass<ReadCLZ32>;
1111 def : SiFive7AnyToGPRBypass<ReadCTZ>;
1112 def : SiFive7AnyToGPRBypass<ReadCTZ32>;
1113 def : ReadAdvance<ReadCPOP, 0>;
1114 def : ReadAdvance<ReadCPOP32, 0>;
1115 def : SiFive7AnyToGPRBypass<ReadORCB>;
1116 def : SiFive7AnyToGPRBypass<ReadIMinMax>;
1117 def : SiFive7AnyToGPRBypass<ReadREV8>;
1118 def : SiFive7AnyToGPRBypass<ReadSHXADD>;
1119 def : SiFive7AnyToGPRBypass<ReadSHXADD32>;
1120 // Single-bit instructions
1121 def : SiFive7AnyToGPRBypass<ReadSingleBit>;
1122 def : SiFive7AnyToGPRBypass<ReadSingleBitImm>;
1124 // 6. Configuration-Setting Instructions
1125 def : ReadAdvance<ReadVSETVLI, 2>;
1126 def : ReadAdvance<ReadVSETVL, 2>;
1128 // 7. Vector Loads and Stores
1129 def : ReadAdvance<ReadVLDX, 0>;
1130 def : ReadAdvance<ReadVSTX, 0>;
1131 defm "" : LMULReadAdvance<"ReadVSTEV", 0>;
1132 defm "" : LMULReadAdvance<"ReadVSTM", 0>;
1133 def : ReadAdvance<ReadVLDSX, 0>;
1134 def : ReadAdvance<ReadVSTSX, 0>;
1135 defm "" : LMULReadAdvance<"ReadVSTS8V", 0>;
1136 defm "" : LMULReadAdvance<"ReadVSTS16V", 0>;
1137 defm "" : LMULReadAdvance<"ReadVSTS32V", 0>;
1138 defm "" : LMULReadAdvance<"ReadVSTS64V", 0>;
1139 defm "" : LMULReadAdvance<"ReadVLDUXV", 0>;
1140 defm "" : LMULReadAdvance<"ReadVLDOXV", 0>;
1141 defm "" : LMULReadAdvance<"ReadVSTUX8", 0>;
1142 defm "" : LMULReadAdvance<"ReadVSTUX16", 0>;
1143 defm "" : LMULReadAdvance<"ReadVSTUX32", 0>;
1144 defm "" : LMULReadAdvance<"ReadVSTUX64", 0>;
1145 defm "" : LMULReadAdvance<"ReadVSTUXV", 0>;
1146 defm "" : LMULReadAdvance<"ReadVSTUX8V", 0>;
1147 defm "" : LMULReadAdvance<"ReadVSTUX16V", 0>;
1148 defm "" : LMULReadAdvance<"ReadVSTUX32V", 0>;
1149 defm "" : LMULReadAdvance<"ReadVSTUX64V", 0>;
1150 defm "" : LMULReadAdvance<"ReadVSTOX8", 0>;
1151 defm "" : LMULReadAdvance<"ReadVSTOX16", 0>;
1152 defm "" : LMULReadAdvance<"ReadVSTOX32", 0>;
1153 defm "" : LMULReadAdvance<"ReadVSTOX64", 0>;
1154 defm "" : LMULReadAdvance<"ReadVSTOXV", 0>;
1155 defm "" : LMULReadAdvance<"ReadVSTOX8V", 0>;
1156 defm "" : LMULReadAdvance<"ReadVSTOX16V", 0>;
1157 defm "" : LMULReadAdvance<"ReadVSTOX32V", 0>;
1158 defm "" : LMULReadAdvance<"ReadVSTOX64V", 0>;
1160 def : ReadAdvance<ReadVST1R, 0>;
1161 def : ReadAdvance<ReadVST2R, 0>;
1162 def : ReadAdvance<ReadVST4R, 0>;
1163 def : ReadAdvance<ReadVST8R, 0>;
1165 // 11. Vector Integer Arithmetic Instructions
1166 defm : LMULReadAdvance<"ReadVIALUV", 0>;
1167 defm : LMULReadAdvance<"ReadVIALUX", 0>;
1168 defm : LMULReadAdvanceW<"ReadVIWALUV", 0>;
1169 defm : LMULReadAdvanceW<"ReadVIWALUX", 0>;
1170 defm : LMULReadAdvance<"ReadVExtV", 0>;
1171 defm : LMULReadAdvance<"ReadVICALUV", 0>;
1172 defm : LMULReadAdvance<"ReadVICALUX", 0>;
1173 defm : LMULReadAdvance<"ReadVShiftV", 0>;
1174 defm : LMULReadAdvance<"ReadVShiftX", 0>;
1175 defm : LMULReadAdvanceW<"ReadVNShiftV", 0>;
1176 defm : LMULReadAdvanceW<"ReadVNShiftX", 0>;
1177 defm : LMULReadAdvance<"ReadVICmpV", 0>;
1178 defm : LMULReadAdvance<"ReadVICmpX", 0>;
1179 defm : LMULReadAdvance<"ReadVIMinMaxV", 0>;
1180 defm : LMULReadAdvance<"ReadVIMinMaxX", 0>;
1181 defm : LMULReadAdvance<"ReadVIMulV", 0>;
1182 defm : LMULReadAdvance<"ReadVIMulX", 0>;
1183 defm : LMULSEWReadAdvance<"ReadVIDivV", 0>;
1184 defm : LMULSEWReadAdvance<"ReadVIDivX", 0>;
1185 defm : LMULReadAdvanceW<"ReadVIWMulV", 0>;
1186 defm : LMULReadAdvanceW<"ReadVIWMulX", 0>;
1187 defm : LMULReadAdvance<"ReadVIMulAddV", 0>;
1188 defm : LMULReadAdvance<"ReadVIMulAddX", 0>;
1189 defm : LMULReadAdvanceW<"ReadVIWMulAddV", 0>;
1190 defm : LMULReadAdvanceW<"ReadVIWMulAddX", 0>;
1191 defm : LMULReadAdvance<"ReadVIMergeV", 0>;
1192 defm : LMULReadAdvance<"ReadVIMergeX", 0>;
1193 defm : LMULReadAdvance<"ReadVIMovV", 0>;
1194 defm : LMULReadAdvance<"ReadVIMovX", 0>;
1196 // 12. Vector Fixed-Point Arithmetic Instructions
1197 defm "" : LMULReadAdvance<"ReadVSALUV", 0>;
1198 defm "" : LMULReadAdvance<"ReadVSALUX", 0>;
1199 defm "" : LMULReadAdvance<"ReadVAALUV", 0>;
1200 defm "" : LMULReadAdvance<"ReadVAALUX", 0>;
1201 defm "" : LMULReadAdvance<"ReadVSMulV", 0>;
1202 defm "" : LMULReadAdvance<"ReadVSMulX", 0>;
1203 defm "" : LMULReadAdvance<"ReadVSShiftV", 0>;
1204 defm "" : LMULReadAdvance<"ReadVSShiftX", 0>;
1205 defm "" : LMULReadAdvanceW<"ReadVNClipV", 0>;
1206 defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>;
1208 // 13. Vector Floating-Point Instructions
1209 defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>;
1210 defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>;
1211 defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUV", 0>;
1212 defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUF", 0>;
1213 defm "" : LMULSEWReadAdvanceF<"ReadVFMulV", 0>;
1214 defm "" : LMULSEWReadAdvanceF<"ReadVFMulF", 0>;
1215 defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
1216 defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
1217 defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
1218 defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
1219 defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>;
1220 defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
1221 defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
1222 defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
1223 defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
1224 defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>;
1225 defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>;
1226 defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>;
1227 defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>;
1228 defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>;
1229 defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
1230 defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
1231 defm "" : LMULReadAdvance<"ReadVFClassV", 0>;
1232 defm "" : LMULReadAdvance<"ReadVFMergeV", 0>;
1233 defm "" : LMULReadAdvance<"ReadVFMergeF", 0>;
1234 defm "" : LMULReadAdvance<"ReadVFMovF", 0>;
1235 defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>;
1236 defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>;
1237 defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>;
1238 defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
1239 defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
1240 defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
1241 defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
1242 defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
1244 // 14. Vector Reduction Operations
1245 def : ReadAdvance<ReadVIRedV, 0>;
1246 def : ReadAdvance<ReadVIRedV0, 0>;
1247 def : ReadAdvance<ReadVIWRedV, 0>;
1248 def : ReadAdvance<ReadVIWRedV0, 0>;
1249 def : ReadAdvance<ReadVFRedV, 0>;
1250 def : ReadAdvance<ReadVFRedV0, 0>;
1251 def : ReadAdvance<ReadVFRedOV, 0>;
1252 def : ReadAdvance<ReadVFRedOV0, 0>;
1253 def : ReadAdvance<ReadVFWRedV, 0>;
1254 def : ReadAdvance<ReadVFWRedV0, 0>;
1255 def : ReadAdvance<ReadVFWRedOV, 0>;
1256 def : ReadAdvance<ReadVFWRedOV0, 0>;
1258 // 15. Vector Mask Instructions
1259 defm "" : LMULReadAdvance<"ReadVMALUV", 0>;
1260 defm "" : LMULReadAdvance<"ReadVMPopV", 0>;
1261 defm "" : LMULReadAdvance<"ReadVMFFSV", 0>;
1262 defm "" : LMULReadAdvance<"ReadVMSFSV", 0>;
1263 defm "" : LMULReadAdvance<"ReadVIotaV", 0>;
1265 // 16. Vector Permutation Instructions
1266 def : ReadAdvance<ReadVMovXS, 0>;
1267 def : ReadAdvance<ReadVMovSX_V, 0>;
1268 def : ReadAdvance<ReadVMovSX_X, 0>;
1269 def : ReadAdvance<ReadVMovFS, 0>;
1270 def : ReadAdvance<ReadVMovSF_V, 0>;
1271 def : ReadAdvance<ReadVMovSF_F, 0>;
1272 defm "" : LMULReadAdvance<"ReadVISlideV", 0>;
1273 defm "" : LMULReadAdvance<"ReadVISlideX", 0>;
1274 defm "" : LMULReadAdvance<"ReadVFSlideV", 0>;
1275 defm "" : LMULReadAdvance<"ReadVFSlideF", 0>;
1276 defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_data", 0>;
1277 defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_index", 0>;
1278 defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_data", 0>;
1279 defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_index", 0>;
1280 defm "" : LMULReadAdvance<"ReadVRGatherVX_data", 0>;
1281 defm "" : LMULReadAdvance<"ReadVRGatherVX_index", 0>;
1282 defm "" : LMULReadAdvance<"ReadVRGatherVI_data", 0>;
1283 defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>;
1285 def : ReadAdvance<ReadVMov1V, 0>;
1286 def : ReadAdvance<ReadVMov2V, 0>;
1287 def : ReadAdvance<ReadVMov4V, 0>;
1288 def : ReadAdvance<ReadVMov8V, 0>;
1291 def : ReadAdvance<ReadVMask, 0>;
1292 def : ReadAdvance<ReadVPassthru_WorstCase, 0>;
1293 foreach mx = SchedMxList in {
1294 def : ReadAdvance<!cast<SchedRead>("ReadVPassthru_" # mx), 0>;
1295 foreach sew = SchedSEWSet<mx>.val in
1296 def : ReadAdvance<!cast<SchedRead>("ReadVPassthru_" # mx # "_E" # sew), 0>;
1299 //===----------------------------------------------------------------------===//
1300 // Unsupported extensions
1301 defm : UnsupportedSchedZabha;
1302 defm : UnsupportedSchedZbc;
1303 defm : UnsupportedSchedZbkb;
1304 defm : UnsupportedSchedZbkx;
1305 defm : UnsupportedSchedZfa;
1306 defm : UnsupportedSchedZvk;