1 //=- AArch64SchedNeoverseN2.td - NeoverseN2 Scheduling Defs --*- tablegen -*-=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the scheduling model for the Arm Neoverse N2 processors.
11 //===----------------------------------------------------------------------===//
13 def NeoverseN2Model : SchedMachineModel {
14 let IssueWidth = 10; // Micro-ops dispatched at a time.
15 let MicroOpBufferSize = 160; // Entries in micro-op re-order buffer.
16 let LoadLatency = 4; // Optimistic load latency.
17 let MispredictPenalty = 10; // Extra cycles for mispredicted branch.
18 let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57.
19 let CompleteModel = 1;
21 list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
22 [HasSVE2p1, HasPAuthLR, HasCPA, HasCSSC]);
25 //===----------------------------------------------------------------------===//
26 // Define each kind of processor resource and number available on Neoverse N2.
27 // Instructions are first fetched and then decoded into internal macro-ops
28 // (MOPs). From there, the MOPs proceed through register renaming and dispatch
29 // stages. A MOP can be split into two micro-ops further down the pipeline
30 // after the decode stage. Once dispatched, micro-ops wait for their operands
31 // and issue out-of-order to one of thirteen issue pipelines. Each issue
32 // pipeline can accept one micro-op per cycle.
34 let SchedModel = NeoverseN2Model in {
36 // Define the (13) issue ports: 2 B + 2 S + M0 + M1 + 2 L01 + L2 + 2 D + V0 + V1.
// The ProcResource<N> argument is the number of identical units of that kind.
37 def N2UnitB : ProcResource<2>; // Branch 0/1
38 def N2UnitS : ProcResource<2>; // Integer single Cycle 0/1
39 def N2UnitM0 : ProcResource<1>; // Integer multicycle 0
40 def N2UnitM1 : ProcResource<1>; // Integer multicycle 1
41 def N2UnitL01 : ProcResource<2>; // Load/Store 0/1
42 def N2UnitL2 : ProcResource<1>; // Load 2
43 def N2UnitD : ProcResource<2>; // Store data 0/1
44 def N2UnitV0 : ProcResource<1>; // FP/ASIMD 0
45 def N2UnitV1 : ProcResource<1>; // FP/ASIMD 1
// Resource groups: a write bound to a group may use any of the listed member
// units. They add no new ports beyond the thirteen defined above.
47 def N2UnitV : ProcResGroup<[N2UnitV0, N2UnitV1]>; // FP/ASIMD 0/1
48 def N2UnitM : ProcResGroup<[N2UnitM0, N2UnitM1]>; // Integer single/multicycle 0/1
49 def N2UnitL : ProcResGroup<[N2UnitL01, N2UnitL2]>; // Load/Store 0/1 and Load 2
50 def N2UnitI : ProcResGroup<[N2UnitS, N2UnitM0, N2UnitM1]>; // Integer single cycle 0/1 and single/multicycle 0/1
52 // Define commonly used read types.
54 // No forwarding is provided for these types: every read below is given a
// ReadAdvance of 0 cycles, i.e. operands are needed at issue time.
55 def : ReadAdvance<ReadI, 0>;
56 def : ReadAdvance<ReadISReg, 0>;
57 def : ReadAdvance<ReadIEReg, 0>;
58 def : ReadAdvance<ReadIM, 0>;
59 def : ReadAdvance<ReadIMA, 0>;
60 def : ReadAdvance<ReadID, 0>;
61 def : ReadAdvance<ReadExtrHi, 0>;
62 def : ReadAdvance<ReadAdrBase, 0>;
63 def : ReadAdvance<ReadST, 0>;
64 def : ReadAdvance<ReadVLD, 0>;
// Generic write types that are not bound to any issue port (empty resource
// list). WriteAtomic is flagged Unsupported; the others only carry a latency.
66 def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
67 def : WriteRes<WriteBarrier, []> { let Latency = 1; }
68 def : WriteRes<WriteHint, []> { let Latency = 1; }
// WriteLDHi is used for the second destination register of load-pair records.
69 def : WriteRes<WriteLDHi, []> { let Latency = 4; }
71 //===----------------------------------------------------------------------===//
72 // Define customized scheduler read/write types specific to the Neoverse N2.
74 //===----------------------------------------------------------------------===//
75 // Define generic 1 micro-op types
// Naming convention: N2Write_<latency>cyc_<count><unit>, e.g. N2Write_3cyc_1M0
// is one micro-op on N2UnitM0 with a latency of 3 cycles.
// Branch, integer and load/store address units.
77 def N2Write_1cyc_1B : SchedWriteRes<[N2UnitB]> { let Latency = 1; }
78 def N2Write_1cyc_1I : SchedWriteRes<[N2UnitI]> { let Latency = 1; }
79 def N2Write_1cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 1; }
80 def N2Write_1cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 1; }
81 def N2Write_1cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 1; }
82 def N2Write_2cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 2; }
83 def N2Write_3cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 3; }
// The multi-cycle M0 types below set ReleaseAtCycles equal to their latency,
// so N2UnitM0 stays reserved for the full duration of the operation.
84 def N2Write_2cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 2;
85 let ReleaseAtCycles = [2]; }
86 def N2Write_3cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 3;
87 let ReleaseAtCycles = [3]; }
88 def N2Write_5cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 5;
89 let ReleaseAtCycles = [5]; }
90 def N2Write_12cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 12;
91 let ReleaseAtCycles = [12]; }
92 def N2Write_20cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 20;
93 let ReleaseAtCycles = [20]; }
// Load types on any load-capable unit (N2UnitL group).
94 def N2Write_4cyc_1L : SchedWriteRes<[N2UnitL]> { let Latency = 4; }
95 def N2Write_6cyc_1L : SchedWriteRes<[N2UnitL]> { let Latency = 6; }
// FP/ASIMD types on either V pipe (N2UnitV group).
96 def N2Write_2cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 2; }
97 def N2Write_3cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 3; }
98 def N2Write_4cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 4; }
99 def N2Write_5cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 5; }
100 def N2Write_12cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 12; }
// FP/ASIMD types restricted to pipe V0. Only the 7-cycle type sets
// ReleaseAtCycles; the others leave it at its default.
101 def N2Write_2cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 2; }
102 def N2Write_3cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 3; }
103 def N2Write_4cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 4; }
104 def N2Write_7cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 7;
105 let ReleaseAtCycles = [7]; }
106 def N2Write_9cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 9; }
107 def N2Write_10cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 10; }
108 def N2Write_12cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 12; }
109 def N2Write_13cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 13; }
110 def N2Write_15cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 15; }
111 def N2Write_16cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 16; }
112 def N2Write_20cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 20; }
// FP/ASIMD types restricted to pipe V1.
113 def N2Write_2cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 2; }
114 def N2Write_3cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 3; }
115 def N2Write_4cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 4; }
116 def N2Write_6cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 6; }
117 def N2Write_10cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 10; }
// 6-cycle type restricted to the two load/store pipes L01.
118 def N2Write_6cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 6; }
120 //===----------------------------------------------------------------------===//
121 // Define generic 2 micro-op types
123 def N2Write_1cyc_1B_1S : SchedWriteRes<[N2UnitB, N2UnitS]> {
128 def N2Write_6cyc_1M0_1B : SchedWriteRes<[N2UnitM0, N2UnitB]> {
133 def N2Write_9cyc_1M0_1L : SchedWriteRes<[N2UnitM0, N2UnitL]> {
138 def N2Write_3cyc_1I_1M : SchedWriteRes<[N2UnitI, N2UnitM]> {
143 def N2Write_4cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
148 def N2Write_5cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
153 def N2Write_6cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
158 def N2Write_7cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
163 def N2Write_1cyc_1L01_1D : SchedWriteRes<[N2UnitL01, N2UnitD]> {
168 def N2Write_5cyc_1M0_1V : SchedWriteRes<[N2UnitM0, N2UnitV]> {
173 def N2Write_2cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> {
178 def N2Write_4cyc_1V1_1V : SchedWriteRes<[N2UnitV1, N2UnitV]> {
183 def N2Write_4cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
188 def N2Write_10cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
191 let ReleaseAtCycles = [5, 5];
194 def N2Write_13cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
197 let ReleaseAtCycles = [6, 7];
200 def N2Write_15cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
203 let ReleaseAtCycles = [7, 8];
206 def N2Write_16cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
209 let ReleaseAtCycles = [8, 8];
212 def N2Write_4cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> {
217 def N2Write_6cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> {
222 def N2Write_6cyc_2L : SchedWriteRes<[N2UnitL, N2UnitL]> {
227 def N2Write_8cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> {
232 def N2Write_4cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> {
237 def N2Write_3cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> {
242 def N2Write_2cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> {
247 def N2Write_6cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> {
252 def N2Write_4cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> {
257 def N2Write_5cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
262 def N2Write_5cyc_1V1_1M0 : SchedWriteRes<[N2UnitV1, N2UnitM0]> {
267 def N2Write_7cyc_1M0_1V0 : SchedWriteRes<[N2UnitM0, N2UnitV0]> {
272 def N2Write_2cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> {
277 def N2Write_6cyc_1V_1V1 : SchedWriteRes<[N2UnitV, N2UnitV1]> {
282 def N2Write_6cyc_1L_1M : SchedWriteRes<[N2UnitL, N2UnitM]> {
287 def N2Write_6cyc_1L_1S : SchedWriteRes<[N2UnitL, N2UnitS]> {
292 def N2Write_9cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> {
297 def N2Write_4cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> {
302 //===----------------------------------------------------------------------===//
303 // Define generic 3 micro-op types
305 def N2Write_1cyc_1L01_1D_1I : SchedWriteRes<[N2UnitL01, N2UnitD, N2UnitI]> {
310 def N2Write_2cyc_1L01_1V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitI]> {
315 def N2Write_2cyc_1L01_2V : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV]> {
320 def N2Write_7cyc_1M_1M0_1V : SchedWriteRes<[N2UnitM, N2UnitM0, N2UnitV]> {
325 def N2Write_8cyc_1M0_1V1_1V : SchedWriteRes<[N2UnitM0, N2UnitV1, N2UnitV]> {
// One V, one L and one S micro-op, as the name states. The third resource was
// previously N2UnitL, which contradicted the _1S suffix and the naming
// convention followed by every other type in this file.
// NOTE(review): confirm the S pipe against the Neoverse N2 optimization guide.
330 def N2Write_10cyc_1V_1L_1S : SchedWriteRes<[N2UnitV, N2UnitL, N2UnitS]> {
335 def N2Write_2cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
340 def N2Write_4cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
345 def N2Write_6cyc_3L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL]> {
350 def N2Write_8cyc_1L_2V : SchedWriteRes<[N2UnitL, N2UnitV, N2UnitV]> {
355 //===----------------------------------------------------------------------===//
356 // Define generic 4 micro-op types
358 def N2Write_2cyc_1L01_2V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV,
364 def N2Write_6cyc_4V0 : SchedWriteRes<[N2UnitV0, N2UnitV0, N2UnitV0, N2UnitV0]> {
369 def N2Write_4cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
374 def N2Write_6cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
379 def N2Write_8cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
384 def N2Write_9cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
389 def N2Write_2cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
395 def N2Write_4cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
401 def N2Write_5cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
407 def N2Write_8cyc_2M0_2V0 : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitV0,
413 def N2Write_11cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
419 def N2Write_9cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
425 def N2Write_8cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
431 def N2Write_10cyc_2L_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
437 def N2Write_10cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
442 def N2Write_4cyc_2M0_2M : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitM,
448 def N2Write_6cyc_2I_2L : SchedWriteRes<[N2UnitI, N2UnitI, N2UnitL, N2UnitL]> {
453 def N2Write_7cyc_4L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL]> {
458 //===----------------------------------------------------------------------===//
459 // Define generic 5 micro-op types
461 def N2Write_2cyc_1L01_2V_2I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV,
467 def N2Write_8cyc_2L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV,
473 //===----------------------------------------------------------------------===//
474 // Define generic 6 micro-op types
476 def N2Write_8cyc_3L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
477 N2UnitV, N2UnitV, N2UnitV]> {
482 def N2Write_2cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
483 N2UnitV, N2UnitV, N2UnitV]> {
488 def N2Write_6cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
489 N2UnitV, N2UnitV, N2UnitV]> {
494 def N2Write_4cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
495 N2UnitV, N2UnitV, N2UnitV]> {
500 def N2Write_10cyc_2L_2V_2S : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV,
506 //===----------------------------------------------------------------------===//
507 // Define generic 7 micro-op types
509 def N2Write_8cyc_3L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
510 N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
515 //===----------------------------------------------------------------------===//
516 // Define generic 8 micro-op types
518 def N2Write_6cyc_8V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV,
519 N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
524 def N2Write_2cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
525 N2UnitL01, N2UnitV, N2UnitV, N2UnitV,
531 def N2Write_5cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
532 N2UnitL01, N2UnitV, N2UnitV, N2UnitV,
538 def N2Write_8cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
539 N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
544 def N2Write_9cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
545 N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
550 //===----------------------------------------------------------------------===//
551 // Define generic 10 micro-op types
553 def N2Write_7cyc_5L01_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
554 N2UnitL01, N2UnitL01, N2UnitV,
555 N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
557 let NumMicroOps = 10;
560 //===----------------------------------------------------------------------===//
561 // Define generic 12 micro-op types
563 def N2Write_7cyc_6L01_6V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
564 N2UnitL01, N2UnitL01, N2UnitL01,
565 N2UnitV, N2UnitV, N2UnitV, N2UnitV,
568 let NumMicroOps = 12;
571 //===----------------------------------------------------------------------===//
572 // Define generic 15 micro-op types
574 def N2Write_7cyc_5L01_5S_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
575 N2UnitL01, N2UnitL01, N2UnitS,
576 N2UnitS, N2UnitS, N2UnitS,
577 N2UnitS, N2UnitV, N2UnitV,
578 N2UnitV, N2UnitV, N2UnitV]> {
580 let NumMicroOps = 15;
583 //===----------------------------------------------------------------------===//
584 // Define generic 18 micro-op types
586 def N2Write_11cyc_9L01_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
587 N2UnitL01, N2UnitL01, N2UnitL01,
588 N2UnitL01, N2UnitL01, N2UnitL01,
589 N2UnitV, N2UnitV, N2UnitV,
590 N2UnitV, N2UnitV, N2UnitV,
591 N2UnitV, N2UnitV, N2UnitV]> {
593 let NumMicroOps = 18;
596 //===----------------------------------------------------------------------===//
597 // Define generic 27 micro-op types
599 def N2Write_11cyc_9L01_9S_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
600 N2UnitL01, N2UnitL01, N2UnitL01,
601 N2UnitL01, N2UnitL01, N2UnitL01,
602 N2UnitS, N2UnitS, N2UnitS,
603 N2UnitS, N2UnitS, N2UnitS,
604 N2UnitS, N2UnitS, N2UnitS,
605 N2UnitV, N2UnitV, N2UnitV,
606 N2UnitV, N2UnitV, N2UnitV,
607 N2UnitV, N2UnitV, N2UnitV]> {
609 let NumMicroOps = 27;
612 //===----------------------------------------------------------------------===//
613 // Define types for arithmetic and logical ops with short shifts
// Each variant picks the first SchedVar whose predicate holds; NoSchedPred is
// the fallback. N2Write_Arith: IsCheapLSL -> 1-cycle on any integer pipe,
// otherwise 2 cycles on an M pipe.
614 def N2Write_Arith : SchedWriteVariant<[
615 SchedVar<IsCheapLSL, [N2Write_1cyc_1I]>,
616 SchedVar<NoSchedPred, [N2Write_2cyc_1M]>]>;
// N2Write_Logical: same two outcomes, selected by the NeoverseNoLSL predicate.
618 def N2Write_Logical: SchedWriteVariant<[
619 SchedVar<NeoverseNoLSL, [N2Write_1cyc_1I]>,
620 SchedVar<NoSchedPred, [N2Write_2cyc_1M]>]>;
623 // -----------------------------------------------------------------------------
// The COPY pseudo-instruction is scheduled with the generic WriteI type.
625 def : InstRW<[WriteI], (instrs COPY)>;
627 // Branch Instructions
628 // -----------------------------------------------------------------------------
631 // Compare and branch
632 def : SchedAlias<WriteBr, N2Write_1cyc_1B>;
// WriteBrReg uses the same 1-cycle branch-unit type as WriteBr.
635 def : SchedAlias<WriteBrReg, N2Write_1cyc_1B>;
637 // Branch and link, immed
638 // Branch and link, register
// BL/BLR take one branch micro-op plus one S-pipe micro-op.
639 def : InstRW<[N2Write_1cyc_1B_1S], (instrs BL, BLR)>;
641 // Arithmetic and Logical Instructions
642 // -----------------------------------------------------------------------------
645 // ALU, basic, flagset
646 def : SchedAlias<WriteI, N2Write_1cyc_1I>;
648 // ALU, extend and shift
649 def : SchedAlias<WriteIEReg, N2Write_2cyc_1M>;
651 // Arithmetic, LSL shift, shift <= 4
652 // Arithmetic, flagset, LSL shift, shift <= 4
653 // Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
// All three cases go through the N2Write_Arith variant, which chooses between
// the 1-cycle I type and the 2-cycle M type.
654 def : SchedAlias<WriteISReg, N2Write_Arith>;
656 // Logical, shift, no flagset
657 def : InstRW<[N2Write_1cyc_1I],
658 (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
660 // Logical, shift, flagset
// Only the flag-setting ANDS/BICS forms use the N2Write_Logical variant.
661 def : InstRW<[N2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
663 // Arithmetic, immediate to logical address tag
664 def : InstRW<[N2Write_2cyc_1M], (instrs ADDG, SUBG)>;
666 // Convert floating-point condition flags
667 // Flag manipulation instructions
// WriteSys consumes no issue port here; it only carries a 1-cycle latency.
668 def : WriteRes<WriteSys, []> { let Latency = 1; }
670 // Insert Random Tags
671 def : InstRW<[N2Write_2cyc_1M], (instrs IRG, IRGstack)>;
675 // Subtract Pointer, flagset
// GMI is scheduled with the same 1-cycle I type as SUBP/SUBPS.
676 def : InstRW<[N2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>;
678 // Move and shift instructions
679 // -----------------------------------------------------------------------------
// WriteImm is a 1-cycle op on any integer pipe.
681 def : SchedAlias<WriteImm, N2Write_1cyc_1I>;
683 // Divide and Multiply Instructions
684 // -----------------------------------------------------------------------------
// Division: 12 cycles (WriteID32) / 20 cycles (WriteID64), both on pipe M0,
// which stays reserved for the whole latency (see the N2Write_*_1M0 types).
687 def : SchedAlias<WriteID32, N2Write_12cyc_1M0>;
688 def : SchedAlias<WriteID64, N2Write_20cyc_1M0>;
// Multiply: 2 cycles on either M pipe for both WriteIM32 and WriteIM64.
690 def : WriteRes<WriteIM32, [N2UnitM]> { let Latency = 2; }
691 def : WriteRes<WriteIM64, [N2UnitM]> { let Latency = 2; }
// Multiply high: 3 cycles on either M pipe.
694 def : InstRW<[N2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>;
696 // Pointer Authentication Instructions (v8.3 PAC)
697 // -----------------------------------------------------------------------------
699 // Authenticate data address
700 // Authenticate instruction address
701 // Compute pointer authentication code for data address
702 // Compute pointer authentication code, using generic key
703 // Compute pointer authentication code for instruction address
// Every opcode whose name starts with AUT or PAC gets the 5-cycle M0 type,
// which reserves pipe M0 for all 5 cycles.
704 def : InstRW<[N2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>;
706 // Branch and link, register, with pointer authentication
707 // Branch, register, with pointer authentication
708 // Branch, return, with pointer authentication
709 def : InstRW<[N2Write_6cyc_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA,
710 BRAAZ, BRAB, BRABZ, RETAA, RETAB,
714 // Load register, with pointer authentication
// Matches the LDRAA/LDRAB indexed and writeback forms: one M0 micro-op plus
// one load micro-op.
715 def : InstRW<[N2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
717 // Strip pointer authentication code
718 def : InstRW<[N2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>;
720 // Miscellaneous data-processing instructions
721 // -----------------------------------------------------------------------------
723 // Bitfield extract, one reg
724 // Bitfield extract, two regs
725 // NOTE: We don't model the difference between EXTR where both operands are the
// same register (one reg) and EXTR with two different registers (two regs);
// both forms are scheduled with N2Write_3cyc_1I_1M.
727 def : SchedAlias<WriteExtr, N2Write_3cyc_1I_1M>;
728 def : InstRW<[N2Write_3cyc_1I_1M], (instrs EXTRWrri, EXTRXrri)>;
730 // Bitfield move, basic
731 def : SchedAlias<WriteIS, N2Write_1cyc_1I>;
733 // Bitfield move, insert
// Only BFM (insert) is overridden to 2 cycles on an M pipe.
734 def : InstRW<[N2Write_2cyc_1M], (instregex "^BFM[WX]ri$")>;
737 // -----------------------------------------------------------------------------
// Generic integer loads: 4 cycles on any load pipe; the indexed form adds one
// integer micro-op.
739 def : SchedAlias<WriteLD, N2Write_4cyc_1L>;
740 def : SchedAlias<WriteLDIdx, N2Write_4cyc_1I_1L>;
742 // Load pair, signed immed offset, signed words
// LDPSW is a load, so it is modelled as one integer micro-op plus one load
// micro-op (N2Write_5cyc_1I_1L). The previous N2Write_5cyc_1M0 used no load
// pipe at all and reserved pipe M0 for 5 cycles. WriteLDHi covers the second
// destination register.
743 def : InstRW<[N2Write_5cyc_1I_1L, WriteLDHi], (instrs LDPSWi)>;
744 // Load pair, immed post-index or immed pre-index, signed words
// WriteAdr comes first for the written-back base register.
745 def : InstRW<[WriteAdr, N2Write_5cyc_1I_1L, WriteLDHi],
746 (instregex "^LDPSW(post|pre)$")>;
748 // Store instructions
749 // -----------------------------------------------------------------------------
// Generic stores: one L01 (address) micro-op plus one D (store data) micro-op;
// the indexed form adds an integer micro-op.
751 def : SchedAlias<WriteST, N2Write_1cyc_1L01_1D>;
752 def : SchedAlias<WriteSTIdx, N2Write_1cyc_1L01_1D_1I>;
753 def : SchedAlias<WriteSTP, N2Write_1cyc_1L01_1D>;
754 def : SchedAlias<WriteAdr, N2Write_1cyc_1I>; // copied from A57.
756 // Tag load instructions
757 // -----------------------------------------------------------------------------
759 // Load allocation tag
760 // Load multiple allocation tags
// Both tag loads use the same 4-cycle type as a generic load.
761 def : InstRW<[N2Write_4cyc_1L], (instrs LDG, LDGM)>;
763 // Tag store instructions
764 // -----------------------------------------------------------------------------
766 // Store allocation tags to one or two granules, post-index
767 // Store allocation tags to one or two granules, pre-index
768 // Store allocation tag to one or two granules, zeroing, post-index
769 // Store Allocation Tag to one or two granules, zeroing, pre-index
770 // Store allocation tag and reg pair to memory, post-Index
771 // Store allocation tag and reg pair to memory, pre-Index
772 def : InstRW<[N2Write_1cyc_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex,
773 ST2GPreIndex, ST2GPostIndex,
774 STZGPreIndex, STZGPostIndex,
775 STZ2GPreIndex, STZ2GPostIndex,
778 // Store allocation tags to one or two granules, signed offset
779 // Store allocation tag to two granules, zeroing, signed offset
780 // Store allocation tag and reg pair to memory, signed offset
781 // Store multiple allocation tags
// All of these use the same L01 + D type as a generic store.
782 def : InstRW<[N2Write_1cyc_1L01_1D], (instrs STGi, ST2Gi, STZGi,
783 STZ2Gi, STGPi, STGM, STZGM)>;
785 // FP data processing instructions
786 // -----------------------------------------------------------------------------
// Generic FP op (WriteF): 2 cycles on either V pipe.
793 def : SchedAlias<WriteF, N2Write_2cyc_1V>;
// FP compare (WriteFCmp): 2 cycles, pipe V0 only.
796 def : SchedAlias<WriteFCmp, N2Write_2cyc_1V0>;
798 // FP divide, square root
// Default for WriteFDiv; the per-form InstRW records below override it.
799 def : SchedAlias<WriteFDiv, N2Write_7cyc_1V0>;
// FP divide, H-form
802 def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVHrr)>;
// FP divide, S-form
804 def : InstRW<[N2Write_10cyc_1V0], (instrs FDIVSrr)>;
// FP divide, D-form
806 def : InstRW<[N2Write_15cyc_1V0], (instrs FDIVDrr)>;
808 // FP square root, H-form
809 def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTHr)>;
810 // FP square root, S-form
811 def : InstRW<[N2Write_9cyc_1V0], (instrs FSQRTSr)>;
812 // FP square root, D-form
813 def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRTDr)>;
// FP multiply (WriteFMul): 3 cycles on either V pipe.
816 def : WriteRes<WriteFMul, [N2UnitV]> { let Latency = 3; }
818 // FP multiply accumulate
819 def : InstRW<[N2Write_4cyc_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
821 // FP round to integral
// Covers the scalar FRINT* forms and the FRINT32/FRINT64 X/Z forms.
822 def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ][HSD]r$",
823 "^FRINT(32|64)[XZ][SD]r$")>;
825 // FP miscellaneous instructions
826 // -----------------------------------------------------------------------------
828 // FP convert, from gen to vec reg
829 def : InstRW<[N2Write_3cyc_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
831 // FP convert, from vec to gen reg
832 def : InstRW<[N2Write_3cyc_1V], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;
834 // FP convert, Javascript from vec to gen reg
835 // FP convert, from vec to vec reg
// Both cases fall back to the WriteFCvt alias: 3 cycles on pipe V0.
836 def : SchedAlias<WriteFCvt, N2Write_3cyc_1V0>;
// FP immediate moves (WriteFImm): 2 cycles on either V pipe.
840 def : SchedAlias<WriteFImm, N2Write_2cyc_1V>;
842 // FP transfer, from gen to low half of vec reg
// NOTE(review): the list also contains FMOVHWr/FMOVHXr/FMOVSWr/FMOVDXr, whose
// names suggest vec-to-gen moves; confirm they belong under this heading.
843 def : InstRW<[N2Write_3cyc_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
844 FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>;
846 // FP transfer, from gen to high half of vec reg
847 def : InstRW<[N2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>;
849 // FP transfer, from vec to gen reg
850 def : SchedAlias<WriteFCopy, N2Write_2cyc_1V>;
852 // FP load instructions
853 // -----------------------------------------------------------------------------
855 // Load vector reg, literal, S/D/Q forms
856 // Load vector reg, unscaled immed
857 def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[SDQ]l$",
860 // Load vector reg, immed post-index
// The first def operand of a post-indexed load is the written-back base
// register, so WriteAdr comes first and the 6-cycle load type applies to the
// loaded value, matching the pre-index record for the same opcodes. The
// previous order [load, WriteI] gave the base register the 6-cycle latency and
// the loaded value a 1-cycle latency.
861 def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L], (instregex "^LDR[BHSDQ]post$")>;
862 // Load vector reg, immed pre-index
// WriteAdr covers the written-back base register; the load type covers the
// loaded value.
863 def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L], (instregex "^LDR[BHSDQ]pre$")>;
865 // Load vector reg, unsigned immed
866 def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>;
868 // Load vector reg, register offset, basic
869 // Load vector reg, register offset, scale, S/D-form
870 // Load vector reg, register offset, extend
871 // Load vector reg, register offset, extend, scale, S/D-form
// The regex covers only the B/S/D register-offset opcodes; H and Q are handled
// by the next record.
872 def : InstRW<[N2Write_6cyc_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;
874 // Load vector reg, register offset, scale, H/Q-form
875 // Load vector reg, register offset, extend, scale, H/Q-form
// H/Q forms take an extra integer micro-op and one more cycle.
876 def : InstRW<[N2Write_7cyc_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;
878 // Load vector pair, immed offset, S/D-form
// WriteLDHi is the write for the second destination register of the pair.
879 def : InstRW<[N2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;
881 // Load vector pair, immed offset, Q-form
882 def : InstRW<[N2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
884 // Load vector pair, immed post-index, S/D-form
885 // Load vector pair, immed pre-index, S/D-form
886 def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L, WriteLDHi],
887 (instregex "^LDP[SD](pre|post)$")>;
889 // Load vector pair, immed post-index, Q-form
890 // Load vector pair, immed pre-index, Q-form
891 def : InstRW<[WriteAdr, N2Write_6cyc_2I_2L, WriteLDHi], (instrs LDPQpost,
894 // FP store instructions
895 // -----------------------------------------------------------------------------
897 // Store vector reg, unscaled immed, B/H/S/D-form
898 // Store vector reg, unscaled immed, Q-form
// FP stores use one L01 micro-op plus one V micro-op.
899 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STUR[BHSDQ]i$")>;
901 // Store vector reg, immed post-index, B/H/S/D-form
902 // Store vector reg, immed post-index, Q-form
903 // Store vector reg, immed pre-index, B/H/S/D-form
904 // Store vector reg, immed pre-index, Q-form
// WriteAdr covers the written-back base; the extra I micro-op is in the store
// type itself.
905 def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I, ReadAdrBase],
906 (instregex "^STR[BHSDQ](pre|post)$")>;
908 // Store vector reg, unsigned immed, B/H/S/D-form
909 // Store vector reg, unsigned immed, Q-form
910 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STR[BHSDQ]ui$")>;
912 // Store vector reg, register offset, basic, B/H/S/D-form
913 // Store vector reg, register offset, basic, Q-form
914 // Store vector reg, register offset, scale, S/D-form
915 // Store vector reg, register offset, extend, B/H/S/D-form
916 // Store vector reg, register offset, extend, Q-form
917 // Store vector reg, register offset, extend, scale, S/D-form
// The regex covers only the B/S/D opcodes; H and Q are matched by the next
// record, which uses the same write type.
918 def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
919 (instregex "^STR[BSD]ro[WX]$")>;
921 // Store vector reg, register offset, scale, H-form
922 // Store vector reg, register offset, scale, Q-form
923 // Store vector reg, register offset, extend, scale, H-form
924 // Store vector reg, register offset, extend, scale, Q-form
925 def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
926 (instregex "^STR[HQ]ro[WX]$")>;
928 // Store vector pair, immed offset, S-form
929 // Store vector pair, immed offset, D-form
930 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STN?P[SD]i$")>;
932 // Store vector pair, immed offset, Q-form
// Q-form pairs need two V micro-ops.
933 def : InstRW<[N2Write_2cyc_1L01_2V], (instrs STPQi, STNPQi)>;
935 // Store vector pair, immed post-index, S-form
936 // Store vector pair, immed post-index, D-form
937 // Store vector pair, immed pre-index, S-form
938 // Store vector pair, immed pre-index, D-form
939 def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I],
940 (instregex "^STP[SD](pre|post)$")>;
942 // Store vector pair, immed post-index, Q-form
// NOTE(review): unlike the S/D-form record, the two Q-form records below list
// no WriteAdr for the written-back base register; confirm this is intended.
943 def : InstRW<[N2Write_2cyc_1L01_2V_1I], (instrs STPQpost)>;
945 // Store vector pair, immed pre-index, Q-form
946 def : InstRW<[N2Write_2cyc_1L01_2V_2I], (instrs STPQpre)>;
948 // ASIMD integer instructions
949 // -----------------------------------------------------------------------------
951 // ASIMD absolute diff
952 // ASIMD absolute diff long
953 // ASIMD arith, basic
954 // ASIMD arith, complex
955 // ASIMD arith, pair-wise
958 // ASIMD max/min, basic and pair-wise
// Default for every ASIMD op not overridden below (D-form via WriteVd, Q-form
// via WriteVq): 2 cycles on either V pipe.
959 def : SchedAlias<WriteVd, N2Write_2cyc_1V>;
960 def : SchedAlias<WriteVq, N2Write_2cyc_1V>;
962 // ASIMD absolute diff accum
963 // ASIMD absolute diff accum long
964 def : InstRW<[N2Write_4cyc_1V1],
965 (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
967 // ASIMD arith, reduce, 4H/4S
968 def : InstRW<[N2Write_2cyc_1V1], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
970 // ASIMD arith, reduce, 8B/8H
// The wider reduction takes a V1 micro-op plus a second micro-op on either V
// pipe.
971 def : InstRW<[N2Write_4cyc_1V1_1V],
972 (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
974 // ASIMD arith, reduce, 16B
975 def : InstRW<[N2Write_4cyc_1V1], (instrs ADDVv16i8v, SADDLVv16i8v,
979 // ASIMD dot product using signed and unsigned integers
// Matches SDOT/UDOT/SUDOT/USDOT, with or without the "lane" infix, for the
// v8i8 and v16i8 forms.
980 def : InstRW<[N2Write_3cyc_1V],
981 (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;
983 // ASIMD matrix multiply-accumulate
984 def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA, UMMLA, USMMLA)>;
986 // ASIMD max/min, reduce, 4H/4S
987 def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU](MAX|MIN)Vv4i16v$",
988 "^[SU](MAX|MIN)Vv4i32v$")>;
990 // ASIMD max/min, reduce, 8B/8H
991 def : InstRW<[N2Write_4cyc_1V1_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
992 "^[SU](MAX|MIN)Vv8i16v$")>;
994 // ASIMD max/min, reduce, 16B
// Two V1 micro-ops. The pattern is anchored with '^' like the 4H/4S and 8B/8H
// reduce patterns for the same opcodes, so all three sibling records read the
// same way.
995 def : InstRW<[N2Write_4cyc_2V1], (instregex "^[SU](MAX|MIN)Vv16i8v$")>;
// ASIMD multiply (also covers SQDMULH/SQRDMULH): 4 cycles, pipe V0 only.
998 def : InstRW<[N2Write_4cyc_1V0], (instregex "^MULv", "^SQ(R)?DMULHv")>;
1000 // ASIMD multiply accumulate
1001 def : InstRW<[N2Write_4cyc_1V0], (instregex "^MLAv", "^MLSv")>;
1003 // ASIMD multiply accumulate high
1004 def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
1006 // ASIMD multiply accumulate long
1007 def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
1009 // ASIMD multiply accumulate saturating long
1010 def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMLALv", "^SQDMLSLv")>;
1012 // ASIMD multiply/multiply long (8x8) polynomial, D-form
1013 // ASIMD multiply/multiply long (8x8) polynomial, Q-form
// Matches PMUL and PMULL for the v8i8 and v16i8 forms.
1014 def : InstRW<[N2Write_3cyc_1V0], (instregex "^PMULL?(v8i8|v16i8)$")>;
1016 // ASIMD multiply long
// Unlike the other multiplies above, this one may issue on either V pipe.
1017 def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]MULLv", "^SQDMULLv")>;
1019 // ASIMD pairwise add and accumulate long
1020 def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALPv")>;
1022 // ASIMD shift accumulate
1023 def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]SRAv", "^[SU]RSRAv")>;
1025 // ASIMD shift by immed, basic
1026 def : InstRW<[N2Write_2cyc_1V1], (instregex "^SHLv", "^SHLLv", "^SHRNv",
1027 "^SSHLLv", "^SSHRv", "^USHLLv",
1030 // ASIMD shift by immed and insert, basic
// All shift records in this group are restricted to pipe V1.
1031 def : InstRW<[N2Write_2cyc_1V1], (instregex "^SLIv", "^SRIv")>;
1033 // ASIMD shift by immed, complex
// Rounding, saturating and narrowing immediate shifts: 4 cycles.
1034 def : InstRW<[N2Write_4cyc_1V1],
1035 (instregex "^RSHRNv", "^SQRSHRNv", "^SQRSHRUNv",
1036 "^(SQSHLU?|UQSHL)[bhsd]$",
1037 "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
1038 "^SQSHRNv", "^SQSHRUNv", "^SRSHRv", "^UQRSHRNv",
1039 "^UQSHRNv", "^URSHRv")>;
1041 // ASIMD shift by register, basic
1042 def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]SHLv")>;
1044 // ASIMD shift by register, complex
// The last pattern has no "_shift" suffix, so it selects the register-shift
// SQSHL/UQSHL forms rather than the immediate forms listed earlier.
1045 def : InstRW<[N2Write_4cyc_1V1],
1046 (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
1047 "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;
1049 // ASIMD floating-point instructions
1050 // -----------------------------------------------------------------------------
1052 // ASIMD FP absolute value/difference
1053 // ASIMD FP arith, normal
1055 // ASIMD FP complex add
1056 // ASIMD FP max/min, normal
1057 // ASIMD FP max/min, pairwise
1059 // Handled by SchedAlias<WriteV[dq], ...>
1061 // ASIMD FP complex multiply add
1062 def : InstRW<[N2Write_4cyc_1V], (instregex "^FCMLAv")>;
1064 // ASIMD FP convert, long (F16 to F32)
// The F16 conversions below take two V0 micro-ops; the F32/F64 ones take one.
1065 def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTL(v4|v8)i16")>;
1067 // ASIMD FP convert, long (F32 to F64)
1068 def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTL(v2|v4)i32")>;
1070 // ASIMD FP convert, narrow (F32 to F16)
1071 def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTN(v4|v8)i16")>;
1073 // ASIMD FP convert, narrow (F64 to F32)
1074 def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTN(v2|v4)i32",
1075 "^FCVTXN(v2|v4)f32")>;
1077 // ASIMD FP convert, other, D-form F32 and Q-form F64
// Micro-op count follows the lane count: v2 -> 1, v4 -> 2, v8 -> 4 V0
// micro-ops in this and the next two records.
1078 def : InstRW<[N2Write_3cyc_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
1079 "^[SU]CVTFv2f(32|64)$")>;
1081 // ASIMD FP convert, other, D-form F16 and Q-form F32
1082 def : InstRW<[N2Write_4cyc_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
1083 "^[SU]CVTFv4f(16|32)$")>;
1085 // ASIMD FP convert, other, Q-form F16
1086 def : InstRW<[N2Write_6cyc_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
1087 "^[SU]CVTFv8f16$")>;
1089 // ASIMD FP divide, D-form, F16
1090 def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVv4f16)>;
1092 // ASIMD FP divide, D-form, F32
1093 def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv2f32)>;
1095 // ASIMD FP divide, Q-form, F16
1096 def : InstRW<[N2Write_13cyc_2V0], (instrs FDIVv8f16)>;
1098 // ASIMD FP divide, Q-form, F32
// Uses the same write type as the D-form F32 divide.
1099 def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv4f32)>;
1101 // ASIMD FP divide, Q-form, F64
1102 def : InstRW<[N2Write_15cyc_2V0], (instrs FDIVv2f64)>;
1104 // ASIMD FP max/min, reduce, F32 and D-form F16
1105 def : InstRW<[N2Write_4cyc_1V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;
1107 // ASIMD FP max/min, reduce, Q-form F16
1108 def : InstRW<[N2Write_6cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;
1110 // ASIMD FP multiply
1111 def : InstRW<[N2Write_3cyc_1V], (instregex "^FMULv", "^FMULXv")>;
1113 // ASIMD FP multiply accumulate
1114 def : InstRW<[N2Write_4cyc_1V], (instregex "^FMLAv", "^FMLSv")>;
1116 // ASIMD FP multiply accumulate long
1117 def : InstRW<[N2Write_5cyc_1V], (instregex "^FMLALv", "^FMLSLv")>;
// ASIMD FP round, D-form F32 and Q-form F64
// The second pattern selects the FRINT32[XZ]/FRINT64[XZ] two-element forms.
// The "(32|64)" group must be a parenthesised alternation: written with a
// leading '[' it parses as a single character class and never matches the
// FRINT32*/FRINT64* opcodes.
def : InstRW<[N2Write_3cyc_1V0],
             (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
                        "^FRINT(32|64)[XZ]v2f(32|64)$")>;
// ASIMD FP round, D-form F16 and Q-form F32
def : InstRW<[N2Write_4cyc_2V0],
             (instregex
                "^FRINT[AIMNPXZ]v4f(16|32)$",
                "^FRINT(32|64)[XZ]v4f32$")>;

// ASIMD FP round, Q-form F16
def : InstRW<[N2Write_6cyc_4V0],
             (instregex "^FRINT[AIMNPXZ]v8f16$")>;

// ASIMD FP square root, D-form, F16
def : InstRW<[N2Write_7cyc_1V0],
             (instrs FSQRTv4f16)>;

// ASIMD FP square root, D-form, F32
def : InstRW<[N2Write_10cyc_2V0],
             (instrs FSQRTv2f32)>;

// ASIMD FP square root, Q-form, F16
def : InstRW<[N2Write_13cyc_2V0],
             (instrs FSQRTv8f16)>;

// ASIMD FP square root, Q-form, F32
def : InstRW<[N2Write_10cyc_2V0],
             (instrs FSQRTv4f32)>;

// ASIMD FP square root, Q-form, F64
def : InstRW<[N2Write_16cyc_2V0],
             (instrs FSQRTv2f64)>;
// ASIMD BFloat16 (BF16) instructions
// -----------------------------------------------------------------------------

// ASIMD convert, F32 to BF16
def : InstRW<[N2Write_4cyc_1V0],
             (instrs BFCVTN,
                     BFCVTN2)>;

// ASIMD dot product
def : InstRW<[N2Write_4cyc_1V],
             (instrs BFDOTv4bf16,
                     BFDOTv8bf16)>;

// ASIMD matrix multiply accumulate
def : InstRW<[N2Write_5cyc_1V],
             (instrs BFMMLA)>;
// ASIMD multiply accumulate long
// Bottom (B) and top (T) variants, each in vector and indexed (Idx) form.
// The list is closed with the indexed top form so that it mirrors the
// BFMLALB/BFMLALBIdx pair and the record is properly terminated.
def : InstRW<[N2Write_4cyc_1V], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
                                        BFMLALTIdx)>;
// Scalar convert, F32 to BF16
def : InstRW<[N2Write_3cyc_1V0],
             (instrs BFCVT)>;
// ASIMD miscellaneous instructions
// -----------------------------------------------------------------------------

// No explicit entries are needed for the categories below; they are
// handled by SchedAlias<WriteV[dq], ...>:
//   - ASIMD bit reverse
//   - ASIMD bitwise insert
//   - ASIMD duplicate, element
//   - ASIMD extract narrow
//   - ASIMD insert, element to element
//   - ASIMD move, FP immed
//   - ASIMD move, integer immed
//   - ASIMD table lookup, 1 or 2 table regs
//   - ASIMD table lookup extension, 1 table reg
//   - ASIMD transfer, element to gen reg

// ASIMD duplicate, gen reg
def : InstRW<[N2Write_3cyc_1M0],
             (instregex "^DUPv.+gpr")>;

// ASIMD extract narrow, saturating
def : InstRW<[N2Write_4cyc_1V1],
             (instregex "^[SU]QXTNv",
                        "^SQXTUNv")>;

// ASIMD reciprocal and square root estimate, D-form U32
def : InstRW<[N2Write_3cyc_1V0],
             (instrs URECPEv2i32,
                     URSQRTEv2i32)>;

// ASIMD reciprocal and square root estimate, Q-form U32
def : InstRW<[N2Write_4cyc_2V0],
             (instrs URECPEv4i32,
                     URSQRTEv4i32)>;

// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
def : InstRW<[N2Write_3cyc_1V0],
             (instrs FRECPEv1f16,  FRECPEv1i32,
                     FRECPEv1i64,  FRECPEv2f32,
                     FRSQRTEv1f16, FRSQRTEv1i32,
                     FRSQRTEv1i64, FRSQRTEv2f32)>;

// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
def : InstRW<[N2Write_4cyc_2V0],
             (instrs FRECPEv4f16,  FRECPEv4f32,
                     FRSQRTEv4f16, FRSQRTEv4f32)>;

// ASIMD reciprocal and square root estimate, Q-form F16
def : InstRW<[N2Write_6cyc_4V0],
             (instrs FRECPEv8f16,
                     FRSQRTEv8f16)>;

// ASIMD reciprocal exponent
def : InstRW<[N2Write_3cyc_1V0],
             (instregex "^FRECPXv")>;

// ASIMD reciprocal step
def : InstRW<[N2Write_4cyc_1V],
             (instregex "^FRECPSv",
                        "^FRSQRTSv")>;

// ASIMD table lookup, 3 table regs
def : InstRW<[N2Write_4cyc_2V],
             (instrs TBLv8i8Three,
                     TBLv16i8Three)>;

// ASIMD table lookup, 4 table regs
def : InstRW<[N2Write_4cyc_4V],
             (instrs TBLv8i8Four,
                     TBLv16i8Four)>;

// ASIMD table lookup extension, 2 table reg
def : InstRW<[N2Write_4cyc_2V],
             (instrs TBXv8i8Two,
                     TBXv16i8Two)>;

// ASIMD table lookup extension, 3 table reg
def : InstRW<[N2Write_6cyc_4V],
             (instrs TBXv8i8Three,
                     TBXv16i8Three)>;

// ASIMD table lookup extension, 4 table reg
def : InstRW<[N2Write_6cyc_8V],
             (instrs TBXv8i8Four,
                     TBXv16i8Four)>;

// ASIMD transfer, gen reg to element
def : InstRW<[N2Write_5cyc_1M0_1V],
             (instregex "^INSvi(8|16|32|64)gpr$")>;
// ASIMD load instructions
// -----------------------------------------------------------------------------
// Each category has two records: the base form, and the post-indexed (_POST)
// form which additionally lists WriteAdr ahead of the load write.

// ASIMD load, 1 element, multiple, 1 reg, D-form
def : InstRW<[N2Write_6cyc_1L],
             (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, N2Write_6cyc_1L],
             (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 1 reg, Q-form
def : InstRW<[N2Write_6cyc_1L],
             (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N2Write_6cyc_1L],
             (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, D-form
def : InstRW<[N2Write_6cyc_2L],
             (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, N2Write_6cyc_2L],
             (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, Q-form
def : InstRW<[N2Write_6cyc_2L],
             (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N2Write_6cyc_2L],
             (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, D-form
def : InstRW<[N2Write_6cyc_3L],
             (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, N2Write_6cyc_3L],
             (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, Q-form
def : InstRW<[N2Write_6cyc_3L],
             (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N2Write_6cyc_3L],
             (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, D-form
def : InstRW<[N2Write_7cyc_4L],
             (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, N2Write_7cyc_4L],
             (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, Q-form
def : InstRW<[N2Write_7cyc_4L],
             (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N2Write_7cyc_4L],
             (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, one lane (B/H/S and D share one entry)
def : InstRW<[N2Write_8cyc_1L_1V],
             (instregex "LD1i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V],
             (instregex "LD1i(8|16|32|64)_POST$")>;

// ASIMD load, 1 element, all lanes, D-form (B/H/S and D share one entry)
def : InstRW<[N2Write_8cyc_1L_1V],
             (instregex "LD1Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V],
             (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, all lanes, Q-form
def : InstRW<[N2Write_8cyc_1L_1V],
             (instregex "LD1Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V],
             (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_8cyc_1L_2V],
             (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V],
             (instregex "LD2Twov(8b|4h|2s)_POST$")>;

// ASIMD load, 2 element, multiple, Q-form (B/H/S and D share one entry)
def : InstRW<[N2Write_8cyc_2L_2V],
             (instregex "LD2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_2L_2V],
             (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, one lane (B/H, S and D share one entry)
def : InstRW<[N2Write_8cyc_1L_2V],
             (instregex "LD2i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V],
             (instregex "LD2i(8|16|32|64)_POST$")>;

// ASIMD load, 2 element, all lanes, D-form (B/H/S and D share one entry)
def : InstRW<[N2Write_8cyc_1L_2V],
             (instregex "LD2Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V],
             (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 2 element, all lanes, Q-form
def : InstRW<[N2Write_8cyc_1L_2V],
             (instregex "LD2Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V],
             (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 3 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_8cyc_2L_3V],
             (instregex "LD3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V],
             (instregex "LD3Threev(8b|4h|2s)_POST$")>;

// ASIMD load, 3 element, multiple, Q-form, B/H/S
def : InstRW<[N2Write_8cyc_3L_3V],
             (instregex "LD3Threev(16b|8h|4s)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V],
             (instregex "LD3Threev(16b|8h|4s)_POST$")>;

// ASIMD load, 3 element, multiple, Q-form, D
def : InstRW<[N2Write_8cyc_3L_3V],
             (instregex "LD3Threev(2d)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V],
             (instregex "LD3Threev(2d)_POST$")>;

// ASIMD load, 3 element, one lane (B/H, S and D share one entry)
def : InstRW<[N2Write_8cyc_2L_3V],
             (instregex "LD3i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V],
             (instregex "LD3i(8|16|32|64)_POST$")>;

// ASIMD load, 3 element, all lanes, D-form (B/H/S and D share one entry)
def : InstRW<[N2Write_8cyc_2L_3V],
             (instregex "LD3Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V],
             (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 3 element, all lanes, Q-form (B/H/S and D share one entry)
def : InstRW<[N2Write_8cyc_3L_3V],
             (instregex "LD3Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V],
             (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_8cyc_3L_4V],
             (instregex "LD4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V],
             (instregex "LD4Fourv(8b|4h|2s)_POST$")>;

// ASIMD load, 4 element, multiple, Q-form (B/H/S and D share one entry)
def : InstRW<[N2Write_9cyc_4L_4V],
             (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N2Write_9cyc_4L_4V],
             (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, one lane (B/H, S and D share one entry)
def : InstRW<[N2Write_8cyc_3L_4V],
             (instregex "LD4i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V],
             (instregex "LD4i(8|16|32|64)_POST$")>;

// ASIMD load, 4 element, all lanes, D-form (B/H/S and D share one entry)
def : InstRW<[N2Write_8cyc_3L_4V],
             (instregex "LD4Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V],
             (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 4 element, all lanes, Q-form (B/H/S and D share one entry)
def : InstRW<[N2Write_8cyc_4L_4V],
             (instregex "LD4Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N2Write_8cyc_4L_4V],
             (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
// ASIMD store instructions
// -----------------------------------------------------------------------------
// Each category has two records: the base form, and the post-indexed (_POST)
// form which additionally lists WriteAdr ahead of the store write.

// ASIMD store, 1 element, multiple, 1 reg, D-form
def : InstRW<[N2Write_2cyc_1L01_1V],
             (instregex "ST1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V],
             (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 1 reg, Q-form
def : InstRW<[N2Write_2cyc_1L01_1V],
             (instregex "ST1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V],
             (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, D-form
def : InstRW<[N2Write_2cyc_1L01_1V],
             (instregex "ST1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V],
             (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, Q-form
def : InstRW<[N2Write_2cyc_2L01_2V],
             (instregex "ST1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V],
             (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, D-form
def : InstRW<[N2Write_2cyc_2L01_2V],
             (instregex "ST1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V],
             (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[N2Write_2cyc_3L01_3V],
             (instregex "ST1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N2Write_2cyc_3L01_3V],
             (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, D-form
def : InstRW<[N2Write_2cyc_2L01_2V],
             (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V],
             (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[N2Write_2cyc_4L01_4V],
             (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N2Write_2cyc_4L01_4V],
             (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, one lane (B/H/S and D share one entry)
def : InstRW<[N2Write_4cyc_1L01_1V],
             (instregex "ST1i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V],
             (instregex "ST1i(8|16|32|64)_POST$")>;
// ASIMD store, 2 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_4cyc_1L01_1V],
             (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V],
             (instregex "ST2Twov(8b|4h|2s)_POST$")>;

// ASIMD store, 2 element, multiple, Q-form (B/H/S and D share one entry)
def : InstRW<[N2Write_4cyc_2L01_2V],
             (instregex "ST2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N2Write_4cyc_2L01_2V],
             (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 2 element, one lane (B/H/S and D share one entry)
def : InstRW<[N2Write_4cyc_1L01_1V],
             (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V],
             (instregex "ST2i(8|16|32|64)_POST$")>;

// ASIMD store, 3 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_5cyc_2L01_2V],
             (instregex "ST3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, N2Write_5cyc_2L01_2V],
             (instregex "ST3Threev(8b|4h|2s)_POST$")>;

// ASIMD store, 3 element, multiple, Q-form (B/H/S and D share one entry)
def : InstRW<[N2Write_6cyc_3L01_3V],
             (instregex "ST3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V],
             (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 3 element, one lane (B/H, S and D share one entry)
def : InstRW<[N2Write_6cyc_3L01_3V],
             (instregex "ST3i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V],
             (instregex "ST3i(8|16|32|64)_POST$")>;

// ASIMD store, 4 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_6cyc_3L01_3V],
             (instregex "ST4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V],
             (instregex "ST4Fourv(8b|4h|2s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, B/H/S
def : InstRW<[N2Write_7cyc_6L01_6V],
             (instregex "ST4Fourv(16b|8h|4s)$")>;
def : InstRW<[WriteAdr, N2Write_7cyc_6L01_6V],
             (instregex "ST4Fourv(16b|8h|4s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, D
def : InstRW<[N2Write_5cyc_4L01_4V],
             (instregex "ST4Fourv(2d)$")>;
def : InstRW<[WriteAdr, N2Write_5cyc_4L01_4V],
             (instregex "ST4Fourv(2d)_POST$")>;

// ASIMD store, 4 element, one lane, B/H/S
def : InstRW<[N2Write_6cyc_3L01_3V],
             (instregex "ST4i(8|16|32)$")>;
def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V],
             (instregex "ST4i(8|16|32)_POST$")>;

// ASIMD store, 4 element, one lane, D
def : InstRW<[N2Write_4cyc_3L01_3V],
             (instregex "ST4i(64)$")>;
def : InstRW<[WriteAdr, N2Write_4cyc_3L01_3V],
             (instregex "ST4i(64)_POST$")>;
// Cryptography extensions
// -----------------------------------------------------------------------------

// AES opcodes (AESD/AESE and AESMC/AESIMC register forms)
def : InstRW<[N2Write_2cyc_1V],
             (instregex "^AES[DE]rr$",
                        "^AESI?MCrr")>;

// Crypto polynomial (64x64) multiply long
def : InstRW<[N2Write_2cyc_1V0],
             (instrs PMULLv1i64,
                     PMULLv2i64)>;

// Crypto SHA1 hash acceleration op
// Crypto SHA1 schedule acceleration ops
def : InstRW<[N2Write_2cyc_1V0],
             (instregex "^SHA1(H|SU0|SU1)")>;

// Crypto SHA1 hash acceleration ops
// Crypto SHA256 hash acceleration ops
def : InstRW<[N2Write_4cyc_1V0],
             (instregex "^SHA1[CMP]",
                        "^SHA256H2?")>;

// Crypto SHA256 schedule acceleration ops
def : InstRW<[N2Write_2cyc_1V0],
             (instregex "^SHA256SU[01]")>;

// Crypto SHA512 hash acceleration ops
def : InstRW<[N2Write_2cyc_1V0],
             (instregex "^SHA512(H|H2|SU0|SU1)")>;

// BCAX, EOR3, RAX1 and XAR
def : InstRW<[N2Write_2cyc_1V0],
             (instrs BCAX, EOR3, RAX1, XAR)>;

// SM3 opcodes
def : InstRW<[N2Write_2cyc_1V0],
             (instregex "^SM3PARTW[12]$",
                        "^SM3SS1$",
                        "^SM3TT[12][AB]$")>;

// SM4 opcodes
def : InstRW<[N2Write_4cyc_1V0],
             (instrs SM4E, SM4ENCKEY)>;

// -----------------------------------------------------------------------------

// CRC32 opcodes
def : InstRW<[N2Write_2cyc_1M0],
             (instregex "^CRC32")>;
// SVE Predicate instructions
// -----------------------------------------------------------------------------

// Loop control, based on predicate
def : InstRW<[N2Write_2cyc_1M],
             (instrs BRKA_PPmP, BRKA_PPzP,
                     BRKB_PPmP, BRKB_PPzP)>;

// Loop control, based on predicate and flag setting
def : InstRW<[N2Write_3cyc_1M],
             (instrs BRKAS_PPzP,
                     BRKBS_PPzP)>;

// Loop control, propagating
def : InstRW<[N2Write_2cyc_1M0],
             (instrs BRKN_PPzP,
                     BRKPA_PPzPP,
                     BRKPB_PPzPP)>;
// Loop control, propagating and flag setting
// Flag-setting counterparts of BRKN/BRKPA/BRKPB. The list is closed with
// BRKPBS_PPzPP so that it mirrors the non-flag-setting record and the
// definition is properly terminated.
def : InstRW<[N2Write_3cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
                                            BRKPBS_PPzPP)>;
// Loop control, based on GPR
def : InstRW<[N2Write_3cyc_1M],
             (instregex
                "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;

// WHILERW / WHILEWR
def : InstRW<[N2Write_3cyc_1M],
             (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>;

// CTERMEQ / CTERMNE
def : InstRW<[N2Write_1cyc_1M],
             (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;

// Predicate counting scalar
def : InstRW<[N2Write_2cyc_1M],
             (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
def : InstRW<[N2Write_2cyc_1M],
             (instregex
                "^(CNT|DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI$",
                "^SQ(DEC|INC)[BHWD]_XPiWdI$",
                "^(UQDEC|UQINC)[BHWD]_WPiI$")>;

// Predicate counting scalar, active predicate
def : InstRW<[N2Write_2cyc_1M],
             (instregex
                "^CNTP_XPP_[BHSD]$",
                "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$",
                "^(UQDEC|UQINC)P_WP_[BHSD]$",
                "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]$")>;

// Predicate counting vector, active predicate
def : InstRW<[N2Write_7cyc_1M_1M0_1V],
             (instregex
                "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>;

// Predicate logical
def : InstRW<[N2Write_1cyc_1M0],
             (instregex
                "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;

// Predicate logical, flag setting
def : InstRW<[N2Write_2cyc_1M0_1M],
             (instregex
                "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>;

// Predicate reverse
def : InstRW<[N2Write_2cyc_1M],
             (instregex "^REV_PP_[BHSD]$")>;

// SEL on predicates
def : InstRW<[N2Write_1cyc_1M0],
             (instrs SEL_PPPP)>;

// PFALSE / PTRUE
def : InstRW<[N2Write_2cyc_1M],
             (instregex "^PFALSE$",
                        "^PTRUE_[BHSD]$")>;

// Predicate set/initialize, set flags
def : InstRW<[N2Write_3cyc_1M],
             (instregex "^PTRUES_[BHSD]$")>;

// Predicate find first/next
def : InstRW<[N2Write_3cyc_1M],
             (instregex "^PFIRST_B$",
                        "^PNEXT_[BHSD]$")>;

// PTEST
def : InstRW<[N2Write_1cyc_1M],
             (instrs PTEST_PP)>;

// Predicate transpose
def : InstRW<[N2Write_2cyc_1M],
             (instregex "^TRN[12]_PPP_[BHSDQ]$")>;

// Predicate unpack and widen
def : InstRW<[N2Write_2cyc_1M],
             (instrs PUNPKHI_PP,
                     PUNPKLO_PP)>;

// Predicate zip/unzip
def : InstRW<[N2Write_2cyc_1M],
             (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;
// SVE integer instructions
// -----------------------------------------------------------------------------

// Arithmetic, absolute diff
def : InstRW<[N2Write_2cyc_1V],
             (instregex "^[SU]ABD_ZPmZ_[BHSD]",
                        "^[SU]ABD_ZPZZ_[BHSD]")>;

// Arithmetic, absolute diff accum
def : InstRW<[N2Write_4cyc_1V1],
             (instregex "^[SU]ABA_ZZZ_[BHSD]$")>;

// Arithmetic, absolute diff accum long
def : InstRW<[N2Write_4cyc_1V1],
             (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>;

// Arithmetic, absolute diff long
def : InstRW<[N2Write_2cyc_1V],
             (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>;

// Arithmetic, basic
def : InstRW<[N2Write_2cyc_1V],
             (instregex
                "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
                "^(ADD|SUB)_ZZZ_[BHSD]",
                "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
                "^(ADD|SUB|SUBR)_ZI_[BHSD]",
                "^ADR_[SU]XTW_ZZZ_D_[0123]",
                "^ADR_LSL_ZZZ_[SD]_[0123]",
                "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
                "^SADDLBT_ZZZ_[HSD]",
                "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
                "^SSUBL(BT|TB)_ZZZ_[HSD]")>;

// Arithmetic, complex
def : InstRW<[N2Write_2cyc_1V],
             (instregex
                "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
                "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
                "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
                "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
                "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
                "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;

// Arithmetic, large integer
def : InstRW<[N2Write_2cyc_1V],
             (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>;

// Arithmetic, pairwise add
def : InstRW<[N2Write_2cyc_1V],
             (instregex "^ADDP_ZPmZ_[BHSD]$")>;

// Arithmetic, pairwise add and accum long
def : InstRW<[N2Write_4cyc_1V1],
             (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>;
// Arithmetic, shift
def : InstRW<[N2Write_2cyc_1V1],
             (instregex
                "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
                "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
                "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
                "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
                "^(ASR|LSL|LSR)_ZZI_[BHSD]",
                "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
                "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;

// Arithmetic, shift and accumulate
def : InstRW<[N2Write_4cyc_1V1],
             (instregex
                "^(SRSRA|SSRA|URSRA|USRA)_ZZI_[BHSD]$")>;

// Arithmetic, shift by immediate
// Arithmetic, shift by immediate and insert
def : InstRW<[N2Write_2cyc_1V1],
             (instregex
                "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]$")>;

// Arithmetic, shift complex
def : InstRW<[N2Write_4cyc_1V1],
             (instregex
                "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
                "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]",
                "^[SU]QR?SHL_ZPZZ_[BHSD]",
                "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
                "^SQSHRU?N[BT]_ZZI_[BHS]",
                "^UQR?SHRN[BT]_ZZI_[BHS]")>;

// Arithmetic, shift right for divide
def : InstRW<[N2Write_4cyc_1V1],
             (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;

// Arithmetic, shift rounding
def : InstRW<[N2Write_4cyc_1V1],
             (instregex
                "^[SU]RSHLR?_ZPmZ_[BHSD]",
                "^[SU]RSHL_ZPZZ_[BHSD]",
                "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>;
// BDEP / BEXT / BGRP
def : InstRW<[N2Write_6cyc_2V1],
             (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>;

// BSL / BSL1N / BSL2N / NBSL
def : InstRW<[N2Write_2cyc_1V],
             (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>;

// Count/reverse bits
def : InstRW<[N2Write_2cyc_1V],
             (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;

// Broadcast logical bitmask immediate to vector
def : InstRW<[N2Write_2cyc_1V],
             (instrs DUPM_ZI)>;

// Compare and set flags
def : InstRW<[N2Write_4cyc_1V0_1M],
             (instregex
                "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
                "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;

// CADD / SQCADD
def : InstRW<[N2Write_2cyc_1V],
             (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>;

// Complex dot product 8-bit element
def : InstRW<[N2Write_3cyc_1V],
             (instrs CDOT_ZZZ_S,
                     CDOT_ZZZI_S)>;

// Complex dot product 16-bit element
def : InstRW<[N2Write_4cyc_1V0],
             (instrs CDOT_ZZZ_D,
                     CDOT_ZZZI_D)>;

// Complex multiply-add B, H, S element size
def : InstRW<[N2Write_4cyc_1V0],
             (instregex "^CMLA_ZZZ_[BHS]$",
                        "^CMLA_ZZZI_[HS]$")>;

// Complex multiply-add D element size
def : InstRW<[N2Write_5cyc_2V0],
             (instrs CMLA_ZZZ_D)>;

// Conditional extract operations, scalar form
def : InstRW<[N2Write_8cyc_1M0_1V1_1V],
             (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;

// Conditional extract operations, SIMD&FP scalar and vector forms
def : InstRW<[N2Write_3cyc_1V1],
             (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
                        "^COMPACT_ZPZ_[SD]$",
                        "^SPLICE_ZPZZ?_[BHSD]$")>;
// Convert to floating point, 64b to float or convert to double
def : InstRW<[N2Write_3cyc_1V0],
             (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
                        "^[SU]CVTF_ZPmZ_StoD")>;

// Convert to floating point, 32b to single or half
def : InstRW<[N2Write_4cyc_2V0],
             (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;

// Convert to floating point, 16b to half
def : InstRW<[N2Write_6cyc_4V0],
             (instregex "^[SU]CVTF_ZPmZ_HtoH")>;

// CPY, ZPmR form
def : InstRW<[N2Write_5cyc_1M0_1V],
             (instregex "^CPY_ZPmR_[BHSD]$")>;

// Copy, scalar SIMD&FP or imm
def : InstRW<[N2Write_2cyc_1V],
             (instregex "^CPY_ZPm[IV]_[BHSD]$",
                        "^CPY_ZPzI_[BHSD]$")>;

// SDIV/UDIV (and reversed forms), S element size
def : InstRW<[N2Write_12cyc_1V0],
             (instregex "^[SU]DIVR?_ZPmZ_S",
                        "^[SU]DIV_ZPZZ_S")>;

// SDIV/UDIV (and reversed forms), D element size
def : InstRW<[N2Write_20cyc_1V0],
             (instregex "^[SU]DIVR?_ZPmZ_D",
                        "^[SU]DIV_ZPZZ_D")>;

// Dot product, 8 bit
def : InstRW<[N2Write_3cyc_1V],
             (instregex "^[SU]DOT_ZZZI?_S$")>;

// Dot product, 8 bit, using signed and unsigned integers
def : InstRW<[N2Write_3cyc_1V],
             (instrs SUDOT_ZZZI,
                     USDOT_ZZZI,
                     USDOT_ZZZ)>;

// Dot product, 16 bit
def : InstRW<[N2Write_4cyc_1V0],
             (instregex "^[SU]DOT_ZZZI?_D$")>;

// Duplicate, immediate and indexed form
def : InstRW<[N2Write_2cyc_1V],
             (instregex "^DUP_ZI_[BHSD]$",
                        "^DUP_ZZI_[BHSDQ]$")>;

// Duplicate, scalar form
def : InstRW<[N2Write_3cyc_1M0],
             (instregex "^DUP_ZR_[BHSD]$")>;

// Extend, sign or zero
def : InstRW<[N2Write_2cyc_1V1],
             (instregex "^[SU]XTB_ZPmZ_[HSD]",
                        "^[SU]XTH_ZPmZ_[SD]",
                        "^[SU]XTW_ZPmZ_[D]")>;

// EXT
def : InstRW<[N2Write_2cyc_1V],
             (instrs EXT_ZZI,
                     EXT_ZZI_B)>;

// Extract narrow saturating
def : InstRW<[N2Write_4cyc_1V1],
             (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$",
                        "^SQXTUN[BT]_ZZ_[BHS]$")>;

// Extract/insert operation, SIMD and FP scalar form
def : InstRW<[N2Write_3cyc_1V1],
             (instregex "^LAST[AB]_VPZ_[BHSD]$",
                        "^INSR_ZV_[BHSD]$")>;

// Extract/insert operation, scalar
def : InstRW<[N2Write_5cyc_1V1_1M0],
             (instregex "^LAST[AB]_RPZ_[BHSD]$",
                        "^INSR_ZR_[BHSD]$")>;
// Histogram operations
// Covers both histogram opcodes: HISTCNT (S/D element forms) and HISTSEG.
// The HISTSEG entry closes the list so the record is properly terminated.
def : InstRW<[N2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]$",
                                           "^HISTSEG_ZZZ$")>;
// Horizontal operations, B, H, S form, immediate operands only
def : InstRW<[N2Write_4cyc_1V0],
             (instregex "^INDEX_II_[BHS]$")>;

// Horizontal operations, B, H, S form, scalar, immediate operands/ scalar
// operands only / immediate, scalar operands
def : InstRW<[N2Write_7cyc_1M0_1V0],
             (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;

// Horizontal operations, D form, immediate operands only
def : InstRW<[N2Write_5cyc_2V0],
             (instrs INDEX_II_D)>;

// Horizontal operations, D form, scalar, immediate operands / scalar operands
// only / immediate, scalar operands
def : InstRW<[N2Write_8cyc_2M0_2V0],
             (instregex "^INDEX_(IR|RI|RR)_D$")>;

// AND / BIC / EOR / NOT / ORR and interleaved EOR (EORBT/EORTB)
def : InstRW<[N2Write_2cyc_1V],
             (instregex
                "^(AND|EOR|ORR)_ZI",
                "^(AND|BIC|EOR|ORR)_ZZZ",
                "^EOR(BT|TB)_ZZZ_[BHSD]",
                "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]",
                "^NOT_ZPmZ_[BHSD]")>;

// Max/min, basic and pairwise
def : InstRW<[N2Write_2cyc_1V],
             (instregex
                "^[SU](MAX|MIN)_ZI_[BHSD]",
                "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]",
                "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>;

// Matching operations
def : InstRW<[N2Write_2cyc_1V0_1M],
             (instregex "^N?MATCH_PPzZZ_[BH]$")>;

// Matrix multiply-accumulate
def : InstRW<[N2Write_3cyc_1V],
             (instrs SMMLA_ZZZ,
                     UMMLA_ZZZ,
                     USMMLA_ZZZ)>;
// Move prefix
// Predicated (merging/zeroing) forms per element size, plus the unpredicated
// MOVPRFX_ZZ form, which closes the list so the record is properly terminated.
def : InstRW<[N2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
                                           "^MOVPRFX_ZZ$")>;
// Multiply, B, H, S element size
def : InstRW<[N2Write_4cyc_1V0],
             (instregex
                "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
                "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
                "^[SU]MULH_ZPZZ_[BHS]")>;

// Multiply, D element size
def : InstRW<[N2Write_5cyc_2V0],
             (instregex
                "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
                "^[SU]MULH_(ZPmZ|ZZZ)_D",
                "^[SU]MULH_ZPZZ_D")>;

// SMULL[BT] / UMULL[BT]
def : InstRW<[N2Write_4cyc_1V0],
             (instregex "^[SU]MULL[BT]_ZZZI_[SD]$",
                        "^[SU]MULL[BT]_ZZZ_[HSD]$")>;

// Multiply accumulate, B, H, S element size
def : InstRW<[N2Write_4cyc_1V0],
             (instregex
                "^ML[AS]_ZZZI_[BHS]$",
                "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_[BHS]")>;

// Multiply accumulate, D element size
def : InstRW<[N2Write_5cyc_2V0],
             (instregex
                "^ML[AS]_ZZZI_D$",
                "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_D")>;

// Multiply accumulate long
def : InstRW<[N2Write_4cyc_1V0],
             (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
                        "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>;

// Multiply accumulate saturating doubling long regular
def : InstRW<[N2Write_4cyc_1V0],
             (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$",
                        "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>;

// Multiply saturating doubling high, B, H, S element size
def : InstRW<[N2Write_4cyc_1V0],
             (instregex "^SQDMULH_ZZZ_[BHS]$",
                        "^SQDMULH_ZZZI_[HS]$")>;

// Multiply saturating doubling high, D element size
def : InstRW<[N2Write_5cyc_2V0],
             (instrs SQDMULH_ZZZ_D,
                     SQDMULH_ZZZI_D)>;

// Multiply saturating doubling long
def : InstRW<[N2Write_4cyc_1V0],
             (instregex "^SQDMULL[BT]_ZZZ_[HSD]$",
                        "^SQDMULL[BT]_ZZZI_[SD]$")>;
// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
// element size
def : InstRW<[N2Write_4cyc_1V0],
             (instregex
                "^SQRDML[AS]H_ZZZ_[BHS]$",
                "^SQRDCMLAH_ZZZ_[BHS]$",
                "^SQRDML[AS]H_ZZZI_[HS]$",
                "^SQRDCMLAH_ZZZI_[HS]$")>;

// Multiply saturating rounding doubling regular/complex accumulate, D element
// size
def : InstRW<[N2Write_5cyc_2V0],
             (instregex "^SQRDML[AS]H_ZZZI?_D$",
                        "^SQRDCMLAH_ZZZ_D$")>;

// Multiply saturating rounding doubling regular/complex, B, H, S element size
def : InstRW<[N2Write_4cyc_1V0],
             (instregex "^SQRDMULH_ZZZ_[BHS]$",
                        "^SQRDMULH_ZZZI_[HS]$")>;

// Multiply saturating rounding doubling regular/complex, D element size
def : InstRW<[N2Write_5cyc_2V0],
             (instregex "^SQRDMULH_ZZZI?_D$")>;

// Multiply/multiply long, (8x8) polynomial
def : InstRW<[N2Write_2cyc_1V0],
             (instregex "^PMUL_ZZZ_B$",
                        "^PMULL[BT]_ZZZ_[HDQ]$")>;

// Predicate counting vector
def : InstRW<[N2Write_2cyc_1V0],
             (instregex
                "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI$")>;

// Reciprocal estimate
def : InstRW<[N2Write_4cyc_2V0],
             (instregex "^URECPE_ZPmZ_S",
                        "^URSQRTE_ZPmZ_S")>;

// Reduction, arithmetic, B form
def : InstRW<[N2Write_11cyc_2V_2V1],
             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;

// Reduction, arithmetic, H form
def : InstRW<[N2Write_9cyc_2V_2V1],
             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;

// Reduction, arithmetic, S form
def : InstRW<[N2Write_8cyc_2V_2V1],
             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;

// Reduction, arithmetic, D form
def : InstRW<[N2Write_8cyc_2V_2V1],
             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;

// Reduction, logical
def : InstRW<[N2Write_6cyc_1V_1V1],
             (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]$")>;
1891 def : InstRW<[N2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]$",
1892 "^REVB_ZPmZ_[HSD]$",
// Every entry in this group is bound to the same write, N2Write_2cyc_1V.
1896 // Select, vector form
1897 def : InstRW<[N2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]$")>;
// Table lookup (TBL, both _ZZZ and _ZZZZ opcodes)
1900 def : InstRW<[N2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]$")>;
1902 // Table lookup extension
1903 def : InstRW<[N2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]$")>;
1905 // Transpose, vector form
// TRN1/TRN2, including the Q element-size opcodes.
1906 def : InstRW<[N2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>;
1908 // Unpack and extend
// SUNPKHI/SUNPKLO/UUNPKHI/UUNPKLO.
1909 def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>;
// Zip/unzip, vector form (UZP1/UZP2/ZIP1/ZIP2, including the Q opcodes)
1912 def : InstRW<[N2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
1914 // SVE floating-point instructions
1915 // -----------------------------------------------------------------------------
1917 // Floating point absolute value/difference
// Note: "^FAB[SD]_ZPmZ_[HSD]" already matches the FABS_ZPmZ names that the
// explicit FABS pattern below lists again.
1918 def : InstRW<[N2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]",
1920 "^FABS_ZPmZ_[HSD]")>;
1922 // Floating point arithmetic
// FADD/FSUB/FSUBR in their predicated (ZPm*/ZPZ*) forms, FADD/FSUB in the
// unpredicated _ZZZ form, and pairwise FADDP.
1923 def : InstRW<[N2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
1924 "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
1925 "^FADDP_ZPmZZ_[HSD]",
1927 "^FSUBR_ZPm[IZ]_[HSD]",
1928 "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
// FADDA is bound per element size by exact opcode name; the H and S entries
// use a ..._1V1 write while the D entry uses N2Write_4cyc_1V.
1930 // Floating point associative add, F16
1931 def : InstRW<[N2Write_10cyc_1V1], (instrs FADDA_VPZ_H)>;
1933 // Floating point associative add, F32
1934 def : InstRW<[N2Write_6cyc_1V1], (instrs FADDA_VPZ_S)>;
1936 // Floating point associative add, F64
1937 def : InstRW<[N2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;
1939 // Floating point compare
// FACGE/FACGT and FCMUO are matched in vector-vector (PPzZZ) form;
// FCMEQ/GE/GT/NE in both vector and compare-with-zero (PPzZ0) form;
// FCMLE/FCMLT in compare-with-zero form only.
1940 def : InstRW<[N2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]$",
1941 "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$",
1942 "^FCM(LE|LT)_PPzZ0_[HSD]$",
1943 "^FCMUO_PPzZZ_[HSD]$")>;
1945 // Floating point complex add
1946 def : InstRW<[N2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]$")>;
1948 // Floating point complex multiply add
// Predicated FCMLA for H/S/D; the _ZZZI opcodes are matched for H/S only.
1949 def : InstRW<[N2Write_5cyc_1V], (instregex "^FCMLA_ZPmZZ_[HSD]$",
1950 "^FCMLA_ZZZI_[HS]$")>;
1952 // Floating point convert, long or narrow (F16 to F32 or F32 to F16)
// Covers FCVT plus the FCVTLT (HtoS) and FCVTNT (StoH) opcodes.
1953 def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
1954 "^FCVTLT_ZPmZ_HtoS",
1955 "^FCVTNT_ZPmZ_StoH")>;
1957 // Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
// or F64 to F16)
// Covers FCVT plus the FCVTLT (StoD) and FCVTNT (DtoS) opcodes.
1959 def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
1960 "^FCVTLT_ZPmZ_StoD",
1961 "^FCVTNT_ZPmZ_DtoS")>;
1963 // Floating point convert, round to odd
// FCVTX and FCVTXNT, DtoS only; listed by exact opcode name.
1964 def : InstRW<[N2Write_3cyc_1V0], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
// FLOGB is split by element size; each size matches both the ZPmZ and ZPZZ
// opcode forms.
1966 // Floating point base2 log, F16
1967 def : InstRW<[N2Write_6cyc_4V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;
1969 // Floating point base2 log, F32
1970 def : InstRW<[N2Write_4cyc_2V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;
1972 // Floating point base2 log, F64
1973 def : InstRW<[N2Write_3cyc_1V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;
// FCVTZS/FCVTZU are bucketed by conversion suffix: HtoH under F16, HtoS/StoS
// under F32, and every suffix that involves D (HtoD, StoD, DtoS, DtoD) under
// F64.
1975 // Floating point convert to integer, F16
1976 def : InstRW<[N2Write_6cyc_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
1978 // Floating point convert to integer, F32
1979 def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;
1981 // Floating point convert to integer, F64
1982 def : InstRW<[N2Write_3cyc_1V0],
1983 (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
1985 // Floating point copy
// FCPY (predicated immediate) and FDUP (immediate).
1986 def : InstRW<[N2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]$",
1987 "^FDUP_ZI_[HSD]$")>;
// FDIV and FDIVR are split by element size; each size matches both the ZPmZ
// and ZPZZ opcode forms.
1989 // Floating point divide, F16
1990 def : InstRW<[N2Write_13cyc_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
1992 // Floating point divide, F32
1993 def : InstRW<[N2Write_10cyc_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
1995 // Floating point divide, F64
1996 def : InstRW<[N2Write_15cyc_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
1998 // Floating point min/max pairwise
// FMAXP/FMINP/FMAXNMP/FMINNMP.
1999 def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
2001 // Floating point min/max
// FMAX/FMIN/FMAXNM/FMINNM, immediate and vector operand forms.
2002 def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
2003 "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
2005 // Floating point multiply
// Besides FMUL (predicated, unpredicated _ZZZ and _ZZZI), this entry also
// covers FSCALE and FMULX.
2006 def : InstRW<[N2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
2007 "^FMULX_ZPZZ_[HSD]",
2008 "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
2009 "^FMUL_ZPZ[IZ]_[HSD]")>;
2011 // Floating point multiply accumulate
// FMAD/FMSB/FMLA/FMLS and their FN* negated variants in ZPmZZ form;
// F[N]MLA/F[N]MLS in ZPZZZ form; FMLA/FMLS in _ZZZI form.
2012 def : InstRW<[N2Write_4cyc_1V], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$",
2013 "^FN?ML[AS]_ZPZZZ_[HSD]",
2014 "^FML[AS]_ZZZI_[HSD]$")>;
2016 // Floating point multiply add/sub accumulate long
// FMLALB/FMLALT/FMLSLB/FMLSLT, _ZZZ and _ZZZI opcodes with the _SHH suffix.
2017 def : InstRW<[N2Write_4cyc_1V], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;
// The three reciprocal-estimate entries cover FRECPE, FRSQRTE and FRECPX,
// one entry per element size.
2019 // Floating point reciprocal estimate, F16
2020 def : InstRW<[N2Write_6cyc_4V0], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>;
2022 // Floating point reciprocal estimate, F32
2023 def : InstRW<[N2Write_4cyc_2V0], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>;
2025 // Floating point reciprocal estimate, F64
2026 def : InstRW<[N2Write_3cyc_1V0], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>;
2028 // Floating point reciprocal step
// FRECPS and FRSQRTS, all element sizes on one write.
2029 def : InstRW<[N2Write_4cyc_1V0], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
// The three reduction entries cover FADDV, FMAXNMV, FMAXV, FMINNMV and FMINV,
// one entry per element size.
2031 // Floating point reduction, F16
2032 def : InstRW<[N2Write_6cyc_2V],
2033 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H$")>;
2035 // Floating point reduction, F32
2036 def : InstRW<[N2Write_4cyc_1V],
2037 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S$")>;
2039 // Floating point reduction, F64
2040 def : InstRW<[N2Write_2cyc_1V],
2041 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>;
// FRINTA/I/M/N/P/X/Z, one entry per element size.
2043 // Floating point round to integral, F16
2044 def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
2046 // Floating point round to integral, F32
2047 def : InstRW<[N2Write_4cyc_2V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
2049 // Floating point round to integral, F64
2050 def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
// FSQRT is split by element size; the patterns have no trailing `$`.
2052 // Floating point square root, F16
2053 def : InstRW<[N2Write_13cyc_1V0], (instregex "^FSQRT_ZPmZ_H")>;
2055 // Floating point square root, F32
2056 def : InstRW<[N2Write_10cyc_1V0], (instregex "^FSQRT_ZPmZ_S")>;
2058 // Floating point square root, F64
2059 def : InstRW<[N2Write_16cyc_1V0], (instregex "^FSQRT_ZPmZ_D")>;
2061 // Floating point trigonometric exponentiation
// FEXPA, all element sizes.
2062 def : InstRW<[N2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]$")>;
2064 // Floating point trigonometric multiply add
// FTMAD, all element sizes.
2065 def : InstRW<[N2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]$")>;
2067 // Floating point trigonometric, miscellaneous
// FTSMUL and FTSSEL, all element sizes.
2068 def : InstRW<[N2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
2070 // SVE BFloat16 (BF16) instructions
2071 // -----------------------------------------------------------------------------
2073 // Convert, F32 to BF16
// BFCVT and BFCVTNT, listed by exact opcode name.
2074 def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
// Dot product (BFDOT, _ZZI and _ZZZ opcodes)
2077 def : InstRW<[N2Write_4cyc_1V], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
2079 // Matrix multiply accumulate
2080 def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA_ZZZ)>;
2082 // Multiply accumulate long
// BFMLALB/BFMLALT, _ZZZ and _ZZZI opcodes.
2083 def : InstRW<[N2Write_4cyc_1V], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
2085 // SVE Load instructions
2086 // -----------------------------------------------------------------------------
// Load vector reg (LDR_ZXI)
2089 def : InstRW<[N2Write_6cyc_1L], (instrs LDR_ZXI)>;
// Load predicate reg (LDR_PXI); its write name carries an extra _1M component
// compared with the vector-register load.
2092 def : InstRW<[N2Write_6cyc_1L_1M], (instrs LDR_PXI)>;
2094 // Contiguous load, scalar + imm
// Plain LD1B/H/W/D plus the LD1[S]B, LD1[S]H and LD1[S]W variants that carry
// an element-size suffix, all with the _IMM suffix.
2095 def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM$",
2096 "^LD1S?B_[HSD]_IMM$",
2097 "^LD1S?H_[SD]_IMM$",
2098 "^LD1S?W_D_IMM$" )>;
2099 // Contiguous load, scalar + scalar
2100 def : InstRW<[N2Write_6cyc_1L01], (instregex "^LD1[BHWD]$",
2105 // Contiguous load broadcast, scalar + imm
// LD1R* and LD1RQ opcodes with the _IMM suffix.
2106 def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$",
2108 "^LD1RS?B_[HSD]_IMM$",
2109 "^LD1RS?H_[SD]_IMM$",
2111 "^LD1RQ_[BHWD]_IMM$")>;
2113 // Contiguous load broadcast, scalar + scalar
// Only the LD1RQ opcodes are matched for this addressing form.
2114 def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;
2116 // Non temporal load, scalar + imm
2117 def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZRI$")>;
2119 // Non temporal load, scalar + scalar
// The _ZRR write name carries an extra _1S component compared with _ZRI.
2120 def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDNT1[BHWD]_ZRR$")>;
2122 // Non temporal gather load, vector + scalar 32-bit element size
// LDNT1B/H/W and sign-extending LDNT1SB/SH, _ZZR_S opcodes.
2123 def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LDNT1[BHW]_ZZR_S$",
2124 "^LDNT1S[BH]_ZZR_S$")>;
2126 // Non temporal gather load, vector + scalar 64-bit element size
// LDNT1[S]B/H/W by regex; LDNT1D is listed separately by exact name but is
// bound to the same write.
2127 def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
2128 def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D)>;
2130 // Contiguous first faulting load, scalar + scalar
2131 def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]$",
2136 // Contiguous non faulting load, scalar + imm
// Plain LDNF1B/H/W/D plus the LDNF1[S]B/H/W variants that carry an
// element-size suffix.
2137 def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM$",
2138 "^LDNF1S?B_[HSD]_IMM$",
2139 "^LDNF1S?H_[SD]_IMM$",
2140 "^LDNF1S?W_D_IMM$")>;
// For LD2/LD3/LD4 below, the _IMM-suffixed names are the scalar + imm opcodes
// and the unsuffixed names are the scalar + scalar opcodes.
2142 // Contiguous load two structures to two vectors, scalar + imm
2143 def : InstRW<[N2Write_8cyc_1L_1V], (instregex "^LD2[BHWD]_IMM$")>;
2145 // Contiguous load two structures to two vectors, scalar + scalar
2146 def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD2[BHWD]$")>;
2148 // Contiguous load three structures to three vectors, scalar + imm
2149 def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD3[BHWD]_IMM$")>;
2151 // Contiguous load three structures to three vectors, scalar + scalar
2152 def : InstRW<[N2Write_10cyc_1V_1L_1S], (instregex "^LD3[BHWD]$")>;
2154 // Contiguous load four structures to four vectors, scalar + imm
2155 def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^LD4[BHWD]_IMM$")>;
2157 // Contiguous load four structures to four vectors, scalar + scalar
2158 def : InstRW<[N2Write_10cyc_2L_2V_2S], (instregex "^LD4[BHWD]$")>;
// Every pattern in this group starts with "GLD(FF)?1", so each entry covers
// both the GLD1* and the GLDFF1* opcode names.
2160 // Gather load, vector + imm, 32-bit element size
2161 def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
2162 "^GLD(FF)?1W_IMM$")>;
2164 // Gather load, vector + imm, 64-bit element size
2165 def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
2166 "^GLD(FF)?1D_IMM$")>;
2168 // Gather load, 64-bit element size
// Covers the [SU]XTW and plain offset forms, each optionally _SCALED.
2169 def : InstRW<[N2Write_9cyc_2L_2V],
2170 (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW(_SCALED)?$",
2171 "^GLD(FF)?1S?[BHW]_D(_SCALED)?$",
2172 "^GLD(FF)?1D_[SU]XTW(_SCALED)?$",
2173 "^GLD(FF)?1D(_SCALED)?$")>;
2175 // Gather load, 32-bit scaled offset
// Note: the second pattern has no trailing `$`, unlike its neighbours.
2176 def : InstRW<[N2Write_10cyc_2L_2V],
2177 (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$",
2178 "^GLD(FF)?1W_[SU]XTW_SCALED")>;
2180 // Gather load, 32-bit unpacked unscaled offset
2181 def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
2182 "^GLD(FF)?1W_[SU]XTW$")>;
2184 // SVE Store instructions
2185 // -----------------------------------------------------------------------------
2187 // Store from predicate reg
// STR_PXI, listed by exact opcode name.
2188 def : InstRW<[N2Write_1cyc_1L01], (instrs STR_PXI)>;
2190 // Store from vector reg
// STR_ZXI; its write name carries an extra _1V component compared with the
// predicate-register store.
2191 def : InstRW<[N2Write_2cyc_1L01_1V], (instrs STR_ZXI)>;
2193 // Contiguous store, scalar + imm
2194 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BHWD]_IMM$",
2199 // Contiguous store, scalar + scalar
// ST1H, with or without an _S/_D suffix, is bound on its own to a write whose
// name includes a _1S component.
2200 def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>;
2201 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BWD]$",
// For ST2/ST3/ST4 below, the _IMM-suffixed names are the scalar + imm opcodes
// and the unsuffixed names are the scalar + scalar opcodes. The scalar +
// scalar forms are split into an H entry and a B/W/D entry.
2205 // Contiguous store two structures from two vectors, scalar + imm
2206 def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BHWD]_IMM$")>;
2208 // Contiguous store two structures from two vectors, scalar + scalar (ST2H)
2209 def : InstRW<[N2Write_4cyc_1L01_1S_1V], (instrs ST2H)>;
2211 // Contiguous store two structures from two vectors, scalar + scalar (ST2B/W/D)
2212 def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BWD]$")>;
2214 // Contiguous store three structures from three vectors, scalar + imm
2215 def : InstRW<[N2Write_7cyc_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>;
2217 // Contiguous store three structures from three vectors, scalar + scalar (ST3H)
2218 def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instrs ST3H)>;
2220 // Contiguous store three structures from three vectors, scalar + scalar
// (ST3B/W/D; bound to the same write as ST3H)
2221 def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instregex "^ST3[BWD]$")>;
2223 // Contiguous store four structures from four vectors, scalar + imm
2224 def : InstRW<[N2Write_11cyc_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>;
2226 // Contiguous store four structures from four vectors, scalar + scalar (ST4H)
2227 def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instrs ST4H)>;
2229 // Contiguous store four structures from four vectors, scalar + scalar
// (ST4B/W/D; bound to the same write as ST4H)
2230 def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instregex "^ST4[BWD]$")>;
2232 // Non temporal store, scalar + imm
2233 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;
2235 // Non temporal store, scalar + scalar
// STNT1H is bound on its own to a write whose name includes a _1S component;
// STNT1B/W/D use the same write as the scalar + imm form.
2236 def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>;
2237 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;
2239 // Scatter non temporal store, vector + scalar 32-bit element size
2240 def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^STNT1[BHW]_ZZR_S")>;
2242 // Scatter non temporal store, vector + scalar 64-bit element size
2243 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZZR_D")>;
2245 // Scatter store vector + imm 32-bit element size
2246 def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_IMM$",
2249 // Scatter store vector + imm 64-bit element size
2250 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
// In the four entries below, the opcodes with an _S element suffix (and the
// unsuffixed SST1W) are bound to N2Write_4cyc_2L01_2V, while the opcodes with
// a _D element suffix (and SST1D) are bound to N2Write_2cyc_1L01_1V.
2253 // Scatter store, 32-bit scaled offset
2254 def : InstRW<[N2Write_4cyc_2L01_2V],
2255 (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
2257 // Scatter store, 32-bit unpacked unscaled offset
2258 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$",
2259 "^SST1D_[SU]XTW$")>;
2261 // Scatter store, 32-bit unpacked scaled offset
2262 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
2263 "^SST1D_[SU]XTW_SCALED$")>;
2265 // Scatter store, 32-bit unscaled offset
2266 def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_[SU]XTW$",
2267 "^SST1W_[SU]XTW$")>;
2269 // Scatter store, 64-bit scaled offset
2270 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_SCALED$",
2273 // Scatter store, 64-bit unscaled offset
2274 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D$",
2277 // SVE Miscellaneous instructions
2278 // -----------------------------------------------------------------------------
2280 // Read first fault register, unpredicated
2281 def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P)>;
2283 // Read first fault register, predicated
2284 def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz)>;
2286 // Read first fault register and set flags
2287 def : InstRW<[N2Write_4cyc_2M0_2M], (instrs RDFFRS_PPz)>;
2289 // Set first fault register
2290 // Write to first fault register
// (SETFFR and WRFFR share one entry and use the same write as the
// unpredicated read above.)
2291 def : InstRW<[N2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>;
// Prefetch
// The pattern has no trailing `$`; presumably this is meant to cover every
// PRFB/PRFH/PRFW/PRFD opcode variant -- confirm against the instruction
// definitions.
2294 def : InstRW<[N2Write_4cyc_1L], (instregex "^PRF[BHWD]")>;
2296 // SVE Cryptographic instructions
2297 // -----------------------------------------------------------------------------
2300 def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$",
// BCAX and EOR3 (_ZZZZ opcodes) and XAR (_ZZZI opcodes, all element sizes)
2304 def : InstRW<[N2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$",
2306 "^XAR_ZZZI_[BHSD]$")>;
// SM4E and SM4EKEY, S element size
2309 def : InstRW<[N2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;