1 //=- AArch64SchedNeoverseV2.td - NeoverseV2 Scheduling Defs --*- tablegen -*-=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the scheduling model for the Arm Neoverse V2 processors.
10 // All information is taken from the V2 Software Optimisation guide:
12 // https://developer.arm.com/documentation/PJDOC-466751330-593177/r0p2
14 //===----------------------------------------------------------------------===//
16 def NeoverseV2Model : SchedMachineModel {
17 let IssueWidth = 16; // Micro-ops dispatched at a time.
18 let MicroOpBufferSize = 160; // Entries in micro-op re-order buffer. NOTE: Copied from N2.
19 let LoadLatency = 4; // Optimistic load latency.
20 let MispredictPenalty = 10; // Extra cycles for mispredicted branch. NOTE: Copied from N2.
21 let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57.
22 let CompleteModel = 1;
24 list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
29 //===----------------------------------------------------------------------===//
30 // Define each kind of processor resource and number available on Neoverse V2.
31 // Instructions are first fetched and then decoded into internal macro-ops
32 // (MOPs). From there, the MOPs proceed through register renaming and dispatch
33 // stages. A MOP can be split into two micro-ops further down the pipeline
34 // after the decode stage. Once dispatched, micro-ops wait for their operands
35 // and issue out-of-order to one of seventeen issue pipelines. Each issue
36 // pipeline can accept one micro-op per cycle.
38 let SchedModel = NeoverseV2Model in {
40 // Define the (17) issue ports.
41 def V2UnitB : ProcResource<2>; // Branch 0/1
42 def V2UnitS0 : ProcResource<1>; // Integer single-cycle 0
43 def V2UnitS1 : ProcResource<1>; // Integer single-cycle 1
44 def V2UnitS2 : ProcResource<1>; // Integer single-cycle 2
45 def V2UnitS3 : ProcResource<1>; // Integer single-cycle 3
46 def V2UnitM0 : ProcResource<1>; // Integer single/multicycle 0
47 def V2UnitM1 : ProcResource<1>; // Integer single/multicycle 1
48 def V2UnitV0 : ProcResource<1>; // FP/ASIMD 0
49 def V2UnitV1 : ProcResource<1>; // FP/ASIMD 1
50 def V2UnitV2 : ProcResource<1>; // FP/ASIMD 2
51 def V2UnitV3 : ProcResource<1>; // FP/ASIMD 3
52 def V2UnitL01 : ProcResource<2>; // Load/Store 0/1
53 def V2UnitL2 : ProcResource<1>; // Load 2
54 def V2UnitD : ProcResource<2>; // Store data 0/1
56 def V2UnitR : ProcResGroup<[V2UnitS0, V2UnitS1]>; // Integer single-cycle 0/1
57 def V2UnitS : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3]>; // Integer single-cycle 0/1/2/3
58 def V2UnitF : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitM0, V2UnitM1]>; // Integer single-cycle 0/1 and single/multicycle 0/1
59 def V2UnitI : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3, V2UnitM0, V2UnitM1]>; // Integer single-cycle 0/1/2/3 and single/multicycle 0/1
60 def V2UnitM : ProcResGroup<[V2UnitM0, V2UnitM1]>; // Integer single/multicycle 0/1
61 def V2UnitL : ProcResGroup<[V2UnitL01, V2UnitL2]>; // Load/Store 0/1 and Load 2
62 def V2UnitV : ProcResGroup<[V2UnitV0, V2UnitV1, V2UnitV2, V2UnitV3]>; // FP/ASIMD 0/1/2/3
63 def V2UnitV01 : ProcResGroup<[V2UnitV0, V2UnitV1]>; // FP/ASIMD 0/1
64 def V2UnitV02 : ProcResGroup<[V2UnitV0, V2UnitV2]>; // FP/ASIMD 0/2
65 def V2UnitV13 : ProcResGroup<[V2UnitV1, V2UnitV3]>; // FP/ASIMD 1/3
66 def V2UnitV23 : ProcResGroup<[V2UnitV2, V2UnitV3]>; // FP/ASIMD 2/3
68 // Define commonly used read types.
70 // No forwarding is provided for these types.
71 def : ReadAdvance<ReadI, 0>;
72 def : ReadAdvance<ReadISReg, 0>;
73 def : ReadAdvance<ReadIEReg, 0>;
74 def : ReadAdvance<ReadIM, 0>;
75 def : ReadAdvance<ReadIMA, 0>;
76 def : ReadAdvance<ReadID, 0>;
77 def : ReadAdvance<ReadExtrHi, 0>;
78 def : ReadAdvance<ReadAdrBase, 0>;
79 def : ReadAdvance<ReadST, 0>;
80 def : ReadAdvance<ReadVLD, 0>;
82 // NOTE: Copied from N2.
83 def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
84 def : WriteRes<WriteBarrier, []> { let Latency = 1; }
85 def : WriteRes<WriteHint, []> { let Latency = 1; }
86 def : WriteRes<WriteLDHi, []> { let Latency = 4; }
88 //===----------------------------------------------------------------------===//
89 // Define customized scheduler read/write types specific to the Neoverse V2.
91 //===----------------------------------------------------------------------===//
93 // Define generic 0 micro-op types
94 def V2Write_0cyc : SchedWriteRes<[]> { let Latency = 0; }
96 // Define generic 1 micro-op types
98 def V2Write_1cyc_1B : SchedWriteRes<[V2UnitB]> { let Latency = 1; }
99 def V2Write_1cyc_1F : SchedWriteRes<[V2UnitF]> { let Latency = 1; }
100 def V2Write_1cyc_1I : SchedWriteRes<[V2UnitI]> { let Latency = 1; }
101 def V2Write_1cyc_1M : SchedWriteRes<[V2UnitM]> { let Latency = 1; }
102 def V2Write_1cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 1; }
103 def V2Write_1cyc_1L01 : SchedWriteRes<[V2UnitL01]> { let Latency = 1; }
104 def V2Write_2cyc_1M : SchedWriteRes<[V2UnitM]> { let Latency = 2; }
105 def V2Write_3cyc_1M : SchedWriteRes<[V2UnitM]> { let Latency = 3; }
106 def V2Write_2cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
107 def V2Write_3cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 3; }
108 def V2Write_5cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 5; }
109 def V2Write_12cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 12;
110 let ReleaseAtCycles = [12]; }
111 def V2Write_20cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 20;
112 let ReleaseAtCycles = [20]; }
113 def V2Write_4cyc_1L : SchedWriteRes<[V2UnitL]> { let Latency = 4; }
114 def V2Write_6cyc_1L : SchedWriteRes<[V2UnitL]> { let Latency = 6; }
115 def V2Write_2cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 2; }
116 def V2Write_2cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 2; }
117 def V2Write_2cyc_1V01 : SchedWriteRes<[V2UnitV01]> { let Latency = 2; }
118 def V2Write_2cyc_1V23 : SchedWriteRes<[V2UnitV23]> { let Latency = 2; }
119 def V2Write_3cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
120 def V2Write_3cyc_1V01 : SchedWriteRes<[V2UnitV01]> { let Latency = 3;
121 let ReleaseAtCycles = [2]; }
122 def V2Write_3cyc_1V23 : SchedWriteRes<[V2UnitV23]> { let Latency = 3; }
123 def V2Write_4cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
124 def V2Write_5cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
125 def V2Write_6cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 6; }
126 def V2Write_12cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 12; }
127 def V2Write_3cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 3; }
128 def V2Write_3cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 3; }
129 def V2Write_4cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 4; }
130 def V2Write_4cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
131 def V2Write_7cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 7;
132 let ReleaseAtCycles = [7]; }
133 def V2Write_7cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 7;
134 let ReleaseAtCycles = [2]; }
135 def V2Write_9cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 9; }
136 def V2Write_9cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 9;
137 let ReleaseAtCycles = [2]; }
138 def V2Write_10cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 10; }
139 def V2Write_10cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 10;
140 let ReleaseAtCycles = [2]; }
141 def V2Write_12cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 12;
142 let ReleaseAtCycles = [11]; }
143 def V2Write_13cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 13; }
144 def V2Write_15cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 15; }
145 def V2Write_15cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 15;
146 let ReleaseAtCycles = [8]; }
147 def V2Write_16cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 16; }
148 def V2Write_16cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 16;
149 let ReleaseAtCycles = [8]; }
150 def V2Write_20cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 20;
151 let ReleaseAtCycles = [20]; }
152 def V2Write_2cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 2; }
153 def V2Write_2cyc_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 2; }
154 def V2Write_3cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 3; }
155 def V2Write_4cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 4; }
156 def V2Write_4cyc_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
157 def V2Write_6cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 6; }
158 def V2Write_10cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 10; }
159 def V2Write_6cyc_1L01 : SchedWriteRes<[V2UnitL01]> { let Latency = 6; }
161 //===----------------------------------------------------------------------===//
162 // Define generic 2 micro-op types
164 def V2Write_1cyc_1B_1R : SchedWriteRes<[V2UnitB, V2UnitR]> {
169 def V2Write_6cyc_1M0_1B : SchedWriteRes<[V2UnitM0, V2UnitB]> {
174 def V2Write_9cyc_1M0_1L : SchedWriteRes<[V2UnitM0, V2UnitL]> {
179 def V2Write_3cyc_1I_1M : SchedWriteRes<[V2UnitI, V2UnitM]> {
184 def V2Write_1cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> {
189 def V2Write_3cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> {
194 def V2Write_4cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> {
199 def V2Write_5cyc_1L_1F : SchedWriteRes<[V2UnitL, V2UnitF]> {
204 def V2Write_6cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> {
209 def V2Write_7cyc_1F_1L : SchedWriteRes<[V2UnitF, V2UnitL]> {
214 def V2Write_7cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> {
219 def V2Write_1cyc_1L01_1D : SchedWriteRes<[V2UnitL01, V2UnitD]> {
224 def V2Write_5cyc_1M0_1V : SchedWriteRes<[V2UnitM0, V2UnitV]> {
229 def V2Write_2cyc_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> {
234 def V2Write_2cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> {
239 def V2Write_2cyc_2V01 : SchedWriteRes<[V2UnitV01, V2UnitV01]> {
244 def V2Write_4cyc_2V01 : SchedWriteRes<[V2UnitV01, V2UnitV01]> {
249 def V2Write_4cyc_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> {
254 def V2Write_4cyc_1V13_1V : SchedWriteRes<[V2UnitV13, V2UnitV]> {
259 def V2Write_4cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> {
264 def V2Write_4cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> {
269 def V2Write_4cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> {
274 def V2Write_6cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> {
279 def V2Write_6cyc_2L : SchedWriteRes<[V2UnitL, V2UnitL]> {
284 def V2Write_8cyc_1L_1V : SchedWriteRes<[V2UnitL, V2UnitV]> {
289 def V2Write_4cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> {
294 def V2Write_3cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> {
299 def V2Write_4cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> {
304 def V2Write_1cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> {
309 def V2Write_2cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> {
314 def V2Write_6cyc_2V1 : SchedWriteRes<[V2UnitV1, V2UnitV1]> {
319 def V2Write_4cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> {
324 def V2Write_5cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> {
329 def V2Write_5cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> {
334 def V2Write_5cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> {
339 def V2Write_6cyc_1V1_1M0 : SchedWriteRes<[V2UnitV1, V2UnitM0]> {
344 def V2Write_7cyc_1M0_1V02 : SchedWriteRes<[V2UnitM0, V2UnitV02]> {
349 def V2Write_2cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> {
354 def V2Write_3cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> {
359 def V2Write_6cyc_1V_1V13 : SchedWriteRes<[V2UnitV, V2UnitV13]> {
364 def V2Write_6cyc_1L_1M : SchedWriteRes<[V2UnitL, V2UnitM]> {
369 def V2Write_6cyc_1L_1S : SchedWriteRes<[V2UnitL, V2UnitS]> {
374 def V2Write_4cyc_2V13 : SchedWriteRes<[V2UnitV13, V2UnitV13]> {
379 def V2Write_8cyc_1M0_1V01 : SchedWriteRes<[V2UnitM0, V2UnitV01]> {
384 //===----------------------------------------------------------------------===//
385 // Define generic 3 micro-op types
387 def V2Write_1cyc_1L01_1D_1I : SchedWriteRes<[V2UnitL01, V2UnitD, V2UnitI]> {
392 def V2Write_2cyc_1L01_1V01_1I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitI]> {
397 def V2Write_2cyc_1L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> {
402 def V2Write_4cyc_1L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> {
407 def V2Write_9cyc_1L_2V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> {
412 def V2Write_4cyc_3V01 : SchedWriteRes<[V2UnitV01, V2UnitV01, V2UnitV01]> {
417 def V2Write_7cyc_1M_1M0_1V : SchedWriteRes<[V2UnitM, V2UnitM0, V2UnitV]> {
422 def V2Write_2cyc_1L01_1S_1V : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV]> {
427 def V2Write_2cyc_1L01_1S_1V01 : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV01]> {
432 def V2Write_6cyc_3L : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL]> {
437 def V2Write_6cyc_3V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV]> {
442 def V2Write_8cyc_1L_2V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> {
447 //===----------------------------------------------------------------------===//
448 // Define generic 4 micro-op types
450 def V2Write_2cyc_1L01_2V01_1I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01,
456 def V2Write_2cyc_2L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01,
457 V2UnitV01, V2UnitV01]> {
462 def V2Write_4cyc_2L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01,
463 V2UnitV01, V2UnitV01]> {
468 def V2Write_5cyc_1I_3L : SchedWriteRes<[V2UnitI, V2UnitL, V2UnitL, V2UnitL]> {
473 def V2Write_9cyc_2L_2V1 : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV1,
479 def V2Write_6cyc_4V0 : SchedWriteRes<[V2UnitV0, V2UnitV0, V2UnitV0, V2UnitV0]> {
484 def V2Write_8cyc_4V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
489 def V2Write_6cyc_2V_2V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13,
495 def V2Write_8cyc_2V_2V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13,
501 def V2Write_6cyc_4V02 : SchedWriteRes<[V2UnitV02, V2UnitV02, V2UnitV02,
507 def V2Write_6cyc_4V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
512 def V2Write_8cyc_2L_2V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> {
517 def V2Write_9cyc_2L_2V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> {
522 def V2Write_2cyc_2L01_2V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV,
528 def V2Write_4cyc_2L01_2V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV,
534 def V2Write_8cyc_2M0_2V02 : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitV02,
540 def V2Write_8cyc_2V_2V1 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV1,
546 def V2Write_4cyc_2M0_2M : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM,
552 def V2Write_5cyc_2M0_2M : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM,
558 def V2Write_6cyc_2I_2L : SchedWriteRes<[V2UnitI, V2UnitI, V2UnitL, V2UnitL]> {
563 def V2Write_7cyc_4L : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL]> {
568 def V2Write_6cyc_1L01_3V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01,
574 //===----------------------------------------------------------------------===//
575 // Define generic 5 micro-op types
577 def V2Write_2cyc_1L01_2V01_2I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01,
583 def V2Write_8cyc_2L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV,
589 def V2Write_9cyc_1L_4V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV,
595 def V2Write_10cyc_1L_4V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV,
601 def V2Write_6cyc_5V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV,
607 //===----------------------------------------------------------------------===//
608 // Define generic 6 micro-op types
610 def V2Write_8cyc_3L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
611 V2UnitV, V2UnitV, V2UnitV]> {
616 def V2Write_9cyc_3L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
617 V2UnitV, V2UnitV, V2UnitV]> {
622 def V2Write_9cyc_2L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV,
623 V2UnitV, V2UnitV, V2UnitV]> {
628 def V2Write_9cyc_2L_2V_2S : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV,
629 V2UnitV, V2UnitS, V2UnitS]> {
634 def V2Write_9cyc_2V_4V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13,
635 V2UnitV13, V2UnitV13, V2UnitV13]> {
640 def V2Write_2cyc_3L01_3V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
641 V2UnitV, V2UnitV, V2UnitV]> {
646 def V2Write_4cyc_2L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01,
647 V2UnitV01, V2UnitV01, V2UnitV01]> {
652 def V2Write_5cyc_2L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01,
653 V2UnitV01, V2UnitV01, V2UnitV01]> {
658 def V2Write_2cyc_3L01_3V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
659 V2UnitV01, V2UnitV01, V2UnitV01]> {
664 def V2Write_4cyc_2L01_2S_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitS,
665 V2UnitS, V2UnitV01, V2UnitV01]> {
670 //===----------------------------------------------------------------------===//
671 // Define generic 7 micro-op types
673 def V2Write_8cyc_3L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
674 V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
679 //===----------------------------------------------------------------------===//
680 // Define generic 8 micro-op types
682 def V2Write_2cyc_4L01_4V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
683 V2UnitL01, V2UnitV, V2UnitV, V2UnitV,
689 def V2Write_2cyc_4L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
690 V2UnitL01, V2UnitV01, V2UnitV01,
691 V2UnitV01, V2UnitV01]> {
696 def V2Write_4cyc_4L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
697 V2UnitL01, V2UnitV01, V2UnitV01,
698 V2UnitV01, V2UnitV01]> {
703 def V2Write_6cyc_2L01_6V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01,
704 V2UnitV01, V2UnitV01, V2UnitV01,
705 V2UnitV01, V2UnitV01]> {
710 def V2Write_8cyc_4L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL,
711 V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
716 //===----------------------------------------------------------------------===//
717 // Define generic 9 micro-op types
719 def V2Write_6cyc_3L01_6V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
720 V2UnitV01, V2UnitV01, V2UnitV01,
721 V2UnitV01, V2UnitV01, V2UnitV01]> {
726 def V2Write_10cyc_1L_8V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV,
727 V2UnitV, V2UnitV, V2UnitV, V2UnitV,
733 def V2Write_10cyc_3V_3L_3S : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV,
734 V2UnitL, V2UnitL, V2UnitL,
735 V2UnitS, V2UnitS, V2UnitS]> {
740 //===----------------------------------------------------------------------===//
741 // Define generic 10 micro-op types
743 def V2Write_9cyc_6L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL,
744 V2UnitL, V2UnitL, V2UnitV, V2UnitV,
747 let NumMicroOps = 10;
750 //===----------------------------------------------------------------------===//
751 // Define generic 12 micro-op types
753 def V2Write_5cyc_4L01_8V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
754 V2UnitL01, V2UnitV01, V2UnitV01,
755 V2UnitV01, V2UnitV01, V2UnitV01,
756 V2UnitV01, V2UnitV01, V2UnitV01]> {
758 let NumMicroOps = 12;
761 def V2Write_9cyc_4L_8V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
762 V2UnitL, V2UnitV, V2UnitV,
763 V2UnitV, V2UnitV, V2UnitV,
764 V2UnitV, V2UnitV, V2UnitV]> {
766 let NumMicroOps = 12;
769 def V2Write_10cyc_4L_8V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
770 V2UnitL, V2UnitV, V2UnitV,
771 V2UnitV, V2UnitV, V2UnitV,
772 V2UnitV, V2UnitV, V2UnitV]> {
774 let NumMicroOps = 12;
777 //===----------------------------------------------------------------------===//
778 // Define generic 16 micro-op types
780 def V2Write_7cyc_4L01_12V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
781 V2UnitL01, V2UnitV01, V2UnitV01,
782 V2UnitV01, V2UnitV01, V2UnitV01,
783 V2UnitV01, V2UnitV01, V2UnitV01,
784 V2UnitV01, V2UnitV01, V2UnitV01,
787 let NumMicroOps = 16;
790 def V2Write_10cyc_4L_8V_4S : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
791 V2UnitL, V2UnitV, V2UnitV,
792 V2UnitV, V2UnitV, V2UnitV,
793 V2UnitV, V2UnitV, V2UnitV,
794 V2UnitS, V2UnitS, V2UnitS,
797 let NumMicroOps = 16;
800 //===----------------------------------------------------------------------===//
801 // Define generic 18 micro-op types
803 def V2Write_7cyc_9L01_9V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
804 V2UnitL01, V2UnitL01, V2UnitL01,
805 V2UnitL01, V2UnitL01, V2UnitL01,
806 V2UnitV01, V2UnitV01, V2UnitV01,
807 V2UnitV01, V2UnitV01, V2UnitV01,
808 V2UnitV01, V2UnitV01, V2UnitV01]> {
810 let NumMicroOps = 18;
813 //===----------------------------------------------------------------------===//
814 // Define generic 27 micro-op types
816 def V2Write_7cyc_9L01_9S_9V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
817 V2UnitL01, V2UnitL01, V2UnitL01,
818 V2UnitL01, V2UnitL01, V2UnitL01,
819 V2UnitS, V2UnitS, V2UnitS,
820 V2UnitS, V2UnitS, V2UnitS,
821 V2UnitS, V2UnitS, V2UnitS,
822 V2UnitV01, V2UnitV01, V2UnitV01,
823 V2UnitV01, V2UnitV01, V2UnitV01,
824 V2UnitV01, V2UnitV01,
827 let NumMicroOps = 27;
830 //===----------------------------------------------------------------------===//
831 // Define generic 36 micro-op types
833 def V2Write_11cyc_18L01_18V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
834 V2UnitL01, V2UnitL01, V2UnitL01,
835 V2UnitL01, V2UnitL01, V2UnitL01,
836 V2UnitL01, V2UnitL01, V2UnitL01,
837 V2UnitL01, V2UnitL01, V2UnitL01,
838 V2UnitL01, V2UnitL01, V2UnitL01,
839 V2UnitV01, V2UnitV01, V2UnitV01,
840 V2UnitV01, V2UnitV01, V2UnitV01,
841 V2UnitV01, V2UnitV01, V2UnitV01,
842 V2UnitV01, V2UnitV01, V2UnitV01,
843 V2UnitV01, V2UnitV01, V2UnitV01,
844 V2UnitV01, V2UnitV01,
847 let NumMicroOps = 36;
850 //===----------------------------------------------------------------------===//
851 // Define generic 54 micro-op types
853 def V2Write_11cyc_18L01_18S_18V01 : SchedWriteRes<[V2UnitL01, V2UnitL01,
854 V2UnitL01, V2UnitL01,
855 V2UnitL01, V2UnitL01,
856 V2UnitL01, V2UnitL01,
857 V2UnitL01, V2UnitL01,
858 V2UnitL01, V2UnitL01,
859 V2UnitL01, V2UnitL01,
860 V2UnitL01, V2UnitL01,
861 V2UnitL01, V2UnitL01,
862 V2UnitS, V2UnitS, V2UnitS,
863 V2UnitS, V2UnitS, V2UnitS,
864 V2UnitS, V2UnitS, V2UnitS,
865 V2UnitS, V2UnitS, V2UnitS,
866 V2UnitS, V2UnitS, V2UnitS,
867 V2UnitS, V2UnitS, V2UnitS,
868 V2UnitV01, V2UnitV01,
869 V2UnitV01, V2UnitV01,
870 V2UnitV01, V2UnitV01,
871 V2UnitV01, V2UnitV01,
872 V2UnitV01, V2UnitV01,
873 V2UnitV01, V2UnitV01,
874 V2UnitV01, V2UnitV01,
875 V2UnitV01, V2UnitV01,
876 V2UnitV01, V2UnitV01]> {
878 let NumMicroOps = 54;
881 //===----------------------------------------------------------------------===//
882 // Define predicate-controlled types
884 def V2Write_ArithI : SchedWriteVariant<[
885 SchedVar<IsCheapLSL, [V2Write_1cyc_1I]>,
886 SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;
888 def V2Write_ArithF : SchedWriteVariant<[
889 SchedVar<IsCheapLSL, [V2Write_1cyc_1F]>,
890 SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;
892 def V2Write_Logical : SchedWriteVariant<[
893 SchedVar<NeoverseNoLSL, [V2Write_1cyc_1F]>,
894 SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;
896 def V2Write_Extr : SchedWriteVariant<[
897 SchedVar<IsRORImmIdiomPred, [V2Write_1cyc_1I]>,
898 SchedVar<NoSchedPred, [V2Write_3cyc_1I_1M]>]>;
900 def V2Write_LdrHQ : SchedWriteVariant<[
901 SchedVar<NeoverseHQForm, [V2Write_7cyc_1I_1L]>,
902 SchedVar<NoSchedPred, [V2Write_6cyc_1L]>]>;
904 def V2Write_StrHQ : SchedWriteVariant<[
905 SchedVar<NeoverseHQForm, [V2Write_2cyc_1L01_1V01_1I]>,
906 SchedVar<NoSchedPred, [V2Write_2cyc_1L01_1V01]>]>;
908 def V2Write_0or1cyc_1I : SchedWriteVariant<[
909 SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
910 SchedVar<NoSchedPred, [V2Write_1cyc_1I]>]>;
912 def V2Write_0or2cyc_1V : SchedWriteVariant<[
913 SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
914 SchedVar<NoSchedPred, [V2Write_2cyc_1V]>]>;
916 def V2Write_0or3cyc_1M0 : SchedWriteVariant<[
917 SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
918 SchedVar<NoSchedPred, [V2Write_3cyc_1M0]>]>;
920 def V2Write_2or3cyc_1M : SchedWriteVariant<[
921 SchedVar<NeoversePdIsPg, [V2Write_3cyc_1M]>,
922 SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;
924 def V2Write_3or4cyc_2M : SchedWriteVariant<[
925 SchedVar<NeoversePdIsPg, [V2Write_4cyc_2M]>,
926 SchedVar<NoSchedPred, [V2Write_3cyc_2M]>]>;
928 def V2Write_1or2cyc_1M0 : SchedWriteVariant<[
929 SchedVar<NeoversePdIsPg, [V2Write_2cyc_1M0]>,
930 SchedVar<NoSchedPred, [V2Write_1cyc_1M0]>]>;
932 def V2Write_2or3cyc_1M0 : SchedWriteVariant<[
933 SchedVar<NeoversePdIsPg, [V2Write_3cyc_1M0]>,
934 SchedVar<NoSchedPred, [V2Write_2cyc_1M0]>]>;
936 def V2Write_1or2cyc_1M0_1M : SchedWriteVariant<[
937 SchedVar<NeoversePdIsPg, [V2Write_2cyc_1M0_1M]>,
938 SchedVar<NoSchedPred, [V2Write_1cyc_1M0_1M]>]>;
940 def V2Write_3or4cyc_1M0_1M : SchedWriteVariant<[
941 SchedVar<NeoversePdIsPg, [V2Write_4cyc_1M0_1M]>,
942 SchedVar<NoSchedPred, [V2Write_3cyc_1M0_1M]>]>;
944 def V2Write_4or5cyc_2M0_2M : SchedWriteVariant<[
945 SchedVar<NeoversePdIsPg, [V2Write_5cyc_2M0_2M]>,
946 SchedVar<NoSchedPred, [V2Write_4cyc_2M0_2M]>]>;
948 def V2Write_4or5cyc_1V0_1M0 : SchedWriteVariant<[
949 SchedVar<NeoversePdIsPg, [V2Write_5cyc_1V0_1M0]>,
950 SchedVar<NoSchedPred, [V2Write_4cyc_1V0_1M0]>]>;
952 def V2Write_2or3cyc_1V0_1M : SchedWriteVariant<[
953 SchedVar<NeoversePdIsPg, [V2Write_3cyc_1V0_1M]>,
954 SchedVar<NoSchedPred, [V2Write_2cyc_1V0_1M]>]>;
956 def V2Write_IncDec : SchedWriteVariant<[
957 SchedVar<NeoverseCheapIncDec, [V2Write_1cyc_1F]>,
958 SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;
960 //===----------------------------------------------------------------------===//
961 // Define forwarded types
963 // NOTE: SOG, p. 16, n. 2: Accumulator forwarding is not supported for
964 // consumers of 64 bit multiply high operations?
965 def V2Wr_IM : SchedWriteRes<[V2UnitM]> { let Latency = 2; }
966 def V2Wr_IMA : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
967 def V2Wr_IMUL : SchedWriteVariant<[
968 SchedVar<IsReg3ZeroPred, [V2Wr_IM]>,
969 SchedVar<NoSchedPred, [V2Wr_IMA]>]>;
970 def V2Rd_IMA : SchedReadAdvance<1, [V2Wr_IMA]>;
972 def V2Wr_FMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
973 def V2Rd_FMA : SchedReadAdvance<2, [WriteFMul, V2Wr_FMA]>;
975 def V2Wr_VA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
976 def V2Rd_VA : SchedReadAdvance<3, [V2Wr_VA]>;
978 def V2Wr_VDOT : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
979 def V2Rd_VDOT : SchedReadAdvance<2, [V2Wr_VDOT]>;
981 def V2Wr_VMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
982 def V2Rd_VMMA : SchedReadAdvance<2, [V2Wr_VMMA]>;
984 def V2Wr_VMA : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
985 def V2Rd_VMA : SchedReadAdvance<3, [V2Wr_VMA]>;
987 def V2Wr_VMAH : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; }
988 def V2Rd_VMAH : SchedReadAdvance<2, [V2Wr_VMAH]>;
990 def V2Wr_VMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
991 def V2Rd_VMAL : SchedReadAdvance<3, [V2Wr_VMAL]>;
993 def V2Wr_VPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
994 def V2Rd_VPA : SchedReadAdvance<3, [V2Wr_VPA]>;
996 def V2Wr_VSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
997 def V2Rd_VSA : SchedReadAdvance<3, [V2Wr_VSA]>;
999 def V2Wr_VFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
1000 def V2Rd_VFCMA : SchedReadAdvance<2, [V2Wr_VFCMA]>;
1002 def V2Wr_VFM : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
1003 def V2Wr_VFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
1004 def V2Rd_VFMA : SchedReadAdvance<2, [V2Wr_VFM, V2Wr_VFMA]>;
1006 def V2Wr_VFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
1007 def V2Rd_VFMAL : SchedReadAdvance<2, [V2Wr_VFMAL]>;
1009 def V2Wr_VBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1010 def V2Rd_VBFDOT : SchedReadAdvance<2, [V2Wr_VBFDOT]>;
1011 def V2Wr_VBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; }
1012 def V2Rd_VBFMMA : SchedReadAdvance<2, [V2Wr_VBFMMA]>;
1013 def V2Wr_VBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1014 def V2Rd_VBFMAL : SchedReadAdvance<3, [V2Wr_VBFMAL]>;
1016 def V2Wr_CRC : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
1017 def V2Rd_CRC : SchedReadAdvance<1, [V2Wr_CRC]>;
1019 def V2Wr_ZA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
1020 def V2Rd_ZA : SchedReadAdvance<3, [V2Wr_ZA]>;
1021 def V2Wr_ZPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
1022 def V2Rd_ZPA : SchedReadAdvance<3, [V2Wr_ZPA]>;
1023 def V2Wr_ZSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
1024 def V2Rd_ZSA : SchedReadAdvance<3, [V2Wr_ZSA]>;
1026 def V2Wr_ZDOTB : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
1027 def V2Rd_ZDOTB : SchedReadAdvance<2, [V2Wr_ZDOTB]>;
1028 def V2Wr_ZDOTH : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
1029 def V2Rd_ZDOTH : SchedReadAdvance<3, [V2Wr_ZDOTH]>;
1031 // NOTE: SOG p. 43: Complex multiply-add B, H, S element size: How to reduce
1032 // throughput to 1 in case of forwarding?
1033 def V2Wr_ZCMABHS : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
1034 def V2Rd_ZCMABHS : SchedReadAdvance<3, [V2Wr_ZCMABHS]>;
1035 def V2Wr_ZCMAD : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
1036 def V2Rd_ZCMAD : SchedReadAdvance<2, [V2Wr_ZCMAD]>;
1038 def V2Wr_ZMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
1039 def V2Rd_ZMMA : SchedReadAdvance<2, [V2Wr_ZMMA]>;
1041 def V2Wr_ZMABHS : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; }
1042 def V2Rd_ZMABHS : SchedReadAdvance<3, [V2Wr_ZMABHS]>;
1043 def V2Wr_ZMAD : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
1044 def V2Rd_ZMAD : SchedReadAdvance<2, [V2Wr_ZMAD]>;
1046 def V2Wr_ZMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
1047 def V2Rd_ZMAL : SchedReadAdvance<3, [V2Wr_ZMAL]>;
1049 def V2Wr_ZMASQL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
1050 def V2Wr_ZMASQBHS : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
1051 def V2Wr_ZMASQD : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
1052 def V2Rd_ZMASQ : SchedReadAdvance<2, [V2Wr_ZMASQL, V2Wr_ZMASQBHS,
1055 def V2Wr_ZFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1056 def V2Rd_ZFCMA : SchedReadAdvance<3, [V2Wr_ZFCMA]>;
1058 def V2Wr_ZFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
1059 def V2Rd_ZFMA : SchedReadAdvance<2, [V2Wr_ZFMA]>;
1061 def V2Wr_ZFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
1062 def V2Rd_ZFMAL : SchedReadAdvance<2, [V2Wr_ZFMAL]>;
1064 def V2Wr_ZBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1065 def V2Rd_ZBFDOT : SchedReadAdvance<2, [V2Wr_ZBFDOT]>;
1066 def V2Wr_ZBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; }
1067 def V2Rd_ZBFMMA : SchedReadAdvance<2, [V2Wr_ZBFMMA]>;
1068 def V2Wr_ZBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1069 def V2Rd_ZBFMAL : SchedReadAdvance<3, [V2Wr_ZBFMAL]>;
1071 //===----------------------------------------------------------------------===//
1072 // Define types with long resource cycles (rc)
1074 def V2Write_6cyc_1V1_5rc : SchedWriteRes<[V2UnitV1]> { let Latency = 6; let ReleaseAtCycles = [ 5]; }
1075 def V2Write_7cyc_1V02_7rc : SchedWriteRes<[V2UnitV02]> { let Latency = 7; let ReleaseAtCycles = [ 7]; }
1076 def V2Write_10cyc_1V02_5rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 5]; }
1077 def V2Write_10cyc_1V02_9rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
1078 def V2Write_10cyc_1V02_10rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [10]; }
1079 def V2Write_10cyc_1V0_9rc : SchedWriteRes<[V2UnitV0]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
1080 def V2Write_10cyc_1V1_9rc : SchedWriteRes<[V2UnitV1]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
1081 def V2Write_13cyc_1V0_12rc : SchedWriteRes<[V2UnitV0]> { let Latency = 13; let ReleaseAtCycles = [12]; }
1082 def V2Write_13cyc_1V02_12rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [12]; }
1083 def V2Write_13cyc_1V02_13rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [13]; }
1084 def V2Write_15cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 15; let ReleaseAtCycles = [14]; }
1085 def V2Write_16cyc_1V02_15rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ReleaseAtCycles = [15]; }
1086 def V2Write_16cyc_1V0_14rc : SchedWriteRes<[V2UnitV0]> { let Latency = 16; let ReleaseAtCycles = [14]; }
1089 // -----------------------------------------------------------------------------
1091 def : InstRW<[WriteI], (instrs COPY)>;
1093 // §3.3 Branch instructions
1094 // -----------------------------------------------------------------------------
1097 // Compare and branch
1098 def : SchedAlias<WriteBr, V2Write_1cyc_1B>;
1101 def : SchedAlias<WriteBrReg, V2Write_1cyc_1B>;
1103 // Branch and link, immed
1104 // Branch and link, register
1105 def : InstRW<[V2Write_1cyc_1B_1R], (instrs BL, BLR)>;
1107 // §3.4 Arithmetic and Logical Instructions
1108 // -----------------------------------------------------------------------------
1111 // ALU, basic, flagset
1112 def : SchedAlias<WriteI, V2Write_1cyc_1I>;
1113 def : InstRW<[V2Write_1cyc_1F], (instregex "^(ADC|SBC)S[WX]r$")>;
1114 def : InstRW<[V2Write_0or1cyc_1I], (instregex "^MOVZ[WX]i$")>;
1116 // ALU, extend and shift
1117 def : SchedAlias<WriteIEReg, V2Write_2cyc_1M>;
1119 // Arithmetic, LSL shift, shift <= 4
1120 // Arithmetic, flagset, LSL shift, shift <= 4
1121 // Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
1122 def : SchedAlias<WriteISReg, V2Write_ArithI>;
1123 def : InstRW<[V2Write_ArithF],
1124 (instregex "^(ADD|SUB)S[WX]rs$")>;
1126 // Arithmetic, immediate to logical address tag
1127 def : InstRW<[V2Write_2cyc_1M], (instrs ADDG, SUBG)>;
1129 // Convert floating-point condition flags
1130 // Flag manipulation instructions
1131 def : WriteRes<WriteSys, []> { let Latency = 1; }
1133 // Insert Random Tags
1134 def : InstRW<[V2Write_2cyc_1M], (instrs IRG, IRGstack)>;
1138 // Subtract Pointer, flagset
1139 def : InstRW<[V2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>;
1141 // Logical, shift, no flagset
1142 def : InstRW<[V2Write_1cyc_1I], (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
1143 def : InstRW<[V2Write_0or1cyc_1I], (instregex "^ORR[WX]rs$")>;
1145 // Logical, shift, flagset
1146 def : InstRW<[V2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
1148 // Move and shift instructions
1149 // -----------------------------------------------------------------------------
1151 def : SchedAlias<WriteImm, V2Write_1cyc_1I>;
1153 // §3.5 Divide and multiply instructions
1154 // -----------------------------------------------------------------------------
1157 def : SchedAlias<WriteID32, V2Write_12cyc_1M0>;
1158 def : SchedAlias<WriteID64, V2Write_20cyc_1M0>;
1160 def : SchedAlias<WriteIM32, V2Write_2cyc_1M>;
1161 def : SchedAlias<WriteIM64, V2Write_2cyc_1M>;
1164 // Multiply accumulate, W-form
1165 // Multiply accumulate, X-form
1166 def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA],
1167 (instregex "^M(ADD|SUB)[WX]rrr$")>;
1169 // Multiply accumulate long
1171 def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA],
1172 (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
1175 def : InstRW<[V2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>;
1177 // Pointer Authentication Instructions (v8.3 PAC)
1178 // -----------------------------------------------------------------------------
1180 // Authenticate data address
1181 // Authenticate instruction address
1182 // Compute pointer authentication code for data address
1183 // Compute pointer authentication code, using generic key
1184 // Compute pointer authentication code for instruction address
1185 def : InstRW<[V2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>;
1187 // Branch and link, register, with pointer authentication
1188 // Branch, register, with pointer authentication
1189 // Branch, return, with pointer authentication
1190 def : InstRW<[V2Write_6cyc_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA,
1191 BRAAZ, BRAB, BRABZ, RETAA, RETAB,
1195 // Load register, with pointer authentication
1196 def : InstRW<[V2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
1198 // Strip pointer authentication code
1199 def : InstRW<[V2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>;
1201 // Miscellaneous data-processing instructions
1202 // -----------------------------------------------------------------------------
1204 // Address generation
1205 def : InstRW<[V2Write_1cyc_1F], (instrs ADR, ADRP)>;
1207 // Bitfield extract, one reg
1208 // Bitfield extract, two regs
1209 def : SchedAlias<WriteExtr, V2Write_Extr>;
1210 def : InstRW<[V2Write_Extr], (instrs EXTRWrri, EXTRXrri)>;
1212 // Bitfield move, basic
1213 def : SchedAlias<WriteIS, V2Write_1cyc_1I>;
1215 // Bitfield move, insert
1216 def : InstRW<[V2Write_2cyc_1M], (instregex "^BFM[WX]ri$")>;
1218 // Load instructions
1219 // -----------------------------------------------------------------------------
1221 // NOTE: SOG p. 19: Throughput of LDN?P X-form should be 2, but reported as 3.
1223 def : SchedAlias<WriteLD, V2Write_4cyc_1L>;
1224 def : SchedAlias<WriteLDIdx, V2Write_4cyc_1L>;
1226 // Load register, literal
1227 def : InstRW<[V2Write_5cyc_1L_1F], (instrs LDRWl, LDRXl, LDRSWl, PRFMl)>;
1229 // Load pair, signed immed offset, signed words
1230 def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi], (instrs LDPSWi)>;
1232 // Load pair, immed post-index or immed pre-index, signed words
1233 def : InstRW<[WriteAdr, V2Write_5cyc_1I_3L, WriteLDHi],
1234 (instregex "^LDPSW(post|pre)$")>;
1236 // Store instructions
1237 // -----------------------------------------------------------------------------
1239 // NOTE: SOG, p. 20: Unsure if STRH uses pipeline I.
1241 def : SchedAlias<WriteST, V2Write_1cyc_1L01_1D>;
1242 def : SchedAlias<WriteSTIdx, V2Write_1cyc_1L01_1D>;
1243 def : SchedAlias<WriteSTP, V2Write_1cyc_1L01_1D>;
1244 def : SchedAlias<WriteAdr, V2Write_1cyc_1I>;
1246 // Tag load instructions
1247 // -----------------------------------------------------------------------------
1249 // Load allocation tag
1250 // Load multiple allocation tags
1251 def : InstRW<[V2Write_4cyc_1L], (instrs LDG, LDGM)>;
1253 // Tag store instructions
1254 // -----------------------------------------------------------------------------
1256 // Store allocation tags to one or two granules, post-index
1257 // Store allocation tags to one or two granules, pre-index
1258 // Store allocation tag to one or two granules, zeroing, post-index
1259 // Store Allocation Tag to one or two granules, zeroing, pre-index
1260 // Store allocation tag and reg pair to memory, post-Index
1261 // Store allocation tag and reg pair to memory, pre-Index
1262 def : InstRW<[V2Write_1cyc_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex,
1263 ST2GPreIndex, ST2GPostIndex,
1264 STZGPreIndex, STZGPostIndex,
1265 STZ2GPreIndex, STZ2GPostIndex,
1266 STGPpre, STGPpost)>;
1268 // Store allocation tags to one or two granules, signed offset
1269 // Store allocation tag to two granules, zeroing, signed offset
1270 // Store allocation tag and reg pair to memory, signed offset
1271 // Store multiple allocation tags
1272 def : InstRW<[V2Write_1cyc_1L01_1D], (instrs STGi, ST2Gi, STZGi,
1273 STZ2Gi, STGPi, STGM, STZGM)>;
1275 // FP data processing instructions
1276 // -----------------------------------------------------------------------------
1278 // FP absolute value
1283 def : SchedAlias<WriteF, V2Write_2cyc_1V>;
1286 def : SchedAlias<WriteFCmp, V2Write_2cyc_1V0>;
1288 // FP divide, square root
1289 def : SchedAlias<WriteFDiv, V2Write_7cyc_1V02>;
1291 // FP divide, H-form
1292 def : InstRW<[V2Write_7cyc_1V02], (instrs FDIVHrr)>;
1293 // FP divide, S-form
1294 def : InstRW<[V2Write_10cyc_1V02], (instrs FDIVSrr)>;
1295 // FP divide, D-form
1296 def : InstRW<[V2Write_15cyc_1V02], (instrs FDIVDrr)>;
1298 // FP square root, H-form
1299 def : InstRW<[V2Write_7cyc_1V02], (instrs FSQRTHr)>;
1300 // FP square root, S-form
1301 def : InstRW<[V2Write_9cyc_1V02], (instrs FSQRTSr)>;
1302 // FP square root, D-form
1303 def : InstRW<[V2Write_16cyc_1V02], (instrs FSQRTDr)>;
1306 def : WriteRes<WriteFMul, [V2UnitV]> { let Latency = 3; }
1308 // FP multiply accumulate
1309 def : InstRW<[V2Wr_FMA, ReadDefault, ReadDefault, V2Rd_FMA],
1310 (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
1312 // FP round to integral
1313 def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$",
1314 "^FRINT(32|64)[XZ][SD]r$")>;
1316 // FP miscellaneous instructions
1317 // -----------------------------------------------------------------------------
1319 // FP convert, from gen to vec reg
1320 def : InstRW<[V2Write_3cyc_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
1322 // FP convert, from vec to gen reg
1323 def : InstRW<[V2Write_3cyc_1V01],
1324 (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]ri?$")>;
1326 // FP convert, Javascript from vec to gen reg
1327 def : SchedAlias<WriteFCvt, V2Write_3cyc_1V0>;
1329 // FP convert, from vec to vec reg
1330 def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTSHr, FCVTDHr, FCVTHSr, FCVTDSr,
1331 FCVTHDr, FCVTSDr, FCVTXNv1i64)>;
1334 // FP move, register
1335 def : SchedAlias<WriteFImm, V2Write_2cyc_1V>;
1337 // FP transfer, from gen to low half of vec reg
1338 def : InstRW<[V2Write_0or3cyc_1M0],
1339 (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
1341 // FP transfer, from gen to high half of vec reg
1342 def : InstRW<[V2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>;
1344 // FP transfer, from vec to gen reg
1345 def : SchedAlias<WriteFCopy, V2Write_2cyc_2V01>;
1347 // FP load instructions
1348 // -----------------------------------------------------------------------------
1350 // Load vector reg, literal, S/D/Q forms
1351 def : InstRW<[V2Write_7cyc_1F_1L], (instregex "^LDR[SDQ]l$")>;
1353 // Load vector reg, unscaled immed
1354 def : InstRW<[V2Write_6cyc_1L], (instregex "^LDUR[BHSDQ]i$")>;
1356 // Load vector reg, immed post-index
1357 // Load vector reg, immed pre-index
1358 def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L],
1359 (instregex "^LDR[BHSDQ](pre|post)$")>;
1361 // Load vector reg, unsigned immed
1362 def : InstRW<[V2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>;
1364 // Load vector reg, register offset, basic
1365 // Load vector reg, register offset, scale, S/D-form
1366 // Load vector reg, register offset, scale, H/Q-form
1367 // Load vector reg, register offset, extend
1368 // Load vector reg, register offset, extend, scale, S/D-form
1369 // Load vector reg, register offset, extend, scale, H/Q-form
1370 def : InstRW<[V2Write_LdrHQ, ReadAdrBase], (instregex "^LDR[BHSDQ]ro[WX]$")>;
1372 // Load vector pair, immed offset, S/D-form
1373 def : InstRW<[V2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;
1375 // Load vector pair, immed offset, Q-form
1376 def : InstRW<[V2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
1378 // Load vector pair, immed post-index, S/D-form
1379 // Load vector pair, immed pre-index, S/D-form
1380 def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L, WriteLDHi],
1381 (instregex "^LDP[SD](pre|post)$")>;
1383 // Load vector pair, immed post-index, Q-form
1384 // Load vector pair, immed pre-index, Q-form
1385 def : InstRW<[WriteAdr, V2Write_6cyc_2I_2L, WriteLDHi], (instrs LDPQpost,
1388 // FP store instructions
1389 // -----------------------------------------------------------------------------
1391 // Store vector reg, unscaled immed, B/H/S/D-form
1392 // Store vector reg, unscaled immed, Q-form
1393 def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>;
1395 // Store vector reg, immed post-index, B/H/S/D-form
1396 // Store vector reg, immed post-index, Q-form
1397 // Store vector reg, immed pre-index, B/H/S/D-form
1398 // Store vector reg, immed pre-index, Q-form
1399 def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I],
1400 (instregex "^STR[BHSDQ](pre|post)$")>;
1402 // Store vector reg, unsigned immed, B/H/S/D-form
1403 // Store vector reg, unsigned immed, Q-form
1404 def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>;
1406 // Store vector reg, register offset, basic, B/H/S/D-form
1407 // Store vector reg, register offset, basic, Q-form
1408 // Store vector reg, register offset, scale, H-form
1409 // Store vector reg, register offset, scale, S/D-form
1410 // Store vector reg, register offset, scale, Q-form
1411 // Store vector reg, register offset, extend, B/H/S/D-form
1412 // Store vector reg, register offset, extend, Q-form
1413 // Store vector reg, register offset, extend, scale, H-form
1414 // Store vector reg, register offset, extend, scale, S/D-form
1415 // Store vector reg, register offset, extend, scale, Q-form
1416 def : InstRW<[V2Write_StrHQ, ReadAdrBase],
1417 (instregex "^STR[BHSDQ]ro[WX]$")>;
1419 // Store vector pair, immed offset, S-form
1420 // Store vector pair, immed offset, D-form
1421 def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STN?P[SD]i$")>;
1423 // Store vector pair, immed offset, Q-form
1424 def : InstRW<[V2Write_2cyc_1L01_2V01], (instrs STPQi, STNPQi)>;
1426 // Store vector pair, immed post-index, S-form
1427 // Store vector pair, immed post-index, D-form
1428 // Store vector pair, immed pre-index, S-form
1429 // Store vector pair, immed pre-index, D-form
1430 def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I],
1431 (instregex "^STP[SD](pre|post)$")>;
1433 // Store vector pair, immed post-index, Q-form
1434 def : InstRW<[V2Write_2cyc_1L01_2V01_1I], (instrs STPQpost)>;
1436 // Store vector pair, immed pre-index, Q-form
1437 def : InstRW<[V2Write_2cyc_1L01_2V01_2I], (instrs STPQpre)>;
1439 // ASIMD integer instructions
1440 // -----------------------------------------------------------------------------
1442 // ASIMD absolute diff
1443 // ASIMD absolute diff long
1444 // ASIMD arith, basic
1445 // ASIMD arith, complex
1446 // ASIMD arith, pair-wise
1449 // ASIMD max/min, basic and pair-wise
1450 def : SchedAlias<WriteVd, V2Write_2cyc_1V>;
1451 def : SchedAlias<WriteVq, V2Write_2cyc_1V>;
1453 // ASIMD absolute diff accum
1454 // ASIMD absolute diff accum long
1455 def : InstRW<[V2Wr_VA, V2Rd_VA], (instregex "^[SU]ABAL?v")>;
1457 // ASIMD arith, reduce, 4H/4S
1458 def : InstRW<[V2Write_2cyc_1V13], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
1460 // ASIMD arith, reduce, 8B/8H
1461 def : InstRW<[V2Write_4cyc_1V13_1V],
1462 (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
1464 // ASIMD arith, reduce, 16B
1465 def : InstRW<[V2Write_4cyc_2V13], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;
1467 // ASIMD dot product
1468 // ASIMD dot product using signed and unsigned integers
1469 def : InstRW<[V2Wr_VDOT, V2Rd_VDOT],
1470 (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;
1472 // ASIMD matrix multiply-accumulate
1473 def : InstRW<[V2Wr_VMMA, V2Rd_VMMA], (instrs SMMLA, UMMLA, USMMLA)>;
1475 // ASIMD max/min, reduce, 4H/4S
1476 def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU](MAX|MIN)Vv4i16v$",
1477 "^[SU](MAX|MIN)Vv4i32v$")>;
1479 // ASIMD max/min, reduce, 8B/8H
1480 def : InstRW<[V2Write_4cyc_1V13_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
1481 "^[SU](MAX|MIN)Vv8i16v$")>;
1483 // ASIMD max/min, reduce, 16B
1484 def : InstRW<[V2Write_4cyc_2V13], (instregex "[SU](MAX|MIN)Vv16i8v$")>;
1487 def : InstRW<[V2Write_4cyc_1V02], (instregex "^MULv", "^SQ(R)?DMULHv")>;
1489 // ASIMD multiply accumulate
1490 def : InstRW<[V2Wr_VMA, V2Rd_VMA], (instregex "^MLAv", "^MLSv")>;
1492 // ASIMD multiply accumulate high
1493 def : InstRW<[V2Wr_VMAH, V2Rd_VMAH], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
1495 // ASIMD multiply accumulate long
1496 def : InstRW<[V2Wr_VMAL, V2Rd_VMAL], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
1498 // ASIMD multiply accumulate saturating long
1499 def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDML[AS]L[iv]")>;
1501 // ASIMD multiply/multiply long (8x8) polynomial, D-form
1502 // ASIMD multiply/multiply long (8x8) polynomial, Q-form
1503 def : InstRW<[V2Write_3cyc_1V23], (instregex "^PMULL?(v8i8|v16i8)$")>;
1505 // ASIMD multiply long
1506 def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]MULLv", "^SQDMULL[iv]")>;
1508 // ASIMD pairwise add and accumulate long
1509 def : InstRW<[V2Wr_VPA, V2Rd_VPA], (instregex "^[SU]ADALPv")>;
1511 // ASIMD shift accumulate
1512 def : InstRW<[V2Wr_VSA, V2Rd_VSA], (instregex "^[SU]SRA[dv]", "^[SU]RSRA[dv]")>;
1514 // ASIMD shift by immed, basic
1515 def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHL[dv]", "^SHLLv", "^SHRNv",
1516 "^SSHLLv", "^SSHR[dv]", "^USHLLv",
1519 // ASIMD shift by immed and insert, basic
1520 def : InstRW<[V2Write_2cyc_1V13], (instregex "^SLI[dv]", "^SRI[dv]")>;
1522 // ASIMD shift by immed, complex
1523 def : InstRW<[V2Write_4cyc_1V13],
1524 (instregex "^RSHRNv", "^SQRSHRU?N[bhsv]", "^(SQSHLU?|UQSHL)[bhsd]$",
1525 "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
1526 "^SQSHRU?N[bhsv]", "^SRSHR[dv]", "^UQRSHRN[bhsv]",
1527 "^UQSHRN[bhsv]", "^URSHR[dv]")>;
1529 // ASIMD shift by register, basic
1530 def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]SHLv")>;
1532 // ASIMD shift by register, complex
1533 def : InstRW<[V2Write_4cyc_1V13],
1534 (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
1535 "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;
1537 // ASIMD floating-point instructions
1538 // -----------------------------------------------------------------------------
1540 // ASIMD FP absolute value/difference
1541 // ASIMD FP arith, normal
1543 // ASIMD FP complex add
1544 // ASIMD FP max/min, normal
1545 // ASIMD FP max/min, pairwise
1547 // Handled by SchedAlias<WriteV[dq], ...>
1549 // ASIMD FP complex multiply add
1550 def : InstRW<[V2Wr_VFCMA, V2Rd_VFCMA], (instregex "^FCMLAv")>;
1552 // ASIMD FP convert, long (F16 to F32)
1553 def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTL(v4|v8)i16")>;
1555 // ASIMD FP convert, long (F32 to F64)
1556 def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVTL(v2|v4)i32")>;
1558 // ASIMD FP convert, narrow (F32 to F16)
1559 def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTN(v4|v8)i16")>;
1561 // ASIMD FP convert, narrow (F64 to F32)
1562 def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVTN(v2|v4)i32",
1563 "^FCVTXN(v2|v4)f32")>;
1565 // ASIMD FP convert, other, D-form F32 and Q-form F64
1566 def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
1567 "^FCVT[AMNPZ][SU]v1i64$",
1569 "^[SU]CVTFv2f(32|64)$",
1573 // ASIMD FP convert, other, D-form F16 and Q-form F32
1574 def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
1575 "^FCVT[AMNPZ][SU]v1i32$",
1577 "^[SU]CVTFv4f(16|32)$",
1581 // ASIMD FP convert, other, Q-form F16
1582 def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$",
1583 "^FCVT[AMNPZ][SU]v1f16$",
1589 // ASIMD FP divide, D-form, F16
1590 def : InstRW<[V2Write_7cyc_1V02_7rc], (instrs FDIVv4f16)>;
1592 // ASIMD FP divide, D-form, F32
1593 def : InstRW<[V2Write_10cyc_1V02_5rc], (instrs FDIVv2f32)>;
1595 // ASIMD FP divide, Q-form, F16
1596 def : InstRW<[V2Write_13cyc_1V02_13rc], (instrs FDIVv8f16)>;
1598 // ASIMD FP divide, Q-form, F32
1599 def : InstRW<[V2Write_10cyc_1V02_10rc], (instrs FDIVv4f32)>;
1601 // ASIMD FP divide, Q-form, F64
1602 def : InstRW<[V2Write_15cyc_1V02_14rc], (instrs FDIVv2f64)>;
1604 // ASIMD FP max/min, reduce, F32 and D-form F16
1605 def : InstRW<[V2Write_4cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;
1607 // ASIMD FP max/min, reduce, Q-form F16
1608 def : InstRW<[V2Write_6cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;
1610 // ASIMD FP multiply
1611 def : InstRW<[V2Wr_VFM], (instregex "^FMULv", "^FMULXv")>;
1613 // ASIMD FP multiply accumulate
1614 def : InstRW<[V2Wr_VFMA, V2Rd_VFMA], (instregex "^FMLAv", "^FMLSv")>;
1616 // ASIMD FP multiply accumulate long
1617 def : InstRW<[V2Wr_VFMAL, V2Rd_VFMAL], (instregex "^FML[AS]L2?(lane)?v")>;
1619 // ASIMD FP round, D-form F32 and Q-form F64
1620 def : InstRW<[V2Write_3cyc_1V02],
1621 (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
1622 "^FRINT(32|64)[XZ]v2f(32|64)$")>;
1624 // ASIMD FP round, D-form F16 and Q-form F32
1625 def : InstRW<[V2Write_4cyc_2V02],
1626 (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
1627 "^FRINT(32|64)[XZ]v4f32$")>;
1629 // ASIMD FP round, Q-form F16
1630 def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
1632 // ASIMD FP square root, D-form, F16
1633 def : InstRW<[V2Write_7cyc_1V02_7rc], (instrs FSQRTv4f16)>;
1635 // ASIMD FP square root, D-form, F32
1636 def : InstRW<[V2Write_10cyc_1V02_5rc], (instrs FSQRTv2f32)>;
1638 // ASIMD FP square root, Q-form, F16
1639 def : InstRW<[V2Write_13cyc_1V02_13rc], (instrs FSQRTv8f16)>;
1641 // ASIMD FP square root, Q-form, F32
1642 def : InstRW<[V2Write_10cyc_1V02_9rc], (instrs FSQRTv4f32)>;
1644 // ASIMD FP square root, Q-form, F64
1645 def : InstRW<[V2Write_16cyc_1V02_15rc], (instrs FSQRTv2f64)>;
1647 // ASIMD BFloat16 (BF16) instructions
1648 // -----------------------------------------------------------------------------
1650 // ASIMD convert, F32 to BF16
1651 def : InstRW<[V2Write_4cyc_2V02], (instrs BFCVTN, BFCVTN2)>;
1653 // ASIMD dot product
1654 def : InstRW<[V2Wr_VBFDOT, V2Rd_VBFDOT], (instrs BFDOTv4bf16, BFDOTv8bf16)>;
1656 // ASIMD matrix multiply accumulate
1657 def : InstRW<[V2Wr_VBFMMA, V2Rd_VBFMMA], (instrs BFMMLA)>;
1659 // ASIMD multiply accumulate long
1660 def : InstRW<[V2Wr_VBFMAL, V2Rd_VBFMAL], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
1663 // Scalar convert, F32 to BF16
1664 def : InstRW<[V2Write_3cyc_1V02], (instrs BFCVT)>;
1666 // ASIMD miscellaneous instructions
1667 // -----------------------------------------------------------------------------
1669 // ASIMD bit reverse
1670 // ASIMD bitwise insert
1672 // ASIMD duplicate, element
1674 // ASIMD extract narrow
1675 // ASIMD insert, element to element
1676 // ASIMD move, FP immed
1677 // ASIMD move, integer immed
1679 // ASIMD table lookup extension, 1 table reg
1682 // Handled by SchedAlias<WriteV[dq], ...>
1683 def : InstRW<[V2Write_0or2cyc_1V], (instrs MOVID, MOVIv2d_ns)>;
1685 // ASIMD duplicate, gen reg
1686 def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;
1688 // ASIMD extract narrow, saturating
1689 def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
1691 // ASIMD reciprocal and square root estimate, D-form U32
1692 def : InstRW<[V2Write_3cyc_1V02], (instrs URECPEv2i32, URSQRTEv2i32)>;
1694 // ASIMD reciprocal and square root estimate, Q-form U32
1695 def : InstRW<[V2Write_4cyc_2V02], (instrs URECPEv4i32, URSQRTEv4i32)>;
1697 // ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
1698 def : InstRW<[V2Write_3cyc_1V02], (instrs FRECPEv1f16, FRECPEv1i32,
1699 FRECPEv1i64, FRECPEv2f32,
1700 FRSQRTEv1f16, FRSQRTEv1i32,
1701 FRSQRTEv1i64, FRSQRTEv2f32)>;
1703 // ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
1704 def : InstRW<[V2Write_4cyc_2V02], (instrs FRECPEv4f16, FRECPEv4f32,
1705 FRSQRTEv4f16, FRSQRTEv4f32)>;
1707 // ASIMD reciprocal and square root estimate, Q-form F16
1708 def : InstRW<[V2Write_6cyc_4V02], (instrs FRECPEv8f16, FRSQRTEv8f16)>;
1710 // ASIMD reciprocal exponent
1711 def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRECPXv")>;
1713 // ASIMD reciprocal step
1714 def : InstRW<[V2Write_4cyc_1V], (instregex "^FRECPS(32|64|v)",
1715 "^FRSQRTS(32|64|v)")>;
1717 // ASIMD table lookup, 1 or 2 table regs
1718 def : InstRW<[V2Write_2cyc_1V01], (instrs TBLv8i8One, TBLv16i8One,
1719 TBLv8i8Two, TBLv16i8Two)>;
1721 // ASIMD table lookup, 3 table regs
1722 def : InstRW<[V2Write_4cyc_2V01], (instrs TBLv8i8Three, TBLv16i8Three)>;
1724 // ASIMD table lookup, 4 table regs
1725 def : InstRW<[V2Write_4cyc_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>;
1727 // ASIMD table lookup extension, 2 table reg
1728 def : InstRW<[V2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>;
1730 // ASIMD table lookup extension, 3 table reg
1731 def : InstRW<[V2Write_6cyc_3V], (instrs TBXv8i8Three, TBXv16i8Three)>;
1733 // ASIMD table lookup extension, 4 table reg
1734 def : InstRW<[V2Write_6cyc_5V], (instrs TBXv8i8Four, TBXv16i8Four)>;
1736 // ASIMD transfer, element to gen reg
1737 def : InstRW<[V2Write_2cyc_2V01], (instregex "^[SU]MOVv")>;
1739 // ASIMD transfer, gen reg to element
1740 def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
1742 // ASIMD load instructions
1743 // -----------------------------------------------------------------------------
1745 // ASIMD load, 1 element, multiple, 1 reg, D-form
1746 def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
1747 def : InstRW<[WriteAdr, V2Write_6cyc_1L],
1748 (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;
1750 // ASIMD load, 1 element, multiple, 1 reg, Q-form
1751 def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
1752 def : InstRW<[WriteAdr, V2Write_6cyc_1L],
1753 (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;
1755 // ASIMD load, 1 element, multiple, 2 reg, D-form
1756 def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
1757 def : InstRW<[WriteAdr, V2Write_6cyc_2L],
1758 (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
1760 // ASIMD load, 1 element, multiple, 2 reg, Q-form
1761 def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
1762 def : InstRW<[WriteAdr, V2Write_6cyc_2L],
1763 (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
1765 // ASIMD load, 1 element, multiple, 3 reg, D-form
1766 def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
1767 def : InstRW<[WriteAdr, V2Write_6cyc_3L],
1768 (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
1770 // ASIMD load, 1 element, multiple, 3 reg, Q-form
1771 def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
1772 def : InstRW<[WriteAdr, V2Write_6cyc_3L],
1773 (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
1775 // ASIMD load, 1 element, multiple, 4 reg, D-form
1776 def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
1777 def : InstRW<[WriteAdr, V2Write_7cyc_4L],
1778 (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
1780 // ASIMD load, 1 element, multiple, 4 reg, Q-form
1781 def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
1782 def : InstRW<[WriteAdr, V2Write_7cyc_4L],
1783 (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
1785 // ASIMD load, 1 element, one lane, B/H/S
1786 // ASIMD load, 1 element, one lane, D
1787 def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)$")>;
1788 def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)_POST$")>;
1790 // ASIMD load, 1 element, all lanes, D-form, B/H/S
1791 // ASIMD load, 1 element, all lanes, D-form, D
1792 def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)$")>;
1793 def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;
1795 // ASIMD load, 1 element, all lanes, Q-form
1796 def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
1797 def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
1799 // ASIMD load, 2 element, multiple, D-form, B/H/S
1800 def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)$")>;
1801 def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
1803 // ASIMD load, 2 element, multiple, Q-form, B/H/S
1804 // ASIMD load, 2 element, multiple, Q-form, D
1805 def : InstRW<[V2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
1806 def : InstRW<[WriteAdr, V2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
1808 // ASIMD load, 2 element, one lane, B/H
1809 // ASIMD load, 2 element, one lane, S
1810 // ASIMD load, 2 element, one lane, D
1811 def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)$")>;
1812 def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)_POST$")>;
1814 // ASIMD load, 2 element, all lanes, D-form, B/H/S
1815 // ASIMD load, 2 element, all lanes, D-form, D
1816 def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)$")>;
1817 def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;
1819 // ASIMD load, 2 element, all lanes, Q-form
1820 def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
1821 def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
1823 // ASIMD load, 3 element, multiple, D-form, B/H/S
1824 def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)$")>;
1825 def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
1827 // ASIMD load, 3 element, multiple, Q-form, B/H/S
1828 // ASIMD load, 3 element, multiple, Q-form, D
1829 def : InstRW<[V2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
1830 def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
1832 // ASIMD load, 3 element, one lane, B/H
1833 // ASIMD load, 3 element, one lane, S
1834 // ASIMD load, 3 element, one lane, D
1835 def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)$")>;
1836 def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)_POST$")>;
1838 // ASIMD load, 3 element, all lanes, D-form, B/H/S
1839 // ASIMD load, 3 element, all lanes, D-form, D
1840 def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)$")>;
1841 def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;
1843 // ASIMD load, 3 element, all lanes, Q-form, B/H/S
1844 // ASIMD load, 3 element, all lanes, Q-form, D
1845 def : InstRW<[V2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)$")>;
1846 def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;
1848 // ASIMD load, 4 element, multiple, D-form, B/H/S
1849 def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)$")>;
1850 def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
1852 // ASIMD load, 4 element, multiple, Q-form, B/H/S
1853 // ASIMD load, 4 element, multiple, Q-form, D
1854 def : InstRW<[V2Write_9cyc_6L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
1855 def : InstRW<[WriteAdr, V2Write_9cyc_6L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
1857 // ASIMD load, 4 element, one lane, B/H
1858 // ASIMD load, 4 element, one lane, S
1859 // ASIMD load, 4 element, one lane, D
1860 def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)$")>;
1861 def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)_POST$")>;
1863 // ASIMD load, 4 element, all lanes, D-form, B/H/S
1864 // ASIMD load, 4 element, all lanes, D-form, D
1865 def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
1866 def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;
1868 // ASIMD load, 4 element, all lanes, Q-form, B/H/S
1869 // ASIMD load, 4 element, all lanes, Q-form, D
1870 def : InstRW<[V2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
1871 def : InstRW<[WriteAdr, V2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
1873 // ASIMD store instructions
1874 // -----------------------------------------------------------------------------
1876 // ASIMD store, 1 element, multiple, 1 reg, D-form
1877 def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
1878 def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
1880 // ASIMD store, 1 element, multiple, 1 reg, Q-form
1881 def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
1882 def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
1884 // ASIMD store, 1 element, multiple, 2 reg, D-form
1885 def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
1886 def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
1888 // ASIMD store, 1 element, multiple, 2 reg, Q-form
1889 def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
1890 def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
1892 // ASIMD store, 1 element, multiple, 3 reg, D-form
1893 def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
1894 def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
1896 // ASIMD store, 1 element, multiple, 3 reg, Q-form
1897 def : InstRW<[V2Write_2cyc_3L01_3V01], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
1898 def : InstRW<[WriteAdr, V2Write_2cyc_3L01_3V01], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
1900 // ASIMD store, 1 element, multiple, 4 reg, D-form
1901 def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
1902 def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
1904 // ASIMD store, 1 element, multiple, 4 reg, Q-form
1905 def : InstRW<[V2Write_2cyc_4L01_4V01], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
1906 def : InstRW<[WriteAdr, V2Write_2cyc_4L01_4V01], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
1908 // ASIMD store, 1 element, one lane, B/H/S
1909 // ASIMD store, 1 element, one lane, D
1910 def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST1i(8|16|32|64)$")>;
1911 def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST1i(8|16|32|64)_POST$")>;
1913 // ASIMD store, 2 element, multiple, D-form, B/H/S
1914 def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST2Twov(8b|4h|2s)$")>;
1915 def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
1917 // ASIMD store, 2 element, multiple, Q-form, B/H/S
1918 // ASIMD store, 2 element, multiple, Q-form, D
1919 def : InstRW<[V2Write_4cyc_2L01_4V01], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
1920 def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
1922 // ASIMD store, 2 element, one lane, B/H/S
1923 // ASIMD store, 2 element, one lane, D
1924 def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST2i(8|16|32|64)$")>;
1925 def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST2i(8|16|32|64)_POST$")>;
1927 // ASIMD store, 3 element, multiple, D-form, B/H/S
1928 def : InstRW<[V2Write_5cyc_2L01_4V01], (instregex "ST3Threev(8b|4h|2s)$")>;
1929 def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
1931 // ASIMD store, 3 element, multiple, Q-form, B/H/S
1932 // ASIMD store, 3 element, multiple, Q-form, D
1933 def : InstRW<[V2Write_6cyc_3L01_6V01], (instregex "ST3Threev(16b|8h|4s|2d)$")>;
1934 def : InstRW<[WriteAdr, V2Write_6cyc_3L01_6V01], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
1936 // ASIMD store, 3 element, one lane, B/H
1937 // ASIMD store, 3 element, one lane, S
1938 // ASIMD store, 3 element, one lane, D
1939 def : InstRW<[V2Write_5cyc_2L01_4V01], (instregex "ST3i(8|16|32|64)$")>;
1940 def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01], (instregex "ST3i(8|16|32|64)_POST$")>;
1942 // ASIMD store, 4 element, multiple, D-form, B/H/S
1943 def : InstRW<[V2Write_6cyc_2L01_6V01], (instregex "ST4Fourv(8b|4h|2s)$")>;
1944 def : InstRW<[WriteAdr, V2Write_6cyc_2L01_6V01], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
1946 // ASIMD store, 4 element, multiple, Q-form, B/H/S
1947 def : InstRW<[V2Write_7cyc_4L01_12V01], (instregex "ST4Fourv(16b|8h|4s)$")>;
1948 def : InstRW<[WriteAdr, V2Write_7cyc_4L01_12V01], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
1950 // ASIMD store, 4 element, multiple, Q-form, D
1951 def : InstRW<[V2Write_5cyc_4L01_8V01], (instregex "ST4Fourv(2d)$")>;
1952 def : InstRW<[WriteAdr, V2Write_5cyc_4L01_8V01], (instregex "ST4Fourv(2d)_POST$")>;
1954 // ASIMD store, 4 element, one lane, B/H/S
1955 def : InstRW<[V2Write_6cyc_1L01_3V01], (instregex "ST4i(8|16|32)$")>;
1956 def : InstRW<[WriteAdr, V2Write_6cyc_1L01_3V01], (instregex "ST4i(8|16|32)_POST$")>;
1958 // ASIMD store, 4 element, one lane, D
1959 def : InstRW<[V2Write_4cyc_2L01_4V01], (instregex "ST4i(64)$")>;
1960 def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01], (instregex "ST4i(64)_POST$")>;
1962 // Cryptography extensions
1963 // -----------------------------------------------------------------------------
1966 def : InstRW<[V2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;
1968 // Crypto polynomial (64x64) multiply long
1969 def : InstRW<[V2Write_2cyc_1V], (instrs PMULLv1i64, PMULLv2i64)>;
1971 // Crypto SHA1 hash acceleration op
1972 // Crypto SHA1 schedule acceleration ops
1973 def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>;
1975 // Crypto SHA1 hash acceleration ops
1976 // Crypto SHA256 hash acceleration ops
1977 def : InstRW<[V2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>;
1979 // Crypto SHA256 schedule acceleration ops
1980 def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>;
1982 // Crypto SHA512 hash acceleration ops
1983 def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>;
1986 def : InstRW<[V2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;
1989 def : InstRW<[V2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
1990 "^SM3TT[12][AB]$")>;
1993 def : InstRW<[V2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>;
1996 // -----------------------------------------------------------------------------
1998 def : InstRW<[V2Wr_CRC, V2Rd_CRC], (instregex "^CRC32")>;
2000 // SVE Predicate instructions
2001 // -----------------------------------------------------------------------------
2003 // Loop control, based on predicate
2004 def : InstRW<[V2Write_2or3cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP,
2005 BRKB_PPmP, BRKB_PPzP)>;
2007 // Loop control, based on predicate and flag setting
2008 def : InstRW<[V2Write_3or4cyc_2M], (instrs BRKAS_PPzP, BRKBS_PPzP)>;
2010 // Loop control, propagating
2011 def : InstRW<[V2Write_2or3cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP,
2014 // Loop control, propagating and flag setting
2015 def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
2018 // Loop control, based on GPR
2019 def : InstRW<[V2Write_3cyc_2M],
2020 (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>;
2021 def : InstRW<[V2Write_3cyc_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>;
2024 def : InstRW<[V2Write_1cyc_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)")>;
2026 // Predicate counting scalar
2027 def : InstRW<[V2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
2028 def : InstRW<[V2Write_2cyc_1M],
2029 (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI",
2030 "^SQ(DEC|INC)[BHWD]_XPiWdI",
2031 "^UQ(DEC|INC)[BHWD]_WPiI")>;
2033 // Predicate counting scalar, ALL, {1,2,4}
2034 def : InstRW<[V2Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI")>;
2036 // Predicate counting scalar, active predicate
2037 def : InstRW<[V2Write_2cyc_1M],
2038 (instregex "^CNTP_XPP_[BHSD]",
2039 "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]",
2040 "^(UQDEC|UQINC)P_WP_[BHSD]",
2041 "^(SQDEC|SQINC)P_XPWd_[BHSD]")>;
2043 // Predicate counting vector, active predicate
2044 def : InstRW<[V2Write_7cyc_1M_1M0_1V],
2045 (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>;
2047 // Predicate logical
2048 def : InstRW<[V2Write_1or2cyc_1M0],
2049 (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>;
2051 // Predicate logical, flag setting
2052 def : InstRW<[V2Write_1or2cyc_1M0_1M],
2053 (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>;
2055 // Predicate reverse
2056 def : InstRW<[V2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]")>;
2059 def : InstRW<[V2Write_1cyc_1M0], (instrs SEL_PPPP)>;
2062 def : InstRW<[V2Write_2cyc_1M], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;
2064 // Predicate set/initialize, set flags
2065 def : InstRW<[V2Write_3cyc_2M], (instregex "^PTRUES_[BHSD]")>;
2067 // Predicate find first/next
2068 def : InstRW<[V2Write_2cyc_1M], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>;
2071 def : InstRW<[V2Write_1cyc_1M], (instrs PTEST_PP)>;
2073 // Predicate transpose
2074 def : InstRW<[V2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSD]")>;
2076 // Predicate unpack and widen
2077 def : InstRW<[V2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
2079 // Predicate zip/unzip
2080 def : InstRW<[V2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]")>;
2082 // SVE integer instructions
2083 // -----------------------------------------------------------------------------
2085 // Arithmetic, absolute diff
2086 def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]",
2087 "^[SU]ABD_ZPZZ_[BHSD]")>;
2089 // Arithmetic, absolute diff accum
2090 def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]")>;
2092 // Arithmetic, absolute diff accum long
2093 def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>;
2095 // Arithmetic, absolute diff long
2096 def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>;
2098 // Arithmetic, basic
2099 def : InstRW<[V2Write_2cyc_1V],
2100 (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
2101 "^(ADD|SUB)_ZZZ_[BHSD]",
2102 "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
2103 "^(ADD|SUB|SUBR)_ZI_[BHSD]",
2104 "^ADR_[SU]XTW_ZZZ_D_[0123]",
2105 "^ADR_LSL_ZZZ_[SD]_[0123]",
2106 "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
2107 "^SADDLBT_ZZZ_[HSD]",
2108 "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
2109 "^SSUBL(BT|TB)_ZZZ_[HSD]")>;
2111 // Arithmetic, complex
2112 def : InstRW<[V2Write_2cyc_1V],
2113 (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
2114 "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
2115 "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
2116 "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
2117 "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
2118 "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;
2120 // Arithmetic, large integer
2121 def : InstRW<[V2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>;
2123 // Arithmetic, pairwise add
2124 def : InstRW<[V2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]")>;
2126 // Arithmetic, pairwise add and accum long
2127 def : InstRW<[V2Wr_ZPA, ReadDefault, V2Rd_ZPA],
2128 (instregex "^[SU]ADALP_ZPmZ_[HSD]")>;
2130 // Arithmetic, shift
2131 def : InstRW<[V2Write_2cyc_1V13],
2132 (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
2133 "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
2134 "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
2135 "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
2136 "^(ASR|LSL|LSR)_ZZI_[BHSD]",
2137 "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
2138 "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
2140 // Arithmetic, shift and accumulate
2141 def : InstRW<[V2Wr_ZSA, V2Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]")>;
2143 // Arithmetic, shift by immediate
2144 def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHRN[BT]_ZZI_[BHS]",
2145 "^[SU]SHLL[BT]_ZZI_[HSD]")>;
2147 // Arithmetic, shift by immediate and insert
2148 def : InstRW<[V2Write_2cyc_1V13], (instregex "^(SLI|SRI)_ZZI_[BHSD]")>;
2150 // Arithmetic, shift complex
2151 def : InstRW<[V2Write_4cyc_1V13],
2152 (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
2153 "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]",
2154 "^[SU]QR?SHL_ZPZZ_[BHSD]",
2155 "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
2156 "^SQSHRU?N[BT]_ZZI_[BHS]",
2157 "^UQR?SHRN[BT]_ZZI_[BHS]")>;
2159 // Arithmetic, shift right for divide
2160 def : InstRW<[V2Write_4cyc_1V13], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;
2162 // Arithmetic, shift rounding
2163 def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]",
2164 "^[SU]RSHL_ZPZZ_[BHSD]",
2165 "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>;
2168 def : InstRW<[V2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>;
2171 def : InstRW<[V2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>;
2173 // Count/reverse bits
2174 def : InstRW<[V2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;
2176 // Broadcast logical bitmask immediate to vector
2177 def : InstRW<[V2Write_2cyc_1V], (instrs DUPM_ZI)>;
2179 // Compare and set flags
2180 def : InstRW<[V2Write_4or5cyc_1V0_1M0],
2181 (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
2182 "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;
2185 def : InstRW<[V2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]")>;
2187 // Complex dot product 8-bit element
2188 def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
2190 // Complex dot product 16-bit element
2191 def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
2193 // Complex multiply-add B, H, S element size
2194 def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]",
2195 "^CMLA_ZZZI_[HS]")>;
2197 // Complex multiply-add D element size
2198 def : InstRW<[V2Wr_ZCMAD, V2Rd_ZCMAD], (instrs CMLA_ZZZ_D)>;
2200 // Conditional extract operations, scalar form
2201 def : InstRW<[V2Write_8cyc_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]")>;
2203 // Conditional extract operations, SIMD&FP scalar and vector forms
2204 def : InstRW<[V2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]",
2205 "^COMPACT_ZPZ_[SD]",
2206 "^SPLICE_ZPZZ?_[BHSD]")>;
2208 // Convert to floating point, 64b to float or convert to double
2209 def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
2210 "^[SU]CVTF_ZPmZ_StoD")>;
2212 // Convert to floating point, 32b to single or half
2213 def : InstRW<[V2Write_4cyc_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;
2215 // Convert to floating point, 16b to half
2216 def : InstRW<[V2Write_6cyc_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
2219 def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]")>;
2221 // Copy, scalar SIMD&FP or imm
2222 def : InstRW<[V2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]",
2223 "^CPY_ZPzI_[BHSD]")>;
2226 def : InstRW<[V2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
2227 "^[SU]DIV_ZPZZ_S")>;
2230 def : InstRW<[V2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
2231 "^[SU]DIV_ZPZZ_D")>;
2233 // Dot product, 8 bit
2234 def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S")>;
2236 // Dot product, 8 bit, using signed and unsigned integers
2237 def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
2239 // Dot product, 16 bit
2240 def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D")>;
2242 // Duplicate, immediate and indexed form
2243 def : InstRW<[V2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]",
2244 "^DUP_ZZI_[BHSDQ]")>;
2246 // Duplicate, scalar form
2247 def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]")>;
2249 // Extend, sign or zero
2250 def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]XTB_ZPmZ_[HSD]",
2251 "^[SU]XTH_ZPmZ_[SD]",
2252 "^[SU]XTW_ZPmZ_[D]")>;
2255 def : InstRW<[V2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;
2257 // Extract narrow saturating
2258 def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]",
2259 "^SQXTUN[BT]_ZZ_[BHS]")>;
2261 // Extract/insert operation, SIMD and FP scalar form
2262 def : InstRW<[V2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]",
2263 "^INSR_ZV_[BHSD]")>;
2265 // Extract/insert operation, scalar
2266 def : InstRW<[V2Write_6cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]",
2267 "^INSR_ZR_[BHSD]")>;
2269 // Histogram operations
2270 def : InstRW<[V2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]",
2273 // Horizontal operations, B, H, S form, immediate operands only
2274 def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]")>;
2276 // Horizontal operations, B, H, S form, scalar, immediate operands/ scalar
2277 // operands only / immediate, scalar operands
2278 def : InstRW<[V2Write_7cyc_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>;
2280 // Horizontal operations, D form, immediate operands only
2281 def : InstRW<[V2Write_5cyc_2V02], (instrs INDEX_II_D)>;
2283 // Horizontal operations, D form, scalar, immediate operands)/ scalar operands
2284 // only / immediate, scalar operands
2285 def : InstRW<[V2Write_8cyc_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D")>;
2288 def : InstRW<[V2Write_2cyc_1V],
2289 (instregex "^(AND|EOR|ORR)_ZI",
2290 "^(AND|BIC|EOR|ORR)_ZZZ",
2291 "^EOR(BT|TB)_ZZZ_[BHSD]",
2292 "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]",
2293 "^NOT_ZPmZ_[BHSD]")>;
2295 // Max/min, basic and pairwise
2296 def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
2297 "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]",
2298 "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>;
2300 // Matching operations
2301 // FIXME: SOG p. 44, n. 5: If the consuming instruction has a flag source, the
2302 // latency for this instruction is 4 cycles.
2303 def : InstRW<[V2Write_2or3cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]")>;
2305 // Matrix multiply-accumulate
2306 def : InstRW<[V2Wr_ZMMA, V2Rd_ZMMA], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
2309 def : InstRW<[V2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]",
2312 // Multiply, B, H, S element size
2313 def : InstRW<[V2Write_4cyc_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
2315 "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
2316 "^[SU]MULH_ZPZZ_[BHS]")>;
2318 // Multiply, D element size
2319 def : InstRW<[V2Write_5cyc_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
2321 "^[SU]MULH_(ZPmZ|ZZZ)_D",
2322 "^[SU]MULH_ZPZZ_D")>;
2325 def : InstRW<[V2Write_4cyc_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]",
2326 "^[SU]MULL[BT]_ZZZ_[HSD]")>;
2328 // Multiply accumulate, B, H, S element size
2329 def : InstRW<[V2Wr_ZMABHS, V2Rd_ZMABHS],
2330 (instregex "^ML[AS]_ZZZI_[HS]", "^ML[AS]_ZPZZZ_[BHS]")>;
2331 def : InstRW<[V2Wr_ZMABHS, ReadDefault, V2Rd_ZMABHS],
2332 (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
2334 // Multiply accumulate, D element size
2335 def : InstRW<[V2Wr_ZMAD, V2Rd_ZMAD],
2336 (instregex "^ML[AS]_ZZZI_D", "^ML[AS]_ZPZZZ_D")>;
2337 def : InstRW<[V2Wr_ZMAD, ReadDefault, V2Rd_ZMAD],
2338 (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;
2340 // Multiply accumulate long
2341 def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]",
2342 "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>;
2344 // Multiply accumulate saturating doubling long regular
2345 def : InstRW<[V2Wr_ZMASQL, V2Rd_ZMASQ],
2346 (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]",
2347 "^SQDML[AS]L[BT]_ZZZI_[SD]")>;
2349 // Multiply saturating doubling high, B, H, S element size
2350 def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULH_ZZZ_[BHS]",
2351 "^SQDMULH_ZZZI_[HS]")>;
2353 // Multiply saturating doubling high, D element size
2354 def : InstRW<[V2Write_5cyc_2V02], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
2356 // Multiply saturating doubling long
2357 def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]",
2358 "^SQDMULL[BT]_ZZZI_[SD]")>;
2360 // Multiply saturating rounding doubling regular/complex accumulate, B, H, S
2362 def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]",
2363 "^SQRDCMLAH_ZZZ_[BHS]",
2364 "^SQRDML[AS]H_ZZZI_[HS]",
2365 "^SQRDCMLAH_ZZZI_[HS]")>;
2367 // Multiply saturating rounding doubling regular/complex accumulate, D element
2369 def : InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D",
2370 "^SQRDCMLAH_ZZZ_D")>;
2372 // Multiply saturating rounding doubling regular/complex, B, H, S element size
2373 def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]",
2374 "^SQRDMULH_ZZZI_[HS]")>;
2376 // Multiply saturating rounding doubling regular/complex, D element size
2377 def : InstRW<[V2Write_5cyc_2V02], (instregex "^SQRDMULH_ZZZI?_D")>;
2379 // Multiply/multiply long, (8x8) polynomial
2380 def : InstRW<[V2Write_2cyc_1V23], (instregex "^PMUL_ZZZ_B",
2381 "^PMULL[BT]_ZZZ_[HDQ]")>;
2383 // Predicate counting vector
2384 def : InstRW<[V2Write_2cyc_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI")>;
2386 // Reciprocal estimate
2387 def : InstRW<[V2Write_4cyc_2V02], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;
2389 // Reduction, arithmetic, B form
2390 def : InstRW<[V2Write_9cyc_2V_4V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
2392 // Reduction, arithmetic, H form
2393 def : InstRW<[V2Write_8cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
2395 // Reduction, arithmetic, S form
2396 def : InstRW<[V2Write_6cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
2398 // Reduction, arithmetic, D form
2399 def : InstRW<[V2Write_4cyc_2V], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
2401 // Reduction, logical
2402 def : InstRW<[V2Write_6cyc_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]")>;
2405 def : InstRW<[V2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]",
2410 // Select, vector form
2411 def : InstRW<[V2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]")>;
2414 def : InstRW<[V2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]")>;
2416 // Table lookup extension
2417 def : InstRW<[V2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]")>;
2419 // Transpose, vector form
2420 def : InstRW<[V2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>;
2422 // Unpack and extend
2423 def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>;
2426 def : InstRW<[V2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>;
2428 // SVE floating-point instructions
2429 // -----------------------------------------------------------------------------
2431 // Floating point absolute value/difference
2432 def : InstRW<[V2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]",
2434 "^FABS_ZPmZ_[HSD]")>;
2436 // Floating point arithmetic
2437 def : InstRW<[V2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
2438 "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
2439 "^FADDP_ZPmZZ_[HSD]",
2441 "^FSUBR_ZPm[IZ]_[HSD]",
2442 "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
2444 // Floating point associative add, F16
2445 def : InstRW<[V2Write_10cyc_1V1_9rc], (instrs FADDA_VPZ_H)>;
2447 // Floating point associative add, F32
2448 def : InstRW<[V2Write_6cyc_1V1_5rc], (instrs FADDA_VPZ_S)>;
2450 // Floating point associative add, F64
2451 def : InstRW<[V2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;
2453 // Floating point compare
2454 def : InstRW<[V2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]",
2455 "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]",
2456 "^FCM(LE|LT)_PPzZ0_[HSD]",
2457 "^FCMUO_PPzZZ_[HSD]")>;
2459 // Floating point complex add
2460 def : InstRW<[V2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]")>;
2462 // Floating point complex multiply add
2463 def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]")>;
2464 def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA], (instregex "^FCMLA_ZZZI_[HS]")>;
2466 // Floating point convert, long or narrow (F16 to F32 or F32 to F16)
2467 def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
2468 "^FCVTLT_ZPmZ_HtoS",
2469 "^FCVTNT_ZPmZ_StoH")>;
2471 // Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
2473 def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
2474 "^FCVTLT_ZPmZ_StoD",
2475 "^FCVTNT_ZPmZ_DtoS")>;
2477 // Floating point convert, round to odd
2478 def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
2480 // Floating point base2 log, F16
2481 def : InstRW<[V2Write_6cyc_4V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;
2483 // Floating point base2 log, F32
2484 def : InstRW<[V2Write_4cyc_2V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;
2486 // Floating point base2 log, F64
2487 def : InstRW<[V2Write_3cyc_1V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;
2489 // Floating point convert to integer, F16
2490 def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
2492 // Floating point convert to integer, F32
2493 def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;
2495 // Floating point convert to integer, F64
2496 def : InstRW<[V2Write_3cyc_1V02],
2497 (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
2499 // Floating point copy
2500 def : InstRW<[V2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]",
2503 // Floating point divide, F16
2504 def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
2506 // Floating point divide, F32
2507 def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
2509 // Floating point divide, F64
2510 def : InstRW<[V2Write_15cyc_1V02_14rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
2512 // Floating point min/max pairwise
2513 def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
2515 // Floating point min/max
2516 def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
2517 "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
2519 // Floating point multiply
2520 def : InstRW<[V2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
2521 "^FMULX_ZPZZ_[HSD]",
2522 "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
2523 "^FMUL_ZPZ[IZ]_[HSD]")>;
2525 // Floating point multiply accumulate
2526 def : InstRW<[V2Wr_ZFMA, ReadDefault, V2Rd_ZFMA],
2527 (instregex "^FN?ML[AS]_ZPmZZ_[HSD]",
2528 "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
2529 def : InstRW<[V2Wr_ZFMA, V2Rd_ZFMA],
2530 (instregex "^FML[AS]_ZZZI_[HSD]",
2531 "^FN?ML[AS]_ZPZZZ_[HSD]")>;
2533 // Floating point multiply add/sub accumulate long
2534 def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>;
2536 // Floating point reciprocal estimate, F16
2537 def : InstRW<[V2Write_6cyc_4V02], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>;
2539 // Floating point reciprocal estimate, F32
2540 def : InstRW<[V2Write_4cyc_2V02], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>;
2542 // Floating point reciprocal estimate, F64
2543 def : InstRW<[V2Write_3cyc_1V02], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>;
2545 // Floating point reciprocal step
2546 def : InstRW<[V2Write_4cyc_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
2548 // Floating point reduction, F16
2549 def : InstRW<[V2Write_8cyc_4V],
2550 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H")>;
2552 // Floating point reduction, F32
2553 def : InstRW<[V2Write_6cyc_3V],
2554 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S")>;
2556 // Floating point reduction, F64
2557 def : InstRW<[V2Write_4cyc_2V],
2558 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D")>;
2560 // Floating point round to integral, F16
2561 def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
2563 // Floating point round to integral, F32
2564 def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
2566 // Floating point round to integral, F64
2567 def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
2569 // Floating point square root, F16
2570 def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H", "^FSQRT_ZPmZ_H")>;
2572 // Floating point square root, F32
2573 def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S", "^FSQRT_ZPmZ_S")>;
2575 // Floating point square root, F64
2576 def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D", "^FSQRT_ZPmZ_D")>;
2578 // Floating point trigonometric exponentiation
2579 def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>;
2581 // Floating point trigonometric multiply add
2582 def : InstRW<[V2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]")>;
2584 // Floating point trigonometric, miscellaneous
2585 def : InstRW<[V2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]")>;
2587 // SVE BFloat16 (BF16) instructions
2588 // -----------------------------------------------------------------------------
2590 // Convert, F32 to BF16
2591 def : InstRW<[V2Write_4cyc_1V02], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
2594 def : InstRW<[V2Wr_ZBFDOT, V2Rd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
2596 // Matrix multiply accumulate
2597 def : InstRW<[V2Wr_ZBFMMA, V2Rd_ZBFMMA], (instrs BFMMLA_ZZZ)>;
2599 // Multiply accumulate long
2600 def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?")>;
2602 // SVE Load instructions
2603 // -----------------------------------------------------------------------------
2606 def : InstRW<[V2Write_6cyc_1L], (instrs LDR_ZXI)>;
2609 def : InstRW<[V2Write_6cyc_1L_1M], (instrs LDR_PXI)>;
2611 // Contiguous load, scalar + imm
2612 def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM$",
2613 "^LD1S?B_[HSD]_IMM$",
2614 "^LD1S?H_[SD]_IMM$",
2615 "^LD1S?W_D_IMM$" )>;
2616 // Contiguous load, scalar + scalar
2617 def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]$",
2622 // Contiguous load broadcast, scalar + imm
2623 def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$",
2624 "^LD1RS?B_[HSD]_IMM$",
2625 "^LD1RS?H_[SD]_IMM$",
2628 "^LD1RQ_[BHWD]_IMM$")>;
2630 // Contiguous load broadcast, scalar + scalar
2631 def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;
2633 // Non temporal load, scalar + imm
2634 // Non temporal load, scalar + scalar
2635 def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZR[IR]$")>;
2637 // Non temporal gather load, vector + scalar 32-bit element size
2638 def : InstRW<[V2Write_9cyc_2L_4V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
2639 "^LDNT1S[BH]_ZZR_S_REAL$")>;
2641 // Non temporal gather load, vector + scalar 64-bit element size
2642 def : InstRW<[V2Write_9cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
2643 def : InstRW<[V2Write_9cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;
2645 // Contiguous first faulting load, scalar + scalar
2646 def : InstRW<[V2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$",
2647 "^LDFF1S?B_[HSD]_REAL$",
2648 "^LDFF1S?H_[SD]_REAL$",
2649 "^LDFF1S?W_D_REAL$")>;
2651 // Contiguous non faulting load, scalar + imm
2652 def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$",
2653 "^LDNF1S?B_[HSD]_IMM_REAL$",
2654 "^LDNF1S?H_[SD]_IMM_REAL$",
2655 "^LDNF1S?W_D_IMM_REAL$")>;
2657 // Contiguous Load two structures to two vectors, scalar + imm
2658 def : InstRW<[V2Write_8cyc_2L_2V], (instregex "^LD2[BHWD]_IMM$")>;
2660 // Contiguous Load two structures to two vectors, scalar + scalar
2661 def : InstRW<[V2Write_9cyc_2L_2V_2S], (instregex "^LD2[BHWD]$")>;
2663 // Contiguous Load three structures to three vectors, scalar + imm
2664 def : InstRW<[V2Write_9cyc_3L_3V], (instregex "^LD3[BHWD]_IMM$")>;
2666 // Contiguous Load three structures to three vectors, scalar + scalar
2667 def : InstRW<[V2Write_10cyc_3V_3L_3S], (instregex "^LD3[BHWD]$")>;
2669 // Contiguous Load four structures to four vectors, scalar + imm
2670 def : InstRW<[V2Write_9cyc_4L_8V], (instregex "^LD4[BHWD]_IMM$")>;
2672 // Contiguous Load four structures to four vectors, scalar + scalar
2673 def : InstRW<[V2Write_10cyc_4L_8V_4S], (instregex "^LD4[BHWD]$")>;
2675 // Gather load, vector + imm, 32-bit element size
2676 def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
2677 "^GLD(FF)?1W_IMM_REAL$")>;
2679 // Gather load, vector + imm, 64-bit element size
2680 def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
2681 "^GLD(FF)?1D_IMM_REAL$")>;
2683 // Gather load, 32-bit scaled offset
2684 def : InstRW<[V2Write_10cyc_1L_8V],
2685 (instregex "^GLD(FF)?1S?H_S_[SU]XTW_SCALED_REAL$",
2686 "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
2688 // Gather load, 64-bit scaled offset
2689 // NOTE: These instructions are not specified in the SOG.
2690 def : InstRW<[V2Write_10cyc_1L_4V],
2691 (instregex "^GLD(FF)?1S?[HW]_D_([SU]XTW_)?SCALED_REAL$",
2692 "^GLD(FF)?1D_([SU]XTW_)?SCALED_REAL$")>;
2694 // Gather load, 32-bit unpacked unscaled offset
2695 def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
2696 "^GLD(FF)?1W_[SU]XTW_REAL$")>;
2698 // Gather load, 64-bit unpacked unscaled offset
2699 // NOTE: These instructions are not specified in the SOG.
2700 def : InstRW<[V2Write_9cyc_1L_2V],
2701 (instregex "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?REAL$",
2702 "^GLD(FF)?1D_([SU]XTW_)?REAL$")>;
2704 // SVE Store instructions
2705 // -----------------------------------------------------------------------------
2707 // Store from predicate reg
2708 def : InstRW<[V2Write_1cyc_1L01], (instrs STR_PXI)>;
2710 // Store from vector reg
2711 def : InstRW<[V2Write_2cyc_1L01_1V01], (instrs STR_ZXI)>;
2713 // Contiguous store, scalar + imm
2714 def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^ST1[BHWD]_IMM$",
2719 // Contiguous store, scalar + scalar
2720 def : InstRW<[V2Write_2cyc_1L01_1S_1V01], (instregex "^ST1H(_[SD])?$")>;
2721 def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^ST1[BWD]$",
2725 // Contiguous store two structures from two vectors, scalar + imm
2726 def : InstRW<[V2Write_4cyc_1L01_1V01], (instregex "^ST2[BHWD]_IMM$")>;
2728 // Contiguous store two structures from two vectors, scalar + scalar
2729 def : InstRW<[V2Write_4cyc_2L01_2S_2V01], (instrs ST2H)>;
2730 def : InstRW<[V2Write_4cyc_2L01_2V01], (instregex "^ST2[BWD]$")>;
2732 // Contiguous store three structures from three vectors, scalar + imm
2733 def : InstRW<[V2Write_7cyc_9L01_9V01], (instregex "^ST3[BHWD]_IMM$")>;
2735 // Contiguous store three structures from three vectors, scalar + scalar
2736 def : InstRW<[V2Write_7cyc_9L01_9S_9V01], (instregex "^ST3[BHWD]$")>;
2738 // Contiguous store four structures from four vectors, scalar + imm
2739 def : InstRW<[V2Write_11cyc_18L01_18V01], (instregex "^ST4[BHWD]_IMM$")>;
2741 // Contiguous store four structures from four vectors, scalar + scalar
2742 def : InstRW<[V2Write_11cyc_18L01_18S_18V01], (instregex "^ST4[BHWD]$")>;
2744 // Non temporal store, scalar + imm
2745 def : InstRW<[V2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;
2747 // Non temporal store, scalar + scalar
2748 def : InstRW<[V2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>;
2749 def : InstRW<[V2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;
2751 // Scatter non temporal store, vector + scalar 32-bit element size
2752 def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^STNT1[BHW]_ZZR_S")>;
2754 // Scatter non temporal store, vector + scalar 64-bit element size
2755 def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^STNT1[BHWD]_ZZR_D")>;
2757 // Scatter store vector + imm 32-bit element size
2758 def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^SST1[BH]_S_IMM$",
2761 // Scatter store vector + imm 64-bit element size
2762 def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D_IMM$",
2765 // Scatter store, 32-bit scaled offset
2766 def : InstRW<[V2Write_4cyc_4L01_4V01],
2767 (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
2769 // Scatter store, 32-bit unpacked unscaled offset
2770 def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D_[SU]XTW$",
2771 "^SST1D_[SU]XTW$")>;
2773 // Scatter store, 32-bit unpacked scaled offset
2774 def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
2775 "^SST1D_[SU]XTW_SCALED$")>;
2777 // Scatter store, 32-bit unscaled offset
2778 def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^SST1[BH]_S_[SU]XTW$",
2779 "^SST1W_[SU]XTW$")>;
2781 // Scatter store, 64-bit scaled offset
2782 def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[HW]_D_SCALED$",
2785 // Scatter store, 64-bit unscaled offset
2786 def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D$",
2789 // SVE Miscellaneous instructions
2790 // -----------------------------------------------------------------------------
2792 // Read first fault register, unpredicated
2793 def : InstRW<[V2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>;
2795 // Read first fault register, predicated
2796 def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>;
2798 // Read first fault register and set flags
2799 def : InstRW<[V2Write_4or5cyc_2M0_2M], (instrs RDFFRS_PPz)>;
2801 // Set first fault register
2802 // Write to first fault register
2803 def : InstRW<[V2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>;
2806 // NOTE: This is not specified in the SOG.
2807 def : InstRW<[V2Write_4cyc_1L], (instregex "^PRF[BHWD]")>;
2809 // SVE Cryptographic instructions
2810 // -----------------------------------------------------------------------------
2813 def : InstRW<[V2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$",
2817 def : InstRW<[V2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$",
2819 "^XAR_ZZZI_[BHSD]$")>;
2822 def : InstRW<[V2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;