[AArch64] Fix movk parsing with an .equ operand (#124428)
[llvm-project.git] / llvm / lib / Target / AArch64 / AArch64SchedNeoverseV2.td
blob39f7077ae45141259b265d666dbf6cbaf4364641
1 //=- AArch64SchedNeoverseV2.td - NeoverseV2 Scheduling Defs --*- tablegen -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the scheduling model for the Arm Neoverse V2 processors.
10 // All information is taken from the V2 Software Optimisation guide:
12 // https://developer.arm.com/documentation/PJDOC-466751330-593177/r0p2
14 //===----------------------------------------------------------------------===//
// Machine model record for Neoverse V2. It is the record that the
// `let SchedModel = NeoverseV2Model in {` scope later in this file binds the
// resource and write-type definitions to. Two of the values below are marked
// by their own comments as borrowed from other cores (N2, Cortex-A57).
16 def NeoverseV2Model : SchedMachineModel {
17   let IssueWidth            =  16; // Micro-ops dispatched at a time.
18   let MicroOpBufferSize     = 320; // Entries in micro-op re-order buffer.
19   let LoadLatency           =   4; // Optimistic load latency.
20   let MispredictPenalty     =  10; // Extra cycles for mispredicted branch.  NOTE: Copied from N2.
21   let LoopMicroOpBufferSize =  16; // NOTE: Copied from Cortex-A57.
     // NOTE(review): CompleteModel = 1 presumably requires scheduling info for
     // every instruction not excluded via UnsupportedFeatures; confirm against
     // TargetSchedule.td.
22   let CompleteModel         =   1;
     // Features this model does not cover: everything in SMEUnsupported.F plus
     // the four predicates listed explicitly.
24   list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
25                                                     [HasSVE2p1, HasSVEB16B16,
26                                                      HasCPA, HasCSSC]);
29 //===----------------------------------------------------------------------===//
30 // Define each kind of processor resource and number available on Neoverse V2.
31 // Instructions are first fetched and then decoded into internal macro-ops
32 // (MOPs). From there, the MOPs proceed through register renaming and dispatch
33 // stages. A MOP can be split into two micro-ops further down the pipeline
34 // after the decode stage. Once dispatched, micro-ops wait for their operands
35 // and issue out-of-order to one of seventeen issue pipelines. Each issue
36 // pipeline can accept one micro-op per cycle.
38 let SchedModel = NeoverseV2Model in {
40 // Define the (17) issue ports.
// Base processor resources. The template argument is the unit count of each
// resource; B, L01 and D are declared with two units and their comments name
// two ports each ("0/1").
// Unit counts excluding V2UnitFlg: 2 + 4 + 2 + 4 + 2 + 1 + 2 = 17, matching
// the "(17) issue ports" header.
// NOTE(review): V2UnitFlg (3 units) is therefore not one of the 17 issue
// pipelines; presumably a bookkeeping resource for flag-setting ops - confirm.
41 def V2UnitB   : ProcResource<2>;  // Branch 0/1
42 def V2UnitS0  : ProcResource<1>;  // Integer single-cycle 0
43 def V2UnitS1  : ProcResource<1>;  // Integer single-cycle 1
44 def V2UnitS2  : ProcResource<1>;  // Integer single-cycle 2
45 def V2UnitS3  : ProcResource<1>;  // Integer single-cycle 3
46 def V2UnitM0  : ProcResource<1>;  // Integer single/multicycle 0
47 def V2UnitM1  : ProcResource<1>;  // Integer single/multicycle 1
48 def V2UnitV0  : ProcResource<1>;  // FP/ASIMD 0
49 def V2UnitV1  : ProcResource<1>;  // FP/ASIMD 1
50 def V2UnitV2  : ProcResource<1>;  // FP/ASIMD 2
51 def V2UnitV3  : ProcResource<1>;  // FP/ASIMD 3
52 def V2UnitL01 : ProcResource<2>;  // Load/Store 0/1
53 def V2UnitL2  : ProcResource<1>;  // Load 2
54 def V2UnitD   : ProcResource<2>;  // Store data 0/1
55 def V2UnitFlg : ProcResource<3>;  // Flags
// Resource groups built from the base V2Unit* resources. The write types in
// this file name these groups (e.g. V2UnitV, V2UnitL) rather than individual
// ports when more than one port is listed as a candidate.
// Letter key, as given by the member lists:
//   R = S0/S1, S = S0-S3, F = S0/S1 + M0/M1, I = S0-S3 + M0/M1, M = M0/M1,
//   L = L01 + L2, V = V0-V3, Vxy = the two FP/ASIMD ports x and y.
57 def V2UnitR   : ProcResGroup<[V2UnitS0, V2UnitS1]>;  // Integer single-cycle 0/1
58 def V2UnitS   : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3]>;  // Integer single-cycle 0/1/2/3
59 def V2UnitF   : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitM0, V2UnitM1]>;  // Integer single-cycle 0/1 and single/multicycle 0/1
60 def V2UnitI   : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3, V2UnitM0, V2UnitM1]>;  // Integer single-cycle 0/1/2/3 and single/multicycle 0/1
61 def V2UnitM   : ProcResGroup<[V2UnitM0, V2UnitM1]>;  // Integer single/multicycle 0/1
62 def V2UnitL   : ProcResGroup<[V2UnitL01, V2UnitL2]>; // Load/Store 0/1 and Load 2
63 def V2UnitV   : ProcResGroup<[V2UnitV0, V2UnitV1, V2UnitV2, V2UnitV3]>;  // FP/ASIMD 0/1/2/3
64 def V2UnitV01 : ProcResGroup<[V2UnitV0, V2UnitV1]>;  // FP/ASIMD 0/1
65 def V2UnitV02 : ProcResGroup<[V2UnitV0, V2UnitV2]>;  // FP/ASIMD 0/2
66 def V2UnitV13 : ProcResGroup<[V2UnitV1, V2UnitV3]>;  // FP/ASIMD 1/3
67 def V2UnitV23 : ProcResGroup<[V2UnitV2, V2UnitV3]>;  // FP/ASIMD 2/3
69 // Define commonly used read types.
71 // No forwarding is provided for these types.
// Every generic read type listed here gets a ReadAdvance of 0 cycles, i.e.
// none of these operand reads is given a forwarding advance in this model.
72 def : ReadAdvance<ReadI,       0>;
73 def : ReadAdvance<ReadISReg,   0>;
74 def : ReadAdvance<ReadIEReg,   0>;
75 def : ReadAdvance<ReadIM,      0>;
76 def : ReadAdvance<ReadIMA,     0>;
77 def : ReadAdvance<ReadID,      0>;
78 def : ReadAdvance<ReadExtrHi,  0>;
79 def : ReadAdvance<ReadAdrBase, 0>;
80 def : ReadAdvance<ReadST,      0>;
81 def : ReadAdvance<ReadVLD,     0>;
83 // NOTE: Copied from N2.
// Generic write types mapped with an empty resource list. WriteAtomic is
// flagged Unsupported; the other three only receive a latency (1, 1 and 4).
84 def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
85 def : WriteRes<WriteBarrier, []> { let Latency = 1; }
86 def : WriteRes<WriteHint,    []> { let Latency = 1; }
87 def : WriteRes<WriteLDHi,    []> { let Latency = 4; }
89 //===----------------------------------------------------------------------===//
90 // Define customized scheduler read/write types specific to the Neoverse V2.
92 //===----------------------------------------------------------------------===//
94 // Define generic 0 micro-op types
// Write type with Latency 0 and an empty processor-resource list.
95 def V2Write_0c : SchedWriteRes<[]> { let Latency = 0; }
97 // Define generic 1 micro-op types
// Write types for the branch, integer and load/store resources that do not
// set NumMicroOps (the section header calls them 1 micro-op types).
// Name pattern: V2Write_<latency>c_<count><unit>; e.g. V2Write_3c_1M0 has
// Latency 3 and reserves V2UnitM0.
// The two *_1Flg entries list V2UnitFlg in addition to their issue unit.
// V2Write_12c_1M0 and V2Write_20c_1M0 also set ReleaseAtCycles to the same
// value as their latency.
// NOTE(review): ReleaseAtCycles is presumably the number of cycles the listed
// resource stays reserved - confirm against TargetSchedule.td.
99 def V2Write_1c_1B    : SchedWriteRes<[V2UnitB]>   { let Latency = 1; }
100 def V2Write_1c_1F    : SchedWriteRes<[V2UnitF]>   { let Latency = 1; }
101 def V2Write_1c_1F_1Flg : SchedWriteRes<[V2UnitF, V2UnitFlg]>   { let Latency = 1; }
102 def V2Write_1c_1I    : SchedWriteRes<[V2UnitI]>   { let Latency = 1; }
103 def V2Write_1c_1M    : SchedWriteRes<[V2UnitM]>   { let Latency = 1; }
104 def V2Write_1c_1M0   : SchedWriteRes<[V2UnitM0]>  { let Latency = 1; }
105 def V2Write_1c_1L01  : SchedWriteRes<[V2UnitL01]> { let Latency = 1; }
106 def V2Write_2c_1M    : SchedWriteRes<[V2UnitM]>   { let Latency = 2; }
107 def V2Write_2c_1M_1Flg : SchedWriteRes<[V2UnitM, V2UnitFlg]>   { let Latency = 2; }
108 def V2Write_3c_1M    : SchedWriteRes<[V2UnitM]>   { let Latency = 3; }
109 def V2Write_2c_1M0   : SchedWriteRes<[V2UnitM0]>  { let Latency = 2; }
110 def V2Write_3c_1M0   : SchedWriteRes<[V2UnitM0]>  { let Latency = 3; }
111 def V2Write_5c_1M0   : SchedWriteRes<[V2UnitM0]>  { let Latency = 5; }
112 def V2Write_12c_1M0  : SchedWriteRes<[V2UnitM0]>  { let Latency = 12;
113                                                     let ReleaseAtCycles = [12]; }
114 def V2Write_20c_1M0  : SchedWriteRes<[V2UnitM0]>  { let Latency = 20;
115                                                     let ReleaseAtCycles = [20]; }
116 def V2Write_4c_1L    : SchedWriteRes<[V2UnitL]>   { let Latency = 4; }
117 def V2Write_6c_1L    : SchedWriteRes<[V2UnitL]>   { let Latency = 6; }
// Single-resource write types for the FP/ASIMD units; the final entry,
// V2Write_6c_1L01, is on the load/store resource instead. None of them sets
// NumMicroOps.
// Name pattern: V2Write_<latency>c_<count><unit>; e.g. V2Write_4c_1V02 has
// Latency 4 and reserves V2UnitV02.
// Several entries override ReleaseAtCycles. The value sometimes equals the
// latency ([7] on V2Write_7c_1V0, [20] on V2Write_20c_1V0) and sometimes is
// smaller ([2] on V2Write_3c_1V01, [11] on V2Write_12c_1V0, [8] on the 15c/16c
// V02 entries).
// NOTE(review): ReleaseAtCycles is presumably how long the listed resource
// stays reserved - confirm against TargetSchedule.td.
118 def V2Write_2c_1V    : SchedWriteRes<[V2UnitV]>   { let Latency = 2; }
119 def V2Write_2c_1V0   : SchedWriteRes<[V2UnitV0]>  { let Latency = 2; }
120 def V2Write_2c_1V01  : SchedWriteRes<[V2UnitV01]> { let Latency = 2; }
121 def V2Write_2c_1V23  : SchedWriteRes<[V2UnitV23]> { let Latency = 2; }
122 def V2Write_3c_1V    : SchedWriteRes<[V2UnitV]>   { let Latency = 3; }
123 def V2Write_3c_1V01  : SchedWriteRes<[V2UnitV01]> { let Latency = 3;
124                                                     let ReleaseAtCycles = [2]; }
125 def V2Write_3c_1V23  : SchedWriteRes<[V2UnitV23]> { let Latency = 3; }
126 def V2Write_4c_1V    : SchedWriteRes<[V2UnitV]>   { let Latency = 4; }
127 def V2Write_5c_1V    : SchedWriteRes<[V2UnitV]>   { let Latency = 5; }
128 def V2Write_6c_1V    : SchedWriteRes<[V2UnitV]>   { let Latency = 6; }
129 def V2Write_12c_1V   : SchedWriteRes<[V2UnitV]>   { let Latency = 12; }
130 def V2Write_3c_1V0   : SchedWriteRes<[V2UnitV0]>  { let Latency = 3; }
131 def V2Write_3c_1V02  : SchedWriteRes<[V2UnitV02]> { let Latency = 3; }
132 def V2Write_4c_1V0   : SchedWriteRes<[V2UnitV0]>  { let Latency = 4; }
133 def V2Write_4c_1V02  : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
134 def V2Write_7c_1V0   : SchedWriteRes<[V2UnitV0]>  { let Latency = 7;
135                                                     let ReleaseAtCycles = [7]; }
136 def V2Write_7c_1V02  : SchedWriteRes<[V2UnitV02]> { let Latency = 7;
137                                                     let ReleaseAtCycles = [2]; }
138 def V2Write_9c_1V0   : SchedWriteRes<[V2UnitV0]>  { let Latency = 9; }
139 def V2Write_9c_1V02  : SchedWriteRes<[V2UnitV02]> { let Latency = 9;
140                                                     let ReleaseAtCycles = [2]; }
141 def V2Write_10c_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 10; }
142 def V2Write_10c_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 10;
143                                                     let ReleaseAtCycles = [2]; }
144 def V2Write_12c_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 12;
145                                                     let ReleaseAtCycles = [11]; }
146 def V2Write_13c_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 13; }
147 def V2Write_15c_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 15; }
148 def V2Write_15c_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 15;
149                                                     let ReleaseAtCycles = [8]; }
150 def V2Write_16c_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 16; }
151 def V2Write_16c_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 16;
152                                                     let ReleaseAtCycles = [8]; }
153 def V2Write_20c_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 20;
154                                                     let ReleaseAtCycles = [20]; }
155 def V2Write_2c_1V1   : SchedWriteRes<[V2UnitV1]>  { let Latency = 2; }
156 def V2Write_2c_1V13  : SchedWriteRes<[V2UnitV13]> { let Latency = 2; }
157 def V2Write_3c_1V1   : SchedWriteRes<[V2UnitV1]>  { let Latency = 3; }
158 def V2Write_4c_1V1   : SchedWriteRes<[V2UnitV1]>  { let Latency = 4; }
159 def V2Write_4c_1V13  : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
160 def V2Write_6c_1V1   : SchedWriteRes<[V2UnitV1]>  { let Latency = 6; }
161 def V2Write_10c_1V1  : SchedWriteRes<[V2UnitV1]>  { let Latency = 10; }
162 def V2Write_6c_1L01  : SchedWriteRes<[V2UnitL01]> { let Latency = 6; }
164 //===----------------------------------------------------------------------===//
165 // Define generic 2 micro-op types
// Two-micro-op write types, first group (mostly integer and load/store
// combinations). Each def lists exactly two resources and sets
// NumMicroOps = 2. Name pattern: V2Write_<latency>c_<count><unit>_...; a name
// such as V2Write_3c_2M means Latency 3 with V2UnitM listed twice.
// Some resource lists appear more than once and differ only in Latency
// (e.g. the 1c/3c/4c _2M entries, and 6c/7c _1I_1L).
167 def V2Write_1c_1B_1R : SchedWriteRes<[V2UnitB, V2UnitR]> {
168   let Latency     = 1;
169   let NumMicroOps = 2;
172 def V2Write_6c_1M0_1B : SchedWriteRes<[V2UnitM0, V2UnitB]> {
173   let Latency     = 6;
174   let NumMicroOps = 2;
177 def V2Write_9c_1M0_1L : SchedWriteRes<[V2UnitM0, V2UnitL]> {
178   let Latency     = 9;
179   let NumMicroOps = 2;
182 def V2Write_3c_1I_1M : SchedWriteRes<[V2UnitI, V2UnitM]> {
183   let Latency     = 3;
184   let NumMicroOps = 2;
187 def V2Write_1c_2M : SchedWriteRes<[V2UnitM, V2UnitM]> {
188   let Latency     = 1;
189   let NumMicroOps = 2;
192 def V2Write_3c_2M : SchedWriteRes<[V2UnitM, V2UnitM]> {
193   let Latency     = 3;
194   let NumMicroOps = 2;
197 def V2Write_4c_2M : SchedWriteRes<[V2UnitM, V2UnitM]> {
198   let Latency     = 4;
199   let NumMicroOps = 2;
202 def V2Write_5c_1L_1F : SchedWriteRes<[V2UnitL, V2UnitF]> {
203   let Latency     = 5;
204   let NumMicroOps = 2;
207 def V2Write_6c_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> {
208   let Latency     = 6;
209   let NumMicroOps = 2;
212 def V2Write_7c_1F_1L : SchedWriteRes<[V2UnitF, V2UnitL]> {
213   let Latency     = 7;
214   let NumMicroOps = 2;
217 def V2Write_7c_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> {
218   let Latency     = 7;
219   let NumMicroOps = 2;
222 def V2Write_1c_1L01_1D : SchedWriteRes<[V2UnitL01, V2UnitD]> {
223   let Latency     = 1;
224   let NumMicroOps = 2;
227 def V2Write_5c_1M0_1V : SchedWriteRes<[V2UnitM0, V2UnitV]> {
228   let Latency     = 5;
229   let NumMicroOps = 2;
232 def V2Write_2c_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> {
233   let Latency     = 2;
234   let NumMicroOps = 2;
237 def V2Write_2c_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> {
238   let Latency     = 2;
239   let NumMicroOps = 2;
// Two-micro-op write types, second group (FP/ASIMD pairs, load/store plus
// FP/ASIMD, and M0 plus M combinations). Each def lists exactly two resources
// and sets NumMicroOps = 2; the name gives the latency and the listed units
// (e.g. V2Write_4c_2V02 is Latency 4 with V2UnitV02 listed twice).
// The four _1M0_1M entries share one resource list and differ only in Latency
// (1, 2, 3 and 4).
242 def V2Write_2c_2V01  : SchedWriteRes<[V2UnitV01, V2UnitV01]> {
243   let Latency = 2;
244   let NumMicroOps = 2;
247 def V2Write_4c_2V01  : SchedWriteRes<[V2UnitV01, V2UnitV01]> {
248   let Latency = 4;
249   let NumMicroOps = 2;
252 def V2Write_4c_1L01_1V01  : SchedWriteRes<[V2UnitL01, V2UnitV01]> {
253   let Latency = 4;
254   let NumMicroOps = 2;
257 def V2Write_4c_1V13_1V : SchedWriteRes<[V2UnitV13, V2UnitV]> {
258   let Latency     = 4;
259   let NumMicroOps = 2;
262 def V2Write_4c_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> {
263   let Latency     = 4;
264   let NumMicroOps = 2;
267 def V2Write_4c_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> {
268   let Latency     = 4;
269   let NumMicroOps = 2;
272 def V2Write_4c_2V : SchedWriteRes<[V2UnitV, V2UnitV]> {
273   let Latency     = 4;
274   let NumMicroOps = 2;
277 def V2Write_6c_2V : SchedWriteRes<[V2UnitV, V2UnitV]> {
278   let Latency     = 6;
279   let NumMicroOps = 2;
282 def V2Write_6c_2L : SchedWriteRes<[V2UnitL, V2UnitL]> {
283   let Latency     = 6;
284   let NumMicroOps = 2;
287 def V2Write_8c_1L_1V : SchedWriteRes<[V2UnitL, V2UnitV]> {
288   let Latency     = 8;
289   let NumMicroOps = 2;
292 def V2Write_4c_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> {
293   let Latency     = 4;
294   let NumMicroOps = 2;
297 def V2Write_3c_1M0_1M  : SchedWriteRes<[V2UnitM0, V2UnitM]> {
298   let Latency     = 3;
299   let NumMicroOps = 2;
302 def V2Write_4c_1M0_1M  : SchedWriteRes<[V2UnitM0, V2UnitM]> {
303   let Latency     = 4;
304   let NumMicroOps = 2;
307 def V2Write_1c_1M0_1M  : SchedWriteRes<[V2UnitM0, V2UnitM]> {
308   let Latency     = 1;
309   let NumMicroOps = 2;
312 def V2Write_2c_1M0_1M  : SchedWriteRes<[V2UnitM0, V2UnitM]> {
313   let Latency     = 2;
314   let NumMicroOps = 2;
// Two-micro-op write types, third group (mostly FP/ASIMD paired with an
// integer multicycle unit, plus load combinations). Each def lists exactly two
// resources and sets NumMicroOps = 2; the name gives the latency and the
// listed units (e.g. V2Write_6c_1V1_1M0 is Latency 6 on V2UnitV1 + V2UnitM0).
317 def V2Write_6c_2V1 : SchedWriteRes<[V2UnitV1, V2UnitV1]> {
318   let Latency     = 6;
319   let NumMicroOps = 2;
322 def V2Write_4c_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> {
323   let Latency     = 4;
324   let NumMicroOps = 2;
327 def V2Write_5c_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> {
328   let Latency     = 5;
329   let NumMicroOps = 2;
332 def V2Write_5c_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> {
333   let Latency     = 5;
334   let NumMicroOps = 2;
337 def V2Write_5c_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> {
338   let Latency     = 5;
339   let NumMicroOps = 2;
342 def V2Write_6c_1V1_1M0 : SchedWriteRes<[V2UnitV1, V2UnitM0]> {
343   let Latency     = 6;
344   let NumMicroOps = 2;
347 def V2Write_7c_1M0_1V02 : SchedWriteRes<[V2UnitM0, V2UnitV02]> {
348   let Latency     = 7;
349   let NumMicroOps = 2;
352 def V2Write_2c_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> {
353   let Latency     = 2;
354   let NumMicroOps = 2;
357 def V2Write_3c_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> {
358   let Latency     = 3;
359   let NumMicroOps = 2;
362 def V2Write_6c_1V_1V13 : SchedWriteRes<[V2UnitV, V2UnitV13]> {
363   let Latency     = 6;
364   let NumMicroOps = 2;
367 def V2Write_6c_1L_1M : SchedWriteRes<[V2UnitL, V2UnitM]> {
368   let Latency     = 6;
369   let NumMicroOps = 2;
372 def V2Write_6c_1L_1S : SchedWriteRes<[V2UnitL, V2UnitS]> {
373   let Latency     = 6;
374   let NumMicroOps = 2;
377 def V2Write_4c_2V13 : SchedWriteRes<[V2UnitV13, V2UnitV13]> {
378   let Latency     = 4;
379   let NumMicroOps = 2;
382 def V2Write_8c_1M0_1V01 : SchedWriteRes<[V2UnitM0, V2UnitV01]> {
383   let Latency     = 8;
384   let NumMicroOps = 2;
387 //===----------------------------------------------------------------------===//
388 // Define generic 3 micro-op types
// Three-micro-op write types. Each def lists exactly three resources and sets
// NumMicroOps = 3. Name pattern: V2Write_<latency>c_<count><unit>_...; e.g.
// V2Write_9c_1L_2V is Latency 9 with one V2UnitL and two V2UnitV entries.
// V2Write_8c_1L_2V and V2Write_9c_1L_2V share a resource list and differ only
// in Latency, as do the 2c/4c _1L01_2V01 pair.
390 def V2Write_1c_1L01_1D_1I : SchedWriteRes<[V2UnitL01, V2UnitD, V2UnitI]> {
391   let Latency     = 1;
392   let NumMicroOps = 3;
395 def V2Write_2c_1L01_1V01_1I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitI]> {
396   let Latency     = 2;
397   let NumMicroOps = 3;
400 def V2Write_2c_1L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> {
401   let Latency     = 2;
402   let NumMicroOps = 3;
405 def V2Write_4c_1L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> {
406   let Latency     = 4;
407   let NumMicroOps = 3;
410 def V2Write_9c_1L_2V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> {
411   let Latency     = 9;
412   let NumMicroOps = 3;
415 def V2Write_4c_3V01  : SchedWriteRes<[V2UnitV01, V2UnitV01, V2UnitV01]> {
416   let Latency = 4;
417   let NumMicroOps = 3;
420 def V2Write_7c_1M_1M0_1V : SchedWriteRes<[V2UnitM, V2UnitM0, V2UnitV]> {
421   let Latency     = 7;
422   let NumMicroOps = 3;
425 def V2Write_2c_1L01_1S_1V : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV]> {
426   let Latency     = 2;
427   let NumMicroOps = 3;
430 def V2Write_2c_1L01_1S_1V01 : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV01]> {
431   let Latency     = 2;
432   let NumMicroOps = 3;
435 def V2Write_6c_3L : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL]> {
436   let Latency     = 6;
437   let NumMicroOps = 3;
440 def V2Write_6c_3V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV]> {
441   let Latency     = 6;
442   let NumMicroOps = 3;
445 def V2Write_8c_1L_2V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> {
446   let Latency     = 8;
447   let NumMicroOps = 3;
450 //===----------------------------------------------------------------------===//
451 // Define generic 4 micro-op types
// Four-micro-op write types, first group. Each def lists exactly four
// resources and sets NumMicroOps = 4. Name pattern:
// V2Write_<latency>c_<count><unit>_...; e.g. V2Write_5c_1I_3L is Latency 5
// with one V2UnitI and three V2UnitL entries.
// Several resource lists recur with a different Latency only (2c/4c
// _2L01_2V01, 6c/8c _2V_2V13, and 6c/8c _4V).
453 def V2Write_2c_1L01_2V01_1I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01,
454                                              V2UnitI]> {
455   let Latency     = 2;
456   let NumMicroOps = 4;
459 def V2Write_2c_2L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01,
460                                           V2UnitV01, V2UnitV01]> {
461   let Latency     = 2;
462   let NumMicroOps = 4;
465 def V2Write_4c_2L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01,
466                                           V2UnitV01, V2UnitV01]> {
467   let Latency     = 4;
468   let NumMicroOps = 4;
471 def V2Write_5c_1I_3L : SchedWriteRes<[V2UnitI, V2UnitL, V2UnitL, V2UnitL]> {
472   let Latency     = 5;
473   let NumMicroOps = 4;
476 def V2Write_9c_2L_2V1 : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV1,
477                                        V2UnitV1]> {
478   let Latency     = 9;
479   let NumMicroOps = 4;
482 def V2Write_6c_4V0 : SchedWriteRes<[V2UnitV0, V2UnitV0, V2UnitV0, V2UnitV0]> {
483   let Latency     = 6;
484   let NumMicroOps = 4;
487 def V2Write_8c_4V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
488   let Latency     = 8;
489   let NumMicroOps = 4;
492 def V2Write_6c_2V_2V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13,
493                                         V2UnitV13]> {
494   let Latency     = 6;
495   let NumMicroOps = 4;
498 def V2Write_8c_2V_2V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13,
499                                         V2UnitV13]> {
500   let Latency     = 8;
501   let NumMicroOps = 4;
504 def V2Write_6c_4V02 : SchedWriteRes<[V2UnitV02, V2UnitV02, V2UnitV02,
505                                      V2UnitV02]> {
506   let Latency     = 6;
507   let NumMicroOps = 4;
510 def V2Write_6c_4V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
511   let Latency     = 6;
512   let NumMicroOps = 4;
// Four-micro-op write types, second group. Each def lists exactly four
// resources and sets NumMicroOps = 4; the name gives the latency and the
// listed units (e.g. V2Write_6c_1L01_3V01 is Latency 6 with one V2UnitL01 and
// three V2UnitV01 entries).
// Pairs that share a resource list and differ only in Latency: 8c/9c _2L_2V,
// 2c/4c _2L01_2V, and 4c/5c _2M0_2M.
515 def V2Write_8c_2L_2V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> {
516   let Latency     = 8;
517   let NumMicroOps = 4;
520 def V2Write_9c_2L_2V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> {
521   let Latency     = 9;
522   let NumMicroOps = 4;
525 def V2Write_2c_2L01_2V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV,
526                                         V2UnitV]> {
527   let Latency     = 2;
528   let NumMicroOps = 4;
531 def V2Write_4c_2L01_2V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV,
532                                         V2UnitV]> {
533   let Latency     = 4;
534   let NumMicroOps = 4;
537 def V2Write_8c_2M0_2V02 : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitV02,
538                                         V2UnitV02]> {
539   let Latency     = 8;
540   let NumMicroOps = 4;
543 def V2Write_8c_2V_2V1 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV1,
544                                        V2UnitV1]> {
545   let Latency     = 8;
546   let NumMicroOps = 4;
549 def V2Write_4c_2M0_2M : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM,
550                                        V2UnitM]> {
551   let Latency     = 4;
552   let NumMicroOps = 4;
555 def V2Write_5c_2M0_2M : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM,
556                                        V2UnitM]> {
557   let Latency     = 5;
558   let NumMicroOps = 4;
561 def V2Write_6c_2I_2L : SchedWriteRes<[V2UnitI, V2UnitI, V2UnitL, V2UnitL]> {
562   let Latency     = 6;
563   let NumMicroOps = 4;
566 def V2Write_7c_4L : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL]> {
567   let Latency     = 7;
568   let NumMicroOps = 4;
571 def V2Write_6c_1L01_3V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01,
572                                           V2UnitV01]> {
573   let Latency     = 6;
574   let NumMicroOps = 4;
577 //===----------------------------------------------------------------------===//
578 // Define generic 5 micro-op types
// Five-micro-op write types. Each def lists exactly five resources and sets
// NumMicroOps = 5; the name gives the latency and the listed units (e.g.
// V2Write_8c_2L_3V is Latency 8 with two V2UnitL and three V2UnitV entries).
// V2Write_9c_1L_4V and V2Write_10c_1L_4V differ only in Latency.
580 def V2Write_2c_1L01_2V01_2I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01,
581                                              V2UnitI, V2UnitI]> {
582   let Latency     = 2;
583   let NumMicroOps = 5;
586 def V2Write_8c_2L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV,
587                                       V2UnitV]> {
588   let Latency     = 8;
589   let NumMicroOps = 5;
592 def V2Write_9c_1L_4V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV,
593                                       V2UnitV]> {
594   let Latency     = 9;
595   let NumMicroOps = 5;
598 def V2Write_10c_1L_4V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV,
599                                        V2UnitV]> {
600   let Latency     = 10;
601   let NumMicroOps = 5;
604 def V2Write_6c_5V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV,
605                                    V2UnitV]> {
606   let Latency     = 6;
607   let NumMicroOps = 5;
610 //===----------------------------------------------------------------------===//
611 // Define generic 6 micro-op types
// Six-micro-op write types. Each def lists exactly six resources and sets
// NumMicroOps = 6; the name gives the latency and the listed units (e.g.
// V2Write_9c_2L_2V_2S is Latency 9 with two each of V2UnitL, V2UnitV and
// V2UnitS).
// Pairs that share a resource list and differ only in Latency: 8c/9c _3L_3V
// and 4c/5c _2L01_4V01.
613 def V2Write_8c_3L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
614                                       V2UnitV, V2UnitV, V2UnitV]> {
615   let Latency     = 8;
616   let NumMicroOps = 6;
619 def V2Write_9c_3L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
620                                       V2UnitV, V2UnitV, V2UnitV]> {
621   let Latency     = 9;
622   let NumMicroOps = 6;
625 def V2Write_9c_2L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV,
626                                       V2UnitV, V2UnitV, V2UnitV]> {
627   let Latency     = 9;
628   let NumMicroOps = 6;
631 def V2Write_9c_2L_2V_2S : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV,
632                                          V2UnitV, V2UnitS, V2UnitS]> {
633   let Latency     = 9;
634   let NumMicroOps = 6;
637 def V2Write_9c_2V_4V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13,
638                                         V2UnitV13, V2UnitV13, V2UnitV13]> {
639   let Latency     = 9;
640   let NumMicroOps = 6;
643 def V2Write_2c_3L01_3V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
644                                         V2UnitV, V2UnitV, V2UnitV]> {
645   let Latency     = 2;
646   let NumMicroOps = 6;
649 def V2Write_4c_2L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01,
650                                           V2UnitV01, V2UnitV01, V2UnitV01]> {
651   let Latency     = 4;
652   let NumMicroOps = 6;
655 def V2Write_5c_2L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01,
656                                           V2UnitV01, V2UnitV01, V2UnitV01]> {
657   let Latency     = 5;
658   let NumMicroOps = 6;
661 def V2Write_2c_3L01_3V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
662                                           V2UnitV01, V2UnitV01, V2UnitV01]> {
663   let Latency     = 2;
664   let NumMicroOps = 6;
667 def V2Write_4c_2L01_2S_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitS,
668                                              V2UnitS, V2UnitV01, V2UnitV01]> {
669   let Latency     = 4;
670   let NumMicroOps = 6;
673 //===----------------------------------------------------------------------===//
674 // Define generic 7 micro-op types
// Seven-micro-op write type: Latency 8, three V2UnitL entries followed by four
// V2UnitV entries, NumMicroOps = 7.
676 def V2Write_8c_3L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
677                                       V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
678   let Latency     = 8;
679   let NumMicroOps = 7;
682 //===----------------------------------------------------------------------===//
683 // Define generic 8 micro-op types
// Eight-micro-op write types. Each def lists exactly eight resources and sets
// NumMicroOps = 8; the name gives the latency and the listed units (e.g.
// V2Write_6c_2L01_6V01 is Latency 6 with two V2UnitL01 and six V2UnitV01
// entries). V2Write_2c_4L01_4V01 and V2Write_4c_4L01_4V01 differ only in
// Latency.
685 def V2Write_2c_4L01_4V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
686                                         V2UnitL01, V2UnitV, V2UnitV, V2UnitV,
687                                         V2UnitV]> {
688   let Latency     = 2;
689   let NumMicroOps = 8;
692 def V2Write_2c_4L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
693                                           V2UnitL01, V2UnitV01, V2UnitV01,
694                                           V2UnitV01, V2UnitV01]> {
695   let Latency     = 2;
696   let NumMicroOps = 8;
699 def V2Write_4c_4L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
700                                           V2UnitL01, V2UnitV01, V2UnitV01,
701                                           V2UnitV01, V2UnitV01]> {
702   let Latency     = 4;
703   let NumMicroOps = 8;
706 def V2Write_6c_2L01_6V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01,
707                                           V2UnitV01, V2UnitV01, V2UnitV01,
708                                           V2UnitV01, V2UnitV01]> {
709   let Latency     = 6;
710   let NumMicroOps = 8;
713 def V2Write_8c_4L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL,
714                                       V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
715   let Latency     = 8;
716   let NumMicroOps = 8;
719 //===----------------------------------------------------------------------===//
720 // Define generic 9 micro-op types
// Nine-micro-op write types. Each def lists exactly nine resources and sets
// NumMicroOps = 9; the name gives the latency and the listed units (e.g.
// V2Write_10c_3V_3L_3S is Latency 10 with three each of V2UnitV, V2UnitL and
// V2UnitS).
722 def V2Write_6c_3L01_6V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
723                                           V2UnitV01, V2UnitV01, V2UnitV01,
724                                           V2UnitV01, V2UnitV01, V2UnitV01]> {
725   let Latency     = 6;
726   let NumMicroOps = 9;
729 def V2Write_10c_1L_8V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV,
730                                        V2UnitV, V2UnitV, V2UnitV, V2UnitV,
731                                        V2UnitV]> {
732   let Latency     = 10;
733   let NumMicroOps = 9;
736 def V2Write_10c_3V_3L_3S : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV,
737                                           V2UnitL, V2UnitL, V2UnitL,
738                                           V2UnitS, V2UnitS, V2UnitS]> {
739   let Latency     = 10;
740   let NumMicroOps = 9;
743 //===----------------------------------------------------------------------===//
744 // Define generic 10 micro-op types
// Ten-micro-op write type: Latency 9, six V2UnitL entries followed by four
// V2UnitV entries, NumMicroOps = 10.
746 def V2Write_9c_6L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL,
747                                       V2UnitL, V2UnitL, V2UnitV, V2UnitV,
748                                       V2UnitV, V2UnitV]> {
749   let Latency     = 9;
750   let NumMicroOps = 10;
753 //===----------------------------------------------------------------------===//
754 // Define generic 12 micro-op types
// Twelve-micro-op write types. Each def lists four load-side entries followed
// by eight FP/ASIMD entries (twelve in total) and sets NumMicroOps = 12.
// V2Write_9c_4L_8V and V2Write_10c_4L_8V share a resource list and differ only
// in Latency.
756 def V2Write_5c_4L01_8V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
757                                           V2UnitL01, V2UnitV01, V2UnitV01,
758                                           V2UnitV01, V2UnitV01, V2UnitV01,
759                                           V2UnitV01, V2UnitV01, V2UnitV01]> {
760   let Latency     = 5;
761   let NumMicroOps = 12;
764 def V2Write_9c_4L_8V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
765                                       V2UnitL, V2UnitV, V2UnitV,
766                                       V2UnitV, V2UnitV, V2UnitV,
767                                       V2UnitV, V2UnitV, V2UnitV]> {
768   let Latency     = 9;
769   let NumMicroOps = 12;
772 def V2Write_10c_4L_8V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
773                                        V2UnitL, V2UnitV, V2UnitV,
774                                        V2UnitV, V2UnitV, V2UnitV,
775                                        V2UnitV, V2UnitV, V2UnitV]> {
776   let Latency     = 10;
777   let NumMicroOps = 12;
780 //===----------------------------------------------------------------------===//
781 // Define generic 16 micro-op types
// Sixteen-micro-op write types, both with NumMicroOps = 16:
//   V2Write_7c_4L01_12V01 - Latency 7, four V2UnitL01 + twelve V2UnitV01.
//   V2Write_10c_4L_8V_4S  - Latency 10, four V2UnitL + eight V2UnitV + four
//                           V2UnitS.
783 def V2Write_7c_4L01_12V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
784                                            V2UnitL01, V2UnitV01, V2UnitV01,
785                                            V2UnitV01, V2UnitV01, V2UnitV01,
786                                            V2UnitV01, V2UnitV01, V2UnitV01,
787                                            V2UnitV01, V2UnitV01, V2UnitV01,
788                                            V2UnitV01]> {
789   let Latency     = 7;
790   let NumMicroOps = 16;
793 def V2Write_10c_4L_8V_4S : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
794                                           V2UnitL, V2UnitV, V2UnitV,
795                                           V2UnitV, V2UnitV, V2UnitV,
796                                           V2UnitV, V2UnitV, V2UnitV,
797                                           V2UnitS, V2UnitS, V2UnitS,
798                                           V2UnitS]> {
799   let Latency     = 10;
800   let NumMicroOps = 16;
803 //===----------------------------------------------------------------------===//
804 // Define generic 18 micro-op types
// Eighteen-micro-op write type: Latency 7, nine V2UnitL01 entries followed by
// nine V2UnitV01 entries, NumMicroOps = 18.
806 def V2Write_7c_9L01_9V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
807                                           V2UnitL01, V2UnitL01, V2UnitL01,
808                                           V2UnitL01, V2UnitL01, V2UnitL01,
809                                           V2UnitV01, V2UnitV01, V2UnitV01,
810                                           V2UnitV01, V2UnitV01, V2UnitV01,
811                                           V2UnitV01, V2UnitV01, V2UnitV01]> {
812   let Latency     = 7;
813   let NumMicroOps = 18;
816 //===----------------------------------------------------------------------===//
817 // Define generic 27 micro-op types
// Twenty-seven-micro-op write type: Latency 7, nine entries each of V2UnitL01,
// V2UnitS and V2UnitV01 (in that order), NumMicroOps = 27.
819 def V2Write_7c_9L01_9S_9V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
820                                              V2UnitL01, V2UnitL01, V2UnitL01,
821                                              V2UnitL01, V2UnitL01, V2UnitL01,
822                                              V2UnitS, V2UnitS, V2UnitS,
823                                              V2UnitS, V2UnitS, V2UnitS,
824                                              V2UnitS, V2UnitS, V2UnitS,
825                                              V2UnitV01, V2UnitV01, V2UnitV01,
826                                              V2UnitV01, V2UnitV01, V2UnitV01,
827                                              V2UnitV01, V2UnitV01,
828                                              V2UnitV01]> {
829   let Latency     = 7;
830   let NumMicroOps = 27;
833 //===----------------------------------------------------------------------===//
834 // Define generic 36 micro-op types
// Thirty-six-micro-op write type: Latency 11, eighteen V2UnitL01 entries
// followed by eighteen V2UnitV01 entries, NumMicroOps = 36.
836 def V2Write_11c_18L01_18V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
837                                              V2UnitL01, V2UnitL01, V2UnitL01,
838                                              V2UnitL01, V2UnitL01, V2UnitL01,
839                                              V2UnitL01, V2UnitL01, V2UnitL01,
840                                              V2UnitL01, V2UnitL01, V2UnitL01,
841                                              V2UnitL01, V2UnitL01, V2UnitL01,
842                                              V2UnitV01, V2UnitV01, V2UnitV01,
843                                              V2UnitV01, V2UnitV01, V2UnitV01,
844                                              V2UnitV01, V2UnitV01, V2UnitV01,
845                                              V2UnitV01, V2UnitV01, V2UnitV01,
846                                              V2UnitV01, V2UnitV01, V2UnitV01,
847                                              V2UnitV01, V2UnitV01,
848                                              V2UnitV01]> {
849   let Latency     = 11;
850   let NumMicroOps = 36;
853 //===----------------------------------------------------------------------===//
854 // Define generic 54 micro-op types
856 def V2Write_11c_18L01_18S_18V01 : SchedWriteRes<[V2UnitL01, V2UnitL01,
857                                                  V2UnitL01, V2UnitL01,
858                                                  V2UnitL01, V2UnitL01,
859                                                  V2UnitL01, V2UnitL01,
860                                                  V2UnitL01, V2UnitL01,
861                                                  V2UnitL01, V2UnitL01,
862                                                  V2UnitL01, V2UnitL01,
863                                                  V2UnitL01, V2UnitL01,
864                                                  V2UnitL01, V2UnitL01,
865                                                  V2UnitS, V2UnitS, V2UnitS,
866                                                  V2UnitS, V2UnitS, V2UnitS,
867                                                  V2UnitS, V2UnitS, V2UnitS,
868                                                  V2UnitS, V2UnitS, V2UnitS,
869                                                  V2UnitS, V2UnitS, V2UnitS,
870                                                  V2UnitS, V2UnitS, V2UnitS,
871                                                  V2UnitV01, V2UnitV01,
872                                                  V2UnitV01, V2UnitV01,
873                                                  V2UnitV01, V2UnitV01,
874                                                  V2UnitV01, V2UnitV01,
875                                                  V2UnitV01, V2UnitV01,
876                                                  V2UnitV01, V2UnitV01,
877                                                  V2UnitV01, V2UnitV01,
878                                                  V2UnitV01, V2UnitV01,
879                                                  V2UnitV01, V2UnitV01]> {
880   let Latency     = 11; // the "11c" in the record name
881   let NumMicroOps = 54; // 18 V2UnitL01 + 18 V2UnitS + 18 V2UnitV01 entries listed above
882 }
884 //===----------------------------------------------------------------------===//
885 // Define predicate-controlled types
887 def V2Write_ArithI : SchedWriteVariant<[ // each variant lists a predicated SchedVar, then a NoSchedPred fallback
888                        SchedVar<IsCheapLSL,  [V2Write_1c_1I]>,
889                        SchedVar<NoSchedPred, [V2Write_2c_1M]>]>;
891 def V2Write_ArithF : SchedWriteVariant<[ // flag-setting counterpart of V2Write_ArithI (the _1Flg writes)
892                        SchedVar<IsCheapLSL,  [V2Write_1c_1F_1Flg]>,
893                        SchedVar<NoSchedPred, [V2Write_2c_1M_1Flg]>]>;
895 def V2Write_Logical : SchedWriteVariant<[ // same two writes as V2Write_ArithF, keyed on NeoverseNoLSL instead
896                         SchedVar<NeoverseNoLSL, [V2Write_1c_1F_1Flg]>,
897                         SchedVar<NoSchedPred,   [V2Write_2c_1M_1Flg]>]>;
899 def V2Write_Extr : SchedWriteVariant<[ // IsRORImmIdiomPred picks the 1c write; otherwise 3c
900                      SchedVar<IsRORImmIdiomPred, [V2Write_1c_1I]>,
901                      SchedVar<NoSchedPred,       [V2Write_3c_1I_1M]>]>;
903 def V2Write_LdrHQ : SchedWriteVariant<[ // NeoverseHQForm picks the write with the extra I unit and one more cycle
904                       SchedVar<NeoverseHQForm,  [V2Write_7c_1I_1L]>,
905                       SchedVar<NoSchedPred,     [V2Write_6c_1L]>]>;
907 def V2Write_StrHQ : SchedWriteVariant<[ // NeoverseHQForm picks the write with the extra I unit; latency stays 2c
908                       SchedVar<NeoverseHQForm,  [V2Write_2c_1L01_1V01_1I]>,
909                       SchedVar<NoSchedPred,     [V2Write_2c_1L01_1V01]>]>;
911 def V2Write_0or1c_1I : SchedWriteVariant<[ // "0orNc": V2Write_0c when NeoverseZeroMove matches, else the Nc write
912                       SchedVar<NeoverseZeroMove, [V2Write_0c]>,
913                       SchedVar<NoSchedPred,      [V2Write_1c_1I]>]>;
915 def V2Write_0or2c_1V : SchedWriteVariant<[
916                       SchedVar<NeoverseZeroMove, [V2Write_0c]>,
917                       SchedVar<NoSchedPred,      [V2Write_2c_1V]>]>;
919 def V2Write_0or3c_1M0 : SchedWriteVariant<[
920                       SchedVar<NeoverseZeroMove, [V2Write_0c]>,
921                       SchedVar<NoSchedPred,      [V2Write_3c_1M0]>]>;
923 def V2Write_2or3c_1M : SchedWriteVariant<[ // "XorYc": NeoversePdIsPg selects the slower Yc write, fallback is Xc
924                       SchedVar<NeoversePdIsPg,  [V2Write_3c_1M]>,
925                       SchedVar<NoSchedPred,     [V2Write_2c_1M]>]>;
927 def V2Write_3or4c_2M : SchedWriteVariant<[
928                       SchedVar<NeoversePdIsPg,  [V2Write_4c_2M]>,
929                       SchedVar<NoSchedPred,     [V2Write_3c_2M]>]>;
931 def V2Write_1or2c_1M0 : SchedWriteVariant<[
932                       SchedVar<NeoversePdIsPg,  [V2Write_2c_1M0]>,
933                       SchedVar<NoSchedPred,     [V2Write_1c_1M0]>]>;
935 def V2Write_2or3c_1M0 : SchedWriteVariant<[
936                       SchedVar<NeoversePdIsPg,  [V2Write_3c_1M0]>,
937                       SchedVar<NoSchedPred,     [V2Write_2c_1M0]>]>;
939 def V2Write_1or2c_1M0_1M : SchedWriteVariant<[
940                       SchedVar<NeoversePdIsPg,  [V2Write_2c_1M0_1M]>,
941                       SchedVar<NoSchedPred,     [V2Write_1c_1M0_1M]>]>;
943 def V2Write_3or4c_1M0_1M : SchedWriteVariant<[
944                       SchedVar<NeoversePdIsPg,  [V2Write_4c_1M0_1M]>,
945                       SchedVar<NoSchedPred,     [V2Write_3c_1M0_1M]>]>;
947 def V2Write_4or5c_2M0_2M : SchedWriteVariant<[
948                       SchedVar<NeoversePdIsPg,  [V2Write_5c_2M0_2M]>,
949                       SchedVar<NoSchedPred,     [V2Write_4c_2M0_2M]>]>;
951 def V2Write_4or5c_1V0_1M0 : SchedWriteVariant<[
952                       SchedVar<NeoversePdIsPg,  [V2Write_5c_1V0_1M0]>,
953                       SchedVar<NoSchedPred,     [V2Write_4c_1V0_1M0]>]>;
955 def V2Write_2or3c_1V0_1M : SchedWriteVariant<[
956                       SchedVar<NeoversePdIsPg,  [V2Write_3c_1V0_1M]>,
957                       SchedVar<NoSchedPred,     [V2Write_2c_1V0_1M]>]>;
959 def V2Write_IncDec : SchedWriteVariant<[ // NeoverseCheapIncDec picks the 1c write; otherwise the 2c M write
960                       SchedVar<NeoverseCheapIncDec, [V2Write_1c_1F]>,
961                       SchedVar<NoSchedPred,         [V2Write_2c_1M]>]>;
963 //===----------------------------------------------------------------------===//
964 // Define forwarded types
966 // NOTE: SOG, p. 16, n. 2: Accumulator forwarding is not supported for
967 // consumers of 64 bit multiply high operations?
968 def V2Wr_IM   : SchedWriteRes<[V2UnitM]>  { let Latency = 2; }
969 def V2Wr_IMA  : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
970 def V2Wr_IMUL : SchedWriteVariant<[ // V2Wr_IM when IsReg3ZeroPred matches, otherwise V2Wr_IMA
971                   SchedVar<IsReg3ZeroPred, [V2Wr_IM]>,
972                   SchedVar<NoSchedPred,    [V2Wr_IMA]>]>;
973 def V2Rd_IMA  : SchedReadAdvance<1, [V2Wr_IMA]>; // 1-cycle read advance, applied only to V2Wr_IMA producers
975 def V2Wr_FMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; } // each V2Wr_*/V2Rd_* pair below: a write plus the read advance for its consumers
976 def V2Rd_FMA : SchedReadAdvance<2, [WriteFMul, V2Wr_FMA]>; // advance applies to WriteFMul producers as well as V2Wr_FMA
978 def V2Wr_VA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
979 def V2Rd_VA : SchedReadAdvance<3, [V2Wr_VA]>;
981 def V2Wr_VDOT : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
982 def V2Rd_VDOT : SchedReadAdvance<2, [V2Wr_VDOT]>;
984 def V2Wr_VMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
985 def V2Rd_VMMA : SchedReadAdvance<2, [V2Wr_VMMA]>;
987 def V2Wr_VMA : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
988 def V2Rd_VMA : SchedReadAdvance<3, [V2Wr_VMA]>;
990 def V2Wr_VMAH : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; }
991 def V2Rd_VMAH : SchedReadAdvance<2, [V2Wr_VMAH]>;
993 def V2Wr_VMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
994 def V2Rd_VMAL : SchedReadAdvance<3, [V2Wr_VMAL]>;
996 def V2Wr_VPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
997 def V2Rd_VPA : SchedReadAdvance<3, [V2Wr_VPA]>;
999 def V2Wr_VSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
1000 def V2Rd_VSA : SchedReadAdvance<3, [V2Wr_VSA]>;
1002 def V2Wr_VFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
1003 def V2Rd_VFCMA : SchedReadAdvance<2, [V2Wr_VFCMA]>;
1005 def V2Wr_VFM  : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
1006 def V2Wr_VFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
1007 def V2Rd_VFMA : SchedReadAdvance<2, [V2Wr_VFM, V2Wr_VFMA]>; // one read advance shared by both V2Wr_VFM and V2Wr_VFMA producers
1009 def V2Wr_VFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
1010 def V2Rd_VFMAL : SchedReadAdvance<2, [V2Wr_VFMAL]>;
1012 def V2Wr_VBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1013 def V2Rd_VBFDOT : SchedReadAdvance<2, [V2Wr_VBFDOT]>;
1014 def V2Wr_VBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; }
1015 def V2Rd_VBFMMA : SchedReadAdvance<2, [V2Wr_VBFMMA]>;
1016 def V2Wr_VBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1017 def V2Rd_VBFMAL : SchedReadAdvance<3, [V2Wr_VBFMAL]>;
1019 def V2Wr_CRC : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
1020 def V2Rd_CRC : SchedReadAdvance<1, [V2Wr_CRC]>;
1022 def V2Wr_ZA  : SchedWriteRes<[V2UnitV13]> { let Latency = 4; } // ZA, ZPA and ZSA share the same unit, latency and read advance
1023 def V2Rd_ZA  : SchedReadAdvance<3, [V2Wr_ZA]>;
1024 def V2Wr_ZPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
1025 def V2Rd_ZPA : SchedReadAdvance<3, [V2Wr_ZPA]>;
1026 def V2Wr_ZSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
1027 def V2Rd_ZSA : SchedReadAdvance<3, [V2Wr_ZSA]>;
1029 def V2Wr_ZDOTB : SchedWriteRes<[V2UnitV]>   { let Latency = 3; }
1030 def V2Rd_ZDOTB : SchedReadAdvance<2, [V2Wr_ZDOTB]>;
1031 def V2Wr_ZDOTH : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
1032 def V2Rd_ZDOTH : SchedReadAdvance<3, [V2Wr_ZDOTH]>;
1034 // NOTE: SOG p. 43: Complex multiply-add B, H, S element size: How to reduce
1035 // throughput to 1 in case of forwarding?
1036 def V2Wr_ZCMABHS : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } // B/H/S form: one V02 entry
1037 def V2Rd_ZCMABHS : SchedReadAdvance<3, [V2Wr_ZCMABHS]>;
1038 def V2Wr_ZCMAD   : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; } // D form: two V02 entries, one cycle slower
1039 def V2Rd_ZCMAD   : SchedReadAdvance<2, [V2Wr_ZCMAD]>;
1041 def V2Wr_ZMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
1042 def V2Rd_ZMMA : SchedReadAdvance<2, [V2Wr_ZMMA]>;
1044 def V2Wr_ZMABHS : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } // B/H/S form takes one V02 entry, like V2Wr_ZCMABHS and V2Wr_ZMASQBHS; only the D forms list two
1045 def V2Rd_ZMABHS : SchedReadAdvance<3, [V2Wr_ZMABHS]>; // 3-cycle read advance, applied only to V2Wr_ZMABHS producers
1046 def V2Wr_ZMAD  : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; } // D form: two V02 entries
1047 def V2Rd_ZMAD  : SchedReadAdvance<2, [V2Wr_ZMAD]>;
1049 def V2Wr_ZMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
1050 def V2Rd_ZMAL : SchedReadAdvance<3, [V2Wr_ZMAL]>;
1052 def V2Wr_ZMASQL   : SchedWriteRes<[V2UnitV02]>            { let Latency = 4; }
1053 def V2Wr_ZMASQBHS : SchedWriteRes<[V2UnitV02]>            { let Latency = 4; }
1054 def V2Wr_ZMASQD   : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
1055 def V2Rd_ZMASQ    : SchedReadAdvance<2, [V2Wr_ZMASQL, V2Wr_ZMASQBHS, // one read advance shared by all three ZMASQ writes
1056                                          V2Wr_ZMASQD]>;
1058 def V2Wr_ZFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1059 def V2Rd_ZFCMA : SchedReadAdvance<3, [V2Wr_ZFCMA]>;
1061 def V2Wr_ZFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
1062 def V2Rd_ZFMA : SchedReadAdvance<2, [V2Wr_ZFMA]>;
1064 def V2Wr_ZFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
1065 def V2Rd_ZFMAL : SchedReadAdvance<2, [V2Wr_ZFMAL]>;
1067 def V2Wr_ZBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1068 def V2Rd_ZBFDOT : SchedReadAdvance<2, [V2Wr_ZBFDOT]>;
1069 def V2Wr_ZBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; }
1070 def V2Rd_ZBFMMA : SchedReadAdvance<2, [V2Wr_ZBFMMA]>;
1071 def V2Wr_ZBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1072 def V2Rd_ZBFMAL : SchedReadAdvance<3, [V2Wr_ZBFMAL]>;
1074 //===----------------------------------------------------------------------===//
1075 // Define types with long resource cycles (rc)
1077 def V2Write_6c_1V1_5rc    : SchedWriteRes<[V2UnitV1]>  { let Latency =  6; let ReleaseAtCycles = [ 5]; } // name = <Latency>c_1<unit>_<ReleaseAtCycles>rc
1078 def V2Write_7c_1V02_7rc   : SchedWriteRes<[V2UnitV02]> { let Latency =  7; let ReleaseAtCycles = [ 7]; }
1079 def V2Write_10c_1V02_5rc  : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 5]; }
1080 def V2Write_10c_1V02_9rc  : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
1081 def V2Write_10c_1V02_10rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [10]; }
1082 def V2Write_10c_1V1_9rc   : SchedWriteRes<[V2UnitV1]>  { let Latency = 10; let ReleaseAtCycles = [ 9]; }
1083 def V2Write_13c_1V02_12rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [12]; }
1084 def V2Write_13c_1V02_13rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [13]; }
1085 def V2Write_15c_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 15; let ReleaseAtCycles = [14]; }
1086 def V2Write_16c_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ReleaseAtCycles = [14]; }
1087 def V2Write_16c_1V02_15rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ReleaseAtCycles = [15]; }
1089 // Miscellaneous
1090 // -----------------------------------------------------------------------------
1092 def : InstRW<[WriteI], (instrs COPY)>; // COPY is scheduled as WriteI
1094 // §3.3 Branch instructions
1095 // -----------------------------------------------------------------------------
1097 // Branch, immed
1098 // Compare and branch
1099 def : SchedAlias<WriteBr,    V2Write_1c_1B>; // SchedAlias: instructions using WriteBr get V2Write_1c_1B in this model
1101 // Branch, register
1102 def : SchedAlias<WriteBrReg, V2Write_1c_1B>;
1104 // Branch and link, immed
1105 // Branch and link, register
1106 def : InstRW<[V2Write_1c_1B_1R], (instrs BL, BLR)>; // InstRW: per-instruction mapping for BL and BLR
1108 // §3.4 Arithmetic and Logical Instructions
1109 // -----------------------------------------------------------------------------
1111 // ALU, basic
1112 def : SchedAlias<WriteI, V2Write_1c_1I>;
1114 // ALU, basic, flagset
1115 def : InstRW<[V2Write_1c_1F_1Flg],
1116              (instregex "^(ADD|SUB)S[WX]r[ir]$",
1117                         "^(ADC|SBC)S[WX]r$",
1118                         "^ANDS[WX]ri$",
1119                         "^(AND|BIC)S[WX]rr$")>;
1120 def : InstRW<[V2Write_0or1c_1I], (instregex "^MOVZ[WX]i$")>; // variant write: V2Write_0c or V2Write_1c_1I
1122 // ALU, extend and shift
1123 def : SchedAlias<WriteIEReg, V2Write_2c_1M>;
1125 // Arithmetic, LSL shift, shift <= 4
1126 // Arithmetic, flagset, LSL shift, shift <= 4
1127 // Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
1128 def : SchedAlias<WriteISReg, V2Write_ArithI>; // non-flag-setting forms via the WriteISReg alias
1129 def : InstRW<[V2Write_ArithF],                // flag-setting ADDS/SUBS shifted-register forms
1130              (instregex "^(ADD|SUB)S[WX]rs$")>;
1132 // Arithmetic, immediate to logical address tag
1133 def : InstRW<[V2Write_2c_1M], (instrs ADDG, SUBG)>;
1135 // Conditional compare
1136 def : InstRW<[V2Write_1c_1F_1Flg], (instregex "^CCM[NP][WX][ir]")>;
1138 // Convert floating-point condition flags
1139 // Flag manipulation instructions
1140 def : WriteRes<WriteSys, []> { let Latency = 1; } // empty resource list: latency only
1142 // Insert Random Tags
1143 def : InstRW<[V2Write_2c_1M], (instrs IRG, IRGstack)>;
1145 // Insert Tag Mask
1146 // Subtract Pointer
1147 def : InstRW<[V2Write_1c_1I], (instrs GMI, SUBP)>;
1149 // Subtract Pointer, flagset
1150 def : InstRW<[V2Write_1c_1F_1Flg], (instrs SUBPS)>;
1152 // Logical, shift, no flagset
1153 def : InstRW<[V2Write_1c_1I],    (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
1154 def : InstRW<[V2Write_0or1c_1I], (instregex "^ORR[WX]rs$")>; // ORR is kept separate so it can use the 0-or-1c variant
1156 // Logical, shift, flagset
1157 def : InstRW<[V2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
1159 // Move and shift instructions
1160 // -----------------------------------------------------------------------------
1162 def : SchedAlias<WriteImm, V2Write_1c_1I>;
1164 // §3.5 Divide and multiply instructions
1165 // -----------------------------------------------------------------------------
1167 // SDIV, UDIV
1168 def : SchedAlias<WriteID32,  V2Write_12c_1M0>;
1169 def : SchedAlias<WriteID64,  V2Write_20c_1M0>;
1171 def : SchedAlias<WriteIM32, V2Write_2c_1M>;
1172 def : SchedAlias<WriteIM64, V2Write_2c_1M>;
1174 // Multiply
1175 // Multiply accumulate, W-form
1176 // Multiply accumulate, X-form
1177 def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA], // one write, then three reads; the last read is the forwarded V2Rd_IMA
1178              (instregex "^M(ADD|SUB)[WX]rrr$")>;
1180 // Multiply accumulate long
1181 // Multiply long
1182 def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA], // same write/read list as the W/X forms above
1183              (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
1185 // Multiply high
1186 def : InstRW<[V2Write_3c_1M], (instrs SMULHrr, UMULHrr)>;
1188 // Pointer Authentication Instructions (v8.3 PAC)
1189 // -----------------------------------------------------------------------------
1191 // Authenticate data address
1192 // Authenticate instruction address
1193 // Compute pointer authentication code for data address
1194 // Compute pointer authentication code, using generic key
1195 // Compute pointer authentication code for instruction address
1196 def : InstRW<[V2Write_5c_1M0], (instregex "^AUT", "^PAC")>; // prefix-only patterns: every record name starting with AUT or PAC
1198 // Branch and link, register, with pointer authentication
1199 // Branch, register, with pointer authentication
1200 // Branch, return, with pointer authentication
1201 def : InstRW<[V2Write_6c_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA,
1202                                             BRAAZ, BRAB, BRABZ, RETAA, RETAB,
1203                                             ERETAA, ERETAB)>;
1206 // Load register, with pointer authentication
1207 def : InstRW<[V2Write_9c_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
1209 // Strip pointer authentication code
1210 def : InstRW<[V2Write_2c_1M0], (instrs XPACD, XPACI, XPACLRI)>;
1212 // Miscellaneous data-processing instructions
1213 // -----------------------------------------------------------------------------
1215 // Address generation
1216 def : InstRW<[V2Write_1c_1F], (instrs ADR, ADRP)>;
1218 // Bitfield extract, one reg
1219 // Bitfield extract, two regs
1220 def : SchedAlias<WriteExtr, V2Write_Extr>;
1221 def : InstRW<[V2Write_Extr], (instrs EXTRWrri, EXTRXrri)>; // same variant write as the WriteExtr alias, mapped explicitly for EXTR
1223 // Bitfield move, basic
1224 def : SchedAlias<WriteIS, V2Write_1c_1I>;
1226 // Bitfield move, insert
1227 def : InstRW<[V2Write_2c_1M], (instregex "^BFM[WX]ri$")>;
1229 // Load instructions
1230 // -----------------------------------------------------------------------------
1232 // NOTE: SOG p. 19: Throughput of LDN?P X-form should be 2, but reported as 3.
1234 def : SchedAlias<WriteLD,    V2Write_4c_1L>;
1235 def : SchedAlias<WriteLDIdx, V2Write_4c_1L>;
1237 // Load register, literal
1238 def : InstRW<[V2Write_5c_1L_1F], (instrs LDRWl, LDRXl, LDRSWl, PRFMl)>; // PRFMl shares this entry with the literal loads
1240 // Load pair, signed immed offset, signed words
1241 def : InstRW<[V2Write_5c_1I_3L, WriteLDHi], (instrs LDPSWi)>;
1243 // Load pair, immed post-index or immed pre-index, signed words
1244 def : InstRW<[WriteAdr, V2Write_5c_1I_3L, WriteLDHi], // same as LDPSWi with WriteAdr listed first
1245              (instregex "^LDPSW(post|pre)$")>;
1247 // Store instructions
1248 // -----------------------------------------------------------------------------
1250 // NOTE: SOG, p. 20: Unsure if STRH uses pipeline I.
1252 def : SchedAlias<WriteST,    V2Write_1c_1L01_1D>; // WriteST, WriteSTIdx and WriteSTP all resolve to the same write
1253 def : SchedAlias<WriteSTIdx, V2Write_1c_1L01_1D>;
1254 def : SchedAlias<WriteSTP,   V2Write_1c_1L01_1D>;
1255 def : SchedAlias<WriteAdr,   V2Write_1c_1I>;
1257 // Tag load instructions
1258 // -----------------------------------------------------------------------------
1260 // Load allocation tag
1261 // Load multiple allocation tags
1262 def : InstRW<[V2Write_4c_1L], (instrs LDG, LDGM)>;
1264 // Tag store instructions
1265 // -----------------------------------------------------------------------------
1267 // Store allocation tags to one or two granules, post-index
1268 // Store allocation tags to one or two granules, pre-index
1269 // Store allocation tag to one or two granules, zeroing, post-index
1270 // Store Allocation Tag to one or two granules, zeroing, pre-index
1271 // Store allocation tag and reg pair to memory, post-Index
1272 // Store allocation tag and reg pair to memory, pre-Index
1273 def : InstRW<[V2Write_1c_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex, // pre/post-index forms: write includes an I unit
1274                                                 ST2GPreIndex, ST2GPostIndex,
1275                                                 STZGPreIndex, STZGPostIndex,
1276                                                 STZ2GPreIndex, STZ2GPostIndex,
1277                                                 STGPpre, STGPpost)>;
1279 // Store allocation tags to one or two granules, signed offset
1280 // Store allocation tag to two granules, zeroing, signed offset
1281 // Store allocation tag and reg pair to memory, signed offset
1282 // Store multiple allocation tags
1283 def : InstRW<[V2Write_1c_1L01_1D], (instrs STGi, ST2Gi, STZGi, // offset forms: same write without the I unit
1284                                              STZ2Gi, STGPi, STGM, STZGM)>;
1286 // FP data processing instructions
1287 // -----------------------------------------------------------------------------
1289 // FP absolute value
1290 // FP arithmetic
1291 // FP min/max
1292 // FP negate
1293 // FP select
1294 def : SchedAlias<WriteF,     V2Write_2c_1V>;
1296 // FP compare
1297 def : SchedAlias<WriteFCmp,  V2Write_2c_1V0>;
1299 // FP divide, square root
1300 def : SchedAlias<WriteFDiv,  V2Write_7c_1V02>; // the InstRW entries below give per-form latencies for FDIV/FSQRT H, S and D
1302 // FP divide, H-form
1303 def : InstRW<[V2Write_7c_1V02],  (instrs FDIVHrr)>;
1304 // FP divide, S-form
1305 def : InstRW<[V2Write_10c_1V02], (instrs FDIVSrr)>;
1306 // FP divide, D-form
1307 def : InstRW<[V2Write_15c_1V02], (instrs FDIVDrr)>;
1309 // FP square root, H-form
1310 def : InstRW<[V2Write_7c_1V02],  (instrs FSQRTHr)>;
1311 // FP square root, S-form
1312 def : InstRW<[V2Write_9c_1V02],  (instrs FSQRTSr)>;
1313 // FP square root, D-form
1314 def : InstRW<[V2Write_16c_1V02], (instrs FSQRTDr)>;
1316 // FP multiply
1317 def : WriteRes<WriteFMul, [V2UnitV]> { let Latency = 3; } // WriteFMul is also a producer listed in V2Rd_FMA
1319 // FP multiply accumulate
1320 def : InstRW<[V2Wr_FMA, ReadDefault, ReadDefault, V2Rd_FMA], // one write, then three reads; the last read is the forwarded V2Rd_FMA
1321              (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
1323 // FP round to integral
1324 def : InstRW<[V2Write_3c_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$",
1325                                              "^FRINT(32|64)[XZ][SD]r$")>;
1327 // FP miscellaneous instructions
1328 // -----------------------------------------------------------------------------
1330 // FP convert, from gen to vec reg
1331 def : InstRW<[V2Write_3c_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
1333 // FP convert, from vec to gen reg
1334 def : InstRW<[V2Write_3c_1V01],
1335              (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]ri?$")>; // trailing "i?" accepts names ending in either "r" or "ri"
1337 // FP convert, Javascript from vec to gen reg
1338 def : SchedAlias<WriteFCvt, V2Write_3c_1V0>;
1340 // FP convert, from vec to vec reg
1341 def : InstRW<[V2Write_3c_1V02], (instrs FCVTSHr, FCVTDHr, FCVTHSr, FCVTDSr,
1342                                           FCVTHDr, FCVTSDr, FCVTXNv1i64)>;
1344 // FP move, immed
1345 // FP move, register
1346 def : SchedAlias<WriteFImm, V2Write_2c_1V>;
1348 // FP transfer, from gen to low half of vec reg
1349 def : InstRW<[V2Write_0or3c_1M0], // variant write: V2Write_0c or V2Write_3c_1M0
1350              (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
1352 // FP transfer, from gen to high half of vec reg
1353 def : InstRW<[V2Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
1355 // FP transfer, from vec to gen reg
1356 def : SchedAlias<WriteFCopy, V2Write_2c_2V01>;
1358 // FP load instructions
1359 // -----------------------------------------------------------------------------
1361 // Load vector reg, literal, S/D/Q forms
1362 def : InstRW<[V2Write_7c_1F_1L], (instregex "^LDR[SDQ]l$")>;
1364 // Load vector reg, unscaled immed
1365 def : InstRW<[V2Write_6c_1L], (instregex "^LDUR[BHSDQ]i$")>;
1367 // Load vector reg, immed post-index
1368 // Load vector reg, immed pre-index
1369 def : InstRW<[WriteAdr, V2Write_6c_1I_1L], // WriteAdr is listed first, ahead of the load write
1370              (instregex "^LDR[BHSDQ](pre|post)$")>;
1372 // Load vector reg, unsigned immed
1373 def : InstRW<[V2Write_6c_1L], (instregex "^LDR[BHSDQ]ui$")>;
1375 // Load vector reg, register offset, basic
1376 // Load vector reg, register offset, scale, S/D-form
1377 // Load vector reg, register offset, scale, H/Q-form
1378 // Load vector reg, register offset, extend
1379 // Load vector reg, register offset, extend, scale, S/D-form
1380 // Load vector reg, register offset, extend, scale, H/Q-form
1381 def : InstRW<[V2Write_LdrHQ, ReadAdrBase], (instregex "^LDR[BHSDQ]ro[WX]$")>; // all six cases share the V2Write_LdrHQ variant
1383 // Load vector pair, immed offset, S/D-form
1384 def : InstRW<[V2Write_6c_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;
1386 // Load vector pair, immed offset, Q-form
1387 def : InstRW<[V2Write_6c_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
1389 // Load vector pair, immed post-index, S/D-form
1390 // Load vector pair, immed pre-index, S/D-form
1391 def : InstRW<[WriteAdr, V2Write_6c_1I_1L, WriteLDHi],
1392              (instregex "^LDP[SD](pre|post)$")>;
1394 // Load vector pair, immed post-index, Q-form
1395 // Load vector pair, immed pre-index, Q-form
1396 def : InstRW<[WriteAdr, V2Write_6c_2I_2L, WriteLDHi], (instrs LDPQpost,
1397                                                                 LDPQpre)>;
1399 // FP store instructions
1400 // -----------------------------------------------------------------------------
1402 // Store vector reg, unscaled immed, B/H/S/D-form
1403 // Store vector reg, unscaled immed, Q-form
1404 def : InstRW<[V2Write_2c_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>;
1406 // Store vector reg, immed post-index, B/H/S/D-form
1407 // Store vector reg, immed post-index, Q-form
1408 // Store vector reg, immed pre-index, B/H/S/D-form
1409 // Store vector reg, immed pre-index, Q-form
1410 def : InstRW<[WriteAdr, V2Write_2c_1L01_1V01_1I],
1411              (instregex "^STR[BHSDQ](pre|post)$")>;
1413 // Store vector reg, unsigned immed, B/H/S/D-form
1414 // Store vector reg, unsigned immed, Q-form
1415 def : InstRW<[V2Write_2c_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>;
1417 // Store vector reg, register offset, basic, B/H/S/D-form
1418 // Store vector reg, register offset, basic, Q-form
1419 // Store vector reg, register offset, scale, H-form
1420 // Store vector reg, register offset, scale, S/D-form
1421 // Store vector reg, register offset, scale, Q-form
1422 // Store vector reg, register offset, extend, B/H/S/D-form
1423 // Store vector reg, register offset, extend, Q-form
1424 // Store vector reg, register offset, extend, scale, H-form
1425 // Store vector reg, register offset, extend, scale, S/D-form
1426 // Store vector reg, register offset, extend, scale, Q-form
1427 def : InstRW<[V2Write_StrHQ, ReadAdrBase], // all ten cases share the V2Write_StrHQ variant
1428              (instregex "^STR[BHSDQ]ro[WX]$")>;
1430 // Store vector pair, immed offset, S-form
1431 // Store vector pair, immed offset, D-form
1432 def : InstRW<[V2Write_2c_1L01_1V01], (instregex "^STN?P[SD]i$")>;
1434 // Store vector pair, immed offset, Q-form
1435 def : InstRW<[V2Write_2c_1L01_2V01], (instrs STPQi, STNPQi)>;
1437 // Store vector pair, immed post-index, S-form
1438 // Store vector pair, immed post-index, D-form
1439 // Store vector pair, immed pre-index, S-form
1440 // Store vector pair, immed pre-index, D-form
1441 def : InstRW<[WriteAdr, V2Write_2c_1L01_1V01_1I],
1442              (instregex "^STP[SD](pre|post)$")>;
1444 // Store vector pair, immed post-index, Q-form
1445 def : InstRW<[V2Write_2c_1L01_2V01_1I], (instrs STPQpost)>; // NOTE(review): no leading WriteAdr here, unlike the S/D pre/post entry - confirm intended
1447 // Store vector pair, immed pre-index, Q-form
1448 def : InstRW<[V2Write_2c_1L01_2V01_2I], (instrs STPQpre)>; // NOTE(review): 2I here vs 1I for STPQpost, and no WriteAdr - confirm intended
1450 // ASIMD integer instructions
1451 // -----------------------------------------------------------------------------
1453 // ASIMD absolute diff
1454 // ASIMD absolute diff long
1455 // ASIMD arith, basic
1456 // ASIMD arith, complex
1457 // ASIMD arith, pair-wise
1458 // ASIMD compare
1459 // ASIMD logical
1460 // ASIMD max/min, basic and pair-wise
1461 def : SchedAlias<WriteVd, V2Write_2c_1V>; // WriteVd and WriteVq resolve to the same write
1462 def : SchedAlias<WriteVq, V2Write_2c_1V>;
1464 // ASIMD absolute diff accum
1465 // ASIMD absolute diff accum long
1466 def : InstRW<[V2Wr_VA, V2Rd_VA], (instregex "^[SU]ABAL?v")>;
1468 // ASIMD arith, reduce, 4H/4S
1469 def : InstRW<[V2Write_2c_1V13], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
1471 // ASIMD arith, reduce, 8B/8H
1472 def : InstRW<[V2Write_4c_1V13_1V],
1473              (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
1475 // ASIMD arith, reduce, 16B
1476 def : InstRW<[V2Write_4c_2V13], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;
1478 // ASIMD dot product
1479 // ASIMD dot product using signed and unsigned integers
1480 def : InstRW<[V2Wr_VDOT, V2Rd_VDOT],
1481              (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;
1483 // ASIMD matrix multiply-accumulate
1484 def : InstRW<[V2Wr_VMMA, V2Rd_VMMA], (instrs SMMLA, UMMLA, USMMLA)>;
1486 // ASIMD max/min, reduce, 4H/4S
1487 def : InstRW<[V2Write_2c_1V13], (instregex "^[SU](MAX|MIN)Vv4i16v$", // same three writes as the ADDV reduce entries above
1488                                            "^[SU](MAX|MIN)Vv4i32v$")>;
1490 // ASIMD max/min, reduce, 8B/8H
1491 def : InstRW<[V2Write_4c_1V13_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
1492                                               "^[SU](MAX|MIN)Vv8i16v$")>;
1494 // ASIMD max/min, reduce, 16B
1495 def : InstRW<[V2Write_4c_2V13], (instregex "^[SU](MAX|MIN)Vv16i8v$")>; // explicit ^ anchor, matching the 4H/4S and 8B/8H max/min patterns
1497 // ASIMD multiply
1498 def : InstRW<[V2Write_4c_1V02], (instregex "^MULv", "^SQ(R)?DMULHv")>;
1500 // ASIMD multiply accumulate
1501 def : InstRW<[V2Wr_VMA, V2Rd_VMA], (instregex "^MLAv", "^MLSv")>; // Wr/Rd pairs in this group come from the forwarded-types section
1503 // ASIMD multiply accumulate high
1504 def : InstRW<[V2Wr_VMAH, V2Rd_VMAH], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
1506 // ASIMD multiply accumulate long
1507 def : InstRW<[V2Wr_VMAL, V2Rd_VMAL], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
1509 // ASIMD multiply accumulate saturating long
1510 def : InstRW<[V2Write_4c_1V02], (instregex "^SQDML[AS]L[iv]")>; // plain write: no read-advance operand is listed here
1512 // ASIMD multiply/multiply long (8x8) polynomial, D-form
1513 // ASIMD multiply/multiply long (8x8) polynomial, Q-form
1514 def : InstRW<[V2Write_3c_1V23], (instregex "^PMULL?(v8i8|v16i8)$")>;
1516 // ASIMD multiply long
1517 def : InstRW<[V2Write_3c_1V02], (instregex "^[SU]MULLv", "^SQDMULL[iv]")>;
1519 // ASIMD pairwise add and accumulate long
1520 def : InstRW<[V2Wr_VPA, V2Rd_VPA], (instregex "^[SU]ADALPv")>;
1522 // ASIMD shift accumulate
1523 def : InstRW<[V2Wr_VSA, V2Rd_VSA], (instregex "^[SU]SRA[dv]", "^[SU]RSRA[dv]")>;
1525 // ASIMD shift by immed, basic
1526 def : InstRW<[V2Write_2c_1V13], (instregex "^SHL[dv]", "^SHLLv", "^SHRNv",
1527                                            "^SSHLLv", "^SSHR[dv]", "^USHLLv",
1528                                            "^USHR[dv]")>;
1530 // ASIMD shift by immed and insert, basic
1531 def : InstRW<[V2Write_2c_1V13], (instregex "^SLI[dv]", "^SRI[dv]")>;
1533 // ASIMD shift by immed, complex
1534 def : InstRW<[V2Write_4c_1V13],
1535              (instregex "^RSHRNv", "^SQRSHRU?N[bhsv]", "^(SQSHLU?|UQSHL)[bhsd]$",
1536                         "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
1537                         "^SQSHRU?N[bhsv]", "^SRSHR[dv]", "^UQRSHRN[bhsv]",
1538                         "^UQSHRN[bhsv]", "^URSHR[dv]")>;
1540 // ASIMD shift by register, basic
1541 def : InstRW<[V2Write_2c_1V13], (instregex "^[SU]SHLv")>;
1543 // ASIMD shift by register, complex
1544 def : InstRW<[V2Write_4c_1V13],
1545              (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
1546                         "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>; // "$" after the type suffix excludes the *_shift immediate names
1548 // ASIMD floating-point instructions
1549 // -----------------------------------------------------------------------------
1551 // ASIMD FP absolute value/difference
1552 // ASIMD FP arith, normal
1553 // ASIMD FP compare
1554 // ASIMD FP complex add
1555 // ASIMD FP max/min, normal
1556 // ASIMD FP max/min, pairwise
1557 // ASIMD FP negate
1558 // Handled by SchedAlias<WriteV[dq], ...>
1560 // ASIMD FP complex multiply add
1561 def : InstRW<[V2Wr_VFCMA, V2Rd_VFCMA], (instregex "^FCMLAv")>;
1563 // ASIMD FP convert, long (F16 to F32)
1564 def : InstRW<[V2Write_4c_2V02], (instregex "^FCVTL(v4|v8)i16")>;
1566 // ASIMD FP convert, long (F32 to F64)
1567 def : InstRW<[V2Write_3c_1V02], (instregex "^FCVTL(v2|v4)i32")>;
1569 // ASIMD FP convert, narrow (F32 to F16)
1570 def : InstRW<[V2Write_4c_2V02], (instregex "^FCVTN(v4|v8)i16")>;
1572 // ASIMD FP convert, narrow (F64 to F32)
1573 def : InstRW<[V2Write_3c_1V02], (instregex "^FCVTN(v2|v4)i32",
1574                                              "^FCVTXN(v2|v4)f32")>;
1576 // ASIMD FP convert, other, D-form F32 and Q-form F64
1577 def : InstRW<[V2Write_3c_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$", // 3c, one V02 entry
1578                                            "^FCVT[AMNPZ][SU]v2i(32|64)_shift$",
1579                                            "^FCVT[AMNPZ][SU]v1i64$",
1580                                            "^FCVTZ[SU]d$",
1581                                            "^[SU]CVTFv2f(32|64)$",
1582                                            "^[SU]CVTFv2i(32|64)_shift$",
1583                                            "^[SU]CVTFv1i64$",
1584                                            "^[SU]CVTFd$")>;
1586 // ASIMD FP convert, other, D-form F16 and Q-form F32
1587 def : InstRW<[V2Write_4c_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$", // 4c, two V02 entries
1588                                            "^FCVT[AMNPZ][SU]v4i(16|32)_shift$",
1589                                            "^FCVT[AMNPZ][SU]v1i32$",
1590                                            "^FCVTZ[SU]s$",
1591                                            "^[SU]CVTFv4f(16|32)$",
1592                                            "^[SU]CVTFv4i(16|32)_shift$",
1593                                            "^[SU]CVTFv1i32$",
1594                                            "^[SU]CVTFs$")>;
1596 // ASIMD FP convert, other, Q-form F16
1597 def : InstRW<[V2Write_6c_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$",
1598                                            "^FCVT[AMNPZ][SU]v8i16_shift$",
1599                                            "^FCVT[AMNPZ][SU]v1f16$",
1600                                            "^FCVTZ[SU]h$",
1601                                            "^[SU]CVTFv8f16$",
1602                                            "^[SU]CVTFv8i16_shift$",
1603                                            "^[SU]CVTFv1i16$",
1604                                            "^[SU]CVTFh$")>;
// FP divide and square root are bound per exact instruction name (`instrs`),
// one record per vector arrangement, because each arrangement uses a
// different write type. The V2Write_* types are defined elsewhere in this
// file.

// ASIMD FP divide, D-form, F16
def : InstRW<[V2Write_7c_1V02_7rc], (instrs FDIVv4f16)>;

// ASIMD FP divide, D-form, F32
def : InstRW<[V2Write_10c_1V02_5rc], (instrs FDIVv2f32)>;

// ASIMD FP divide, Q-form, F16
def : InstRW<[V2Write_13c_1V02_13rc], (instrs FDIVv8f16)>;

// ASIMD FP divide, Q-form, F32
def : InstRW<[V2Write_10c_1V02_10rc], (instrs FDIVv4f32)>;

// ASIMD FP divide, Q-form, F64
def : InstRW<[V2Write_15c_1V02_14rc], (instrs FDIVv2f64)>;

// ASIMD FP max/min, reduce, F32 and D-form F16
def : InstRW<[V2Write_4c_2V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;

// ASIMD FP max/min, reduce, Q-form F16
def : InstRW<[V2Write_6c_3V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;

// ASIMD FP multiply
def : InstRW<[V2Wr_VFM], (instregex "^FMULv", "^FMULXv")>;

// ASIMD FP multiply accumulate
def : InstRW<[V2Wr_VFMA, V2Rd_VFMA], (instregex "^FMLAv", "^FMLSv")>;

// ASIMD FP multiply accumulate long
def : InstRW<[V2Wr_VFMAL, V2Rd_VFMAL], (instregex "^FML[AS]L2?(lane)?v")>;

// ASIMD FP round, D-form F32 and Q-form F64
def : InstRW<[V2Write_3c_1V02],
             (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
                        "^FRINT(32|64)[XZ]v2f(32|64)$")>;

// ASIMD FP round, D-form F16 and Q-form F32
def : InstRW<[V2Write_4c_2V02],
             (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
                        "^FRINT(32|64)[XZ]v4f32$")>;

// ASIMD FP round, Q-form F16
def : InstRW<[V2Write_6c_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>;

// ASIMD FP square root, D-form, F16
def : InstRW<[V2Write_7c_1V02_7rc], (instrs FSQRTv4f16)>;

// ASIMD FP square root, D-form, F32
def : InstRW<[V2Write_10c_1V02_5rc], (instrs FSQRTv2f32)>;

// ASIMD FP square root, Q-form, F16
def : InstRW<[V2Write_13c_1V02_13rc], (instrs FSQRTv8f16)>;

// ASIMD FP square root, Q-form, F32
def : InstRW<[V2Write_10c_1V02_9rc], (instrs FSQRTv4f32)>;

// ASIMD FP square root, Q-form, F64
def : InstRW<[V2Write_16c_1V02_15rc], (instrs FSQRTv2f64)>;
// ASIMD BFloat16 (BF16) instructions
// -----------------------------------------------------------------------------
// All BF16 records name their instructions explicitly with `instrs`; the
// write/read types they reference are defined elsewhere in this file.

// ASIMD convert, F32 to BF16
def : InstRW<[V2Write_4c_2V02], (instrs BFCVTN, BFCVTN2)>;

// ASIMD dot product
def : InstRW<[V2Wr_VBFDOT, V2Rd_VBFDOT], (instrs BFDOTv4bf16, BFDOTv8bf16)>;

// ASIMD matrix multiply accumulate
def : InstRW<[V2Wr_VBFMMA, V2Rd_VBFMMA], (instrs BFMMLA)>;

// ASIMD multiply accumulate long
def : InstRW<[V2Wr_VBFMAL, V2Rd_VBFMAL], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
                                                 BFMLALTIdx)>;

// Scalar convert, F32 to BF16
def : InstRW<[V2Write_3c_1V02], (instrs BFCVT)>;
// ASIMD miscellaneous instructions
// -----------------------------------------------------------------------------

// ASIMD bit reverse
// ASIMD bitwise insert
// ASIMD count
// ASIMD duplicate, element
// ASIMD extract
// ASIMD extract narrow
// ASIMD insert, element to element
// ASIMD move, FP immed
// ASIMD move, integer immed
// ASIMD reverse
// ASIMD table lookup extension, 1 table reg
// ASIMD transpose
// ASIMD unzip/zip
// Handled by SchedAlias<WriteV[dq], ...>

// MOVID and MOVIv2d_ns are the only names from the list above that get an
// explicit record instead of relying on the alias.
// NOTE(review): the "0or2c" in the write name presumably denotes a variant
// latency of 0 or 2 cycles -- confirm against the V2Write_0or2c_1V definition.
def : InstRW<[V2Write_0or2c_1V], (instrs MOVID, MOVIv2d_ns)>;

// ASIMD duplicate, gen reg
def : InstRW<[V2Write_3c_1M0], (instregex "^DUPv.+gpr")>;

// ASIMD extract narrow, saturating
def : InstRW<[V2Write_4c_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>;

// ASIMD reciprocal and square root estimate, D-form U32
def : InstRW<[V2Write_3c_1V02], (instrs URECPEv2i32, URSQRTEv2i32)>;

// ASIMD reciprocal and square root estimate, Q-form U32
def : InstRW<[V2Write_4c_2V02], (instrs URECPEv4i32, URSQRTEv4i32)>;

// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
def : InstRW<[V2Write_3c_1V02], (instrs FRECPEv1f16, FRECPEv1i32,
                                        FRECPEv1i64, FRECPEv2f32,
                                        FRSQRTEv1f16, FRSQRTEv1i32,
                                        FRSQRTEv1i64, FRSQRTEv2f32)>;

// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
def : InstRW<[V2Write_4c_2V02], (instrs FRECPEv4f16, FRECPEv4f32,
                                        FRSQRTEv4f16, FRSQRTEv4f32)>;

// ASIMD reciprocal and square root estimate, Q-form F16
def : InstRW<[V2Write_6c_4V02], (instrs FRECPEv8f16, FRSQRTEv8f16)>;

// ASIMD reciprocal exponent
def : InstRW<[V2Write_3c_1V02], (instregex "^FRECPXv")>;

// ASIMD reciprocal step
def : InstRW<[V2Write_4c_1V], (instregex "^FRECPS(32|64|v)",
                                         "^FRSQRTS(32|64|v)")>;

// ASIMD table lookup, 1 or 2 table regs
def : InstRW<[V2Write_2c_1V01], (instrs TBLv8i8One, TBLv16i8One,
                                        TBLv8i8Two, TBLv16i8Two)>;

// ASIMD table lookup, 3 table regs
def : InstRW<[V2Write_4c_2V01], (instrs TBLv8i8Three, TBLv16i8Three)>;

// ASIMD table lookup, 4 table regs
def : InstRW<[V2Write_4c_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>;

// ASIMD table lookup extension, 2 table reg
def : InstRW<[V2Write_4c_2V], (instrs TBXv8i8Two, TBXv16i8Two)>;

// ASIMD table lookup extension, 3 table reg
def : InstRW<[V2Write_6c_3V], (instrs TBXv8i8Three, TBXv16i8Three)>;

// ASIMD table lookup extension, 4 table reg
def : InstRW<[V2Write_6c_5V], (instrs TBXv8i8Four, TBXv16i8Four)>;

// ASIMD transfer, element to gen reg
def : InstRW<[V2Write_2c_2V01], (instregex "^[SU]MOVv")>;

// ASIMD transfer, gen reg to element
def : InstRW<[V2Write_5c_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
// ASIMD load instructions
// -----------------------------------------------------------------------------
// Each load form has two records: the plain name (pattern ends in `$`) and the
// post-indexed name (pattern ends in `_POST$`). The post-indexed record lists
// WriteAdr ahead of the load write.
// NOTE(review): WriteAdr presumably covers the written-back base address --
// confirm against the operand order of the _POST instruction definitions.

// ASIMD load, 1 element, multiple, 1 reg, D-form
def : InstRW<[V2Write_6c_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_6c_1L],
             (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 1 reg, Q-form
def : InstRW<[V2Write_6c_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6c_1L],
             (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, D-form
def : InstRW<[V2Write_6c_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_6c_2L],
             (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, Q-form
def : InstRW<[V2Write_6c_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6c_2L],
             (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, D-form
def : InstRW<[V2Write_6c_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_6c_3L],
             (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, Q-form
def : InstRW<[V2Write_6c_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6c_3L],
             (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, D-form
def : InstRW<[V2Write_7c_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_7c_4L],
             (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, Q-form
def : InstRW<[V2Write_7c_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_7c_4L],
             (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
// Lane, replicate (LDnR) and two-element structure loads. Each form has a
// plain record and a `_POST` record; the `_POST` record additionally lists
// WriteAdr first. The leading `^` is spelled out on every pattern to match
// the style used by the other records in this file.

// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
def : InstRW<[V2Write_8c_1L_1V],           (instregex "^LD1i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8c_1L_1V], (instregex "^LD1i(8|16|32|64)_POST$")>;

// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
def : InstRW<[V2Write_8c_1L_1V],           (instregex "^LD1Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8c_1L_1V], (instregex "^LD1Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, all lanes, Q-form
def : InstRW<[V2Write_8c_1L_1V],           (instregex "^LD1Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8c_1L_1V], (instregex "^LD1Rv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8c_1L_2V],           (instregex "^LD2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_8c_1L_2V], (instregex "^LD2Twov(8b|4h|2s)_POST$")>;

// ASIMD load, 2 element, multiple, Q-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
def : InstRW<[V2Write_8c_2L_2V],           (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8c_2L_2V], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
def : InstRW<[V2Write_8c_1L_2V],           (instregex "^LD2i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8c_1L_2V], (instregex "^LD2i(8|16|32|64)_POST$")>;

// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
def : InstRW<[V2Write_8c_1L_2V],           (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8c_1L_2V], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 2 element, all lanes, Q-form
def : InstRW<[V2Write_8c_1L_2V],           (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8c_1L_2V], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
// Three- and four-element structure loads. Each form has a plain record and a
// `_POST` record; the `_POST` record additionally lists WriteAdr first. The
// leading `^` is spelled out on every pattern to match the style used by the
// other records in this file.

// ASIMD load, 3 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8c_2L_3V],           (instregex "^LD3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_8c_2L_3V], (instregex "^LD3Threev(8b|4h|2s)_POST$")>;

// ASIMD load, 3 element, multiple, Q-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, D
def : InstRW<[V2Write_8c_3L_3V],           (instregex "^LD3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8c_3L_3V], (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
def : InstRW<[V2Write_8c_2L_3V],           (instregex "^LD3i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8c_2L_3V], (instregex "^LD3i(8|16|32|64)_POST$")>;

// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
def : InstRW<[V2Write_8c_2L_3V],           (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8c_2L_3V], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
def : InstRW<[V2Write_8c_3L_3V],           (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8c_3L_3V], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8c_3L_4V],           (instregex "^LD4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_8c_3L_4V], (instregex "^LD4Fourv(8b|4h|2s)_POST$")>;

// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
def : InstRW<[V2Write_9c_6L_4V],           (instregex "^LD4Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_9c_6L_4V], (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
def : InstRW<[V2Write_8c_3L_4V],           (instregex "^LD4i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8c_3L_4V], (instregex "^LD4i(8|16|32|64)_POST$")>;

// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
def : InstRW<[V2Write_8c_3L_4V],           (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8c_3L_4V], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
def : InstRW<[V2Write_8c_4L_4V],           (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8c_4L_4V], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
// ASIMD store instructions
// -----------------------------------------------------------------------------
// Each store form has a plain record and a `_POST` record; the `_POST` record
// additionally lists WriteAdr first. The leading `^` is spelled out on every
// pattern to match the style used by the load records in this file.

// ASIMD store, 1 element, multiple, 1 reg, D-form
def : InstRW<[V2Write_2c_1L01_1V01],           (instregex "^ST1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2c_1L01_1V01], (instregex "^ST1Onev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 1 reg, Q-form
def : InstRW<[V2Write_2c_1L01_1V01],           (instregex "^ST1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2c_1L01_1V01], (instregex "^ST1Onev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, D-form
def : InstRW<[V2Write_2c_1L01_1V01],           (instregex "^ST1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2c_1L01_1V01], (instregex "^ST1Twov(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, Q-form
def : InstRW<[V2Write_2c_2L01_2V01],           (instregex "^ST1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2c_2L01_2V01], (instregex "^ST1Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, D-form
def : InstRW<[V2Write_2c_2L01_2V01],           (instregex "^ST1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2c_2L01_2V01], (instregex "^ST1Threev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[V2Write_2c_3L01_3V01],           (instregex "^ST1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2c_3L01_3V01], (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, D-form
def : InstRW<[V2Write_2c_2L01_2V01],           (instregex "^ST1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2c_2L01_2V01], (instregex "^ST1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[V2Write_2c_4L01_4V01],           (instregex "^ST1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2c_4L01_4V01], (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
def : InstRW<[V2Write_4c_1L01_2V01],           (instregex "^ST1i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_4c_1L01_2V01], (instregex "^ST1i(8|16|32|64)_POST$")>;
// Two-, three- and four-element structure stores. Each form has a plain
// record and a `_POST` record; the `_POST` record additionally lists WriteAdr
// first. The leading `^` is spelled out on every pattern to match the style
// used by the load records in this file.
// Unlike the loads, ST4 splits the 2d / 64-bit forms into their own records
// with a different write type from the B/H/S forms.

// ASIMD store, 2 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_4c_1L01_2V01],           (instregex "^ST2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_4c_1L01_2V01], (instregex "^ST2Twov(8b|4h|2s)_POST$")>;

// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
def : InstRW<[V2Write_4c_2L01_4V01],           (instregex "^ST2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_4c_2L01_4V01], (instregex "^ST2Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
def : InstRW<[V2Write_4c_1L01_2V01],           (instregex "^ST2i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_4c_1L01_2V01], (instregex "^ST2i(8|16|32|64)_POST$")>;

// ASIMD store, 3 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_5c_2L01_4V01],           (instregex "^ST3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_5c_2L01_4V01], (instregex "^ST3Threev(8b|4h|2s)_POST$")>;

// ASIMD store, 3 element, multiple, Q-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, D
def : InstRW<[V2Write_6c_3L01_6V01],           (instregex "^ST3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6c_3L01_6V01], (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 3 element, one lane, B/H
// ASIMD store, 3 element, one lane, S
// ASIMD store, 3 element, one lane, D
def : InstRW<[V2Write_5c_2L01_4V01],           (instregex "^ST3i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_5c_2L01_4V01], (instregex "^ST3i(8|16|32|64)_POST$")>;

// ASIMD store, 4 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_6c_2L01_6V01],           (instregex "^ST4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_6c_2L01_6V01], (instregex "^ST4Fourv(8b|4h|2s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, B/H/S
def : InstRW<[V2Write_7c_4L01_12V01],           (instregex "^ST4Fourv(16b|8h|4s)$")>;
def : InstRW<[WriteAdr, V2Write_7c_4L01_12V01], (instregex "^ST4Fourv(16b|8h|4s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, D
def : InstRW<[V2Write_5c_4L01_8V01],           (instregex "^ST4Fourv(2d)$")>;
def : InstRW<[WriteAdr, V2Write_5c_4L01_8V01], (instregex "^ST4Fourv(2d)_POST$")>;

// ASIMD store, 4 element, one lane, B/H/S
def : InstRW<[V2Write_6c_1L01_3V01],           (instregex "^ST4i(8|16|32)$")>;
def : InstRW<[WriteAdr, V2Write_6c_1L01_3V01], (instregex "^ST4i(8|16|32)_POST$")>;

// ASIMD store, 4 element, one lane, D
def : InstRW<[V2Write_4c_2L01_4V01],           (instregex "^ST4i(64)$")>;
def : InstRW<[WriteAdr, V2Write_4c_2L01_4V01], (instregex "^ST4i(64)_POST$")>;
// Cryptography extensions
// -----------------------------------------------------------------------------
// Where two category comments precede one record, both categories share the
// same write type. The V2Write_* types are defined elsewhere in this file.

// Crypto AES ops
def : InstRW<[V2Write_2c_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;

// Crypto polynomial (64x64) multiply long
def : InstRW<[V2Write_2c_1V], (instrs PMULLv1i64, PMULLv2i64)>;

// Crypto SHA1 hash acceleration op
// Crypto SHA1 schedule acceleration ops
def : InstRW<[V2Write_2c_1V0], (instregex "^SHA1(H|SU0|SU1)")>;

// Crypto SHA1 hash acceleration ops
// Crypto SHA256 hash acceleration ops
def : InstRW<[V2Write_4c_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>;

// Crypto SHA256 schedule acceleration ops
def : InstRW<[V2Write_2c_1V0], (instregex "^SHA256SU[01]")>;

// Crypto SHA512 hash acceleration ops
def : InstRW<[V2Write_2c_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>;

// Crypto SHA3 ops
def : InstRW<[V2Write_2c_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;

// Crypto SM3 ops
def : InstRW<[V2Write_2c_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
                                          "^SM3TT[12][AB]$")>;

// Crypto SM4 ops
def : InstRW<[V2Write_4c_1V0], (instrs SM4E, SM4ENCKEY)>;

// CRC
// -----------------------------------------------------------------------------

// All CRC32* names share one write/read pair.
def : InstRW<[V2Wr_CRC, V2Rd_CRC], (instregex "^CRC32")>;
// SVE Predicate instructions
// -----------------------------------------------------------------------------
// NOTE(review): write names of the form V2Write_<a>or<b>c_* presumably select
// between two latencies via a variant defined elsewhere in this file --
// confirm against those definitions.

// Loop control, based on predicate
def : InstRW<[V2Write_2or3c_1M], (instrs BRKA_PPmP, BRKA_PPzP,
                                         BRKB_PPmP, BRKB_PPzP)>;

// Loop control, based on predicate and flag setting
def : InstRW<[V2Write_3or4c_2M], (instrs BRKAS_PPzP, BRKBS_PPzP)>;

// Loop control, propagating
def : InstRW<[V2Write_2or3c_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP,
                                          BRKPB_PPzPP)>;

// Loop control, propagating and flag setting
def : InstRW<[V2Write_3or4c_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
                                             BRKPBS_PPzPP)>;

// Loop control, based on GPR
// The compare-style WHILE* names and the WHILERW/WHILEWR names are matched by
// separate records but share the same write type.
def : InstRW<[V2Write_3c_2M],
             (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>;
def : InstRW<[V2Write_3c_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>;

// Loop terminate
def : InstRW<[V2Write_1c_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)")>;
// The plain (DEC|INC)[BHWD]_XPiI names are bound by their own record to
// V2Write_IncDec; the first "Predicate counting scalar" regex below lists
// only the CNT and saturating SQ*/UQ* names, so the two records do not
// overlap.

// Predicate counting scalar
def : InstRW<[V2Write_2c_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
def : InstRW<[V2Write_2c_1M],
             (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI",
                        "^SQ(DEC|INC)[BHWD]_XPiWdI",
                        "^UQ(DEC|INC)[BHWD]_WPiI")>;

// Predicate counting scalar, ALL, {1,2,4}
def : InstRW<[V2Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI")>;

// Predicate counting scalar, active predicate
def : InstRW<[V2Write_2c_1M],
             (instregex "^CNTP_XPP_[BHSD]",
                        "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]",
                        "^(UQDEC|UQINC)P_WP_[BHSD]",
                        "^(SQDEC|SQINC)P_XPWd_[BHSD]")>;

// Predicate counting vector, active predicate
def : InstRW<[V2Write_7c_1M_1M0_1V],
             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>;

// Predicate logical
def : InstRW<[V2Write_1or2c_1M0],
             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>;

// Predicate logical, flag setting
def : InstRW<[V2Write_1or2c_1M0_1M],
             (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>;

// Predicate reverse
def : InstRW<[V2Write_2c_1M], (instregex "^REV_PP_[BHSD]")>;

// Predicate select
def : InstRW<[V2Write_1c_1M0], (instrs SEL_PPPP)>;

// Predicate set
def : InstRW<[V2Write_2c_1M], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;

// Predicate set/initialize, set flags
def : InstRW<[V2Write_3c_2M], (instregex "^PTRUES_[BHSD]")>;

// Predicate find first/next
def : InstRW<[V2Write_2c_1M], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>;

// Predicate test
def : InstRW<[V2Write_1c_1M], (instrs PTEST_PP)>;

// Predicate transpose
def : InstRW<[V2Write_2c_1M], (instregex "^TRN[12]_PPP_[BHSD]")>;

// Predicate unpack and widen
def : InstRW<[V2Write_2c_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;

// Predicate zip/unzip
def : InstRW<[V2Write_2c_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]")>;
// SVE integer instructions
// -----------------------------------------------------------------------------
// SVE instruction names carry an operand-shape suffix (e.g. _ZPmZ, _ZZZ, _ZI,
// _ZPZZ), so most categories need one pattern per shape. The write/read types
// referenced here are defined elsewhere in this file.

// Arithmetic, absolute diff
def : InstRW<[V2Write_2c_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]",
                                         "^[SU]ABD_ZPZZ_[BHSD]")>;

// Arithmetic, absolute diff accum
def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]")>;

// Arithmetic, absolute diff accum long
def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>;

// Arithmetic, absolute diff long
def : InstRW<[V2Write_2c_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>;

// Arithmetic, basic
def : InstRW<[V2Write_2c_1V],
             (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
                        "^(ADD|SUB)_ZZZ_[BHSD]",
                        "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
                        "^(ADD|SUB|SUBR)_ZI_[BHSD]",
                        "^ADR_[SU]XTW_ZZZ_D_[0123]",
                        "^ADR_LSL_ZZZ_[SD]_[0123]",
                        "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
                        "^SADDLBT_ZZZ_[HSD]",
                        "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
                        "^SSUBL(BT|TB)_ZZZ_[HSD]")>;

// Arithmetic, complex
def : InstRW<[V2Write_2c_1V],
             (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
                        "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
                        "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
                        "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
                        "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
                        "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;

// Arithmetic, large integer
def : InstRW<[V2Write_2c_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>;

// Arithmetic, pairwise add
def : InstRW<[V2Write_2c_1V], (instregex "^ADDP_ZPmZ_[BHSD]")>;

// Arithmetic, pairwise add and accum long
// Unlike the other write/read pairs in this section, a ReadDefault entry sits
// between the write and V2Rd_ZPA.
// NOTE(review): ReadDefault presumably covers the governing predicate operand
// so that V2Rd_ZPA lands on the accumulator -- confirm against the operand
// order of [SU]ADALP_ZPmZ.
def : InstRW<[V2Wr_ZPA, ReadDefault, V2Rd_ZPA],
             (instregex "^[SU]ADALP_ZPmZ_[HSD]")>;

// Arithmetic, shift
def : InstRW<[V2Write_2c_1V13],
             (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
                        "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
                        "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
                        "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
                        "^(ASR|LSL|LSR)_ZZI_[BHSD]",
                        "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
                        "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;

// Arithmetic, shift and accumulate
def : InstRW<[V2Wr_ZSA, V2Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]")>;

// Arithmetic, shift by immediate
def : InstRW<[V2Write_2c_1V13], (instregex "^SHRN[BT]_ZZI_[BHS]",
                                           "^[SU]SHLL[BT]_ZZI_[HSD]")>;

// Arithmetic, shift by immediate and insert
def : InstRW<[V2Write_2c_1V13], (instregex "^(SLI|SRI)_ZZI_[BHSD]")>;

// Arithmetic, shift complex
def : InstRW<[V2Write_4c_1V13],
             (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
                        "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]",
                        "^[SU]QR?SHL_ZPZZ_[BHSD]",
                        "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
                        "^SQSHRU?N[BT]_ZZI_[BHS]",
                        "^UQR?SHRN[BT]_ZZI_[BHS]")>;

// Arithmetic, shift right for divide
def : InstRW<[V2Write_4c_1V13], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;

// Arithmetic, shift rounding
def : InstRW<[V2Write_4c_1V13], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]",
                                           "^[SU]RSHL_ZPZZ_[BHSD]",
                                           "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>;
// SVE integer bit manipulation, compare, complex arithmetic, conversions,
// copies and divides. The write/read types referenced here are defined
// elsewhere in this file.

// Bit manipulation
def : InstRW<[V2Write_6c_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>;

// Bitwise select
def : InstRW<[V2Write_2c_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>;

// Count/reverse bits
def : InstRW<[V2Write_2c_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;

// Broadcast logical bitmask immediate to vector
def : InstRW<[V2Write_2c_1V], (instrs DUPM_ZI)>;

// Compare and set flags
def : InstRW<[V2Write_4or5c_1V0_1M0],
             (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
                        "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;

// Complex add
def : InstRW<[V2Write_2c_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]")>;

// Complex dot product 8-bit element
def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;

// Complex dot product 16-bit element
def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;

// Complex multiply-add B, H, S element size
def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]",
                                                      "^CMLA_ZZZI_[HS]")>;

// Complex multiply-add D element size
def : InstRW<[V2Wr_ZCMAD, V2Rd_ZCMAD], (instrs CMLA_ZZZ_D)>;

// Conditional extract operations, scalar form
def : InstRW<[V2Write_8c_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]")>;

// Conditional extract operations, SIMD&FP scalar and vector forms
def : InstRW<[V2Write_3c_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]",
                                          "^COMPACT_ZPZ_[SD]",
                                          "^SPLICE_ZPZZ?_[BHSD]")>;

// The [SU]CVTF_ZPmZ names end in <src>to<dst>; the three records below split
// them on that suffix.

// Convert to floating point, 64b to float or convert to double
def : InstRW<[V2Write_3c_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
                                           "^[SU]CVTF_ZPmZ_StoD")>;

// Convert to floating point, 32b to single or half
def : InstRW<[V2Write_4c_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;

// Convert to floating point, 16b to half
def : InstRW<[V2Write_6c_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;

// Copy, scalar
def : InstRW<[V2Write_5c_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]")>;

// Copy, scalar SIMD&FP or imm
def : InstRW<[V2Write_2c_1V], (instregex "^CPY_ZPm[IV]_[BHSD]",
                                         "^CPY_ZPzI_[BHSD]")>;

// Divides, 32 bit
def : InstRW<[V2Write_12c_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
                                           "^[SU]DIV_ZPZZ_S")>;

// Divides, 64 bit
def : InstRW<[V2Write_20c_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
                                           "^[SU]DIV_ZPZZ_D")>;
2250 // Dot product, 8 bit
2251 def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S")>;
2253 // Dot product, 8 bit, using signed and unsigned integers
2254 def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
2256 // Dot product, 16 bit
2257 def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D")>;
// Duplicate, immediate and indexed forms
def : InstRW<[V2Write_2c_1V],
             (instregex "^DUP_ZI_[BHSD]",
                        "^DUP_ZZI_[BHSDQ]")>;

// Duplicate, scalar form
def : InstRW<[V2Write_3c_1M0],
             (instregex "^DUP_ZR_[BHSD]")>;

// Extend, sign or zero
def : InstRW<[V2Write_2c_1V13],
             (instregex "^[SU]XTB_ZPmZ_[HSD]",
                        "^[SU]XTH_ZPmZ_[SD]",
                        "^[SU]XTW_ZPmZ_[D]")>;

// Extract
def : InstRW<[V2Write_2c_1V],
             (instrs EXT_ZZI, EXT_ZZI_B)>;

// Extract narrow saturating
def : InstRW<[V2Write_4c_1V13],
             (instregex "^[SU]QXTN[BT]_ZZ_[BHS]",
                        "^SQXTUN[BT]_ZZ_[BHS]")>;

// Extract/insert operation, SIMD and FP scalar form
def : InstRW<[V2Write_3c_1V1],
             (instregex "^LAST[AB]_VPZ_[BHSD]",
                        "^INSR_ZV_[BHSD]")>;

// Extract/insert operation, scalar
def : InstRW<[V2Write_6c_1V1_1M0],
             (instregex "^LAST[AB]_RPZ_[BHSD]",
                        "^INSR_ZR_[BHSD]")>;

// Histogram operations
def : InstRW<[V2Write_2c_1V],
             (instregex "^HISTCNT_ZPzZZ_[SD]",
                        "^HISTSEG_ZZZ")>;
// Horizontal operations, B, H, S form, immediate operands only
def : InstRW<[V2Write_4c_1V02],
             (instregex "^INDEX_II_[BHS]")>;

// Horizontal operations, B, H, S form: scalar + immediate operands,
// immediate + scalar operands, or scalar operands only
def : InstRW<[V2Write_7c_1M0_1V02],
             (instregex "^INDEX_(IR|RI|RR)_[BHS]")>;

// Horizontal operations, D form, immediate operands only
def : InstRW<[V2Write_5c_2V02],
             (instrs INDEX_II_D)>;

// Horizontal operations, D form: scalar + immediate operands,
// immediate + scalar operands, or scalar operands only
def : InstRW<[V2Write_8c_2M0_2V02],
             (instregex "^INDEX_(IR|RI|RR)_D")>;
// Logical
// The predicated pattern (last entry) already lists NOT alongside
// AND/BIC/EOR/ORR for the ZPmZ form, so no dedicated "^NOT_ZPmZ_[BHSD]"
// pattern is needed; the set of matched instructions is unchanged.
def : InstRW<[V2Write_2c_1V],
             (instregex "^(AND|EOR|ORR)_ZI",
                        "^(AND|BIC|EOR|ORR)_ZZZ",
                        "^EOR(BT|TB)_ZZZ_[BHSD]",
                        "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]")>;
// Max/min, basic and pairwise
def : InstRW<[V2Write_2c_1V],
             (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
                        "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]",
                        "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>;

// Matching operations
// FIXME: SOG p. 44, n. 5: If the consuming instruction has a flag source, the
// latency for this instruction is 4 cycles.
def : InstRW<[V2Write_2or3c_1V0_1M],
             (instregex "^N?MATCH_PPzZZ_[BH]")>;

// Matrix multiply-accumulate
def : InstRW<[V2Wr_ZMMA, V2Rd_ZMMA],
             (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;

// Move prefix
def : InstRW<[V2Write_2c_1V],
             (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]",
                        "^MOVPRFX_ZZ")>;
// Multiply (including multiply high), B, H, S element size
def : InstRW<[V2Write_4c_1V02],
             (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
                        "^MUL_ZPZZ_[BHS]",
                        "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
                        "^[SU]MULH_ZPZZ_[BHS]")>;

// Multiply (including multiply high), D element size
def : InstRW<[V2Write_5c_2V02],
             (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
                        "^MUL_ZPZZ_D",
                        "^[SU]MULH_(ZPmZ|ZZZ)_D",
                        "^[SU]MULH_ZPZZ_D")>;

// Multiply long
def : InstRW<[V2Write_4c_1V02],
             (instregex "^[SU]MULL[BT]_ZZZI_[SD]",
                        "^[SU]MULL[BT]_ZZZ_[HSD]")>;
// Multiply accumulate, B, H, S element size.
// The merging-predicated (ZPmZZ) forms list an extra ReadDefault ahead of the
// accumulate read.
def : InstRW<[V2Wr_ZMABHS, V2Rd_ZMABHS],
             (instregex "^ML[AS]_ZZZI_[HS]",
                        "^ML[AS]_ZPZZZ_[BHS]")>;
def : InstRW<[V2Wr_ZMABHS, ReadDefault, V2Rd_ZMABHS],
             (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;

// Multiply accumulate, D element size (same split as above)
def : InstRW<[V2Wr_ZMAD, V2Rd_ZMAD],
             (instregex "^ML[AS]_ZZZI_D",
                        "^ML[AS]_ZPZZZ_D")>;
def : InstRW<[V2Wr_ZMAD, ReadDefault, V2Rd_ZMAD],
             (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;

// Multiply accumulate long
def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL],
             (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]",
                        "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>;

// Multiply accumulate saturating doubling long regular
def : InstRW<[V2Wr_ZMASQL, V2Rd_ZMASQ],
             (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]",
                        "^SQDML[AS]L[BT]_ZZZI_[SD]")>;
// Multiply saturating doubling high, B, H, S element size
def : InstRW<[V2Write_4c_1V02],
             (instregex "^SQDMULH_ZZZ_[BHS]",
                        "^SQDMULH_ZZZI_[HS]")>;

// Multiply saturating doubling high, D element size
def : InstRW<[V2Write_5c_2V02],
             (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;

// Multiply saturating doubling long
def : InstRW<[V2Write_4c_1V02],
             (instregex "^SQDMULL[BT]_ZZZ_[HSD]",
                        "^SQDMULL[BT]_ZZZI_[SD]")>;

// Multiply saturating rounding doubling regular/complex accumulate,
// B, H, S element size
def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ],
             (instregex "^SQRDML[AS]H_ZZZ_[BHS]",
                        "^SQRDCMLAH_ZZZ_[BHS]",
                        "^SQRDML[AS]H_ZZZI_[HS]",
                        "^SQRDCMLAH_ZZZI_[HS]")>;

// Multiply saturating rounding doubling regular/complex accumulate,
// D element size
def : InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ],
             (instregex "^SQRDML[AS]H_ZZZI?_D",
                        "^SQRDCMLAH_ZZZ_D")>;

// Multiply saturating rounding doubling regular/complex, B, H, S element size
def : InstRW<[V2Write_4c_1V02],
             (instregex "^SQRDMULH_ZZZ_[BHS]",
                        "^SQRDMULH_ZZZI_[HS]")>;

// Multiply saturating rounding doubling regular/complex, D element size
def : InstRW<[V2Write_5c_2V02],
             (instregex "^SQRDMULH_ZZZI?_D")>;

// Multiply/multiply long, (8x8) polynomial
def : InstRW<[V2Write_2c_1V23],
             (instregex "^PMUL_ZZZ_B",
                        "^PMULL[BT]_ZZZ_[HDQ]")>;
// Predicate counting vector
def : InstRW<[V2Write_2c_1V],
             (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI")>;

// Reciprocal estimate
def : InstRW<[V2Write_4c_2V02],
             (instregex "^URECPE_ZPmZ_S",
                        "^URSQRTE_ZPmZ_S")>;

// Reduction, arithmetic, B form
def : InstRW<[V2Write_9c_2V_4V13],
             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;

// Reduction, arithmetic, H form
def : InstRW<[V2Write_8c_2V_2V13],
             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;

// Reduction, arithmetic, S form
def : InstRW<[V2Write_6c_2V_2V13],
             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;

// Reduction, arithmetic, D form
def : InstRW<[V2Write_4c_2V],
             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;

// Reduction, logical
def : InstRW<[V2Write_6c_1V_1V13],
             (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]")>;
// Reverse, vector
def : InstRW<[V2Write_2c_1V],
             (instregex "^REV_ZZ_[BHSD]",
                        "^REVB_ZPmZ_[HSD]",
                        "^REVH_ZPmZ_[SD]",
                        "^REVW_ZPmZ_D")>;

// Select, vector form
def : InstRW<[V2Write_2c_1V],
             (instregex "^SEL_ZPZZ_[BHSD]")>;

// Table lookup
def : InstRW<[V2Write_2c_1V],
             (instregex "^TBL_ZZZZ?_[BHSD]")>;

// Table lookup extension
def : InstRW<[V2Write_2c_1V],
             (instregex "^TBX_ZZZ_[BHSD]")>;

// Transpose, vector form
def : InstRW<[V2Write_2c_1V],
             (instregex "^TRN[12]_ZZZ_[BHSDQ]")>;

// Unpack and extend
def : InstRW<[V2Write_2c_1V],
             (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>;

// Zip/unzip
def : InstRW<[V2Write_2c_1V],
             (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>;
// SVE floating-point instructions
// -----------------------------------------------------------------------------

// Floating point absolute value/difference
// "^FAB[SD]_ZPmZ_[HSD]" covers both FABS and FABD in the ZPmZ form, so a
// separate "^FABS_ZPmZ_[HSD]" pattern is not needed; the set of matched
// instructions is unchanged.
def : InstRW<[V2Write_2c_1V],
             (instregex "^FAB[SD]_ZPmZ_[HSD]",
                        "^FABD_ZPZZ_[HSD]")>;
// Floating point arithmetic
def : InstRW<[V2Write_2c_1V],
             (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
                        "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
                        "^FADDP_ZPmZZ_[HSD]",
                        "^FNEG_ZPmZ_[HSD]",
                        "^FSUBR_ZPm[IZ]_[HSD]",
                        "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;

// Floating point associative add, F16
def : InstRW<[V2Write_10c_1V1_9rc],
             (instrs FADDA_VPZ_H)>;

// Floating point associative add, F32
def : InstRW<[V2Write_6c_1V1_5rc],
             (instrs FADDA_VPZ_S)>;

// Floating point associative add, F64
def : InstRW<[V2Write_4c_1V],
             (instrs FADDA_VPZ_D)>;

// Floating point compare
def : InstRW<[V2Write_2c_1V0],
             (instregex "^FACG[ET]_PPzZZ_[HSD]",
                        "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]",
                        "^FCM(LE|LT)_PPzZ0_[HSD]",
                        "^FCMUO_PPzZZ_[HSD]")>;

// Floating point complex add
def : InstRW<[V2Write_3c_1V],
             (instregex "^FCADD_ZPmZ_[HSD]")>;

// Floating point complex multiply add.
// The merging-predicated (ZPmZZ) form lists an extra ReadDefault ahead of the
// accumulate read; the indexed (ZZZI) form does not.
def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA],
             (instregex "^FCMLA_ZPmZZ_[HSD]")>;
def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA],
             (instregex "^FCMLA_ZZZI_[HS]")>;
// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
def : InstRW<[V2Write_4c_2V02],
             (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
                        "^FCVTLT_ZPmZ_HtoS",
                        "^FCVTNT_ZPmZ_StoH")>;

// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
// or F64 to F16)
def : InstRW<[V2Write_3c_1V02],
             (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
                        "^FCVTLT_ZPmZ_StoD",
                        "^FCVTNT_ZPmZ_DtoS")>;

// Floating point convert, round to odd
def : InstRW<[V2Write_3c_1V02],
             (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;

// Floating point base2 log, F16
def : InstRW<[V2Write_6c_4V02],
             (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;

// Floating point base2 log, F32
def : InstRW<[V2Write_4c_2V02],
             (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;

// Floating point base2 log, F64
def : InstRW<[V2Write_3c_1V02],
             (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;

// Floating point convert to integer, F16
def : InstRW<[V2Write_6c_4V02],
             (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;

// Floating point convert to integer, F32
def : InstRW<[V2Write_4c_2V02],
             (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;

// Floating point convert to integer, F64
def : InstRW<[V2Write_3c_1V02],
             (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;

// Floating point copy
def : InstRW<[V2Write_2c_1V],
             (instregex "^FCPY_ZPmI_[HSD]",
                        "^FDUP_ZI_[HSD]")>;
// Floating point divide, F16
def : InstRW<[V2Write_13c_1V02_12rc],
             (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;

// Floating point divide, F32
def : InstRW<[V2Write_10c_1V02_9rc],
             (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;

// Floating point divide, F64
def : InstRW<[V2Write_15c_1V02_14rc],
             (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;

// Floating point min/max pairwise
def : InstRW<[V2Write_2c_1V],
             (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;

// Floating point min/max
def : InstRW<[V2Write_2c_1V],
             (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
                        "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;

// Floating point multiply
def : InstRW<[V2Write_3c_1V],
             (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
                        "^FMULX_ZPZZ_[HSD]",
                        "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
                        "^FMUL_ZPZ[IZ]_[HSD]")>;

// Floating point multiply accumulate.
// The merging-predicated (ZPmZZ) forms list an extra ReadDefault ahead of the
// accumulate read.
def : InstRW<[V2Wr_ZFMA, ReadDefault, V2Rd_ZFMA],
             (instregex "^FN?ML[AS]_ZPmZZ_[HSD]",
                        "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
def : InstRW<[V2Wr_ZFMA, V2Rd_ZFMA],
             (instregex "^FML[AS]_ZZZI_[HSD]",
                        "^FN?ML[AS]_ZPZZZ_[HSD]")>;

// Floating point multiply add/sub accumulate long
def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL],
             (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>;
// Floating point reciprocal estimate, F16
def : InstRW<[V2Write_6c_4V02],
             (instregex "^FR(ECP|SQRT)E_ZZ_H",
                        "^FRECPX_ZPmZ_H")>;

// Floating point reciprocal estimate, F32
def : InstRW<[V2Write_4c_2V02],
             (instregex "^FR(ECP|SQRT)E_ZZ_S",
                        "^FRECPX_ZPmZ_S")>;

// Floating point reciprocal estimate, F64
def : InstRW<[V2Write_3c_1V02],
             (instregex "^FR(ECP|SQRT)E_ZZ_D",
                        "^FRECPX_ZPmZ_D")>;

// Floating point reciprocal step
def : InstRW<[V2Write_4c_1V],
             (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;

// Floating point reduction, F16
def : InstRW<[V2Write_8c_4V],
             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H")>;

// Floating point reduction, F32
def : InstRW<[V2Write_6c_3V],
             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S")>;

// Floating point reduction, F64
def : InstRW<[V2Write_4c_2V],
             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D")>;

// Floating point round to integral, F16
def : InstRW<[V2Write_6c_4V02],
             (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;

// Floating point round to integral, F32
def : InstRW<[V2Write_4c_2V02],
             (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;

// Floating point round to integral, F64
def : InstRW<[V2Write_3c_1V02],
             (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;

// Floating point square root, F16
def : InstRW<[V2Write_13c_1V02_12rc],
             (instregex "^FSQRT_ZPmZ_H")>;

// Floating point square root, F32
def : InstRW<[V2Write_10c_1V02_9rc],
             (instregex "^FSQRT_ZPmZ_S")>;

// Floating point square root, F64
def : InstRW<[V2Write_16c_1V02_14rc],
             (instregex "^FSQRT_ZPmZ_D")>;

// Floating point trigonometric exponentiation
def : InstRW<[V2Write_3c_1V1],
             (instregex "^FEXPA_ZZ_[HSD]")>;

// Floating point trigonometric multiply add
def : InstRW<[V2Write_4c_1V],
             (instregex "^FTMAD_ZZI_[HSD]")>;

// Floating point trigonometric, miscellaneous
def : InstRW<[V2Write_3c_1V],
             (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]")>;
// SVE BFloat16 (BF16) instructions
// -----------------------------------------------------------------------------

// Convert, F32 to BF16
def : InstRW<[V2Write_4c_1V02],
             (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;

// Dot product
def : InstRW<[V2Wr_ZBFDOT, V2Rd_ZBFDOT],
             (instrs BFDOT_ZZI, BFDOT_ZZZ)>;

// Matrix multiply accumulate
def : InstRW<[V2Wr_ZBFMMA, V2Rd_ZBFMMA],
             (instrs BFMMLA_ZZZ)>;

// Multiply accumulate long
def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL],
             (instregex "^BFMLAL[BT]_ZZZI?")>;
// SVE Load instructions
// -----------------------------------------------------------------------------

// Load vector
def : InstRW<[V2Write_6c_1L],
             (instrs LDR_ZXI)>;

// Load predicate
def : InstRW<[V2Write_6c_1L_1M],
             (instrs LDR_PXI)>;

// Contiguous load, scalar + imm
def : InstRW<[V2Write_6c_1L],
             (instregex "^LD1[BHWD]_IMM$",
                        "^LD1S?B_[HSD]_IMM$",
                        "^LD1S?H_[SD]_IMM$",
                        "^LD1S?W_D_IMM$")>;

// Contiguous load, scalar + scalar
def : InstRW<[V2Write_6c_1L],
             (instregex "^LD1[BHWD]$",
                        "^LD1S?B_[HSD]$",
                        "^LD1S?H_[SD]$",
                        "^LD1S?W_D$")>;

// Contiguous load broadcast, scalar + imm
def : InstRW<[V2Write_6c_1L],
             (instregex "^LD1R[BHWD]_IMM$",
                        "^LD1RS?B_[HSD]_IMM$",
                        "^LD1RS?H_[SD]_IMM$",
                        "^LD1RW_D_IMM$",
                        "^LD1RSW_IMM$",
                        "^LD1RQ_[BHWD]_IMM$")>;

// Contiguous load broadcast, scalar + scalar
def : InstRW<[V2Write_6c_1L],
             (instregex "^LD1RQ_[BHWD]$")>;
// Non temporal load, scalar + imm
// Non temporal load, scalar + scalar
def : InstRW<[V2Write_6c_1L],
             (instregex "^LDNT1[BHWD]_ZR[IR]$")>;

// Non temporal gather load, vector + scalar 32-bit element size
def : InstRW<[V2Write_9c_2L_4V],
             (instregex "^LDNT1[BHW]_ZZR_S$",
                        "^LDNT1S[BH]_ZZR_S$")>;

// Non temporal gather load, vector + scalar 64-bit element size
def : InstRW<[V2Write_9c_2L_2V1],
             (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
def : InstRW<[V2Write_9c_2L_2V1],
             (instrs LDNT1D_ZZR_D)>;

// Contiguous first faulting load, scalar + scalar
def : InstRW<[V2Write_6c_1L_1S],
             (instregex "^LDFF1[BHWD]$",
                        "^LDFF1S?B_[HSD]$",
                        "^LDFF1S?H_[SD]$",
                        "^LDFF1S?W_D$")>;

// Contiguous non faulting load, scalar + imm
def : InstRW<[V2Write_6c_1L],
             (instregex "^LDNF1[BHWD]_IMM$",
                        "^LDNF1S?B_[HSD]_IMM$",
                        "^LDNF1S?H_[SD]_IMM$",
                        "^LDNF1S?W_D_IMM$")>;
// Contiguous load two structures to two vectors, scalar + imm
def : InstRW<[V2Write_8c_2L_2V],
             (instregex "^LD2[BHWD]_IMM$")>;

// Contiguous load two structures to two vectors, scalar + scalar
def : InstRW<[V2Write_9c_2L_2V_2S],
             (instregex "^LD2[BHWD]$")>;

// Contiguous load three structures to three vectors, scalar + imm
def : InstRW<[V2Write_9c_3L_3V],
             (instregex "^LD3[BHWD]_IMM$")>;

// Contiguous load three structures to three vectors, scalar + scalar
def : InstRW<[V2Write_10c_3V_3L_3S],
             (instregex "^LD3[BHWD]$")>;

// Contiguous load four structures to four vectors, scalar + imm
def : InstRW<[V2Write_9c_4L_8V],
             (instregex "^LD4[BHWD]_IMM$")>;

// Contiguous load four structures to four vectors, scalar + scalar
def : InstRW<[V2Write_10c_4L_8V_4S],
             (instregex "^LD4[BHWD]$")>;
// Gather load, vector + imm, 32-bit element size
def : InstRW<[V2Write_9c_1L_4V],
             (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
                        "^GLD(FF)?1W_IMM$")>;

// Gather load, vector + imm, 64-bit element size
// NOTE(review): this entry uses the same write as the 32-bit vector + imm
// entry, whereas the other 64-bit gather entries in this group use writes whose
// names carry half the V count of their 32-bit counterparts (4V vs 8V, 2V vs
// 4V). Confirm against the SOG.
def : InstRW<[V2Write_9c_1L_4V],
             (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
                        "^GLD(FF)?1D_IMM$")>;

// Gather load, 32-bit scaled offset
def : InstRW<[V2Write_10c_1L_8V],
             (instregex "^GLD(FF)?1S?H_S_[SU]XTW_SCALED$",
                        "^GLD(FF)?1W_[SU]XTW_SCALED")>;

// Gather load, 64-bit scaled offset
// NOTE: These instructions are not specified in the SOG.
def : InstRW<[V2Write_10c_1L_4V],
             (instregex "^GLD(FF)?1S?[HW]_D_([SU]XTW_)?SCALED$",
                        "^GLD(FF)?1D_([SU]XTW_)?SCALED$")>;

// Gather load, 32-bit unpacked unscaled offset
def : InstRW<[V2Write_9c_1L_4V],
             (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
                        "^GLD(FF)?1W_[SU]XTW$")>;

// Gather load, 64-bit unpacked unscaled offset
// NOTE: These instructions are not specified in the SOG.
def : InstRW<[V2Write_9c_1L_2V],
             (instregex "^GLD(FF)?1S?[BHW]_D(_[SU]XTW)?$",
                        "^GLD(FF)?1D(_[SU]XTW)?$")>;
// SVE Store instructions
// -----------------------------------------------------------------------------

// Store from predicate reg
def : InstRW<[V2Write_1c_1L01],
             (instrs STR_PXI)>;

// Store from vector reg
def : InstRW<[V2Write_2c_1L01_1V01],
             (instrs STR_ZXI)>;

// Contiguous store, scalar + imm
def : InstRW<[V2Write_2c_1L01_1V01],
             (instregex "^ST1[BHWD]_IMM$",
                        "^ST1B_[HSD]_IMM$",
                        "^ST1H_[SD]_IMM$",
                        "^ST1W_D_IMM$")>;

// Contiguous store, scalar + scalar.
// The ST1H forms get their own write (with an extra S component in its name);
// the B/W/D forms share the scalar + imm write.
def : InstRW<[V2Write_2c_1L01_1S_1V01],
             (instregex "^ST1H(_[SD])?$")>;
def : InstRW<[V2Write_2c_1L01_1V01],
             (instregex "^ST1[BWD]$",
                        "^ST1B_[HSD]$",
                        "^ST1W_D$")>;
// Contiguous store two structures from two vectors, scalar + imm
def : InstRW<[V2Write_4c_1L01_1V01],
             (instregex "^ST2[BHWD]_IMM$")>;

// Contiguous store two structures from two vectors, scalar + scalar
// (ST2H is listed separately from the B/W/D forms)
def : InstRW<[V2Write_4c_2L01_2S_2V01],
             (instrs ST2H)>;
def : InstRW<[V2Write_4c_2L01_2V01],
             (instregex "^ST2[BWD]$")>;

// Contiguous store three structures from three vectors, scalar + imm
def : InstRW<[V2Write_7c_9L01_9V01],
             (instregex "^ST3[BHWD]_IMM$")>;

// Contiguous store three structures from three vectors, scalar + scalar
def : InstRW<[V2Write_7c_9L01_9S_9V01],
             (instregex "^ST3[BHWD]$")>;

// Contiguous store four structures from four vectors, scalar + imm
def : InstRW<[V2Write_11c_18L01_18V01],
             (instregex "^ST4[BHWD]_IMM$")>;

// Contiguous store four structures from four vectors, scalar + scalar
def : InstRW<[V2Write_11c_18L01_18S_18V01],
             (instregex "^ST4[BHWD]$")>;

// Non temporal store, scalar + imm
def : InstRW<[V2Write_2c_1L01_1V],
             (instregex "^STNT1[BHWD]_ZRI$")>;

// Non temporal store, scalar + scalar
// (STNT1H is listed separately from the B/W/D forms)
def : InstRW<[V2Write_2c_1L01_1S_1V],
             (instrs STNT1H_ZRR)>;
def : InstRW<[V2Write_2c_1L01_1V],
             (instregex "^STNT1[BWD]_ZRR$")>;
// Scatter non temporal store, vector + scalar 32-bit element size
def : InstRW<[V2Write_4c_4L01_4V01],
             (instregex "^STNT1[BHW]_ZZR_S")>;

// Scatter non temporal store, vector + scalar 64-bit element size
def : InstRW<[V2Write_2c_2L01_2V01],
             (instregex "^STNT1[BHWD]_ZZR_D")>;

// Scatter store vector + imm 32-bit element size
def : InstRW<[V2Write_4c_4L01_4V01],
             (instregex "^SST1[BH]_S_IMM$",
                        "^SST1W_IMM$")>;

// Scatter store vector + imm 64-bit element size
def : InstRW<[V2Write_2c_2L01_2V01],
             (instregex "^SST1[BHW]_D_IMM$",
                        "^SST1D_IMM$")>;

// Scatter store, 32-bit scaled offset
def : InstRW<[V2Write_4c_4L01_4V01],
             (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;

// Scatter store, 32-bit unpacked unscaled offset
def : InstRW<[V2Write_2c_2L01_2V01],
             (instregex "^SST1[BHW]_D_[SU]XTW$",
                        "^SST1D_[SU]XTW$")>;

// Scatter store, 32-bit unpacked scaled offset
def : InstRW<[V2Write_2c_2L01_2V01],
             (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
                        "^SST1D_[SU]XTW_SCALED$")>;

// Scatter store, 32-bit unscaled offset
def : InstRW<[V2Write_4c_4L01_4V01],
             (instregex "^SST1[BH]_S_[SU]XTW$",
                        "^SST1W_[SU]XTW$")>;

// Scatter store, 64-bit scaled offset
def : InstRW<[V2Write_2c_2L01_2V01],
             (instregex "^SST1[HW]_D_SCALED$",
                        "^SST1D_SCALED$")>;

// Scatter store, 64-bit unscaled offset
def : InstRW<[V2Write_2c_2L01_2V01],
             (instregex "^SST1[BHW]_D$",
                        "^SST1D$")>;
// SVE Miscellaneous instructions
// -----------------------------------------------------------------------------

// Read first fault register, unpredicated
def : InstRW<[V2Write_2c_1M0],
             (instrs RDFFR_P)>;

// Read first fault register, predicated
def : InstRW<[V2Write_3or4c_1M0_1M],
             (instrs RDFFR_PPz)>;

// Read first fault register and set flags
def : InstRW<[V2Write_4or5c_2M0_2M],
             (instrs RDFFRS_PPz)>;

// Set first fault register
// Write to first fault register
def : InstRW<[V2Write_2c_1M0],
             (instrs SETFFR, WRFFR)>;

// Prefetch
// NOTE: This is not specified in the SOG.
def : InstRW<[V2Write_4c_1L],
             (instregex "^PRF[BHWD]")>;
// SVE Cryptographic instructions
// -----------------------------------------------------------------------------

// Crypto AES ops
def : InstRW<[V2Write_2c_1V],
             (instregex "^AES[DE]_ZZZ_B$",
                        "^AESI?MC_ZZ_B$")>;

// Crypto SHA3 ops
def : InstRW<[V2Write_2c_1V0],
             (instregex "^(BCAX|EOR3)_ZZZZ$",
                        "^RAX1_ZZZ_D$",
                        "^XAR_ZZZI_[BHSD]$")>;

// Crypto SM4 ops
def : InstRW<[V2Write_4c_1V0],
             (instregex "^SM4E(KEY)?_ZZZ_S$")>;