[SampleProfileLoader] Fix integer overflow in generateMDProfMetadata (#90217)
[llvm-project.git] / llvm / lib / Target / AArch64 / AArch64SchedNeoverseN2.td
bloba4ac344510de91e428aad70d67a183e98979aed9
1 //=- AArch64SchedNeoverseN2.td - NeoverseN2 Scheduling Defs --*- tablegen -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the scheduling model for the Arm Neoverse N2 processors.
11 //===----------------------------------------------------------------------===//
// Machine model record for the Neoverse N2. Every scheduling definition in
// this file is attached to it through `let SchedModel = NeoverseN2Model`.
def NeoverseN2Model : SchedMachineModel {
  // Micro-ops dispatched at a time.
  let IssueWidth = 10;
  // Entries in the micro-op re-order buffer.
  let MicroOpBufferSize = 160;
  // Optimistic load latency.
  let LoadLatency = 4;
  // Extra cycles for a mispredicted branch.
  let MispredictPenalty = 10;
  // NOTE: Copied from Cortex-A57.
  let LoopMicroOpBufferSize = 16;
  let CompleteModel = 1;

  // Features this model does not describe: everything listed in
  // SMEUnsupported.F plus SVE2p1, PAuthLR, CPA and CSSC.
  list<Predicate> UnsupportedFeatures =
      !listconcat(SMEUnsupported.F,
                  [HasSVE2p1, HasPAuthLR, HasCPA, HasCSSC]);
}
25 //===----------------------------------------------------------------------===//
26 // Define each kind of processor resource and number available on Neoverse N2.
27 // Instructions are first fetched and then decoded into internal macro-ops
28 // (MOPs). From there, the MOPs proceed through register renaming and dispatch
29 // stages. A MOP can be split into two micro-ops further down the pipeline
30 // after the decode stage. Once dispatched, micro-ops wait for their operands
31 // and issue out-of-order to one of thirteen issue pipelines. Each issue
32 // pipeline can accept one micro-op per cycle.
34 let SchedModel = NeoverseN2Model in {
// The thirteen issue ports, grouped by pipeline kind. The ProcResource
// template argument is the number of identical pipelines of that kind.

// Branch 0/1
def N2UnitB : ProcResource<2>;
// Integer single cycle 0/1
def N2UnitS : ProcResource<2>;
// Integer multicycle 0
def N2UnitM0 : ProcResource<1>;
// Integer multicycle 1
def N2UnitM1 : ProcResource<1>;
// Load/Store 0/1
def N2UnitL01 : ProcResource<2>;
// Load 2
def N2UnitL2 : ProcResource<1>;
// Store data 0/1
def N2UnitD : ProcResource<2>;
// FP/ASIMD 0
def N2UnitV0 : ProcResource<1>;
// FP/ASIMD 1
def N2UnitV1 : ProcResource<1>;

// Groups for micro-ops that may issue to any member pipeline.

// FP/ASIMD 0/1
def N2UnitV : ProcResGroup<[N2UnitV0, N2UnitV1]>;
// Integer single/multicycle 0/1
def N2UnitM : ProcResGroup<[N2UnitM0, N2UnitM1]>;
// Load/Store 0/1 and Load 2
def N2UnitL : ProcResGroup<[N2UnitL01, N2UnitL2]>;
// Integer single cycle 0/1 and single/multicycle 0/1
def N2UnitI : ProcResGroup<[N2UnitS, N2UnitM0, N2UnitM1]>;
// Commonly used read types. No forwarding is provided for any of them, so
// each one gets a ReadAdvance of 0 cycles.
foreach NoFwdRead = [ReadI, ReadISReg, ReadIEReg, ReadIM, ReadIMA, ReadID,
                     ReadExtrHi, ReadAdrBase, ReadST, ReadVLD] in
  def : ReadAdvance<NoFwdRead, 0>;

// Writes that reserve no processor resource.
let Unsupported = 1 in
  def : WriteRes<WriteAtomic, []>;
let Latency = 1 in {
  def : WriteRes<WriteBarrier, []>;
  def : WriteRes<WriteHint, []>;
}
let Latency = 4 in
  def : WriteRes<WriteLDHi, []>;
71 //===----------------------------------------------------------------------===//
72 // Define customized scheduler read/write types specific to the Neoverse N2.
74 //===----------------------------------------------------------------------===//
// Generic single micro-op write types. The name encodes the latency in cycles
// followed by the pipeline (or pipeline group) the micro-op is issued to.

// Branch / integer / load-store address, 1 cycle.
let Latency = 1 in {
  def N2Write_1cyc_1B   : SchedWriteRes<[N2UnitB]>;
  def N2Write_1cyc_1I   : SchedWriteRes<[N2UnitI]>;
  def N2Write_1cyc_1M   : SchedWriteRes<[N2UnitM]>;
  def N2Write_1cyc_1M0  : SchedWriteRes<[N2UnitM0]>;
  def N2Write_1cyc_1L01 : SchedWriteRes<[N2UnitL01]>;
}

// Integer multicycle, either M pipeline.
let Latency = 2 in def N2Write_2cyc_1M : SchedWriteRes<[N2UnitM]>;
let Latency = 3 in def N2Write_3cyc_1M : SchedWriteRes<[N2UnitM]>;

// Integer multicycle on M0 only; ReleaseAtCycles equals the latency.
let Latency = 2, ReleaseAtCycles = [2] in
  def N2Write_2cyc_1M0 : SchedWriteRes<[N2UnitM0]>;
let Latency = 3, ReleaseAtCycles = [3] in
  def N2Write_3cyc_1M0 : SchedWriteRes<[N2UnitM0]>;
let Latency = 5, ReleaseAtCycles = [5] in
  def N2Write_5cyc_1M0 : SchedWriteRes<[N2UnitM0]>;
let Latency = 12, ReleaseAtCycles = [12] in
  def N2Write_12cyc_1M0 : SchedWriteRes<[N2UnitM0]>;
let Latency = 20, ReleaseAtCycles = [20] in
  def N2Write_20cyc_1M0 : SchedWriteRes<[N2UnitM0]>;

// Loads on any load pipeline.
let Latency = 4 in def N2Write_4cyc_1L : SchedWriteRes<[N2UnitL]>;
let Latency = 6 in def N2Write_6cyc_1L : SchedWriteRes<[N2UnitL]>;

// FP/ASIMD on either V pipeline.
let Latency = 2  in def N2Write_2cyc_1V  : SchedWriteRes<[N2UnitV]>;
let Latency = 3  in def N2Write_3cyc_1V  : SchedWriteRes<[N2UnitV]>;
let Latency = 4  in def N2Write_4cyc_1V  : SchedWriteRes<[N2UnitV]>;
let Latency = 5  in def N2Write_5cyc_1V  : SchedWriteRes<[N2UnitV]>;
let Latency = 12 in def N2Write_12cyc_1V : SchedWriteRes<[N2UnitV]>;

// FP/ASIMD on V0 only.
let Latency = 2 in def N2Write_2cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 3 in def N2Write_3cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 4 in def N2Write_4cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 7, ReleaseAtCycles = [7] in
  def N2Write_7cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 9  in def N2Write_9cyc_1V0  : SchedWriteRes<[N2UnitV0]>;
let Latency = 10 in def N2Write_10cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 12 in def N2Write_12cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 13 in def N2Write_13cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 15 in def N2Write_15cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 16 in def N2Write_16cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 20 in def N2Write_20cyc_1V0 : SchedWriteRes<[N2UnitV0]>;

// FP/ASIMD on V1 only.
let Latency = 2  in def N2Write_2cyc_1V1  : SchedWriteRes<[N2UnitV1]>;
let Latency = 3  in def N2Write_3cyc_1V1  : SchedWriteRes<[N2UnitV1]>;
let Latency = 4  in def N2Write_4cyc_1V1  : SchedWriteRes<[N2UnitV1]>;
let Latency = 6  in def N2Write_6cyc_1V1  : SchedWriteRes<[N2UnitV1]>;
let Latency = 10 in def N2Write_10cyc_1V1 : SchedWriteRes<[N2UnitV1]>;

// Load/Store 0/1, 6 cycles.
let Latency = 6 in def N2Write_6cyc_1L01 : SchedWriteRes<[N2UnitL01]>;
//===----------------------------------------------------------------------===//
// Generic 2 micro-op write types. NumMicroOps is set once for the whole
// group; each def only states its own latency (and ReleaseAtCycles, if any).

let NumMicroOps = 2 in {

def N2Write_1cyc_1B_1S   : SchedWriteRes<[N2UnitB, N2UnitS]>   { let Latency = 1; }
def N2Write_6cyc_1M0_1B  : SchedWriteRes<[N2UnitM0, N2UnitB]>  { let Latency = 6; }
def N2Write_9cyc_1M0_1L  : SchedWriteRes<[N2UnitM0, N2UnitL]>  { let Latency = 9; }
def N2Write_3cyc_1I_1M   : SchedWriteRes<[N2UnitI, N2UnitM]>   { let Latency = 3; }
def N2Write_4cyc_1I_1L   : SchedWriteRes<[N2UnitI, N2UnitL]>   { let Latency = 4; }
def N2Write_5cyc_1I_1L   : SchedWriteRes<[N2UnitI, N2UnitL]>   { let Latency = 5; }
def N2Write_6cyc_1I_1L   : SchedWriteRes<[N2UnitI, N2UnitL]>   { let Latency = 6; }
def N2Write_7cyc_1I_1L   : SchedWriteRes<[N2UnitI, N2UnitL]>   { let Latency = 7; }
def N2Write_1cyc_1L01_1D : SchedWriteRes<[N2UnitL01, N2UnitD]> { let Latency = 1; }
def N2Write_5cyc_1M0_1V  : SchedWriteRes<[N2UnitM0, N2UnitV]>  { let Latency = 5; }
def N2Write_2cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> { let Latency = 2; }
def N2Write_4cyc_1V1_1V  : SchedWriteRes<[N2UnitV1, N2UnitV]>  { let Latency = 4; }
def N2Write_4cyc_2V0     : SchedWriteRes<[N2UnitV0, N2UnitV0]> { let Latency = 4; }

// Two V0 micro-ops with explicit per-resource ReleaseAtCycles.
def N2Write_10cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let ReleaseAtCycles = [5, 5];
  let Latency = 10;
}
def N2Write_13cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let ReleaseAtCycles = [6, 7];
  let Latency = 13;
}
def N2Write_15cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let ReleaseAtCycles = [7, 8];
  let Latency = 15;
}
def N2Write_16cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let ReleaseAtCycles = [8, 8];
  let Latency = 16;
}

def N2Write_4cyc_2V      : SchedWriteRes<[N2UnitV, N2UnitV]>   { let Latency = 4; }
def N2Write_6cyc_2V      : SchedWriteRes<[N2UnitV, N2UnitV]>   { let Latency = 6; }
def N2Write_6cyc_2L      : SchedWriteRes<[N2UnitL, N2UnitL]>   { let Latency = 6; }
def N2Write_8cyc_1L_1V   : SchedWriteRes<[N2UnitL, N2UnitV]>   { let Latency = 8; }
def N2Write_4cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> { let Latency = 4; }
def N2Write_3cyc_1M0_1M  : SchedWriteRes<[N2UnitM0, N2UnitM]>  { let Latency = 3; }
def N2Write_2cyc_1M0_1M  : SchedWriteRes<[N2UnitM0, N2UnitM]>  { let Latency = 2; }
def N2Write_6cyc_2V1     : SchedWriteRes<[N2UnitV1, N2UnitV1]> { let Latency = 6; }
def N2Write_4cyc_1V0_1M  : SchedWriteRes<[N2UnitV0, N2UnitM]>  { let Latency = 4; }
def N2Write_5cyc_2V0     : SchedWriteRes<[N2UnitV0, N2UnitV0]> { let Latency = 5; }
def N2Write_5cyc_1V1_1M0 : SchedWriteRes<[N2UnitV1, N2UnitM0]> { let Latency = 5; }
def N2Write_7cyc_1M0_1V0 : SchedWriteRes<[N2UnitM0, N2UnitV0]> { let Latency = 7; }
def N2Write_2cyc_1V0_1M  : SchedWriteRes<[N2UnitV0, N2UnitM]>  { let Latency = 2; }
def N2Write_6cyc_1V_1V1  : SchedWriteRes<[N2UnitV, N2UnitV1]>  { let Latency = 6; }
def N2Write_6cyc_1L_1M   : SchedWriteRes<[N2UnitL, N2UnitM]>   { let Latency = 6; }
def N2Write_6cyc_1L_1S   : SchedWriteRes<[N2UnitL, N2UnitS]>   { let Latency = 6; }
def N2Write_9cyc_1L_1V   : SchedWriteRes<[N2UnitL, N2UnitV]>   { let Latency = 9; }
def N2Write_4cyc_2V1     : SchedWriteRes<[N2UnitV1, N2UnitV1]> { let Latency = 4; }

} // NumMicroOps = 2
//===----------------------------------------------------------------------===//
// Generic 3 micro-op write types.

let NumMicroOps = 3 in {

def N2Write_1cyc_1L01_1D_1I : SchedWriteRes<[N2UnitL01, N2UnitD, N2UnitI]> {
  let Latency = 1;
}
def N2Write_2cyc_1L01_1V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitI]> {
  let Latency = 2;
}
def N2Write_2cyc_1L01_2V : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV]> {
  let Latency = 2;
}
def N2Write_7cyc_1M_1M0_1V : SchedWriteRes<[N2UnitM, N2UnitM0, N2UnitV]> {
  let Latency = 7;
}
def N2Write_8cyc_1M0_1V1_1V : SchedWriteRes<[N2UnitM0, N2UnitV1, N2UnitV]> {
  let Latency = 8;
}

} // NumMicroOps = 3
// 10-cycle write made of three micro-ops: one FP/ASIMD (V), one load (L) and
// one integer single-cycle (S), as the name states. The resource list used to
// be [N2UnitV, N2UnitL, N2UnitL], which reserved a second load pipeline and
// never the S pipeline.
def N2Write_10cyc_1V_1L_1S : SchedWriteRes<[N2UnitV, N2UnitL, N2UnitS]> {
  let Latency     = 10;
  let NumMicroOps = 3;
}
// Remaining 3 micro-op write types.
let NumMicroOps = 3 in {

def N2Write_2cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
  let Latency = 2;
}
def N2Write_4cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
  let Latency = 4;
}
def N2Write_6cyc_3L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL]> {
  let Latency = 6;
}
def N2Write_8cyc_1L_2V : SchedWriteRes<[N2UnitL, N2UnitV, N2UnitV]> {
  let Latency = 8;
}

} // NumMicroOps = 3
//===----------------------------------------------------------------------===//
// Generic 4 micro-op write types.

let NumMicroOps = 4 in {

def N2Write_2cyc_1L01_2V_1I
    : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV, N2UnitI]> {
  let Latency = 2;
}
def N2Write_6cyc_4V0
    : SchedWriteRes<[N2UnitV0, N2UnitV0, N2UnitV0, N2UnitV0]> {
  let Latency = 6;
}
def N2Write_4cyc_4V
    : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 4;
}
def N2Write_6cyc_4V
    : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 6;
}
def N2Write_8cyc_2L_2V
    : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
  let Latency = 8;
}
def N2Write_9cyc_2L_2V
    : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
  let Latency = 9;
}
def N2Write_2cyc_2L01_2V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, N2UnitV]> {
  let Latency = 2;
}
def N2Write_4cyc_2L01_2V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, N2UnitV]> {
  let Latency = 4;
}
def N2Write_5cyc_2L01_2V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, N2UnitV]> {
  let Latency = 5;
}
def N2Write_8cyc_2M0_2V0
    : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitV0, N2UnitV0]> {
  let Latency = 8;
}
def N2Write_11cyc_2V_2V1
    : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, N2UnitV1]> {
  let Latency = 11;
}
def N2Write_9cyc_2V_2V1
    : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, N2UnitV1]> {
  let Latency = 9;
}
def N2Write_8cyc_2V_2V1
    : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, N2UnitV1]> {
  let Latency = 8;
}

} // NumMicroOps = 4
// 10-cycle write made of four micro-ops: two loads (L) and two FP/ASIMD
// micro-ops restricted to V1, as the name states. The resource list used to be
// [N2UnitV, N2UnitV, N2UnitV1, N2UnitV1], identical to the *_2V_2V1 types, so
// no load pipeline was ever reserved.
// NOTE(review): confirm against the Neoverse N2 Software Optimization Guide
// entries for the instructions that use this type.
def N2Write_10cyc_2L_2V1 : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV1,
                                          N2UnitV1]> {
  let Latency     = 10;
  let NumMicroOps = 4;
}
// Remaining 4 micro-op write types.
let NumMicroOps = 4 in {

def N2Write_10cyc_2L_2V
    : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
  let Latency = 10;
}
def N2Write_4cyc_2M0_2M
    : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitM, N2UnitM]> {
  let Latency = 4;
}
def N2Write_6cyc_2I_2L
    : SchedWriteRes<[N2UnitI, N2UnitI, N2UnitL, N2UnitL]> {
  let Latency = 6;
}
def N2Write_7cyc_4L
    : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL]> {
  let Latency = 7;
}

} // NumMicroOps = 4
//===----------------------------------------------------------------------===//
// Generic 5 micro-op write types.

let Latency = 2, NumMicroOps = 5 in
def N2Write_2cyc_1L01_2V_2I
    : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV, N2UnitI, N2UnitI]>;

let Latency = 8, NumMicroOps = 5 in
def N2Write_8cyc_2L_3V
    : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV, N2UnitV]>;

//===----------------------------------------------------------------------===//
// Generic 6 micro-op write types.

let Latency = 8, NumMicroOps = 6 in
def N2Write_8cyc_3L_3V
    : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
                     N2UnitV, N2UnitV, N2UnitV]>;

let Latency = 2, NumMicroOps = 6 in
def N2Write_2cyc_3L01_3V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitV, N2UnitV, N2UnitV]>;

let Latency = 6, NumMicroOps = 6 in
def N2Write_6cyc_3L01_3V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitV, N2UnitV, N2UnitV]>;

let Latency = 4, NumMicroOps = 6 in
def N2Write_4cyc_3L01_3V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitV, N2UnitV, N2UnitV]>;

let Latency = 10, NumMicroOps = 6 in
def N2Write_10cyc_2L_2V_2S
    : SchedWriteRes<[N2UnitL, N2UnitL,
                     N2UnitV, N2UnitV,
                     N2UnitS, N2UnitS]>;

//===----------------------------------------------------------------------===//
// Generic 7 micro-op write types.

let Latency = 8, NumMicroOps = 7 in
def N2Write_8cyc_3L_4V
    : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
                     N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;

//===----------------------------------------------------------------------===//
// Generic 8 micro-op write types.

let Latency = 6, NumMicroOps = 8 in
def N2Write_6cyc_8V
    : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV,
                     N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;

let Latency = 2, NumMicroOps = 8 in
def N2Write_2cyc_4L01_4V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;

let Latency = 5, NumMicroOps = 8 in
def N2Write_5cyc_4L01_4V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;

let Latency = 8, NumMicroOps = 8 in
def N2Write_8cyc_4L_4V
    : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
                     N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;

let Latency = 9, NumMicroOps = 8 in
def N2Write_9cyc_4L_4V
    : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
                     N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;
//===----------------------------------------------------------------------===//
// Generic 10 micro-op write types.

let Latency = 7, NumMicroOps = 10 in
def N2Write_7cyc_5L01_5V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitV, N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;

//===----------------------------------------------------------------------===//
// Generic 12 micro-op write types.

let Latency = 7, NumMicroOps = 12 in
def N2Write_7cyc_6L01_6V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitV, N2UnitV, N2UnitV,
                     N2UnitV, N2UnitV, N2UnitV]>;

//===----------------------------------------------------------------------===//
// Generic 15 micro-op write types.

let Latency = 7, NumMicroOps = 15 in
def N2Write_7cyc_5L01_5S_5V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitS, N2UnitS, N2UnitS, N2UnitS, N2UnitS,
                     N2UnitV, N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;

//===----------------------------------------------------------------------===//
// Generic 18 micro-op write types.

let Latency = 11, NumMicroOps = 18 in
def N2Write_11cyc_9L01_9V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitV, N2UnitV, N2UnitV,
                     N2UnitV, N2UnitV, N2UnitV,
                     N2UnitV, N2UnitV, N2UnitV]>;

//===----------------------------------------------------------------------===//
// Generic 27 micro-op write types.

let Latency = 11, NumMicroOps = 27 in
def N2Write_11cyc_9L01_9S_9V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitS, N2UnitS, N2UnitS,
                     N2UnitS, N2UnitS, N2UnitS,
                     N2UnitS, N2UnitS, N2UnitS,
                     N2UnitV, N2UnitV, N2UnitV,
                     N2UnitV, N2UnitV, N2UnitV,
                     N2UnitV, N2UnitV, N2UnitV]>;
//===----------------------------------------------------------------------===//
// Variant write types for arithmetic and logical ops with short shifts.

// Arithmetic: 1 cycle on any integer pipe when IsCheapLSL holds, otherwise
// 2 cycles on an M pipe.
def N2Write_Arith
    : SchedWriteVariant<[SchedVar<IsCheapLSL,  [N2Write_1cyc_1I]>,
                         SchedVar<NoSchedPred, [N2Write_2cyc_1M]>]>;

// Logical: 1 cycle on any integer pipe when NeoverseNoLSL holds, otherwise
// 2 cycles on an M pipe.
def N2Write_Logical
    : SchedWriteVariant<[SchedVar<NeoverseNoLSL, [N2Write_1cyc_1I]>,
                         SchedVar<NoSchedPred,   [N2Write_2cyc_1M]>]>;
// Miscellaneous
// -----------------------------------------------------------------------------

// COPY is scheduled like a basic ALU op.
def : InstRW<[WriteI], (instrs COPY)>;

// Branch Instructions
// -----------------------------------------------------------------------------

// Branch, immed; compare and branch.
def : SchedAlias<WriteBr, N2Write_1cyc_1B>;

// Branch, register.
def : SchedAlias<WriteBrReg, N2Write_1cyc_1B>;

// Branch and link, immed or register.
def : InstRW<[N2Write_1cyc_1B_1S],
             (instrs BL, BLR)>;
// Arithmetic and Logical Instructions
// -----------------------------------------------------------------------------

// ALU, basic (with or without flagset).
def : SchedAlias<WriteI, N2Write_1cyc_1I>;

// ALU, extend and shift.
def : SchedAlias<WriteIEReg, N2Write_2cyc_1M>;

// Arithmetic with shifted register, with or without flagset:
//   LSL shift, shift <= 4
//   LSR/ASR/ROR shift or LSL shift > 4
// The variant picks between the two cases.
def : SchedAlias<WriteISReg, N2Write_Arith>;

// Logical, shift, no flagset.
def : InstRW<[N2Write_1cyc_1I],
             (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;

// Logical, shift, flagset.
def : InstRW<[N2Write_Logical],
             (instregex "^(AND|BIC)S[WX]rs$")>;

// Arithmetic, immediate to logical address tag.
def : InstRW<[N2Write_2cyc_1M],
             (instrs ADDG, SUBG)>;

// Convert floating-point condition flags; flag manipulation instructions.
def : WriteRes<WriteSys, []> {
  let Latency = 1;
}

// Insert Random Tags.
def : InstRW<[N2Write_2cyc_1M],
             (instrs IRG, IRGstack)>;

// Insert Tag Mask; Subtract Pointer (with or without flagset).
def : InstRW<[N2Write_1cyc_1I],
             (instrs GMI, SUBP, SUBPS)>;

// Move and shift instructions
// -----------------------------------------------------------------------------

def : SchedAlias<WriteImm, N2Write_1cyc_1I>;
// Divide and Multiply Instructions
// -----------------------------------------------------------------------------

// SDIV, UDIV: 32-bit and 64-bit forms, both on M0.
def : SchedAlias<WriteID32, N2Write_12cyc_1M0>;
def : SchedAlias<WriteID64, N2Write_20cyc_1M0>;

// Multiply: 2 cycles on either M pipe, for both operand widths.
let Latency = 2 in {
  def : WriteRes<WriteIM32, [N2UnitM]>;
  def : WriteRes<WriteIM64, [N2UnitM]>;
}

// Multiply high.
def : InstRW<[N2Write_3cyc_1M],
             (instrs SMULHrr, UMULHrr)>;
// Pointer Authentication Instructions (v8.3 PAC)
// -----------------------------------------------------------------------------

// Authenticate data / instruction address.
// Compute pointer authentication code for data / instruction address, or
// using the generic key.
def : InstRW<[N2Write_5cyc_1M0],
             (instregex "^AUT", "^PAC")>;

// Branch and link / branch / return, register form, with pointer
// authentication.
def : InstRW<[N2Write_6cyc_1M0_1B],
             (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ,
                     BRAA, BRAAZ, BRAB, BRABZ,
                     RETAA, RETAB,
                     ERETAA, ERETAB)>;

// Load register, with pointer authentication.
def : InstRW<[N2Write_9cyc_1M0_1L],
             (instregex "^LDRA[AB](indexed|writeback)")>;

// Strip pointer authentication code.
def : InstRW<[N2Write_2cyc_1M0],
             (instrs XPACD, XPACI, XPACLRI)>;
// Miscellaneous data-processing instructions
// -----------------------------------------------------------------------------

// Bitfield extract, one reg or two regs.
// NOTE: The one-reg case (EXTR with both source operands the same register)
// is not modelled separately from the two-reg case.
def : SchedAlias<WriteExtr, N2Write_3cyc_1I_1M>;
def : InstRW<[N2Write_3cyc_1I_1M],
             (instrs EXTRWrri, EXTRXrri)>;

// Bitfield move, basic.
def : SchedAlias<WriteIS, N2Write_1cyc_1I>;

// Bitfield move, insert.
def : InstRW<[N2Write_2cyc_1M],
             (instregex "^BFM[WX]ri$")>;
// Load instructions
// -----------------------------------------------------------------------------

// Plain loads use a load pipe; indexed loads add an integer micro-op.
def : SchedAlias<WriteLD,    N2Write_4cyc_1L>;
def : SchedAlias<WriteLDIdx, N2Write_4cyc_1I_1L>;

// LDPSW was previously mapped to N2Write_5cyc_1M0, which reserves the integer
// multicycle pipe M0 for 5 cycles and no load pipe at all. It now uses the
// 5-cycle integer + load type defined for this purpose.
// NOTE(review): confirm the I, L pipeline assignment against the LDPSW rows of
// the Neoverse N2 Software Optimization Guide.

// Load pair, signed immed offset, signed words
def : InstRW<[N2Write_5cyc_1I_1L, WriteLDHi], (instrs LDPSWi)>;
// Load pair, immed post-index or immed pre-index, signed words
def : InstRW<[WriteAdr, N2Write_5cyc_1I_1L, WriteLDHi],
             (instregex "^LDPSW(post|pre)$")>;
// Store instructions
// -----------------------------------------------------------------------------

// Stores and store pairs: one L01 micro-op plus one store-data micro-op;
// indexed stores add an integer micro-op.
def : SchedAlias<WriteST, N2Write_1cyc_1L01_1D>;
def : SchedAlias<WriteSTIdx, N2Write_1cyc_1L01_1D_1I>;
def : SchedAlias<WriteSTP, N2Write_1cyc_1L01_1D>;
// Address writeback; copied from A57.
def : SchedAlias<WriteAdr, N2Write_1cyc_1I>;

// Tag load instructions
// -----------------------------------------------------------------------------

// Load allocation tag; load multiple allocation tags.
def : InstRW<[N2Write_4cyc_1L],
             (instrs LDG, LDGM)>;

// Tag store instructions
// -----------------------------------------------------------------------------

// Pre-index and post-index forms of:
//   store allocation tags to one or two granules
//   store allocation tag to one or two granules, zeroing
//   store allocation tag and reg pair to memory
def : InstRW<[N2Write_1cyc_1L01_1D_1I],
             (instrs STGPreIndex, STGPostIndex,
                     ST2GPreIndex, ST2GPostIndex,
                     STZGPreIndex, STZGPostIndex,
                     STZ2GPreIndex, STZ2GPostIndex,
                     STGPpre, STGPpost)>;

// Signed-offset forms of:
//   store allocation tags to one or two granules
//   store allocation tag to two granules, zeroing
//   store allocation tag and reg pair to memory
// plus store multiple allocation tags.
def : InstRW<[N2Write_1cyc_1L01_1D],
             (instrs STGi, ST2Gi, STZGi, STZ2Gi,
                     STGPi,
                     STGM, STZGM)>;
// FP data processing instructions
// -----------------------------------------------------------------------------

// FP absolute value, arithmetic, min/max, negate, select.
def : SchedAlias<WriteF, N2Write_2cyc_1V>;

// FP compare.
def : SchedAlias<WriteFCmp, N2Write_2cyc_1V0>;

// FP divide, square root (default; refined per form below).
def : SchedAlias<WriteFDiv, N2Write_7cyc_1V0>;

// FP divide: H-form, S-form, D-form.
def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVHrr)>;
def : InstRW<[N2Write_10cyc_1V0], (instrs FDIVSrr)>;
def : InstRW<[N2Write_15cyc_1V0], (instrs FDIVDrr)>;

// FP square root: H-form, S-form, D-form.
def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTHr)>;
def : InstRW<[N2Write_9cyc_1V0], (instrs FSQRTSr)>;
def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRTDr)>;

// FP multiply.
def : WriteRes<WriteFMul, [N2UnitV]> {
  let Latency = 3;
}

// FP multiply accumulate.
def : InstRW<[N2Write_4cyc_1V],
             (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;

// FP round to integral.
def : InstRW<[N2Write_3cyc_1V0],
             (instregex "^FRINT[AIMNPXZ][HSD]r$",
                        "^FRINT(32|64)[XZ][SD]r$")>;
// FP miscellaneous instructions
// -----------------------------------------------------------------------------

// FP convert, from gen to vec reg.
def : InstRW<[N2Write_3cyc_1M0],
             (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;

// FP convert, from vec to gen reg.
def : InstRW<[N2Write_3cyc_1V],
             (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;

// FP convert, Javascript from vec to gen reg; FP convert, from vec to vec reg.
def : SchedAlias<WriteFCvt, N2Write_3cyc_1V0>;

// FP move, immed or register.
def : SchedAlias<WriteFImm, N2Write_2cyc_1V>;

// FP transfer, from gen to low half of vec reg.
// NOTE(review): this list also names FMOVHWr, FMOVHXr, FMOVSWr and FMOVDXr,
// which look like vec-to-gen transfers; confirm they belong here rather than
// under the WriteFCopy alias below.
def : InstRW<[N2Write_3cyc_1M0],
             (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
                     FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>;

// FP transfer, from gen to high half of vec reg.
def : InstRW<[N2Write_5cyc_1M0_1V],
             (instrs FMOVXDHighr)>;

// FP transfer, from vec to gen reg.
def : SchedAlias<WriteFCopy, N2Write_2cyc_1V>;
852 // FP load instructions
853 // -----------------------------------------------------------------------------
855 // Load vector reg, literal, S/D/Q forms
856 // Load vector reg, unscaled immed
// One record serves both categories: the first pattern is the literal form
// (S/D/Q only), the second the unscaled-immediate form (B/H/S/D/Q).
857 def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[SDQ]l$",
858                                            "^LDUR[BHSDQ]i$")>;
860 // Load vector reg, immed post-index
// Writes are listed in the order of the instruction's results: the base
// register writeback first (WriteAdr), then the loaded vector register
// (N2Write_6cyc_1I_1L). This is the same order used by the pre-index entry
// and by every other writeback load/store in this file; the previous list
// put the 6-cycle load write on the writeback and a 1-cycle WriteI on the
// loaded data.
861 def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L], (instregex "^LDR[BHSDQ]post$")>;
862 // Load vector reg, immed pre-index
// WriteAdr is for the base-register writeback; the second write is the load.
863 def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L], (instregex "^LDR[BHSDQ]pre$")>;
865 // Load vector reg, unsigned immed
866 def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>;
868 // Load vector reg, register offset, basic
869 // Load vector reg, register offset, scale, S/D-form
870 // Load vector reg, register offset, extend
871 // Load vector reg, register offset, extend, scale, S/D-form
// The pattern selects on register size (B/S/D) and index register width (W/X)
// only; whether the offset is scaled or extended is not part of the name.
872 def : InstRW<[N2Write_6cyc_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;
874 // Load vector reg, register offset, scale, H/Q-form
875 // Load vector reg, register offset, extend, scale, H/Q-form
// NOTE(review): this pattern matches every H/Q register-offset load, including
// the unscaled ("basic"/"extend") forms that the headings above place in the
// 6-cycle group. Presumably accepted as a conservative approximation -- confirm.
876 def : InstRW<[N2Write_7cyc_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;
878 // Load vector pair, immed offset, S/D-form
// Pair loads list one write per destination: the named write, then WriteLDHi.
879 def : InstRW<[N2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;
881 // Load vector pair, immed offset, Q-form
882 def : InstRW<[N2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
884 // Load vector pair, immed post-index, S/D-form
885 // Load vector pair, immed pre-index, S/D-form
// Writeback pair loads: WriteAdr, then the load write, then WriteLDHi.
886 def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L, WriteLDHi],
887              (instregex "^LDP[SD](pre|post)$")>;
889 // Load vector pair, immed post-index, Q-form
890 // Load vector pair, immed pre-index, Q-form
891 def : InstRW<[WriteAdr, N2Write_6cyc_2I_2L, WriteLDHi], (instrs LDPQpost,
892                                                                 LDPQpre)>;
894 // FP store instructions
895 // -----------------------------------------------------------------------------
897 // Store vector reg, unscaled immed, B/H/S/D-form
898 // Store vector reg, unscaled immed, Q-form
899 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STUR[BHSDQ]i$")>;
901 // Store vector reg, immed post-index, B/H/S/D-form
902 // Store vector reg, immed post-index, Q-form
903 // Store vector reg, immed pre-index, B/H/S/D-form
904 // Store vector reg, immed pre-index, Q-form
// All four writeback categories share one record: WriteAdr for the base
// update, then the store write, plus a ReadAdrBase read.
905 def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I, ReadAdrBase],
906              (instregex "^STR[BHSDQ](pre|post)$")>;
908 // Store vector reg, unsigned immed, B/H/S/D-form
909 // Store vector reg, unsigned immed, Q-form
910 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STR[BHSDQ]ui$")>;
912 // Store vector reg, register offset, basic, B/H/S/D-form
913 // Store vector reg, register offset, basic, Q-form
914 // Store vector reg, register offset, scale, S/D-form
915 // Store vector reg, register offset, extend, B/H/S/D-form
916 // Store vector reg, register offset, extend, Q-form
917 // Store vector reg, register offset, extend, scale, S/D-form
918 def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
919              (instregex "^STR[BSD]ro[WX]$")>;
921 // Store vector reg, register offset, scale, H-form
922 // Store vector reg, register offset, scale, Q-form
923 // Store vector reg, register offset, extend, scale, H-form
924 // Store vector reg, register offset, extend, scale, Q-form
// This record names exactly the same write and read as the B/S/D record above;
// the two are apparently kept separate only to follow the category headings.
925 def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
926              (instregex "^STR[HQ]ro[WX]$")>;
928 // Store vector pair, immed offset, S-form
929 // Store vector pair, immed offset, D-form
930 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STN?P[SD]i$")>;
932 // Store vector pair, immed offset, Q-form
// Q-form pairs use the _2V variant of the write and are listed by name.
933 def : InstRW<[N2Write_2cyc_1L01_2V], (instrs STPQi, STNPQi)>;
935 // Store vector pair, immed post-index, S-form
936 // Store vector pair, immed post-index, D-form
937 // Store vector pair, immed pre-index, S-form
938 // Store vector pair, immed pre-index, D-form
// WriteAdr is for the base-register writeback; the second write is the store.
939 def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I],
940              (instregex "^STP[SD](pre|post)$")>;
942 // Store vector pair, immed post-index, Q-form
// NOTE(review): unlike the S/D-form record above, the two Q-form records list
// no WriteAdr, so the store write is what gets attached to the writeback
// result. Possibly intentional -- confirm before aligning them.
943 def : InstRW<[N2Write_2cyc_1L01_2V_1I], (instrs STPQpost)>;
945 // Store vector pair, immed pre-index, Q-form
946 def : InstRW<[N2Write_2cyc_1L01_2V_2I], (instrs STPQpre)>;
948 // ASIMD integer instructions
949 // -----------------------------------------------------------------------------
951 // ASIMD absolute diff
952 // ASIMD absolute diff long
953 // ASIMD arith, basic
954 // ASIMD arith, complex
955 // ASIMD arith, pair-wise
956 // ASIMD compare
957 // ASIMD logical
958 // ASIMD max/min, basic and pair-wise
// None of the categories above is enumerated by opcode. The generic D-form and
// Q-form vector writes are both remapped to the same 2-cycle write instead.
959 def : SchedAlias<WriteVd, N2Write_2cyc_1V>;
960 def : SchedAlias<WriteVq, N2Write_2cyc_1V>;
962 // ASIMD absolute diff accum
963 // ASIMD absolute diff accum long
964 def : InstRW<[N2Write_4cyc_1V1],
965              (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
// The three reduction records below split ADDV/SADDLV/UADDLV by vector
// arrangement, using the vNiM part of the opcode name.
967 // ASIMD arith, reduce, 4H/4S
968 def : InstRW<[N2Write_2cyc_1V1], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
970 // ASIMD arith, reduce, 8B/8H
971 def : InstRW<[N2Write_4cyc_1V1_1V],
972              (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
974 // ASIMD arith, reduce, 16B
975 def : InstRW<[N2Write_4cyc_1V1], (instrs ADDVv16i8v, SADDLVv16i8v,
976                                          UADDLVv16i8v)>;
978 // ASIMD dot product
979 // ASIMD dot product using signed and unsigned integers
// Pattern covers SDOT/UDOT/SUDOT/USDOT, with or without the "lane" infix.
980 def : InstRW<[N2Write_3cyc_1V],
981              (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;
983 // ASIMD matrix multiply-accumulate
984 def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA, UMMLA, USMMLA)>;
986 // ASIMD max/min, reduce, 4H/4S
987 def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU](MAX|MIN)Vv4i16v$",
988                                             "^[SU](MAX|MIN)Vv4i32v$")>;
990 // ASIMD max/min, reduce, 8B/8H
991 def : InstRW<[N2Write_4cyc_1V1_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
992                                                "^[SU](MAX|MIN)Vv8i16v$")>;
994 // ASIMD max/min, reduce, 16B
// (SMAXV/SMINV/UMAXV/UMINV on the v16i8 arrangement. The anchor is written
// out explicitly, as in the 4H/4S and 8B/8H records.)
995 def : InstRW<[N2Write_4cyc_2V1], (instregex "^[SU](MAX|MIN)Vv16i8v$")>;
997 // ASIMD multiply
998 def : InstRW<[N2Write_4cyc_1V0], (instregex "^MULv", "^SQ(R)?DMULHv")>;
1000 // ASIMD multiply accumulate
1001 def : InstRW<[N2Write_4cyc_1V0], (instregex "^MLAv", "^MLSv")>;
1003 // ASIMD multiply accumulate high
1004 def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
1006 // ASIMD multiply accumulate long
1007 def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
1009 // ASIMD multiply accumulate saturating long
1010 def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMLALv", "^SQDMLSLv")>;
1012 // ASIMD multiply/multiply long (8x8) polynomial, D-form
1013 // ASIMD multiply/multiply long (8x8) polynomial, Q-form
// Only the v8i8/v16i8 arrangements of PMUL/PMULL are matched here.
1014 def : InstRW<[N2Write_3cyc_1V0], (instregex "^PMULL?(v8i8|v16i8)$")>;
1016 // ASIMD multiply long
1017 def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]MULLv", "^SQDMULLv")>;
1019 // ASIMD pairwise add and accumulate long
1020 def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALPv")>;
1022 // ASIMD shift accumulate
1023 def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]SRAv", "^[SU]RSRAv")>;
1025 // ASIMD shift by immed, basic
1026 def : InstRW<[N2Write_2cyc_1V1], (instregex "^SHLv", "^SHLLv", "^SHRNv",
1027                                             "^SSHLLv", "^SSHRv", "^USHLLv",
1028                                             "^USHRv")>;
1030 // ASIMD shift by immed and insert, basic
1031 def : InstRW<[N2Write_2cyc_1V1], (instregex "^SLIv", "^SRIv")>;
1033 // ASIMD shift by immed, complex
// SQSHL/SQSHLU/UQSHL appear twice: once with a single-letter size suffix
// ([bhsd]) and once with a vector arrangement followed by "_shift". The "$"
// after "_shift" keeps these apart from the shift-by-register record further
// down, whose pattern ends right after the arrangement.
1034 def : InstRW<[N2Write_4cyc_1V1],
1035              (instregex "^RSHRNv", "^SQRSHRNv", "^SQRSHRUNv",
1036                         "^(SQSHLU?|UQSHL)[bhsd]$",
1037                         "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
1038                         "^SQSHRNv", "^SQSHRUNv", "^SRSHRv", "^UQRSHRNv",
1039                         "^UQSHRNv", "^URSHRv")>;
1041 // ASIMD shift by register, basic
1042 def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]SHLv")>;
1044 // ASIMD shift by register, complex
// The last pattern enumerates the arrangements and ends with "$", so it only
// matches names with nothing after the arrangement (no "_shift" suffix).
1045 def : InstRW<[N2Write_4cyc_1V1],
1046              (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
1047                         "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;
1049 // ASIMD floating-point instructions
1050 // -----------------------------------------------------------------------------
1052 // ASIMD FP absolute value/difference
1053 // ASIMD FP arith, normal
1054 // ASIMD FP compare
1055 // ASIMD FP complex add
1056 // ASIMD FP max/min, normal
1057 // ASIMD FP max/min, pairwise
1058 // ASIMD FP negate
1059 // Handled by SchedAlias<WriteV[dq], ...>
1061 // ASIMD FP complex multiply add
1062 def : InstRW<[N2Write_4cyc_1V], (instregex "^FCMLAv")>;
// The convert records below are grouped by element width and vector length.
// The patterns select the group from the arrangement in the opcode name.
1064 // ASIMD FP convert, long (F16 to F32)
1065 def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTL(v4|v8)i16")>;
1067 // ASIMD FP convert, long (F32 to F64)
1068 def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTL(v2|v4)i32")>;
1070 // ASIMD FP convert, narrow (F32 to F16)
1071 def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTN(v4|v8)i16")>;
1073 // ASIMD FP convert, narrow (F64 to F32)
// FCVTXN is placed in the same group as the plain F64-to-F32 narrow.
1074 def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTN(v2|v4)i32",
1075                                             "^FCVTXN(v2|v4)f32")>;
1077 // ASIMD FP convert, other, D-form F32 and Q-form F64
// "[FSU]CVT[AMNPZ][SU]" spans the rounding-mode variants; the second pattern
// is the integer-to-FP direction (SCVTF/UCVTF).
1078 def : InstRW<[N2Write_3cyc_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
1079                                             "^[SU]CVTFv2f(32|64)$")>;
1081 // ASIMD FP convert, other, D-form F16 and Q-form F32
1082 def : InstRW<[N2Write_4cyc_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
1083                                             "^[SU]CVTFv4f(16|32)$")>;
1085 // ASIMD FP convert, other, Q-form F16
1086 def : InstRW<[N2Write_6cyc_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
1087                                             "^[SU]CVTFv8f16$")>;
// Vector divides are listed one opcode per record.
// NOTE(review): no FDIVv1* form appears in this group -- presumably covered
// elsewhere or nonexistent; confirm.
1089 // ASIMD FP divide, D-form, F16
1090 def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVv4f16)>;
1092 // ASIMD FP divide, D-form, F32
1093 def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv2f32)>;
1095 // ASIMD FP divide, Q-form, F16
1096 def : InstRW<[N2Write_13cyc_2V0], (instrs FDIVv8f16)>;
1098 // ASIMD FP divide, Q-form, F32
1099 def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv4f32)>;
1101 // ASIMD FP divide, Q-form, F64
1102 def : InstRW<[N2Write_15cyc_2V0], (instrs FDIVv2f64)>;
1104 // ASIMD FP max/min, reduce, F32 and D-form F16
// Covers FMAXV/FMINV and the NM variants (FMAXNMV/FMINNMV).
1105 def : InstRW<[N2Write_4cyc_1V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;
1107 // ASIMD FP max/min, reduce, Q-form F16
1108 def : InstRW<[N2Write_6cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;
1110 // ASIMD FP multiply
1111 def : InstRW<[N2Write_3cyc_1V], (instregex "^FMULv", "^FMULXv")>;
1113 // ASIMD FP multiply accumulate
1114 def : InstRW<[N2Write_4cyc_1V], (instregex "^FMLAv", "^FMLSv")>;
1116 // ASIMD FP multiply accumulate long
1117 def : InstRW<[N2Write_5cyc_1V], (instregex "^FMLALv", "^FMLSLv")>;
1119 // ASIMD FP round, D-form F32 and Q-form F64
// First pattern: FRINT{A,I,M,N,P,X,Z} on v2f32/v2f64.
// Second pattern: FRINT32{X,Z} / FRINT64{X,Z} on v2f32/v2f64. The group must
// open with "(" -- with "[" the text up to the next "]" is parsed as a bracket
// expression and the FRINT32*/FRINT64* opcodes are never matched.
1120 def : InstRW<[N2Write_3cyc_1V0],
1121              (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
1122                         "^FRINT(32|64)[XZ]v2f(32|64)$")>;
1124 // ASIMD FP round, D-form F16 and Q-form F32
// The FRINT32/FRINT64 pattern lists v4f32 only; no v4f16 alternative is given.
1125 def : InstRW<[N2Write_4cyc_2V0],
1126              (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
1127                         "^FRINT(32|64)[XZ]v4f32$")>;
1130 // ASIMD FP round, Q-form F16
1131 def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
// Vector square roots mirror the vector divide group: one opcode per record,
// with the cycle count in the write name varying by element width and length.
1133 // ASIMD FP square root, D-form, F16
1134 def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTv4f16)>;
1136 // ASIMD FP square root, D-form, F32
1137 def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv2f32)>;
1139 // ASIMD FP square root, Q-form, F16
1140 def : InstRW<[N2Write_13cyc_2V0], (instrs FSQRTv8f16)>;
1142 // ASIMD FP square root, Q-form, F32
1143 def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv4f32)>;
1145 // ASIMD FP square root, Q-form, F64
1146 def : InstRW<[N2Write_16cyc_2V0], (instrs FSQRTv2f64)>;
1148 // ASIMD BFloat16 (BF16) instructions
1149 // -----------------------------------------------------------------------------
// Every BF16 record names its opcodes explicitly with (instrs ...); no regex
// is used in this group.
1151 // ASIMD convert, F32 to BF16
1152 def : InstRW<[N2Write_4cyc_1V0], (instrs BFCVTN, BFCVTN2)>;
1154 // ASIMD dot product
1155 def : InstRW<[N2Write_4cyc_1V], (instrs BFDOTv4bf16, BFDOTv8bf16)>;
1157 // ASIMD matrix multiply accumulate
1158 def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA)>;
1160 // ASIMD multiply accumulate long
// Bottom/top variants, each with its indexed (Idx) counterpart.
1161 def : InstRW<[N2Write_4cyc_1V], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
1162                                         BFMLALTIdx)>;
1164 // Scalar convert, F32 to BF16
1165 def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT)>;
1167 // ASIMD miscellaneous instructions
1168 // -----------------------------------------------------------------------------
1170 // ASIMD bit reverse
1171 // ASIMD bitwise insert
1172 // ASIMD count
1173 // ASIMD duplicate, element
1174 // ASIMD extract
1175 // ASIMD extract narrow
1176 // ASIMD insert, element to element
1177 // ASIMD move, FP immed
1178 // ASIMD move, integer immed
1179 // ASIMD reverse
1180 // ASIMD table lookup, 1 or 2 table regs
1181 // ASIMD table lookup extension, 1 table reg
1182 // ASIMD transfer, element to gen reg
1183 // ASIMD transpose
1184 // ASIMD unzip/zip
1185 // Handled by SchedAlias<WriteV[dq], ...>
1187 // ASIMD duplicate, gen reg
// Matches any DUPv* opcode whose name also contains "gpr".
1188 def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;
1190 // ASIMD extract narrow, saturating
1191 def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
1193 // ASIMD reciprocal and square root estimate, D-form U32
1194 def : InstRW<[N2Write_3cyc_1V0], (instrs URECPEv2i32, URSQRTEv2i32)>;
1196 // ASIMD reciprocal and square root estimate, Q-form U32
1197 def : InstRW<[N2Write_4cyc_2V0], (instrs URECPEv4i32, URSQRTEv4i32)>;
1199 // ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
// The v1* opcodes are the scalar forms named in the heading; v2f32 is the
// D-form F32 one.
1200 def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPEv1f16, FRECPEv1i32,
1201                                          FRECPEv1i64, FRECPEv2f32,
1202                                          FRSQRTEv1f16, FRSQRTEv1i32,
1203                                          FRSQRTEv1i64, FRSQRTEv2f32)>;
1205 // ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
// NOTE(review): no v2f64 estimate opcode is listed in any of these records --
// presumably left to the generic vector alias; confirm.
1206 def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPEv4f16, FRECPEv4f32,
1207                                          FRSQRTEv4f16, FRSQRTEv4f32)>;
1209 // ASIMD reciprocal and square root estimate, Q-form F16
1210 def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPEv8f16, FRSQRTEv8f16)>;
1212 // ASIMD reciprocal exponent
1213 def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRECPXv")>;
1215 // ASIMD reciprocal step
1216 def : InstRW<[N2Write_4cyc_1V], (instregex "^FRECPSv", "^FRSQRTSv")>;
// Table lookups with 1 or 2 table registers (TBL) and 1 table register (TBX)
// are left to the generic alias noted at the top of this section. Only the
// larger table sizes are listed by name below.
1218 // ASIMD table lookup, 3 table regs
1219 def : InstRW<[N2Write_4cyc_2V], (instrs TBLv8i8Three, TBLv16i8Three)>;
1221 // ASIMD table lookup, 4 table regs
1222 def : InstRW<[N2Write_4cyc_4V], (instrs TBLv8i8Four, TBLv16i8Four)>;
1224 // ASIMD table lookup extension, 2 table reg
1225 def : InstRW<[N2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>;
1227 // ASIMD table lookup extension, 3 table reg
1228 def : InstRW<[N2Write_6cyc_4V], (instrs TBXv8i8Three, TBXv16i8Three)>;
1230 // ASIMD table lookup extension, 4 table reg
1231 def : InstRW<[N2Write_6cyc_8V], (instrs TBXv8i8Four, TBXv16i8Four)>;
1233 // ASIMD transfer, gen reg to element
1234 def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
1236 // ASIMD load instructions
1237 // -----------------------------------------------------------------------------
// Layout used throughout the ASIMD load section: each category has a record
// for the base opcode and a second record for the "_POST" opcode, which
// prepends WriteAdr for the base-register writeback.
1239 // ASIMD load, 1 element, multiple, 1 reg, D-form
1240 def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
1241 def : InstRW<[WriteAdr, N2Write_6cyc_1L],
1242              (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;
1244 // ASIMD load, 1 element, multiple, 1 reg, Q-form
1245 def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
1246 def : InstRW<[WriteAdr, N2Write_6cyc_1L],
1247              (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;
1249 // ASIMD load, 1 element, multiple, 2 reg, D-form
1250 def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
1251 def : InstRW<[WriteAdr, N2Write_6cyc_2L],
1252              (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
1254 // ASIMD load, 1 element, multiple, 2 reg, Q-form
1255 def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
1256 def : InstRW<[WriteAdr, N2Write_6cyc_2L],
1257              (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
1259 // ASIMD load, 1 element, multiple, 3 reg, D-form
1260 def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
1261 def : InstRW<[WriteAdr, N2Write_6cyc_3L],
1262              (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
1264 // ASIMD load, 1 element, multiple, 3 reg, Q-form
1265 def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
1266 def : InstRW<[WriteAdr, N2Write_6cyc_3L],
1267              (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
// In this group the D-form and Q-form of each register count name the same
// write; only the four-register case below moves from 6cyc to 7cyc.
1269 // ASIMD load, 1 element, multiple, 4 reg, D-form
1270 def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
1271 def : InstRW<[WriteAdr, N2Write_7cyc_4L],
1272              (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
1274 // ASIMD load, 1 element, multiple, 4 reg, Q-form
1275 def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
1276 def : InstRW<[WriteAdr, N2Write_7cyc_4L],
1277              (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
// Patterns carry an explicit "^" anchor, like the LD1 multiple-register
// records. Each category has a base record and a "_POST" record that adds
// WriteAdr for the writeback.
1279 // ASIMD load, 1 element, one lane, B/H/S
1280 // ASIMD load, 1 element, one lane, D
1281 def : InstRW<[N2Write_8cyc_1L_1V],           (instregex "^LD1i(8|16|32|64)$")>;
1282 def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "^LD1i(8|16|32|64)_POST$")>;
1284 // ASIMD load, 1 element, all lanes, D-form, B/H/S
1285 // ASIMD load, 1 element, all lanes, D-form, D
1286 def : InstRW<[N2Write_8cyc_1L_1V],           (instregex "^LD1Rv(8b|4h|2s|1d)$")>;
1287 def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "^LD1Rv(8b|4h|2s|1d)_POST$")>;
1289 // ASIMD load, 1 element, all lanes, Q-form
1290 def : InstRW<[N2Write_8cyc_1L_1V],           (instregex "^LD1Rv(16b|8h|4s|2d)$")>;
1291 def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "^LD1Rv(16b|8h|4s|2d)_POST$")>;
// LD2 family. Patterns are written with an explicit "^" anchor. The base
// record comes first, then the "_POST" record with WriteAdr for the writeback.
1293 // ASIMD load, 2 element, multiple, D-form, B/H/S
1294 def : InstRW<[N2Write_8cyc_1L_2V],           (instregex "^LD2Twov(8b|4h|2s)$")>;
1295 def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
1297 // ASIMD load, 2 element, multiple, Q-form, B/H/S
1298 // ASIMD load, 2 element, multiple, Q-form, D
1299 def : InstRW<[N2Write_8cyc_2L_2V],           (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
1300 def : InstRW<[WriteAdr, N2Write_8cyc_2L_2V], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
1302 // ASIMD load, 2 element, one lane, B/H
1303 // ASIMD load, 2 element, one lane, S
1304 // ASIMD load, 2 element, one lane, D
1305 def : InstRW<[N2Write_8cyc_1L_2V],           (instregex "^LD2i(8|16|32|64)$")>;
1306 def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "^LD2i(8|16|32|64)_POST$")>;
1308 // ASIMD load, 2 element, all lanes, D-form, B/H/S
1309 // ASIMD load, 2 element, all lanes, D-form, D
1310 def : InstRW<[N2Write_8cyc_1L_2V],           (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
1311 def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
1313 // ASIMD load, 2 element, all lanes, Q-form
1314 def : InstRW<[N2Write_8cyc_1L_2V],           (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
1315 def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
// LD3 family. Patterns are written with an explicit "^" anchor. The base
// record comes first, then the "_POST" record with WriteAdr for the writeback.
1317 // ASIMD load, 3 element, multiple, D-form, B/H/S
1318 def : InstRW<[N2Write_8cyc_2L_3V],           (instregex "^LD3Threev(8b|4h|2s)$")>;
1319 def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "^LD3Threev(8b|4h|2s)_POST$")>;
1321 // ASIMD load, 3 element, multiple, Q-form, B/H/S
1322 def : InstRW<[N2Write_8cyc_3L_3V],           (instregex "^LD3Threev(16b|8h|4s)$")>;
1323 def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "^LD3Threev(16b|8h|4s)_POST$")>;
1325 // ASIMD load, 3 element, multiple, Q-form, D
1326 def : InstRW<[N2Write_8cyc_3L_3V],           (instregex "^LD3Threev(2d)$")>;
1327 def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "^LD3Threev(2d)_POST$")>;
1329 // ASIMD load, 3 element, one lane, B/H
1330 // ASIMD load, 3 element, one lane, S
1331 // ASIMD load, 3 element, one lane, D
1332 def : InstRW<[N2Write_8cyc_2L_3V],           (instregex "^LD3i(8|16|32|64)$")>;
1333 def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "^LD3i(8|16|32|64)_POST$")>;
1335 // ASIMD load, 3 element, all lanes, D-form, B/H/S
1336 // ASIMD load, 3 element, all lanes, D-form, D
1337 def : InstRW<[N2Write_8cyc_2L_3V],           (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
1338 def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
1340 // ASIMD load, 3 element, all lanes, Q-form, B/H/S
1341 // ASIMD load, 3 element, all lanes, Q-form, D
1342 def : InstRW<[N2Write_8cyc_3L_3V],           (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
1343 def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
// LD4 family. Patterns are written with an explicit "^" anchor. The base
// record comes first, then the "_POST" record with WriteAdr for the writeback.
1345 // ASIMD load, 4 element, multiple, D-form, B/H/S
1346 def : InstRW<[N2Write_8cyc_3L_4V],           (instregex "^LD4Fourv(8b|4h|2s)$")>;
1347 def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V], (instregex "^LD4Fourv(8b|4h|2s)_POST$")>;
1349 // ASIMD load, 4 element, multiple, Q-form, B/H/S
1350 // ASIMD load, 4 element, multiple, Q-form, D
1351 def : InstRW<[N2Write_9cyc_4L_4V],           (instregex "^LD4Fourv(16b|8h|4s|2d)$")>;
1352 def : InstRW<[WriteAdr, N2Write_9cyc_4L_4V], (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>;
1354 // ASIMD load, 4 element, one lane, B/H
1355 // ASIMD load, 4 element, one lane, S
1356 // ASIMD load, 4 element, one lane, D
1357 def : InstRW<[N2Write_8cyc_3L_4V],           (instregex "^LD4i(8|16|32|64)$")>;
1358 def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V], (instregex "^LD4i(8|16|32|64)_POST$")>;
1360 // ASIMD load, 4 element, all lanes, D-form, B/H/S
1361 // ASIMD load, 4 element, all lanes, D-form, D
1362 def : InstRW<[N2Write_8cyc_3L_4V],           (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
1363 def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
1365 // ASIMD load, 4 element, all lanes, Q-form, B/H/S
1366 // ASIMD load, 4 element, all lanes, Q-form, D
1367 def : InstRW<[N2Write_8cyc_4L_4V],           (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
1368 def : InstRW<[WriteAdr, N2Write_8cyc_4L_4V], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
1370 // ASIMD store instructions
1371 // -----------------------------------------------------------------------------
// ST1 multiple-register stores. Patterns are written with an explicit "^"
// anchor. Each category has a base record and a "_POST" record that adds
// WriteAdr for the base-register writeback.
1373 // ASIMD store, 1 element, multiple, 1 reg, D-form
1374 def : InstRW<[N2Write_2cyc_1L01_1V],           (instregex "^ST1Onev(8b|4h|2s|1d)$")>;
1375 def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "^ST1Onev(8b|4h|2s|1d)_POST$")>;
1377 // ASIMD store, 1 element, multiple, 1 reg, Q-form
1378 def : InstRW<[N2Write_2cyc_1L01_1V],           (instregex "^ST1Onev(16b|8h|4s|2d)$")>;
1379 def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "^ST1Onev(16b|8h|4s|2d)_POST$")>;
1381 // ASIMD store, 1 element, multiple, 2 reg, D-form
1382 def : InstRW<[N2Write_2cyc_1L01_1V],           (instregex "^ST1Twov(8b|4h|2s|1d)$")>;
1383 def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "^ST1Twov(8b|4h|2s|1d)_POST$")>;
1385 // ASIMD store, 1 element, multiple, 2 reg, Q-form
1386 def : InstRW<[N2Write_2cyc_2L01_2V],           (instregex "^ST1Twov(16b|8h|4s|2d)$")>;
1387 def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "^ST1Twov(16b|8h|4s|2d)_POST$")>;
1389 // ASIMD store, 1 element, multiple, 3 reg, D-form
1390 def : InstRW<[N2Write_2cyc_2L01_2V],           (instregex "^ST1Threev(8b|4h|2s|1d)$")>;
1391 def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "^ST1Threev(8b|4h|2s|1d)_POST$")>;
1393 // ASIMD store, 1 element, multiple, 3 reg, Q-form
1394 def : InstRW<[N2Write_2cyc_3L01_3V],           (instregex "^ST1Threev(16b|8h|4s|2d)$")>;
1395 def : InstRW<[WriteAdr, N2Write_2cyc_3L01_3V], (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>;
1397 // ASIMD store, 1 element, multiple, 4 reg, D-form
1398 def : InstRW<[N2Write_2cyc_2L01_2V],           (instregex "^ST1Fourv(8b|4h|2s|1d)$")>;
1399 def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "^ST1Fourv(8b|4h|2s|1d)_POST$")>;
1401 // ASIMD store, 1 element, multiple, 4 reg, Q-form
1402 def : InstRW<[N2Write_2cyc_4L01_4V],           (instregex "^ST1Fourv(16b|8h|4s|2d)$")>;
1403 def : InstRW<[WriteAdr, N2Write_2cyc_4L01_4V], (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>;
// ST1 single-lane and ST2 stores. Patterns are written with an explicit "^"
// anchor. The base record comes first, then the "_POST" record with WriteAdr
// for the writeback.
1405 // ASIMD store, 1 element, one lane, B/H/S
1406 // ASIMD store, 1 element, one lane, D
1407 def : InstRW<[N2Write_4cyc_1L01_1V],           (instregex "^ST1i(8|16|32|64)$")>;
1408 def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "^ST1i(8|16|32|64)_POST$")>;
1410 // ASIMD store, 2 element, multiple, D-form, B/H/S
1411 def : InstRW<[N2Write_4cyc_1L01_1V],           (instregex "^ST2Twov(8b|4h|2s)$")>;
1412 def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "^ST2Twov(8b|4h|2s)_POST$")>;
1414 // ASIMD store, 2 element, multiple, Q-form, B/H/S
1415 // ASIMD store, 2 element, multiple, Q-form, D
1416 def : InstRW<[N2Write_4cyc_2L01_2V],           (instregex "^ST2Twov(16b|8h|4s|2d)$")>;
1417 def : InstRW<[WriteAdr, N2Write_4cyc_2L01_2V], (instregex "^ST2Twov(16b|8h|4s|2d)_POST$")>;
1419 // ASIMD store, 2 element, one lane, B/H/S
1420 // ASIMD store, 2 element, one lane, D
1421 def : InstRW<[N2Write_4cyc_1L01_1V],           (instregex "^ST2i(8|16|32|64)$")>;
1422 def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "^ST2i(8|16|32|64)_POST$")>;
// ST3 stores. Patterns are written with an explicit "^" anchor. The base
// record comes first, then the "_POST" record with WriteAdr for the writeback.
1424 // ASIMD store, 3 element, multiple, D-form, B/H/S
1425 def : InstRW<[N2Write_5cyc_2L01_2V],           (instregex "^ST3Threev(8b|4h|2s)$")>;
1426 def : InstRW<[WriteAdr, N2Write_5cyc_2L01_2V], (instregex "^ST3Threev(8b|4h|2s)_POST$")>;
1428 // ASIMD store, 3 element, multiple, Q-form, B/H/S
1429 // ASIMD store, 3 element, multiple, Q-form, D
1430 def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "^ST3Threev(16b|8h|4s|2d)$")>;
1431 def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>;
1433 // ASIMD store, 3 element, one lane, B/H
1434 // ASIMD store, 3 element, one lane, S
1435 // ASIMD store, 3 element, one lane, D
1436 def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "^ST3i(8|16|32|64)$")>;
1437 def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "^ST3i(8|16|32|64)_POST$")>;
// ST4 stores. Patterns are written with an explicit "^" anchor. The base
// record comes first, then the "_POST" record with WriteAdr for the writeback.
1439 // ASIMD store, 4 element, multiple, D-form, B/H/S
1440 def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "^ST4Fourv(8b|4h|2s)$")>;
1441 def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "^ST4Fourv(8b|4h|2s)_POST$")>;
1443 // ASIMD store, 4 element, multiple, Q-form, B/H/S
1444 def : InstRW<[N2Write_7cyc_6L01_6V],           (instregex "^ST4Fourv(16b|8h|4s)$")>;
1445 def : InstRW<[WriteAdr, N2Write_7cyc_6L01_6V], (instregex "^ST4Fourv(16b|8h|4s)_POST$")>;
1447 // ASIMD store, 4 element, multiple, Q-form, D
1448 def : InstRW<[N2Write_5cyc_4L01_4V],           (instregex "^ST4Fourv(2d)$")>;
1449 def : InstRW<[WriteAdr, N2Write_5cyc_4L01_4V], (instregex "^ST4Fourv(2d)_POST$")>;
1451 // ASIMD store, 4 element, one lane, B/H/S
1452 def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "^ST4i(8|16|32)$")>;
1453 def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "^ST4i(8|16|32)_POST$")>;
1455 // ASIMD store, 4 element, one lane, D
1456 def : InstRW<[N2Write_4cyc_3L01_3V],           (instregex "^ST4i(64)$")>;
1457 def : InstRW<[WriteAdr, N2Write_4cyc_3L01_3V], (instregex "^ST4i(64)_POST$")>;
1459 // Cryptography extensions
1460 // -----------------------------------------------------------------------------
1462 // Crypto AES ops
// AESD/AESE are matched exactly; the AESMC/AESIMC pattern has no trailing "$",
// so it also matches any opcode name that merely starts with that text.
1463 def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;
1465 // Crypto polynomial (64x64) multiply long
// The 8x8 PMULL arrangements are handled in the ASIMD integer section.
1466 def : InstRW<[N2Write_2cyc_1V0], (instrs PMULLv1i64, PMULLv2i64)>;
1468 // Crypto SHA1 hash acceleration op
1469 // Crypto SHA1 schedule acceleration ops
1470 def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>;
1472 // Crypto SHA1 hash acceleration ops
1473 // Crypto SHA256 hash acceleration ops
// SHA1C/SHA1M/SHA1P and SHA256H/SHA256H2 share the 4-cycle write.
1474 def : InstRW<[N2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>;
1476 // Crypto SHA256 schedule acceleration ops
1477 def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>;
1479 // Crypto SHA512 hash acceleration ops
1480 def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>;
1482 // Crypto SHA3 ops
1483 def : InstRW<[N2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;
1485 // Crypto SM3 ops
1486 def : InstRW<[N2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
1487                                             "^SM3TT[12][AB]$")>;
1489 // Crypto SM4 ops
1490 def : InstRW<[N2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>;
1492 // CRC
1493 // -----------------------------------------------------------------------------
// A single prefix pattern covers every opcode whose name starts with "CRC32".
1495 def : InstRW<[N2Write_2cyc_1M0], (instregex "^CRC32")>;
1497 // SVE Predicate instructions
1498 // -----------------------------------------------------------------------------
1500 // Loop control, based on predicate
1501 def : InstRW<[N2Write_2cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP,
1502                                         BRKB_PPmP, BRKB_PPzP)>;
1504 // Loop control, based on predicate and flag setting
// The flag-setting ("S") opcodes get their own write, one cycle longer in
// name than the non-flag-setting record above.
1505 def : InstRW<[N2Write_3cyc_1M], (instrs BRKAS_PPzP, BRKBS_PPzP)>;
1507 // Loop control, propagating
1508 def : InstRW<[N2Write_2cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;
1510 // Loop control, propagating and flag setting
1511 def : InstRW<[N2Write_3cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
1512                                             BRKPBS_PPzPP)>;
1514 // Loop control, based on GPR
1515 def : InstRW<[N2Write_3cyc_1M],
1516              (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
// WHILERW / WHILEWR have no heading of their own in this file; they are given
// the same write as the WHILE* record above.
1518 def : InstRW<[N2Write_3cyc_1M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>;
1520 // Loop terminate
1521 def : InstRW<[N2Write_1cyc_1M], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
1523 // Predicate counting scalar
// Two records share this heading: three opcodes named explicitly, then the
// CNT/DEC/INC families (with their saturating variants) matched by regex.
1524 def : InstRW<[N2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
1525 def : InstRW<[N2Write_2cyc_1M],
1526              (instregex "^(CNT|DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI$",
1527                         "^SQ(DEC|INC)[BHWD]_XPiWdI$",
1528                         "^(UQDEC|UQINC)[BHWD]_WPiI$")>;
1530 // Predicate counting scalar, active predicate
1531 def : InstRW<[N2Write_2cyc_1M],
1532              (instregex "^CNTP_XPP_[BHSD]$",
1533                         "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$",
1534                         "^(UQDEC|UQINC)P_WP_[BHSD]$",
1535                         "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]$")>;
1537 // Predicate counting vector, active predicate
// Only H/S/D element suffixes are listed for the vector (_ZP_) forms.
1538 def : InstRW<[N2Write_7cyc_1M_1M0_1V],
1539              (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>;
1541 // Predicate logical
1542 def : InstRW<[N2Write_1cyc_1M0],
1543              (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;
1545 // Predicate logical, flag setting
1546 def : InstRW<[N2Write_2cyc_1M0_1M],
1547              (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>;
1549 // Predicate reverse
1550 def : InstRW<[N2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]$")>;
1552 // Predicate select
1553 def : InstRW<[N2Write_1cyc_1M0], (instrs SEL_PPPP)>;
1555 // Predicate set
1556 def : InstRW<[N2Write_2cyc_1M], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>;
1558 // Predicate set/initialize, set flags
1559 def : InstRW<[N2Write_3cyc_1M], (instregex "^PTRUES_[BHSD]$")>;
1561 // Predicate find first/next
1562 def : InstRW<[N2Write_3cyc_1M], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>;
1564 // Predicate test
1565 def : InstRW<[N2Write_1cyc_1M], (instrs PTEST_PP)>;
1567 // Predicate transpose
// NOTE(review): the element class here and in the zip/unzip record below is
// [BHSDQ], while the other predicate records use [BHSD]. Whether a _Q
// predicate form exists is not visible from this file -- confirm.
1568 def : InstRW<[N2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSDQ]$")>;
1570 // Predicate unpack and widen
1571 def : InstRW<[N2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
1573 // Predicate zip/unzip
1574 def : InstRW<[N2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;
1576 // SVE integer instructions
1577 // -----------------------------------------------------------------------------
1579 // Arithmetic, absolute diff
1580 def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]",
1581                                            "^[SU]ABD_ZPZZ_[BHSD]")>;
1583 // Arithmetic, absolute diff accum
1584 def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>;
1586 // Arithmetic, absolute diff accum long
1587 def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>;
1589 // Arithmetic, absolute diff long
1590 def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>;
1592 // Arithmetic, basic (incl. ADR and [SU](ADD|SUB)[LW][BT] forms)
1593 def : InstRW<[N2Write_2cyc_1V],
1594              (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
1595                         "^(ADD|SUB)_ZZZ_[BHSD]",
1596                         "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
1597                         "^(ADD|SUB|SUBR)_ZI_[BHSD]",
1598                         "^ADR_[SU]XTW_ZZZ_D_[0123]",
1599                         "^ADR_LSL_ZZZ_[SD]_[0123]",
1600                         "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
1601                         "^SADDLBT_ZZZ_[HSD]",
1602                         "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
1603                         "^SSUBL(BT|TB)_ZZZ_[HSD]")>;
1605 // Arithmetic, complex (ADDHN/SUBHN family, SQ*/UQ* ops, [SU]RHADD)
1606 def : InstRW<[N2Write_2cyc_1V],
1607              (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
1608                         "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
1609                         "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
1610                         "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
1611                         "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
1612                         "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;
1614 // Arithmetic, large integer (ADCL/SBCL)
1615 def : InstRW<[N2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>;
1617 // Arithmetic, pairwise add
1618 def : InstRW<[N2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]$")>;
1620 // Arithmetic, pairwise add and accum long
1621 def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>;
1623 // Arithmetic, shift (ASR/LSL/LSR, including _WIDE and reversed *R forms)
1624 def : InstRW<[N2Write_2cyc_1V1],
1625              (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
1626                         "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
1627                         "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
1628                         "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
1629                         "^(ASR|LSL|LSR)_ZZI_[BHSD]",
1630                         "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
1631                         "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
1633 // Arithmetic, shift and accumulate
1634 def : InstRW<[N2Write_4cyc_1V1],
1635              (instregex "^(SRSRA|SSRA|URSRA|USRA)_ZZI_[BHSD]$")>;
1637 // Arithmetic, shift by immediate
1638 // Arithmetic, shift by immediate and insert
1639 def : InstRW<[N2Write_2cyc_1V1],
1640              (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]$")>;
1642 // Arithmetic, shift complex (SQ*/UQ* shifts and *SHRN/*SHRUN forms)
1643 def : InstRW<[N2Write_4cyc_1V1],
1644              (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
1645                         "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]",
1646                         "^[SU]QR?SHL_ZPZZ_[BHSD]",
1647                         "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
1648                         "^SQSHRU?N[BT]_ZZI_[BHS]",
1649                         "^UQR?SHRN[BT]_ZZI_[BHS]")>;
1651 // Arithmetic, shift right for divide (ASRD)
1652 def : InstRW<[N2Write_4cyc_1V1], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;
1654 // Arithmetic, shift rounding ([SU]RSHL, [SU]RSHLR, [SU]RSHR)
1655 def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]",
1656                                              "^[SU]RSHL_ZPZZ_[BHSD]",
1657                                              "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>;
1659 // Bit manipulation (BDEP/BEXT/BGRP)
1660 def : InstRW<[N2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>;
1662 // Bitwise select
1663 def : InstRW<[N2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>;
1665 // Count/reverse bits
1666 def : InstRW<[N2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;
1668 // Broadcast logical bitmask immediate to vector
1669 def : InstRW<[N2Write_2cyc_1V], (instrs DUPM_ZI)>;
1671 // Compare and set flags (CMP<cc>, including _WIDE forms)
1672 def : InstRW<[N2Write_4cyc_1V0_1M],
1673              (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
1674                         "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;
1676 // Complex add
1677 def : InstRW<[N2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>;
1679 // Complex dot product 8-bit element (CDOT _S forms)
1680 def : InstRW<[N2Write_3cyc_1V], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
1682 // Complex dot product 16-bit element (CDOT _D forms)
1683 def : InstRW<[N2Write_4cyc_1V0], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
1685 // Complex multiply-add B, H, S element size
1686 def : InstRW<[N2Write_4cyc_1V0], (instregex "^CMLA_ZZZ_[BHS]$",
1687                                             "^CMLA_ZZZI_[HS]$")>;
1689 // Complex multiply-add D element size
1690 def : InstRW<[N2Write_5cyc_2V0], (instrs CMLA_ZZZ_D)>;
1692 // Conditional extract operations, scalar form
1693 def : InstRW<[N2Write_8cyc_1M0_1V1_1V], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
1695 // Conditional extract operations, SIMD&FP scalar and vector forms
1696 def : InstRW<[N2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
1697                                             "^COMPACT_ZPZ_[SD]$",
1698                                             "^SPLICE_ZPZZ?_[BHSD]$")>;
1700 // Convert to floating point, 64b to float or convert to double
1701 def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
1702                                             "^[SU]CVTF_ZPmZ_StoD")>;
1704 // Convert to floating point, 32b to single or half
1705 def : InstRW<[N2Write_4cyc_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;
1707 // Convert to floating point, 16b to half
1708 def : InstRW<[N2Write_6cyc_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
1710 // Copy, scalar
1711 def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]$")>;
1713 // Copy, scalar SIMD&FP or imm
1714 def : InstRW<[N2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]$",
1715                                            "^CPY_ZPzI_[BHSD]$")>;
1717 // Divides, 32 bit ([SU]DIV and [SU]DIVR, _S forms)
1718 def : InstRW<[N2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
1719                                              "^[SU]DIV_ZPZZ_S")>;
1721 // Divides, 64 bit ([SU]DIV and [SU]DIVR, _D forms)
1722 def : InstRW<[N2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
1723                                              "^[SU]DIV_ZPZZ_D")>;
1725 // Dot product, 8 bit (SDOT/UDOT _S forms)
1726 def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]DOT_ZZZI?_S$")>;
1728 // Dot product, 8 bit, using signed and unsigned integers
1729 def : InstRW<[N2Write_3cyc_1V], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
1731 // Dot product, 16 bit (SDOT/UDOT _D forms)
1732 def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]DOT_ZZZI?_D$")>;
1734 // Duplicate, immediate and indexed form
1735 def : InstRW<[N2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]$",
1736                                            "^DUP_ZZI_[BHSDQ]$")>;
1738 // Duplicate, scalar form
1739 def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]$")>;
1741 // Extend, sign or zero ([SU]XTB, [SU]XTH, [SU]XTW)
1742 def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]",
1743                                             "^[SU]XTH_ZPmZ_[SD]",
1744                                             "^[SU]XTW_ZPmZ_[D]")>;
1746 // Extract
1747 def : InstRW<[N2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;
1749 // Extract narrow saturating
1750 def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$",
1751                                             "^SQXTUN[BT]_ZZ_[BHS]$")>;
1753 // Extract/insert operation, SIMD and FP scalar form
1754 def : InstRW<[N2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$",
1755                                             "^INSR_ZV_[BHSD]$")>;
1757 // Extract/insert operation, scalar
1758 def : InstRW<[N2Write_5cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]$",
1759                                                 "^INSR_ZR_[BHSD]$")>;
1761 // Histogram operations
1762 def : InstRW<[N2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]$",
1763                                            "^HISTSEG_ZZZ$")>;
1765 // Horizontal operations, B, H, S form, immediate operands only (INDEX_II)
1766 def : InstRW<[N2Write_4cyc_1V0], (instregex "^INDEX_II_[BHS]$")>;
1768 // Horizontal operations, B, H, S form, scalar, immediate operands / scalar
1769 // operands only / immediate, scalar operands (INDEX_IR, INDEX_RI, INDEX_RR)
1770 def : InstRW<[N2Write_7cyc_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
1772 // Horizontal operations, D form, immediate operands only (INDEX_II_D)
1773 def : InstRW<[N2Write_5cyc_2V0], (instrs INDEX_II_D)>;
1775 // Horizontal operations, D form, scalar, immediate operands / scalar operands
1776 // only / immediate, scalar operands (INDEX_IR_D, INDEX_RI_D, INDEX_RR_D)
1777 def : InstRW<[N2Write_8cyc_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>;
1779 // Logical (NOT_ZPmZ is matched by the combined ZPmZ/ZPZZ pattern below)
1780 def : InstRW<[N2Write_2cyc_1V],
1781              (instregex "^(AND|EOR|ORR)_ZI",
1782                         "^(AND|BIC|EOR|ORR)_ZZZ",
1783                         "^EOR(BT|TB)_ZZZ_[BHSD]",
1784                         "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]")>;
1787 // Max/min, basic and pairwise
1788 def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
1789                                            "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]",
1790                                            "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>;
1792 // Matching operations (MATCH/NMATCH, B and H only)
1793 def : InstRW<[N2Write_2cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]$")>;
1795 // Matrix multiply-accumulate (SMMLA/UMMLA/USMMLA)
1796 def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
1798 // Move prefix
1799 def : InstRW<[N2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
1800                                            "^MOVPRFX_ZZ$")>;
1802 // Multiply, B, H, S element size (MUL and [SU]MULH)
1803 def : InstRW<[N2Write_4cyc_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
1804                                              "^MUL_ZPZZ_[BHS]",
1805                                              "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
1806                                              "^[SU]MULH_ZPZZ_[BHS]")>;
1808 // Multiply, D element size (MUL and [SU]MULH)
1809 def : InstRW<[N2Write_5cyc_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
1810                                              "^MUL_ZPZZ_D",
1811                                              "^[SU]MULH_(ZPmZ|ZZZ)_D",
1812                                              "^[SU]MULH_ZPZZ_D")>;
1814 // Multiply long
1815 def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$",
1816                                             "^[SU]MULL[BT]_ZZZ_[HSD]$")>;
1818 // Multiply accumulate, B, H, S element size (MLA/MLS/MAD/MSB)
1819 def : InstRW<[N2Write_4cyc_1V0], (instregex "^ML[AS]_ZZZI_[BHS]$",
1820                                             "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_[BHS]")>;
1822 // Multiply accumulate, D element size (MLA/MLS/MAD/MSB)
1823 def : InstRW<[N2Write_5cyc_2V0], (instregex "^ML[AS]_ZZZI_D$",
1824                                             "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_D")>;
1826 // Multiply accumulate long
1827 def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
1828                                             "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>;
1830 // Multiply accumulate saturating doubling long regular
1831 def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$",
1832                                             "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>;
1834 // Multiply saturating doubling high, B, H, S element size
1835 def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULH_ZZZ_[BHS]$",
1836                                             "^SQDMULH_ZZZI_[HS]$")>;
1838 // Multiply saturating doubling high, D element size
1839 def : InstRW<[N2Write_5cyc_2V0], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
1841 // Multiply saturating doubling long
1842 def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$",
1843                                             "^SQDMULL[BT]_ZZZI_[SD]$")>;
1845 // Multiply saturating rounding doubling regular/complex accumulate, B, H, S
1846 // element size (SQRDMLAH/SQRDMLSH/SQRDCMLAH)
1847 def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$",
1848                                             "^SQRDCMLAH_ZZZ_[BHS]$",
1849                                             "^SQRDML[AS]H_ZZZI_[HS]$",
1850                                             "^SQRDCMLAH_ZZZI_[HS]$")>;
1852 // Multiply saturating rounding doubling regular/complex accumulate, D element
1853 // size (SQRDMLAH/SQRDMLSH/SQRDCMLAH)
1854 def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDML[AS]H_ZZZI?_D$",
1855                                             "^SQRDCMLAH_ZZZ_D$")>;
1857 // Multiply saturating rounding doubling regular/complex, B, H, S element size
1858 def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMULH_ZZZ_[BHS]$",
1859                                             "^SQRDMULH_ZZZI_[HS]$")>;
1861 // Multiply saturating rounding doubling regular/complex, D element size
1862 def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDMULH_ZZZI?_D$")>;
1864 // Multiply/multiply long, (8x8) polynomial
1865 def : InstRW<[N2Write_2cyc_1V0], (instregex "^PMUL_ZZZ_B$",
1866                                             "^PMULL[BT]_ZZZ_[HDQ]$")>;
1868 // Predicate counting vector (_ZPiI forms, H/W/D)
1869 def : InstRW<[N2Write_2cyc_1V0],
1870              (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI$")>;
1872 // Reciprocal estimate (URECPE/URSQRTE, _S forms)
1873 def : InstRW<[N2Write_4cyc_2V0], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;
1875 // Reduction, arithmetic, B form
1876 def : InstRW<[N2Write_11cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
1878 // Reduction, arithmetic, H form
1879 def : InstRW<[N2Write_9cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
1881 // Reduction, arithmetic, S form
1882 def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
1884 // Reduction, arithmetic, D form
1885 def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
1887 // Reduction, logical (ANDV/EORV/ORV)
1888 def : InstRW<[N2Write_6cyc_1V_1V1], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]$")>;
1890 // Reverse, vector (REV, REVB, REVH, REVW)
1891 def : InstRW<[N2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]$",
1892                                            "^REVB_ZPmZ_[HSD]$",
1893                                            "^REVH_ZPmZ_[SD]$",
1894                                            "^REVW_ZPmZ_D$")>;
1896 // Select, vector form
1897 def : InstRW<[N2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]$")>;
1899 // Table lookup
1900 def : InstRW<[N2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]$")>;
1902 // Table lookup extension
1903 def : InstRW<[N2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]$")>;
1905 // Transpose, vector form
1906 def : InstRW<[N2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>;
1908 // Unpack and extend
1909 def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>;
1911 // Zip/unzip
1912 def : InstRW<[N2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
1914 // SVE floating-point instructions
1915 // -----------------------------------------------------------------------------
1917 // Floating point absolute value/difference (FAB[SD] covers FABS and FABD)
1918 def : InstRW<[N2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]",
1919                                            "^FABD_ZPZZ_[HSD]")>;
1922 // Floating point arithmetic (FADD/FSUB/FSUBR, FADDP, FNEG)
1923 def : InstRW<[N2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
1924                                            "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
1925                                            "^FADDP_ZPmZZ_[HSD]",
1926                                            "^FNEG_ZPmZ_[HSD]",
1927                                            "^FSUBR_ZPm[IZ]_[HSD]",
1928                                            "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
1930 // Floating point associative add, F16
1931 def : InstRW<[N2Write_10cyc_1V1], (instrs FADDA_VPZ_H)>;
1933 // Floating point associative add, F32
1934 def : InstRW<[N2Write_6cyc_1V1], (instrs FADDA_VPZ_S)>;
1936 // Floating point associative add, F64
1937 def : InstRW<[N2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;
1939 // Floating point compare (FACGE/FACGT, FCM<cc>, FCMUO)
1940 def : InstRW<[N2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]$",
1941                                             "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$",
1942                                             "^FCM(LE|LT)_PPzZ0_[HSD]$",
1943                                             "^FCMUO_PPzZZ_[HSD]$")>;
1945 // Floating point complex add (FCADD)
1946 def : InstRW<[N2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]$")>;
1948 // Floating point complex multiply add (FCMLA)
1949 def : InstRW<[N2Write_5cyc_1V], (instregex "^FCMLA_ZPmZZ_[HSD]$",
1950                                            "^FCMLA_ZZZI_[HS]$")>;
1952 // Floating point convert, long or narrow (F16 to F32 or F32 to F16)
1953 def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
1954                                             "^FCVTLT_ZPmZ_HtoS",
1955                                             "^FCVTNT_ZPmZ_StoH")>;
1957 // Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
1958 // or F64 to F16)
1959 def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
1960                                             "^FCVTLT_ZPmZ_StoD",
1961                                             "^FCVTNT_ZPmZ_DtoS")>;
1963 // Floating point convert, round to odd (FCVTX/FCVTXNT)
1964 def : InstRW<[N2Write_3cyc_1V0], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
1966 // Floating point base2 log, F16 (FLOGB _H forms)
1967 def : InstRW<[N2Write_6cyc_4V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;
1969 // Floating point base2 log, F32 (FLOGB _S forms)
1970 def : InstRW<[N2Write_4cyc_2V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;
1972 // Floating point base2 log, F64 (FLOGB _D forms)
1973 def : InstRW<[N2Write_3cyc_1V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;
1975 // Floating point convert to integer, F16
1976 def : InstRW<[N2Write_6cyc_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
1978 // Floating point convert to integer, F32
1979 def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;
1981 // Floating point convert to integer, F64
1982 def : InstRW<[N2Write_3cyc_1V0],
1983              (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
1985 // Floating point copy (FCPY and FDUP immediates)
1986 def : InstRW<[N2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]$",
1987                                            "^FDUP_ZI_[HSD]$")>;
1989 // Floating point divide, F16 (FDIV/FDIVR)
1990 def : InstRW<[N2Write_13cyc_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
1992 // Floating point divide, F32 (FDIV/FDIVR)
1993 def : InstRW<[N2Write_10cyc_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
1995 // Floating point divide, F64 (FDIV/FDIVR)
1996 def : InstRW<[N2Write_15cyc_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
1998 // Floating point min/max pairwise (FMAXP/FMINP/FMAXNMP/FMINNMP)
1999 def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
2001 // Floating point min/max (FMAX/FMIN/FMAXNM/FMINNM)
2002 def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
2003                                            "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
2005 // Floating point multiply (FMUL, FMULX, FSCALE)
2006 def : InstRW<[N2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
2007                                            "^FMULX_ZPZZ_[HSD]",
2008                                            "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
2009                                            "^FMUL_ZPZ[IZ]_[HSD]")>;
2011 // Floating point multiply accumulate
2012 def : InstRW<[N2Write_4cyc_1V], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$",
2013                                            "^FN?ML[AS]_ZPZZZ_[HSD]",
2014                                            "^FML[AS]_ZZZI_[HSD]$")>;
2016 // Floating point multiply add/sub accumulate long
2017 def : InstRW<[N2Write_4cyc_1V], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;
2019 // Floating point reciprocal estimate, F16
2020 def : InstRW<[N2Write_6cyc_4V0], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>;
2022 // Floating point reciprocal estimate, F32
2023 def : InstRW<[N2Write_4cyc_2V0], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>;
2025 // Floating point reciprocal estimate, F64
2026 def : InstRW<[N2Write_3cyc_1V0], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>;
2028 // Floating point reciprocal step (FRECPS/FRSQRTS)
2029 def : InstRW<[N2Write_4cyc_1V0], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
2031 // Floating point reduction, F16
2032 def : InstRW<[N2Write_6cyc_2V],
2033              (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H$")>;
2035 // Floating point reduction, F32
2036 def : InstRW<[N2Write_4cyc_1V],
2037              (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S$")>;
2039 // Floating point reduction, F64
2040 def : InstRW<[N2Write_2cyc_1V],
2041              (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>;
2043 // Floating point round to integral, F16 (FRINT[AIMNPXZ])
2044 def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
2046 // Floating point round to integral, F32 (FRINT[AIMNPXZ])
2047 def : InstRW<[N2Write_4cyc_2V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
2049 // Floating point round to integral, F64 (FRINT[AIMNPXZ])
2050 def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
2052 // Floating point square root, F16
2053 def : InstRW<[N2Write_13cyc_1V0], (instregex "^FSQRT_ZPmZ_H")>;
2055 // Floating point square root, F32
2056 def : InstRW<[N2Write_10cyc_1V0], (instregex "^FSQRT_ZPmZ_S")>;
2058 // Floating point square root, F64
2059 def : InstRW<[N2Write_16cyc_1V0], (instregex "^FSQRT_ZPmZ_D")>;
2061 // Floating point trigonometric exponentiation (FEXPA)
2062 def : InstRW<[N2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]$")>;
2064 // Floating point trigonometric multiply add (FTMAD)
2065 def : InstRW<[N2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]$")>;
2067 // Floating point trigonometric, miscellaneous (FTSMUL/FTSSEL)
2068 def : InstRW<[N2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
2070 // SVE BFloat16 (BF16) instructions
2071 // -----------------------------------------------------------------------------
2073 // Convert, F32 to BF16 (BFCVT/BFCVTNT)
2074 def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
2076 // Dot product (BFDOT)
2077 def : InstRW<[N2Write_4cyc_1V], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
2079 // Matrix multiply accumulate (BFMMLA)
2080 def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA_ZZZ)>;
2082 // Multiply accumulate long (BFMLALB/BFMLALT, vector and indexed forms)
2083 def : InstRW<[N2Write_4cyc_1V], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
2085 // SVE Load instructions
2086 // -----------------------------------------------------------------------------
2088 // Load vector
2089 def : InstRW<[N2Write_6cyc_1L], (instrs LDR_ZXI)>;
2091 // Load predicate
2092 def : InstRW<[N2Write_6cyc_1L_1M], (instrs LDR_PXI)>;
2094 // Contiguous load, scalar + imm
2095 def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM$",
2096                                            "^LD1S?B_[HSD]_IMM$",
2097                                            "^LD1S?H_[SD]_IMM$",
2098                                            "^LD1S?W_D_IMM$" )>;
2099 // Contiguous load, scalar + scalar
2100 def : InstRW<[N2Write_6cyc_1L01], (instregex "^LD1[BHWD]$",
2101                                              "^LD1S?B_[HSD]$",
2102                                              "^LD1S?H_[SD]$",
2103                                              "^LD1S?W_D$" )>;
2105 // Contiguous load broadcast, scalar + imm (LD1R* and LD1RQ _IMM forms)
2106 def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$",
2107                                            "^LD1RSW_IMM$",
2108                                            "^LD1RS?B_[HSD]_IMM$",
2109                                            "^LD1RS?H_[SD]_IMM$",
2110                                            "^LD1RS?W_D_IMM$",
2111                                            "^LD1RQ_[BHWD]_IMM$")>;
2113 // Contiguous load broadcast, scalar + scalar (LD1RQ forms only)
2114 def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;
2116 // Non temporal load, scalar + imm (LDNT1* _ZRI)
2117 def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZRI$")>;
2119 // Non temporal load, scalar + scalar (LDNT1* _ZRR)
2120 def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDNT1[BHWD]_ZRR$")>;
2122 // Non temporal gather load, vector + scalar 32-bit element size
2123 def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LDNT1[BHW]_ZZR_S$",
2124                                               "^LDNT1S[BH]_ZZR_S$")>;
2126 // Non temporal gather load, vector + scalar 64-bit element size
2127 def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
2128 def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D)>;
2130 // Contiguous first faulting load, scalar + scalar (LDFF1*)
2131 def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]$",
2132                                               "^LDFF1S?B_[HSD]$",
2133                                               "^LDFF1S?H_[SD]$",
2134                                               "^LDFF1S?W_D$")>;
2136 // Contiguous non faulting load, scalar + imm (LDNF1*)
2137 def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM$",
2138                                            "^LDNF1S?B_[HSD]_IMM$",
2139                                            "^LDNF1S?H_[SD]_IMM$",
2140                                            "^LDNF1S?W_D_IMM$")>;
2142 // Contiguous Load two structures to two vectors, scalar + imm
2143 def : InstRW<[N2Write_8cyc_1L_1V], (instregex "^LD2[BHWD]_IMM$")>;
2145 // Contiguous Load two structures to two vectors, scalar + scalar
2146 def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD2[BHWD]$")>;
2148 // Contiguous Load three structures to three vectors, scalar + imm
2149 def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD3[BHWD]_IMM$")>;
2151 // Contiguous Load three structures to three vectors, scalar + scalar
2152 def : InstRW<[N2Write_10cyc_1V_1L_1S], (instregex "^LD3[BHWD]$")>;
2154 // Contiguous Load four structures to four vectors, scalar + imm
2155 def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^LD4[BHWD]_IMM$")>;
2157 // Contiguous Load four structures to four vectors, scalar + scalar
2158 def : InstRW<[N2Write_10cyc_2L_2V_2S], (instregex "^LD4[BHWD]$")>;
2160 // Gather load, vector + imm, 32-bit element size (GLD1/GLDFF1 forms)
2161 def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
2162                                               "^GLD(FF)?1W_IMM$")>;
2164 // Gather load, vector + imm, 64-bit element size (GLD1/GLDFF1 forms)
2165 def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
2166                                               "^GLD(FF)?1D_IMM$")>;
2168 // Gather load, 64-bit element size ([SU]XTW or plain, optionally _SCALED)
2169 def : InstRW<[N2Write_9cyc_2L_2V],
2170              (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW(_SCALED)?$",
2171                         "^GLD(FF)?1S?[BHW]_D(_SCALED)?$",
2172                         "^GLD(FF)?1D_[SU]XTW(_SCALED)?$",
2173                         "^GLD(FF)?1D(_SCALED)?$")>;
2175 // Gather load, 32-bit scaled offset ([SU]XTW_SCALED, H and W only)
2176 def : InstRW<[N2Write_10cyc_2L_2V],
2177              (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$",
2178                         "^GLD(FF)?1W_[SU]XTW_SCALED")>;
2180 // Gather load, 32-bit unpacked unscaled offset
2181 def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
2182                                               "^GLD(FF)?1W_[SU]XTW$")>;
2184 // SVE Store instructions
2185 // -----------------------------------------------------------------------------
2187 // Store from predicate reg
2188 def : InstRW<[N2Write_1cyc_1L01], (instrs STR_PXI)>;
2190 // Store from vector reg
2191 def : InstRW<[N2Write_2cyc_1L01_1V], (instrs STR_ZXI)>;
2193 // Contiguous store, scalar + imm
2194 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BHWD]_IMM$",
2195                                                 "^ST1B_[HSD]_IMM$",
2196                                                 "^ST1H_[SD]_IMM$",
2197                                                 "^ST1W_D_IMM$")>;
2199 // Contiguous store, scalar + scalar (ST1H forms use a separate write class)
2200 def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>;
2201 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BWD]$",
2202                                                 "^ST1B_[HSD]$",
2203                                                 "^ST1W_D$")>;
2205 // Contiguous store two structures from two vectors, scalar + imm
2206 def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BHWD]_IMM$")>;
2208 // Contiguous store two structures from two vectors, scalar + scalar (ST2H)
2209 def : InstRW<[N2Write_4cyc_1L01_1S_1V], (instrs ST2H)>;
2211 // Contiguous store two structures from two vectors, scalar + scalar (B/W/D)
2212 def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BWD]$")>;
2214 // Contiguous store three structures from three vectors, scalar + imm
2215 def : InstRW<[N2Write_7cyc_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>;
2217 // Contiguous store three structures from three vectors, scalar + scalar (ST3H)
2218 def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instrs ST3H)>;
2220 // Contiguous store three structures from three vectors, scalar + scalar (B/W/D)
2221 def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instregex "^ST3[BWD]$")>;
2223 // Contiguous store four structures from four vectors, scalar + imm
2224 def : InstRW<[N2Write_11cyc_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>;
2226 // Contiguous store four structures from four vectors, scalar + scalar (ST4H)
2227 def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instrs ST4H)>;
2229 // Contiguous store four structures from four vectors, scalar + scalar (B/W/D)
2230 def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instregex "^ST4[BWD]$")>;
2232 // Non temporal store, scalar + imm
2233 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;
2235 // Non temporal store, scalar + scalar (STNT1H_ZRR uses a separate write class)
2236 def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>;
2237 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;
2239 // Scatter non temporal store, vector + scalar 32-bit element size
2240 def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^STNT1[BHW]_ZZR_S")>;
2242 // Scatter non temporal store, vector + scalar 64-bit element size
2243 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZZR_D")>;
2245 // Scatter store vector + imm 32-bit element size
2246 def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_IMM$",
2247                                                 "^SST1W_IMM$")>;
2249 // Scatter store vector + imm 64-bit element size
2250 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
2251                                                 "^SST1D_IMM$")>;
2253 // Scatter store, 32-bit scaled offset (SST1H_S/SST1W, [SU]XTW_SCALED)
2254 def : InstRW<[N2Write_4cyc_2L01_2V],
2255              (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
2257 // Scatter store, 32-bit unpacked unscaled offset (_D forms, [SU]XTW)
2258 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$",
2259                                                 "^SST1D_[SU]XTW$")>;
2261 // Scatter store, 32-bit unpacked scaled offset (_D forms, [SU]XTW_SCALED)
2262 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
2263                                                 "^SST1D_[SU]XTW_SCALED$")>;
2265 // Scatter store, 32-bit unscaled offset (_S forms and SST1W, [SU]XTW)
2266 def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_[SU]XTW$",
2267                                                 "^SST1W_[SU]XTW$")>;
2269 // Scatter store, 64-bit scaled offset
2270 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_SCALED$",
2271                                                 "^SST1D_SCALED$")>;
2273 // Scatter store, 64-bit unscaled offset
2274 def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D$",
2275                                                 "^SST1D$")>;
2277 // SVE Miscellaneous instructions
2278 // -----------------------------------------------------------------------------
2280 // Read first fault register, unpredicated
2281 def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P)>;
2283 // Read first fault register, predicated
2284 def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz)>;
2286 // Read first fault register and set flags
2287 def : InstRW<[N2Write_4cyc_2M0_2M], (instrs RDFFRS_PPz)>;
2289 // Set first fault register
2290 // Write to first fault register
2291 def : InstRW<[N2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>;
2293 // Prefetch (any opcode whose name starts with PRFB/PRFH/PRFW/PRFD)
2294 def : InstRW<[N2Write_4cyc_1L], (instregex "^PRF[BHWD]")>;
2296 // SVE Cryptographic instructions
2297 // -----------------------------------------------------------------------------
2299 // Crypto AES ops (AESD/AESE/AESMC/AESIMC)
2300 def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$",
2301                                            "^AESI?MC_ZZ_B$")>;
2303 // Crypto SHA3 ops (BCAX/EOR3/RAX1/XAR)
2304 def : InstRW<[N2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$",
2305                                             "^RAX1_ZZZ_D$",
2306                                             "^XAR_ZZZI_[BHSD]$")>;
2308 // Crypto SM4 ops (SM4E/SM4EKEY)
2309 def : InstRW<[N2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;