[llvm-shlib] Fix the version naming style of libLLVM for Windows (#85710)
[llvm-project.git] / llvm / lib / Target / AArch64 / AArch64SchedNeoverseV1.td
blob613db353cb0aaa54f373afeeb216067ec62c3943
1 //=- AArch64SchedNeoverseV1.td - NeoverseV1 Scheduling Model -*- tablegen -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the scheduling model for the Arm Neoverse V1 processors.
11 // References:
12 // - "Arm Neoverse V1 Software Optimization Guide"
13 // - "Arm Neoverse V1 Platform: Unleashing a new performance tier for Arm-based computing"
14 //   https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/neoverse-v1-platform-a-new-performance-tier-for-arm
15 // - "Neoverse V1"
16 //   https://en.wikichip.org/wiki/arm_holdings/microarchitectures/neoverse_v1
19 //===----------------------------------------------------------------------===//
21 def NeoverseV1Model : SchedMachineModel { // Top-level machine model parameters for Neoverse V1.
22   let IssueWidth            =  15; // Maximum micro-ops dispatch rate.
23   let MicroOpBufferSize     = 256; // Micro-op re-order buffer.
24   let LoadLatency           =   4; // Optimistic load latency.
25   let MispredictPenalty     =  11; // Cycle cost of a mispredicted branch.
26   let LoopMicroOpBufferSize =  16; // NOTE: Copied from Cortex-A57.
27   let CompleteModel         =   1; // NOTE(review): presumably requires every instruction to have scheduling info - confirm.
29   list<Predicate> UnsupportedFeatures = !listconcat(SVE2Unsupported.F,
30                                                     SMEUnsupported.F,
31                                                     [HasMTE, HasCPA,
32                                                     HasCSSC]); // SVE2, SME, MTE, CPA and CSSC predicates are listed as unsupported.
35 //===----------------------------------------------------------------------===//
36 // Define each kind of processor resource and number available on Neoverse V1.
37 // Instructions are first fetched and then decoded into internal macro-ops
38 // (MOPs).  From there, the MOPs proceed through register renaming and dispatch
39 // stages.  A MOP can be split into one or more micro-ops further down the
40 // pipeline, after the decode stage.  Once dispatched, micro-ops wait for their
41 // operands and issue out-of-order to one of the issue pipelines.  Each issue
42 // pipeline can accept one micro-op per cycle.
44 let SchedModel = NeoverseV1Model in {
46 // Define the issue ports.
47 def V1UnitB   : ProcResource<2>;  // Branch 0/1
48 def V1UnitS   : ProcResource<2>;  // Integer single cycle 0/1
49 def V1UnitM0  : ProcResource<1>;  // Integer multicycle 0
50 def V1UnitM1  : ProcResource<1>;  // Integer multicycle 1
51 def V1UnitL01 : ProcResource<2>;  // Load/Store 0/1
52 def V1UnitL2  : ProcResource<1>;  // Load 2
53 def V1UnitD   : ProcResource<2>;  // Store data 0/1
54 def V1UnitV0  : ProcResource<1>;  // FP/ASIMD 0
55 def V1UnitV1  : ProcResource<1>;  // FP/ASIMD 1
56 def V1UnitV2  : ProcResource<1>;  // FP/ASIMD 2
57 def V1UnitV3  : ProcResource<1>;  // FP/ASIMD 3
59 def V1UnitI   : ProcResGroup<[V1UnitS,
60                               V1UnitM0, V1UnitM1]>;   // Integer units
61 def V1UnitJ   : ProcResGroup<[V1UnitS, V1UnitM0]>;    // Integer 0-2 units
62 def V1UnitM   : ProcResGroup<[V1UnitM0, V1UnitM1]>;   // Integer multicycle units
63 def V1UnitL   : ProcResGroup<[V1UnitL01, V1UnitL2]>;  // Load units
64 def V1UnitV   : ProcResGroup<[V1UnitV0, V1UnitV1,
65                               V1UnitV2, V1UnitV3]>;   // FP/ASIMD units
66 def V1UnitV01 : ProcResGroup<[V1UnitV0, V1UnitV1]>;   // FP/ASIMD 0/1 units
67 def V1UnitV02 : ProcResGroup<[V1UnitV0, V1UnitV2]>;   // FP/ASIMD 0/2 units
68 def V1UnitV13 : ProcResGroup<[V1UnitV1, V1UnitV3]>;   // FP/ASIMD 1/3 units
70 // Define commonly used read types.
72 // No generic forwarding is provided for these types.
73 def : ReadAdvance<ReadI,       0>;
74 def : ReadAdvance<ReadISReg,   0>;
75 def : ReadAdvance<ReadIEReg,   0>;
76 def : ReadAdvance<ReadIM,      0>;
77 def : ReadAdvance<ReadIMA,     0>;
78 def : ReadAdvance<ReadID,      0>;
79 def : ReadAdvance<ReadExtrHi,  0>;
80 def : ReadAdvance<ReadAdrBase, 0>;
81 def : ReadAdvance<ReadST,      0>;
82 def : ReadAdvance<ReadVLD,     0>;
84 def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; } // Flagged as unsupported in this model.
85 def : WriteRes<WriteBarrier, []> { let Latency = 1; }     // No resources listed; latency 1.
86 def : WriteRes<WriteHint,    []> { let Latency = 1; }     // No resources listed; latency 1.
89 //===----------------------------------------------------------------------===//
90 // Define generic 0 micro-op types
92 let Latency = 0, NumMicroOps = 0 in // Zero latency, zero micro-ops, no resources.
93 def V1Write_0c_0Z : SchedWriteRes<[]>; // Used below as the second write of the load-pair entries.
96 //===----------------------------------------------------------------------===//
97 // Define generic 1 micro-op types.  Names encode V1Write_<latency>c[<release cycles>]_<count><unit>.
99 def V1Write_1c_1B      : SchedWriteRes<[V1UnitB]>   { let Latency = 1; }
100 def V1Write_1c_1I      : SchedWriteRes<[V1UnitI]>   { let Latency = 1; }
101 def V1Write_1c_1J      : SchedWriteRes<[V1UnitJ]>   { let Latency = 1; }
102 def V1Write_4c_1L      : SchedWriteRes<[V1UnitL]>   { let Latency = 4; }
103 def V1Write_6c_1L      : SchedWriteRes<[V1UnitL]>   { let Latency = 6; }
104 def V1Write_1c_1L01    : SchedWriteRes<[V1UnitL01]> { let Latency = 1; }
105 def V1Write_4c_1L01    : SchedWriteRes<[V1UnitL01]> { let Latency = 4; }
106 def V1Write_6c_1L01    : SchedWriteRes<[V1UnitL01]> { let Latency = 6; }
107 def V1Write_2c_1M      : SchedWriteRes<[V1UnitM]>   { let Latency = 2; }
108 def V1Write_3c_1M      : SchedWriteRes<[V1UnitM]>   { let Latency = 3; }
109 def V1Write_4c_1M      : SchedWriteRes<[V1UnitM]>   { let Latency = 4; }
110 def V1Write_1c_1M0     : SchedWriteRes<[V1UnitM0]>  { let Latency = 1; }
111 def V1Write_2c_1M0     : SchedWriteRes<[V1UnitM0]>  { let Latency = 2; }
112 def V1Write_3c_1M0     : SchedWriteRes<[V1UnitM0]>  { let Latency = 3; }
113 def V1Write_5c_1M0     : SchedWriteRes<[V1UnitM0]>  { let Latency = 5; }
114 def V1Write_12c5_1M0   : SchedWriteRes<[V1UnitM0]>  { let Latency = 12;
115                                                       let ReleaseAtCycles = [5]; }
116 def V1Write_20c5_1M0   : SchedWriteRes<[V1UnitM0]>  { let Latency = 20;
117                                                       let ReleaseAtCycles = [5]; }
118 def V1Write_2c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 2; }
119 def V1Write_3c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 3; }
120 def V1Write_4c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 4; }
121 def V1Write_5c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 5; }
122 def V1Write_2c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 2; }
123 def V1Write_3c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 3; }
124 def V1Write_4c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 4; }
125 def V1Write_6c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 6; }
126 def V1Write_10c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 10;
127                                                       let ReleaseAtCycles = [7]; }
128 def V1Write_12c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 12;
129                                                       let ReleaseAtCycles = [7]; }
130 def V1Write_13c10_1V0  : SchedWriteRes<[V1UnitV0]>  { let Latency = 13;
131                                                       let ReleaseAtCycles = [10]; }
132 def V1Write_15c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 15;
133                                                       let ReleaseAtCycles = [7]; }
134 def V1Write_16c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 16;
135                                                       let ReleaseAtCycles = [7]; }
136 def V1Write_20c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 20;
137                                                       let ReleaseAtCycles = [7]; }
138 def V1Write_2c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 2; }
139 def V1Write_3c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
140 def V1Write_4c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
141 def V1Write_5c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
142 def V1Write_3c_1V02    : SchedWriteRes<[V1UnitV02]> { let Latency = 3; }
143 def V1Write_4c_1V02    : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
144 def V1Write_7c7_1V02   : SchedWriteRes<[V1UnitV02]> { let Latency = 7;
145                                                       let ReleaseAtCycles = [7]; }
146 def V1Write_10c7_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
147                                                       let ReleaseAtCycles = [7]; }
148 def V1Write_13c5_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
149                                                       let ReleaseAtCycles = [5]; }
150 def V1Write_13c11_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
151                                                       let ReleaseAtCycles = [11]; }
152 def V1Write_15c7_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 15;
153                                                       let ReleaseAtCycles = [7]; }
154 def V1Write_16c7_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
155                                                       let ReleaseAtCycles = [7]; }
156 def V1Write_2c_1V1     : SchedWriteRes<[V1UnitV1]>  { let Latency = 2; }
157 def V1Write_3c_1V1     : SchedWriteRes<[V1UnitV1]>  { let Latency = 3; }
158 def V1Write_4c_1V1     : SchedWriteRes<[V1UnitV1]>  { let Latency = 4; }
159 def V1Write_2c_1V13    : SchedWriteRes<[V1UnitV13]> { let Latency = 2; }
160 def V1Write_4c_1V13    : SchedWriteRes<[V1UnitV13]> { let Latency = 4; }
162 //===----------------------------------------------------------------------===//
163 // Define generic 2 micro-op types
165 let Latency = 1, NumMicroOps = 2 in
166 def V1Write_1c_1B_1S     : SchedWriteRes<[V1UnitB, V1UnitS]>;
167 let Latency = 6, NumMicroOps = 2 in
168 def V1Write_6c_1B_1M0    : SchedWriteRes<[V1UnitB, V1UnitM0]>;
169 let Latency = 3, NumMicroOps = 2 in
170 def V1Write_3c_1I_1M     : SchedWriteRes<[V1UnitI, V1UnitM]>;
171 let Latency = 5, NumMicroOps = 2 in
172 def V1Write_5c_1I_1L     : SchedWriteRes<[V1UnitI, V1UnitL]>;
173 let Latency = 7, NumMicroOps = 2 in
174 def V1Write_7c_1I_1L     : SchedWriteRes<[V1UnitI, V1UnitL]>;
175 let Latency = 6, NumMicroOps = 2 in
176 def V1Write_6c_2L        : SchedWriteRes<[V1UnitL, V1UnitL]>;
177 let Latency = 6, NumMicroOps = 2 in
178 def V1Write_6c_1L_1M     : SchedWriteRes<[V1UnitL, V1UnitM]>;
179 let Latency = 8, NumMicroOps = 2 in
180 def V1Write_8c_1L_1V     : SchedWriteRes<[V1UnitL, V1UnitV]>;
181 let Latency = 9, NumMicroOps = 2 in
182 def V1Write_9c_1L_1V     : SchedWriteRes<[V1UnitL, V1UnitV]>;
183 let Latency = 11, NumMicroOps = 2 in
184 def V1Write_11c_1L_1V     : SchedWriteRes<[V1UnitL, V1UnitV]>;
185 let Latency = 1, NumMicroOps = 2 in
186 def V1Write_1c_1L01_1D   : SchedWriteRes<[V1UnitL01, V1UnitD]>;
187 let Latency = 6, NumMicroOps = 2 in
188 def V1Write_6c_1L01_1S   : SchedWriteRes<[V1UnitL01, V1UnitS]>;
189 let Latency = 7, NumMicroOps = 2 in
190 def V1Write_7c_1L01_1S   : SchedWriteRes<[V1UnitL01, V1UnitS]>;
191 let Latency = 2, NumMicroOps = 2 in
192 def V1Write_2c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>;
193 let Latency = 4, NumMicroOps = 2 in
194 def V1Write_4c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>;
195 let Latency = 6, NumMicroOps = 2 in
196 def V1Write_6c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>;
197 let Latency = 2, NumMicroOps = 2 in
198 def V1Write_2c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>;
199 let Latency = 4, NumMicroOps = 2 in
200 def V1Write_4c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>;
201 let Latency = 2, NumMicroOps = 2 in
202 def V1Write_2c_2M0       : SchedWriteRes<[V1UnitM0, V1UnitM0]>;
203 let Latency = 3, NumMicroOps = 2 in
204 def V1Write_3c_2M0       : SchedWriteRes<[V1UnitM0, V1UnitM0]>;
205 let Latency = 9, NumMicroOps = 2 in
206 def V1Write_9c_1M0_1L    : SchedWriteRes<[V1UnitM0, V1UnitL]>;
207 let Latency = 5, NumMicroOps = 2 in
208 def V1Write_5c_1M0_1V    : SchedWriteRes<[V1UnitM0, V1UnitV]>;
209 let Latency = 4, NumMicroOps = 2 in
210 def V1Write_4c_1M0_1V0    : SchedWriteRes<[V1UnitM0, V1UnitV0]>;
211 let Latency = 7, NumMicroOps = 2 in // 7 cycles; one M0 uop plus one V0 uop.
212 def V1Write_7c_1M0_1V0   : SchedWriteRes<[V1UnitM0, V1UnitV0]>;
213 let Latency = 5, NumMicroOps = 2 in
214 def V1Write_5c_1M0_1V01    : SchedWriteRes<[V1UnitM0, V1UnitV01]>;
215 let Latency = 6, NumMicroOps = 2 in
216 def V1Write_6c_1M0_1V1   : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
217 let Latency = 9, NumMicroOps = 2 in
218 def V1Write_9c_1M0_1V1    : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
219 let Latency = 4, NumMicroOps = 2 in
220 def V1Write_4c_2V        : SchedWriteRes<[V1UnitV, V1UnitV]>;
221 let Latency = 8, NumMicroOps = 2 in
222 def V1Write_8c_1V_1V01   : SchedWriteRes<[V1UnitV, V1UnitV01]>;
223 let Latency = 4, NumMicroOps = 2 in
224 def V1Write_4c_2V0       : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
225 let Latency = 5, NumMicroOps = 2 in
226 def V1Write_5c_2V0       : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
227 let Latency = 2, NumMicroOps = 2 in
228 def V1Write_2c_2V01      : SchedWriteRes<[V1UnitV01, V1UnitV01]>;
229 let Latency = 4, NumMicroOps = 2 in
230 def V1Write_4c_2V01      : SchedWriteRes<[V1UnitV01, V1UnitV01]>;
231 let Latency = 4, NumMicroOps = 2 in
232 def V1Write_4c_2V02      : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
233 let Latency = 6, NumMicroOps = 2 in
234 def V1Write_6c_2V02      : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
235 let Latency = 4, NumMicroOps = 2 in
236 def V1Write_4c_1V13_1V   : SchedWriteRes<[V1UnitV13, V1UnitV]>;
237 let Latency = 4, NumMicroOps = 2 in
238 def V1Write_4c_2V13      : SchedWriteRes<[V1UnitV13, V1UnitV13]>;
240 //===----------------------------------------------------------------------===//
241 // Define generic 3 micro-op types
243 let Latency = 2, NumMicroOps = 3 in
244 def V1Write_2c_1I_1L01_1V01 : SchedWriteRes<[V1UnitI, V1UnitL01, V1UnitV01]>;
245 let Latency = 7, NumMicroOps = 3 in
246 def V1Write_7c_2M0_1V01     : SchedWriteRes<[V1UnitM0, V1UnitM0, V1UnitV01]>;
247 let Latency = 8, NumMicroOps = 3 in
248 def V1Write_8c_1L_2V        : SchedWriteRes<[V1UnitL, V1UnitV, V1UnitV]>;
249 let Latency = 6, NumMicroOps = 3 in
250 def V1Write_6c_3L           : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL]>;
251 let Latency = 2, NumMicroOps = 3 in
252 def V1Write_2c_1L01_1S_1V   : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>;
253 let Latency = 4, NumMicroOps = 3 in
254 def V1Write_4c_1L01_1S_1V   : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>;
255 let Latency = 2, NumMicroOps = 3 in
256 def V1Write_2c_2L01_1V01    : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitV01]>;
257 let Latency = 6, NumMicroOps = 3 in
258 def V1Write_6c_3V           : SchedWriteRes<[V1UnitV, V1UnitV, V1UnitV]>;
259 let Latency = 4, NumMicroOps = 3 in
260 def V1Write_4c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
261 let Latency = 6, NumMicroOps = 3 in
262 def V1Write_6c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
263 let Latency = 8, NumMicroOps = 3 in
264 def V1Write_8c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
266 //===----------------------------------------------------------------------===//
267 // Define generic 4 micro-op types
269 let Latency = 8, NumMicroOps = 4 in
270 def V1Write_8c_2M0_2V0   : SchedWriteRes<[V1UnitM0, V1UnitM0,
271                                           V1UnitV0, V1UnitV0]>;
272 let Latency = 7, NumMicroOps = 4 in
273 def V1Write_7c_4L        : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, V1UnitL]>;
274 let Latency = 8, NumMicroOps = 4 in
275 def V1Write_8c_2L_2V        : SchedWriteRes<[V1UnitL, V1UnitL,
276                                              V1UnitV, V1UnitV]>;
277 let Latency = 9, NumMicroOps = 4 in
278 def V1Write_9c_2L_2V        : SchedWriteRes<[V1UnitL, V1UnitL,
279                                              V1UnitV, V1UnitV]>;
280 let Latency = 11, NumMicroOps = 4 in
281 def V1Write_11c_2L_2V       : SchedWriteRes<[V1UnitL, V1UnitL,
282                                              V1UnitV, V1UnitV]>;
283 let Latency = 10, NumMicroOps = 4 in
284 def V1Write_10c_2L01_2V     : SchedWriteRes<[V1UnitL01, V1UnitL01,
285                                              V1UnitV, V1UnitV]>;
286 let Latency = 2, NumMicroOps = 4 in
287 def V1Write_2c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
288                                              V1UnitV01, V1UnitV01]>;
289 let Latency = 4, NumMicroOps = 4 in
290 def V1Write_4c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
291                                              V1UnitV01, V1UnitV01]>;
292 let Latency = 8, NumMicroOps = 4 in
293 def V1Write_8c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
294                                              V1UnitV01, V1UnitV01]>;
295 let Latency = 9, NumMicroOps = 4 in
296 def V1Write_9c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
297                                              V1UnitV01, V1UnitV01]>;
298 let Latency = 10, NumMicroOps = 4 in
299 def V1Write_10c_2L01_2V01   : SchedWriteRes<[V1UnitL01, V1UnitL01,
300                                              V1UnitV01, V1UnitV01]>;
301 let Latency = 10, NumMicroOps = 4 in
302 def V1Write_10c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01,
303                                              V1UnitV1, V1UnitV1]>;
304 let Latency = 12, NumMicroOps = 4 in
305 def V1Write_12c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01,
306                                              V1UnitV1, V1UnitV1]>;
307 let Latency = 6, NumMicroOps = 4 in
308 def V1Write_6c_4V0          : SchedWriteRes<[V1UnitV0, V1UnitV0,
309                                              V1UnitV0, V1UnitV0]>;
310 let Latency = 12, NumMicroOps = 4 in
311 def V1Write_12c_4V01        : SchedWriteRes<[V1UnitV01, V1UnitV01,
312                                              V1UnitV01, V1UnitV01]>;
313 let Latency = 6, NumMicroOps = 4 in // NOTE(review): name and NumMicroOps say 4 V02 uops, but only 2 V1UnitV02 entries are listed - confirm intent.
314 def V1Write_6c_4V02         : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
316 //===----------------------------------------------------------------------===//
317 // Define generic 5 micro-op types
319 let Latency = 8, NumMicroOps = 5 in
320 def V1Write_8c_2L_3V            : SchedWriteRes<[V1UnitL, V1UnitL,
321                                                  V1UnitV, V1UnitV, V1UnitV]>;
322 let Latency = 14, NumMicroOps = 5 in
323 def V1Write_14c_1V_1V0_2V1_1V13 : SchedWriteRes<[V1UnitV,
324                                                  V1UnitV0,
325                                                  V1UnitV1, V1UnitV1,
326                                                  V1UnitV13]>;
327 let Latency = 9, NumMicroOps = 5 in
328 def V1Write_9c_1V_4V01          : SchedWriteRes<[V1UnitV,
329                                                  V1UnitV01, V1UnitV01,
330                                                  V1UnitV01, V1UnitV01]>;
331 let Latency = 6, NumMicroOps = 5 in
332 def V1Write_6c_5V01             : SchedWriteRes<[V1UnitV01, V1UnitV01,
333                                                  V1UnitV01, V1UnitV01, V1UnitV01]>;
335 //===----------------------------------------------------------------------===//
336 // Define generic 6 micro-op types
338 let Latency = 6, NumMicroOps = 6 in
339 def V1Write_6c_3L_3V      : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
340                                            V1UnitV, V1UnitV, V1UnitV]>;
341 let Latency = 8, NumMicroOps = 6 in
342 def V1Write_8c_3L_3V      : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
343                                            V1UnitV, V1UnitV, V1UnitV]>;
344 let Latency = 2, NumMicroOps = 6 in
345 def V1Write_2c_3L01_3V01  : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
346                                            V1UnitV01, V1UnitV01, V1UnitV01]>;
347 let Latency = 5, NumMicroOps = 6 in
348 def V1Write_5c_3L01_3V01  : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
349                                            V1UnitV01, V1UnitV01, V1UnitV01]>;
350 let Latency = 6, NumMicroOps = 6 in
351 def V1Write_6c_3L01_3V01  : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
352                                            V1UnitV01, V1UnitV01, V1UnitV01]>;
353 let Latency = 11, NumMicroOps = 6 in
354 def V1Write_11c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
355                                            V1UnitV01, V1UnitV01, V1UnitV01]>;
356 let Latency = 11, NumMicroOps = 6 in
357 def V1Write_11c_1V_5V01   : SchedWriteRes<[V1UnitV,
358                                            V1UnitV01, V1UnitV01,
359                                            V1UnitV01, V1UnitV01, V1UnitV01]>;
360 let Latency = 13, NumMicroOps = 6 in
361 def V1Write_13c_6V01      : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01,
362                                            V1UnitV01, V1UnitV01, V1UnitV01]>;
364 //===----------------------------------------------------------------------===//
365 // Define generic 7 micro-op types
367 let Latency = 8, NumMicroOps = 7 in
368 def V1Write_8c_3L_4V         : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
369                                               V1UnitV, V1UnitV, V1UnitV, V1UnitV]>;
370 let Latency = 13, NumMicroOps = 7 in // 13 cycles; 3 L01 uops + 1 S uop + 3 V01 uops.
371 def V1Write_13c_3L01_1S_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
372                                               V1UnitS,
373                                               V1UnitV01, V1UnitV01, V1UnitV01]>;
375 //===----------------------------------------------------------------------===//
376 // Define generic 8 micro-op types
378 let Latency = 9, NumMicroOps = 8 in
379 def V1Write_9c_4L_4V      : SchedWriteRes<[V1UnitL, V1UnitL,
380                                            V1UnitL, V1UnitL,
381                                            V1UnitV, V1UnitV,
382                                            V1UnitV, V1UnitV]>;
383 let Latency = 2, NumMicroOps = 8 in
384 def V1Write_2c_4L01_4V01  : SchedWriteRes<[V1UnitL01, V1UnitL01,
385                                            V1UnitL01, V1UnitL01,
386                                            V1UnitV01, V1UnitV01,
387                                            V1UnitV01, V1UnitV01]>;
388 let Latency = 4, NumMicroOps = 8 in
389 def V1Write_4c_4L01_4V01  : SchedWriteRes<[V1UnitL01, V1UnitL01,
390                                            V1UnitL01, V1UnitL01,
391                                            V1UnitV01, V1UnitV01,
392                                            V1UnitV01, V1UnitV01]>;
393 let Latency = 12, NumMicroOps = 8 in
394 def V1Write_12c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
395                                            V1UnitL01, V1UnitL01,
396                                            V1UnitV01, V1UnitV01,
397                                            V1UnitV01, V1UnitV01]>;
399 //===----------------------------------------------------------------------===//
400 // Define generic 10 micro-op types
402 let Latency = 13, NumMicroOps = 10 in
403 def V1Write_13c_4L01_2S_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
404                                               V1UnitL01, V1UnitL01,
405                                               V1UnitS, V1UnitS,
406                                               V1UnitV01, V1UnitV01,
407                                               V1UnitV01, V1UnitV01]>;
408 let Latency = 7, NumMicroOps = 10 in
409 def V1Write_7c_5L01_5V       : SchedWriteRes<[V1UnitL01, V1UnitL01,
410                                               V1UnitL01, V1UnitL01, V1UnitL01,
411                                               V1UnitV, V1UnitV,
412                                               V1UnitV, V1UnitV, V1UnitV]>;
413 let Latency = 11, NumMicroOps = 10 in
414 def V1Write_11c_10V0         : SchedWriteRes<[V1UnitV0,
415                                               V1UnitV0, V1UnitV0, V1UnitV0,
416                                               V1UnitV0, V1UnitV0, V1UnitV0,
417                                               V1UnitV0, V1UnitV0, V1UnitV0]>;
419 //===----------------------------------------------------------------------===//
420 // Define generic 12 micro-op types
422 let Latency = 7, NumMicroOps = 12 in
423 def V1Write_7c_6L01_6V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
424                                           V1UnitL01, V1UnitL01, V1UnitL01,
425                                           V1UnitV01, V1UnitV01, V1UnitV01,
426                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
428 //===----------------------------------------------------------------------===//
429 // Define generic 15 micro-op types
431 let Latency = 7, NumMicroOps = 15 in
432 def V1Write_7c_5L01_5S_5V : SchedWriteRes<[V1UnitL01, V1UnitL01,
433                                            V1UnitL01, V1UnitL01, V1UnitL01,
434                                            V1UnitS, V1UnitS,
435                                            V1UnitS, V1UnitS, V1UnitS,
436                                            V1UnitV, V1UnitV,
437                                            V1UnitV, V1UnitV, V1UnitV]>;
440 //===----------------------------------------------------------------------===//
441 // Define generic 18 micro-op types
443 let Latency = 11, NumMicroOps = 18 in // 11 cycles; 9 L01 uops + 9 V uops.
444 def V1Write_11c_9L01_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
445                                          V1UnitL01, V1UnitL01, V1UnitL01,
446                                          V1UnitL01, V1UnitL01, V1UnitL01,
447                                          V1UnitV, V1UnitV, V1UnitV,
448                                          V1UnitV, V1UnitV, V1UnitV,
449                                          V1UnitV, V1UnitV, V1UnitV]>;
450 let Latency = 19, NumMicroOps = 18 in
451 def V1Write_19c_18V0    : SchedWriteRes<[V1UnitV0, V1UnitV0, V1UnitV0,
452                                          V1UnitV0, V1UnitV0, V1UnitV0, 
453                                          V1UnitV0, V1UnitV0, V1UnitV0,
454                                          V1UnitV0, V1UnitV0, V1UnitV0, 
455                                          V1UnitV0, V1UnitV0, V1UnitV0,
456                                          V1UnitV0, V1UnitV0, V1UnitV0]>;
458 //===----------------------------------------------------------------------===//
459 // Define generic 27 micro-op types
461 let Latency = 11, NumMicroOps = 27 in
462 def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
463                                             V1UnitL01, V1UnitL01, V1UnitL01, 
464                                             V1UnitL01, V1UnitL01, V1UnitL01,
465                                             V1UnitS, V1UnitS, V1UnitS, 
466                                             V1UnitS, V1UnitS, V1UnitS,
467                                             V1UnitS, V1UnitS, V1UnitS,
468                                             V1UnitV, V1UnitV, V1UnitV, 
469                                             V1UnitV, V1UnitV, V1UnitV,
470                                             V1UnitV, V1UnitV, V1UnitV]>;
473 // Miscellaneous Instructions
474 // -----------------------------------------------------------------------------
476 // COPY
477 def : InstRW<[V1Write_1c_1I], (instrs COPY)>;
479 // MSR
480 def : WriteRes<WriteSys, []> { let Latency = 1; }
483 // Branch Instructions
484 // -----------------------------------------------------------------------------
486 // Branch, immed
487 // Compare and branch
488 def : SchedAlias<WriteBr, V1Write_1c_1B>;
490 // Branch, register
491 def : SchedAlias<WriteBrReg, V1Write_1c_1B>;
493 // Branch and link, immed
494 // Branch and link, register
495 def : InstRW<[V1Write_1c_1B_1S], (instrs BL, BLR)>;
497 // Compare and branch
498 def : InstRW<[V1Write_1c_1B], (instregex "^[CT]BN?Z[XW]$")>;
501 // Arithmetic and Logical Instructions
502 // -----------------------------------------------------------------------------
504 // ALU, basic
505 // Conditional compare
506 // Conditional select
507 // Logical, basic
508 // Address generation
509 // Count leading
510 // Reverse bits/bytes
511 // Move immediate
512 def : SchedAlias<WriteI, V1Write_1c_1I>;
514 // ALU, basic, flagset
515 def : InstRW<[V1Write_1c_1J],
516              (instregex "^(ADD|SUB)S[WX]r[ir]$",
517                         "^(ADC|SBC)S[WX]r$",
518                         "^ANDS[WX]ri$",
519                         "^(AND|BIC)S[WX]rr$")>;
521 // ALU, extend and shift
522 def : SchedAlias<WriteIEReg, V1Write_2c_1M>;
524 // Arithmetic, LSL shift, shift <= 4
525 // Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
526 def V1WriteISReg : SchedWriteVariant<
527                      [SchedVar<IsCheapLSL,  [V1Write_1c_1I]>,
528                       SchedVar<NoSchedPred, [V1Write_2c_1M]>]>;
529 def              : SchedAlias<WriteISReg, V1WriteISReg>;
531 // Arithmetic, flagset, LSL shift, shift <= 4
532 // Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4
533 def V1WriteISRegS : SchedWriteVariant<
534                       [SchedVar<IsCheapLSL,  [V1Write_1c_1J]>,
535                        SchedVar<NoSchedPred, [V1Write_2c_1M]>]>;
536 def               : InstRW<[V1WriteISRegS],
537                            (instregex "^(ADD|SUB)S(([WX]r[sx])|Xrx64)$")>;
539 // Logical, shift, no flagset
540 def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
542 // Logical, shift, flagset
543 def : InstRW<[V1Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>;
545 // Flag manipulation instructions
546 def : InstRW<[V1Write_1c_1J], (instrs SETF8, SETF16, RMIF, CFINV)>;
549 // Divide and multiply instructions
550 // -----------------------------------------------------------------------------
552 // Divide
553 def : SchedAlias<WriteID32, V1Write_12c5_1M0>;
554 def : SchedAlias<WriteID64, V1Write_20c5_1M0>;
556 // Multiply
557 // Multiply accumulate
558 // Multiply accumulate, long
559 // Multiply long
560 def V1WriteIM : SchedWriteVariant< // Picks the multiply write by predicate.
561                   [SchedVar<NeoverseMULIdiomPred, [V1Write_2c_1M]>, // Predicate holds: 2 cycles on either M unit.
562                    SchedVar<NoSchedPred,          [V1Write_2c_1M0]>]>; // Otherwise: 2 cycles on M0 only.
563 def           : SchedAlias<WriteIM32, V1WriteIM>; // WriteIM32 and WriteIM64 share the same variant.
564 def           : SchedAlias<WriteIM64, V1WriteIM>;
566 // Multiply high
567 def : InstRW<[V1Write_3c_1M, ReadIM, ReadIM], (instrs SMULHrr, UMULHrr)>;
570 // Pointer Authentication Instructions (v8.3 PAC)
571 // -----------------------------------------------------------------------------
573 // Authenticate data address
574 // Authenticate instruction address
575 // Compute pointer authentication code for data address
576 // Compute pointer authentication code, using generic key
577 // Compute pointer authentication code for instruction address
578 def : InstRW<[V1Write_5c_1M0], (instregex "^AUT",
579                                           "^PAC")>;
581 // Branch and link, register, with pointer authentication
582 // Branch, register, with pointer authentication
583 // Branch, return, with pointer authentication
584 def : InstRW<[V1Write_6c_1B_1M0], (instregex "^BL?RA[AB]Z?$",
585                                              "^E?RETA[AB]$")>;
587 // Load register, with pointer authentication
588 def : InstRW<[V1Write_9c_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
590 // Strip pointer authentication code
591 def : InstRW<[V1Write_2c_1M0], (instrs XPACD, XPACI, XPACLRI)>;
594 // Miscellaneous data-processing instructions
595 // -----------------------------------------------------------------------------
597 // Bitfield extract, one reg
598 // Bitfield extract, two regs
// V1WriteExtr chooses the write resource per instruction: when
// IsRORImmIdiomPred holds it resolves to V1Write_1c_1I, otherwise the
// NoSchedPred fallback resolves to V1Write_3c_1I_1M. WriteExtr is aliased to
// this variant.
// NOTE(review): IsRORImmIdiomPred is defined outside this section; presumably
// it matches EXTR with both source registers equal (the "one reg" case) --
// confirm.
599 def V1WriteExtr : SchedWriteVariant<
600                     [SchedVar<IsRORImmIdiomPred, [V1Write_1c_1I]>,
601                      SchedVar<NoSchedPred,       [V1Write_3c_1I_1M]>]>;
602 def : SchedAlias<WriteExtr, V1WriteExtr>;
604 // Bitfield move, basic
605 // Variable shift
606 def : SchedAlias<WriteIS, V1Write_1c_1I>;
608 // Bitfield move, insert
609 def : InstRW<[V1Write_2c_1M], (instregex "^BFM[WX]ri$")>;
611 // Move immediate
612 def : SchedAlias<WriteImm, V1Write_1c_1I>;
615 // Load instructions
616 // -----------------------------------------------------------------------------
618 // Load register, immed offset
619 def : SchedAlias<WriteLD, V1Write_4c_1L>;
621 // Load register, immed offset, index
622 def : SchedAlias<WriteLDIdx, V1Write_4c_1L>;
623 def : SchedAlias<WriteAdr,   V1Write_1c_1I>;
625 // Load pair, immed offset
626 def : SchedAlias<WriteLDHi, V1Write_4c_1L>;
627 def : InstRW<[V1Write_4c_1L, V1Write_0c_0Z], (instrs LDPWi, LDNPWi)>;
628 def : InstRW<[WriteAdr, V1Write_4c_1L, V1Write_0c_0Z],
629              (instrs LDPWpost, LDPWpre)>;
631 // Load pair, signed immed offset, signed words
632 def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z], (instrs LDPSWi)>;
634 // Load pair, immed post or pre-index, signed words
635 def : InstRW<[WriteAdr, V1Write_5c_1I_1L, V1Write_0c_0Z],
636              (instrs LDPSWpost, LDPSWpre)>;
639 // Store instructions
640 // -----------------------------------------------------------------------------
642 // Store register, immed offset
643 def : SchedAlias<WriteST, V1Write_1c_1L01_1D>;
645 // Store register, immed offset, index
646 def : SchedAlias<WriteSTIdx, V1Write_1c_1L01_1D>;
648 // Store pair, immed offset
649 def : SchedAlias<WriteSTP, V1Write_1c_1L01_1D>;
652 // FP data processing instructions
653 // -----------------------------------------------------------------------------
655 // FP absolute value
656 // FP arithmetic
657 // FP min/max
658 // FP negate
659 def : SchedAlias<WriteF, V1Write_2c_1V>;
661 // FP compare
662 def : SchedAlias<WriteFCmp, V1Write_2c_1V0>;
664 // FP divide
665 // FP square root
666 def : SchedAlias<WriteFDiv, V1Write_10c7_1V02>;
668 // FP divide, H-form
669 // FP square root, H-form
670 def : InstRW<[V1Write_7c7_1V02], (instrs FDIVHrr, FSQRTHr)>;
672 // FP divide, S-form
673 // FP square root, S-form
674 def : InstRW<[V1Write_10c7_1V02], (instrs FDIVSrr, FSQRTSr)>;
676 // FP divide, D-form
677 def : InstRW<[V1Write_15c7_1V02], (instrs FDIVDrr)>;
679 // FP square root, D-form
680 def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTDr)>;
682 // FP multiply
683 def : SchedAlias<WriteFMul, V1Write_3c_1V>;
685 // FP multiply accumulate
686 def : InstRW<[V1Write_4c_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
688 // FP round to integral
689 def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$",
690                                            "^FRINT(32|64)[XZ][SD]r$")>;
692 // FP select
693 def : InstRW<[V1Write_2c_1V01], (instregex "^FCSEL[HSD]rrr$")>;
696 // FP miscellaneous instructions
697 // -----------------------------------------------------------------------------
699 // FP convert, from gen to vec reg
700 def : InstRW<[V1Write_3c_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
702 // FP convert, from vec to gen reg
703 def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;
705 // FP convert, JavaScript from vec to gen reg
706 def : InstRW<[V1Write_3c_1V0], (instrs FJCVTZS)>;
708 // FP convert, from vec to vec reg
709 def : SchedAlias<WriteFCvt, V1Write_3c_1V02>;
711 // FP move, immed
712 def : SchedAlias<WriteFImm, V1Write_2c_1V>;
714 // FP move, register
715 def : InstRW<[V1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
717 // FP transfer, from gen to low half of vec reg
718 def : InstRW<[V1Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
720 // FP transfer, from gen to high half of vec reg
721 def : InstRW<[V1Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
723 // FP transfer, from vec to gen reg
724 def : SchedAlias<WriteFCopy, V1Write_2c_1V1>;
727 // FP load instructions
728 // -----------------------------------------------------------------------------
730 // Load vector reg, literal, S/D/Q forms
731 // Load vector reg, unscaled immed
732 // Load vector reg, unsigned immed
733 def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[SDQ]l$",
734                                                       "^LDUR[BHSDQ]i$",
735                                                       "^LDR[BHSDQ]ui$")>;
737 // Load vector reg, immed post-index
738 // Load vector reg, immed pre-index
739 def : InstRW<[WriteAdr, V1Write_6c_1L],
740              (instregex "^LDR[BHSDQ](post|pre)$")>;
742 // Load vector reg, register offset, basic
743 // Load vector reg, register offset, scale, S/D-form
744 // Load vector reg, register offset, extend
745 // Load vector reg, register offset, extend, scale, S/D-form
746 def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;
748 // Load vector reg, register offset, scale, H/Q-form
749 // Load vector reg, register offset, extend, scale, H/Q-form
750 def : InstRW<[V1Write_7c_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;
752 // Load vector pair, immed offset, S/D-form
753 def : InstRW<[V1Write_6c_1L, V1Write_0c_0Z], (instregex "^LDN?P[SD]i$")>;
755 // Load vector pair, immed offset, Q-form
756 def : InstRW<[V1Write_6c_1L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
758 // Load vector pair, immed post-index, S/D-form
759 // Load vector pair, immed pre-index, S/D-form
760 def : InstRW<[WriteAdr, V1Write_6c_1L, V1Write_0c_0Z],
761              (instregex "^LDP[SD](pre|post)$")>;
763 // Load vector pair, immed post-index, Q-form
764 // Load vector pair, immed pre-index, Q-form
765 def : InstRW<[WriteAdr, V1Write_6c_1L, WriteLDHi],
766              (instrs LDPQpost, LDPQpre)>;
769 // FP store instructions
770 // -----------------------------------------------------------------------------
772 // Store vector reg, unscaled immed, B/H/S/D/Q-form
773 def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>;
775 // Store vector reg, immed post-index, B/H/S/D/Q-form
776 // Store vector reg, immed pre-index, B/H/S/D/Q-form
777 def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
778              (instregex "^STR[BHSDQ](pre|post)$")>;
780 // Store vector reg, unsigned immed, B/H/S/D/Q-form
781 def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>;
783 // Store vector reg, register offset, basic, B/S/D-form
784 // Store vector reg, register offset, scale, B/S/D-form
785 // Store vector reg, register offset, extend, B/S/D-form
786 // Store vector reg, register offset, extend, scale, B/S/D-form
787 def : InstRW<[V1Write_2c_1L01_1V01, ReadAdrBase],
788              (instregex "^STR[BSD]ro[WX]$")>;
790 // Store vector reg, register offset, basic, H/Q-form
791 // Store vector reg, register offset, scale, H/Q-form
792 // Store vector reg, register offset, extend, H/Q-form
793 // Store vector reg, register offset, extend, scale, H/Q-form
794 def : InstRW<[V1Write_2c_1I_1L01_1V01, ReadAdrBase],
795              (instregex "^STR[HQ]ro[WX]$")>;
797 // Store vector pair, immed offset, S/D/Q-form
798 def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STN?P[SDQ]i$")>;
800 // Store vector pair, immed post-index, S/D-form
801 // Store vector pair, immed pre-index, S/D-form
802 def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
803              (instregex "^STP[SD](pre|post)$")>;
805 // Store vector pair, immed post-index, Q-form
806 // Store vector pair, immed pre-index, Q-form
807 def : InstRW<[WriteAdr, V1Write_2c_2L01_1V01], (instrs STPQpre, STPQpost)>;
810 // ASIMD integer instructions
811 // -----------------------------------------------------------------------------
813 // ASIMD absolute diff
814 // ASIMD absolute diff long
815 // ASIMD arith, basic
816 // ASIMD arith, complex
817 // ASIMD arith, pair-wise
818 // ASIMD compare
819 // ASIMD logical
820 // ASIMD max/min, basic and pair-wise
821 def : SchedAlias<WriteVd, V1Write_2c_1V>;
822 def : SchedAlias<WriteVq, V1Write_2c_1V>;
824 // ASIMD absolute diff accum
825 // ASIMD absolute diff accum long
826 // ASIMD pairwise add and accumulate long
827 def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]ABAL?v", "^[SU]ADALPv")>;
829 // ASIMD arith, reduce, 4H/4S
830 // ASIMD max/min, reduce, 4H/4S
831 def : InstRW<[V1Write_2c_1V13], (instregex "^(ADD|[SU]ADDL)Vv4(i16|i32)v$",
832                                            "^[SU](MAX|MIN)Vv4(i16|i32)v$")>;
834 // ASIMD arith, reduce, 8B/8H
835 // ASIMD max/min, reduce, 8B/8H
836 def : InstRW<[V1Write_4c_1V13_1V], (instregex "^(ADD|[SU]ADDL)Vv8(i8|i16)v$",
837                                               "^[SU](MAX|MIN)Vv8(i8|i16)v$")>;
839 // ASIMD arith, reduce, 16B
840 // ASIMD max/min, reduce, 16B
841 def : InstRW<[V1Write_4c_2V13], (instregex "^(ADD|[SU]ADDL)Vv16i8v$",
842                                            "^[SU](MAX|MIN)Vv16i8v$")>;
844 // ASIMD dot product
845 // ASIMD dot product using signed and unsigned integers
846 def : InstRW<[V1Write_2c_1V], (instregex "^([SU]|SU|US)DOT(lane)?v(8|16)i8$")>;
848 // ASIMD matrix multiply-accumulate
849 def : InstRW<[V1Write_3c_1V], (instrs SMMLA, UMMLA, USMMLA)>;
851 // ASIMD multiply
852 // ASIMD multiply accumulate
853 // ASIMD multiply accumulate long
854 // ASIMD multiply accumulate high
855 // ASIMD multiply accumulate saturating long
856 def : InstRW<[V1Write_4c_1V02], 
857              (instregex "^MUL(v[148]i16|v[124]i32)$",
858                         "^SQR?DMULH(v[48]i16|v[24]i32)$",
859                         "^ML[AS](v[148]i16|v[124]i32)$",
860                         "^[SU]ML[AS]Lv",
861                         "^SQRDML[AS]H(v[148]i16|v[124]i32)$",
862                         "^SQDML[AS]Lv")>;
864 // ASIMD multiply/multiply long (8x8) polynomial
865 def : InstRW<[V1Write_3c_1V01], (instregex "^PMULL?v(8|16)i8$")>;
867 // ASIMD multiply long
868 def : InstRW<[V1Write_3c_1V02], (instregex "^([SU]|SQD)MULLv")>;
870 // ASIMD shift accumulate
871 // ASIMD shift by immed, complex
872 // ASIMD shift by register, complex
873 def : InstRW<[V1Write_4c_1V13],
874              (instregex "^[SU]R?SRAv",
875                         "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$",
876                         "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
877                         "^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv", 
878                         "^[SU]Q?RSHLv", "^[SU]QSHLv")>;
880 // ASIMD shift by immed, basic
881 // ASIMD shift by immed and insert, basic
882 // ASIMD shift by register, basic
883 def : InstRW<[V1Write_2c_1V13], (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv",
884                                           "^[SU]SHRv", "^S[LR]Iv", "^[SU]SHLv")>;
887 // ASIMD FP instructions
888 // -----------------------------------------------------------------------------
890 // ASIMD FP absolute value/difference
891 // ASIMD FP arith, normal
892 // ASIMD FP compare
893 // ASIMD FP complex add
894 // ASIMD FP max/min, normal
895 // ASIMD FP max/min, pairwise
896 // ASIMD FP negate
897 // Covered by "SchedAlias (WriteV[dq]...)" above
899 // ASIMD FP complex multiply add
900 // ASIMD FP multiply accumulate
// The complex multiply-add opcode is FCMLA (vector and by-element forms).
// FCADD (complex add) is listed above among the operations covered by the
// WriteV[dq] aliases, so it must not be matched here.
// NOTE(review): confirm both latencies against the Neoverse V1 Software
// Optimization Guide.
901 def : InstRW<[V1Write_4c_1V], (instregex "^FCMLAv",
902                                          "^FML[AS]v")>;
904 // ASIMD FP convert, long (F16 to F32)
905 def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTLv[48]i16$")>;
907 // ASIMD FP convert, long (F32 to F64)
908 def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTLv[24]i32$")>;
910 // ASIMD FP convert, narrow (F32 to F16)
911 def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTNv[48]i16$")>;
913 // ASIMD FP convert, narrow (F64 to F32)
914 def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTNv[24]i32$",
915                                            "^FCVTXN(v[24]f32|v1i64)$")>;
917 // ASIMD FP convert, other, D-form F32 and Q-form F64
918 def : InstRW<[V1Write_3c_1V02], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
919                                            "^[SU]CVTFv2f(32|64)$")>;
921 // ASIMD FP convert, other, D-form F16 and Q-form F32
922 def : InstRW<[V1Write_4c_2V02], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
923                                            "^[SU]CVTFv4f(16|32)$")>;
925 // ASIMD FP convert, other, Q-form F16
926 def : InstRW<[V1Write_6c_4V02], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
927                                            "^[SU]CVTFv8f16$")>;
929 // ASIMD FP divide, D-form, F16
930 // ASIMD FP square root, D-form, F16
931 def : InstRW<[V1Write_7c7_1V02], (instrs FDIVv4f16, FSQRTv4f16)>;
933 // ASIMD FP divide, F32
934 // ASIMD FP square root, F32
935 def : InstRW<[V1Write_10c7_1V02], (instrs FDIVv2f32, FDIVv4f32,
936                                           FSQRTv2f32, FSQRTv4f32)>;
938 // ASIMD FP divide, Q-form, F16
939 def : InstRW<[V1Write_13c5_1V02], (instrs FDIVv8f16)>;
941 // ASIMD FP divide, Q-form, F64
942 def : InstRW<[V1Write_15c7_1V02], (instrs FDIVv2f64)>;
944 // ASIMD FP square root, Q-form, F16
945 def : InstRW<[V1Write_13c11_1V02], (instrs FSQRTv8f16)>;
947 // ASIMD FP square root, Q-form, F64
948 def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTv2f64)>;
950 // ASIMD FP max/min, reduce, F32 and D-form F16
951 def : InstRW<[V1Write_4c_2V], (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>;
953 // ASIMD FP max/min, reduce, Q-form F16
954 def : InstRW<[V1Write_6c_3V], (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>;
956 // ASIMD FP multiply
957 def : InstRW<[V1Write_3c_1V], (instregex "^FMULX?v")>;
959 // ASIMD FP multiply accumulate long
960 def : InstRW<[V1Write_5c_1V], (instregex "^FML[AS]L2?v")>;
962 // ASIMD FP round, D-form F32 and Q-form F64
963 def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>;
965 // ASIMD FP round, D-form F16 and Q-form F32
966 def : InstRW<[V1Write_4c_2V02], (instregex "^FRINT[AIMNPXZ]v4f(16|32)$")>;
968 // ASIMD FP round, Q-form F16
969 def : InstRW<[V1Write_6c_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
972 // ASIMD BF instructions
973 // -----------------------------------------------------------------------------
975 // ASIMD convert, F32 to BF16
976 def : InstRW<[V1Write_4c_1V02], (instrs BFCVTN, BFCVTN2)>;
978 // ASIMD dot product
979 def : InstRW<[V1Write_4c_1V], (instregex "^BF(DOT|16DOTlane)v[48]bf16$")>;
981 // ASIMD matrix multiply accumulate
982 def : InstRW<[V1Write_5c_1V], (instrs BFMMLA)>;
984 // ASIMD multiply accumulate long
985 def : InstRW<[V1Write_4c_1V], (instregex "^BFMLAL[BT](Idx)?$")>;
987 // Scalar convert, F32 to BF16
988 def : InstRW<[V1Write_3c_1V02], (instrs BFCVT)>;
991 // ASIMD miscellaneous instructions
992 // -----------------------------------------------------------------------------
994 // ASIMD bit reverse
995 // ASIMD bitwise insert
996 // ASIMD count
997 // ASIMD duplicate, element
998 // ASIMD extract
999 // ASIMD extract narrow
1000 // ASIMD insert, element to element
1001 // ASIMD move, FP immed
1002 // ASIMD move, integer immed
1003 // ASIMD reverse
1004 // ASIMD table lookup, 1 or 2 table regs
1005 // ASIMD table lookup extension, 1 table reg
1006 // ASIMD transfer, element to gen reg
1007 // ASIMD transpose
1008 // ASIMD unzip/zip
1009 // Covered by "SchedAlias (WriteV[dq]...)" above
1011 // ASIMD duplicate, gen reg
1012 def : InstRW<[V1Write_3c_1M0],
1013              (instregex "^DUP((v16|v8)i8|(v8|v4)i16|(v4|v2)i32|v2i64)gpr$")>;
1015 // ASIMD extract narrow, saturating
1016 def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
1018 // ASIMD reciprocal and square root estimate, D-form U32
1019 // ASIMD reciprocal and square root estimate, D-form F32 and F64
1020 def : InstRW<[V1Write_3c_1V02], (instrs URECPEv2i32,
1021                                         URSQRTEv2i32,
1022                                         FRECPEv1i32, FRECPEv2f32, FRECPEv1i64,
1023                                         FRSQRTEv1i32, FRSQRTEv2f32, FRSQRTEv1i64)>;
1025 // ASIMD reciprocal and square root estimate, Q-form U32
1026 // ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 and F64
1027 def : InstRW<[V1Write_4c_1V02], (instrs URECPEv4i32,
1028                                         URSQRTEv4i32,
1029                                         FRECPEv1f16, FRECPEv4f16,
1030                                         FRECPEv4f32, FRECPEv2f64,
1031                                         FRSQRTEv1f16, FRSQRTEv4f16,
1032                                         FRSQRTEv4f32, FRSQRTEv2f64)>;
1034 // ASIMD reciprocal and square root estimate, Q-form F16
1035 def : InstRW<[V1Write_6c_2V02], (instrs FRECPEv8f16,
1036                                         FRSQRTEv8f16)>;
1038 // ASIMD reciprocal exponent
1039 def : InstRW<[V1Write_3c_1V02], (instrs FRECPXv1f16, FRECPXv1i32, FRECPXv1i64)>;
1041 // ASIMD reciprocal step
1042 def : InstRW<[V1Write_4c_1V], (instregex "^FRECPS(16|32|64)$", "^FRECPSv",
1043                                          "^FRSQRTS(16|32|64)$", "^FRSQRTSv")>;
1045 // ASIMD table lookup, 1 or 2 table regs
1046 // ASIMD table lookup extension, 1 table reg
1047 def : InstRW<[V1Write_2c_2V01], (instregex "^TBLv(8|16)i8(One|Two)$",
1048                                            "^TBXv(8|16)i8One$")>;
1050 // ASIMD table lookup, 3 table regs
1051 // ASIMD table lookup extension, 2 table reg
1052 def : InstRW<[V1Write_4c_2V01], (instrs TBLv8i8Three, TBLv16i8Three,
1053                                         TBXv8i8Two, TBXv16i8Two)>;
1055 // ASIMD table lookup, 4 table regs
1056 def : InstRW<[V1Write_4c_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>;
1058 // ASIMD table lookup extension, 3 table reg
1059 def : InstRW<[V1Write_6c_3V01], (instrs TBXv8i8Three, TBXv16i8Three)>;
1061 // ASIMD table lookup extension, 4 table reg
1062 def : InstRW<[V1Write_6c_5V01], (instrs TBXv8i8Four, TBXv16i8Four)>;
1064 // ASIMD transfer, element to gen reg
1065 def : InstRW<[V1Write_2c_1V], (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$",
1066                                          "^UMOVvi(8|16|32|64)$")>;
1068 // ASIMD transfer, gen reg to element
1069 def : InstRW<[V1Write_5c_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
1072 // ASIMD load instructions
1073 // -----------------------------------------------------------------------------
1075 // ASIMD load, 1 element, multiple, 1 reg
1076 def : InstRW<[V1Write_6c_1L],
1077              (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1078 def : InstRW<[WriteAdr, V1Write_6c_1L],
1079              (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1081 // ASIMD load, 1 element, multiple, 2 reg
1082 def : InstRW<[V1Write_6c_2L],
1083              (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1084 def : InstRW<[WriteAdr, V1Write_6c_2L],
1085              (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1087 // ASIMD load, 1 element, multiple, 3 reg
1088 def : InstRW<[V1Write_6c_3L],
1089              (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1090 def : InstRW<[WriteAdr, V1Write_6c_3L],
1091              (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1093 // ASIMD load, 1 element, multiple, 4 reg, D-form
1094 def : InstRW<[V1Write_6c_2L],
1095              (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
1096 def : InstRW<[WriteAdr, V1Write_6c_2L],
1097              (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
1099 // ASIMD load, 1 element, multiple, 4 reg, Q-form
1100 def : InstRW<[V1Write_7c_4L],
1101              (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
1102 def : InstRW<[WriteAdr, V1Write_7c_4L],
1103              (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
1105 // ASIMD load, 1 element, one lane
1106 // ASIMD load, 1 element, all lanes
// The first pattern covers the one-lane forms (LD1i*), the second the
// all-lanes replicate forms (LD1Rv*); the post-index variants below mirror
// them and additionally write back the address (WriteAdr).
1107 def : InstRW<[V1Write_8c_1L_1V],
1108              (instregex "^LD1i(8|16|32|64)$",
1109                         "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1110 def : InstRW<[WriteAdr, V1Write_8c_1L_1V],
1111              (instregex "^LD1i(8|16|32|64)_POST$",
1112                         "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1114 // ASIMD load, 2 element, multiple, D-form
1115 def : InstRW<[V1Write_8c_1L_2V],
1116              (instregex "^LD2Twov(8b|4h|2s)$")>;
1117 def : InstRW<[WriteAdr, V1Write_8c_1L_2V],
1118              (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
1119                         
1120 // ASIMD load, 2 element, multiple, Q-form
1121 def : InstRW<[V1Write_8c_2L_2V],
1122              (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
1123 def : InstRW<[WriteAdr, V1Write_8c_2L_2V],
1124              (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
1125                         
1126 // ASIMD load, 2 element, one lane
1127 // ASIMD load, 2 element, all lanes
1128 def : InstRW<[V1Write_8c_1L_2V],
1129              (instregex "^LD2i(8|16|32|64)$",
1130                         "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1131 def : InstRW<[WriteAdr, V1Write_8c_1L_2V],
1132              (instregex "^LD2i(8|16|32|64)_POST$",
1133                         "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1134                         
1135 // ASIMD load, 3 element, multiple, D-form
1136 // ASIMD load, 3 element, one lane
1137 // ASIMD load, 3 element, all lanes
1138 def : InstRW<[V1Write_8c_2L_3V],
1139              (instregex "^LD3Threev(8b|4h|2s)$",
1140                         "^LD3i(8|16|32|64)$",
1141                         "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1142 def : InstRW<[WriteAdr, V1Write_8c_2L_3V],
1143              (instregex "^LD3Threev(8b|4h|2s)_POST$",
1144                         "^LD3i(8|16|32|64)_POST$",
1145                         "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1147 // ASIMD load, 3 element, multiple, Q-form
1148 def : InstRW<[V1Write_8c_3L_3V],
1149              (instregex "^LD3Threev(16b|8h|4s|2d)$")>;
1150 def : InstRW<[WriteAdr, V1Write_8c_3L_3V],
1151              (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>;
1153 // ASIMD load, 4 element, multiple, D-form
1154 // ASIMD load, 4 element, one lane
1155 // ASIMD load, 4 element, all lanes
1156 def : InstRW<[V1Write_8c_3L_4V],
1157              (instregex "^LD4Fourv(8b|4h|2s)$",
1158                         "^LD4i(8|16|32|64)$",
1159                         "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1160 def : InstRW<[WriteAdr, V1Write_8c_3L_4V],
1161              (instregex "^LD4Fourv(8b|4h|2s)_POST$",
1162                         "^LD4i(8|16|32|64)_POST$",
1163                         "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1165 // ASIMD load, 4 element, multiple, Q-form
1166 def : InstRW<[V1Write_9c_4L_4V],
1167              (instregex "^LD4Fourv(16b|8h|4s|2d)$")>;
1168 def : InstRW<[WriteAdr, V1Write_9c_4L_4V],
1169              (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>;
1172 // ASIMD store instructions
1173 // -----------------------------------------------------------------------------
1175 // ASIMD store, 1 element, multiple, 1 reg
1176 // ASIMD store, 1 element, multiple, 2 reg, D-form
1177 def : InstRW<[V1Write_2c_1L01_1V01],
1178              (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$",
1179                         "^ST1Twov(8b|4h|2s|1d)$")>;
1180 def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
1181              (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$",
1182                         "^ST1Twov(8b|4h|2s|1d)_POST$")>;
1184 // ASIMD store, 1 element, multiple, 2 reg, Q-form
1185 // ASIMD store, 1 element, multiple, 3 reg, D-form
1186 // ASIMD store, 1 element, multiple, 4 reg, D-form
1187 def : InstRW<[V1Write_2c_2L01_2V01],
1188              (instregex "^ST1Twov(16b|8h|4s|2d)$",
1189                         "^ST1Threev(8b|4h|2s|1d)$",
1190                         "^ST1Fourv(8b|4h|2s|1d)$")>;
1191 def : InstRW<[WriteAdr, V1Write_2c_2L01_2V01],
1192              (instregex "^ST1Twov(16b|8h|4s|2d)_POST$",
1193                         "^ST1Threev(8b|4h|2s|1d)_POST$",
1194                         "^ST1Fourv(8b|4h|2s|1d)_POST$")>;
1196 // ASIMD store, 1 element, multiple, 3 reg, Q-form
1197 def : InstRW<[V1Write_2c_3L01_3V01],
1198              (instregex "^ST1Threev(16b|8h|4s|2d)$")>;
1199 def : InstRW<[WriteAdr, V1Write_2c_3L01_3V01],
1200              (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>;
1202 // ASIMD store, 1 element, multiple, 4 reg, Q-form
1203 def : InstRW<[V1Write_2c_4L01_4V01],
1204              (instregex "^ST1Fourv(16b|8h|4s|2d)$")>;
1205 def : InstRW<[WriteAdr, V1Write_2c_4L01_4V01],
1206              (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>;
1208 // ASIMD store, 1 element, one lane
1209 // ASIMD store, 2 element, multiple, D-form
1210 // ASIMD store, 2 element, one lane
1211 def : InstRW<[V1Write_4c_1L01_1V01],
1212              (instregex "^ST1i(8|16|32|64)$",
1213                         "^ST2Twov(8b|4h|2s)$",
1214                         "^ST2i(8|16|32|64)$")>;
1215 def : InstRW<[WriteAdr, V1Write_4c_1L01_1V01],
1216              (instregex "^ST1i(8|16|32|64)_POST$",
1217                         "^ST2Twov(8b|4h|2s)_POST$",
1218                         "^ST2i(8|16|32|64)_POST$")>;
1220 // ASIMD store, 2 element, multiple, Q-form
1221 // ASIMD store, 3 element, multiple, D-form
1222 // ASIMD store, 3 element, one lane
1223 // ASIMD store, 4 element, one lane, D
1224 def : InstRW<[V1Write_4c_2L01_2V01],
1225              (instregex "^ST2Twov(16b|8h|4s|2d)$",
1226                         "^ST3Threev(8b|4h|2s)$",
1227                         "^ST3i(8|16|32|64)$",
1228                         "^ST4i64$")>;
1229 def : InstRW<[WriteAdr, V1Write_4c_2L01_2V01],
1230              (instregex "^ST2Twov(16b|8h|4s|2d)_POST$",
1231                         "^ST3Threev(8b|4h|2s)_POST$",
1232                         "^ST3i(8|16|32|64)_POST$",
1233                         "^ST4i64_POST$")>;
1235 // ASIMD store, 3 element, multiple, Q-form
1236 def : InstRW<[V1Write_5c_3L01_3V01],
1237              (instregex "^ST3Threev(16b|8h|4s|2d)$")>;
1238 def : InstRW<[WriteAdr, V1Write_5c_3L01_3V01],
1239              (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>;
1241 // ASIMD store, 4 element, multiple, D-form
1242 def : InstRW<[V1Write_6c_3L01_3V01],
1243              (instregex "^ST4Fourv(8b|4h|2s)$")>;
1244 def : InstRW<[WriteAdr, V1Write_6c_3L01_3V01],
1245              (instregex "^ST4Fourv(8b|4h|2s)_POST$")>;
1247 // ASIMD store, 4 element, multiple, Q-form, B/H/S
1248 def : InstRW<[V1Write_7c_6L01_6V01],
1249              (instregex "^ST4Fourv(16b|8h|4s)$")>;
1250 def : InstRW<[WriteAdr, V1Write_7c_6L01_6V01],
1251              (instregex "^ST4Fourv(16b|8h|4s)_POST$")>;
1253 // ASIMD store, 4 element, multiple, Q-form, D
1254 def : InstRW<[V1Write_4c_4L01_4V01],
1255              (instrs ST4Fourv2d)>;
1256 def : InstRW<[WriteAdr, V1Write_4c_4L01_4V01],
1257              (instrs ST4Fourv2d_POST)>;
1259 // ASIMD store, 4 element, one lane, B/H/S
// Uses the L01/V01 resources like every other ASIMD store in this section,
// rather than the generic L/V groups.
// NOTE(review): confirm the pipeline assignment against the Neoverse V1
// Software Optimization Guide.
1260 def : InstRW<[V1Write_6c_3L01_3V01],
1261              (instregex "^ST4i(8|16|32)$")>;
1262 def : InstRW<[WriteAdr, V1Write_6c_3L01_3V01],
1263              (instregex "^ST4i(8|16|32)_POST$")>;
1266 // Cryptography extensions
1267 // -----------------------------------------------------------------------------
1269 // Crypto polynomial (64x64) multiply long
1270 // Covered by "SchedAlias (WriteV[dq]...)" above
1272 // Crypto AES ops
// AESD/AESE are given the dedicated write V1WriteVC (a sequence of just
// V1Write_2c_1V) so that AESMC/AESIMC can read that result through V1ReadVC,
// a SchedReadAdvance of 2 cycles that applies only to values produced by
// V1WriteVC.
// NOTE(review): presumably the advance cancels the producer's latency to
// model back-to-back AESE/AESMC and AESD/AESIMC pairs -- confirm against the
// definition of V1Write_2c_1V.
1273 def V1WriteVC : WriteSequence<[V1Write_2c_1V]>;
1274 def V1ReadVC  : SchedReadAdvance<2, [V1WriteVC]>;
1275 def           : InstRW<[V1WriteVC], (instrs AESDrr, AESErr)>;
1276 def           : InstRW<[V1Write_2c_1V, V1ReadVC], (instrs AESMCrr, AESIMCrr)>;
1278 // Crypto SHA1 hash acceleration op
1279 // Crypto SHA1 schedule acceleration ops
1280 // Crypto SHA256 schedule acceleration ops
1281 // Crypto SHA512 hash acceleration ops
1282 // Crypto SM3 ops
// The SM3 pattern lists SM3PARTW1, SM3PARTW2 and SM3SS1 as separate
// alternatives next to SM3TT[12][AB]; fusing "PARTW2" and "SM3SS1" into one
// alternative would match neither opcode.
1283 def : InstRW<[V1Write_2c_1V0], (instregex "^SHA1(H|SU[01])rr$",
1284                                           "^SHA256SU[01]rr$",
1285                                           "^SHA512(H2?|SU[01])$",
1286                                           "^SM3(PARTW[12]|SS1|TT[12][AB])$")>;
1288 // Crypto SHA1 hash acceleration ops
1289 // Crypto SHA256 hash acceleration ops
1290 // Crypto SM4 ops
1291 def : InstRW<[V1Write_4c_1V0], (instregex "^SHA1[CMP]rrr$",
1292                                           "^SHA256H2?rrr$",
1293                                           "^SM4E(KEY)?$")>;
1295 // Crypto SHA3 ops
1296 def : InstRW<[V1Write_2c_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;
1299 // CRC instruction
1300 // -----------------------------------------------------------------------------
1302 // CRC checksum ops
1303 def : InstRW<[V1Write_2c_1M0], (instregex "^CRC32C?[BHWX]rr$")>;
1306 // SVE Predicate instructions
1307 // -----------------------------------------------------------------------------
1309 // Loop control, based on predicate
1310 def : InstRW<[V1Write_2c_1M0], (instregex "^BRK[AB]_PP[mz]P$")>;
1311 def : InstRW<[V1Write_2c_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;
1313 // Loop control, based on predicate and flag setting
1314 def : InstRW<[V1Write_3c_2M0], (instrs BRKAS_PPzP, BRKBS_PPzP, BRKNS_PPzP,
1315                                        BRKPAS_PPzPP, BRKPBS_PPzPP)>;
1317 // Loop control, based on GPR
1318 def : InstRW<[V1Write_3c_2M0], (instregex "^WHILE(LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
1320 // Loop terminate
1321 def : InstRW<[V1Write_1c_1M0], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
1323 // Predicate counting scalar
1324 // Predicate counting scalar, active predicate
1325 def : InstRW<[V1Write_2c_1M0], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
1326 def : InstRW<[V1Write_2c_1M0], (instregex "^(CNT|([SU]Q)?(DEC|INC))[BHWD]_XPiI$",
1327                                           "^SQ(DEC|INC)[BHWD]_XPiWdI$",
1328                                           "^UQ(DEC|INC)[BHWD]_WPiI$",
1329                                           "^CNTP_XPP_[BHSD]$",
1330                                           "^([SU]Q)?(DEC|INC)P_XP_[BHSD]$",
1331                                           "^UQ(DEC|INC)P_WP_[BHSD]$",
1332                                           "^[SU]Q(DEC|INC)P_XPWd_[BHSD]$")>;
1334 // Predicate counting vector, active predicate
1335 def : InstRW<[V1Write_7c_2M0_1V01], (instregex "^([SU]Q)?(DEC|INC)P_ZP_[HSD]$")>;
1337 // Predicate logical
1338 def : InstRW<[V1Write_1c_1M0],
1339              (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;
1341 // Predicate logical, flag setting
1342 def : InstRW<[V1Write_2c_2M0],
1343              (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)S_PPzPP$")>;
1345 // Predicate reverse
1346 // Predicate set/initialize/find next
1347 // Predicate transpose
1348 // Predicate unpack and widen
1349 // Predicate zip/unzip
1350 def : InstRW<[V1Write_2c_1M0], (instregex "^REV_PP_[BHSD]$",
1351                                           "^PFALSE$", "^PFIRST_B$",
1352                                           "^PNEXT_[BHSD]$", "^PTRUE_[BHSD]$",
1353                                           "^TRN[12]_PPP_[BHSDQ]$",
1354                                           "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;
1356 // Predicate set/initialize/find next
1357 // Predicate unpack and widen
1358 def : InstRW<[V1Write_2c_1M0], (instrs PTEST_PP,
1359                                        PUNPKHI_PP, PUNPKLO_PP)>;
1361 // Predicate select
1362 def : InstRW<[V1Write_1c_1M0], (instrs SEL_PPPP)>;
1364 // Predicate set/initialize, set flags
1365 def : InstRW<[V1Write_3c_2M0], (instregex "^PTRUES_[BHSD]$")>;
1369 // SVE integer instructions
1370 // -----------------------------------------------------------------------------
1372 // Arithmetic, basic
1373 // Logical
// Unsized bitwise forms (<op>_ZZZ) and the sized EORBT/EORTB forms are matched
// by two separate patterns below.
1374 def : InstRW<[V1Write_2c_1V01],
1375              (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]$",
1376                         "^(ADD|SUB)_Z(I|P[mZ]Z|ZZ)_[BHSD]$",
1377                         "^ADR_[SU]XTW_ZZZ_D_[0123]$",
1378                         "^ADR_LSL_ZZZ_[SD]_[0123]$",
1379                         "^[SU]ABD_ZP[mZ]Z_[BHSD]$",
1380                         "^[SU](MAX|MIN)_Z(I|P[mZ]Z)_[BHSD]$",
1381                         "^[SU]Q(ADD|SUB)_Z(I|ZZ)_[BHSD]$",
1382                         "^SUBR_Z(I|P[mZ]Z)_[BHSD]$",
1383                         "^(AND|EOR|ORR)_ZI$",
1384                         "^(AND|BIC|EOR|ORR)_ZZZ$",
1385                         "^EOR(BT|TB)_ZZZ_[BHSD]$",
1386                         "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>;
1388 // Arithmetic, shift
1389 def : InstRW<[V1Write_2c_1V1],
1390              (instregex "^(ASR|LSL|LSR)_WIDE_Z(Pm|Z)Z_[BHS]",
1391                         "^(ASR|LSL|LSR)_ZPm[IZ]_[BHSD]",
1392                         "^(ASR|LSL|LSR)_ZZI_[BHSD]",
1393                         "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
1394                         "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
1396 // Arithmetic, shift right for divide
1397 def : InstRW<[V1Write_4c_1V1], (instregex "^ASRD_ZP[mZ]I_[BHSD]$")>;
1399 // Count/reverse bits
1400 def : InstRW<[V1Write_2c_1V01], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>;
1402 // Broadcast logical bitmask immediate to vector
1403 def : InstRW<[V1Write_2c_1V01], (instrs DUPM_ZI)>;
1405 // Compare and set flags
1406 def : InstRW<[V1Write_4c_1M0_1V0],
1407              (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
1408                         "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;
1410 // Conditional extract operations, scalar form
1411 def : InstRW<[V1Write_9c_1M0_1V1], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
1413 // Conditional extract operations, SIMD&FP scalar and vector forms
1414 def : InstRW<[V1Write_3c_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
1415                                           "^COMPACT_ZPZ_[SD]$",
1416                                           "^SPLICE_ZPZZ?_[BHSD]$")>;
// Integer-to-FP conversions, copies, divides, dot products, duplicates,
// extends and extract/insert. Each record binds the matched opcodes to one
// V1Write_* record (defined outside this excerpt).
1418 // Convert to floating point, 64b to float or convert to double
// NOTE(review): these two patterns have no trailing `$`, while the two
// SCVTF/UCVTF entries below are end-anchored - confirm this is intentional.
1419 def : InstRW<[V1Write_3c_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
1420                                           "^[SU]CVTF_ZPmZ_StoD")>;
1422 // Convert to floating point, 32b to single or half
1423 def : InstRW<[V1Write_4c_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>;
1425 // Convert to floating point, 16b to half
1426 def : InstRW<[V1Write_6c_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>;
1428 // Copy, scalar
// `_ZPmR_` form of CPY.
1429 def : InstRW<[V1Write_5c_1M0_1V01], (instregex "^CPY_ZPmR_[BHSD]$")>;
1431 // Copy, scalar SIMD&FP or imm
// `_ZPmI_`, `_ZPzI_` and `_ZPmV_` forms of CPY.
1432 def : InstRW<[V1Write_2c_1V01], (instregex "^CPY_ZP([mz]I|mV)_[BHSD]$")>;
1434 // Divides, 32 bit
// `R?` covers both the plain and reversed-operand mnemonics (xDIV and xDIVR).
1435 def : InstRW<[V1Write_12c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>;
1437 // Divides, 64 bit
1438 def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>;
1440 // Dot product, 8 bit
// `I?` covers both the `_ZZZ_S` and `_ZZZI_S` forms of SDOT/UDOT.
1441 def : InstRW<[V1Write_3c_1V01], (instregex "^[SU]DOT_ZZZI?_S$")>;
1443 // Dot product, 8 bit, using signed and unsigned integers
1444 def : InstRW<[V1Write_3c_1V], (instrs SUDOT_ZZZI, USDOT_ZZZ, USDOT_ZZZI)>;
1446 // Dot product, 16 bit
1447 def : InstRW<[V1Write_4c_1V01], (instregex "^[SU]DOT_ZZZI?_D$")>;
1449 // Duplicate, immediate and indexed form
1450 def : InstRW<[V1Write_2c_1V01], (instregex "^DUP_ZI_[BHSD]$",
1451                                            "^DUP_ZZI_[BHSDQ]$")>;
1453 // Duplicate, scalar form
1454 def : InstRW<[V1Write_3c_1M0], (instregex "^DUP_ZR_[BHSD]$")>;
1456 // Extend, sign or zero
// NOTE(review): `[D]` in the last pattern is a single-member character class,
// equivalent to a plain `D`.
1457 def : InstRW<[V1Write_2c_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$",
1458                                           "^[SU]XTH_ZPmZ_[SD]$",
1459                                           "^[SU]XTW_ZPmZ_[D]$")>;
1461 // Extract
1462 def : InstRW<[V1Write_2c_1V01], (instrs EXT_ZZI)>;
1464 // Extract/insert operation, SIMD and FP scalar form
// `_VPZ_` forms of LASTA/LASTB and the `_ZV_` form of INSR.
1465 def : InstRW<[V1Write_3c_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$",
1466                                           "^INSR_ZV_[BHSD]$")>;
1468 // Extract/insert operation, scalar
// `_RPZ_` forms of LASTA/LASTB and the `_ZR_` form of INSR.
1469 def : InstRW<[V1Write_6c_1M0_1V1], (instregex "^LAST[AB]_RPZ_[BHSD]$",
1470                                               "^INSR_ZR_[BHSD]$")>;
// INDEX, MOVPRFX, integer matrix multiply, multiplies and predicate-counting
// vector updates. INDEX and the multiplies are split by element size (B/H/S
// versus D), each size group having its own V1Write_* record.
1472 // Horizontal operations, B, H, S form, imm, imm
1473 def : InstRW<[V1Write_4c_1V0], (instregex "^INDEX_II_[BHS]$")>;
1475 // Horizontal operations, B, H, S form, scalar, imm / scalar / imm, scalar
// Covers the `_IR_`, `_RI_` and `_RR_` operand combinations.
1476 def : InstRW<[V1Write_7c_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
1478 // Horizontal operations, D form, imm, imm
1479 def : InstRW<[V1Write_5c_2V0], (instrs INDEX_II_D)>;
1481 // Horizontal operations, D form, scalar, imm / scalar / imm, scalar
1482 def : InstRW<[V1Write_8c_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>;
1484 // Move prefix
// Covers the predicated (`_ZPmZ_`/`_ZPzZ_`) and unpredicated (`_ZZ`) MOVPRFX.
1485 def : InstRW<[V1Write_2c_1V01], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
1486                                            "^MOVPRFX_ZZ$")>;
1488 // Matrix multiply-accumulate
1489 def : InstRW<[V1Write_3c_1V01], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
1491 // Multiply, B, H, S element size
// NOTE(review): this entry lists both `_ZPmZ_` and `_ZZZ_` forms of
// SMULH/UMULH, whereas the D-size entry below lists only `_ZPmZ_`. Confirm
// whether the `_ZZZ_D` form is deliberately left without an entry here.
1492 def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ)_[BHS]$",
1493                                           "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>;
1495 // Multiply, D element size
1496 // Multiply accumulate, D element size
1497 def : InstRW<[V1Write_5c_2V0], (instregex "^MUL_(ZI|ZPmZ)_D$",
1498                                           "^[SU]MULH_ZPmZ_D$",
1499                                           "^(MLA|MLS|MAD|MSB)_ZPmZZ_D$")>;
1501 // Multiply accumulate, B, H, S element size
1502 // NOTE: This is not specified in the SOG.
// NOTE(review): this pattern has no trailing `$`, unlike the D-size
// multiply-accumulate pattern above - confirm this is intentional.
1503 def : InstRW<[V1Write_4c_1V0], (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
1505 // Predicate counting vector
// Matches DEC/INC with an optional SQ/UQ prefix, H/W/D size letter, `_ZPiI`.
1506 def : InstRW<[V1Write_2c_1V0], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>;
// Integer reductions and vector permutes. The arithmetic reductions
// ([SU]ADDV, [SU]MAXV, [SU]MINV) get one entry per element size, each bound
// to a different V1Write_* record.
// NOTE(review): the four arithmetic-reduction patterns have no trailing `$`,
// while the logical-reduction pattern does - confirm this is intentional.
1508 // Reduction, arithmetic, B form
1509 def : InstRW<[V1Write_14c_1V_1V0_2V1_1V13],
1510              (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
1512 // Reduction, arithmetic, H form
1513 def : InstRW<[V1Write_12c_1V_1V01_2V1],
1514              (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
1516 // Reduction, arithmetic, S form
1517 def : InstRW<[V1Write_10c_1V_1V01_2V1],
1518              (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
1520 // Reduction, arithmetic, D form
1521 def : InstRW<[V1Write_8c_1V_1V01],
1522              (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
1524 // Reduction, logical
// Matches ANDV, EORV and ORV for all four element sizes with a single write.
1525 def : InstRW<[V1Write_12c_4V01], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>;
1527 // Reverse, vector
// REV on whole elements, plus REVB/REVH/REVW for the element sizes listed in
// each pattern.
1528 def : InstRW<[V1Write_2c_1V01], (instregex "^REV_ZZ_[BHSD]$",
1529                                            "^REVB_ZPmZ_[HSD]$",
1530                                            "^REVH_ZPmZ_[SD]$",
1531                                            "^REVW_ZPmZ_D$")>;
1533 // Select, vector form
1534 // Table lookup
1535 // Table lookup extension
1536 // Transpose, vector form
1537 // Unpack and extend
1538 // Zip/unzip
// One shared write for SEL, TBL/TBX, TRN1/2, [SU]UNPKHI/LO and UZP/ZIP1/2 on
// vector operands.
1539 def : InstRW<[V1Write_2c_1V01], (instregex "^SEL_ZPZZ_[BHSD]$",
1540                                            "^TB[LX]_ZZZ_[BHSD]$",
1541                                            "^TRN[12]_ZZZ_[BHSDQ]$",
1542                                            "^[SU]UNPK(HI|LO)_ZZ_[HSD]$",
1543                                            "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
1546 // SVE floating-point instructions
1547 // -----------------------------------------------------------------------------
// Each record binds a set of SVE floating-point opcodes to one V1Write_*
// record (defined outside this excerpt). Where the write differs by element
// size (FADDA, FDIV/FDIVR, FCVT*), there is one entry per size.
1549 // Floating point absolute value/difference
1550 // Floating point arithmetic
// `FAB[SD]` matches FABS and FABD.
1551 def : InstRW<[V1Write_2c_1V01], (instregex "^FAB[SD]_ZPmZ_[HSD]$",
1552                                            "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$",
1553                                            "^FADDP_ZPmZZ_[HSD]$",
1554                                            "^FNEG_ZPmZ_[HSD]$",
1555                                            "^FSUBR_ZPm[IZ]_[HSD]$")>;
1557 // Floating point associative add, F16
1558 def : InstRW<[V1Write_19c_18V0], (instrs FADDA_VPZ_H)>;
1560 // Floating point associative add, F32
1561 def : InstRW<[V1Write_11c_10V0], (instrs FADDA_VPZ_S)>;
1563 // Floating point associative add, F64
1564 def : InstRW<[V1Write_8c_3V01], (instrs FADDA_VPZ_D)>;
1566 // Floating point compare
// Covers FACGE/FACGT, the vector-vs-vector FCM* forms (`_PPzZZ_`) and the
// compare-with-zero FCM* forms (`_PPzZ0_`).
1567 def : InstRW<[V1Write_2c_1V0], (instregex "^FAC(GE|GT)_PPzZZ_[HSD]$",
1568                                           "^FCM(EQ|GE|GT|NE|UO)_PPzZZ_[HSD]$",
1569                                           "^FCM(EQ|GE|GT|LE|LT|NE)_PPzZ0_[HSD]$")>;
1571 // Floating point complex add
1572 def : InstRW<[V1Write_3c_1V01], (instregex "^FCADD_ZPmZ_[HSD]$")>;
1574 // Floating point complex multiply add
// The indexed (`_ZZZI_`) form is listed for H and S only.
1575 def : InstRW<[V1Write_5c_1V01], (instregex "^FCMLA_ZPmZZ_[HSD]$",
1576                                            "^FCMLA_ZZZI_[HS]$")>;
1578 // Floating point convert, long or narrow (F16 to F32 or F32 to F16)
1579 // Floating point convert to integer, F32
1580 def : InstRW<[V1Write_4c_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$",
1581                                           "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>;
1583 // Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16)
1584 // Floating point convert to integer, F64
1585 def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$",
1586                                           "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>;
1588 // Floating point convert to integer, F16
1589 def : InstRW<[V1Write_6c_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>;
1591 // Floating point copy
1592 def : InstRW<[V1Write_2c_1V01], (instregex "^FCPY_ZPmI_[HSD]$",
1593                                            "^FDUP_ZI_[HSD]$")>;
1595 // Floating point divide, F16
// `R?` covers both FDIV and FDIVR in each of the three divide entries.
1596 def : InstRW<[V1Write_13c10_1V0], (instregex "^FDIVR?_ZPmZ_H$")>;
1598 // Floating point divide, F32
1599 def : InstRW<[V1Write_10c7_1V0], (instregex "^FDIVR?_ZPmZ_S$")>;
1601 // Floating point divide, F64
1602 def : InstRW<[V1Write_15c7_1V0], (instregex "^FDIVR?_ZPmZ_D$")>;
1604 // Floating point min/max
// Matches FMAX, FMIN, FMAXNM and FMINNM in `_ZPmI_` and `_ZPmZ_` forms.
1605 def : InstRW<[V1Write_2c_1V01], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>;
// Floating-point multiply, multiply-accumulate, reciprocal, reduction,
// rounding, square-root and trigonometric entries. Each record binds the
// matched opcodes to one V1Write_* record (defined outside this excerpt).
// Reciprocal estimate, reductions, rounding and square root have one entry
// per element size.
1607 // Floating point multiply
1608 def : InstRW<[V1Write_3c_1V01], (instregex "^F(SCALE|MULX)_ZPmZ_[HSD]$",
1609                                            "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>;
1611 // Floating point multiply accumulate
1612 // Floating point reciprocal step
// The first pattern matches FMAD, FMSB, FMLA, FMLS and their `N`-prefixed
// counterparts (FNMAD, FNMSB, FNMLA, FNMLS).
1613 def : InstRW<[V1Write_4c_1V01], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$",
1614                                            "^FML[AS]_ZZZI_[HSD]$",
1615                                            "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
1617 // Floating point reciprocal estimate, F16
1618 def : InstRW<[V1Write_6c_4V0], (instrs FRECPE_ZZ_H, FRSQRTE_ZZ_H)>;
1620 // Floating point reciprocal estimate, F32
1621 def : InstRW<[V1Write_4c_2V0], (instrs FRECPE_ZZ_S, FRSQRTE_ZZ_S)>;
1623 // Floating point reciprocal estimate, F64
1624 def : InstRW<[V1Write_3c_1V0], (instrs FRECPE_ZZ_D, FRSQRTE_ZZ_D)>;
1626 // Floating point reciprocal exponent
1627 def : InstRW<[V1Write_3c_1V0], (instregex "^FRECPX_ZPmZ_[HSD]$")>;
1629 // Floating point reduction, F16
// Each reduction pattern matches FADDV, FMAXV, FMINV, FMAXNMV and FMINNMV.
1630 def : InstRW<[V1Write_13c_6V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_H$")>;
1632 // Floating point reduction, F32
1633 def : InstRW<[V1Write_11c_1V_5V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_S$")>;
1635 // Floating point reduction, F64
1636 def : InstRW<[V1Write_9c_1V_4V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_D$")>;
1638 // Floating point round to integral, F16
// NOTE(review): the F16/F32 rounding entries use `_1V0` writes, whereas the
// F16/F32 reciprocal-estimate entries above, with the same cycle prefixes,
// use `_4V0`/`_2V0`. Confirm the unit counts against the optimization guide.
1639 def : InstRW<[V1Write_6c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>;
1641 // Floating point round to integral, F32
1642 def : InstRW<[V1Write_4c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>;
1644 // Floating point round to integral, F64
1645 def : InstRW<[V1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>;
1647 // Floating point square root, F16
1648 def : InstRW<[V1Write_13c10_1V0], (instrs FSQRT_ZPmZ_H)>;
1650 // Floating point square root, F32
1651 def : InstRW<[V1Write_10c7_1V0], (instrs FSQRT_ZPmZ_S)>;
1653 // Floating point square root, F64
1654 def : InstRW<[V1Write_16c7_1V0], (instrs FSQRT_ZPmZ_D)>;
1656 // Floating point trigonometric
// Matches FEXPA, FTMAD, FTSMUL and FTSSEL.
1657 def : InstRW<[V1Write_3c_1V01], (instregex "^FEXPA_ZZ_[HSD]$",
1658                                            "^FTMAD_ZZI_[HSD]$",
1659                                            "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
1662 // SVE BFloat16 (BF16) instructions
1663 // -----------------------------------------------------------------------------
// Each record binds the listed BF16 opcodes to one V1Write_* record (defined
// outside this excerpt).
1665 // Convert, F32 to BF16
1666 def : InstRW<[V1Write_4c_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
1668 // Dot product
// Covers both the indexed (`_ZZI`) and vector (`_ZZZ`) BFDOT opcodes.
1669 def : InstRW<[V1Write_4c_1V01], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
1671 // Matrix multiply accumulate
1672 def : InstRW<[V1Write_5c_1V01], (instrs BFMMLA_ZZZ)>;
1674 // Multiply accumulate long
// Matches BFMLALB/BFMLALT in `_ZZZ` and `_ZZZI` forms. The group in `(I)?`
// is equivalent to a plain `I?`.
1675 def : InstRW<[V1Write_5c_1V01], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
1678 // SVE Load instructions
1679 // -----------------------------------------------------------------------------
// Each record binds a set of SVE load opcodes to one V1Write_* record
// (defined outside this excerpt). In the scalar + scalar groups below, the
// `H` opcodes are split into their own entry with a different write from the
// B/W/D opcodes.
1681 // Load vector
1682 def : InstRW<[V1Write_6c_1L01], (instrs LDR_ZXI)>;
1684 // Load predicate
1685 def : InstRW<[V1Write_6c_1L_1M], (instrs LDR_PXI)>;
1687 // Contiguous load, scalar + imm
1688 // Contiguous load, scalar + scalar
1689 // Contiguous load broadcast, scalar + imm
1690 // Contiguous load broadcast, scalar + scalar
// First entry: all `_IMM` forms, plus the non-`_IMM` forms whose size letter
// is B, W or D. Second entry: the non-`_IMM` forms whose size letter is H
// (LD1H, LD1H_S/_D, LD1SH_S/_D, LD1RQ_H).
1691 def : InstRW<[V1Write_6c_1L01], (instregex "^LD1[BHWD]_IMM$",
1692                                            "^LD1S?B_[HSD]_IMM$",
1693                                            "^LD1S?H_[SD]_IMM$",
1694                                            "^LD1S?W_D_IMM$",
1695                                            "^LD1[BWD]$",
1696                                            "^LD1S?B_[HSD]$",
1697                                            "^LD1S?W_D$",
1698                                            "^LD1R[BHWD]_IMM$",
1699                                            "^LD1RSW_IMM$",
1700                                            "^LD1RS?B_[HSD]_IMM$",
1701                                            "^LD1RS?H_[SD]_IMM$",
1702                                            "^LD1RS?W_D_IMM$",
1703                                            "^LD1RQ_[BHWD]_IMM$",
1704                                            "^LD1RQ_[BWD]$")>;
1705 def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LD1H$",
1706                                               "^LD1S?H_[SD]$",
1707                                               "^LD1RQ_H$")>;
1709 // Non temporal load, scalar + imm
1710 def : InstRW<[V1Write_6c_1L01], (instregex "^LDNT1[BHWD]_ZRI$")>;
1712 // Non temporal load, scalar + scalar
// LDNT1H_ZRR has its own write; the B/W/D `_ZRR` opcodes share another.
1713 def : InstRW<[V1Write_7c_1L01_1S], (instrs LDNT1H_ZRR)>;
1714 def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDNT1[BWD]_ZRR$")>;
1716 // Contiguous first faulting load, scalar + scalar
// Same H versus B/W/D split as above, applied to the `_REAL` LDFF1 opcodes.
1717 def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LDFF1H_REAL$",
1718                                               "^LDFF1S?H_[SD]_REAL$")>;
1719 def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDFF1[BWD]_REAL$",
1720                                               "^LDFF1S?B_[HSD]_REAL$",
1721                                               "^LDFF1S?W_D_REAL$")>;
1723 // Contiguous non faulting load, scalar + imm
1724 def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM_REAL$",
1725                                            "^LDNF1S?B_[HSD]_IMM_REAL$",
1726                                            "^LDNF1S?H_[SD]_IMM_REAL$",
1727                                            "^LDNF1S?W_D_IMM_REAL$")>;
// Structure loads (LD2/LD3/LD4), gather loads (GLD1*/GLDFF1*) and prefetches.
// Each record binds the matched opcodes to one V1Write_* record (defined
// outside this excerpt).
1729 // Contiguous Load two structures to two vectors, scalar + imm
1730 def : InstRW<[V1Write_8c_2L01_2V01], (instregex "^LD2[BHWD]_IMM$")>;
1732 // Contiguous Load two structures to two vectors, scalar + scalar
// LD2H has its own write; LD2B/LD2W/LD2D share another. The LD3 and LD4
// scalar + scalar entries below use a single write for all four sizes.
1733 def : InstRW<[V1Write_10c_2L01_2V01], (instrs LD2H)>;
1734 def : InstRW<[V1Write_9c_2L01_2V01],  (instregex "^LD2[BWD]$")>;
1736 // Contiguous Load three structures to three vectors, scalar + imm
1737 def : InstRW<[V1Write_11c_3L01_3V01], (instregex "^LD3[BHWD]_IMM$")>;
1739 // Contiguous Load three structures to three vectors, scalar + scalar
1740 def : InstRW<[V1Write_13c_3L01_1S_3V01], (instregex "^LD3[BHWD]$")>;
1742 // Contiguous Load four structures to four vectors, scalar + imm
1743 def : InstRW<[V1Write_12c_4L01_4V01], (instregex "^LD4[BHWD]_IMM$")>;
1745 // Contiguous Load four structures to four vectors, scalar + scalar
1746 def : InstRW<[V1Write_13c_4L01_2S_4V01], (instregex "^LD4[BHWD]$")>;
1748 // Gather load, vector + imm, 32-bit element size
// In all gather patterns `(FF)?` covers both GLD1* and GLDFF1* opcodes, and
// `S?` covers the `S`-infixed mnemonics.
1749 def : InstRW<[V1Write_11c_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
1750                                              "^GLD(FF)?1W_IMM_REAL$")>;
1752 // Gather load, vector + imm, 64-bit element size
// Besides the `_IMM` forms, this entry also matches the `_D` and `1D` opcodes
// with optional `[SU]XTW_` and `SCALED_` name components before `REAL`.
1753 def : InstRW<[V1Write_9c_2L_2V],
1754              (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
1755                         "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?(SCALED_)?REAL$",
1756                         "^GLD(FF)?1D_IMM_REAL$",
1757                         "^GLD(FF)?1D_([SU]XTW_)?(SCALED_)?REAL$")>;
1759 // Gather load, 32-bit scaled offset
// NOTE(review): the second pattern has no trailing `$`, unlike the first -
// confirm this is intentional.
1760 def : InstRW<[V1Write_11c_2L_2V],
1761              (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
1762                         "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
1764 // Gather load, 32-bit unpacked unscaled offset
1765 def : InstRW<[V1Write_9c_1L_1V],
1766              (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
1767                         "^GLD(FF)?1W_[SU]XTW_REAL$")>;
1769 // Prefetch
1770 // NOTE: This is not specified in the SOG.
// NOTE(review): the pattern is a bare `PRF[BHWD]` prefix with no trailing
// `$`. Presumably that is so every prefetch opcode variant with this prefix
// shares the entry - confirm it does not catch unintended opcodes.
1771 def : InstRW<[V1Write_4c_1L01], (instregex "^PRF[BHWD]")>;
1774 // SVE Store instructions
1775 // -----------------------------------------------------------------------------
// Each record binds a set of SVE store opcodes to one V1Write_* record
// (defined outside this excerpt). For ST1, ST2 and STNT1 the scalar + scalar
// `H` opcodes are split out into an entry whose write name carries an extra
// `_1S` component.
1777 // Store from predicate reg
1778 def : InstRW<[V1Write_1c_1L01], (instrs STR_PXI)>;
1780 // Store from vector reg
1781 def : InstRW<[V1Write_2c_1L01_1V], (instrs STR_ZXI)>;
1783 // Contiguous store, scalar + imm
1784 // Contiguous store, scalar + scalar
// First entry: all `_IMM` forms, plus the non-`_IMM` forms whose size letter
// is B, W or D. Second entry: ST1H, ST1H_S and ST1H_D.
1785 def : InstRW<[V1Write_2c_1L01_1V], (instregex "^ST1[BHWD]_IMM$",
1786                                               "^ST1B_[HSD]_IMM$",
1787                                               "^ST1H_[SD]_IMM$",
1788                                               "^ST1W_D_IMM$",
1789                                               "^ST1[BWD]$",
1790                                               "^ST1B_[HSD]$",
1791                                               "^ST1W_D$")>;
1792 def : InstRW<[V1Write_2c_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>;
1794 // Contiguous store two structures from two vectors, scalar + imm
1795 // Contiguous store two structures from two vectors, scalar + scalar
1796 def : InstRW<[V1Write_4c_1L01_1V], (instregex "^ST2[BHWD]_IMM$",
1797                                               "^ST2[BWD]$")>;
1798 def : InstRW<[V1Write_4c_1L01_1S_1V], (instrs ST2H)>;
1800 // Contiguous store three structures from three vectors, scalar + imm
1801 def : InstRW<[V1Write_7c_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>;
1803 // Contiguous store three structures from three vectors, scalar + scalar
// ST3 and ST4 scalar + scalar use one write for all four sizes (no H split).
1804 def : InstRW<[V1Write_7c_5L01_5S_5V], (instregex "^ST3[BHWD]$")>;
1806 // Contiguous store four structures from four vectors, scalar + imm
1807 def : InstRW<[V1Write_11c_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>;
1809 // Contiguous store four structures from four vectors, scalar + scalar
1810 def : InstRW<[V1Write_11c_9L01_9S_9V], (instregex "^ST4[BHWD]$")>;
1812 // Non temporal store, scalar + imm
1813 // Non temporal store, scalar + scalar
1814 def : InstRW<[V1Write_2c_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$",
1815                                               "^STNT1[BWD]_ZRR$")>;
1816 def : InstRW<[V1Write_2c_1L01_1S_1V], (instrs STNT1H_ZRR)>;
// Scatter stores (SST1*). The first entry covers the `_S`-suffixed and
// unsuffixed-W opcodes. The second and third entries cover the `_D`-suffixed
// and SST1D opcodes and are bound to the same write (V1Write_6c_1L01_1V).
1818 // Scatter store vector + imm 32-bit element size
1819 // Scatter store, 32-bit scaled offset
1820 // Scatter store, 32-bit unscaled offset
1821 def : InstRW<[V1Write_10c_2L01_2V], (instregex "^SST1[BH]_S_IMM$",
1822                                                "^SST1W_IMM$",
1823                                                "^SST1(H_S|W)_[SU]XTW_SCALED$",
1824                                                "^SST1[BH]_S_[SU]XTW$",
1825                                                "^SST1W_[SU]XTW$")>;
1827 // Scatter store, 32-bit unpacked unscaled offset
1828 // Scatter store, 32-bit unpacked scaled offset
// `_D`/SST1D opcodes whose names carry an `[SU]XTW` component.
1829 def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$",
1830                                               "^SST1D_[SU]XTW$",
1831                                               "^SST1[HW]_D_[SU]XTW_SCALED$",
1832                                               "^SST1D_[SU]XTW_SCALED$")>;
1834 // Scatter store vector + imm 64-bit element size
1835 // Scatter store, 64-bit scaled offset
1836 // Scatter store, 64-bit unscaled offset
// `_D`/SST1D opcodes with `_IMM`, `_SCALED` or no addressing suffix.
1837 def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
1838                                               "^SST1D_IMM$",
1839                                               "^SST1[HW]_D_SCALED$",
1840                                               "^SST1D_SCALED$",
1841                                               "^SST1[BHW]_D$",
1842                                               "^SST1D$")>;
1845 // SVE Miscellaneous instructions
1846 // -----------------------------------------------------------------------------
// First-fault-register (FFR) opcodes, each listed by exact name via `instrs`
// and bound to a V1Write_* record defined outside this excerpt.
1848 // Read first fault register, unpredicated
1849 // Set first fault register
1850 // Write to first fault register
1851 def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P_REAL,
1852                                        SETFFR,
1853                                        WRFFR)>;
1855 // Read first fault register, predicated
1856 def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz_REAL)>;
1858 // Read first fault register and set flags
// NOTE(review): this write names unit `M`, whereas the other FFR entries above
// name `M0` - confirm against the optimization guide.
1859 def : InstRW<[V1Write_4c_1M], (instrs RDFFRS_PPz)>;