[gn build] Port fef54d0393fd
[llvm-project.git] / llvm / lib / Target / AArch64 / AArch64SchedNeoverseN1.td
blob524fa33f498bb0f1960ae6b66522d802e8a87a57
1 //=- AArch64SchedNeoverseN1.td - NeoverseN1 Scheduling Model -*- tablegen -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the scheduling model for the Arm Neoverse N1 processors.
11 // References:
12 // - "Arm Neoverse N1 Software Optimization Guide"
13 // - https://en.wikichip.org/wiki/arm_holdings/microarchitectures/neoverse_n1
15 //===----------------------------------------------------------------------===//
17 def NeoverseN1Model : SchedMachineModel {
18   let IssueWidth            =   8; // Maximum micro-ops dispatch rate.
19   let MicroOpBufferSize     = 128; // NOTE: Copied from Cortex-A76.
20   let LoadLatency           =   4; // Optimistic load latency.
21   let MispredictPenalty     =  11; // Cycles cost of branch mispredicted.
22   let LoopMicroOpBufferSize =  16; // NOTE: Copied from Cortex-A57.
23   let CompleteModel         =   1; // The model is declared complete.
25   list<Predicate> UnsupportedFeatures = !listconcat(PAUnsupported.F,
26                                                     SMEUnsupported.F,
27                                                     SVEUnsupported.F,
28                                                     [HasMTE, HasCSSC]); // Features this model does not cover.
29 } // Close the model record before the next top-level statement.
31 //===----------------------------------------------------------------------===//
32 // Define each kind of processor resource and number available on Neoverse N1.
33 // Instructions are first fetched and then decoded into internal macro-ops
34 // (MOPs).  From there, the MOPs proceed through register renaming and dispatch
35 // stages.  A MOP can be split into one or more micro-ops further down the
36 // pipeline, after the decode stage.  Once dispatched, micro-ops wait for their
37 // operands and issue out-of-order to one of the issue pipelines.  Each issue
38 // pipeline can accept one micro-op per cycle.
40 let SchedModel = NeoverseN1Model in {
42 // Define the issue ports.  The ProcResource argument is the number of units.
43 def N1UnitB  : ProcResource<1>;  // Branch (1 unit)
44 def N1UnitS  : ProcResource<2>;  // Integer single cycle 0/1 (2 units)
45 def N1UnitM  : ProcResource<1>;  // Integer multicycle (1 unit)
46 def N1UnitL  : ProcResource<2>;  // Load/Store 0/1 (2 units)
47 def N1UnitD  : ProcResource<2>;  // Store data 0/1 (2 units)
48 def N1UnitV0 : ProcResource<1>;  // FP/ASIMD 0 (1 unit)
49 def N1UnitV1 : ProcResource<1>;  // FP/ASIMD 1 (1 unit)
51 def N1UnitI : ProcResGroup<[N1UnitS, N1UnitM]>;    // Integer units: group of N1UnitS and N1UnitM
52 def N1UnitV : ProcResGroup<[N1UnitV0, N1UnitV1]>;  // FP/ASIMD units: group of N1UnitV0 and N1UnitV1
54 // Define commonly used read types.
56 // No generic forwarding is provided for these types: every read type below is given a ReadAdvance of 0 cycles.
57 def : ReadAdvance<ReadI,       0>;
58 def : ReadAdvance<ReadISReg,   0>;
59 def : ReadAdvance<ReadIEReg,   0>;
60 def : ReadAdvance<ReadIM,      0>;
61 def : ReadAdvance<ReadIMA,     0>;
62 def : ReadAdvance<ReadID,      0>;
63 def : ReadAdvance<ReadExtrHi,  0>;
64 def : ReadAdvance<ReadAdrBase, 0>;
65 def : ReadAdvance<ReadST,      0>;
66 def : ReadAdvance<ReadVLD,     0>;
68 def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }  // No resources; flagged as unsupported.
69 def : WriteRes<WriteBarrier, []> { let Latency = 1; }      // No resources; latency 1.
70 def : WriteRes<WriteHint,    []> { let Latency = 1; }      // No resources; latency 1.
73 //===----------------------------------------------------------------------===//
74 // Define generic 0 micro-op types
76 def N1Write_0c_0Z : SchedWriteRes<[]> {  // Uses no resource, no micro-op, no latency.
77   let Latency = 0; let NumMicroOps = 0; }
79 //===----------------------------------------------------------------------===//
80 // Define generic 1 micro-op types.  As the records below show, names read N1Write_<latency>c[<ReleaseAtCycles>]_1<unit>.
82 def N1Write_1c_1B     : SchedWriteRes<[N1UnitB]>  { let Latency = 1; }
83 def N1Write_1c_1I     : SchedWriteRes<[N1UnitI]>  { let Latency = 1; }
84 def N1Write_2c_1M     : SchedWriteRes<[N1UnitM]>  { let Latency = 2; }
85 def N1Write_3c_1M     : SchedWriteRes<[N1UnitM]>  { let Latency = 3; }
86 def N1Write_4c3_1M    : SchedWriteRes<[N1UnitM]>  { let Latency = 4;
87                                                     let ReleaseAtCycles = [3]; }
88 def N1Write_5c3_1M    : SchedWriteRes<[N1UnitM]>  { let Latency = 5;
89                                                     let ReleaseAtCycles = [3]; }
90 def N1Write_12c5_1M   : SchedWriteRes<[N1UnitM]>  { let Latency = 12;
91                                                     let ReleaseAtCycles = [5]; }
92 def N1Write_20c5_1M   : SchedWriteRes<[N1UnitM]>  { let Latency = 20;
93                                                     let ReleaseAtCycles = [5]; }
94 def N1Write_4c_1L     : SchedWriteRes<[N1UnitL]>  { let Latency = 4; }
95 def N1Write_5c_1L     : SchedWriteRes<[N1UnitL]>  { let Latency = 5; }
96 def N1Write_7c_1L     : SchedWriteRes<[N1UnitL]>  { let Latency = 7; }
97 def N1Write_2c_1V     : SchedWriteRes<[N1UnitV]>  { let Latency = 2; }
98 def N1Write_3c_1V     : SchedWriteRes<[N1UnitV]>  { let Latency = 3; }
99 def N1Write_4c_1V     : SchedWriteRes<[N1UnitV]>  { let Latency = 4; }
100 def N1Write_5c_1V     : SchedWriteRes<[N1UnitV]>  { let Latency = 5; }
101 def N1Write_2c_1V0    : SchedWriteRes<[N1UnitV0]> { let Latency = 2; }
102 def N1Write_3c_1V0    : SchedWriteRes<[N1UnitV0]> { let Latency = 3; }
103 def N1Write_4c_1V0    : SchedWriteRes<[N1UnitV0]> { let Latency = 4; }
104 def N1Write_7c7_1V0   : SchedWriteRes<[N1UnitV0]> { let Latency = 7;
105                                                     let ReleaseAtCycles = [7]; }
106 def N1Write_10c7_1V0  : SchedWriteRes<[N1UnitV0]> { let Latency = 10;
107                                                     let ReleaseAtCycles = [7]; }
108 def N1Write_13c10_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 13;
109                                                     let ReleaseAtCycles = [10]; }
110 def N1Write_15c7_1V0  : SchedWriteRes<[N1UnitV0]> { let Latency = 15;
111                                                     let ReleaseAtCycles = [7]; }
112 def N1Write_17c7_1V0  : SchedWriteRes<[N1UnitV0]> { let Latency = 17;
113                                                     let ReleaseAtCycles = [7]; }
114 def N1Write_2c_1V1    : SchedWriteRes<[N1UnitV1]> { let Latency = 2; }
115 def N1Write_3c_1V1    : SchedWriteRes<[N1UnitV1]> { let Latency = 3; }
116 def N1Write_4c_1V1    : SchedWriteRes<[N1UnitV1]> { let Latency = 4; }
118 //===----------------------------------------------------------------------===//
119 // Define generic 2 micro-op types
121 def N1Write_1c_1B_1I   : SchedWriteRes<[N1UnitB, N1UnitI]> {  // Each record: two micro-ops, latency as named.
122   let Latency = 1; let NumMicroOps = 2; }
123 def N1Write_3c_1I_1M   : SchedWriteRes<[N1UnitI, N1UnitM]> {
124   let Latency = 3; let NumMicroOps = 2; }
125 def N1Write_2c_1I_1L   : SchedWriteRes<[N1UnitI, N1UnitL]> {
126   let Latency = 2; let NumMicroOps = 2; }
127 def N1Write_5c_1I_1L   : SchedWriteRes<[N1UnitI, N1UnitL]> {
128   let Latency = 5; let NumMicroOps = 2; }
129 def N1Write_6c_1I_1L   : SchedWriteRes<[N1UnitI, N1UnitL]> {
130   let Latency = 6; let NumMicroOps = 2; }
131 def N1Write_7c_1I_1L   : SchedWriteRes<[N1UnitI, N1UnitL]> {
132   let Latency = 7; let NumMicroOps = 2; }
133 def N1Write_5c_1M_1V   : SchedWriteRes<[N1UnitM, N1UnitV]> {
134   let Latency = 5; let NumMicroOps = 2; }
135 def N1Write_6c_1M_1V0  : SchedWriteRes<[N1UnitM, N1UnitV0]> {
136   let Latency = 6; let NumMicroOps = 2; }
137 def N1Write_5c_2L      : SchedWriteRes<[N1UnitL, N1UnitL]> {
138   let Latency = 5; let NumMicroOps = 2; }
139 def N1Write_1c_1L_1D   : SchedWriteRes<[N1UnitL, N1UnitD]> {
140   let Latency = 1; let NumMicroOps = 2; }
141 def N1Write_2c_1L_1V   : SchedWriteRes<[N1UnitL, N1UnitV]> {
142   let Latency = 2; let NumMicroOps = 2; }
143 def N1Write_4c_1L_1V   : SchedWriteRes<[N1UnitL, N1UnitV]> {
144   let Latency = 4; let NumMicroOps = 2; }
145 def N1Write_7c_1L_1V   : SchedWriteRes<[N1UnitL, N1UnitV]> {
146   let Latency = 7; let NumMicroOps = 2; }
147 def N1Write_4c_1V0_1V1 : SchedWriteRes<[N1UnitV0, N1UnitV1]> {
148   let Latency = 4; let NumMicroOps = 2; }
149 def N1Write_4c_2V0     : SchedWriteRes<[N1UnitV0, N1UnitV0]> {
150   let Latency = 4; let NumMicroOps = 2; }
151 def N1Write_5c_2V0     : SchedWriteRes<[N1UnitV0, N1UnitV0]> {
152   let Latency = 5; let NumMicroOps = 2; }
153 def N1Write_6c_2V1     : SchedWriteRes<[N1UnitV1, N1UnitV1]> {
154   let Latency = 6; let NumMicroOps = 2; }
155 def N1Write_5c_1V1_1V  : SchedWriteRes<[N1UnitV1, N1UnitV]> {
156   let Latency = 5; let NumMicroOps = 2; }
158 //===----------------------------------------------------------------------===//
159 // Define generic 3 micro-op types
161 let Latency = 2, NumMicroOps = 3 in  // Latency must agree with the "2c" in the record name.
162 def N1Write_2c_1I_1L_1V : SchedWriteRes<[N1UnitI, N1UnitL, N1UnitV]>;
163 def N1Write_1c_2L_1D    : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitD]> {  // Each record: three micro-ops.
164   let Latency = 1; let NumMicroOps = 3; }
165 def N1Write_2c_1L_2V    : SchedWriteRes<[N1UnitL, N1UnitV, N1UnitV]> {
166   let Latency = 2; let NumMicroOps = 3; }
167 def N1Write_6c_3L       : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL]> {
168   let Latency = 6; let NumMicroOps = 3; }
169 def N1Write_4c_3V       : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV]> {
170   let Latency = 4; let NumMicroOps = 3; }
171 def N1Write_6c_3V       : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV]> {
172   let Latency = 6; let NumMicroOps = 3; }
173 def N1Write_8c_3V       : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV]> {
174   let Latency = 8; let NumMicroOps = 3; }
176 //===----------------------------------------------------------------------===//
177 // Define generic 4 micro-op types
179 def N1Write_2c_2I_2L : SchedWriteRes<[N1UnitI, N1UnitI, N1UnitL, N1UnitL]> {  // Each record: four micro-ops.
180   let Latency = 2; let NumMicroOps = 4; }
181 def N1Write_6c_4L    : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL]> {
182   let Latency = 6; let NumMicroOps = 4; }
183 def N1Write_2c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]> {
184   let Latency = 2; let NumMicroOps = 4; }
185 let Latency = 3, NumMicroOps = 4 in  // Latency must agree with the "3c" in the record name.
186 def N1Write_3c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]>;
187 def N1Write_5c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]> {  // Each record: four micro-ops.
188   let Latency = 5; let NumMicroOps = 4; }
189 def N1Write_7c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]> {
190   let Latency = 7; let NumMicroOps = 4; }
191 def N1Write_4c_4V    : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV, N1UnitV]> {
192   let Latency = 4; let NumMicroOps = 4; }
193 def N1Write_6c_4V0   : SchedWriteRes<[N1UnitV0, N1UnitV0, N1UnitV0, N1UnitV0]> {
194   let Latency = 6; let NumMicroOps = 4; }
196 //===----------------------------------------------------------------------===//
197 // Define generic 5 micro-op types
199 def N1Write_3c_2L_3V : SchedWriteRes<[N1UnitL, N1UnitL,
200                                       N1UnitV, N1UnitV, N1UnitV]> {  // Each record: five micro-ops.
201   let Latency = 3; let NumMicroOps = 5; }
202 def N1Write_7c_2L_3V : SchedWriteRes<[N1UnitL, N1UnitL,
203                                       N1UnitV, N1UnitV, N1UnitV]> {
204   let Latency = 7; let NumMicroOps = 5; }
205 def N1Write_6c_5V    : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV, N1UnitV, N1UnitV]> {
206   let Latency = 6; let NumMicroOps = 5; }
208 //===----------------------------------------------------------------------===//
209 // Define generic 6 micro-op types
211 def N1Write_3c_4L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
212                                       N1UnitV, N1UnitV]> {  // Each record: six micro-ops.
213   let Latency = 3; let NumMicroOps = 6; }
214 def N1Write_4c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
215                                       N1UnitV, N1UnitV, N1UnitV]> {
216   let Latency = 4; let NumMicroOps = 6; }
217 def N1Write_5c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
218                                       N1UnitV, N1UnitV, N1UnitV]> {
219   let Latency = 5; let NumMicroOps = 6; }
220 def N1Write_6c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
221                                       N1UnitV, N1UnitV, N1UnitV]> {
222   let Latency = 6; let NumMicroOps = 6; }
223 def N1Write_7c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
224                                       N1UnitV, N1UnitV, N1UnitV]> {
225   let Latency = 7; let NumMicroOps = 6; }
226 def N1Write_8c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
227                                       N1UnitV, N1UnitV, N1UnitV]> {
228   let Latency = 8; let NumMicroOps = 6; }
230 //===----------------------------------------------------------------------===//
231 // Define generic 7 micro-op types
233 def N1Write_8c_3L_4V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
234                                       N1UnitV, N1UnitV, N1UnitV, N1UnitV]> {  // Seven micro-ops.
235   let Latency = 8; let NumMicroOps = 7; }
237 //===----------------------------------------------------------------------===//
238 // Define generic 8 micro-op types
240 def N1Write_5c_4L_4V  : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
241                                        N1UnitV, N1UnitV, N1UnitV, N1UnitV]> {  // Each record: eight micro-ops.
242   let Latency = 5; let NumMicroOps = 8; }
243 def N1Write_6c_4L_4V  : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
244                                        N1UnitV, N1UnitV, N1UnitV, N1UnitV]> {
245   let Latency = 6; let NumMicroOps = 8; }
246 def N1Write_8c_4L_4V  : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
247                                        N1UnitV, N1UnitV, N1UnitV, N1UnitV]> {
248   let Latency = 8; let NumMicroOps = 8; }
249 def N1Write_10c_4L_4V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
250                                        N1UnitV, N1UnitV, N1UnitV, N1UnitV]> {
251   let Latency = 10; let NumMicroOps = 8; }
253 //===----------------------------------------------------------------------===//
254 // Define generic 12 micro-op types
256 def N1Write_9c_6L_6V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
257                                       N1UnitL, N1UnitL, N1UnitL,
258                                       N1UnitV, N1UnitV, N1UnitV,
259                                       N1UnitV, N1UnitV, N1UnitV]> {  // Twelve micro-ops.
260   let Latency = 9; let NumMicroOps = 12; }
263 // Miscellaneous Instructions
264 // -----------------------------------------------------------------------------
266 def : InstRW<[WriteI], (instrs COPY)>;  // COPY is given the generic WriteI type.
268 // Convert floating-point condition flags
269 // Flag manipulation instructions
270 def : WriteRes<WriteSys, []> { let Latency = 1; }  // No resources; latency 1.
273 // Branch Instructions
274 // -----------------------------------------------------------------------------
276 // Branch, immed
277 // Compare and branch
278 def : SchedAlias<WriteBr, N1Write_1c_1B>;  // One micro-op on N1UnitB, latency 1.
280 // Branch, register
281 def : SchedAlias<WriteBrReg, N1Write_1c_1B>;
283 // Branch and link, immed
284 // Branch and link, register
285 def : InstRW<[N1Write_1c_1B_1I], (instrs BL, BLR)>;  // Two micro-ops: N1UnitB plus N1UnitI.
287 // Compare and branch
288 def : InstRW<[N1Write_1c_1B], (instregex "^[CT]BN?Z[XW]$")>;  // Matches CBZ/CBNZ/TBZ/TBNZ with W or X suffix.
291 // Arithmetic and Logical Instructions
292 // -----------------------------------------------------------------------------
294 // ALU, basic
295 // ALU, basic, flagset
296 // Conditional compare
297 // Conditional select
298 // Logical, basic
299 // Address generation
300 // Count leading
301 // Reverse bits/bytes
302 // Move immediate
303 def : SchedAlias<WriteI, N1Write_1c_1I>;  // One micro-op on any integer unit, latency 1.
305 // ALU, extend and shift
306 def : SchedAlias<WriteIEReg, N1Write_2c_1M>;
308 // Arithmetic, LSL shift, shift <= 4
309 // Arithmetic, flagset, LSL shift, shift <= 4
310 // Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
311 def N1WriteISReg : SchedWriteVariant<[
312                      SchedVar<IsCheapLSL,  [N1Write_1c_1I]>,    // When IsCheapLSL holds: latency 1 on N1UnitI.
313                      SchedVar<NoSchedPred, [N1Write_2c_1M]>]>;  // Otherwise: latency 2 on N1UnitM.
314 def              : SchedAlias<WriteISReg, N1WriteISReg>;
316 // Logical, shift, no flagset
317 def : InstRW<[N1Write_1c_1I],
318              (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
320 // Logical, shift, flagset
321 def : InstRW<[N1Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>;
324 // Divide and multiply instructions
325 // -----------------------------------------------------------------------------
327 // Divide
328 def : SchedAlias<WriteID32, N1Write_12c5_1M>;  // Latency 12, ReleaseAtCycles 5 on N1UnitM.
329 def : SchedAlias<WriteID64, N1Write_20c5_1M>;  // Latency 20, ReleaseAtCycles 5 on N1UnitM.
331 // Multiply accumulate
332 // Multiply accumulate, long
333 def : SchedAlias<WriteIM32, N1Write_2c_1M>;
334 def : SchedAlias<WriteIM64, N1Write_4c3_1M>;   // Latency 4, ReleaseAtCycles 3 on N1UnitM.
336 // Multiply high
337 def : InstRW<[N1Write_5c3_1M, ReadIM, ReadIM], (instrs SMULHrr, UMULHrr)>;  // One write followed by two ReadIM entries.
340 // Miscellaneous data-processing instructions
341 // -----------------------------------------------------------------------------
343 // Bitfield extract, one reg
344 // Bitfield extract, two regs
345 def N1WriteExtr : SchedWriteVariant<[
346                     SchedVar<IsRORImmIdiomPred, [N1Write_1c_1I]>,      // When IsRORImmIdiomPred holds: latency 1 on N1UnitI.
347                     SchedVar<NoSchedPred,       [N1Write_3c_1I_1M]>]>; // Otherwise: two micro-ops (I + M), latency 3.
348 def : SchedAlias<WriteExtr, N1WriteExtr>;
350 // Bitfield move, basic
351 // Variable shift
352 def : SchedAlias<WriteIS, N1Write_1c_1I>;
354 // Bitfield move, insert
355 def : InstRW<[N1Write_2c_1M], (instregex "^BFM[WX]ri$")>;
357 // Move immediate
358 def : SchedAlias<WriteImm, N1Write_1c_1I>;
360 // Load instructions
361 // -----------------------------------------------------------------------------
363 // Load register, immed offset
364 def : SchedAlias<WriteLD, N1Write_4c_1L>;  // One micro-op on N1UnitL, latency 4.
366 // Load register, immed offset, index
367 def : SchedAlias<WriteLDIdx, N1Write_4c_1L>;
368 def : SchedAlias<WriteAdr,   N1Write_1c_1I>;
370 // Load pair, immed offset
371 def : SchedAlias<WriteLDHi, N1Write_4c_1L>;
373 // Load pair, immed offset, W-form
374 def : InstRW<[N1Write_4c_1L, N1Write_0c_0Z], (instrs LDPWi, LDNPWi)>;  // Second write is the zero-latency, zero-micro-op type.
376 // Load pair, signed immed offset, signed words
377 def : InstRW<[N1Write_5c_1I_1L, N1Write_0c_0Z], (instrs LDPSWi)>;
379 // Load pair, immed post or pre-index, signed words
380 def : InstRW<[WriteAdr, N1Write_5c_1I_1L, N1Write_0c_0Z],
381              (instrs LDPSWpost, LDPSWpre)>;  // WriteAdr comes first; presumably for the updated base register (confirm against the instruction defs).
384 // Store instructions
385 // -----------------------------------------------------------------------------
387 // Store register, immed offset
388 def : SchedAlias<WriteST, N1Write_1c_1L_1D>;  // Two micro-ops: N1UnitL plus N1UnitD, latency 1.
390 // Store register, immed offset, index
391 def : SchedAlias<WriteSTIdx, N1Write_1c_1L_1D>;
393 // Store pair, immed offset
394 def : SchedAlias<WriteSTP, N1Write_1c_2L_1D>;  // Three micro-ops: two on N1UnitL, one on N1UnitD.
396 // Store pair, immed offset, W-form
397 def : InstRW<[N1Write_1c_1L_1D], (instrs STPWi)>;  // The W-form pair uses the two-micro-op type instead.
400 // FP data processing instructions
401 // -----------------------------------------------------------------------------
403 // FP absolute value
404 // FP arithmetic
405 // FP min/max
406 // FP negate
407 // FP select
408 def : SchedAlias<WriteF, N1Write_2c_1V>;  // One micro-op on either FP/ASIMD unit, latency 2.
410 // FP compare
411 def : SchedAlias<WriteFCmp, N1Write_2c_1V0>;
413 // FP divide
414 // FP square root
415 def : SchedAlias<WriteFDiv, N1Write_10c7_1V0>;  // Generic WriteFDiv mapping; per-form InstRW entries follow.
417 // FP divide, H-form
418 // FP square root, H-form
419 def : InstRW<[N1Write_7c7_1V0], (instrs FDIVHrr, FSQRTHr)>;
421 // FP divide, S-form
422 // FP square root, S-form
423 def : InstRW<[N1Write_10c7_1V0], (instrs FDIVSrr, FSQRTSr)>;
425 // FP divide, D-form
426 def : InstRW<[N1Write_15c7_1V0], (instrs FDIVDrr)>;
428 // FP square root, D-form
429 def : InstRW<[N1Write_17c7_1V0], (instrs FSQRTDr)>;
431 // FP multiply
432 def : SchedAlias<WriteFMul, N1Write_3c_1V>;
434 // FP multiply accumulate
435 def : InstRW<[N1Write_4c_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;  // FMADD/FMSUB/FNMADD/FNMSUB, H/S/D forms.
437 // FP round to integral
438 def : InstRW<[N1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ][HSD]r$",
439                                           "^FRINT(32|64)[XZ][SD]r$")>;
442 // FP miscellaneous instructions
443 // -----------------------------------------------------------------------------
445 // FP convert, from vec to vec reg
446 // FP convert, Javascript from vec to gen reg
447 def : SchedAlias<WriteFCvt, N1Write_3c_1V>;
449 // FP convert, from gen to vec reg
450 def : InstRW<[N1Write_6c_1M_1V0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;  // Two micro-ops: N1UnitM plus N1UnitV0.
452 // FP convert, from vec to gen reg
453 def : InstRW<[N1Write_4c_1V0_1V1], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;  // Two micro-ops: N1UnitV0 plus N1UnitV1.
455 // FP move, immed
456 def : SchedAlias<WriteFImm, N1Write_2c_1V>;
458 // FP move, register
459 def : InstRW<[N1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
461 // FP transfer, from gen to low half of vec reg
462 // FP transfer, from gen to high half of vec reg
463 def : InstRW<[N1Write_3c_1M], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
464                                       FMOVXDHighr)>;  // Single micro-op on N1UnitM, latency 3.
466 // FP transfer, from vec to gen reg
467 def : SchedAlias<WriteFCopy, N1Write_2c_1V1>;
470 // FP load instructions
471 // -----------------------------------------------------------------------------
473 // Load vector reg, literal, S/D/Q forms
474 // Load vector reg, unscaled immed
475 def : InstRW<[N1Write_5c_1L, ReadAdrBase], (instregex "^LDR[SDQ]l$",
476                                                       "^LDUR[BHSDQ]i$")>;
478 // Load vector reg, immed post-index
479 // Load vector reg, immed pre-index
480 def : InstRW<[WriteAdr, N1Write_5c_1L],
481              (instregex "^LDR[BHSDQ](post|pre)$")>;  // WriteAdr comes first; presumably for the updated base register (confirm).
483 // Load vector reg, unsigned immed
484 def : InstRW<[N1Write_5c_1I_1L], (instregex "^LDR[BHSDQ]ui$")>;
486 // Load vector reg, register offset, basic
487 // Load vector reg, register offset, scale, S/D-form
488 // Load vector reg, register offset, extend
489 // Load vector reg, register offset, extend, scale, S/D-form
490 def : InstRW<[N1Write_5c_1I_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;
492 // Load vector reg, register offset, scale, H/Q-form
493 // Load vector reg, register offset, extend, scale, H/Q-form
494 def : InstRW<[N1Write_6c_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;  // One cycle more than the B/S/D forms.
496 // Load vector pair, immed offset, S/D-form
497 def : InstRW<[N1Write_5c_1I_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;  // The optional N also covers LDNP.
499 // Load vector pair, immed offset, H/Q-form
500 def : InstRW<[N1Write_7c_1I_1L, WriteLDHi], (instregex "^LDN?P[HQ]i$")>;  // "LDN?P" (not "LDPN?") so the non-temporal LDNP form is matched as well.
502 // Load vector pair, immed post-index, S/D-form
503 // Load vector pair, immed pre-index, S/D-form
504 def : InstRW<[WriteAdr, N1Write_5c_1L, WriteLDHi],
505              (instregex "^LDP[SD](pre|post)$")>;  // Three writes listed: WriteAdr, the 5-cycle load, WriteLDHi.
507 // Load vector pair, immed post-index, Q-form
508 // Load vector pair, immed pre-index, Q-form
509 def : InstRW<[WriteAdr, N1Write_7c_1L, WriteLDHi],
510              (instrs LDPQpost, LDPQpre)>;  // Same shape as S/D, with the 7-cycle load type.
513 // FP store instructions
514 // -----------------------------------------------------------------------------
516 // Store vector reg, unscaled immed, B/H/S/D-form
517 def : InstRW<[N1Write_2c_1I_1L], (instregex "^STUR[BHSD]i$")>;
519 // Store vector reg, unscaled immed, Q-form
520 def : InstRW<[N1Write_2c_2I_2L], (instrs STURQi)>;  // Twice the micro-ops of the B/H/S/D form.
522 // Store vector reg, immed post-index, B/H/S/D-form
523 // Store vector reg, immed pre-index, B/H/S/D-form
524 def : InstRW<[WriteAdr, N1Write_2c_1L_1V], (instregex "^STR[BHSD](pre|post)$")>;
526 // Store vector reg, immed pre-index, Q-form
527 // Store vector reg, immed post-index, Q-form
528 def : InstRW<[WriteAdr, N1Write_2c_2L_2V], (instrs STRQpre, STRQpost)>;
530 // Store vector reg, unsigned immed, B/H/S/D-form
531 def : InstRW<[N1Write_2c_1L_1V], (instregex "^STR[BHSD]ui$")>;
533 // Store vector reg, unsigned immed, Q-form
534 def : InstRW<[N1Write_2c_2L_2V], (instrs STRQui)>;
536 // Store vector reg, register offset, basic, B/S/D-form
537 // Store vector reg, register offset, scale, B/S/D-form
538 // Store vector reg, register offset, extend, B/S/D-form
539 // Store vector reg, register offset, extend, scale, B/S/D-form
540 def : InstRW<[N1Write_2c_1L_1V, ReadAdrBase], (instregex "^STR[BSD]ro[WX]$")>;
542 // Store vector reg, register offset, basic, H-form
543 // Store vector reg, register offset, scale, H-form
544 // Store vector reg, register offset, extend, H-form
545 // Store vector reg, register offset, extend, scale, H-form
546 def : InstRW<[N1Write_2c_1I_1L_1V, ReadAdrBase], (instregex "^STRHro[WX]$")>;  // Three micro-ops: one each on I, L and V.
548 // Store vector reg, register offset, basic, Q-form
549 // Store vector reg, register offset, scale, Q-form
550 // Store vector reg, register offset, extend, Q-form
551 // Store vector reg, register offset, extend, scale, Q-form
552 def : InstRW<[N1Write_2c_2L_2V, ReadAdrBase], (instregex "^STRQro[WX]$")>;
554 // Store vector pair, immed offset, S-form
555 def : InstRW<[N1Write_2c_1L_1V], (instrs STPSi, STNPSi)>;
557 // Store vector pair, immed offset, D-form
558 def : InstRW<[N1Write_2c_2L_2V], (instrs STPDi, STNPDi)>;
560 // Store vector pair, immed offset, Q-form
561 def : InstRW<[N1Write_3c_4L_2V], (instrs STPQi, STNPQi)>;  // Six micro-ops: four on L, two on V; latency 3.
563 // Store vector pair, immed post-index, S-form
564 // Store vector pair, immed pre-index, S-form
565 def : InstRW<[WriteAdr, N1Write_2c_1L_1V], (instrs STPSpre, STPSpost)>;
567 // Store vector pair, immed post-index, D-form
568 // Store vector pair, immed pre-index, D-form
569 def : InstRW<[WriteAdr, N1Write_2c_2L_2V], (instrs STPDpre, STPDpost)>;
571 // Store vector pair, immed post-index, Q-form
572 // Store vector pair, immed pre-index, Q-form
573 def : InstRW<[WriteAdr, N1Write_3c_4L_2V], (instrs STPQpre, STPQpost)>;
576 // ASIMD integer instructions
577 // -----------------------------------------------------------------------------
579 // ASIMD absolute diff
580 // ASIMD absolute diff long
581 // ASIMD arith, basic
582 // ASIMD arith, complex
583 // ASIMD arith, pair-wise
584 // ASIMD compare
585 // ASIMD logical
586 // ASIMD max/min, basic and pair-wise
587 def : SchedAlias<WriteVd, N1Write_2c_1V>;  // WriteVd and WriteVq share the same type:
588 def : SchedAlias<WriteVq, N1Write_2c_1V>;  // one micro-op on either FP/ASIMD unit, latency 2.
590 // ASIMD absolute diff accum
591 // ASIMD absolute diff accum long
592 def : InstRW<[N1Write_4c_1V1], (instregex "^[SU]ABAL?v")>;
594 // ASIMD arith, reduce, 4H/4S
595 def : InstRW<[N1Write_3c_1V1], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
597 // ASIMD arith, reduce, 8B/8H
598 def : InstRW<[N1Write_5c_1V1_1V], (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;  // Two micro-ops: N1UnitV1 plus N1UnitV.
600 // ASIMD arith, reduce, 16B
601 def : InstRW<[N1Write_6c_2V1], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;  // Two micro-ops on N1UnitV1.
603 // ASIMD max/min, reduce, 4H/4S
604 def : InstRW<[N1Write_3c_1V1], (instregex "^[SU](MAX|MIN)Vv4(i16|i32)v$")>;
606 // ASIMD max/min, reduce, 8B/8H
607 def : InstRW<[N1Write_5c_1V1_1V], (instregex "^[SU](MAX|MIN)Vv8(i8|i16)v$")>;
609 // ASIMD max/min, reduce, 16B
610 def : InstRW<[N1Write_6c_2V1], (instregex "^[SU](MAX|MIN)Vv16i8v$")>;  // Leading "^" added to match the 4H/4S and 8B/8H patterns.
612 // ASIMD multiply, D-form
613 // ASIMD multiply accumulate, D-form
614 // ASIMD multiply accumulate high, D-form
615 // ASIMD multiply accumulate saturating long
616 // ASIMD multiply long
617 // ASIMD multiply accumulate long
618 def : InstRW<[N1Write_4c_1V0], (instregex "^MUL(v[14]i16|v[12]i32)$",
619                                           "^ML[AS](v[14]i16|v[12]i32)$",
620                                           "^SQ(R)?DMULH(v[14]i16|v[12]i32)$",
621                                           "^SQRDML[AS]H(v[14]i16|v[12]i32)$",
622                                           "^SQDML[AS]Lv",
623                                           "^([SU]|SQD)MULLv",
624                                           "^[SU]ML[AS]Lv")>;  // Single micro-op on N1UnitV0, latency 4.
626 // ASIMD multiply, Q-form
627 // ASIMD multiply accumulate, Q-form
628 // ASIMD multiply accumulate high, Q-form
629 def : InstRW<[N1Write_5c_2V0], (instregex "^MUL(v8i16|v4i32)$",
630                                           "^ML[AS](v8i16|v4i32)$",
631                                           "^SQ(R)?DMULH(v8i16|v4i32)$",
632                                           "^SQRDML[AS]H(v8i16|v4i32)$")>;  // Two micro-ops on N1UnitV0, latency 5.
634 // ASIMD multiply/multiply long (8x8) polynomial, D-form
635 def : InstRW<[N1Write_3c_1V0], (instrs PMULv8i8, PMULLv8i8)>;
637 // ASIMD multiply/multiply long (8x8) polynomial, Q-form
638 def : InstRW<[N1Write_4c_2V0], (instrs PMULv16i8, PMULLv16i8)>;
640 // ASIMD pairwise add and accumulate long
641 def : InstRW<[N1Write_4c_1V1], (instregex "^[SU]ADALPv")>;
643 // ASIMD shift accumulate
644 def : InstRW<[N1Write_4c_1V1], (instregex "^[SU]R?SRAv")>;
646 // ASIMD shift by immed, basic
647 // ASIMD shift by immed and insert, basic
648 // ASIMD shift by register, basic
649 def : InstRW<[N1Write_2c_1V1], (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv",
650                                           "^[SU]SHRv", "^S[LR]Iv", "^[SU]SHLv")>;  // Basic shifts: N1UnitV1, latency 2.
652 // ASIMD shift by immed, complex
653 // ASIMD shift by register, complex
654 def : InstRW<[N1Write_4c_1V1],
655              (instregex "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$",
656                         "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
657                         "^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv", 
658                         "^[SU]Q?RSHLv", "^[SU]QSHLv")>;  // Complex shifts: N1UnitV1, latency 4.
661 // ASIMD FP instructions
662 // -----------------------------------------------------------------------------
664 // ASIMD FP absolute value/difference
665 // ASIMD FP arith, normal
666 // ASIMD FP compare
667 // ASIMD FP max/min, normal
668 // ASIMD FP max/min, pairwise
669 // ASIMD FP negate
670 // Covered by "SchedAlias (WriteV[dq]...)" above
672 // ASIMD FP convert, long (F16 to F32)
673 def : InstRW<[N1Write_4c_2V0], (instregex "^FCVTL(v4|v8)i16$")>;  // Two micro-ops on N1UnitV0, latency 4.
675 // ASIMD FP convert, long (F32 to F64)
676 def : InstRW<[N1Write_3c_1V0], (instregex "^FCVTL(v2|v4)i32$")>;  // One micro-op on N1UnitV0, latency 3.
678 // ASIMD FP convert, narrow (F32 to F16)
679 def : InstRW<[N1Write_4c_2V0], (instregex "^FCVTN(v4|v8)i16$")>;
681 // ASIMD FP convert, narrow (F64 to F32)
682 def : InstRW<[N1Write_3c_1V0], (instregex "^FCVTN(v2|v4)i32$",
683                                           "^FCVTXN(v2|v4)f32$")>;
685 // ASIMD FP convert, other, D-form F32 and Q-form F64
686 def : InstRW<[N1Write_3c_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
687                                           "^[SU]CVTFv2f(32|64)$")>;
689 // ASIMD FP convert, other, D-form F16 and Q-form F32
690 def : InstRW<[N1Write_4c_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
691                                           "^[SU]CVTFv4f(16|32)$")>;
693 // ASIMD FP convert, other, Q-form F16
694 def : InstRW<[N1Write_6c_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
695                                           "^[SU]CVTFv8f16$")>;  // Four micro-ops on N1UnitV0, latency 6.
697 // ASIMD FP divide, D-form, F16
698 // ASIMD FP square root, D-form, F16
699 def : InstRW<[N1Write_7c7_1V0], (instrs FDIVv4f16, FSQRTv4f16)>;
701 // ASIMD FP divide, D-form, F32
702 // ASIMD FP square root, D-form, F32
703 def : InstRW<[N1Write_10c7_1V0], (instrs FDIVv2f32, FSQRTv2f32)>;
705 // ASIMD FP divide, Q-form, F16
706 // ASIMD FP square root, Q-form, F16
707 def : InstRW<[N1Write_13c10_1V0], (instrs FDIVv8f16, FSQRTv8f16)>;
709 // ASIMD FP divide, Q-form, F32
710 // ASIMD FP square root, Q-form, F32
711 def : InstRW<[N1Write_10c7_1V0], (instrs FDIVv4f32, FSQRTv4f32)>;  // NOTE(review): same type as the D-form F32 entry; confirm against the optimization guide.
713 // ASIMD FP divide, Q-form, F64
714 def : InstRW<[N1Write_15c7_1V0], (instrs FDIVv2f64)>;
716 // ASIMD FP square root, Q-form, F64
717 def : InstRW<[N1Write_17c7_1V0], (instrs FSQRTv2f64)>;
719 // ASIMD FP max/min, reduce, F32 and D-form F16
720 def : InstRW<[N1Write_5c_1V], (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>;
722 // ASIMD FP max/min, reduce, Q-form F16
723 def : InstRW<[N1Write_8c_3V], (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>;  // Three micro-ops on the FP/ASIMD group, latency 8.
725 // ASIMD FP multiply
726 def : InstRW<[N1Write_3c_1V], (instregex "^FMULX?v")>;
728 // ASIMD FP multiply accumulate
729 def : InstRW<[N1Write_4c_1V], (instregex "^FML[AS]v")>;
731 // ASIMD FP multiply accumulate long
732 def : InstRW<[N1Write_5c_1V], (instregex "^FML[AS]L2?v")>;
734 // ASIMD FP round, D-form F32 and Q-form F64
735 def : InstRW<[N1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>;
737 // ASIMD FP round, D-form F16 and Q-form F32
738 def : InstRW<[N1Write_4c_2V0], (instregex "^FRINT[AIMNPXZ]v4f(16|32)$")>;
740 // ASIMD FP round, Q-form F16
741 def : InstRW<[N1Write_6c_4V0], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
744 // ASIMD miscellaneous instructions
745 // -----------------------------------------------------------------------------
747 // ASIMD bit reverse
748 // ASIMD bitwise insert
749 // ASIMD count
750 // ASIMD duplicate, element
751 // ASIMD extract
752 // ASIMD extract narrow
753 // ASIMD insert, element to element
754 // ASIMD move, FP immed
755 // ASIMD move, integer immed
756 // ASIMD reverse
757 // ASIMD table lookup, 1 or 2 table regs
758 // ASIMD table lookup extension, 1 table reg
759 // ASIMD transfer, element to gen reg (SMOV/UMOV also get an explicit InstRW below)
760 // ASIMD transpose
761 // ASIMD unzip/zip
762 // Covered by "SchedAlias (WriteV[dq]...)" above
764 // ASIMD duplicate, gen reg
// Selects the DUP<arrangement>gpr opcodes for every arrangement spelled out in
// the pattern (v8i8/v16i8, v4i16/v8i16, v2i32/v4i32, v2i64).
765 def : InstRW<[N1Write_3c_1M],
766              (instregex "^DUP((v16|v8)i8|(v8|v4)i16|(v4|v2)i32|v2i64)gpr$")>;
768 // ASIMD extract narrow, saturating
// Prefix patterns: any opcode whose name starts with SQXTNv, UQXTNv or SQXTUNv.
769 def : InstRW<[N1Write_4c_1V1], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
// The estimate opcodes are listed explicitly (instrs) rather than by pattern.
// Besides FRECPE/FRSQRTE, the lists also contain the FRECPX opcodes and the
// unsigned-integer estimates URECPE/URSQRTE.
771 // ASIMD reciprocal and square root estimate, D-form F32 and F64
// Includes the v1i32 / v1i64 opcodes alongside the v2f32 / v2i32 ones.
772 def : InstRW<[N1Write_3c_1V0], (instrs FRECPEv1i32, FRECPEv2f32, FRECPEv1i64,
773                                        FRECPXv1i32, FRECPXv1i64,
774                                        URECPEv2i32,
775                                        FRSQRTEv1i32, FRSQRTEv2f32, FRSQRTEv1i64,
776                                        URSQRTEv2i32)>;
778 // ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
// Includes the v1f16 opcodes alongside the v4f16 / v4f32 / v4i32 ones.
779 def : InstRW<[N1Write_4c_2V0], (instrs FRECPEv1f16, FRECPEv4f16, FRECPEv4f32,
780                                        FRECPXv1f16,
781                                        URECPEv4i32,
782                                        FRSQRTEv1f16, FRSQRTEv4f16, FRSQRTEv4f32,
783                                        URSQRTEv4i32)>;
785 // ASIMD reciprocal and square root estimate, Q-form F16
786 def : InstRW<[N1Write_6c_4V0], (instrs FRECPEv8f16,
787                                        FRSQRTEv8f16)>;
789 // ASIMD reciprocal step
// Two spellings per operation: FRECPS16/32/64 and FRSQRTS16/32/64 (matched
// exactly), plus every opcode starting with FRECPSv / FRSQRTSv.  All of them
// share one write.
790 def : InstRW<[N1Write_4c_1V], (instregex "^FRECPS(16|32|64)$", "^FRECPSv",
791                                          "^FRSQRTS(16|32|64)$", "^FRSQRTSv")>;
// TBL/TBX opcodes are listed explicitly; the One/Two/Three/Four suffix in the
// opcode name is the number of table registers.
793 // ASIMD table lookup, 3 table regs
794 // ASIMD table lookup extension, 2 table regs
795 def : InstRW<[N1Write_4c_4V], (instrs TBLv8i8Three, TBLv16i8Three,
796                                       TBXv8i8Two, TBXv16i8Two)>;
798 // ASIMD table lookup, 4 table regs
// NOTE(review): the write chosen for 4 table registers ends in "3V" while the
// one for 3 table registers above ends in "4V" - confirm against the
// optimization guide and the N1Write_* definitions.
799 def : InstRW<[N1Write_4c_3V], (instrs TBLv8i8Four, TBLv16i8Four)>;
801 // ASIMD table lookup extension, 3 table regs
802 def : InstRW<[N1Write_6c_3V], (instrs TBXv8i8Three, TBXv16i8Three)>;
804 // ASIMD table lookup extension, 4 table regs
805 def : InstRW<[N1Write_6c_5V], (instrs TBXv8i8Four, TBXv16i8Four)>;
807 // ASIMD transfer, element to gen reg
// SMOV: the 8to32, 8to64, 16to32, 16to64 and 32to64 opcodes.
// UMOV: the 8, 16, 32 and 64 opcodes.  Both patterns are anchored at both
// ends, so any opcode with an extra suffix is not selected here.
808 def : InstRW<[N1Write_2c_1V1], (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$",
809                                           "^UMOVvi(8|16|32|64)$")>;
811 // ASIMD transfer, gen reg to element
// INSvi<size>gpr opcodes only (the element-to-element INS forms do not end in
// "gpr" and are therefore not matched).
812 def : InstRW<[N1Write_5c_1M_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
815 // ASIMD load instructions
816 // -----------------------------------------------------------------------------
// Layout used throughout the load/store sections: each opcode group gets two
// records, one for the plain opcodes and one for the "_POST" opcodes.  The
// _POST record uses the same N1Write_* entry and additionally lists WriteAdr
// first (presumably for the post-index base-register update - confirm against
// the WriteAdr definition, which is outside this excerpt).
818 // ASIMD load, 1 element, multiple, 1 reg
819 def : InstRW<[N1Write_5c_1L],
820              (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
821 def : InstRW<[WriteAdr, N1Write_5c_1L],
822              (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
824 // ASIMD load, 1 element, multiple, 2 reg
825 def : InstRW<[N1Write_5c_2L],
826              (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
827 def : InstRW<[WriteAdr, N1Write_5c_2L],
828              (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
830 // ASIMD load, 1 element, multiple, 3 reg
831 def : InstRW<[N1Write_6c_3L],
832              (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
833 def : InstRW<[WriteAdr, N1Write_6c_3L],
834              (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
836 // ASIMD load, 1 element, multiple, 4 reg
837 def : InstRW<[N1Write_6c_4L],
838              (instregex "^LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
839 def : InstRW<[WriteAdr, N1Write_6c_4L],
840              (instregex "^LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
842 // ASIMD load, 1 element, one lane
843 // ASIMD load, 1 element, all lanes
// The one-lane opcodes are named LD1i<size>; the all-lanes (replicate) opcodes
// are named LD1Rv<arrangement>, so each family has its own pattern.  The plain
// record previously spelled the first pattern as "LD1(i|Rv)(8|16|32|64)$"; the
// "Rv" alternative could never match (every LD1Rv opcode carries an arrangement
// suffix such as 8b or 4h) and is dropped so the plain and _POST records use
// the same pair of patterns.  The set of matched opcodes is unchanged.
// The _POST record additionally lists WriteAdr first (presumably for the
// post-index base-register update - confirm against the WriteAdr definition).
844 def : InstRW<[N1Write_7c_1L_1V],
845              (instregex "LD1i(8|16|32|64)$",
846                         "LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
847 def : InstRW<[WriteAdr, N1Write_7c_1L_1V],
848              (instregex "LD1i(8|16|32|64)_POST$",
849                         "LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
851 // ASIMD load, 2 element, multiple
852 // ASIMD load, 2 element, one lane
853 // ASIMD load, 2 element, all lanes
// All three LD2 flavours share one write.  The "multiple" pattern lists no 1d
// arrangement, whereas the all-lanes (LD2Rv) pattern does.
// The _POST record mirrors the plain one with WriteAdr listed first.
854 def : InstRW<[N1Write_7c_2L_2V],
855              (instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)$",
856                         "LD2i(8|16|32|64)$",
857                         "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
858 def : InstRW<[WriteAdr, N1Write_7c_2L_2V],
859              (instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)_POST$",
860                         "LD2i(8|16|32|64)_POST$",
861                         "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
863 // ASIMD load, 3 element, multiple
// D- and Q-form arrangements share one write; no 1d arrangement is listed.
// Each _POST record mirrors the plain one with WriteAdr listed first.
864 def : InstRW<[N1Write_8c_3L_3V],
865              (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)$")>;
866 def : InstRW<[WriteAdr, N1Write_8c_3L_3V],
867              (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)_POST$")>;
869 // ASIMD load, 3 element, one lane
870 // ASIMD load, 3 element, all lanes
// Lane opcodes are LD3i<size>; all-lanes opcodes are LD3Rv<arrangement>.
871 def : InstRW<[N1Write_7c_2L_3V],
872              (instregex "LD3i(8|16|32|64)$",
873                         "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
874 def : InstRW<[WriteAdr, N1Write_7c_2L_3V],
875              (instregex "LD3i(8|16|32|64)_POST$",
876                         "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
// LD4 "multiple" is split by register width: the 8b/4h/2s arrangements and the
// 16b/8h/4s/2d arrangements use different writes.  Each _POST record mirrors
// the plain one with WriteAdr listed first.
878 // ASIMD load, 4 element, multiple, D-form
879 def : InstRW<[N1Write_8c_3L_4V],
880              (instregex "LD4Fourv(8b|4h|2s)$")>;
881 def : InstRW<[WriteAdr, N1Write_8c_3L_4V],
882              (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
884 // ASIMD load, 4 element, multiple, Q-form
885 def : InstRW<[N1Write_10c_4L_4V],
886              (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
887 def : InstRW<[WriteAdr, N1Write_10c_4L_4V],
888              (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
890 // ASIMD load, 4 element, one lane
891 // ASIMD load, 4 element, all lanes
// Lane opcodes are LD4i<size>; all-lanes opcodes are LD4Rv<arrangement>.
892 def : InstRW<[N1Write_8c_4L_4V],
893              (instregex "LD4i(8|16|32|64)$",
894                         "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
895 def : InstRW<[WriteAdr, N1Write_8c_4L_4V],
896              (instregex "LD4i(8|16|32|64)_POST$",
897                         "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
900 // ASIMD store instructions
901 // -----------------------------------------------------------------------------
// As for the loads, every group has a plain record and a "_POST" record; the
// _POST record reuses the same N1Write_* entry with WriteAdr listed first.
// Stores are split into D-form (8b/4h/2s/1d) and Q-form (16b/8h/4s/2d)
// arrangement lists.
903 // ASIMD store, 1 element, multiple, 1 reg, D-form
904 def : InstRW<[N1Write_2c_1L_1V],
905              (instregex "ST1Onev(8b|4h|2s|1d)$")>;
906 def : InstRW<[WriteAdr, N1Write_2c_1L_1V],
907              (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
909 // ASIMD store, 1 element, multiple, 1 reg, Q-form
// Same write as the D-form record above; only the arrangement list differs.
910 def : InstRW<[N1Write_2c_1L_1V],
911              (instregex "ST1Onev(16b|8h|4s|2d)$")>;
912 def : InstRW<[WriteAdr, N1Write_2c_1L_1V],
913              (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
915 // ASIMD store, 1 element, multiple, 2 reg, D-form
916 def : InstRW<[N1Write_2c_1L_2V],
917              (instregex "ST1Twov(8b|4h|2s|1d)$")>;
918 def : InstRW<[WriteAdr, N1Write_2c_1L_2V],
919              (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
921 // ASIMD store, 1 element, multiple, 2 reg, Q-form
922 def : InstRW<[N1Write_3c_2L_2V],
923              (instregex "ST1Twov(16b|8h|4s|2d)$")>;
924 def : InstRW<[WriteAdr, N1Write_3c_2L_2V],
925              (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
// ST1 with three and four registers, split into D-form (8b/4h/2s/1d) and
// Q-form (16b/8h/4s/2d) arrangement lists.  Each _POST record reuses the write
// of the plain record with WriteAdr listed first.
927 // ASIMD store, 1 element, multiple, 3 reg, D-form
928 def : InstRW<[N1Write_3c_2L_3V],           
929              (instregex "ST1Threev(8b|4h|2s|1d)$")>;
930 def : InstRW<[WriteAdr, N1Write_3c_2L_3V],
931              (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
933 // ASIMD store, 1 element, multiple, 3 reg, Q-form
934 def : InstRW<[N1Write_4c_3L_3V],
935              (instregex "ST1Threev(16b|8h|4s|2d)$")>;
936 def : InstRW<[WriteAdr, N1Write_4c_3L_3V],
937              (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
939 // ASIMD store, 1 element, multiple, 4 reg, D-form
// NOTE(review): this 4-register record uses a "2L_2V" write while the
// 3-register D-form record above uses "2L_3V" - confirm against the
// optimization guide and the N1Write_* definitions.
940 def : InstRW<[N1Write_3c_2L_2V],
941              (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
942 def : InstRW<[WriteAdr, N1Write_3c_2L_2V],
943              (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
945 // ASIMD store, 1 element, multiple, 4 reg, Q-form
946 def : InstRW<[N1Write_5c_4L_4V],
947              (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
948 def : InstRW<[WriteAdr, N1Write_5c_4L_4V],
949              (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
951 // ASIMD store, 1 element, one lane
// ST1i<size> opcodes for all four element sizes; the _POST record reuses the
// same write with WriteAdr listed first.
952 def : InstRW<[N1Write_4c_1L_1V],
953              (instregex "ST1i(8|16|32|64)$")>;
954 def : InstRW<[WriteAdr, N1Write_4c_1L_1V],
955              (instregex "ST1i(8|16|32|64)_POST$")>;
// ST2: the D-form pattern lists only 8b/4h/2s (no 1d arrangement), the Q-form
// pattern lists 16b/8h/4s/2d.  Each _POST record reuses the write of the plain
// record with WriteAdr listed first.
957 // ASIMD store, 2 element, multiple, D-form, B/H/S
958 def : InstRW<[N1Write_4c_1L_1V],
959              (instregex "ST2Twov(8b|4h|2s)$")>;
960 def : InstRW<[WriteAdr, N1Write_4c_1L_1V],
961              (instregex "ST2Twov(8b|4h|2s)_POST$")>;
963 // ASIMD store, 2 element, multiple, Q-form
964 def : InstRW<[N1Write_5c_2L_2V],
965              (instregex "ST2Twov(16b|8h|4s|2d)$")>;
966 def : InstRW<[WriteAdr, N1Write_5c_2L_2V],
967              (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
969 // ASIMD store, 2 element, one lane
// Same write as the D-form "multiple" record above.
970 def : InstRW<[N1Write_4c_1L_1V],
971              (instregex "ST2i(8|16|32|64)$")>;
972 def : InstRW<[WriteAdr, N1Write_4c_1L_1V],
973              (instregex "ST2i(8|16|32|64)_POST$")>;
// ST3: "multiple" forms are split into D-form (8b/4h/2s) and Q-form
// (16b/8h/4s/2d); one-lane forms are split into 8/16/32-bit lanes (pattern)
// and the 64-bit lane (explicit opcode).  Each _POST record reuses the write
// of the plain record with WriteAdr listed first.
975 // ASIMD store, 3 element, multiple, D-form, B/H/S
976 def : InstRW<[N1Write_5c_2L_2V],
977              (instregex "ST3Threev(8b|4h|2s)$")>;
978 def : InstRW<[WriteAdr, N1Write_5c_2L_2V],
979              (instregex "ST3Threev(8b|4h|2s)_POST$")>;
981 // ASIMD store, 3 element, multiple, Q-form
982 def : InstRW<[N1Write_6c_3L_3V],
983              (instregex "ST3Threev(16b|8h|4s|2d)$")>;
984 def : InstRW<[WriteAdr, N1Write_6c_3L_3V],
985              (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
987 // ASIMD store, 3 element, one lane, B/H/S
988 def : InstRW<[N1Write_4c_3L_3V],
989              (instregex "ST3i(8|16|32)$")>;
990 def : InstRW<[WriteAdr, N1Write_4c_3L_3V],
991              (instregex "ST3i(8|16|32)_POST$")>;
993 // ASIMD store, 3 element, one lane, D
994 def : InstRW<[N1Write_5c_3L_3V],
995              (instrs ST3i64)>;
996 def : InstRW<[WriteAdr, N1Write_5c_3L_3V],
997              (instrs ST3i64_POST)>;
// ST4: "multiple" forms are split into D-form (8b/4h/2s), Q-form 16b/8h/4s
// (pattern) and Q-form 2d (explicit opcode); one-lane forms are split into
// 8/16/32-bit lanes (pattern) and the 64-bit lane (explicit opcode).  Each
// _POST record reuses the write of the plain record with WriteAdr listed first.
999 // ASIMD store, 4 element, multiple, D-form, B/H/S
1000 def : InstRW<[N1Write_7c_3L_3V],
1001              (instregex "ST4Fourv(8b|4h|2s)$")>;
1002 def : InstRW<[WriteAdr, N1Write_7c_3L_3V],
1003              (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
1005 // ASIMD store, 4 element, multiple, Q-form, B/H/S
1006 def : InstRW<[N1Write_9c_6L_6V],
1007              (instregex "ST4Fourv(16b|8h|4s)$")>;
1008 def : InstRW<[WriteAdr, N1Write_9c_6L_6V],
1009              (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
1011 // ASIMD store, 4 element, multiple, Q-form, D
1012 def : InstRW<[N1Write_6c_4L_4V],
1013              (instrs ST4Fourv2d)>;
1014 def : InstRW<[WriteAdr, N1Write_6c_4L_4V],
1015              (instrs ST4Fourv2d_POST)>;
1017 // ASIMD store, 4 element, one lane, B/H/S
// NOTE(review): here the 8/16/32-bit lanes use the "5c" write and the 64-bit
// lane below uses the "4c" write, the reverse of the ST3 one-lane records -
// confirm against the optimization guide.
1018 def : InstRW<[N1Write_5c_3L_3V],
1019              (instregex "ST4i(8|16|32)$")>;
1020 def : InstRW<[WriteAdr, N1Write_5c_3L_3V],
1021              (instregex "ST4i(8|16|32)_POST$")>;
1023 // ASIMD store, 4 element, one lane, D
1024 def : InstRW<[N1Write_4c_3L_3V],
1025              (instrs ST4i64)>;
1026 def : InstRW<[WriteAdr, N1Write_4c_3L_3V],
1027              (instrs ST4i64_POST)>;
1030 // Cryptography extensions
1031 // -----------------------------------------------------------------------------
1033 // Crypto AES ops
// N1WriteVC is a dedicated write (a one-element WriteSequence wrapping
// N1Write_2c_1V0) used only by AESD/AESE.  N1ReadVC is a SchedReadAdvance of 2
// restricted to producers that use N1WriteVC, and it is attached to the
// AESMC/AESIMC records.  Presumably this models the AESE/AESD -> AESMC/AESIMC
// pair issuing back to back - confirm against the optimization guide.
1034 def N1WriteVC : WriteSequence<[N1Write_2c_1V0]>;
1035 def N1ReadVC  : SchedReadAdvance<2, [N1WriteVC]>;
1036 def           : InstRW<[N1WriteVC], (instrs AESDrr, AESErr)>;
1037 def           : InstRW<[N1Write_2c_1V0, N1ReadVC], (instrs AESMCrr, AESIMCrr)>;
1039 // Crypto polynomial (64x64) multiply long
1040 // Crypto SHA1 hash acceleration op
1041 // Crypto SHA1 schedule acceleration ops
1042 // Crypto SHA256 schedule acceleration ops
// Patterns: PMULLv1i64 / PMULLv2i64 (exact), and opcodes starting with SHA1Hrr,
// SHA1SU0rr, SHA1SU1rr, SHA256SU0rr or SHA256SU1rr (the SHA patterns have no
// trailing '$').
1043 def : InstRW<[N1Write_2c_1V0], (instregex "^PMULLv[12]i64$",
1044                                           "^SHA1(H|SU0|SU1)rr",
1045                                           "^SHA256SU[01]rr")>;
1047 // Crypto SHA1 hash acceleration ops
1048 // Crypto SHA256 hash acceleration ops
// Patterns: SHA1Crrr / SHA1Mrrr / SHA1Prrr and SHA256Hrrr / SHA256H2rrr (exact).
1049 def : InstRW<[N1Write_4c_1V0], (instregex "^SHA1[CMP]rrr$",
1050                                           "^SHA256H2?rrr$")>;
1053 // CRC
1054 // -----------------------------------------------------------------------------
1056 // CRC checksum ops
// One pattern covers both CRC32 and CRC32C for the B, H, W and X operand-size
// spellings (CRC32Brr ... CRC32CXrr).
1057 def : InstRW<[N1Write_2c_1M], (instregex "^CRC32C?[BHWX]rr$")>;