[AMDGPU] New gfx940 mfma instructions
[llvm-project.git] / llvm / lib / Target / AArch64 / AArch64SchedFalkorDetails.td
bloba3a038f869fbb58a07e3276d4f41176583b7b835
1 //==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the uop and latency details for the machine model for the
10 // Qualcomm Falkor subtarget.
12 //===----------------------------------------------------------------------===//
14 // Contains all of the Falkor specific SchedWriteRes types. The approach
15 // below is to define a generic SchedWriteRes for every combination of
16 // latency and microOps. The naming convention is to use a prefix, one field
17 // for latency, and one or more microOp count/type designators.
18 //   Prefix: FalkorWr
19 //   MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD)
20 //   Latency: #cyc
22 // e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued
23 //      down one Z pipe, six SD pipes, four VX pipes and the total latency is
24 //      six cycles.
26 // Contains all of the Falkor specific ReadAdvance types for forwarding logic.
28 // Contains all of the Falkor specific WriteVariant types for immediate zero
29 // and LSLFast.
30 //===----------------------------------------------------------------------===//
32 //===----------------------------------------------------------------------===//
33 // Define 0 micro-op types
// Zero-micro-op writes: each has an empty resource list and NumMicroOps = 0,
// so only the Latency value distinguishes them. The LdInc/StInc variants are
// the writes referenced by the FalkorReadIncLd / FalkorReadIncSt read advances
// defined later in this file.
34 def FalkorWr_LdInc_none_2cyc : SchedWriteRes<[]> {
35   let Latency = 2;
36   let NumMicroOps = 0;
38 def FalkorWr_StInc_none_2cyc : SchedWriteRes<[]> {
39   let Latency = 2;
40   let NumMicroOps = 0;
// Generic zero-micro-op writes with 3- and 4-cycle latency.
42 def FalkorWr_none_3cyc : SchedWriteRes<[]> {
43   let Latency = 3;
44   let NumMicroOps = 0;
46 def FalkorWr_none_4cyc : SchedWriteRes<[]> {
47   let Latency = 4;
48   let NumMicroOps = 0;
51 //===----------------------------------------------------------------------===//
52 // Define 1 micro-op types
// Single-micro-op writes. None of these override NumMicroOps; each names one
// resource (or none) and sets only Latency.
//
// Integer pipes. The IMUL32/IMUL64 writes are separate defs so that the
// FalkorReadIMA32 / FalkorReadIMA64 read advances can reference them by name.
// NOTE(review): FalkorWr_IMUL32_1X_2cyc is named "2cyc" but sets Latency = 4;
// FalkorReadIMA32 advances reads of it by 3 cycles. Confirm the name/latency
// pairing is intentional before relying on the name.
54 def FalkorWr_1X_2cyc    : SchedWriteRes<[FalkorUnitX]>   { let Latency = 2; }
55 def FalkorWr_IMUL32_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
56 def FalkorWr_IMUL64_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
57 def FalkorWr_IMUL64_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; }
58 def FalkorWr_1Z_0cyc    : SchedWriteRes<[FalkorUnitZ]>   { let Latency = 0; }
59 def FalkorWr_1ZB_0cyc   : SchedWriteRes<[FalkorUnitZB]>  { let Latency = 0; }
60 def FalkorWr_1LD_3cyc   : SchedWriteRes<[FalkorUnitLD]>  { let Latency = 3; }
61 def FalkorWr_1LD_4cyc   : SchedWriteRes<[FalkorUnitLD]>  { let Latency = 4; }
62 def FalkorWr_1XYZ_0cyc  : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 0; }
63 def FalkorWr_1XYZ_1cyc  : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; }
64 def FalkorWr_1XYZ_2cyc  : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; }
65 def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; }
66 def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; }
// One micro-op with no resource listed and zero latency.
67 def FalkorWr_1none_0cyc : SchedWriteRes<[]>              { let Latency = 0; }
// Vector VX/VY writes, latencies 0 through 6. The VMUL32/FMUL32/FMUL64 writes
// duplicate the plain 4/5/6-cycle ones under distinct names; those names are
// the ones listed by FalkorReadVMA / FalkorReadFMA32 / FalkorReadFMA64.
69 def FalkorWr_1VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 0; }
70 def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; }
71 def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; }
72 def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; }
73 def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
74 def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
75 def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
76 def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
77 def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
78 def FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
// Load / store unit writes.
80 def FalkorWr_1LD_0cyc   : SchedWriteRes<[FalkorUnitLD]>  { let Latency = 0; }
81 def FalkorWr_1ST_0cyc   : SchedWriteRes<[FalkorUnitST]>  { let Latency = 0; }
82 def FalkorWr_1ST_3cyc   : SchedWriteRes<[FalkorUnitST]>  { let Latency = 3; }
// Writes on the GTOV and VTOG units.
84 def FalkorWr_1GTOV_0cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 0; }
85 def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; }
86 def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; }
87 def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; }
89 //===----------------------------------------------------------------------===//
90 // Define 2 micro-op types
// Two-micro-op writes that list FalkorUnitVXVY twice, with latencies 0
// through 6. As in the one-micro-op section, the VMUL32/FMUL32/FMUL64 variants
// repeat the 4/5/6-cycle definitions under the names listed by the
// FalkorReadVMA / FalkorReadFMA32 / FalkorReadFMA64 read advances.
92 def FalkorWr_2VXVY_0cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
93   let Latency = 0;
94   let NumMicroOps = 2;
96 def FalkorWr_2VXVY_1cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
97   let Latency = 1;
98   let NumMicroOps = 2;
100 def FalkorWr_2VXVY_2cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
101   let Latency = 2;
102   let NumMicroOps = 2;
104 def FalkorWr_2VXVY_3cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
105   let Latency = 3;
106   let NumMicroOps = 2;
108 def FalkorWr_2VXVY_4cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
109   let Latency = 4;
110   let NumMicroOps = 2;
112 def FalkorWr_VMUL32_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
113   let Latency = 4;
114   let NumMicroOps = 2;
116 def FalkorWr_2VXVY_5cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
117   let Latency = 5;
118   let NumMicroOps = 2;
120 def FalkorWr_FMUL32_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
121   let Latency = 5;
122   let NumMicroOps = 2;
124 def FalkorWr_2VXVY_6cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
125   let Latency = 6;
126   let NumMicroOps = 2;
128 def FalkorWr_FMUL64_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
129   let Latency = 6;
130   let NumMicroOps = 2;
// Remaining two-micro-op writes: combinations of two different units, or two
// copies of one unit, each with NumMicroOps = 2.
//
// Load combined with a vector or integer micro-op, and paired loads.
133 def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
134   let Latency = 4;
135   let NumMicroOps = 2;
137 def FalkorWr_1XYZ_1LD_4cyc  : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
138   let Latency = 4;
139   let NumMicroOps = 2;
141 def FalkorWr_2LD_3cyc   : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
142   let Latency = 3;
143   let NumMicroOps = 2;
// One VX plus one VY micro-op (the specific pipes, not the VXVY group), at a
// range of latencies from 2 to 21 cycles.
146 def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
147   let Latency = 5;
148   let NumMicroOps = 2;
151 def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
152   let Latency = 2;
153   let NumMicroOps = 2;
156 def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
157   let Latency = 4;
158   let NumMicroOps = 2;
161 def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
162   let Latency = 10;
163   let NumMicroOps = 2;
166 def FalkorWr_1VX_1VY_12cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
167   let Latency = 12;
168   let NumMicroOps = 2;
171 def FalkorWr_1VX_1VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
172   let Latency = 14;
173   let NumMicroOps = 2;
176 def FalkorWr_1VX_1VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
177   let Latency = 21;
178   let NumMicroOps = 2;
// Writes involving the GTOV unit.
181 def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> {
182   let Latency = 2;
183   let NumMicroOps = 2;
186 def FalkorWr_2GTOV_1cyc    : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> {
187   let Latency = 1;
188   let NumMicroOps = 2;
// Integer (XYZ) micro-op paired with a store or load micro-op, and paired
// integer micro-ops.
191 def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> {
192   let Latency = 4;
193   let NumMicroOps = 2;
195 def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
196   let Latency = 5;
197   let NumMicroOps = 2;
200 def FalkorWr_2XYZ_2cyc   : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> {
201   let Latency = 2;
202   let NumMicroOps = 2;
205 def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> {
206   let Latency = 0;
207   let NumMicroOps = 2;
// The next two writes are the only ones in this section that set
// ResourceCycles. The list presumably pairs positionally with the resource
// list (X: 2 cycles, Z: 8 or 11 cycles, the latter equal to the Latency) --
// TODO confirm against the SchedWriteRes definition.
210 def FalkorWr_1X_1Z_8cyc  : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
211   let Latency = 8;
212   let NumMicroOps = 2;
213   let ResourceCycles = [2, 8];
216 def FalkorWr_1X_1Z_11cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
217   let Latency = 11;
218   let NumMicroOps = 2;
219   let ResourceCycles = [2, 11];
222 def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> {
223   let Latency = 3;
224   let NumMicroOps = 2;
// "1none": two micro-ops are declared but only the LD unit is listed, so the
// second micro-op has no resource attached.
227 def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> {
228   let Latency = 3;
229   let NumMicroOps = 2;
// Store-data (SD or VSD) micro-op paired with a store (ST) micro-op.
232 def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> {
233   let Latency = 0;
234   let NumMicroOps = 2;
237 def FalkorWr_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitVSD, FalkorUnitST]> {
238   let Latency = 0;
239   let NumMicroOps = 2;
242 //===----------------------------------------------------------------------===//
243 // Define 3 micro-op types
// Three-micro-op writes using one ST, one SD and one LD unit; the two
// definitions differ only in Latency (0 vs. 3).
245 def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
246                                                FalkorUnitLD]> {
247   let Latency = 0;
248   let NumMicroOps = 3;
251 def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
252                                                FalkorUnitLD]> {
253   let Latency = 3;
254   let NumMicroOps = 3;
// Three-VXVY-micro-op writes, latencies 3 through 6. Following the naming
// convention described at the top of this file (and the 4VXVY / 5VXVY
// definitions), the resource list names one FalkorUnitVXVY per micro-op, so
// the number of listed resources matches NumMicroOps. Previously only two
// FalkorUnitVXVY entries were listed for these three-micro-op writes.
257 def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY]> {
258   let Latency = 3;
259   let NumMicroOps = 3;
262 def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY]> {
263   let Latency = 4;
264   let NumMicroOps = 3;
267 def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY]> {
268   let Latency = 5;
269   let NumMicroOps = 3;
272 def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY]> {
273   let Latency = 6;
274   let NumMicroOps = 3;
// One LD micro-op plus two VXVY micro-ops, as the name states (compare
// FalkorWr_1LD_3VXVY_4cyc, which lists LD plus three VXVY entries).
277 def FalkorWr_1LD_2VXVY_4cyc  : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, FalkorUnitVXVY]> {
278   let Latency = 4;
279   let NumMicroOps = 3;
// Three-micro-op load writes. "2LD_1none" declares three micro-ops but lists
// only two LD resources, so the third micro-op has no resource attached.
282 def FalkorWr_2LD_1none_3cyc  : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
283   let Latency = 3;
284   let NumMicroOps = 3;
287 def FalkorWr_3LD_3cyc        : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
288                                               FalkorUnitLD]> {
289   let Latency = 3;
290   let NumMicroOps = 3;
293 def FalkorWr_2LD_1Z_3cyc     : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
294                                              FalkorUnitZ]> {
295   let Latency = 3;
296   let NumMicroOps = 3;
// Integer (XYZ) micro-op plus a store-data (SD or VSD) and a store (ST)
// micro-op. These are the NoSchedPred alternatives of FalkorWr_STRro and
// FalkorWr_STRVro respectively.
299 def FalkorWr_1XYZ_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitSD, FalkorUnitST]> {
300   let Latency = 0;
301   let NumMicroOps = 3;
303 def FalkorWr_1XYZ_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitVSD, FalkorUnitST]> {
304   let Latency = 0;
305   let NumMicroOps = 3;
307 //===----------------------------------------------------------------------===//
308 // Define 4 micro-op types
// Four-micro-op writes.
//
// Two VX plus two VY micro-ops, listed in alternating VX/VY order, with
// latencies of 14, 20, 21 and 24 cycles.
310 def FalkorWr_2VX_2VY_14cyc  : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
311                                              FalkorUnitVX, FalkorUnitVY]> {
312   let Latency = 14;
313   let NumMicroOps = 4;
316 def FalkorWr_2VX_2VY_20cyc  : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
317                                              FalkorUnitVX, FalkorUnitVY]> {
318   let Latency = 20;
319   let NumMicroOps = 4;
322 def FalkorWr_2VX_2VY_21cyc  : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
323                                              FalkorUnitVX, FalkorUnitVY]> {
324   let Latency = 21;
325   let NumMicroOps = 4;
328 def FalkorWr_2VX_2VY_24cyc  : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
329                                              FalkorUnitVX, FalkorUnitVY]> {
330   let Latency = 24;
331   let NumMicroOps = 4;
// Four micro-ops on the VXVY group, latencies 2, 3, 4 and 6.
334 def FalkorWr_4VXVY_2cyc    : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
335                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
336   let Latency = 2;
337   let NumMicroOps = 4;
339 def FalkorWr_4VXVY_3cyc    : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
340                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
341   let Latency = 3;
342   let NumMicroOps = 4;
344 def FalkorWr_4VXVY_4cyc    : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
345                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
346   let Latency = 4;
347   let NumMicroOps = 4;
349 def FalkorWr_4VXVY_6cyc    : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
350                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
351   let Latency = 6;
352   let NumMicroOps = 4;
// Load-based four-micro-op writes.
355 def FalkorWr_4LD_3cyc      : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
356                                             FalkorUnitLD, FalkorUnitLD]> {
357   let Latency = 3;
358   let NumMicroOps = 4;
361 def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
362                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
363   let Latency = 4;
364   let NumMicroOps = 4;
// "2none": four micro-ops are declared but only two LD resources are listed.
367 def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
368   let Latency = 3;
369   let NumMicroOps = 4;
372 def FalkorWr_2LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST,
373                                               FalkorUnitSD, FalkorUnitLD]> {
374   let Latency = 3;
375   let NumMicroOps = 4;
// Two ST/VSD pairs, zero latency.
378 def FalkorWr_2VSD_2ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
379                                            FalkorUnitST, FalkorUnitVSD]> {
380   let Latency = 0;
381   let NumMicroOps = 4;
384 //===----------------------------------------------------------------------===//
385 // Define 5 micro-op types
// Five-micro-op writes.
387 def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
388                                             FalkorUnitVXVY, FalkorUnitVXVY,
389                                             FalkorUnitVXVY]> {
390   let Latency = 4;
391   let NumMicroOps = 5;
// "1none": five micro-ops are declared but only four resources are listed.
393 def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
394                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
395   let Latency = 4;
396   let NumMicroOps = 5;
398 def FalkorWr_5VXVY_7cyc    : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
399                                             FalkorUnitVXVY, FalkorUnitVXVY,
400                                             FalkorUnitVXVY]> {
401   let Latency = 7;
402   let NumMicroOps = 5;
// One integer (XYZ) or vector (VXVY) micro-op followed by two ST/VSD pairs.
// The XYZ form is the fast-shift alternative of FalkorWr_STRQro.
404 def FalkorWr_1XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
405                                                 FalkorUnitVSD, FalkorUnitST,
406                                                 FalkorUnitVSD]> {
407   let Latency = 0;
408   let NumMicroOps = 5;
410 def FalkorWr_1VXVY_2ST_2VSD_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
411                                                   FalkorUnitVSD, FalkorUnitST,
412                                                   FalkorUnitVSD]> {
413   let Latency = 0;
414   let NumMicroOps = 5;
416 //===----------------------------------------------------------------------===//
417 // Define 6 micro-op types
// Six-micro-op writes.
//
// "2none": six micro-ops are declared but only four resources are listed.
419 def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
420                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
421   let Latency = 4;
422   let NumMicroOps = 6;
// Two (XYZ or VXVY, ST, VSD) triples. The XYZ form is the NoSchedPred
// alternative of FalkorWr_STRQro.
425 def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
426                                                 FalkorUnitVSD, FalkorUnitXYZ,
427                                                 FalkorUnitST, FalkorUnitVSD]> {
428   let Latency = 0;
429   let NumMicroOps = 6;
432 def FalkorWr_2VXVY_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
433                                                  FalkorUnitVSD, FalkorUnitVXVY,
434                                                  FalkorUnitST, FalkorUnitVSD]> {
435   let Latency = 0;
436   let NumMicroOps = 6;
// Three ST/VSD pairs.
439 def FalkorWr_3VSD_3ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
440                                            FalkorUnitST, FalkorUnitVSD,
441                                            FalkorUnitST, FalkorUnitVSD]> {
442   let Latency = 0;
443   let NumMicroOps = 6;
446 //===----------------------------------------------------------------------===//
447 // Define 8 micro-op types
// Eight-micro-op writes.
//
// Two groups of (LD, LD, VXVY, VXVY), listed in that order.
449 def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
450                                              FalkorUnitVXVY, FalkorUnitVXVY,
451                                              FalkorUnitLD, FalkorUnitLD,
452                                              FalkorUnitVXVY, FalkorUnitVXVY]> {
453   let Latency = 4;
454   let NumMicroOps = 8;
// Four ST/VSD pairs.
457 def FalkorWr_4VSD_4ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
458                                            FalkorUnitST, FalkorUnitVSD,
459                                            FalkorUnitST, FalkorUnitVSD,
460                                            FalkorUnitST, FalkorUnitVSD]> {
461   let Latency = 0;
462   let NumMicroOps = 8;
465 //===----------------------------------------------------------------------===//
466 // Define 9 micro-op types
// Nine-micro-op writes. Both use four LD, four VXVY and one XYZ resource;
// they differ only in where the XYZ entry sits in the list, mirroring the
// order spelled out in each name.
468 def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
469                                              FalkorUnitLD, FalkorUnitVXVY,
470                                              FalkorUnitVXVY, FalkorUnitLD,
471                                              FalkorUnitLD, FalkorUnitXYZ,
472                                              FalkorUnitVXVY, FalkorUnitVXVY]> {
473   let Latency = 4;
474   let NumMicroOps = 9;
477 def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
478                                              FalkorUnitLD, FalkorUnitVXVY,
479                                              FalkorUnitVXVY, FalkorUnitXYZ,
480                                              FalkorUnitLD, FalkorUnitLD,
481                                              FalkorUnitVXVY, FalkorUnitVXVY]> {
482   let Latency = 4;
483   let NumMicroOps = 9;
486 //===----------------------------------------------------------------------===//
487 // Define 10 micro-op types
// Ten-micro-op write: two (VXVY, ST, VSD) triples followed by two (ST, VSD)
// pairs, for a total of two VXVY, four ST and four VSD entries.
489 def FalkorWr_2VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
490                                                  FalkorUnitVSD, FalkorUnitVXVY,
491                                                  FalkorUnitST, FalkorUnitVSD,
492                                                  FalkorUnitST, FalkorUnitVSD,
493                                                  FalkorUnitST, FalkorUnitVSD]> {
494   let Latency = 0;
495   let NumMicroOps = 10;
498 //===----------------------------------------------------------------------===//
499 // Define 12 micro-op types
// Twelve-micro-op write: four (VXVY, ST, VSD) triples.
501 def FalkorWr_4VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
502                                                  FalkorUnitVSD, FalkorUnitVXVY,
503                                                  FalkorUnitST, FalkorUnitVSD,
504                                                  FalkorUnitVXVY, FalkorUnitST,
505                                                  FalkorUnitVSD, FalkorUnitVXVY,
506                                                  FalkorUnitST, FalkorUnitVSD]> {
507   let Latency = 0;
508   let NumMicroOps = 12;
511 // Forwarding logic is modeled for multiply add/accumulate and
512 // load/store base register increment.
513 // -----------------------------------------------------------------------------
// Each read advance gives a cycle count and the list of Falkor writes it
// applies to. The multiply-accumulate advances list the dedicated
// IMUL/VMUL/FMUL writes defined above.
// NOTE(review): FalkorReadIMA32 advances by 3 cycles against a write whose
// name says "2cyc" but whose Latency is 4 -- confirm this is as intended.
514 def FalkorReadIMA32  : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>;
515 def FalkorReadIMA64  : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc, FalkorWr_IMUL64_1X_5cyc]>;
516 def FalkorReadVMA    : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr_VMUL32_2VXVY_4cyc]>;
517 def FalkorReadFMA32  : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>;
518 def FalkorReadFMA64  : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>;
// Base-register increment forwarding: a 1-cycle advance against the
// zero-micro-op LdInc / StInc writes.
520 def FalkorReadIncLd  : SchedReadAdvance<1, [FalkorWr_LdInc_none_2cyc]>;
521 def FalkorReadIncSt  : SchedReadAdvance<1, [FalkorWr_StInc_none_2cyc]>;
523 // SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast
524 // -----------------------------------------------------------------------------
// True when operand 1 of the instruction is an immediate equal to zero.
525 def FalkorImmZPred    : SchedPredicate<[{MI->getOperand(1).isImm() &&
526                                          MI->getOperand(1).getImm() == 0}]>;
// True when operand 1 is the WZR or XZR register.
// NOTE(review): unlike FalkorImmZPred, this calls getReg() without first
// checking isReg(); it presumably relies on only being attached to
// instructions whose operand 1 is a register -- verify against the users
// (FalkorWr_FMOV and FalkorWr_ORRi in this file).
527 def FalkorOp1ZrReg    : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR ||
529                                          MI->getOperand(1).getReg() == AArch64::XZR}]>;
// Delegates to the target instruction info hook isFalkorShiftExtFast().
530 def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>;
// Each write variant lists SchedVar alternatives: a predicate and the write
// list to use when it holds. Every variant below ends with a NoSchedPred
// alternative.
//
// FMOV: no-resource, zero-latency write when operand 1 is WZR/XZR; otherwise
// one GTOV micro-op with 1-cycle latency.
532 def FalkorWr_FMOV  : SchedWriteVariant<[
533                        SchedVar<FalkorOp1ZrReg,  [FalkorWr_1none_0cyc]>,
534                        SchedVar<NoSchedPred,     [FalkorWr_1GTOV_1cyc]>]>;
// MOVZ: no-resource write when the immediate in operand 1 is zero.
536 def FalkorWr_MOVZ  : SchedWriteVariant<[
537                        SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
538                        SchedVar<NoSchedPred,    [FalkorWr_1XYZB_0cyc]>]>; // imm fwd
// The variants keyed on FalkorShiftExtFastPred select a write with one fewer
// XYZ micro-op when the predicate holds than in the NoSchedPred alternative.
541 def FalkorWr_ADDSUBsx : SchedWriteVariant<[
542                           SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_1cyc]>,
543                           SchedVar<NoSchedPred,            [FalkorWr_2XYZ_2cyc]>]>;
545 def FalkorWr_LDRro : SchedWriteVariant<[
546                        SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_3cyc]>,
547                        SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1LD_4cyc]>]>;
549 def FalkorWr_LDRSro : SchedWriteVariant<[
550                         SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_4cyc]>,
551                         SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1LD_5cyc]>]>;
// ORRi: zero-latency XYZ write when operand 1 is WZR/XZR, else 1 cycle.
553 def FalkorWr_ORRi : SchedWriteVariant<[
554                       SchedVar<FalkorOp1ZrReg, [FalkorWr_1XYZ_0cyc]>, // imm fwd
555                       SchedVar<NoSchedPred,    [FalkorWr_1XYZ_1cyc]>]>;
557 def FalkorWr_PRFMro : SchedWriteVariant<[
558                         SchedVar<FalkorShiftExtFastPred, [FalkorWr_1ST_3cyc]>,
559                         SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1ST_4cyc]>]>;
561 def FalkorWr_STRVro : SchedWriteVariant<[
562                         SchedVar<FalkorShiftExtFastPred, [FalkorWr_1VSD_1ST_0cyc]>,
563                         SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1VSD_1ST_0cyc]>]>;
565 def FalkorWr_STRQro : SchedWriteVariant<[
566                         SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_2ST_2VSD_0cyc]>,
567                         SchedVar<NoSchedPred,            [FalkorWr_2XYZ_2ST_2VSD_0cyc]>]>;
569 def FalkorWr_STRro : SchedWriteVariant<[
570                        SchedVar<FalkorShiftExtFastPred, [FalkorWr_1SD_1ST_0cyc]>,
571                        SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1SD_1ST_0cyc]>]>;
573 //===----------------------------------------------------------------------===//
574 // Specialize the coarse model by associating instruction groups with the
575 // subtarget-defined types. As the model is refined, this will override most
576 // of the earlier mappings.
578 // Miscellaneous
579 // -----------------------------------------------------------------------------
// COPY is mapped to a single XYZ micro-op with 1-cycle latency regardless of
// the register classes involved.
581 // FIXME: This could be better modeled by looking at the regclasses of the operands.
582 def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs COPY)>;
584 // SIMD Floating-point Instructions
585 // -----------------------------------------------------------------------------
// Each InstRW below binds a list of Falkor write (and, where present, read)
// types to the instructions named by `instrs` or matched by `instregex`.
// Entries are grouped by the write type they use.
//
// One VXVY micro-op, 1 to 4 cycles.
586 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(FABS|FNEG)v2f32$")>;
588 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v2i32p)$")>;
589 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FAC(GE|GT)(32|64)$")>;
590 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FCM(EQ|GE|GT)(32|64|v2f32|v2i32)$")>;
591 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64|v2i32)rz$")>;
592 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FRINT(A|I|M|N|P|X|Z)v2f32$")>;
594 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^F(MAX|MIN)(NM)?Vv4i32v$")>;
595 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(FABD|FADD|FSUB)v2f32$")>;
596 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^FADDP(v2i32p|v2i64p|v2f32)$")>;
598 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v2f32)$")>;
599 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instrs FCVTXNv1i64)>;
600 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^FCVTZ(S|U)v2i32(_shift)?$")>;
// Multiplies use the dedicated FMUL32/FMUL64 writes, which are the writes
// listed by the FalkorReadFMA32 / FalkorReadFMA64 read advances.
602 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
603                                       (instregex "^(FMUL|FMULX)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
604 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
605                                       (instrs FMULX32)>;
607 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
608                                       (instregex "^(FMUL|FMULX)v1i64_indexed$")>;
609 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
610                                       (instrs FMULX64)>;
// Two VXVY micro-ops.
612 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(FABS|FNEG)(v2f64|v4f32)$")>;
614 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v2i64p)$")>;
615 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz$")>;
616 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instrs FCVTLv4i16, FCVTLv2i32)>;
617 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)$")>;
// v2f32 divide and square root: one VX plus one VY micro-op.
619 def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVv2f32)>;
620 def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTv2f32)>;
622 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32)$")>;
624 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32)$")>;
625 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instrs FCVTLv8i16, FCVTLv4i32)>;
626 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^FCVTZ(S|U)(v2i64|v4i32)(_shift)?$")>;
628 def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
629                                       (instregex "^(FMUL|FMULX)(v2f64|v4f32|v4i32_indexed)$")>;
631 def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
632                                       (instregex "^(FMUL|FMULX)v2i64_indexed$")>;
// Narrowing conversions use the three-micro-op VXVY writes.
634 def : InstRW<[FalkorWr_3VXVY_4cyc],   (instrs FCVTNv4i16, FCVTNv2i32, FCVTXNv2f32)>;
635 def : InstRW<[FalkorWr_3VXVY_5cyc],   (instrs FCVTNv8i16, FCVTNv4i32, FCVTXNv4f32)>;
// v2f64 / v4f32 divide and square root: two VX plus two VY micro-ops.
637 def : InstRW<[FalkorWr_2VX_2VY_14cyc],(instrs FDIVv2f64)>;
638 def : InstRW<[FalkorWr_2VX_2VY_20cyc],(instrs FDIVv4f32)>;
639 def : InstRW<[FalkorWr_2VX_2VY_21cyc],(instrs FSQRTv2f64)>;
640 def : InstRW<[FalkorWr_2VX_2VY_24cyc],(instrs FSQRTv4f32)>;
// Multiply-accumulate forms pair a multiply write with the matching
// FalkorRead* read advance (integer MLA/MLS first, then FMLA/FMLS).
642 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
643                                       (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>;
644 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
645                                       (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>;
647 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32],
648                                       (instregex "^FML(A|S)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
649 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64],
650                                       (instregex "^FML(A|S)v1i64_indexed$")>;
651 def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32],
652                                       (instregex "^FML(A|S)(v4f32|v4i32_indexed)$")>;
653 def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64],
654                                       (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>;
656 // SIMD Integer Instructions
657 // -----------------------------------------------------------------------------
658 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^ADD(v1i64|v2i32|v4i16|v8i8)$")>;
659 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs ADDPv2i64p)>;
660 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(AND|ORR|ORN|BIC|EOR)v8i8$")>;
661 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(BIC|ORR)(v2i32|v4i16)$")>;
662 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^NEG(v1i64|v2i32|v4i16|v8i8)$")>;
663 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$")>;
665 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>;
666 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)SHLv1i64$")>;
667 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>;
668 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)SHRd$")>;
669 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>;
670 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>;
671 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>;
672 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>;
673 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instrs PMULv8i8)>;
674 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>;
675 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^SHLd$")>;
677 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>;
678 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>;
679 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)(ABD|ADALP)(v8i8|v4i16|v2i32)(_v.*)?$")>;
680 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)ADDLVv4i16v$")>;
681 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
682 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>;
683 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
684 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(s|h|b)$")>;
685 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
686 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>;
687 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>;
688 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)RSHRd$")>;
689 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^R?SHRN(v2i32|v4i16|v8i8)_shift$")>;
690 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
691 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>;
692 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs ADDVv4i16v)>;
693 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)$")>;
694 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^SQABS(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
695 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64)$")>;
// Single VXVY micro-op, four-cycle latency (FalkorWr_1VXVY_4cyc).
697 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^(S|U)ADDLVv8i8v$")>;
698 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>;
699 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instrs ADDVv8i8v)>;
// Multiply-family instructions use the FalkorWr_VMUL32_1VXVY_4cyc write, which
// is defined outside this chunk.
// NOTE(review): presumably a VMUL-specific variant of the 1VXVY/4cyc write -
// confirm against its definition.
700 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
701                                       (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>;
702 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
703                                       (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
704 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
705                                       (instregex "^SQDMULL(i16|i32)$")>;
// SQRDMLAH/SQRDMLSH additionally list the FalkorReadVMA read entry.
// NOTE(review): presumably models forwarding into the accumulator operand -
// FalkorReadVMA is defined elsewhere, confirm there.
706 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
707                                       (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
// One VXVY micro-op, five-cycle latency.
709 def : InstRW<[FalkorWr_1VXVY_5cyc],   (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>;
// Two VXVY micro-ops; the latency (3, 4 or 5 cycles) is given by the write's
// "#cyc" suffix.
711 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs ADDVv4i32v)>;
713 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instrs ADDVv8i16v)>;
714 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^(ADD|SUB)HNv.*$")>;
715 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^(S|U)ABA(v2i32|v4i16|v8i8)$")>;
717 def : InstRW<[FalkorWr_2VXVY_5cyc],   (instrs ADDVv16i8v)>;
// Two VXVY micro-ops, six-cycle latency (FalkorWr_2VXVY_6cyc).
// The vector saturating narrowing shifts all carry the "_shift" suffix, so the
// suffix is matched literally. The previous "_shift?" made only the final 't'
// optional and would also have accepted a (non-existent) "_shif" suffix.
719 def : InstRW<[FalkorWr_2VXVY_6cyc],   (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32)_shift$")>;
720 def : InstRW<[FalkorWr_2VXVY_6cyc],   (instregex "^R(ADD|SUB)HNv.*$")>;
// Two VXVY micro-ops, one-cycle latency (FalkorWr_2VXVY_1cyc).
722 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^ADD(v16i8|v8i16|v4i32|v2i64)$")>;
// ADDP is split by element size: the v2i64 form (sz==11) is listed here, the
// remaining forms (sz!=11) use FalkorWr_2VXVY_2cyc.
723 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs ADDPv2i64)>; // sz==11
724 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(AND|ORR|ORN|BIC|EOR)v16i8$")>;
725 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(BIC|ORR)(v8i16|v4i32)$")>;
726 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(NEG|SUB)(v16i8|v8i16|v4i32|v2i64)$")>;
// Two VXVY micro-ops, two-cycle latency (FalkorWr_2VXVY_2cyc).
728 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)ADDLv.*$")>;
729 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v16i8|v2i64|v4i32|v8i16)(_v.*)?$")>;
730 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
731 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)SHR(v16i8|v8i16|v4i32|v2i64)_shift$")>;
732 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)SUBLv.*$")>;
733 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^((S|U)?(MAX|MIN)P?|ABS)(v16i8|v2i64|v4i32|v8i16)$")>;
// Counterpart of the ADDPv2i64 record: every ADDP form except v2i64.
734 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^ADDP(v4i32|v8i16|v16i8)$")>; // sz!=11
735 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>;
// The "rz" suffix selects the compare forms whose names end in "rz".
736 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>;
737 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>;
738 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^PMULL(v8i8|v16i8)$")>;
739 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>;
740 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
// Two VXVY micro-ops, three-cycle latency (FalkorWr_2VXVY_3cyc).
742 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift$")>;
743 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)ABD(v16i8|v8i16|v4i32|v2i64)$")>;
744 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)ABDLv.*$")>;
745 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16|v4i32|v2i64)(_v.*)?$")>;
746 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>;
747 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>;
748 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>;
749 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^R?SHRN(v2i64|v4i32|v8i16|v16i8)_shift$")>;
750 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>;
// The 64-bit-element PMULL forms; the v8i8/v16i8 forms use the 2cyc write.
751 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^PMULL(v1i64|v2i64)$")>;
752 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>;
753 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>;
// Multiply-family instructions mapped to FalkorWr_VMUL32_2VXVY_4cyc (defined
// outside this chunk; by name, two VXVY micro-ops with four-cycle latency).
755 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
756                                       (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
757 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
758                                       (instregex "^SQDMULLv.*$")>;
// The accumulating SQRDMLAH/SQRDMLSH forms also list the FalkorReadVMA read.
// NOTE(review): presumably accumulator forwarding - confirm at its definition.
759 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
760                                       (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>;
// Three VXVY micro-ops: the (S|U)ADDLV reductions, with the latency (3, 5 or
// 6 cycles) chosen per source arrangement.
762 def : InstRW<[FalkorWr_3VXVY_3cyc],   (instregex "^(S|U)ADDLVv4i32v$")>;
764 def : InstRW<[FalkorWr_3VXVY_5cyc],   (instregex "^(S|U)ADDLVv8i16v$")>;
766 def : InstRW<[FalkorWr_3VXVY_6cyc],   (instregex "^(S|U)ADDLVv16i8v$")>;
// Four VXVY micro-ops, with two-, three- and four-cycle latencies.
768 def : InstRW<[FalkorWr_4VXVY_2cyc],   (instregex "^(S|U)(ADD|SUB)Wv.*$")>;
770 def : InstRW<[FalkorWr_4VXVY_3cyc],   (instregex "^(S|U)ABALv.*$")>;
772 def : InstRW<[FalkorWr_4VXVY_4cyc],   (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>;
// SQDMLAL/SQDMLSL: the first record covers the i16/i32 and v1*_indexed names
// (1VXVY write), the second the names starting with v2/v4/v8 (2VXVY write).
// Both list the FalkorReadVMA read entry.
774 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
775                                       (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>;
776 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
777                                       (instregex "^SQD(MLAL|MLSL)v[248].*$")>;
779 // SIMD Load Instructions
780 // -----------------------------------------------------------------------------
// Conventions used throughout the load records:
//  * Each record lists one or more FalkorWr_* write entries followed by the
//    FalkorReadIncLd read entry (defined outside this chunk).
//  * The "_POST" records prepend FalkorWr_LdInc_none_2cyc, which the file
//    defines with zero micro-ops and a two-cycle latency.
//    NOTE(review): presumably this models the written-back base register -
//    confirm against the instruction operand order.
// One LD micro-op, three-cycle latency.
781 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],       (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
782 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
783                                                          (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>;
784 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],       (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
785 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
786                                                          (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
787 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],       (instrs LD2i64)>;
788 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
789                                                          (instrs LD2i64_POST)>;
// LD1i(8|16|32): one LD plus one VXVY micro-op, four-cycle latency. Each
// non-POST record is followed by its "_POST" twin, which adds the
// FalkorWr_LdInc_none_2cyc entry in front.
791 def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd], (instregex "^LD1i(8|16|32)$")>;
792 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd],
793                                                          (instregex "^LD1i(8|16|32)_POST$")>;
// FalkorWr_1LD_1none_3cyc: by name, one LD micro-op plus one "none" micro-op,
// three-cycle latency (definition is outside this chunk).
795 def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
796 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
797                                                          (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
798 def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(8b|4h|2s)$")>;
799 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
800                                                          (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
801 def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
802 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
803                                                          (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
// Two LD micro-ops, three-cycle latency (FalkorWr_2LD_3cyc). Each non-POST
// record is followed by its "_POST" twin, which adds FalkorWr_LdInc_none_2cyc.
805 def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd],       (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
806 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
807                                                          (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
808 def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd],       (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
809 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
810                                                          (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
811 def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd],       (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
812 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
813                                                          (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
814 def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd],       (instrs LD3i64)>;
815 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
816                                                          (instrs LD3i64_POST)>;
817 def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd],       (instrs LD4i64)>;
818 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
819                                                          (instrs LD4i64_POST)>;
// LD2i(8|16|32): one LD plus two VXVY micro-ops, four-cycle latency.
821 def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd], (instregex "^LD2i(8|16|32)$")>;
822 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd],
823                                                          (instregex "^LD2i(8|16|32)_POST$")>;
// FalkorWr_2LD_1none_3cyc: by name, two LD micro-ops plus one "none" micro-op,
// three-cycle latency. Used for the 8b/4h/2s/1d arrangements of LD1Three and
// LD3R.
825 def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
826 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd],
827                                                          (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
828 def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
829 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd],
830                                                          (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
// Three LD micro-ops, three-cycle latency: the 16b/8h/4s/2d arrangements of
// LD1Three and LD3R, plus LD3Threev2d.
832 def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd],       (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
833 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
834                                                          (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
835 def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd],       (instrs LD3Threev2d)>;
836 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
837                                                          (instrs LD3Threev2d_POST)>;
838 def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd],       (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
839 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
840                                                          (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
// LD3i(8|16|32): one LD plus three VXVY micro-ops, four-cycle latency.
842 def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd], (instregex "^LD3i(8|16|32)$")>;
843 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd],
844                                                          (instregex "^LD3i(8|16|32)_POST$")>;
// FalkorWr_2LD_2none_3cyc: by name, two LD micro-ops plus two "none"
// micro-ops, three-cycle latency. Used for the 8b/4h/2s/1d arrangements of
// LD1Four and LD4R.
846 def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
847 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd],
848                                                          (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
849 def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
850 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd],
851                                                          (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
// Four LD micro-ops, three-cycle latency: the 16b/8h/4s/2d arrangements of
// LD1Four and LD4R, plus LD4Fourv2d.
853 def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd],       (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
854 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
855                                                          (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
856 def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd],       (instrs LD4Fourv2d)>;
857 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
858                                                          (instrs LD4Fourv2d_POST)>;
859 def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd],       (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
860 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
861                                                          (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
// LD4i(8|16|32): one LD plus four VXVY micro-ops, four-cycle latency.
863 def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd], (instregex "^LD4i(8|16|32)$")>;
864 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd],
865                                                          (instregex "^LD4i(8|16|32)_POST$")>;
// LD3Three / LD4Four structure loads, which combine LD and VXVY micro-ops.
// 8b/4h/2s arrangements: 2 LD + 2 VXVY + 1 (LD3) or 2 (LD4) "none" micro-ops,
// four-cycle latency.
867 def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd],
868                                                          (instregex "^LD3Threev(8b|4h|2s)$")>;
869 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd],
870                                                          (instregex "^LD3Threev(8b|4h|2s)_POST$")>;
872 def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd],
873                                                          (instregex "^LD4Fourv(8b|4h|2s)$")>;
874 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd],
875                                                          (instregex "^LD4Fourv(8b|4h|2s)_POST$")>;
// 16b/8h/4s arrangements: both LD3Three and LD4Four use the same
// 2LD_2VXVY_2LD_2VXVY write in the non-POST form.
877 def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd],
878                                                          (instregex "^LD3Threev(16b|8h|4s)$")>;
880 def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd],
881                                                          (instregex "^LD4Fourv(16b|8h|4s)$")>;
// Unlike the other "_POST" records, these use distinct writes that include an
// extra XYZ micro-op. The position of "1XYZ" in the write name differs between
// the LD3 and LD4 records.
// NOTE(review): presumably the XYZ micro-op performs the address increment -
// confirm against the write definitions.
883 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, FalkorReadIncLd],
884                                                          (instregex "^LD3Threev(16b|8h|4s)_POST$")>;
886 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, FalkorReadIncLd],
887                                                          (instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
889 // Arithmetic and Logical Instructions
890 // -----------------------------------------------------------------------------
// Most records use FalkorWr_1XYZ_1cyc: one XYZ micro-op, one-cycle latency.
// In the patterns, "(W|X)" covers both register-width variants and the
// trailing group selects the operand-form suffix of the instruction name.
891 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>;
892 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^ADC(S)?(W|X)r$")>;
893 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^ADD(S)?(W|X)r(r|i)$")>;
894 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>;
895 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^AND(S)?(W|X)r(i|r|s)$")>;
896 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^BIC(S)?(W|X)r(r|s)$")>;
897 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^EON(W|X)r(r|s)$")>;
898 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^EOR(W|X)r(i|r|s)$")>;
899 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^ORN(W|X)r(r|s)$")>;
// ORR with the "ri" suffix gets its own write, FalkorWr_ORRi (defined outside
// this chunk); the "rr"/"rs" forms use the generic 1XYZ write.
900 def : InstRW<[FalkorWr_ORRi],         (instregex "^ORR(W|X)ri$")>;
901 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^ORR(W|X)r(r|s)$")>;
902 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^SBC(S)?(W|X)r$")>;
903 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^SUB(S)?(W|X)r(r|i)$")>;
// ADD/SUB with the "rs", "rx" and "rx64" suffixes use FalkorWr_ADDSUBsx
// (defined outside this chunk).
904 def : InstRW<[FalkorWr_ADDSUBsx],     (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>;
905 def : InstRW<[FalkorWr_ADDSUBsx],     (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>;
907 // SIMD Miscellaneous Instructions
908 // -----------------------------------------------------------------------------
// Single-micro-op records. The write name gives the micro-op type (GTOV, VTOG
// or VXVY) and the latency, per the file-header naming convention.
909 def : InstRW<[FalkorWr_1GTOV_1cyc],   (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>;
910 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>;
911 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^DUP(i8|i16|i32|i64)$")>;
912 def : InstRW<[FalkorWr_1GTOV_1cyc],   (instregex "^INSv(i8|i16)(gpr|lane)$")>;
913 def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^(S|U)MOVv.*$")>;
914 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(BIF|BIT|BSL|BSP)v8i8$")>;
915 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs EXTv8i8)>;
// Zero-cycle latency ("imm fwd"). The pattern carries an explicit '^' so that
// it is anchored the same way as every other pattern in this section.
916 def : InstRW<[FalkorWr_1VXVY_0cyc],   (instregex "^(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd
917 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs TBLv8i8One)>;
918 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs NOTv8i8)>;
919 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^REV(16|32|64)v.*$")>;
920 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>;
// One VXVY micro-op, two-cycle latency.
922 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(CLS|CLZ|CNT|RBIT)(v2i32|v4i16|v8i8)$")>;
// One VXVY micro-op, three-cycle latency. The QXTN/QXTUN pattern carries an
// explicit '^' so that it is anchored like the other patterns in the file.
924 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)QXTU?Nv.*$")>;
925 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>;
926 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs FRECPXv1i32, FRECPXv1i64)>;
927 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs URECPEv2i32, URSQRTEv2i32)>;
// The reciprocal and reciprocal-square-root step instructions use the
// FMUL32/FMUL64 writes (five- and six-cycle latency, by their names).
929 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
930                                       (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>;
932 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
933                                       (instrs FRECPS64, FRSQRTS64)>;
// INS of 32/64-bit elements: one GTOV plus one VXVY micro-op, two cycles.
935 def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],
936                                       (instregex "^INSv(i32|i64)(gpr|lane)$")>;
// Two-micro-op counterparts of the single-micro-op records: the v4i32/v2i64
// DUP forms and the v16i8 logical/permute forms.
937 def : InstRW<[FalkorWr_2GTOV_1cyc],   (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
938 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(BIF|BIT|BSL|BSP)v16i8$")>;
939 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs EXTv16i8)>;
// Zero-cycle latency ("imm fwd"). The pattern carries an explicit '^' so that
// it is anchored the same way as every other pattern in this section.
940 def : InstRW<[FalkorWr_2VXVY_0cyc],   (instregex "^(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd
941 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs NOTv16i8)>;
942 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs TBLv16i8One)>;
// Two VXVY micro-ops, two- and three-cycle latency.
944 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>;
945 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>;
946 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs URECPEv4i32, URSQRTEv4i32)>;
// Table lookups (TBL/TBX). In these records the micro-op count and latency
// increase with the table-size suffix of the instruction name (One..Four):
// 2 micro-ops/4 cycles up to 5 micro-ops/7 cycles.
948 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instrs TBLv8i8Two)>;
949 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^TBX(v8|v16)i8One$")>;
// The step instructions in this run use the two-micro-op FMUL32/FMUL64 writes.
951 def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
952                                       (instrs FRECPSv4f32, FRSQRTSv4f32)>;
954 def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
955                                       (instrs FRECPSv2f64, FRSQRTSv2f64)>;
957 def : InstRW<[FalkorWr_3VXVY_5cyc],   (instregex "^TBL(v8i8Three|v16i8Two)$")>;
958 def : InstRW<[FalkorWr_3VXVY_5cyc],   (instregex "^TBX(v8i8Two|v16i8Two)$")>;
960 def : InstRW<[FalkorWr_4VXVY_6cyc],   (instregex "^TBL(v8i8Four|v16i8Three)$")>;
961 def : InstRW<[FalkorWr_4VXVY_6cyc],   (instregex "^TBX(v8i8Three|v16i8Three)$")>;
963 def : InstRW<[FalkorWr_5VXVY_7cyc],   (instrs TBLv16i8Four)>;
964 def : InstRW<[FalkorWr_5VXVY_7cyc],   (instregex "^TBX(v8i8Four|v16i8Four)$")>;
966 // SIMD Store Instructions
967 // -----------------------------------------------------------------------------
// Conventions used throughout the store records:
//  * The write entry is followed by read entries: one ReadDefault per stored
//    register operand listed here (two for the pair stores), then
//    FalkorReadIncSt (defined outside this chunk).
//  * The post/pre-indexed records prepend FalkorWr_StInc_none_2cyc, which the
//    file defines with zero micro-ops and a two-cycle latency.
//  * The "_0cyc" suffix on the store writes denotes a zero-cycle latency.
969 def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
970                                        (instregex "^STR(Q|D|S|H|B)ui$")>;
971 def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
972                                        (instregex "^STR(Q|D|S|H|B)(post|pre)$")>;
// Register-offset stores use dedicated writes (FalkorWr_STRVro here and
// FalkorWr_STRQro for the Q form), both defined outside this chunk.
973 def : InstRW<[FalkorWr_STRVro, ReadDefault, FalkorReadIncSt],
974                                        (instregex "^STR(D|S|H|B)ro(W|X)$")>;
// Q-register pairs take two VSD + two ST micro-ops; D/S pairs take one each.
975 def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
976                                        (instregex "^STPQi$")>;
977 def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
978                                        (instregex "^STPQ(post|pre)$")>;
979 def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
980                                        (instregex "^STP(D|S)(i)$")>;
981 def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
982                                        (instregex "^STP(D|S)(post|pre)$")>;
983 def : InstRW<[FalkorWr_STRQro, ReadDefault, FalkorReadIncSt],
984                                        (instregex "^STRQro(W|X)$")>;
985 def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
986                                        (instregex "^STUR(Q|D|S|B|H)i$")>;
// The STNP records mirror the STP ones: D/S pairs use 1VSD_1ST, Q pairs
// 2VSD_2ST.
987 def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
988                                        (instrs STNPDi, STNPSi)>;
989 def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
990                                        (instrs STNPQi)>;
// ST1/ST2 forms that need one VSD and one ST micro-op (FalkorWr_1VSD_1ST_0cyc).
992 def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
993                                        (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
// "_POST" forms of the ST1One 8b/4h/2s/1d and single-element stores prepend
// the zero-micro-op FalkorWr_StInc_none_2cyc entry.
994 def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
995                                        (instregex "^ST1(One(v8b|v4h|v2s|v1d)_POST|(i8|i16|i32|i64)_POST)$")>;
996 def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
997                                        (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))$")>;
// The remaining "_POST" forms prepend FalkorWr_1XYZ_1cyc instead, i.e. one XYZ
// micro-op with one-cycle latency.
998 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
999                                        (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
1000 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
1001                                        (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))_POST$")>;
// Stores that need two VSD and two ST micro-ops (FalkorWr_2VSD_2ST_0cyc).
// The non-POST records come first; each "_POST" record below prepends
// FalkorWr_1XYZ_1cyc (one XYZ micro-op, one-cycle latency).
1003 def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
1004                                        (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
1005 def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
1006                                        (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>;
1007 def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
1008                                        (instregex "^ST3(i8|i16|i32|i64)$")>;
1009 def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
1010                                        (instregex "^ST4(i8|i16|i32|i64)$")>;
1011 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1012 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
1013                                        (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>;
1014 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1015 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
1016                                        (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>;
1017 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1018 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
1019                                        (instregex "^ST3(i8|i16|i32|i64)_POST$")>;
1020 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1021 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
1022                                        (instregex "^ST4(i8|i16|i32|i64)_POST$")>;
// ST3Three, 8b/4h/2s arrangements: one VXVY plus two ST and two VSD micro-ops.
1024 def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
1025                                        (instregex "^ST3Three(v8b|v4h|v2s)$")>;
1026 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1027 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
1028                                        (instregex "^ST3Three(v8b|v4h|v2s)_POST$")>;
// Three VSD and three ST micro-ops: the 16b/8h/4s/2d ST1Three forms and
// ST3Threev2d. The "_POST" records prepend FalkorWr_1XYZ_1cyc.
1030 def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
1031                                        (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>;
1032 def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
1033                                        (instrs ST3Threev2d)>;
1034 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1035 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
1036                                        (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>;
1037 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1038 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
1039                                        (instrs ST3Threev2d_POST)>;
// ST4Four, 8b/4h/2s arrangements: two VXVY plus two ST and two VSD micro-ops.
1041 def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
1042                                        (instregex "^ST4Four(v8b|v4h|v2s)$")>;
1043 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1044 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
1045                                        (instregex "^ST4Four(v8b|v4h|v2s)_POST$")>;
// Four VSD and four ST micro-ops: the 16b/8h/4s/2d ST1Four forms and
// ST4Fourv2d. The "_POST" records prepend FalkorWr_1XYZ_1cyc.
1047 def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
1048                                        (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>;
1049 def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
1050                                        (instrs ST4Fourv2d)>;
1051 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1052 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
1053                                        (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>;
1054 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1055 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
1056                                        (instrs ST4Fourv2d_POST)>;
// ST3Three, 16b/8h/4s arrangements: two VXVY plus four ST and four VSD
// micro-ops.
1058 def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
1059                                        (instregex "^ST3Three(v16b|v8h|v4s)$")>;
1060 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1061 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
1062                                        (instregex "^ST3Three(v16b|v8h|v4s)_POST$")>;
// ST4Four, 16b/8h/4s arrangements: four VXVY plus four ST and four VSD
// micro-ops.
1064 def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
1065                                        (instregex "^ST4Four(v16b|v8h|v4s)$")>;
1066 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1067 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
1068                                        (instregex "^ST4Four(v16b|v8h|v4s)_POST$")>;
1070 // Branch Instructions
1071 // -----------------------------------------------------------------------------
// All branch writes have a zero-cycle latency ("_0cyc"); they differ only in
// the micro-op type named in the write (none, Z, ZB, XYZB, or Z plus XY).
1072 def : InstRW<[FalkorWr_1none_0cyc],   (instrs B, TCRETURNdi)>;
1073 def : InstRW<[FalkorWr_1Z_0cyc],      (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>;
1074 def : InstRW<[FalkorWr_1Z_0cyc],      (instrs RET_ReallyLR, TCRETURNri)>;
1075 def : InstRW<[FalkorWr_1ZB_0cyc],     (instrs Bcc)>;
1076 def : InstRW<[FalkorWr_1XYZB_0cyc],   (instrs BL)>;
// BLR is the only branch in this section issued as two micro-ops (one Z, one
// XY).
1077 def : InstRW<[FalkorWr_1Z_1XY_0cyc],  (instrs BLR)>;
1079 // Cryptography Extensions
1080 // -----------------------------------------------------------------------------
// AES and SHA instructions. Writes named "1VX_1VY" denote one micro-op of type
// VX and one of type VY; the others use the combined VXVY type.
1081 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs SHA1Hrr)>;
1082 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instrs AESIMCrr, AESMCrr)>;
1083 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs AESDrr, AESErr)>;
1084 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>;
1085 def : InstRW<[FalkorWr_1VX_1VY_4cyc], (instregex "^SHA1(C|M|P)rrr$")>;
1086 def : InstRW<[FalkorWr_1VX_1VY_5cyc], (instrs SHA256H2rrr, SHA256Hrrr)>;
1087 def : InstRW<[FalkorWr_4VXVY_3cyc],   (instrs SHA256SU1rrr)>;
1089 // FP Load Instructions
1090 // -----------------------------------------------------------------------------
// Each record lists its write entries followed by the FalkorReadIncLd read
// entry. Pre/post-indexed records prepend FalkorWr_LdInc_none_2cyc (zero
// micro-ops, two-cycle latency). Pair loads list a second write,
// FalkorWr_none_3cyc (zero micro-ops, three-cycle latency).
// NOTE(review): presumably that second write covers the second destination
// register - confirm against the instruction operand order.
1091 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
1092                                       (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
1093 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
1094                                       (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>;
1095 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
1096                                       (instregex "^LDUR(Q|D|S|H|B)i$")>;
// Register-offset loads use the dedicated FalkorWr_LDRro write (defined
// outside this chunk).
1097 def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd],
1098                                       (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
// Q-register pairs: two LD micro-ops.
1099 def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1100                                       (instrs LDNPQi)>;
1101 def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1102                                       (instrs LDPQi)>;
// D/S-register pairs: one LD plus one "none" micro-op. These three patterns
// carry an explicit '^' so that they are anchored like the rest of the
// section.
1103 def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1104                                       (instregex "^LDNP(D|S)i$")>;
1105 def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1106                                       (instregex "^LDP(D|S)i$")>;
1107 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1108                                       (instregex "^LDP(D|S)(pre|post)$")>;
1109 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1110                                       (instregex "^LDPQ(pre|post)$")>;
1112 // FP Data Processing Instructions
1113 // -----------------------------------------------------------------------------
// Records are grouped by increasing latency. "(S|D)" in a pattern covers the
// S and D variants of the instruction name.
// One-cycle latency.
1114 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^FCCMP(E)?(S|D)rr$")>;
1115 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^FCMP(E)?(S|D)r(r|i)$")>;
1116 def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>;
1117 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(FABS|FNEG)(S|D)r$")>;
1118 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^FCSEL(S|D)rrr$")>;
// Two-cycle latency.
1120 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^F(MAX|MIN)(NM)?(S|D)rr$")>;
1121 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^F(MAX|MIN)(NM)?Pv2i(32|64)p$")>;
1122 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instrs FCVTSHr, FCVTDHr)>;
1123 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FRINT(A|I|M|N|P|X|Z)(S|D)r$")>;
// Three-cycle latency.
1125 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^FABD(32|64)$")>;
1126 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(FADD|FSUB)(S|D)rr$")>;
1127 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs FCVTHSr, FCVTHDr)>;
// Four-cycle latency.
1129 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instrs FCVTSDr, FCVTDSr)>;
// Multiplies: the S forms use the FMUL32 write (five cycles), the D forms the
// FMUL64 write (six cycles).
1131 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
1132                                       (instregex "^F(N)?MULSrr$")>;
1134 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
1135                                       (instregex "^F(N)?MULDrr$")>;
// Divide and square root: one VX and one VY micro-op each, with latencies of
// 10/14 cycles (FDIV S/D) and 12/21 cycles (FSQRT S/D).
1137 def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVSrr)>;
1138 def : InstRW<[FalkorWr_1VX_1VY_14cyc],(instrs FDIVDrr)>;
1139 def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTSr)>;
1140 def : InstRW<[FalkorWr_1VX_1VY_21cyc],(instrs FSQRTDr)>;
// Fused multiply-add/sub: two ReadDefault entries followed by
// FalkorReadFMA32/FalkorReadFMA64 (defined outside this chunk).
// NOTE(review): presumably the FMA read applies to the third (addend) source
// operand - confirm against the operand order.
1142 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32],
1143                                       (instregex "^F(N)?M(ADD|SUB)Srrr$")>;
1144 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64],
1145                                       (instregex "^F(N)?M(ADD|SUB)Drrr$")>;
1147 // FP Miscellaneous Instructions
1148 // -----------------------------------------------------------------------------
// FMOV and FP<->integer conversions. Records tagged "imm fwd" use zero-cycle
// writes.
1149 def : InstRW<[FalkorWr_FMOV],         (instregex "^FMOV(WS|XD|XDHigh)r$")>;
1150 def : InstRW<[FalkorWr_1GTOV_0cyc],   (instregex "^FMOV(S|D)i$")>; // imm fwd
1151 def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>;
1152 def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FCVTZ(S|U)(d|s)$")>;
1153 def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FMOV(SW|DX|DXHigh)r$")>;
1154 def : InstRW<[FalkorWr_1VXVY_0cyc],   (instregex "^FMOV(Sr|Dr|v.*_ns)$")>; // imm fwd
1155 // FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr
1156 def : InstRW<[FalkorWr_2VXVY_0cyc],   (instrs FMOVD0, FMOVS0)>; // imm fwd
// Integer-to-FP conversions, four-cycle latency.
1158 def : InstRW<[FalkorWr_1GTOV_4cyc],   (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>;
// NOTE(review): unlike the other patterns in this file, the next two have no
// trailing '$', so the optional "(_shift)?" group does not restrict the match.
// Confirm whether an end anchor was intended.
1159 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>;
1161 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>;
1163 // Load Instructions
1164 // -----------------------------------------------------------------------------
// Integer loads and prefetches. Conventions visible in the entries below:
//  * Prefetches (PRFM*/PRFUM) are modelled with store-pipe writes
//    (FalkorWr_1ST_0cyc) or the dedicated FalkorWr_PRFMro write.
//  * Pair loads list a second write, FalkorWr_none_<N>cyc. FalkorWr_none_3cyc
//    is defined at the top of this file with zero micro-ops and latency 3;
//    presumably it stands for the second destination register -- TODO confirm.
//  * Pre/post-indexed forms list FalkorWr_LdInc_none_2cyc first (zero
//    micro-ops, latency 2 per its definition at the top of this file);
//    presumably it covers the written-back base register -- TODO confirm.
//  * Every load ends with the FalkorReadIncLd read resource, defined
//    elsewhere in this file.
//  * Register-offset forms (…roW/…roX) use the dedicated FalkorWr_LDRro /
//    FalkorWr_LDRSro writes instead of a fixed-latency write.
1165 def : InstRW<[FalkorWr_1ST_0cyc],     (instrs PRFMui, PRFMl)>;
1166 def : InstRW<[FalkorWr_1ST_0cyc],     (instrs PRFUMi)>;
1167 def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1168                                       (instregex "^LDNP(W|X)i$")>;
1169 def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1170                                       (instregex "^LDP(W|X)i$")>;
1171 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1172                                       (instregex "^LDP(W|X)(post|pre)$")>;
1173 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
1174                                       (instregex "^LDR(BB|HH|W|X)ui$")>;
1175 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
1176                                       (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
1177 def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd],
1178                                       (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>;
1179 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
1180                                       (instregex "^LDR(W|X)l$")>;
1181 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
1182                                       (instregex "^LDTR(B|H|W|X)i$")>;
1183 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
1184                                       (instregex "^LDUR(BB|HH|W|X)i$")>;
1185 def : InstRW<[FalkorWr_PRFMro],       (instregex "^PRFMro(W|X)$")>;
// The LDPSW / LDRS* / LDTRS* / LDURS* forms below mirror the entries above but
// use 4-cycle load writes instead of 3-cycle ones.
1186 def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd],
1187                                       (instrs LDPSWi)>;
1188 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd],
1189                                       (instregex "^LDPSW(post|pre)$")>;
1190 def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
1191                                       (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
1192 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4cyc, FalkorReadIncLd],
1193                                       (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
1194 def : InstRW<[FalkorWr_LDRSro, FalkorReadIncLd],
1195                                       (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
1196 def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
1197                                       (instrs LDRSWl)>;
1198 def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
1199                                       (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
1200 def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
1201                                       (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
1203 // Miscellaneous Data-Processing Instructions
1204 // -----------------------------------------------------------------------------
// Bitfield moves, CRC32, bit-count/reverse and extract. Per the write names,
// CRC32 is restricted to the X pipe while the others may issue on any of the
// X/Y/Z pipes; EXTR is modelled as two micro-ops.
1205 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(S|U)?BFM(W|X)ri$")>;
1206 def : InstRW<[FalkorWr_1X_2cyc],      (instregex "^CRC32.*$")>;
1207 def : InstRW<[FalkorWr_1XYZ_2cyc],    (instregex "^(CLS|CLZ|RBIT|REV|REV16|REV32)(W|X)r$")>;
1208 def : InstRW<[FalkorWr_2XYZ_2cyc],    (instregex "^EXTR(W|X)rri$")>;
1210 // Divide and Multiply Instructions
1211 // -----------------------------------------------------------------------------
// Integer multiply-accumulate entries list three reads after the write: two
// ReadDefault entries followed by FalkorReadIMA32/FalkorReadIMA64.
// NOTE(review): presumably the third read is a forwarding ReadAdvance on the
// accumulator operand -- confirm against the FalkorReadIMA* definitions.
1212 def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
1213                                         (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
1214 def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32],
1215                                         (instregex "^M(ADD|SUB)Wrrr$")>;
// (S|U)MULH has no accumulator-style read list; it takes the write alone.
1217 def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex "^(S|U)MULHrr$")>;
1218 def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
1219                                         (instregex "^M(ADD|SUB)Xrrr$")>;
// Integer divides: one X plus one Z micro-op per the write names, with a
// longer latency for the X-register (64-bit) form than the W-register form.
1221 def : InstRW<[FalkorWr_1X_1Z_8cyc],     (instregex "^(S|U)DIVWr$")>;
1222 def : InstRW<[FalkorWr_1X_1Z_11cyc],    (instregex "^(S|U)DIVXr$")>;
// Vector widening multiplies. The multiply-accumulate/subtract forms add a
// single FalkorReadVMA read (defined elsewhere in this file).
1224 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
1225                                         (instregex "^(S|U)MULLv.*$")>;
1226 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
1227                                         (instregex "^(S|U)(MLAL|MLSL)v.*$")>;
1229 // Move and Shift Instructions
1230 // -----------------------------------------------------------------------------
// Variable shifts, immediate moves and address materialisation. Entries
// tagged "imm fwd" use writes whose names carry a 0-cycle latency. MOVZ has
// its own FalkorWr_MOVZ write, defined elsewhere in this file.
1231 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(LSLV|LSRV|ASRV|RORV)(W|X)r$")>;
1232 def : InstRW<[FalkorWr_1XYZ_0cyc],    (instregex "^MOVK(W|X)i$")>; // imm fwd
1233 def : InstRW<[FalkorWr_1XYZB_0cyc],   (instregex "^ADRP?$")>; // imm fwd
1234 def : InstRW<[FalkorWr_1XYZB_0cyc],   (instregex "^MOVN(W|X)i$")>; // imm fwd
1235 def : InstRW<[FalkorWr_MOVZ],         (instregex "^MOVZ(W|X)i$")>;
1236 def : InstRW<[FalkorWr_1XYZ_0cyc],    (instrs MOVi32imm, MOVi64imm)>; // imm fwd (approximation)
// The MOVaddr* and LOADgot opcodes are modelled as a WriteSequence of two
// writes: two 1-cycle XYZ micro-ops for MOVaddr*, and a 3-cycle load followed
// by a 1-cycle XYZ micro-op for LOADgot.
// NOTE(review): presumably these are pseudo-instructions that expand to two
// real instructions -- confirm against their definitions.
1237 def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>],
1238                                       (instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>;
1239 def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>],
1240                                       (instrs LOADgot)>;
1242 // Other Instructions
1243 // -----------------------------------------------------------------------------
// Barriers, exception-generating and system instructions, plus the
// exclusive/acquire-release load and store forms. Most of the system entries
// use 0-cycle writes that only name the pipe they occupy.
1244 def : InstRW<[FalkorWr_1LD_0cyc],     (instrs CLREX, DMB, DSB)>;
1245 def : InstRW<[FalkorWr_1none_0cyc],   (instrs BRK, DCPS1, DCPS2, DCPS3, HINT, HLT, HVC, ISB, SMC, SVC)>;
1246 def : InstRW<[FalkorWr_1ST_0cyc],     (instrs SYSxt, SYSLxt)>;
1247 def : InstRW<[FalkorWr_1Z_0cyc],      (instrs MSRpstateImm1, MSRpstateImm4)>;
// Acquire/exclusive loads follow the same shape as the plain loads: a 3-cycle
// load write, an extra FalkorWr_none_3cyc for the pair forms, and a trailing
// FalkorReadIncLd read.
1249 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
1250                                       (instregex "^(LDAR(B|H|W|X)|LDAXR(B|H|W|X)|LDXR(B|H|W|X))$")>;
1251 def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1252                                       (instregex "^(LDAXP(W|X)|LDXP(W|X))$")>;
1253 def : InstRW<[FalkorWr_1LD_3cyc],     (instrs MRS, MOVbaseTLS)>;
1255 def : InstRW<[FalkorWr_1LD_1Z_3cyc],  (instrs DRPS)>;
1257 def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>;
// Non-temporal store pair: two ReadDefault entries followed by
// FalkorReadIncSt, the same read list used by the STP entries in the Store
// Instructions section.
1258 def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
1259                                       (instrs STNPWi, STNPXi)>;
1260 def : InstRW<[FalkorWr_2LD_1Z_3cyc],  (instrs ERET)>;
// "^LDC.*$" matches every opcode whose name starts with LDC.
// NOTE(review): presumably intended for the LDCLR* atomic family -- confirm
// that no unrelated opcode shares the prefix.
1262 def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>;
// Release / exclusive stores. The number of ReadDefault entries ahead of
// FalkorReadIncSt differs between forms (one for STLR; two for STXR, STXP and
// STLXR; three for STLXP).
// NOTE(review): presumably this tracks the number of operands preceding the
// base-address operand in each opcode -- confirm against the instruction
// definitions.
1263 def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, FalkorReadIncSt],
1264                                       (instregex "^STLR(B|H|W|X)$")>;
1265 def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
1266                                       (instregex "^STXP(W|X)$")>;
1267 def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
1268                                       (instregex "^STXR(B|H|W|X)$")>;
1270 def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, ReadDefault, FalkorReadIncSt],
1271                                       (instregex "^STLXP(W|X)$")>;
1272 def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
1273                                       (instregex "^STLXR(B|H|W|X)$")>;
1275 // Store Instructions
1276 // -----------------------------------------------------------------------------
// Integer stores. Conventions visible in the entries below:
//  * The store itself is FalkorWr_1SD_1ST_0cyc (one SD plus one ST micro-op,
//    0-cycle latency per the naming convention), or the dedicated
//    FalkorWr_STRro write for register-offset forms.
//  * Pre/post-indexed forms list FalkorWr_StInc_none_2cyc first (zero
//    micro-ops, latency 2 per its definition at the top of this file);
//    presumably it covers the written-back base register -- TODO confirm.
//  * The read list is one ReadDefault per stored register (two for pairs)
//    followed by FalkorReadIncSt.
//    NOTE(review): presumably FalkorReadIncSt applies to the base-address
//    operand -- confirm against its definition.
1277 def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
1278                                           (instregex "^STP(W|X)i$")>;
1279 def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
1280                                           (instregex "^STP(W|X)(post|pre)$")>;
1281 def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
1282                                           (instregex "^STR(BB|HH|W|X)ui$")>;
1283 def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
1284                                           (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
1285 def : InstRW<[FalkorWr_STRro, ReadDefault, FalkorReadIncSt],
1286                                           (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
1287 def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
1288                                           (instregex "^STTR(B|H|W|X)i$")>;
1289 def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
1290                                           (instregex "^STUR(BB|HH|W|X)i$")>;