[SampleProfileLoader] Fix integer overflow in generateMDProfMetadata (#90217)
[llvm-project.git] / llvm / lib / Target / AArch64 / AArch64SchedA64FX.td
blobd6fe84a2c9c9b4c031aaddf2ddcb43daa6f6db13
1 //=- AArch64SchedA64FX.td - Fujitsu A64FX Scheduling Defs -*- tablegen -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the scheduling model for the Fujitsu A64FX processors.
11 //===----------------------------------------------------------------------===//
13 def A64FXModel : SchedMachineModel {
14   let IssueWidth            =   6; // 6 micro-ops dispatched at a time.
15   let MicroOpBufferSize     = 180; // 180 entries in micro-op re-order buffer.
16   let LoadLatency           =   5; // Optimistic load latency.
17   let MispredictPenalty     =  12; // Extra cycles for mispredicted branch.
18   // Determined via a mix of micro-arch details and experimentation.
19   let LoopMicroOpBufferSize = 128;
20   let PostRAScheduler       =   1; // Using PostRA sched.
21   let CompleteModel         =   1; // Model is declared complete; features listed below are excluded.
23   list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, SVEUnsupported.F,
24                                                     [HasMTE, HasMatMulInt8, HasBF16,
25                                                     HasPAuth, HasPAuthLR, HasCPA,
26                                                     HasCSSC]);
27   let FullInstRWOverlapCheck = 0; // Full InstRW overlap check is off; this model has repeated InstRW entries.
30 let SchedModel = A64FXModel in {
32 // Define the issue ports.
34 // A64FXIP*
36 // Port 0
37 def A64FXIPFLA : ProcResource<1>;
39 // Port 1
40 def A64FXIPPR : ProcResource<1>;
42 // Port 2
43 def A64FXIPEXA : ProcResource<1>;
45 // Port 3
46 def A64FXIPFLB : ProcResource<1>;
48 // Port 4
49 def A64FXIPEXB : ProcResource<1>;
51 // Port 5
52 def A64FXIPEAGA : ProcResource<1>;
54 // Port 6
55 def A64FXIPEAGB : ProcResource<1>;
57 // Port 7
58 def A64FXIPBR : ProcResource<1>;
60 // Define groups for the functional units on each issue port.  Each group
61 // created will be used by a WriteRes later on.
63 def A64FXGI7 : ProcResGroup<[A64FXIPBR]>;
65 def A64FXGI0 : ProcResGroup<[A64FXIPFLA]>;
67 def A64FXGI1 : ProcResGroup<[A64FXIPPR]>;
69 def A64FXGI2 : ProcResGroup<[A64FXIPEXA]>;
71 def A64FXGI3 : ProcResGroup<[A64FXIPFLB]>;
73 def A64FXGI4 : ProcResGroup<[A64FXIPEXB]>;
75 def A64FXGI5 : ProcResGroup<[A64FXIPEAGA]>;
77 def A64FXGI6 : ProcResGroup<[A64FXIPEAGB]>;
79 def A64FXGI03 : ProcResGroup<[A64FXIPFLA, A64FXIPFLB]>;
81 def A64FXGI01 : ProcResGroup<[A64FXIPFLA, A64FXIPPR]>;
83 def A64FXGI24 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB]>;
85 def A64FXGI56 : ProcResGroup<[A64FXIPEAGA, A64FXIPEAGB]>;
87 def A64FXGI056 : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA, A64FXIPEAGB]>;
89 def A64FXGI2456 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB]>;
91 def A64FXAny : ProcResGroup<[A64FXIPFLA, A64FXIPPR, A64FXIPEXA, A64FXIPFLB,
92                              A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB, A64FXIPBR]>;
94 def A64FXWrite_1Cyc_GI7 : SchedWriteRes<[A64FXGI7]> {
95   let Latency = 1;
98 def A64FXWrite_2Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
99   let Latency = 2;
102 def A64FXWrite_4Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
103   let Latency = 4;
106 def A64FXWrite_6Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
107   let Latency = 6;
110 def A64FXWrite_8Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
111   let Latency = 8;
114 def A64FXWrite_9Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
115   let Latency = 9;
118 def A64FXWrite_3Cyc_GI1 : SchedWriteRes<[A64FXGI1]> {
119   let Latency = 3;
122 def A64FXWrite_5Cyc_GI2 : SchedWriteRes<[A64FXGI2]> {
123   let Latency = 5;
126 def A64FXWrite_4Cyc_GI3 : SchedWriteRes<[A64FXGI3]> {
127   let Latency = 4;
130 def A64FXWrite_6Cyc_GI3 : SchedWriteRes<[A64FXGI3]> {
131   let Latency = 6;
134 def A64FXWrite_4Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
135   let Latency = 4;
138 def A64FXWrite_8Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
139   let Latency = 8;
142 def A64FXWrite_9Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
143   let Latency = 9;
146 def A64FXWrite_10Cyc_GI4 : SchedWriteRes<[A64FXGI4]> {
147   let Latency = 10;
150 def A64FXWrite_12Cyc_GI4 : SchedWriteRes<[A64FXGI4]> {
151   let Latency = 12;
154 def A64FXWrite_20Cyc_GI4 : SchedWriteRes<[A64FXGI4]> {
155   let Latency = 20;
158 def A64FXWrite_5Cyc_GI5 : SchedWriteRes<[A64FXGI5]> {
159   let Latency = 5;
162 def A64FXWrite_11Cyc_GI5 : SchedWriteRes<[A64FXGI5]> {
163   let Latency = 11;
166 def A64FXWrite_5Cyc_GI6 : SchedWriteRes<[A64FXGI6]> {
167   let Latency = 5;
170 def A64FXWrite_1Cyc_GI24 : SchedWriteRes<[A64FXGI24]> {
171   let Latency = 1;
174 def A64FXWrite_2Cyc_GI24 : SchedWriteRes<[A64FXGI24]> {
175   let Latency = 2;
178 def A64FXWrite_4Cyc_NGI24 : SchedWriteRes<[A64FXGI24]> {
179   let Latency = 4;
180   let NumMicroOps = 4;
183 def A64FXWrite_1Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
184   let Latency = 1;
187 def A64FXWrite_5Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
188   let Latency = 5;
191 def A64FXWrite_8Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
192   let Latency = 8;
195 def A64FXWrite_11Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
196   let Latency = 11;
199 def A64FXWrite_LDNP: SchedWriteRes<[A64FXGI56]> {
200   let Latency = 5;
201   let NumMicroOps = 2;
204 def A64FXWrite_LDP01: SchedWriteRes<[A64FXGI2456]> {
205   let Latency = 5;
206   let NumMicroOps = 3;
209 def A64FXWrite_LDR01: SchedWriteRes<[A64FXGI2456]> {
210   let Latency = 5;
211   let NumMicroOps = 2;
214 def A64FXWrite_LD102: SchedWriteRes<[A64FXGI56]> {
215   let Latency = 8;
216   let NumMicroOps = 2;
219 def A64FXWrite_LD103: SchedWriteRes<[A64FXGI56]> {
220   let Latency = 11;
221   let NumMicroOps = 2;
225 def A64FXWrite_LD104: SchedWriteRes<[A64FXGI56]> {
226   let Latency = 8;
227   let NumMicroOps = 3;
230 def A64FXWrite_LD105: SchedWriteRes<[A64FXGI56]> {
231   let Latency = 11;
232   let NumMicroOps = 3;
235 def A64FXWrite_LD106: SchedWriteRes<[A64FXGI56]> {
236   let Latency = 8;
237   let NumMicroOps = 4;
240 def A64FXWrite_LD107: SchedWriteRes<[A64FXGI56]> {
241   let Latency = 11;
242   let NumMicroOps = 4;
245 def A64FXWrite_LD108: SchedWriteRes<[A64FXGI56]> {
246   let Latency = 8;
247   let NumMicroOps = 2;
250 def A64FXWrite_LD109: SchedWriteRes<[A64FXGI56]> {
251   let Latency = 11;
252   let NumMicroOps = 2;
255 def A64FXWrite_LD110: SchedWriteRes<[A64FXGI56]> {
256   let Latency = 8;
257   let NumMicroOps = 3;
260 def A64FXWrite_LD111: SchedWriteRes<[A64FXGI56]> {
261   let Latency = 11;
262   let NumMicroOps = 3;
265 def A64FXWrite_LD112: SchedWriteRes<[A64FXGI56]> {
266   let Latency = 8;
267   let NumMicroOps = 4;
270 def A64FXWrite_LD113: SchedWriteRes<[A64FXGI56]> {
271   let Latency = 11;
272   let NumMicroOps = 4;
275 def A64FXWrite_LD114: SchedWriteRes<[A64FXGI56]> {
276   let Latency = 8;
277   let NumMicroOps = 5;
280 def A64FXWrite_LD115: SchedWriteRes<[A64FXGI56]> {
281   let Latency = 11;
282   let NumMicroOps = 5;
285 def A64FXWrite_LD1I0: SchedWriteRes<[A64FXGI056]> {
286   let Latency = 8;
287   let NumMicroOps = 2;
290 def A64FXWrite_LD1I1: SchedWriteRes<[A64FXGI056]> {
291   let Latency = 8;
292   let NumMicroOps = 3;
295 def A64FXWrite_LD2I0: SchedWriteRes<[A64FXGI056]> {
296   let Latency = 8;
297   let NumMicroOps = 4;
300 def A64FXWrite_LD2I1: SchedWriteRes<[A64FXGI056]> {
301   let Latency = 8;
302   let NumMicroOps = 5;
305 def A64FXWrite_LD3I0: SchedWriteRes<[A64FXGI056]> {
306   let Latency = 8;
307   let NumMicroOps = 6;
310 def A64FXWrite_LD3I1: SchedWriteRes<[A64FXGI056]> {
311   let Latency = 8;
312   let NumMicroOps = 7;
315 def A64FXWrite_LD4I0: SchedWriteRes<[A64FXGI056]> {
316   let Latency = 8;
317   let NumMicroOps = 8;
320 def A64FXWrite_LD4I1: SchedWriteRes<[A64FXGI056]> {
321   let Latency = 8;
322   let NumMicroOps = 9;
325 def A64FXWrite_1Cyc_GI2456 : SchedWriteRes<[A64FXGI2456]> {
326   let Latency = 1;
329 def A64FXWrite_FMOV_GV : SchedWriteRes<[A64FXGI03]> {
330   let Latency = 10;
333 def A64FXWrite_FMOV_VG14 : SchedWriteRes<[A64FXGI03]> {
334   let Latency = 14;
337 def A64FXWrite_ADDLV : SchedWriteRes<[A64FXGI03]> {
338   let Latency = 12;
341 def A64FXWrite_MULLE : SchedWriteRes<[A64FXGI03]> {
342   let Latency = 14;
345 def A64FXWrite_MULLV : SchedWriteRes<[A64FXGI03]> {
346   let Latency = 14;
349 def A64FXWrite_MADDL : SchedWriteRes<[A64FXGI03]> {
350   let Latency = 6;
353 def A64FXWrite_ABA : SchedWriteRes<[A64FXGI03]> {
354   let Latency = 8;
357 def A64FXWrite_ABAL : SchedWriteRes<[A64FXGI03]> {
358   let Latency = 10;
361 def A64FXWrite_ADDLV1 : SchedWriteRes<[A64FXGI03]> {
362   let Latency = 12;
363   let NumMicroOps = 6;
366 def A64FXWrite_MINMAXV : SchedWriteRes<[A64FXGI03]> {
367   let Latency = 14;
368   let NumMicroOps = 6;
371 def A64FXWrite_SQRDMULH : SchedWriteRes<[A64FXGI03]> {
372   let Latency = 9;
375 def A64FXWrite_PMUL : SchedWriteRes<[A64FXGI03]> {
376   let Latency = 8;
380 def A64FXWrite_SRSRAV : SchedWriteRes<[A64FXGI03]> {
381   let Latency = 8;
382   let NumMicroOps = 3;
385 def A64FXWrite_SSRAV : SchedWriteRes<[A64FXGI03]> {
386   let Latency = 8;
387   let NumMicroOps = 2;
390 def A64FXWrite_RSHRN : SchedWriteRes<[A64FXGI03]> {
391   let Latency = 10;
392   let NumMicroOps = 3;
395 def A64FXWrite_SHRN : SchedWriteRes<[A64FXGI03]> {
396   let Latency = 10;
397   let NumMicroOps = 2;
401 def A64FXWrite_ADDP : SchedWriteRes<[A64FXGI03]> {
402   let Latency = 10;
403   let NumMicroOps = 3;
406 def A64FXWrite_FMULXE : SchedWriteRes<[A64FXGI03]> {
407   let Latency = 15;
408   let NumMicroOps = 2;
411 def A64FXWrite_FADDPV : SchedWriteRes<[A64FXGI03]> {
412   let Latency = 15;
413   let NumMicroOps = 3;
416 def A64FXWrite_SADALP : SchedWriteRes<[A64FXGI03]> {
417   let Latency = 10;
418   let NumMicroOps = 3;
421 def A64FXWrite_SADDLP : SchedWriteRes<[A64FXGI03]> {
422   let Latency = 10;
423   let NumMicroOps = 2;
426 def A64FXWrite_FCVTXNV : SchedWriteRes<[A64FXGI03]> {
427   let Latency = 15;
428   let NumMicroOps = 2;
431 def A64FXWrite_FMAXVVH : SchedWriteRes<[A64FXGI03]> {
432   let Latency = 14;
433   let NumMicroOps = 7;
436 def A64FXWrite_BIF : SchedWriteRes<[A64FXGI03]> {
437   let Latency = 5;
440 def A64FXWrite_DUPGENERAL : SchedWriteRes<[A64FXGI03]> {
441   let Latency = 10;
444 def A64FXWrite_SHA00 : SchedWriteRes<[A64FXGI0]> {
445   let Latency = 9;
448 def A64FXWrite_SHA01 : SchedWriteRes<[A64FXGI0]> {
449   let Latency = 12;
452 def A64FXWrite_SMOV : SchedWriteRes<[A64FXGI03]> {
453   let Latency = 25;
456 def A64FXWrite_TBX1 : SchedWriteRes<[A64FXGI03]> {
457   let Latency = 10;
458   let NumMicroOps = 3;
461 def A64FXWrite_TBX2 : SchedWriteRes<[A64FXGI03]> {
462   let Latency = 10;
463   let NumMicroOps = 5;
466 def A64FXWrite_TBX3 : SchedWriteRes<[A64FXGI03]> {
467   let Latency = 10;
468   let NumMicroOps = 7;
471 def A64FXWrite_TBX4 : SchedWriteRes<[A64FXGI03]> {
472   let Latency = 10;
473   let NumMicroOps = 9;
476 def A64FXWrite_PREF0: SchedWriteRes<[A64FXGI56]> {
477   let Latency = 0;
480 def A64FXWrite_PREF1: SchedWriteRes<[A64FXGI56]> {
481   let Latency = 0;
484 def A64FXWrite_SWP: SchedWriteRes<[A64FXGI56]> {
485   let Latency = 0;
488 def A64FXWrite_STUR: SchedWriteRes<[A64FXGI56]> {
489   let Latency = 0;
492 def A64FXWrite_STNP: SchedWriteRes<[A64FXGI56]> {
493   let Latency = 0;
496 def A64FXWrite_STP01: SchedWriteRes<[A64FXGI56]> {
497   let Latency = 0;
500 def A64FXWrite_ST10: SchedWriteRes<[A64FXGI56]> {
501   let Latency = 0;
504 def A64FXWrite_ST11: SchedWriteRes<[A64FXGI56]> {
505   let Latency = 0;
508 def A64FXWrite_ST12: SchedWriteRes<[A64FXGI56]> {
509   let Latency = 0;
512 def A64FXWrite_ST13: SchedWriteRes<[A64FXGI56]> {
513   let Latency = 0;
516 def A64FXWrite_ST14: SchedWriteRes<[A64FXGI56]> {
517   let Latency = 1;
520 def A64FXWrite_ST15: SchedWriteRes<[A64FXGI56]> {
521   let Latency = 1;
524 def A64FXWrite_ST16: SchedWriteRes<[A64FXGI56]> {
525   let Latency = 1;
528 def A64FXWrite_ST17: SchedWriteRes<[A64FXGI56]> {
529   let Latency = 1;
532 def A64FXWrite_CAS: SchedWriteRes<[A64FXGI56]> {
533   let Latency = 7;
536 // Define commonly used read types.
538 // No forwarding is provided for these types.
539 def : ReadAdvance<ReadI,       0>;
540 def : ReadAdvance<ReadISReg,   0>;
541 def : ReadAdvance<ReadIEReg,   0>;
542 def : ReadAdvance<ReadIM,      0>;
543 def : ReadAdvance<ReadIMA,     0>;
544 def : ReadAdvance<ReadID,      0>;
545 def : ReadAdvance<ReadExtrHi,  0>;
546 def : ReadAdvance<ReadAdrBase, 0>;
547 def : ReadAdvance<ReadST,      0>;
548 def : ReadAdvance<ReadVLD,     0>;
550 //===----------------------------------------------------------------------===//
551 // 3. Instruction Tables.
553 //---
554 // 3.1 Branch Instructions
555 //---
557 // Branch, immed
558 // Branch and link, immed
559 // Compare and branch
560 def : WriteRes<WriteBr,      [A64FXGI7]> {
561   let Latency = 1;
564 // Branch, register
565 // Branch and link, register != LR
566 // Branch and link, register = LR
567 def : WriteRes<WriteBrReg,   [A64FXGI7]> {
568   let Latency = 1;
571 def : WriteRes<WriteSys,     []> { let Latency = 1; }
572 def : WriteRes<WriteBarrier, []> { let Latency = 1; }
573 def : WriteRes<WriteHint,    []> { let Latency = 1; }
575 def : WriteRes<WriteAtomic,  []> {
576   let Latency = 4;
579 //---
580 // Branch
581 //---
582 def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs B, BL, BR, BLR)>;
583 def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs RET)>;
584 def : InstRW<[A64FXWrite_1Cyc_GI7], (instregex "^B..$")>;
585 def : InstRW<[A64FXWrite_1Cyc_GI7],
586             (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>;
588 //---
589 // 3.2 Arithmetic and Logical Instructions
590 // 3.3 Move and Shift Instructions
591 //---
593 // ALU, basic
594 // Conditional compare
595 // Conditional select
596 // Address generation
597 def : WriteRes<WriteI,       [A64FXGI2456]> {
598   let Latency = 1;
601 def : InstRW<[WriteI],
602             (instregex "ADD?(W|X)r(i|r|s|x)",   "ADDS?(W|X)r(i|r|s|x)(64)?",
603                        "AND?(W|X)r(i|r|s|x)",   "ANDS?(W|X)r(i|r|s|x)",
604                        "ADC(W|X)r",
605                        "BIC?(W|X)r(i|r|s|x)",   "BICS?(W|X)r(i|r|s|x)",
606                        "EON?(W|X)r(i|r|s|x)",   "ORN?(W|X)r(i|r|s|x)",
607                        "ORR?(W|X)r(i|r|s|x)",   "SUB?(W|X)r(i|r|s|x)",
608                        "SUBS?(W|X)r(i|r|s|x)",  "SBC(W|X)r",
609                        "SBCS(W|X)r",            "CCMN(W|X)(i|r)",
610                        "CCMP(W|X)(i|r)",        "CSEL(W|X)r",
611                        "CSINC(W|X)r",           "CSINV(W|X)r",
612                        "CSNEG(W|X)r")>;
614 def : InstRW<[WriteI], (instrs COPY)>;
616 // ALU, extend and/or shift
617 def : WriteRes<WriteISReg,   [A64FXGI2456]> {
618   let Latency = 2;
621 def : InstRW<[WriteISReg],
622             (instregex "ADD?(W|X)r(i|r|s|x)",   "ADDS?(W|X)r(i|r|s|x)(64)?",
623                        "AND?(W|X)r(i|r|s|x)",   "ANDS?(W|X)r(i|r|s|x)",
624                        "ADC(W|X)r",
625                        "BIC?(W|X)r(i|r|s|x)",   "BICS?(W|X)r(i|r|s|x)",
626                        "EON?(W|X)r(i|r|s|x)",   "ORN?(W|X)r(i|r|s|x)",
627                        "ORR?(W|X)r(i|r|s|x)",   "SUB?(W|X)r(i|r|s|x)",
628                        "SUBS?(W|X)r(i|r|s|x)",  "SBC(W|X)r",
629                        "SBCS(W|X)r",            "CCMN(W|X)(i|r)",
630                        "CCMP(W|X)(i|r)",        "CSEL(W|X)r",
631                        "CSINC(W|X)r",           "CSINV(W|X)r",
632                        "CSNEG(W|X)r")>;
634 def : WriteRes<WriteIEReg,   [A64FXGI2456]> {
635   let Latency = 1;
638 def : InstRW<[WriteIEReg],
639             (instregex "ADD?(W|X)r(i|r|s|x)",   "ADDS?(W|X)r(i|r|s|x)(64)?",
640                        "AND?(W|X)r(i|r|s|x)",   "ANDS?(W|X)r(i|r|s|x)",
641                        "ADC(W|X)r",
642                        "BIC?(W|X)r(i|r|s|x)",   "BICS?(W|X)r(i|r|s|x)",
643                        "EON?(W|X)r(i|r|s|x)",   "ORN?(W|X)r(i|r|s|x)",
644                        "ORR?(W|X)r(i|r|s|x)",   "SUB?(W|X)r(i|r|s|x)",
645                        "SUBS?(W|X)r(i|r|s|x)",  "SBC(W|X)r",
646                        "SBCS(W|X)r",            "CCMN(W|X)(i|r)",
647                        "CCMP(W|X)(i|r)",        "CSEL(W|X)r",
648                        "CSINC(W|X)r",           "CSINV(W|X)r",
649                        "CSNEG(W|X)r")>;
651 // Move immed
652 def : WriteRes<WriteImm,     [A64FXGI2456]> {
653   let Latency = 1;
656 def : InstRW<[A64FXWrite_1Cyc_GI2456],
657             (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
659 def : InstRW<[A64FXWrite_2Cyc_GI24],
660             (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>;
662 // Variable shift
663 def : WriteRes<WriteIS,      [A64FXGI2456]> {
664   let Latency = 1;
667 //---
668 // 3.4 Divide and Multiply Instructions
669 //---
671 // Divide, W-form
672 def : WriteRes<WriteID32,    [A64FXGI4]> {
673   let Latency = 39;
674   let ReleaseAtCycles = [39];
677 // Divide, X-form
678 def : WriteRes<WriteID64,    [A64FXGI4]> {
679   let Latency = 23;
680   let ReleaseAtCycles = [23];
683 // Multiply accumulate, W-form
684 def : WriteRes<WriteIM32,    [A64FXGI2456]> {
685   let Latency = 5;
688 // Multiply accumulate, X-form
689 def : WriteRes<WriteIM64,    [A64FXGI2456]> {
690   let Latency = 5;
693 def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>;
694 def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>;
695 def : InstRW<[A64FXWrite_MADDL],
696             (instregex "(S|U)(MADDL|MSUBL)rrr")>;
698 def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
699 def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;
701 // Bitfield extract, two reg
702 def : WriteRes<WriteExtr,    [A64FXGI2456]> {
703   let Latency = 1;
706 // Multiply high
707 def : InstRW<[A64FXWrite_5Cyc_GI2], (instrs SMULHrr, UMULHrr)>;
709 // Miscellaneous Data-Processing Instructions
710 // Bitfield extract
711 def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs EXTRWrri, EXTRXrri)>;
713 // Bitfield move - basic
714 def : InstRW<[A64FXWrite_1Cyc_GI24],
715             (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>;
717 // Bitfield move, insert
718 def : InstRW<[A64FXWrite_4Cyc_NGI24], (instregex "^BFM")>;
719 def : InstRW<[A64FXWrite_1Cyc_GI24], (instregex "(S|U)?BFM.*")>;
721 // Count leading
722 def : InstRW<[A64FXWrite_2Cyc_GI0], (instregex "^CLS(W|X)r$",
723                                                "^CLZ(W|X)r$")>;
725 // Reverse bits
726 def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBITWr, RBITXr)>;
728 // Cryptography Extensions
729 def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AES[DE]")>;
730 def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AESI?MC")>;
731 def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^PMULL")>;
732 def : InstRW<[A64FXWrite_SHA00], (instregex "^SHA1SU0")>;
733 def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA1(H|SU1)")>;
734 def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA1[CMP]")>;
735 def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU0")>;
736 def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU1")>;
737 def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA256(H|H2)")>;
739 // CRC Instructions
740 def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32Brr, CRC32Hrr)>;
741 def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32Wrr)>;
742 def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32Xrr)>;
744 def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32CBrr, CRC32CHrr)>;
745 def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32CWrr)>;
746 def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32CXrr)>;
748 // Reverse bits/bytes
749 // NOTE: Handled by WriteI.
751 //---
752 // 3.6 Load Instructions
753 // 3.10 FP Load Instructions
754 //---
756 // Load register, literal
757 // Load register, unscaled immed
758 // Load register, immed unprivileged
759 // Load register, unsigned immed
760 def : WriteRes<WriteLD,      [A64FXGI56]> {
761   let Latency = 4;
764 // Load register, immed post-index
765 // NOTE: Handled by WriteLD, WriteI.
766 // Load register, immed pre-index
767 // NOTE: Handled by WriteLD, WriteAdr.
768 def : WriteRes<WriteAdr,     [A64FXGI2456]> {
769   let Latency = 1;
772 // Load pair, immed offset, normal
773 // Load pair, immed offset, signed words, base != SP
774 // Load pair, immed offset signed words, base = SP
775 // LDP only breaks into *one* LS micro-op.  Thus
776 // the resources are handled by WriteLD.
777 def : WriteRes<WriteLDHi,    []> {
778   let Latency = 5;
781 // Load register offset, basic
782 // Load register, register offset, scale by 4/8
783 // Load register, register offset, scale by 2
784 // Load register offset, extend
785 // Load register, register offset, extend, scale by 4/8
786 // Load register, register offset, extend, scale by 2
787 def A64FXWriteLDIdx : SchedWriteVariant<[
788   SchedVar<ScaledIdxPred, [A64FXWrite_1Cyc_GI56]>,
789   SchedVar<NoSchedPred,   [A64FXWrite_1Cyc_GI56]>]>;
790 def : SchedAlias<WriteLDIdx, A64FXWriteLDIdx>;
792 def A64FXReadAdrBase : SchedReadVariant<[
793   SchedVar<ScaledIdxPred, [ReadDefault]>,
794   SchedVar<NoSchedPred,   [ReadDefault]>]>;
795 def : SchedAlias<ReadAdrBase, A64FXReadAdrBase>;
797 // Load pair, immed pre-index, normal
798 // Load pair, immed pre-index, signed words
799 // Load pair, immed post-index, normal
800 // Load pair, immed post-index, signed words
801 // NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.
803 def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPDi)>;
804 def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPQi)>;
805 def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPSi)>;
806 def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPWi)>;
807 def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPXi)>;
809 def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPDi)>;
810 def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPQi)>;
811 def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPSi)>;
812 def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPSWi)>;
813 def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPWi)>;
814 def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPXi)>;
816 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRBui)>;
817 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRDui)>;
818 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRHui)>;
819 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRQui)>;
820 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRSui)>;
822 def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRDl)>;
823 def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRQl)>;
824 def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRWl)>;
825 def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRXl)>;
827 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRBi)>;
828 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRHi)>;
829 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRWi)>;
830 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRXi)>;
832 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSBWi)>;
833 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSBXi)>;
834 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSHWi)>;
835 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSHXi)>;
836 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSWi)>;
838 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
839             (instrs LDPDpre)>;
840 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
841             (instrs LDPQpre)>;
842 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
843             (instrs LDPSpre)>;
844 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
845             (instrs LDPWpre)>;
846 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
847             (instrs LDPXpre)>; // X-register pair, pre-index; completes the D/Q/S/W/X set.
849 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBpre)>;
850 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRDpre)>;
851 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHpre)>;
852 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRQpre)>;
853 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSpre)>;
854 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRWpre)>;
855 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRXpre)>;
857 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBWpre)>;
858 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBXpre)>;
859 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBWpost)>;
860 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBXpost)>;
862 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHWpre)>;
863 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHXpre)>;
864 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHWpost)>;
865 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHXpost)>;
867 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBBpre)>;
868 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBBpost)>;
870 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHHpre)>;
871 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHHpost)>;
873 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
874             (instrs LDPDpost)>;
875 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
876             (instrs LDPQpost)>;
877 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
878             (instrs LDPSpost)>;
879 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
880             (instrs LDPWpost)>;
881 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
882             (instrs LDPXpost)>;
884 def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRBpost)>;
885 def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRDpost)>;
886 def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRHpost)>;
887 def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRQpost)>;
888 def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRSpost)>;
889 def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRWpost)>;
890 def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRXpost)>;
892 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
893             (instrs LDPDpre)>;
894 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
895             (instrs LDPQpre)>;
896 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
897             (instrs LDPSpre)>;
898 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
899             (instrs LDPWpre)>;
900 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
901             (instrs LDPXpre)>;
903 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBpre)>;
904 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRDpre)>;
905 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHpre)>;
906 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRQpre)>;
907 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSpre)>;
908 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRWpre)>;
909 def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRXpre)>;
911 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
912             (instrs LDPDpost)>;
913 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
914             (instrs LDPQpost)>;
915 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
916             (instrs LDPSpost)>;
917 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
918             (instrs LDPWpost)>;
919 def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
920             (instrs LDPXpost)>;
922 def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRBpost)>;
923 def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRDpost)>;
924 def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRHpost)>;
925 def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRQpost)>;
926 def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRSpost)>;
927 def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRWpost)>;
928 def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRXpost)>;
930 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRBroW)>;
931 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRDroW)>;
932 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHroW)>;
933 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHHroW)>;
934 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRQroW)>;
935 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSroW)>;
936 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHWroW)>;
937 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHXroW)>;
938 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRWroW)>;
939 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRXroW)>;
941 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRBroX)>;
942 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRDroX)>;
943 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHHroX)>;
944 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHroX)>;
945 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRQroX)>;
946 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSroX)>;
947 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHWroX)>;
948 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHXroX)>;
949 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRWroX)>;
950 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRXroX)>;
952 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
953             (instrs LDRBroW)>;
954 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
955             (instrs LDRBroW)>; // NOTE(review): repeats the LDRBroW entry directly above; possibly a different opcode was intended - confirm.
956 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
957              (instrs LDRDroW)>;
958 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
959             (instrs LDRHroW)>;
960 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
961             (instrs LDRHHroW)>;
962 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
963             (instrs LDRQroW)>;
964 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
965             (instrs LDRSroW)>;
966 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
967             (instrs LDRSHWroW)>;
968 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
969             (instrs LDRSHXroW)>;
970 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
971             (instrs LDRWroW)>;
972 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
973             (instrs LDRXroW)>;
974 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
975             (instrs LDRBroX)>;
976 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
977             (instrs LDRDroX)>;
978 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
979             (instrs LDRHroX)>;
980 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
981             (instrs LDRHHroX)>;
982 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
983             (instrs LDRQroX)>;
984 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
985             (instrs LDRSroX)>;
986 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
987             (instrs LDRSHWroX)>;
988 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
989             (instrs LDRSHXroX)>;
990 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
991             (instrs LDRWroX)>;
992 def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
993             (instrs LDRXroX)>;
995 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURBi)>;
996 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURBBi)>;
997 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURDi)>;
998 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURHi)>;
999 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURHHi)>;
1000 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURQi)>;
1001 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSi)>;
1002 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURXi)>;
1003 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSBWi)>;
1004 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSBXi)>;
1005 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSHWi)>;
1006 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSHXi)>;
1007 def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSWi)>;
1009 //---
1010 // Prefetch
1011 //---
// PRFMl is the only prefetch opcode mapped to A64FXWrite_PREF0; the other
// four prefetch opcodes below all use A64FXWrite_PREF1. Both writes are
// defined outside this part of the file.
1012 def : InstRW<[A64FXWrite_PREF0], (instrs PRFMl)>;
1013 def : InstRW<[A64FXWrite_PREF1], (instrs PRFUMi)>;
1014 def : InstRW<[A64FXWrite_PREF1], (instrs PRFMui)>;
1015 def : InstRW<[A64FXWrite_PREF1], (instrs PRFMroW)>;
1016 def : InstRW<[A64FXWrite_PREF1], (instrs PRFMroX)>;
1018 //--
1019 // 3.7 Store Instructions
1020 // 3.11 FP Store Instructions
1021 //--
1023 // Store register, unscaled immed
1024 // Store register, immed unprivileged
1025 // Store register, unsigned immed
// Resources and latency of the WriteST SchedWrite on this model: it uses
// A64FXGI56 and completes with a latency of 1 cycle.
1026 def : WriteRes<WriteST,      [A64FXGI56]> {
1027   let Latency = 1;
1028 }
1030 // Store register, immed post-index
1031 // NOTE: Handled by WriteAdr, WriteST, ReadAdrBase
1033 // Store register, immed pre-index
1034 // NOTE: Handled by WriteAdr, WriteST
1036 // Store register, register offset, basic
1037 // Store register, register offset, scaled by 4/8
1038 // Store register, register offset, scaled by 2
1039 // Store register, register offset, extend
1040 // Store register, register offset, extend, scale by 4/8
1041 // Store register, register offset, extend, scale by 1
// Resources and latency of the WriteSTIdx SchedWrite on this model: it uses
// both A64FXGI56 and A64FXGI2456, with a latency of 1 cycle.
1042 def : WriteRes<WriteSTIdx, [A64FXGI56, A64FXGI2456]> {
1043   let Latency = 1;
1044 }
1046 // Store pair, immed offset, W-form
1047 // Store pair, immed offset, X-form
// Resources and latency of the WriteSTP SchedWrite on this model: it uses
// A64FXGI56 with a latency of 1 cycle.
1048 def : WriteRes<WriteSTP,     [A64FXGI56]> {
1049   let Latency = 1;
1050 }
1052 // Store pair, immed post-index, W-form
1053 // Store pair, immed post-index, X-form
1054 // Store pair, immed pre-index, W-form
1055 // Store pair, immed pre-index, X-form
1056 // NOTE: Handled by WriteAdr, WriteSTP.
// Unscaled-immediate stores (STUR*): each opcode is mapped to A64FXWrite_STUR.
1058 def : InstRW<[A64FXWrite_STUR], (instrs STURBi)>;
1059 def : InstRW<[A64FXWrite_STUR], (instrs STURBBi)>;
1060 def : InstRW<[A64FXWrite_STUR], (instrs STURDi)>;
1061 def : InstRW<[A64FXWrite_STUR], (instrs STURHi)>;
1062 def : InstRW<[A64FXWrite_STUR], (instrs STURHHi)>;
1063 def : InstRW<[A64FXWrite_STUR], (instrs STURQi)>;
1064 def : InstRW<[A64FXWrite_STUR], (instrs STURSi)>;
1065 def : InstRW<[A64FXWrite_STUR], (instrs STURWi)>;
1066 def : InstRW<[A64FXWrite_STUR], (instrs STURXi)>;
// STTR* stores: mapped to [WriteAdr, A64FXWrite_STUR].
// NOTE(review): WriteAdr is listed first here although the other
// immediate-offset stores in this run carry no WriteAdr -- confirm that
// this operand list is intended for the STTR* opcodes.
1068 def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRBi)>;
1069 def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRHi)>;
1070 def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRWi)>;
1071 def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRXi)>;
// STNP* (D, Q, X, W forms): mapped to A64FXWrite_STNP.
1073 def : InstRW<[A64FXWrite_STNP], (instrs STNPDi)>;
1074 def : InstRW<[A64FXWrite_STNP], (instrs STNPQi)>;
1075 def : InstRW<[A64FXWrite_STNP], (instrs STNPXi)>;
1076 def : InstRW<[A64FXWrite_STNP], (instrs STNPWi)>;
// STP* immediate-offset forms reuse the same A64FXWrite_STNP write.
1078 def : InstRW<[A64FXWrite_STNP], (instrs STPDi)>;
1079 def : InstRW<[A64FXWrite_STNP], (instrs STPQi)>;
1080 def : InstRW<[A64FXWrite_STNP], (instrs STPXi)>;
1081 def : InstRW<[A64FXWrite_STNP], (instrs STPWi)>;
// Unsigned-immediate stores (STR*ui): each opcode is mapped to
// A64FXWrite_STUR. One entry per opcode; a byte-identical second entry for
// the same opcode adds nothing to the model, so each opcode is listed once.
1083 def : InstRW<[A64FXWrite_STUR], (instrs STRBui)>;
1085 def : InstRW<[A64FXWrite_STUR], (instrs STRDui)>;
1087 def : InstRW<[A64FXWrite_STUR], (instrs STRHui)>;
1089 def : InstRW<[A64FXWrite_STUR], (instrs STRQui)>;
1091 def : InstRW<[A64FXWrite_STUR], (instrs STRXui)>;
1093 def : InstRW<[A64FXWrite_STUR], (instrs STRWui)>;
// Store pair, pre-/post-index (D, Q, S, W, X forms), mapped to
// A64FXWrite_STP01. For every register form there are two entries over the
// same (pre, post) opcode pair: one without and one with ReadAdrBase. Their
// relative order is kept exactly as it was; only verbatim repetitions of
// these two entries are dropped.
// NOTE(review): the two entries kept for each form still overlap, which is
// accepted only because the model sets FullInstRWOverlapCheck = 0. Confirm
// which entry TableGen actually applies before deleting either of them.
1096 def : InstRW<[A64FXWrite_STP01],
1097             (instrs STPDpre, STPDpost)>;
1098 def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
1099             (instrs STPDpre, STPDpost)>;
1104 def : InstRW<[A64FXWrite_STP01],
1105             (instrs STPQpre, STPQpost)>;
1106 def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
1107             (instrs STPQpre, STPQpost)>;
1112 def : InstRW<[A64FXWrite_STP01],
1113             (instrs STPSpre, STPSpost)>;
1114 def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
1115             (instrs STPSpre, STPSpost)>;
1120 def : InstRW<[A64FXWrite_STP01],
1121             (instrs STPWpre, STPWpost)>;
1122 def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
1123             (instrs STPWpre, STPWpost)>;
1128 def : InstRW<[A64FXWrite_STP01],
1129             (instrs STPXpre, STPXpost)>;
1130 def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
1131             (instrs STPXpre, STPXpost)>;
// Store register, pre-/post-index (B, BB, D, H, HH, Q, S, W, X forms),
// mapped to [WriteAdr, A64FXWrite_STP01]. For every register form there are
// two entries over the same (pre, post) opcode pair: one without and one
// with a trailing ReadAdrBase. Their relative order is kept exactly as it
// was; only verbatim repetitions of these two entries are dropped.
// NOTE(review): the two entries kept for each form still overlap, which is
// accepted only because the model sets FullInstRWOverlapCheck = 0. Confirm
// which entry TableGen actually applies before deleting either of them.
1137 def : InstRW<[WriteAdr, A64FXWrite_STP01],
1138             (instrs STRBpre, STRBpost)>;
1139 def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
1140             (instrs STRBpre, STRBpost)>;
1145 def : InstRW<[WriteAdr, A64FXWrite_STP01],
1146             (instrs STRBBpre, STRBBpost)>;
1147 def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
1148             (instrs STRBBpre, STRBBpost)>;
1153 def : InstRW<[WriteAdr, A64FXWrite_STP01],
1154             (instrs STRDpre, STRDpost)>;
1155 def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
1156             (instrs STRDpre, STRDpost)>;
1161 def : InstRW<[WriteAdr, A64FXWrite_STP01],
1162             (instrs STRHpre, STRHpost)>;
1163 def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
1164             (instrs STRHpre, STRHpost)>;
1169 def : InstRW<[WriteAdr, A64FXWrite_STP01],
1170             (instrs STRHHpre, STRHHpost)>;
1171 def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
1172             (instrs STRHHpre, STRHHpost)>;
1177 def : InstRW<[WriteAdr, A64FXWrite_STP01],
1178             (instrs STRQpre, STRQpost)>;
1179 def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
1180             (instrs STRQpre, STRQpost)>;
1185 def : InstRW<[WriteAdr, A64FXWrite_STP01],
1186             (instrs STRSpre, STRSpost)>;
1187 def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
1188             (instrs STRSpre, STRSpost)>;
1193 def : InstRW<[WriteAdr, A64FXWrite_STP01],
1194             (instrs STRWpre, STRWpost)>;
1195 def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
1196             (instrs STRWpre, STRWpost)>;
1201 def : InstRW<[WriteAdr, A64FXWrite_STP01],
1202             (instrs STRXpre, STRXpost)>;
1203 def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
1204             (instrs STRXpre, STRXpost)>;
// Store register, register offset (roW and roX opcodes of the B, BB, D, H,
// HH, Q, S, W, X forms), mapped to [A64FXWrite_STUR, ReadAdrBase]. One entry
// per (roW, roX) opcode pair; a byte-identical second entry for the same
// pair adds nothing to the model, so each pair is listed once.
1210 def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
1211             (instrs STRBroW, STRBroX)>;
1214 def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
1215             (instrs STRBBroW, STRBBroX)>;
1218 def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
1219             (instrs STRDroW, STRDroX)>;
1222 def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
1223             (instrs STRHroW, STRHroX)>;
1226 def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
1227             (instrs STRHHroW, STRHHroX)>;
1230 def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
1231             (instrs STRQroW, STRQroX)>;
1234 def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
1235             (instrs STRSroW, STRSroX)>;
1238 def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
1239             (instrs STRWroW, STRWroX)>;
1242 def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
1243             (instrs STRXroW, STRXroX)>;
1247 //---
1248 // 3.8 FP Data Processing Instructions
1249 //---
1251 // FP absolute value
1252 // FP min/max
1253 // FP negate
// Resources and latency of the WriteF SchedWrite on this model: it uses
// A64FXGI03 with a latency of 4 cycles and ReleaseAtCycles = [2] for that
// resource.
1254 def : WriteRes<WriteF,       [A64FXGI03]> {
1255   let Latency = 4;
1256   let ReleaseAtCycles = [2];
1257 }
1259 // FP arithmetic
// Scalar FADD/FSUB, D and H register forms, mapped to A64FXWrite_4Cyc_GI03.
// NOTE(review): the S-forms (FADDSrr/FSUBSrr) are not listed in these two
// entries -- confirm they are intentionally left at their default mapping.
1261 def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FADDDrr, FADDHrr)>;
1262 def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FSUBDrr, FSUBHrr)>;
1264 // FP compare
// Resources and latency of the WriteFCmp SchedWrite on this model: it uses
// A64FXGI03 with a latency of 4 cycles and ReleaseAtCycles = [2].
1265 def : WriteRes<WriteFCmp,    [A64FXGI03]> {
1266   let Latency = 4;
1267   let ReleaseAtCycles = [2];
1268 }
1270 // FP Div, Sqrt
// Default WriteFDiv SchedWrite on this model: A64FXGI0, latency 43.
1271 def : WriteRes<WriteFDiv, [A64FXGI0]> {
1272   let Latency = 43;
1273 }

// Model-specific divide / square-root writes, all on A64FXGI0. They differ
// only in latency: 38 (A64FXXWriteFDiv), 29 (the SP writes) and 43 (the DP
// writes). InstRW entries later in the file attach them to instructions.
1275 def A64FXXWriteFDiv : SchedWriteRes<[A64FXGI0]> {
1276   let Latency = 38;
1277 }

1279 def A64FXXWriteFDivSP : SchedWriteRes<[A64FXGI0]> {
1280   let Latency = 29;
1281 }

1283 def A64FXXWriteFDivDP : SchedWriteRes<[A64FXGI0]> {
1284   let Latency = 43;
1285 }

1287 def A64FXXWriteFSqrtSP : SchedWriteRes<[A64FXGI0]> {
1288   let Latency = 29;
1289 }

1291 def A64FXXWriteFSqrtDP : SchedWriteRes<[A64FXGI0]> {
1292   let Latency = 43;
1293 }
1295 // FP divide, S-form
1296 // FP square root, S-form
// Single-precision divide/sqrt use the *SP writes. The scalar opcodes are
// named twice: once through `instrs` and again through an `instregex` that
// matches the same name, both times with the same write.
// The "^.*SQRT.*32$" pattern is not restricted to names starting with
// FSQRT; it matches any instruction name containing SQRT and ending in 32.
1297 def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVSrr)>;
1298 def : InstRW<[A64FXXWriteFSqrtSP], (instrs FSQRTSr)>;
1299 def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVv.*32$")>;
1300 def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
1301 def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVSrr")>;
1302 def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^FSQRTSr")>;
1304 // FP divide, D-form
1305 // FP square root, D-form
// Double-precision counterparts of the entries above, using the *DP writes;
// the same instrs/instregex repetition and the same broad SQRT pattern
// (here ending in 64) apply.
1306 def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVDrr)>;
1307 def : InstRW<[A64FXXWriteFSqrtDP], (instrs FSQRTDr)>;
1308 def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVv.*64$")>;
1309 def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
1310 def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVDrr")>;
1311 def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^FSQRTDr")>;
1313 // FP round to integral
// Scalar FRINT{A,I,M,N,P,X,Z} in the Sr and Dr forms -> A64FXWrite_9Cyc_GI03.
1314 def : InstRW<[A64FXWrite_9Cyc_GI03],
1315             (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;
1317 // FP select
// Every instruction whose name starts with FCSEL -> A64FXWrite_4Cyc_GI03.
1318 def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCSEL")>;
1320 //---
1321 // 3.9 FP Miscellaneous Instructions
1322 //---
1324 // FP convert, from vec to vec reg
1325 // FP convert, from gen to vec reg
1326 // FP convert, from vec to gen reg
// WriteFCvt on this model: A64FXGI03, latency 9, ReleaseAtCycles = [2].
1327 def : WriteRes<WriteFCvt, [A64FXGI03]> {
1328   let Latency = 9;
1329   let ReleaseAtCycles = [2];
1330 }

1332 // FP move, immed
1333 // FP move, register
// WriteFImm on this model: A64FXGI0, latency 4, ReleaseAtCycles = [2].
1334 def : WriteRes<WriteFImm, [A64FXGI0]> {
1335   let Latency = 4;
1336   let ReleaseAtCycles = [2];
1337 }

1339 // FP transfer, from gen to vec reg
1340 // FP transfer, from vec to gen reg
// WriteFCopy on this model: A64FXGI0, latency 4, ReleaseAtCycles = [2].
1341 def : WriteRes<WriteFCopy, [A64FXGI0]> {
1342   let Latency = 4;
1343   let ReleaseAtCycles = [2];
1344 }
// The two high-half FMOV opcodes get their own writes instead of the generic
// WriteFCopy: FMOVXDHighr -> A64FXWrite_FMOV_GV, FMOVDXHighr ->
// A64FXWrite_FMOV_VG14 (both writes are defined outside this part of the file).
1346 def : InstRW<[A64FXWrite_FMOV_GV], (instrs FMOVXDHighr)>;
1347 def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>;
1349 //---
1350 // 3.12 ASIMD Integer Instructions
1351 //---
1353 // ASIMD absolute diff, D-form
1354 // ASIMD absolute diff, Q-form
1355 // ASIMD absolute diff accum, D-form
1356 // ASIMD absolute diff accum, Q-form
1357 // ASIMD absolute diff accum long
1358 // ASIMD absolute diff long
1359 // ASIMD arith, basic
1360 // ASIMD arith, complex
1361 // ASIMD compare
1362 // ASIMD logical (AND, BIC, EOR)
1363 // ASIMD max/min, basic
1364 // ASIMD max/min, reduce, 4H/4S
1365 // ASIMD max/min, reduce, 8B/8H
1366 // ASIMD max/min, reduce, 16B
1367 // ASIMD multiply, D-form
1368 // ASIMD multiply, Q-form
1369 // ASIMD multiply accumulate long
1370 // ASIMD multiply accumulate saturating long
1371 // ASIMD multiply long
1372 // ASIMD pairwise add and accumulate
1373 // ASIMD shift accumulate
1374 // ASIMD shift by immed, basic
1375 // ASIMD shift by immed and insert, basic, D-form
1376 // ASIMD shift by immed and insert, basic, Q-form
1377 // ASIMD shift by immed, complex
1378 // ASIMD shift by register, basic, D-form
1379 // ASIMD shift by register, basic, Q-form
1380 // ASIMD shift by register, complex, D-form
1381 // ASIMD shift by register, complex, Q-form
// Default ASIMD writes on this model. WriteVd and WriteVq are given the
// same resource (A64FXGI03) and the same latency (4 cycles); the InstRW
// entries that follow override individual instructions.
1382 def : WriteRes<WriteVd, [A64FXGI03]> {
1383   let Latency = 4;
1384 }
1385 def : WriteRes<WriteVq, [A64FXGI03]> {
1386   let Latency = 4;
1387 }
1389 // ASIMD arith, reduce, 4H/4S
1390 // ASIMD arith, reduce, 8B/8H
1391 // ASIMD arith, reduce, 16B
1393 // ASIMD logical (MVN (alias for NOT), ORN, ORR)
// Vector logical ops (names starting with ANDv, BICv, EORv, ORRv, ORNv,
// NOTv) -> A64FXWrite_4Cyc_GI03.
1394 def : InstRW<[A64FXWrite_4Cyc_GI03],
1395             (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;
1397 // ASIMD arith, reduce
// NOTE(review): the ADDVv/SADDLVv/UADDLVv prefixes are matched again by
// later InstRW entries in this section with different writes; this is only
// accepted because the model sets FullInstRWOverlapCheck = 0. Confirm which
// mapping is intended to win.
1398 def : InstRW<[A64FXWrite_ADDLV],
1399             (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;
1401 // ASIMD polynomial (8x8) multiply long
// NOTE(review): the first two entries under this heading match the signed /
// unsigned / saturating-doubling multiply-long families, not polynomial
// multiplies; only the two PMULL entries are polynomial. The second pattern
// has no leading '^' anchor.
1402 def : InstRW<[A64FXWrite_MULLE], (instregex "^(S|U|SQD)MULL")>;
1403 def : InstRW<[A64FXWrite_MULLV],
1404             (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
1405 def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v8i8|v16i8)")>;
1406 def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v1i64|v2i64)")>;
// Per-instruction overrides for ASIMD integer operations. Each entry selects
// instructions by regular expression on the instruction record name and
// assigns the listed write. D-form and Q-form variants are written as
// separate entries even where they share the same write.
1408 // ASIMD absolute diff accum, D-form
1409 def : InstRW<[A64FXWrite_ABA],
1410             (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
1411 // ASIMD absolute diff accum, Q-form
1412 def : InstRW<[A64FXWrite_ABA],
1413             (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
1414 // ASIMD absolute diff accum long
1415 def : InstRW<[A64FXWrite_ABAL],
1416             (instregex "^[SU]ABAL")>;
1417 // ASIMD arith, reduce, 4H/4S
1418 def : InstRW<[A64FXWrite_ADDLV1],
1419             (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
1420 // ASIMD arith, reduce, 8B
1421 def : InstRW<[A64FXWrite_ADDLV1],
1422             (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
1423 // ASIMD arith, reduce, 16B/16H
1424 def : InstRW<[A64FXWrite_ADDLV1],
1425             (instregex "^[SU]?ADDL?Vv16i8v$")>;
1426 // ASIMD max/min, reduce, 4H/4S
1427 def : InstRW<[A64FXWrite_MINMAXV],
1428             (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
1429 // ASIMD max/min, reduce, 8B/8H
1430 def : InstRW<[A64FXWrite_MINMAXV],
1431             (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
1432 // ASIMD max/min, reduce, 16B/16H
1433 def : InstRW<[A64FXWrite_MINMAXV],
1434             (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
1435 // ASIMD multiply, D-form
// The pattern below is assembled from three string pieces with the `#`
// paste operator: mnemonic, vector type, optional "_indexed" suffix.
1436 def : InstRW<[A64FXWrite_PMUL],
1437             (instregex "^(P?MUL|SQR?DMUL)" #
1438                        "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
1439                        "(_indexed)?$")>;
1441 // ASIMD multiply, Q-form
1442 def : InstRW<[A64FXWrite_PMUL],
1443             (instregex "^(P?MUL)(v16i8|v8i16|v4i32)(_indexed)?$")>;
1445 // ASIMD multiply, Q-form
// Saturating doubling multiply-high, Q-form: uses A64FXWrite_SQRDMULH
// rather than A64FXWrite_PMUL.
1446 def : InstRW<[A64FXWrite_SQRDMULH],
1447             (instregex "^(SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
1449 // ASIMD multiply accumulate, D-form
1450 def : InstRW<[A64FXWrite_9Cyc_GI03],
1451             (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
1452 // ASIMD multiply accumulate, Q-form
1453 def : InstRW<[A64FXWrite_9Cyc_GI03],
1454             (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
1455 // ASIMD shift accumulate
// The shift patterns from here down to the SQXTN entry are written without
// a leading '^' anchor.
1456 def : InstRW<[A64FXWrite_SRSRAV],
1457             (instregex "SRSRAv", "URSRAv")>;
1458 def : InstRW<[A64FXWrite_SSRAV],
1459             (instregex "SSRAv", "USRAv")>;
1461 // ASIMD shift by immed, basic
1462 def : InstRW<[A64FXWrite_RSHRN],
1463             (instregex "RSHRNv", "SQRSHRNv", "SQRSHRUNv", "UQRSHRNv")>;
1464 def : InstRW<[A64FXWrite_SHRN],
1465             (instregex "SHRNv", "SQSHRNv", "SQSHRUNv", "UQSHRNv")>;
1467 def : InstRW<[A64FXWrite_6Cyc_GI3],
1468             (instregex "SQXTNv", "SQXTUNv", "UQXTNv")>;
1470 // ASIMD shift by immed, complex
1471 def : InstRW<[A64FXWrite_ABA], (instregex "^[SU]?(Q|R){1,2}SHR")>;
1472 def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^SQSHLU")>;
1473 // ASIMD shift by register, basic, Q-form
1474 def : InstRW<[A64FXWrite_6Cyc_GI3],
1475             (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
1476 // ASIMD shift by register, complex, D-form
1477 def : InstRW<[A64FXWrite_6Cyc_GI3],
1478             (instregex "^[SU][QR]{1,2}SHL" #
1479                        "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
1480 // ASIMD shift by register, complex, Q-form
1481 def : InstRW<[A64FXWrite_6Cyc_GI3],
1482             (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
1484 // ASIMD Arithmetic
// Overrides for ASIMD add/sub, saturating arithmetic, pairwise and
// reducing operations, compares, min/max and absolute difference.
// NOTE(review): several patterns in this run are repeated or are subsets of
// one another (e.g. "(ADD|SUB)HNv.*" and "^ADDHNv"/"^SUBHNv"; the two
// "^SQADD"... lists; "^SQADDv"...). They carry the same write in each case,
// and the overlap is accepted because the model sets
// FullInstRWOverlapCheck = 0.
1485 def : InstRW<[A64FXWrite_4Cyc_GI03],
1486             (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>;
1487 def : InstRW<[A64FXWrite_4Cyc_GI03],
1488             (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;
1489 def : InstRW<[A64FXWrite_SHRN], (instregex "(ADD|SUB)HNv.*")>;
1490 def : InstRW<[A64FXWrite_RSHRN], (instregex "(RADD|RSUB)HNv.*")>;
1491 def : InstRW<[A64FXWrite_4Cyc_GI03],
1492             (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
1493                        "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
1494 def : InstRW<[A64FXWrite_ADDP],
1495             (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;
// The next pattern also names scalar register forms (the Xr[rsi] / Xr[rs]
// alternatives) next to the vector types.
1496 def : InstRW<[A64FXWrite_4Cyc_GI03],
1497             (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
1498                        "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;
1499 def : InstRW<[A64FXWrite_4Cyc_GI0],
1500             (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;
1501 def : InstRW<[A64FXWrite_SADALP], (instregex "^SADALP", "^UADALP")>;
1502 def : InstRW<[A64FXWrite_SADDLP], (instregex "^SADDLPv", "^UADDLPv")>;
1503 def : InstRW<[A64FXWrite_ADDLV1], (instregex "^SADDLV", "^UADDLV")>;
1504 def : InstRW<[A64FXWrite_MINMAXV],
1505              (instregex "^ADDVv", "^SMAXVv", "^UMAXVv", "^SMINVv", "^UMINVv")>;
1506 def : InstRW<[A64FXWrite_ABA],
1507              (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
1508 def : InstRW<[A64FXWrite_4Cyc_GI03],
1509             (instregex "^SQADDv", "^SQSUBv", "^UQADDv", "^UQSUBv")>;
1510 def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^SUQADDv", "^USQADDv")>;
1511 def : InstRW<[A64FXWrite_SHRN],
1512             (instregex "^ADDHNv", "^SUBHNv")>;
1513 def : InstRW<[A64FXWrite_RSHRN],
1514             (instregex "^RADDHNv", "^RSUBHNv")>;
1515 def : InstRW<[A64FXWrite_4Cyc_GI03],
1516             (instregex "^SQABS", "^SQADD", "^SQNEG", "^SQSUB",
1517                        "^SRHADD", "^SUQADD", "^UQADD", "^UQSUB",
1518                       "^URHADD", "^USQADD")>;
// Vector integer compares, min/max, pairwise min/max, absolute difference.
1520 def : InstRW<[A64FXWrite_4Cyc_GI03],
1521             (instregex "^CMEQv", "^CMGEv", "^CMGTv",
1522                        "^CMLEv", "^CMLTv", "^CMHIv", "^CMHSv")>;
1523 def : InstRW<[A64FXWrite_MINMAXV],
1524             (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
1525 def : InstRW<[A64FXWrite_ADDP],
1526             (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
1527 def : InstRW<[A64FXWrite_4Cyc_GI03],
1528             (instregex "^SABDv", "^UABDv")>;
1529 def : InstRW<[A64FXWrite_TBX1],
1530             (instregex "^SABDLv", "^UABDLv")>;
1532 //---
1533 // 3.13 ASIMD Floating-point Instructions
1534 //---
// Resources and latency of the WriteFMul SchedWrite on this model: it uses
// A64FXGI03 with a latency of 9 cycles.
1536 def : WriteRes<WriteFMul, [A64FXGI03]> {
1537   let Latency = 9;
1538 }
// Per-instruction overrides for ASIMD floating-point operations. Where a
// heading lists D-form and Q-form separately but only one entry follows,
// that entry's pattern covers both forms.
1540 // ASIMD FP absolute value
1541 def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FABSv")>;
1543 // ASIMD FP arith, normal, D-form
1544 // ASIMD FP arith, normal, Q-form
1545 def : InstRW<[A64FXWrite_9Cyc_GI03],
1546             (instregex "^FABDv", "^FADDv", "^FSUBv")>;
1548 // ASIMD FP arith, pairwise, D-form
1549 // ASIMD FP arith, pairwise, Q-form
1550 def : InstRW<[A64FXWrite_FADDPV], (instregex "^FADDPv")>;
1552 // ASIMD FP compare, D-form
1553 // ASIMD FP compare, Q-form
1554 def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FACGEv", "^FACGTv")>;
1555 def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCMEQv", "^FCMGEv",
1556                                                  "^FCMGTv", "^FCMLEv",
1557                                                  "^FCMLTv")>;
1558 // ASIMD FP round, D-form
1559 def : InstRW<[A64FXWrite_9Cyc_GI03],
1560             (instregex "^FRINT[AIMNPXZ](v2f32)")>;
1561 // ASIMD FP round, Q-form
1562 def : InstRW<[A64FXWrite_9Cyc_GI03],
1563             (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
1565 // ASIMD FP convert, long
1566 // ASIMD FP convert, narrow
1567 // ASIMD FP convert, other, D-form
1568 // ASIMD FP convert, other, Q-form
1570 // ASIMD FP convert, long and narrow
1571 def : InstRW<[A64FXWrite_FCVTXNV], (instregex "^FCVT(L|N|XN)v")>;
1572 // ASIMD FP convert, other, D-form
1573 def : InstRW<[A64FXWrite_FCVTXNV],
1574       (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
1575 // ASIMD FP convert, other, Q-form
1576 def : InstRW<[A64FXWrite_FCVTXNV],
1577       (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
// Vector FP divide: each opcode is named twice, once via `instrs` and once
// via an unanchored `instregex` on the same name, with the same write.
// The v4f32 form uses A64FXXWriteFDiv (latency 38 as defined earlier in the
// file) rather than the SP or DP write.
1579 // ASIMD FP divide, D-form, F32
1580 def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVv2f32)>;
1581 def : InstRW<[A64FXXWriteFDivSP], (instregex "FDIVv2f32")>;
1583 // ASIMD FP divide, Q-form, F32
1584 def : InstRW<[A64FXXWriteFDiv], (instrs FDIVv4f32)>;
1585 def : InstRW<[A64FXXWriteFDiv], (instregex "FDIVv4f32")>;
1587 // ASIMD FP divide, Q-form, F64
1588 def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVv2f64)>;
1589 def : InstRW<[A64FXXWriteFDivDP], (instregex "FDIVv2f64")>;
1591 // ASIMD FP max/min, normal, D-form
1592 // ASIMD FP max/min, normal, Q-form
1593 def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMAXv", "^FMAXNMv",
1594                                                "^FMINv", "^FMINNMv")>;
1596 // ASIMD FP max/min, pairwise, D-form
1597 // ASIMD FP max/min, pairwise, Q-form
1598 def : InstRW<[A64FXWrite_ADDP], (instregex "^FMAXPv", "^FMAXNMPv",
1599                                            "^FMINPv", "^FMINNMPv")>;
1601 // ASIMD FP max/min, reduce
1602 def : InstRW<[A64FXWrite_FMAXVVH], (instregex "^FMAXVv", "^FMAXNMVv",
1603                                               "^FMINVv", "^FMINNMVv")>;
1605 // ASIMD FP multiply, D-form, FZ
1606 // ASIMD FP multiply, D-form, no FZ
1607 // ASIMD FP multiply, Q-form, FZ
1608 // ASIMD FP multiply, Q-form, no FZ
// NOTE(review): the broad "^FMULv"/"^FMULXv" entry comes first and the two
// narrower FMULX? entries with A64FXWrite_FMULXE follow; their matches
// overlap. Confirm which write is meant to apply to the shared opcodes.
1609 def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMULv", "^FMULXv")>;
1610 def : InstRW<[A64FXWrite_FMULXE],
1611             (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
1612 def : InstRW<[A64FXWrite_FMULXE],
1613             (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
1615 // ASIMD FP multiply accumulate, D-form, FZ
1616 // ASIMD FP multiply accumulate, D-form, no FZ
1617 // ASIMD FP multiply accumulate, Q-form, FZ
1618 // ASIMD FP multiply accumulate, Q-form, no FZ
// NOTE(review): same layout as the multiply entries above -- a broad
// "^FMLAv"/"^FMLSv" entry followed by narrower, overlapping FML[AS] entries
// with a different write.
1619 def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMLAv", "^FMLSv")>;
1620 def : InstRW<[A64FXWrite_FMULXE],
1621             (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
1622 def : InstRW<[A64FXWrite_FMULXE],
1623             (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
1625 // ASIMD FP negate
1626 def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FNEGv")>;
1628 //--
1629 // 3.14 ASIMD Miscellaneous Instructions
1630 //--
// Overrides for ASIMD permute, move, table-lookup, estimate and transfer
// instructions.
// NOTE(review): several patterns in this section occur more than once
// ("^INSv" twice; "^[SU]MOVv" twice plus "(S|U)MOVv.*"; "^SQXTNv"... also
// appears unanchored earlier in the file; "^UZP1"/"^UZP2" and
// "^ZIP1"/"^ZIP2" are matched again by the v-suffixed entries near the
// end). Every repeat carries the same write as its first occurrence, and
// the overlap is accepted because the model sets FullInstRWOverlapCheck = 0.
1632 // ASIMD bit reverse
1633 def : InstRW<[A64FXWrite_1Cyc_GI2456], (instregex "^RBITv")>;
1635 // ASIMD bitwise insert, D-form
1636 // ASIMD bitwise insert, Q-form
1637 def : InstRW<[A64FXWrite_BIF],
1638             (instregex "^BIFv", "^BITv", "^BSLv")>;
1640 // ASIMD count, D-form
1641 // ASIMD count, Q-form
1642 def : InstRW<[A64FXWrite_4Cyc_GI0],
1643             (instregex "^CLSv", "^CLZv", "^CNTv")>;
1645 // ASIMD duplicate, gen reg
1646 // ASIMD duplicate, element
// NOTE(review): "^DUPv" (A64FXWrite_DUPGENERAL) also matches every name
// matched by "^DUPv.+gpr" (A64FXWrite_6Cyc_GI0) two lines below; confirm
// which write is meant to apply to the gpr forms.
1647 def : InstRW<[A64FXWrite_DUPGENERAL], (instregex "^DUPv")>;
1648 def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUP(i8|i16|i32|i64)$")>;
1649 def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUPv.+gpr")>;
1651 // ASIMD extract
1652 def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^EXTv")>;
1654 // ASIMD extract narrow
1655 def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^XTNv")>;
1657 // ASIMD extract narrow, saturating
1658 def : InstRW<[A64FXWrite_6Cyc_GI3],
1659             (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>;
1661 // ASIMD insert, element to element
1662 def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;
1664 // ASIMD transfer, element to gen reg
1665 def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;
1667 // ASIMD move, integer immed
1668 def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^MOVIv")>;
1670 // ASIMD move, FP immed
1671 def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMOVv")>;
// Table lookups: the write is chosen by the number of table registers
// (One/Two/Three/Four). For the same register count, TBX uses the next
// write up from TBL (e.g. TBL One -> A64FXWrite_6Cyc_GI3, TBX One ->
// A64FXWrite_TBX1). D-form and Q-form use identical assignments.
1673 // ASIMD table lookup, D-form
1674 def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv8i8One")>;
1675 def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv8i8Two")>;
1676 def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv8i8Three")>;
1677 def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv8i8Four")>;
1678 def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv8i8One")>;
1679 def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv8i8Two")>;
1680 def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv8i8Three")>;
1681 def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv8i8Four")>;
1683 // ASIMD table lookup, Q-form
1684 def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv16i8One")>;
1685 def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv16i8Two")>;
1686 def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv16i8Three")>;
1687 def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv16i8Four")>;
1688 def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv16i8One")>;
1689 def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv16i8Two")>;
1690 def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv16i8Three")>;
1691 def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv16i8Four")>;
1693 // ASIMD unzip/zip
1694 def : InstRW<[A64FXWrite_6Cyc_GI0],
1695             (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;
1697 // ASIMD reciprocal estimate, D-form
1698 // ASIMD reciprocal estimate, Q-form
1699 def : InstRW<[A64FXWrite_4Cyc_GI03],
1700             (instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
1701                        "^FRSQRTEv", "^URSQRTEv")>;
1703 // ASIMD reciprocal step, D-form, FZ
1704 // ASIMD reciprocal step, D-form, no FZ
1705 // ASIMD reciprocal step, Q-form, FZ
1706 // ASIMD reciprocal step, Q-form, no FZ
1707 def : InstRW<[A64FXWrite_9Cyc_GI0], (instregex "^FRECPSv", "^FRSQRTSv")>;
1709 // ASIMD reverse
1710 def : InstRW<[A64FXWrite_4Cyc_GI03],
1711             (instregex "^REV16v", "^REV32v", "^REV64v")>;
1713 // ASIMD table lookup, D-form
1714 // ASIMD table lookup, Q-form
// NOTE(review): this catch-all matches every TBL/TBX opcode already given a
// register-count-specific write above, but with A64FXWrite_6Cyc_GI0.
// Confirm that the specific entries are the ones meant to take effect.
1715 def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TBLv", "^TBXv")>;
1717 // ASIMD transfer, element to word or word
1718 def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;
1720 // ASIMD transfer, element to gen reg
1721 def : InstRW<[A64FXWrite_SMOV], (instregex "(S|U)MOVv.*")>;
1723 // ASIMD transfer gen reg to element
1724 def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;
1726 // ASIMD transpose
// Besides TRN1/TRN2, the transpose entry below also lists the UZP1v/UZP2v
// prefixes.
1727 def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1v", "^TRN2v",
1728                                                  "^UZP1v", "^UZP2v")>;
1730 // ASIMD unzip/zip
1731 def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^ZIP1v", "^ZIP2v")>;
1733 //--
1734 // 3.15 ASIMD Load Instructions
1735 //--
1737 // ASIMD load, 1 element, multiple, 1 reg, D-form
1738 // ASIMD load, 1 element, multiple, 1 reg, Q-form
1739 def : InstRW<[A64FXWrite_8Cyc_GI56],
1740             (instregex "^LD1Onev(8b|4h|2s|1d|2d)$")>;
1741 def : InstRW<[A64FXWrite_11Cyc_GI56],
1742             (instregex "^LD1Onev(16b|8h|4s)$")>;
1743 def : InstRW<[A64FXWrite_LD108, WriteAdr],
1744             (instregex "^LD1Onev(8b|4h|2s|1d|2d)_POST$")>;
1745 def : InstRW<[A64FXWrite_LD109, WriteAdr],
1746             (instregex "^LD1Onev(16b|8h|4s)_POST$")>;
1748 // ASIMD load, 1 element, multiple, 2 reg, D-form
1749 // ASIMD load, 1 element, multiple, 2 reg, Q-form
1750 def : InstRW<[A64FXWrite_LD102],
1751             (instregex "^LD1Twov(8b|4h|2s|1d|2d)$")>;
1752 def : InstRW<[A64FXWrite_LD103],
1753             (instregex "^LD1Twov(16b|8h|4s)$")>;
1754 def : InstRW<[A64FXWrite_LD110, WriteAdr],
1755             (instregex "^LD1Twov(8b|4h|2s|1d|2d)_POST$")>;
1756 def : InstRW<[A64FXWrite_LD111, WriteAdr],
1757             (instregex "^LD1Twov(16b|8h|4s)_POST$")>;
1759 // ASIMD load, 1 element, multiple, 3 reg, D-form
1760 // ASIMD load, 1 element, multiple, 3 reg, Q-form
1761 def : InstRW<[A64FXWrite_LD104],
1762             (instregex "^LD1Threev(8b|4h|2s|1d|2d)$")>;
1763 def : InstRW<[A64FXWrite_LD105],
1764             (instregex "^LD1Threev(16b|8h|4s)$")>;
1765 def : InstRW<[A64FXWrite_LD112, WriteAdr],
1766             (instregex "^LD1Threev(8b|4h|2s|1d|2d)_POST$")>;
1767 def : InstRW<[A64FXWrite_LD113, WriteAdr],
1768             (instregex "^LD1Threev(16b|8h|4s)_POST$")>;
1770 // ASIMD load, 1 element, multiple, 4 reg, D-form
1771 // ASIMD load, 1 element, multiple, 4 reg, Q-form
1772 def : InstRW<[A64FXWrite_LD106],
1773             (instregex "^LD1Fourv(8b|4h|2s|1d|2d)$")>;
1774 def : InstRW<[A64FXWrite_LD107],
1775             (instregex "^LD1Fourv(16b|8h|4s)$")>;
1776 def : InstRW<[A64FXWrite_LD114, WriteAdr],
1777             (instregex "^LD1Fourv(8b|4h|2s|1d|2d)_POST$")>;
1778 def : InstRW<[A64FXWrite_LD115, WriteAdr],
1779             (instregex "^LD1Fourv(16b|8h|4s)_POST$")>;
1781 // ASIMD load, 1 element, one lane, B/H/S
1782 // ASIMD load, 1 element, one lane, D
1783 def : InstRW<[A64FXWrite_LD1I0], (instregex "^LD1i(8|16|32|64)$")>;
1784 def : InstRW<[A64FXWrite_LD1I1, WriteAdr],
1785             (instregex "^LD1i(8|16|32|64)_POST$")>;
1787 // ASIMD load, 1 element, all lanes, D-form, B/H/S
1788 // ASIMD load, 1 element, all lanes, D-form, D
1789 // ASIMD load, 1 element, all lanes, Q-form
1790 def : InstRW<[A64FXWrite_8Cyc_GI03],
1791             (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
1792 def : InstRW<[A64FXWrite_LD108, WriteAdr],
1793             (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1795 // ASIMD load, 2 element, multiple, D-form, B/H/S
1796 // ASIMD load, 2 element, multiple, Q-form, D
1797 def : InstRW<[A64FXWrite_LD103],
1798             (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
1799 def : InstRW<[A64FXWrite_LD111, WriteAdr],
1800             (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
1802 // ASIMD load, 2 element, one lane, B/H
1803 // ASIMD load, 2 element, one lane, S
1804 // ASIMD load, 2 element, one lane, D
1805 def : InstRW<[A64FXWrite_LD2I0], (instregex "^LD2i(8|16|32|64)$")>;
1806 def : InstRW<[A64FXWrite_LD2I1, WriteAdr],
1807             (instregex "^LD2i(8|16|32|64)_POST$")>;
1809 // ASIMD load, 2 element, all lanes, D-form, B/H/S
1810 // ASIMD load, 2 element, all lanes, D-form, D
1811 // ASIMD load, 2 element, all lanes, Q-form
1812 def : InstRW<[A64FXWrite_LD102],
1813             (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
1814 def : InstRW<[A64FXWrite_LD110, WriteAdr],
1815             (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1817 // ASIMD load, 3 element, multiple, D-form, B/H/S
1818 // ASIMD load, 3 element, multiple, Q-form, B/H/S
1819 // ASIMD load, 3 element, multiple, Q-form, D
1820 def : InstRW<[A64FXWrite_LD105],
1821             (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
1822 def : InstRW<[A64FXWrite_LD113, WriteAdr],
1823             (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
1825 // ASIMD load, 3 element, one lane, B/H
1826 // ASIMD load, 3 element, one lane, S
1827 // ASIMD load, 3 element, one lane, D
1828 def : InstRW<[A64FXWrite_LD3I0], (instregex "^LD3i(8|16|32|64)$")>;
1829 def : InstRW<[A64FXWrite_LD3I1, WriteAdr],
1830             (instregex "^LD3i(8|16|32|64)_POST$")>;
1832 // ASIMD load, 3 element, all lanes, D-form, B/H/S
1833 // ASIMD load, 3 element, all lanes, D-form, D
1834 // ASIMD load, 3 element, all lanes, Q-form, B/H/S
1835 // ASIMD load, 3 element, all lanes, Q-form, D
1836 def : InstRW<[A64FXWrite_LD104],
1837             (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
1838 def : InstRW<[A64FXWrite_LD112, WriteAdr],
1839             (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1841 // ASIMD load, 4 element, multiple, D-form, B/H/S
1842 // ASIMD load, 4 element, multiple, Q-form, B/H/S
1843 // ASIMD load, 4 element, multiple, Q-form, D
1844 def : InstRW<[A64FXWrite_LD107],
1845             (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
1846 def : InstRW<[A64FXWrite_LD115, WriteAdr],
1847             (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
1849 // ASIMD load, 4 element, one lane, B/H
1850 // ASIMD load, 4 element, one lane, S
1851 // ASIMD load, 4 element, one lane, D
1852 def : InstRW<[A64FXWrite_LD4I0], (instregex "^LD4i(8|16|32|64)$")>;
1853 def : InstRW<[A64FXWrite_LD4I1, WriteAdr],
1854             (instregex "^LD4i(8|16|32|64)_POST$")>;
1856 // ASIMD load, 4 element, all lanes, D-form, B/H/S
1857 // ASIMD load, 4 element, all lanes, D-form, D
1858 // ASIMD load, 4 element, all lanes, Q-form, B/H/S
1859 // ASIMD load, 4 element, all lanes, Q-form, D
1860 def : InstRW<[A64FXWrite_LD106],
1861             (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
1862 def : InstRW<[A64FXWrite_LD114, WriteAdr],
1863             (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1865 //--
1866 // 3.16 ASIMD Store Instructions
1867 //--
1869 // ASIMD store, 1 element, multiple, 1 reg, D-form
1870 // ASIMD store, 1 element, multiple, 1 reg, Q-form
1871 def : InstRW<[A64FXWrite_ST10],
1872             (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
1873 def : InstRW<[A64FXWrite_ST14, WriteAdr],
1874             (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1876 // ASIMD store, 1 element, multiple, 2 reg, D-form
1877 // ASIMD store, 1 element, multiple, 2 reg, Q-form
1878 def : InstRW<[A64FXWrite_ST11],
1879             (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
1880 def : InstRW<[A64FXWrite_ST15, WriteAdr],
1881             (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1883 // ASIMD store, 1 element, multiple, 3 reg, D-form
1884 // ASIMD store, 1 element, multiple, 3 reg, Q-form
1885 def : InstRW<[A64FXWrite_ST12],
1886             (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
1887 def : InstRW<[A64FXWrite_ST16, WriteAdr],
1888             (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1890 // ASIMD store, 1 element, multiple, 4 reg, D-form
1891 // ASIMD store, 1 element, multiple, 4 reg, Q-form
1892 def : InstRW<[A64FXWrite_ST13],
1893             (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
1894 def : InstRW<[A64FXWrite_ST17, WriteAdr],
1895             (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1897 // ASIMD store, 1 element, one lane, B/H/S
1898 // ASIMD store, 1 element, one lane, D
1899 def : InstRW<[A64FXWrite_ST10],
1900             (instregex "^ST1i(8|16|32|64)$")>;
1901 def : InstRW<[A64FXWrite_ST14, WriteAdr],
1902             (instregex "^ST1i(8|16|32|64)_POST$")>;
1904 // ASIMD store, 2 element, multiple, D-form, B/H/S
1905 // ASIMD store, 2 element, multiple, Q-form, B/H/S
1906 // ASIMD store, 2 element, multiple, Q-form, D
1907 def : InstRW<[A64FXWrite_ST11],
1908             (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
1909 def : InstRW<[A64FXWrite_ST15, WriteAdr],
1910             (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
1912 // ASIMD store, 2 element, one lane, B/H/S
1913 // ASIMD store, 2 element, one lane, D
1914 def : InstRW<[A64FXWrite_ST11],
1915             (instregex "^ST2i(8|16|32|64)$")>;
1916 def : InstRW<[A64FXWrite_ST15, WriteAdr],
1917             (instregex "^ST2i(8|16|32|64)_POST$")>;
1919 // ASIMD store, 3 element, multiple, D-form, B/H/S
1920 // ASIMD store, 3 element, multiple, Q-form, B/H/S
1921 // ASIMD store, 3 element, multiple, Q-form, D
1922 def : InstRW<[A64FXWrite_ST12],
1923             (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
1924 def : InstRW<[A64FXWrite_ST16, WriteAdr],
1925             (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
1927 // ASIMD store, 3 element, one lane, B/H
1928 // ASIMD store, 3 element, one lane, S
1929 // ASIMD store, 3 element, one lane, D
1930 def : InstRW<[A64FXWrite_ST12], (instregex "^ST3i(8|16|32|64)$")>;
1931 def : InstRW<[A64FXWrite_ST16, WriteAdr],
1932             (instregex "^ST3i(8|16|32|64)_POST$")>;
1934 // ASIMD store, 4 element, multiple, D-form, B/H/S
1935 // ASIMD store, 4 element, multiple, Q-form, B/H/S
1936 // ASIMD store, 4 element, multiple, Q-form, D
1937 def : InstRW<[A64FXWrite_ST13],
1938             (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
1939 def : InstRW<[A64FXWrite_ST17, WriteAdr],
1940             (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
1942 // ASIMD store, 4 element, one lane, B/H
1943 // ASIMD store, 4 element, one lane, S
1944 // ASIMD store, 4 element, one lane, D
1945 def : InstRW<[A64FXWrite_ST13], (instregex "^ST4i(8|16|32|64)$")>;
1946 def : InstRW<[A64FXWrite_ST17, WriteAdr],
1947             (instregex "^ST4i(8|16|32|64)_POST$")>;
1949 // V8.1a Atomics (LSE)
1950 def : InstRW<[A64FXWrite_CAS, WriteAtomic],
1951             (instrs CASB, CASH, CASW, CASX)>;
1953 def : InstRW<[A64FXWrite_CAS, WriteAtomic],
1954             (instrs CASAB, CASAH, CASAW, CASAX)>;
1956 def : InstRW<[A64FXWrite_CAS, WriteAtomic],
1957             (instrs CASLB, CASLH, CASLW, CASLX)>;
1959 def : InstRW<[A64FXWrite_CAS, WriteAtomic],
1960             (instrs CASALB, CASALH, CASALW, CASALX)>;
1962 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
1963             (instrs LDLARB, LDLARH, LDLARW, LDLARX)>;
1965 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
1966             (instrs LDADDB, LDADDH, LDADDW, LDADDX)>;
1968 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
1969             (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>;
1971 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
1972             (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>;
1974 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
1975             (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>;
1977 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
1978             (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>;
1980 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
1981             (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>;
1983 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
1984             (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>;
1986 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
1987             (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>;
1989 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
1990             (instrs LDEORB, LDEORH, LDEORW, LDEORX)>;
1992 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
1993             (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>;
1995 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
1996             (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>;
1998 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
1999             (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>;
2001 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
2002             (instrs LDSETB, LDSETH, LDSETW, LDSETX)>;
2004 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
2005             (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>;
2007 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
2008             (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>;
2010 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
2011             (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>;
2013 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
2014             (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX,
2015              LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX,
2016              LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX,
2017              LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>;
2019 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
2020             (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX,
2021              LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX,
2022              LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX,
2023              LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>;
2025 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
2026             (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX,
2027              LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX,
2028              LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX,
2029              LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>;
2031 def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
2032             (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX,
2033              LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX,
2034              LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX,
2035              LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>;
2037 def : InstRW<[A64FXWrite_SWP, WriteAtomic],
2038             (instrs SWPB, SWPH, SWPW, SWPX)>;
2040 def : InstRW<[A64FXWrite_SWP, WriteAtomic],
2041             (instrs SWPAB, SWPAH, SWPAW, SWPAX)>;
2043 def : InstRW<[A64FXWrite_SWP, WriteAtomic],
2044             (instrs SWPLB, SWPLH, SWPLW, SWPLX)>;
2046 def : InstRW<[A64FXWrite_SWP, WriteAtomic],
2047             (instrs SWPALB, SWPALH, SWPALW, SWPALX)>;
2049 def : InstRW<[A64FXWrite_STUR, WriteAtomic],
2050             (instrs STLLRB, STLLRH, STLLRW, STLLRX)>;
2052 // SVE instructions
2054 // The modeling method for SVE instructions is more accurate than others.
2055 // TODO: modify the model of other instructions similarly.
2057 def : InstRW<[A64FXWrite_4Cyc_GI0],
2058             (instregex "^AND_ZI", "^CL[SZ]_Z", "^CPY_ZP[mz]I", "^DUP_ZZ?I", "^DUPM_Z",
2059                        "^EOR_ZI", "^ORR_ZI", "^FCM(EQ|GT|GE|LT|LE|NE|UO)_P",
2060                        "^FCPY_Z", "^F(MAX|MIN).*I_", "^NEG_Z", "^[SU](MAX|MIN)_ZI",
2061                        "^SUBR?_ZI")>;
2063 def : InstRW<[A64FXWrite_6Cyc_GI0],
2064             (instregex "^CLAST[AB]_[VZ]", "^COMPACT_Z", "^CPY_ZPmV", "^DUP_ZR",
2065                        "^EXT_Z", "^FDUP_Z", "^INSR_ZV", "^LAST[AB]_V", "^REV_Z",
2066                        "^SPLICE_Z", "^[SU]UNPK(HI|LO)_Z", "^TBL_Z", "^TRN[12]_Z")>;
2068 def : InstRW<[A64FXWrite_9Cyc_GI0],
2069             (instregex "^F(ADD|SUBR?)_.*I_", "^FRECPS_Z", "^FRSQRTS_Z",
2070                        "^INDEX_II_[SD]", "^MUL_ZI")>;
2072 def : InstRW<[A64FXWrite_4Cyc_GI3],
2073             (instregex "^CNT_Z")>;
2075 def : InstRW<[A64FXWrite_4Cyc_GI03],
2076             (instregex "^ABS_Z", "^ADD_Z", "^AND_Z[^I]", "^ASRR?_(WIDE_)?Z",
2077                        "^BIC_Z", "^ADR_[SU]XTW_Z", "^CNOT_Z", "^DEC[BHWD]_Z",
2078                        "^EOR_Z[^I]", "^INC[BHWD]_Z", "^ORR_Z[^I]", "^FABS_Z",
2079                        "^FACG[ET]_P", "^FEXPA_Z", "^F(MAX|MIN)[^V]*Z_",
2080                        "^FNEG_Z", "^FRECP[EX]_Z", "^FRSQRTE_Z", "^FTSSEL_Z",
2081                        "^LS[LR]R?(_WIDE)?_Z", "^NOT_Z", "^RBIT_Z", "^REV[BHW]_Z", "^SABD_Z",
2082                        "^SEL_Z", "^[SU](MAX|MIN)_ZP", "^[SU]Q(INC|DEC)[^P]_Z",
2083                        "^SUBR?_Z[^I]", "^[SU]XT._Z", "^UABD_Z")>;
2085 def : InstRW<[A64FXWrite_9Cyc_GI03      ],
2086             (instregex "^FABD_Z", "^F(ADD|SUBR?)_.*Z_", "^FN?(MAD|MLA|MLS|MSB)_ZP",
2087                        "^FMUL_(ZP|ZZZ_)", "^FMULX_Z", "^FCVT(ZS|ZU)?_Z",
2088                        "^FRINT._Z", "^FSCALE_Z", "^FTMAD_Z", "^FTSMUL_Z",
2089                        "^MAD_Z", "^MLA_Z", "^MLS_Z", "^MSB_Z", "^MUL_ZP",
2090                        "^[SU]CVTF_Z", "^[SU]DOT_ZZZ_", "^[SU]MULH_Z")>;
2092 def : InstRW<[A64FXWrite_3Cyc_GI1],
2093             (instregex "^ANDS?_P", "^BICS?_P", "^BRK.*_P", "^EORS?_P", "^ORRS?_P",
2094                        "^NANDS?_P", "^NORS?_P", "^ORNS?_P", "^PFALSE", "^PNEXT",
2095                        "^PFIRST", "^PTEST", "^PTRUES?", "^PUNPK(HI|LO)",
2096                        "^RDFFRS?", "^REV_P", "^SEL_P", "^TRN[12]_P")>;
2098 def : InstRW<[A64FXWrite_1Cyc_GI24],
2099             (instregex "^ADD[PV]L", "^CNT[BHWD]_X", "^DEC[BHWD]_X", "^INC[BHWD]_X",
2100                        "^RDVLI")>;
2102 def : InstRW<[A64FXWrite_11Cyc_GI5],
2103             (instregex "^LDR_[PZ]XI")>;
2105 def : InstRW<[A64FXWrite_11Cyc_GI56],
2106             (instregex "^LD(NF|FF|NT)?1R?S?[BHSWDQ]")>;
2108 def A64FXWrite_None : SchedWriteRes<[]> {
2110 def : InstRW<[A64FXWrite_None], (instregex "^SETFFR", "^MOVPRFX")>;
2112 def A64FXWrite_FMAIndexed : SchedWriteRes<[A64FXGI03]> {
2113   let Latency = 15;
2114   let NumMicroOps = 2;
2115   let ReleaseAtCycles = [2];
2117 def : InstRW<[A64FXWrite_FMAIndexed], (instregex "^F(MLA|MLS|MUL)_ZZZI")>;
2119 def A64FXWrite_ADR_LSL_Z : SchedWriteRes<[A64FXGI0]> {
2120   let Latency = 5;
2121   let NumMicroOps = 2;
2122   let ReleaseAtCycles = [2];
2124 def : InstRW<[A64FXWrite_ADR_LSL_Z], (instregex "^ADR_LSL_Z")>;
2126 def A64FXWrite_ASRD : SchedWriteRes<[A64FXGI0, A64FXGI01]> {
2127   let Latency = 8;
2128   let NumMicroOps = 2;
2130 def : InstRW<[A64FXWrite_ASRD], (instregex "^ASRD_Z")>;
2132 def A64FXWrite_Reduction4CycB : SchedWriteRes<[A64FXGI03]> {
2133   let Latency = 46;
2134   let NumMicroOps = 10;
2135   let ReleaseAtCycles = [10];
2137 def : InstRW<[A64FXWrite_Reduction4CycB],
2138       (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_B")>;
2140 def A64FXWrite_Reduction4CycH : SchedWriteRes<[A64FXGI03]> {
2141   let Latency = 42;
2142   let NumMicroOps = 9;
2143   let ReleaseAtCycles = [9];
2145 def : InstRW<[A64FXWrite_Reduction4CycH],
2146       (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_H")>;
2148 def A64FXWrite_Reduction4CycS : SchedWriteRes<[A64FXGI03]> {
2149   let Latency = 38;
2150   let NumMicroOps = 8;
2151   let ReleaseAtCycles = [8];
2153 def : InstRW<[A64FXWrite_Reduction4CycS],
2154       (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_S")>;
2156 def A64FXWrite_Reduction4CycD : SchedWriteRes<[A64FXGI03]> {
2157   let Latency = 34;
2158   let NumMicroOps = 7;
2159   let ReleaseAtCycles = [7];
2161 def : InstRW<[A64FXWrite_Reduction4CycD],
2162       (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_D")>;
2164 def A64FXWrite_CLAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
2165   let Latency = 29;
2167 def : InstRW<[A64FXWrite_CLAST_R], (instregex "^CLAST[AB]_R")>;
2169 def A64FXWrite_CMP : SchedWriteRes<[A64FXGI0, A64FXGI1]> {
2170   let Latency = 4;
2172 def : InstRW<[A64FXWrite_CMP], (instregex "^CMP.*_P")>;
2174 def A64FXWrite_CNTP : SchedWriteRes<[A64FXGI1, A64FXGI2]> {
2175   let Latency = 6;
2177 def : InstRW<[A64FXWrite_CNTP], (instregex "^CNTP_X")>;
2179 def A64FXWrite_CPYScalar : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
2180   let Latency = 8;
2182 def : InstRW<[A64FXWrite_CPYScalar], (instregex "^CPY_ZPmR")>;
2184 def A64FXWrite_CTERM : SchedWriteRes<[A64FXGI24]> {
2185   let Latency = 2;
2186   let ReleaseAtCycles = [2];
2188 def : InstRW<[A64FXWrite_CTERM], (instregex "^CTERM")>;
2190 def A64FXWrite_INCPScalar : SchedWriteRes<[A64FXGI1, A64FXGI2, A64FXGI4]> {
2191   let Latency = 7;
2192   let NumMicroOps = 2;
2194 def : InstRW<[A64FXWrite_INCPScalar], (instregex "^DECP_X", "^INCP_X")>;
2196 def A64FXWrite_INCPVector : SchedWriteRes<[A64FXGI0, A64FXGI1]> {
2197   let Latency = 12;
2199 def : InstRW<[A64FXWrite_INCPVector], (instregex "^DECP_Z", "^INCP_Z")>;
2201 def A64FXWrite_FADDVH : SchedWriteRes<[A64FXGI03]> {
2202   let Latency = 75;
2203   let NumMicroOps = 11;
2204   let ReleaseAtCycles = [11];
2206 def : InstRW<[A64FXWrite_FADDVH], (instrs FADDV_VPZ_H)>;
2208 def A64FXWrite_FADDVS : SchedWriteRes<[A64FXGI03]> {
2209   let Latency = 60;
2210   let NumMicroOps = 9;
2211   let ReleaseAtCycles = [9];
2213 def : InstRW<[A64FXWrite_FADDVS], (instrs FADDV_VPZ_S)>;
2215 def A64FXWrite_FADDVD : SchedWriteRes<[A64FXGI03]> {
2216   let Latency = 45;
2217   let NumMicroOps = 7;
2218   let ReleaseAtCycles = [7];
2220 def : InstRW<[A64FXWrite_FADDVD], (instrs FADDV_VPZ_D)>;
2222 def A64FXWrite_FADDAH : SchedWriteRes<[A64FXGI03]> {
2223   let Latency = 468;
2224   let NumMicroOps = 63;
2225   let ReleaseAtCycles = [63];
2227 def : InstRW<[A64FXWrite_FADDAH], (instrs FADDA_VPZ_H)>;
2229 def A64FXWrite_FADDAS : SchedWriteRes<[A64FXGI03]> {
2230   let Latency = 228;
2231   let NumMicroOps = 31;
2232   let ReleaseAtCycles = [31];
2234 def : InstRW<[A64FXWrite_FADDAS], (instrs FADDA_VPZ_S)>;
2236 def A64FXWrite_FADDAD : SchedWriteRes<[A64FXGI03]> {
2237   let Latency = 108;
2238   let NumMicroOps = 15;
2239   let ReleaseAtCycles = [15];
2241 def : InstRW<[A64FXWrite_FADDAD], (instrs FADDA_VPZ_D)>;
2243 def A64FXWrite_FCADDZ : SchedWriteRes<[A64FXGI0, A64FXGI3]> {
2244   let Latency = 15;
2245   let NumMicroOps = 2;
2247 def : InstRW<[A64FXWrite_FCADDZ], (instregex "^FCADD_Z")>;
2249 def A64FXWrite_FCMLAZ : SchedWriteRes<[A64FXGI03]> {
2250   let Latency = 15;
2251   let NumMicroOps = 3;
2252   let ReleaseAtCycles = [3];
2254 def : InstRW<[A64FXWrite_FCMLAZ], (instregex "^FCMLA_Z")>;
2256 def A64FXWrite_FDIVH : SchedWriteRes<[A64FXGI0]> {
2257   let Latency = 134;
2258   let ReleaseAtCycles = [134];
2260 def : InstRW<[A64FXWrite_FDIVH], (instregex "^F(DIVR?|SQRT)_Z.*_H")>;
2262 def A64FXWrite_FDIVS : SchedWriteRes<[A64FXGI0]> {
2263   let Latency = 98;
2264   let ReleaseAtCycles = [98];
2266 def : InstRW<[A64FXWrite_FDIVS], (instregex "^F(DIVR?|SQRT)_Z.*_S")>;
2268 def A64FXWrite_FDIVD : SchedWriteRes<[A64FXGI0]> {
2269   let Latency = 154;
2270   let ReleaseAtCycles = [154];
2272 def : InstRW<[A64FXWrite_FDIVD], (instregex "^F(DIVR?|SQRT)_Z.*_D")>;
2274 def A64FXWrite_FMAXVH : SchedWriteRes<[A64FXGI03]> {
2275   let Latency = 54;
2276   let NumMicroOps = 11;
2277   let ReleaseAtCycles = [11];
2279 def : InstRW<[A64FXWrite_FMAXVH], (instregex "^F(MAX|MIN)(NM)?V_VPZ_H")>;
2281 def A64FXWrite_FMAXVS : SchedWriteRes<[A64FXGI03]> {
2282   let Latency = 44;
2283   let NumMicroOps = 9;
2284   let ReleaseAtCycles = [9];
2286 def : InstRW<[A64FXWrite_FMAXVS], (instregex "^F(MAX|MIN)(NM)?V_VPZ_S")>;
2288 def A64FXWrite_FMAXVD : SchedWriteRes<[A64FXGI03]> { // Write resource for the D-element F(MAX|MIN)(NM)?V reductions.
2289   let Latency = 34;
2290   let NumMicroOps = 7;
2291   let ReleaseAtCycles = [7];
2293 def : InstRW<[A64FXWrite_FMAXVD], (instregex "^F(MAX|MIN)(NM)?V_VPZ_D")>; // _D forms use the D-element resource defined above, not the H-element one.
2295 def A64FXWrite_INDEX_RI_BH : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
2296   let Latency = 17;
2297   let NumMicroOps = 2;
2298   let ReleaseAtCycles = [2, 2];
2300 def : InstRW<[A64FXWrite_INDEX_RI_BH], (instregex "^INDEX_(RI|IR)_[BH]")>;
2302 def A64FXWrite_INDEX_RI_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
2303   let Latency = 13;
2304   let NumMicroOps = 1;
2306 def : InstRW<[A64FXWrite_INDEX_RI_SD], (instregex "^INDEX_(RI|IR)_[SD]")>;
2308 def A64FXWrite_INDEX_II_BH : SchedWriteRes<[A64FXGI0]> {
2309   let Latency = 13;
2310   let NumMicroOps = 2;
2311   let ReleaseAtCycles = [2];
2313 def : InstRW<[A64FXWrite_INDEX_II_BH], (instregex "^INDEX_II_[BH]")>;
2315 def A64FXWrite_INDEX_RR_BH : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI3]> {
2316   let Latency = 17;
2317   let NumMicroOps = 3;
2318   let ReleaseAtCycles = [2, 2, 1];
2320 def : InstRW<[A64FXWrite_INDEX_RR_BH], (instregex "^INDEX_RR_[BH]")>;
2322 def A64FXWrite_INDEX_RR_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
2323   let Latency = 17;
2324   let NumMicroOps = 2;
2325   let ReleaseAtCycles = [2, 1];
2327 def : InstRW<[A64FXWrite_INDEX_RR_SD], (instregex "^INDEX_RR_[SD]")>;
2329 def A64FXWrite_INSR_ZR : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
2330   let Latency = 10;
2332 def : InstRW<[A64FXWrite_INSR_ZR], (instregex "^INSR_ZR")>;
2334 def A64FXWrite_LAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]> { // Write resource for the LAST[AB]_R* forms (distinct from A64FXWrite_CLAST_R).
2335   let Latency = 25;
2337 def : InstRW<[A64FXWrite_LAST_R], (instregex "^LAST[AB]_R")>; // Bind to the LAST resource defined above rather than the CLAST one.
2339 def A64FXWrite_GLD_S_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
2340   let Latency = 19;
2341   let ReleaseAtCycles = [2, 4, 4];
2343 def : InstRW<[A64FXWrite_GLD_S_ZI],
2344       (instregex "^GLD(FF)?1W_IMM", "^GLD(FF)?1S?[BHW]_S_IMM")>;
2346 def A64FXWrite_GLD_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
2347   let Latency = 16;
2348   let ReleaseAtCycles = [1, 2, 2];
2350 def : InstRW<[A64FXWrite_GLD_D_ZI],
2351       (instregex "^GLD(FF)?1D_IMM", "^GLD(FF)?1S?[BHW]_D_IMM")>;
2353 def A64FXWrite_GLD_S_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
2354   let Latency = 23;
2355   let ReleaseAtCycles = [2, 1, 4, 4];
2357 def : InstRW<[A64FXWrite_GLD_S_RZ],
2358       (instregex "^GLD(FF)?1W_[^DI]", "^GLD(FF)?1S?[BHW]_S_[^I]")>;
2360 def A64FXWrite_GLD_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
2361   let Latency = 20;
2362   let ReleaseAtCycles = [1, 1, 2, 2];
2364 def : InstRW<[A64FXWrite_GLD_D_RZ],
2365       (instregex "^GLD(FF)?1D_[^I]", "^GLD(FF)?1D$", "^GLD(FF)?1S?[BHW]_D_[^I]",
2366                  "^GLD(FF)?1S?[BHW]_D$")>;
2368 def A64FXWrite_LD2_BH : SchedWriteRes<[A64FXGI56]> {
2369   let Latency = 15;
2370   let NumMicroOps = 3;
2371   let ReleaseAtCycles = [9];
2373 def : InstRW<[A64FXWrite_LD2_BH], (instregex "^LD2[BH]")>;
2375 def A64FXWrite_LD2_WD_IMM : SchedWriteRes<[A64FXGI56]> {
2376   let Latency = 11;
2377   let NumMicroOps = 2;
2378   let ReleaseAtCycles = [2];
2380 def : InstRW<[A64FXWrite_LD2_WD_IMM], (instregex "^LD2[WD]_IMM")>;
2382 def A64FXWrite_LD2_WD : SchedWriteRes<[A64FXGI56]> {
2383   let Latency = 12;
2384   let NumMicroOps = 3;
2385   let ReleaseAtCycles = [3];
2387 def : InstRW<[A64FXWrite_LD2_WD], (instregex "^LD2[WD]$")>;
2389 def A64FXWrite_LD3_BH : SchedWriteRes<[A64FXGI56]> {
2390   let Latency = 15;
2391   let NumMicroOps = 4;
2392   let ReleaseAtCycles = [13];
2394 def : InstRW<[A64FXWrite_LD3_BH], (instregex "^LD3[BH]")>;
2396 def A64FXWrite_LD3_WD_IMM : SchedWriteRes<[A64FXGI56]> {
2397   let Latency = 11;
2398   let NumMicroOps = 3;
2399   let ReleaseAtCycles = [3];
2401 def : InstRW<[A64FXWrite_LD3_WD_IMM], (instregex "^LD3[WD]_IMM")>;
2403 def A64FXWrite_LD3_WD : SchedWriteRes<[A64FXGI56]> {
2404   let Latency = 12;
2405   let NumMicroOps = 4;
2406   let ReleaseAtCycles = [4];
2408 def : InstRW<[A64FXWrite_LD3_WD], (instregex "^LD3[WD]$")>;
2410 def A64FXWrite_LD4_BH : SchedWriteRes<[A64FXGI56]> {
2411   let Latency = 15;
2412   let NumMicroOps = 5;
2413   let ReleaseAtCycles = [17];
2415 def : InstRW<[A64FXWrite_LD4_BH], (instregex "^LD4[BH]")>;
2417 def A64FXWrite_LD4_WD_IMM : SchedWriteRes<[A64FXGI56]> {
2418   let Latency = 11;
2419   let NumMicroOps = 4;
2420   let ReleaseAtCycles = [4];
2422 def : InstRW<[A64FXWrite_LD4_WD_IMM], (instregex "^LD4[WD]_IMM")>;
2424 def A64FXWrite_LD4_WD : SchedWriteRes<[A64FXGI56]> {
2425   let Latency = 12;
2426   let NumMicroOps = 5;
2427   let ReleaseAtCycles = [5];
2429 def : InstRW<[A64FXWrite_LD4_WD], (instregex "^LD4[WD]$")>;
2431 def A64FXWrite_PRF : SchedWriteRes<[A64FXGI56]> {
2433 def : InstRW<[A64FXWrite_PRF], (instregex "^PRF._PR")>;
2435 def A64FXWrite_PRF_W_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
2436   let ReleaseAtCycles = [2, 1, 4];
2438 def : InstRW<[A64FXWrite_PRF_W_RZ], (instregex "^PRF._S_[^P]")>;
2440 def A64FXWrite_PRF_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
2441   let ReleaseAtCycles = [2, 4];
2443 def : InstRW<[A64FXWrite_PRF_W_ZI], (instregex "^PRF._S_PZI")>;
2445 def A64FXWrite_PRF_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
2446   let ReleaseAtCycles = [1, 1, 2];
2448 def : InstRW<[A64FXWrite_PRF_D_RZ], (instregex "^PRF._D_[^P]")>;
2450 def A64FXWrite_PRF_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
2451   let ReleaseAtCycles = [1, 2];
2453 def : InstRW<[A64FXWrite_PRF_D_ZI], (instregex "^PRF._D_PZI")>;
2455 def A64FXWrite_SDIV_S : SchedWriteRes<[A64FXGI0]> {
2456   let Latency = 114;
2457   let ReleaseAtCycles = [114];
2459 def : InstRW<[A64FXWrite_SDIV_S], (instregex "^[SU]DIVR?.*_S")>;
2461 def A64FXWrite_SDIV_D : SchedWriteRes<[A64FXGI0]> {
2462   let Latency = 178;
2463   let ReleaseAtCycles = [178];
2465 def : InstRW<[A64FXWrite_SDIV_D], (instregex "^[SU]DIVR?.*_D")>;
2467 def A64FXWrite_SDOT_I : SchedWriteRes<[A64FXGI0, A64FXGI3]> {
2468   let Latency = 15;
2469   let NumMicroOps = 2;
2471 def : InstRW<[A64FXWrite_SDOT_I], (instregex "^[SU]DOT_ZZZI")>;
2473 def A64FXWrite_SQINC_Scalar : SchedWriteRes<[A64FXGI24]> {
2474   let Latency = 2;
2475   let ReleaseAtCycles = [2];
2477 def : InstRW<[A64FXWrite_SQINC_Scalar], (instregex "^[SU]Q(INC|DEC)[BHWD]_[WX]")>;
2479 def A64FXWrite_SQINCP_X : SchedWriteRes<[A64FXGI24, A64FXGI3]> {
2480   let Latency = 6;
2481   let NumMicroOps = 2;
2482   let ReleaseAtCycles = [3, 1];
2484 def : InstRW<[A64FXWrite_SQINCP_X], (instregex "^[SU]Q(INC|DEC)P_[WX]")>;
2486 def A64FXWrite_SQINCP_Z : SchedWriteRes<[A64FXGI24, A64FXGI3]> {
2487   let Latency = 12;
2489 def : InstRW<[A64FXWrite_SQINCP_Z], (instregex "^[SU]Q(INC|DEC)P_Z")>;
2491 def A64FXWrite_ST1 : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
2492   let Latency = 11;
2494 def : InstRW<[A64FXWrite_ST1], (instregex "^ST(NT)?1[BHWD]")>;
2496 def A64FXWrite_SST1_W_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
2497   let Latency = 20;
2498   let NumMicroOps = 8;
2499   let ReleaseAtCycles = [8, 8, 8, 8];
2501 def : InstRW<[A64FXWrite_SST1_W_RZ],
2502       (instregex "^SST1[BH]_S(_[^I]|$)", "^SST1W(_[^ID]|$)")>;
2504 def A64FXWrite_SST1_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
2505   let Latency = 20;
2506   let NumMicroOps = 4;
2507   let ReleaseAtCycles = [4, 4, 4, 4];
2509 def : InstRW<[A64FXWrite_SST1_D_RZ],
2510       (instregex "^SST1[BHW]_D(_[^I]|$)", "^SST1D(_[^I]|$)")>;
2512 def A64FXWrite_SST1_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
2513   let Latency = 16;
2514   let NumMicroOps = 8;
2515   let ReleaseAtCycles = [12, 8, 8];
2517 def : InstRW<[A64FXWrite_SST1_W_ZI],
2518       (instregex "^SST1[BH]_S_I", "^SST1W_I")>;
2520 def A64FXWrite_SST1_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
2521   let Latency = 16;
2522   let NumMicroOps = 4;
2523   let ReleaseAtCycles = [4, 4, 4];
2525 def : InstRW<[A64FXWrite_SST1_D_ZI],
2526       (instregex "^SST1[BHW]_D_I", "^SST1D_I")>;
2528 def A64FXWrite_ST2_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
2529   let Latency = 12;
2530   let NumMicroOps = 3;
2531   let ReleaseAtCycles = [8, 9];
2533 def : InstRW<[A64FXWrite_ST2_BH], (instregex "^ST2[BH]")>;
2535 def A64FXWrite_ST2_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> { // 11-cycle, 2-micro-op resource for ST2 W/D.
2536   let Latency = 11;
2537   let NumMicroOps = 2;
2538   let ReleaseAtCycles = [2, 2];
2540 def : InstRW<[A64FXWrite_ST2_WD_RI], (instregex "^ST2[WD]$")>; // NOTE(review): "_RI" is bound to the unsuffixed names, whereas LD2 binds its 11-cycle resource to "^LD2[WD]_IMM" - confirm RI/RR regexes are not swapped.
2542 def A64FXWrite_ST2_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> { // 12-cycle, 3-micro-op resource for ST2 W/D.
2543   let Latency = 12;
2544   let NumMicroOps = 3;
2545   let ReleaseAtCycles = [2, 3];
2547 def : InstRW<[A64FXWrite_ST2_WD_RR], (instregex "^ST2[WD]_I")>; // NOTE(review): "_RR" is bound to the "_I..." names, whereas LD2 binds its 12-cycle resource to "^LD2[WD]$" - confirm.
2549 def A64FXWrite_ST3_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
2550   let Latency = 15;
2551   let NumMicroOps = 4;
2552   let ReleaseAtCycles = [12, 13];
2554 def : InstRW<[A64FXWrite_ST3_BH], (instregex "^ST3[BH]")>;
2556 def A64FXWrite_ST3_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> { // 11-cycle, 3-micro-op resource for ST3 W/D.
2557   let Latency = 11;
2558   let NumMicroOps = 3;
2559   let ReleaseAtCycles = [3, 3];
2561 def : InstRW<[A64FXWrite_ST3_WD_RI], (instregex "^ST3[WD]$")>; // NOTE(review): "_RI" is bound to the unsuffixed names, whereas LD3 binds its 11-cycle resource to "^LD3[WD]_IMM" - confirm RI/RR regexes are not swapped.
2563 def A64FXWrite_ST3_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> { // 12-cycle, 4-micro-op resource for ST3 W/D.
2564   let Latency = 12;
2565   let NumMicroOps = 4;
2566   let ReleaseAtCycles = [3, 4];
2568 def : InstRW<[A64FXWrite_ST3_WD_RR], (instregex "^ST3[WD]_I")>; // NOTE(review): "_RR" is bound to the "_I..." names, whereas LD3 binds its 12-cycle resource to "^LD3[WD]$" - confirm.
2570 def A64FXWrite_ST4_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
2571   let Latency = 15;
2572   let NumMicroOps = 5;
2573   let ReleaseAtCycles = [16, 17];
2575 def : InstRW<[A64FXWrite_ST4_BH], (instregex "^ST4[BH]")>;
2577 def A64FXWrite_ST4_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> { // 11-cycle, 4-micro-op resource for ST4 W/D.
2578   let Latency = 11;
2579   let NumMicroOps = 4;
2580   let ReleaseAtCycles = [4, 4];
2582 def : InstRW<[A64FXWrite_ST4_WD_RI], (instregex "^ST4[WD]$")>; // NOTE(review): "_RI" is bound to the unsuffixed names, whereas LD4 binds its 11-cycle resource to "^LD4[WD]_IMM" - confirm RI/RR regexes are not swapped.
2584 def A64FXWrite_ST4_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> { // 12-cycle, 5-micro-op resource for ST4 W/D.
2585   let Latency = 12;
2586   let NumMicroOps = 5;
2587   let ReleaseAtCycles = [4, 5];
2589 def : InstRW<[A64FXWrite_ST4_WD_RR], (instregex "^ST4[WD]_I")>; // NOTE(review): "_RR" is bound to the "_I..." names, whereas LD4 binds its 12-cycle resource to "^LD4[WD]$" - confirm.
2591 def A64FXWrite_STR_P : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
2592   let Latency = 11;
2594 def : InstRW<[A64FXWrite_STR_P], (instrs STR_PXI)>;
2596 def A64FXWrite_STR_Z : SchedWriteRes<[A64FXGI0, A64FXGI5]> {
2597   let Latency = 11;
2599 def : InstRW<[A64FXWrite_STR_Z], (instrs STR_ZXI)>;
2601 def A64FXWrite_WHILE : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
2602   let Latency = 4;
2604 def : InstRW<[A64FXWrite_WHILE], (instregex "^WHILEL._P")>;
2606 def A64FXWrite_WRFFR : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
2607   let Latency = 3;
2608   let NumMicroOps = 2;
2610 def : InstRW<[A64FXWrite_WRFFR], (instrs WRFFR)>;
2612 } // SchedModel = A64FXModel