[ARM] MVE compare vector splat combine
[llvm-complete.git] / lib / Target / ARM / ARMScheduleR52.td
blobd1cbf754b5a1bf0f0f8c97c3e583ce1cb927ba24
1 //==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
11 //===----------------------------------------------------------------------===//
13 // ===---------------------------------------------------------------------===//
14 // The Cortex-R52 is an in-order pipelined superscalar microprocessor with
15 // an 8-stage pipeline. It can issue a maximum of two instructions per cycle.
16 // There are two ALUs, one LDST, one MUL  and a non-pipelined integer DIV.
17 // A number of forwarding paths enable results of computations to be input
18 // to subsequent operations before they are written to registers.
19 // This scheduler is a MachineScheduler. See TargetSchedule.td for details.
21 def CortexR52Model : SchedMachineModel {
22   let MicroOpBufferSize = 0;  // R52 is in-order processor
23   let IssueWidth = 2;         // 2 micro-ops dispatched per cycle
24   let LoadLatency = 1;        // Optimistic, assuming no misses
25   let MispredictPenalty = 8;  // A branch direction mispredict, including PFU
26   let CompleteModel = 0;      // Covers instructions applicable to cortex-r52.
30 //===----------------------------------------------------------------------===//
31 // Define each kind of processor resource and number available.
33 // Modeling each pipeline as a ProcResource using the BufferSize = 0 since
34 // Cortex-R52 is an in-order processor.
36 def R52UnitALU    : ProcResource<2> { let BufferSize = 0; } // Int ALU
37 def R52UnitMAC    : ProcResource<1> { let BufferSize = 0; } // Int MAC
38 def R52UnitDiv    : ProcResource<1> { let BufferSize = 0; } // Int Division
39 def R52UnitLd     : ProcResource<1> { let BufferSize = 0; } // Load/Store
40 def R52UnitB      : ProcResource<1> { let BufferSize = 0; } // Branch
41 def R52UnitFPALU  : ProcResource<2> { let BufferSize = 0; } // FP ALU
42 def R52UnitFPMUL  : ProcResource<2> { let BufferSize = 0; } // FP MUL
43 def R52UnitFPDIV  : ProcResource<1> { let BufferSize = 0; } // FP DIV
45 // Cortex-R52 specific SchedReads
46 def R52Read_ISS   : SchedRead;
47 def R52Read_EX1   : SchedRead;
48 def R52Read_EX2   : SchedRead;
49 def R52Read_WRI   : SchedRead;
50 def R52Read_F0    : SchedRead; // F0 maps to ISS stage of integer pipe
51 def R52Read_F1    : SchedRead;
52 def R52Read_F2    : SchedRead;
55 //===----------------------------------------------------------------------===//
56 // Subtarget-specific SchedWrite types which map ProcResources and set latency.
58 let SchedModel = CortexR52Model in {
60 // ALU - Write occurs in Late EX2 (independent of whether shift was required)
61 def : WriteRes<WriteALU, [R52UnitALU]> { let Latency = 3; }
62 def : WriteRes<WriteALUsi, [R52UnitALU]> { let Latency = 3; }
63 def : WriteRes<WriteALUsr, [R52UnitALU]> { let Latency = 3; }
64 def : WriteRes<WriteALUSsr, [R52UnitALU]> { let Latency = 3; }
66 // Compares
67 def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; }
68 def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; }
69 def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
71 // Multiply - aliased to sub-target specific later
73 // Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
74 def : WriteRes<WriteDIV, [R52UnitDiv]> {
75   let Latency = 8; let ResourceCycles = [8]; // non-pipelined
78 // Branches  - LR written in Late EX2
79 def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; }
80 def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; }
81 def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; }
83 // Misc
84 def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
86 // Integer pipeline by-passes
87 def : ReadAdvance<ReadALU, 1>;   // Operand needed in EX1 stage
88 def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
89 def : ReadAdvance<ReadMUL, 0>;
90 def : ReadAdvance<ReadMAC, 0>;
92 // Floating-point. Map target-defined SchedReadWrites to subtarget
93 def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]> { let Latency = 6; }
95 def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]> {
96   let Latency = 6;
99 def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]> {
100   let Latency = 11;     // as it is internally two insns (MUL then ADD)
103 def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
104                               R52UnitFPALU, R52UnitFPALU]> {
105   let Latency = 11;
108 def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
109   let Latency = 7;          // FP div takes fixed #cycles
110   let ResourceCycles = [7]; // is not pipelined
113 def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
114   let Latency = 17;
115   let ResourceCycles = [17];
118 def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }
119 def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; }
121 // Overridden via InstRW for this processor.
122 def : WriteRes<WriteVST1, []>;
123 def : WriteRes<WriteVST2, []>;
124 def : WriteRes<WriteVST3, []>;
125 def : WriteRes<WriteVST4, []>;
127 def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
128 def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1
130 //===----------------------------------------------------------------------===//
131 // Subtarget-specific SchedReadWrites.
133 // Forwarding information - based on when an operand is read
134 def : ReadAdvance<R52Read_ISS, 0>;
135 def : ReadAdvance<R52Read_EX1, 1>;
136 def : ReadAdvance<R52Read_EX2, 2>;
137 def : ReadAdvance<R52Read_F0, 0>;
138 def : ReadAdvance<R52Read_F1, 1>;
139 def : ReadAdvance<R52Read_F2, 2>;
142 // Cortex-R52 specific SchedWrites for use with InstRW
143 def R52WriteMAC        : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
144 def R52WriteMACHi      : SchedWriteRes<[R52UnitMAC]> {
145   let Latency = 4; let NumMicroOps = 0;
147 def R52WriteDIV        : SchedWriteRes<[R52UnitDiv]> {
148   let Latency = 8; let ResourceCycles = [8]; // not pipelined
150 def R52WriteLd         : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
151 def R52WriteST         : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
152 def R52WriteAdr        : SchedWriteRes<[]> { let Latency = 0; }
153 def R52WriteCC         : SchedWriteRes<[]> { let Latency = 0; }
154 def R52WriteALU_EX1    : SchedWriteRes<[R52UnitALU]> { let Latency = 2; }
155 def R52WriteALU_EX2    : SchedWriteRes<[R52UnitALU]> { let Latency = 3; }
156 def R52WriteALU_WRI    : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }
158 def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
159 def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }
161 // Alias generics to sub-target specific
162 def : SchedAlias<WriteMUL16, R52WriteMAC>;
163 def : SchedAlias<WriteMUL32, R52WriteMAC>;
164 def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
165 def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;
166 def : SchedAlias<WriteMAC16, R52WriteMAC>;
167 def : SchedAlias<WriteMAC32, R52WriteMAC>;
168 def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
169 def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;
170 def : SchedAlias<WritePreLd, R52WriteLd>;
171 def : SchedAlias<WriteLd, R52WriteLd>;
172 def : SchedAlias<WriteST, R52WriteST>;
174 def R52WriteFPALU_F3   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; }
175 def R52Write2FPALU_F3  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
176   let Latency = 4;
178 def R52WriteFPALU_F4   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 5; }
179 def R52Write2FPALU_F4  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
180   let Latency = 5;
182 def R52WriteFPALU_F5   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 6; }
183 def R52Write2FPALU_F5  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
184   let Latency = 6;
186 def R52WriteFPMUL_F5   : SchedWriteRes<[R52UnitFPMUL]> { let Latency = 6; }
187 def R52Write2FPMUL_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]> {
188   let Latency = 6;
190 def R52WriteFPMAC_F5   : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]> {
191   let Latency = 11;     // as it is internally two insns (MUL then ADD)
193 def R52Write2FPMAC_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
194                                          R52UnitFPALU, R52UnitFPALU]> {
195   let Latency = 11;
198 def R52WriteFPLd_F4    : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
199 def R52WriteFPST_F4    : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
201 //===----------------------------------------------------------------------===//
202 // Floating-point. Map target defined SchedReadWrites to processor specific ones
204 def : SchedAlias<WriteFPCVT,   R52WriteFPALU_F5>;
205 def : SchedAlias<WriteFPMOV, R52WriteFPALU_F3>;
206 def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
207 def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
209 //===----------------------------------------------------------------------===//
210 // Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
212 def : InstRW<[WriteALU], (instrs COPY)>;
214 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
215       (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
216       "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;
218 def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
219       (instregex "MOVCCi32imm", "MOVi32imm", "t2MOVCCi", "t2MOVi")>;
220 def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
221       (instregex "MOV_ga_pcrel$")>;
222 def : InstRW<[R52WriteLd,R52Read_ISS],
223       (instregex "MOV_ga_pcrel_ldr")>;
225 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "SEL", "t2SEL")>;
227 def : InstRW< [R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
228       (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
229       "(t|t2)UBFX", "(t|t2)SBFX")>;
231 // Saturating arithmetic
232 def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
233       (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
234       "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
235       "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
236       "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
237       "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
238       "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;
240 // Parallel arithmetic
241 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
242       (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
243       "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
244       "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
245       "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
247 // Flag setting.
248 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
249       (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
250       "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
251       "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
252       "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
253       "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
254       "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
256 // Sum of Absolute Difference
257 def : InstRW< [R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
258       (instregex "USAD8", "t2USAD8", "USADA8", "t2USADA8") >;
260 // Integer Multiply
261 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
262       (instregex "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
263       "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDX", "t2MUL",
264       "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
265       "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
267 // Multiply Accumulate
268 // Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
269 // The store pipeline is used partly for 64-bit operations.
270 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
271       (instregex "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
272       "t2MLA", "t2MLS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
273       "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
274       "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
275       "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
276       "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
277       "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
278       "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
279       "SMLAL", "UMLAL", "SMLALBT",
280       "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
281       "UMAAL", "t2SMLAL", "t2UMLAL",
282       "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
283       "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
285 def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS],
286       (instregex "t2SDIV", "t2UDIV")>;
288 // Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
289 // However, that's non-trivial to specify, so we keep it uniform
290 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
291       (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
292       "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "t2LDREX",
293       "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)",
294       "LDRH$",  "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
295       "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
296       "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$", "LDA", "t2LDA")>;
297 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
298       (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
299       "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
300       "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
301       "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
302       "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)?",
303       "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
305 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "MOVS?sr", "t2MOVS?sr")>;
306 def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex "MOVT", "t2MOVT")>;
308 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri", "ANDS?ri",
309       "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
310       "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri", "t2MVN",
311       "t2ORRri", "t2RSBS?ri", "t2SBCri")>;
313 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "AD(C|D)S?rr",
314       "ANDS?rr", "BICS?rr", "CRC", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
315       "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr", "t2SBCrr")>;
317 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi",
318       "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
319       "t2AD(C|D)S?rs", "t2ANDS?rs", "t2BICS?rs", "t2EORrs", "t2ORRrs", "t2RSBrs", "t2SBCrs")>;
321 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
322       (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
323       "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;
325 def : InstRW<[R52WriteALU_EX1],
326     (instregex "ADR", "MOVsi", "MVNS?s?i", "t2MOVS?si")>;
328 def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex "ASRi", "RORS?i")>;
329 def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
330       (instregex "ASRr", "RORS?r", "LSR", "LSL")>;
332 def : InstRW<[R52WriteCC, R52Read_EX1], (instregex "CMPri", "CMNri")>;
333 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1], (instregex "CMPrr", "CMNzrr")>;
334 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS], (instregex "CMPrsi", "CMNzrsi")>;
335 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS], (instregex "CMPrsr", "CMNzrsr")>;
337 def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
338       (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;
340 def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;
342 def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
343 def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;
345 // Integer Load, Multiple.
346 foreach Lat = 3-25 in {
347   def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
348     let Latency = Lat;
349   }
350   def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
351     let Latency = Lat;
352     let NumMicroOps = 0;
353   }
355 foreach NAddr = 1-16 in {
356   def R52ILDMAddr#NAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
358 def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; }
359 def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>;
// Variadic write for integer load-multiple. Each predicated variant is chosen
// by comparing TII->getNumLDMAddresses(*MI) against N (see the
// R52ILDMAddr<N>Pred definitions) and supplies one write per loaded register.
// Write latencies start at 4 cycles and grow by one cycle per additional
// register. Each R52WriteILDM<Lat>Cy uses R52UnitLd; the <Lat>CyNo flavour
// carries only the latency (no resource, zero micro-ops).
def R52WriteILDM : SchedWriteVariant<[
    // 2 registers
    SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,

    // 3 registers
    SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                R52WriteILDM6Cy]>,
    // 4 registers
    SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                R52WriteILDM6Cy, R52WriteILDM7Cy]>,

    // 5 registers
    SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                R52WriteILDM6Cy, R52WriteILDM7Cy,
                                R52WriteILDM8Cy]>,
    // 6 registers
    SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                R52WriteILDM6Cy, R52WriteILDM7Cy,
                                R52WriteILDM8Cy, R52WriteILDM9Cy]>,

    // 7 registers
    SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                R52WriteILDM6Cy, R52WriteILDM7Cy,
                                R52WriteILDM8Cy, R52WriteILDM9Cy,
                                R52WriteILDM10Cy]>,
    // 8 registers
    SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                R52WriteILDM6Cy, R52WriteILDM7Cy,
                                R52WriteILDM8Cy, R52WriteILDM9Cy,
                                R52WriteILDM10Cy, R52WriteILDM11Cy]>,

    // 9 registers
    SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                R52WriteILDM6Cy, R52WriteILDM7Cy,
                                R52WriteILDM8Cy, R52WriteILDM9Cy,
                                R52WriteILDM10Cy, R52WriteILDM11Cy,
                                R52WriteILDM12Cy]>,
    // 10 registers
    SchedVar<R52ILDMAddr10Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy]>,

    // 11 registers
    SchedVar<R52ILDMAddr11Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy]>,
    // 12 registers
    SchedVar<R52ILDMAddr12Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy, R52WriteILDM15Cy]>,

    // 13 registers
    SchedVar<R52ILDMAddr13Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy, R52WriteILDM15Cy,
                                 R52WriteILDM16Cy]>,
    // 14 registers
    SchedVar<R52ILDMAddr14Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy, R52WriteILDM15Cy,
                                 R52WriteILDM16Cy, R52WriteILDM17Cy]>,

    // 15 registers
    SchedVar<R52ILDMAddr15Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy, R52WriteILDM15Cy,
                                 R52WriteILDM16Cy, R52WriteILDM17Cy,
                                 R52WriteILDM18Cy]>,
    // 16 registers. This variant previously reused R52ILDMAddr15Pred, which
    // made it unreachable (the 15-register variant above always matched
    // first) and sent 16-register loads to the NoSchedPred fallback.
    SchedVar<R52ILDMAddr16Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy, R52WriteILDM15Cy,
                                 R52WriteILDM16Cy, R52WriteILDM17Cy,
                                 R52WriteILDM18Cy, R52WriteILDM19Cy]>,

    // Unknown number of registers, just use resources for two registers.
    // The remaining writes only carry latency (CyNo = no resource, no uops).
    SchedVar<NoSchedPred,       [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6CyNo, R52WriteILDM7CyNo,
                                 R52WriteILDM8CyNo, R52WriteILDM9CyNo,
                                 R52WriteILDM10CyNo, R52WriteILDM11CyNo,
                                 R52WriteILDM12CyNo, R52WriteILDM13CyNo,
                                 R52WriteILDM14CyNo, R52WriteILDM15CyNo,
                                 R52WriteILDM16CyNo, R52WriteILDM17CyNo,
                                 R52WriteILDM18Cy, R52WriteILDM19Cy]>
]> { let Variadic = 1; }
451 // Integer Store, Multiple
452 def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
453   let Latency = 4;
454   let NumMicroOps = 2;
456 foreach NumAddr = 1-16 in {
457   def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
459 def R52WriteISTM : SchedWriteVariant<[
460     SchedVar<R52ILDMAddr2Pred, [R52WriteISTM2]>,
461     SchedVar<R52ILDMAddr3Pred, [R52WriteISTM3]>,
462     SchedVar<R52ILDMAddr4Pred, [R52WriteISTM4]>,
463     SchedVar<R52ILDMAddr5Pred, [R52WriteISTM5]>,
464     SchedVar<R52ILDMAddr6Pred, [R52WriteISTM6]>,
465     SchedVar<R52ILDMAddr7Pred, [R52WriteISTM7]>,
466     SchedVar<R52ILDMAddr8Pred, [R52WriteISTM8]>,
467     SchedVar<R52ILDMAddr9Pred, [R52WriteISTM9]>,
468     SchedVar<R52ILDMAddr10Pred,[R52WriteISTM10]>,
469     SchedVar<R52ILDMAddr11Pred,[R52WriteISTM11]>,
470     SchedVar<R52ILDMAddr12Pred,[R52WriteISTM12]>,
471     SchedVar<R52ILDMAddr13Pred,[R52WriteISTM13]>,
472     SchedVar<R52ILDMAddr14Pred,[R52WriteISTM14]>,
473     SchedVar<R52ILDMAddr15Pred,[R52WriteISTM15]>,
474     SchedVar<R52ILDMAddr16Pred,[R52WriteISTM16]>,
475     // Unknown number of registers, just use resources for two registers.
476     SchedVar<NoSchedPred,      [R52WriteISTM2]>
479 def : InstRW<[R52WriteILDM, R52Read_ISS],
480       (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
481       "(t|sys)LDM(IA|DA|DB|IB)$")>;
482 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
483       (instregex "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
484 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
485         (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "tPOP")>;
487 // Integer Store, Single Element
488 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
489       (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", "SRS", "t2SRS",
490       "t2SRSDB", "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH", "t2STR(i12|i8|s)$",
491       "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
493 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
494       (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
495       "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
496       "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
497       "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
499 // Integer Store, Dual
500 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
501     (instregex "STRD$", "t2STRDi8", "STL", "t2STL")>;
502 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
503     (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
505 def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
506     (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
507 def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
508     (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
509     "tPUSH")>;
511 // LDRLIT pseudo instructions, they expand to LDR + PICADD
512 def : InstRW<[R52WriteLd],
513       (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel$")>;
514 // LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
515 def : InstRW<[R52WriteLd], (instregex "LDRLIT_ga_pcrel_ldr")>;
519 //===----------------------------------------------------------------------===//
520 // VFP, Floating Point Support
521 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fd|hd)")>;
522 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fq|hq)")>;
524 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(D|S|H)")>;
525 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(fd|hd)")>;
526 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VABS(fq|hq)")>;
528 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fd|hd)")>;
529 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fq|hq)")>;
531 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)$")>;
532 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
534 def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
535 def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
538 //===----------------------------------------------------------------------===//
539 // Neon Support
541 // vector multiple load stores
542 foreach NumAddr = 1-16 in {
543   def R52LMAddrPred#NumAddr :
544     SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
546 foreach Lat = 1-32 in {
547   def R52WriteLM#Lat#Cy : SchedWriteRes<[]> {
548     let Latency = Lat;
549   }
551 foreach Num = 1-32 in { // reserve LdSt resource, no dual-issue
552   def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
553     let Latency = 0;
554     let NumMicroOps = Num;
555     let ResourceCycles = [Num];
556   }
// Variadic write for vector load-multiple. Variants are selected by the
// R52LMAddrPred<N> predicates, which test MI->getNumOperands() == N; two
// consecutive operand counts map to the same number of D registers.
// Each variant lists one latency-only R52WriteLM<Lat>Cy per loaded D register
// (starting at 5 cycles, +1 per register) followed by an R52ReserveLd<N>Cy
// entry that occupies R52UnitLd for N cycles so nothing can dual-issue on
// the load/store pipe meanwhile.
def R52WriteVLDM : SchedWriteVariant<[
  // 1 D reg
  SchedVar<R52LMAddrPred1,  [R52WriteLM5Cy,
                             R52ReserveLd5Cy]>,
  SchedVar<R52LMAddrPred2,  [R52WriteLM5Cy,
                             R52ReserveLd5Cy]>,

  // 2 D reg
  SchedVar<R52LMAddrPred3,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52ReserveLd6Cy]>,
  SchedVar<R52LMAddrPred4,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52ReserveLd6Cy]>,

  // 3 D reg
  // NOTE(review): this variant used R52ReserveLd4Cy while its twin below
  // (and every other pair in this table) reserves as many cycles as the last
  // write's latency. Aligned to R52ReserveLd7Cy; confirm against the
  // Cortex-R52 optimization guide.
  SchedVar<R52LMAddrPred5,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy,
                             R52ReserveLd7Cy]>,
  SchedVar<R52LMAddrPred6,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy,
                             R52ReserveLd7Cy]>,

  // 4 D reg
  SchedVar<R52LMAddrPred7,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52ReserveLd8Cy]>,
  SchedVar<R52LMAddrPred8,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52ReserveLd8Cy]>,

  // 5 D reg
  SchedVar<R52LMAddrPred9,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy,
                             R52ReserveLd9Cy]>,
  SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy,
                             R52ReserveLd9Cy]>,

  // 6 D reg
  SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52ReserveLd10Cy]>,
  SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52ReserveLd10Cy]>,

  // 7 D reg
  SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy,
                             R52ReserveLd11Cy]>,
  SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy,
                             R52ReserveLd11Cy]>,

  // 8 D reg
  // These two variants were keyed on Pred14/Pred15: the Pred14 copy was
  // shadowed by the 7-D-reg variant above and the 16-operand form was never
  // matched. Use Pred15/Pred16, consistent with the R52WriteSTM table.
  SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy, R52WriteLM12Cy,
                             R52ReserveLd12Cy]>,
  SchedVar<R52LMAddrPred16, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy, R52WriteLM12Cy,
                             R52ReserveLd12Cy]>,

  // unknown number of reg.
  SchedVar<NoSchedPred,     [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy, R52WriteLM12Cy,
                             R52ReserveLd5Cy]>
]> { let Variadic = 1; }
638 // variable stores. Cannot dual-issue
639 def R52WriteSTM5  : SchedWriteRes<[R52UnitLd]> {
640   let Latency = 5;
641   let NumMicroOps = 2;
642   let ResourceCycles = [1];
644 def R52WriteSTM6  : SchedWriteRes<[R52UnitLd]> {
645   let Latency = 6;
646   let NumMicroOps = 4;
647   let ResourceCycles = [2];
// Store-multiple write: 6 micro-ops, holds R52UnitLd for 3 cycles, latency 7.
def R52WriteSTM7  : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 6;
  let ResourceCycles = [3];
  let Latency = 7;
}
// Store-multiple write: 8 micro-ops, holds R52UnitLd for 4 cycles, latency 8.
def R52WriteSTM8  : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 8;
  let ResourceCycles = [4];
  let Latency = 8;
}
// Store-multiple write: 10 micro-ops, holds R52UnitLd for 5 cycles, latency 9.
def R52WriteSTM9  : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 10;
  let ResourceCycles = [5];
  let Latency = 9;
}
// Store-multiple write: 12 micro-ops, holds R52UnitLd for 6 cycles,
// latency 10.
def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 12;
  let ResourceCycles = [6];
  let Latency = 10;
}
// Store-multiple write: 14 micro-ops, holds R52UnitLd for 7 cycles,
// latency 11.
def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 14;
  let ResourceCycles = [7];
  let Latency = 11;
}
// Store-multiple write: 16 micro-ops, holds R52UnitLd for 8 cycles,
// latency 12.
def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 16;
  let ResourceCycles = [8];
  let Latency = 12;
}
// Store-multiple write: 18 micro-ops, holds R52UnitLd for 9 cycles,
// latency 13.
def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 18;
  let ResourceCycles = [9];
  let Latency = 13;
}
// Store-multiple write: 20 micro-ops, holds R52UnitLd for 10 cycles,
// latency 14.
def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 20;
  let ResourceCycles = [10];
  let Latency = 14;
}
// Store-multiple write: 22 micro-ops, holds R52UnitLd for 11 cycles,
// latency 15.
def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 22;
  let ResourceCycles = [11];
  let Latency = 15;
}
// Variant write for store-multiple. Each consecutive pair of R52LMAddrPred
// predicates selects the same R52WriteSTM<N> resource; the final NoSchedPred
// entry is the fallback when none of the predicates applies.
def R52WriteSTM : SchedWriteVariant<[
  SchedVar<R52LMAddrPred1,  [R52WriteSTM5]>,
  SchedVar<R52LMAddrPred2,  [R52WriteSTM5]>,

  SchedVar<R52LMAddrPred3,  [R52WriteSTM6]>,
  SchedVar<R52LMAddrPred4,  [R52WriteSTM6]>,

  SchedVar<R52LMAddrPred5,  [R52WriteSTM7]>,
  SchedVar<R52LMAddrPred6,  [R52WriteSTM7]>,

  SchedVar<R52LMAddrPred7,  [R52WriteSTM8]>,
  SchedVar<R52LMAddrPred8,  [R52WriteSTM8]>,

  SchedVar<R52LMAddrPred9,  [R52WriteSTM9]>,
  SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,

  SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
  SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,

  SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
  SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,

  SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
  SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,

  // Unknown number of registers: fall back to R52WriteSTM6 (two resource
  // cycles on R52UnitLd).
  SchedVar<NoSchedPred,     [R52WriteSTM6]>
]>;
716 // Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
717 // another instruction in slot-1, but only in the last issue.
// WriteVLD1: latency 5 on R52UnitLd; all other fields keep their defaults.
def : WriteRes<WriteVLD1, [R52UnitLd]> {
  let Latency = 5;
}
// WriteVLD2: 3 micro-ops, R52UnitLd held for 2 cycles, latency 6,
// marked single-issue.
def : WriteRes<WriteVLD2, [R52UnitLd]> {
  let NumMicroOps = 3;
  let ResourceCycles = [2];
  let Latency = 6;
  let SingleIssue = 1;
}
// WriteVLD3: 5 micro-ops, R52UnitLd held for 3 cycles, latency 7,
// marked single-issue.
def : WriteRes<WriteVLD3, [R52UnitLd]> {
  let NumMicroOps = 5;
  let ResourceCycles = [3];
  let Latency = 7;
  let SingleIssue = 1;
}
// WriteVLD4: 7 micro-ops, R52UnitLd held for 4 cycles, latency 8,
// marked single-issue.
def : WriteRes<WriteVLD4, [R52UnitLd]> {
  let NumMicroOps = 7;
  let ResourceCycles = [4];
  let Latency = 8;
  let SingleIssue = 1;
}
// Vector-store write: 1 micro-op, R52UnitLd held for 1 cycle, latency 5.
def R52WriteVST1Mem  : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 1;
  let ResourceCycles = [1];
  let Latency = 5;
}
// Vector-store write: 3 micro-ops, R52UnitLd held for 2 cycles, latency 6.
def R52WriteVST2Mem  : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 3;
  let ResourceCycles = [2];
  let Latency = 6;
}
// Vector-store write: 5 micro-ops, R52UnitLd held for 3 cycles, latency 7.
def R52WriteVST3Mem  : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 5;
  let ResourceCycles = [3];
  let Latency = 7;
}
// Vector-store write: 7 micro-ops, R52UnitLd held for 4 cycles, latency 8.
def R52WriteVST4Mem  : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 7;
  let ResourceCycles = [4];
  let Latency = 8;
}
// Vector-store write: 9 micro-ops, R52UnitLd held for 5 cycles, latency 9.
def R52WriteVST5Mem  : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 9;
  let ResourceCycles = [5];
  let Latency = 9;
}
// VABA / VABAL: one write plus three R52Read_F1 operand reads. The 64-bit
// vector types use R52WriteFPALU_F5; the 128-bit and VABAL forms use
// R52Write2FPALU_F5.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;
// VABD / VABDL: one write plus two R52Read_F1 operand reads. The 64-bit
// vector types use R52WriteFPALU_F4; the 128-bit and VABDL forms use
// R52Write2FPALU_F4.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
// VABDL is a widening operation, so its instruction names end in the result
// types v8i16/v4i32/v2i64 (the same set listed for VABAL). Listing
// v16i8|v8i16|v4i32 here would leave the v2i64 form without R52 scheduling
// information and name a v16i8 form that does not exist.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABDL(u|s)(v8i16|v4i32|v2i64)")>;
// VABS on the 128-bit vector types: R52Write2FPALU_F4, one R52Read_F1 read.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1],
             (instregex "VABS(v16i8|v8i16|v4i32)")>;
// Vector add/subtract. Plain VADD/VSUB read both operands with R52Read_F2;
// the narrowing (..HN) forms also use R52Read_F2, while the long/wide
// (VADDL/VADDW/VSUBL/VSUBW) forms use R52Read_F1.
def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
             (instregex
                "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;

def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;
// Bitwise operations. Names ending in 'd' use R52WriteFPALU_F3 and names
// ending in 'q' use R52Write2FPALU_F3; the immediate VBICi forms are split by
// vector type instead.
def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2],
             (instregex "(VAND|VBIC|VEOR)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2],
             (instregex "(VAND|VBIC|VEOR)q")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_F2],
             (instregex "VBICi(v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2],
             (instregex "VBICi(v8i16|v4i32)")>;

// Bitwise select/insert: first read is R52Read_F1, the other two R52Read_F2.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2],
             (instregex "(VBIF|VBIT|VBSL)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2],
             (instregex "(VBIF|VBIT|VBSL)q")>;
// Compares, count-leading-zeros and population count: R52WriteFPALU_F3.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex
                "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
// Conversions: R52WriteFPALU_F5.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VCVT", "VSITO", "VUITO", "VTO")>;
// VDUP reads its source with R52Read_ISS; the lane form VDUPLN reads with
// R52Read_F1. 'd' names use R52WriteFPALU_F3, 'q' names R52Write2FPALU_F3.
def : InstRW<[R52WriteFPALU_F3, R52Read_ISS],
             (instregex "VDUP(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_ISS],
             (instregex "VDUP(8|16|32)q")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1],
             (instregex "VDUPLN(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1],
             (instregex "VDUPLN(8|16|32)q")>;

// VEXT (and VSEL, which shares the 'd' entry).
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VEXTd(8|16|32)", "VSEL")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VEXTq(8|16|32|64)")>;
// Fused multiply-accumulate/subtract: R52WriteFPMAC_F5 for 'd' names,
// R52Write2FPMAC_F5 for 'q' names, three R52Read_F1 reads.
def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "(VFMA|VFMS)(f|h)d")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "(VFMA|VFMS)(f|h)q")>;

// Halving add/subtract: two R52Read_F2 reads.
def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
// Miscellaneous NEON/VFP instructions, matched by name prefix unless the
// regex is explicitly anchored with '$'.
def : InstRW<[R52WriteVLDM],
             (instregex "VLDM[SD](IA|DB)$")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VORR", "VORN", "VREV")>;
def : InstRW<[R52WriteNoRSRC_WRI],
             (instregex "VMRS")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VNEG")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VPADDi")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VPADAL", "VPADDL")>;
// Saturating operations.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1],
             (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VQDMLAL", "VQDMLSL")>;
def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VQDMUL", "VQRDMUL")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;

// Rounding shifts and table lookups (R52WriteFPALU_F4), then the permute
// instructions (R52WriteFPALU_F3).
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
832 //---
833 // VSTx. Vector Stores
834 //---
835 // 1-element structure store
// VST1 names anchored with '$' (no wb/WB suffix). The R52WriteVST<N>Mem
// resource grows with the d/q/T/Q form.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)T$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Q$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64TPseudo$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64QPseudo$")>;
// VST1LN (lane) names anchored with '$': all use R52WriteVST1Mem.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNq(8|16|32)Pseudo$")>;
// VST1 names carrying a wb/WB suffix: same memory writes as the unsuffixed
// entries, plus an R52WriteAdr write.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1q(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Twb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Qwb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64TPseudoWB")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64QPseudoWB")>;
// VST1LN names carrying an _UPD/WB marker: R52WriteVST1Mem plus R52WriteAdr.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNq(8|16|32)Pseudo_UPD")>;
858 // 2-element structure store
// VST2 names anchored with '$': d/b forms use R52WriteVST2Mem, q forms
// (including the Pseudo) use R52WriteVST4Mem.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2(d|b)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)Pseudo$")>;
// VST2LN (lane) names anchored with '$': all use R52WriteVST1Mem.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)Pseudo$")>;
// VST2 names carrying a wb/WB suffix: same memory writes as the unsuffixed
// entries, plus an R52WriteAdr write.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2(d|b)(8|16|32)wb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)wb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)PseudoWB")>;
// VST2LN names carrying an _UPD/WB marker: R52WriteVST1Mem plus R52WriteAdr.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)Pseudo_UPD")>;
881 // 3-element structure store
// VST3 names anchored with '$' (no _UPD suffix): all use R52WriteVST4Mem.
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)Asm_(8|16|32)$")>;
// Pseudos: accept both the d and q prefixes, matching the _UPD entry for the
// same pseudos. With only 'd' accepted, the 'oddP' alternative could never
// match (the odd pseudos are q-prefixed) and the VST3q*oddPseudo
// instructions were left without R52 scheduling information.
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
// VST3LN (lane) names anchored with '$': all use R52WriteVST2Mem.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)Pseudo$")>;
// VST3 names carrying an _UPD/WB marker: R52WriteVST4Mem plus R52WriteAdr.
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)_UPD$")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
// VST3LN names carrying an _UPD/WB marker: R52WriteVST2Mem plus R52WriteAdr.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
904 // 4-element structure store
// VST4 names anchored with '$' (no _UPD suffix): all use R52WriteVST5Mem.
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)Asm_(8|16|32)$")>;
// Pseudos: accept the same (d|q) prefixes and (oddP|P)seudo spellings as the
// _UPD entry for these pseudos, so the q-prefixed odd pseudos
// (VST4q*oddPseudo) are covered as well as VST4d*Pseudo.
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
// VST4LN (lane) names anchored with '$': all use R52WriteVST2Mem.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)Pseudo$")>;
// VST4 names carrying an _UPD/WB marker: R52WriteVST5Mem plus R52WriteAdr.
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
// VST4LN names carrying an _UPD/WB marker: R52WriteVST2Mem plus R52WriteAdr.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)Pseudo_UPD")>;
927 } // R52 SchedModel