1 //==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
11 //===----------------------------------------------------------------------===//
13 // ===---------------------------------------------------------------------===//
14 // The Cortex-R52 is an in-order pipelined superscalar microprocessor with
15 // an 8-stage pipeline. It can issue a maximum of two instructions per cycle.
16 // There are two ALUs, one LDST, one MUL and a non-pipelined integer DIV.
17 // A number of forwarding paths enable results of computations to be input
18 // to subsequent operations before they are written to registers.
19 // This scheduler is a MachineScheduler. See TargetSchedule.td for details.
21 def CortexR52Model : SchedMachineModel {
22 let MicroOpBufferSize = 0; // R52 is in-order processor
23 let IssueWidth = 2; // 2 micro-ops dispatched per cycle
24 let LoadLatency = 1; // Optimistic, assuming no misses
25 let MispredictPenalty = 8; // A branch direction mispredict, including PFU
26 let CompleteModel = 0; // Covers instructions applicable to cortex-r52.
30 //===----------------------------------------------------------------------===//
31 // Define each kind of processor resource and number available.
33 // Modeling each pipeline as a ProcResource using the BufferSize = 0 since
34 // Cortex-R52 is an in-order processor.
36 def R52UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU (2 units)
37 def R52UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC (1 unit)
38 def R52UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division (1 unit); also named by the WriteFPDIV*/WriteFPSQRT* WriteRes in this file
39 def R52UnitLd : ProcResource<1> { let BufferSize = 0; } // Load/Store (1 unit, shared by loads and stores)
40 def R52UnitB : ProcResource<1> { let BufferSize = 0; } // Branch (1 unit)
41 def R52UnitFPALU : ProcResource<2> { let BufferSize = 0; } // FP ALU (2 units)
42 def R52UnitFPMUL : ProcResource<2> { let BufferSize = 0; } // FP MUL (2 units)
43 def R52UnitFPDIV : ProcResource<1> { let BufferSize = 0; } // FP DIV (1 unit). NOTE(review): not referenced by any definition visible in this listing - confirm whether FP div/sqrt should use it
45 // Cortex-R52 specific SchedReads
46 def R52Read_ISS : SchedRead; // ReadAdvance 0 (defined with the forwarding information below)
47 def R52Read_EX1 : SchedRead; // ReadAdvance 1
48 def R52Read_EX2 : SchedRead; // ReadAdvance 2
49 def R52Read_WRI : SchedRead; // NOTE(review): no ReadAdvance for this read is visible in this listing - confirm it is intentionally unused
50 def R52Read_F0 : SchedRead; // F0 maps to ISS stage of integer pipe; ReadAdvance 0
51 def R52Read_F1 : SchedRead; // ReadAdvance 1
52 def R52Read_F2 : SchedRead; // ReadAdvance 2
55 //===----------------------------------------------------------------------===//
56 // Subtarget-specific SchedWrite types which map ProcResources and set latency.
58 let SchedModel = CortexR52Model in {
60 // ALU - Write occurs in Late EX2 (independent of whether shift was required)
61 def : WriteRes<WriteALU, [R52UnitALU]> { let Latency = 3; }
62 def : WriteRes<WriteALUsi, [R52UnitALU]> { let Latency = 3; }
63 def : WriteRes<WriteALUsr, [R52UnitALU]> { let Latency = 3; }
64 def : WriteRes<WriteALUSsr, [R52UnitALU]> { let Latency = 3; }
67 def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; }
68 def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; }
69 def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
71 // Multiply - aliased to sub-target specific later
73 // Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
74 def : WriteRes<WriteDIV, [R52UnitDiv]> {
75 let Latency = 8; let ResourceCycles = [8]; // non-pipelined
78 // Branches - LR written in Late EX2
79 def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; }
80 def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; }
81 def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; }
84 def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
86 // Integer pipeline by-passes
87 def : ReadAdvance<ReadALU, 1>; // Operand needed in EX1 stage
88 def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
89 def : ReadAdvance<ReadMUL, 0>;
90 def : ReadAdvance<ReadMAC, 0>;
92 // Floating-point. Map target-defined SchedReadWrites to subtarget
93 def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]> { let Latency = 6; }
95 def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]> {
99 def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]> {
100 let Latency = 11; // as it is internally two insns (MUL then ADD)
103 def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
104 R52UnitFPALU, R52UnitFPALU]> {
108 def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
109 let Latency = 7; // FP div takes fixed #cycles
110 let ResourceCycles = [7]; // is not pipelined
113 def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
115 let ResourceCycles = [17];
118 def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }
119 def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; }
121 // Overridden via InstRW for this processor.
122 def : WriteRes<WriteVST1, []>;
123 def : WriteRes<WriteVST2, []>;
124 def : WriteRes<WriteVST3, []>;
125 def : WriteRes<WriteVST4, []>;
127 def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
128 def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1
130 //===----------------------------------------------------------------------===//
131 // Subtarget-specific SchedReadWrites.
133 // Forwarding information - based on when an operand is read
134 def : ReadAdvance<R52Read_ISS, 0>;
135 def : ReadAdvance<R52Read_EX1, 1>;
136 def : ReadAdvance<R52Read_EX2, 2>;
137 def : ReadAdvance<R52Read_F0, 0>;
138 def : ReadAdvance<R52Read_F1, 1>;
139 def : ReadAdvance<R52Read_F2, 2>;
142 // Cortex-R52 specific SchedWrites for use with InstRW
143 def R52WriteMAC : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
144 def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> {
145 let Latency = 4; let NumMicroOps = 0;
147 def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
148 let Latency = 8; let ResourceCycles = [8]; // not pipelined
150 def R52WriteLd : SchedWriteRes<[R52UnitLd]> { let Latency = 4; } // integer load: Load/Store unit, latency 4
151 def R52WriteST : SchedWriteRes<[R52UnitLd]> { let Latency = 4; } // integer store: same unit and latency as R52WriteLd
152 def R52WriteAdr : SchedWriteRes<[]> { let Latency = 0; } // no resource, zero latency; listed as the extra write on the PRE/POST and _UPD InstRW entries
153 def R52WriteCC : SchedWriteRes<[]> { let Latency = 0; } // no resource, zero latency; used by the CMP/CMN/TST InstRW entries
154 def R52WriteALU_EX1 : SchedWriteRes<[R52UnitALU]> { let Latency = 2; } // ALU unit, latency 2
155 def R52WriteALU_EX2 : SchedWriteRes<[R52UnitALU]> { let Latency = 3; } // ALU unit, latency 3
156 def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> { let Latency = 4; } // ALU unit, latency 4
158 def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; } // latency 3, no resource reserved
159 def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; } // latency 4, no resource reserved; used by the VMRS InstRW entry
161 // Alias generics to sub-target specific
162 def : SchedAlias<WriteMUL16, R52WriteMAC>;
163 def : SchedAlias<WriteMUL32, R52WriteMAC>;
164 def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
165 def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;
166 def : SchedAlias<WriteMAC16, R52WriteMAC>;
167 def : SchedAlias<WriteMAC32, R52WriteMAC>;
168 def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
169 def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;
170 def : SchedAlias<WritePreLd, R52WriteLd>;
171 def : SchedAlias<WriteLd, R52WriteLd>;
172 def : SchedAlias<WriteST, R52WriteST>;
174 def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; }
175 def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
178 def R52WriteFPALU_F4 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 5; }
179 def R52Write2FPALU_F4 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
182 def R52WriteFPALU_F5 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 6; }
183 def R52Write2FPALU_F5 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
186 def R52WriteFPMUL_F5 : SchedWriteRes<[R52UnitFPMUL]> { let Latency = 6; }
187 def R52Write2FPMUL_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]> {
190 def R52WriteFPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]> {
191 let Latency = 11; // as it is internally two insns (MUL then ADD)
193 def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
194 R52UnitFPALU, R52UnitFPALU]> {
198 def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
199 def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
201 //===----------------------------------------------------------------------===//
202 // Floating-point. Map target defined SchedReadWrites to processor specific ones
204 def : SchedAlias<WriteFPCVT, R52WriteFPALU_F5>;
205 def : SchedAlias<WriteFPMOV, R52WriteFPALU_F3>;
206 def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
207 def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
209 //===----------------------------------------------------------------------===//
210 // Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
212 def : InstRW<[WriteALU], (instrs COPY)>;
214 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
215 (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
216 "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;
218 def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
219 (instregex "MOVCCi32imm", "MOVi32imm", "t2MOVCCi", "t2MOVi")>;
220 def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
221 (instregex "MOV_ga_pcrel$")>;
222 def : InstRW<[R52WriteLd,R52Read_ISS],
223 (instregex "MOV_ga_pcrel_ldr")>;
225 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "SEL", "t2SEL")>;
227 def : InstRW< [R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
228 (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
229 "(t|t2)UBFX", "(t|t2)SBFX")>;
231 // Saturating arithmetic
232 def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
233 (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
234 "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
235 "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
236 "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
237 "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
238 "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;
240 // Parallel arithmetic
241 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
242 (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
243 "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
244 "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
245 "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
248 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
249 (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
250 "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
251 "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
252 "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
253 "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
254 "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
256 // Sum of Absolute Difference
257 def : InstRW< [R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
258 (instregex "USAD8", "t2USAD8", "USADA8", "t2USADA8") >;
261 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
262 (instregex "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
263 "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDX", "t2MUL",
264 "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
265 "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
267 // Multiply Accumulate
268 // Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
269 // The store pipeline is used partly for 64-bit operations.
270 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
271 (instregex "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
272 "t2MLA", "t2MLS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
273 "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
274 "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
275 "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
276 "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
277 "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
278 "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
279 "SMLAL", "UMLAL", "SMLALBT",
280 "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
281 "UMAAL", "t2SMLAL", "t2UMLAL",
282 "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
283 "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
285 def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS],
286 (instregex "t2SDIV", "t2UDIV")>;
288 // Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
289 // However, that's non-trivial to specify, so we keep it uniform
290 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
291 (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
292 "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "t2LDREX",
293 "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)",
294 "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
295 "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
296 "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$", "LDA", "t2LDA")>;
297 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
298 (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
299 "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
300 "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
301 "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
302 "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)?",
303 "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
305 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "MOVS?sr", "t2MOVS?sr")>;
306 def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex "MOVT", "t2MOVT")>;
308 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri", "ANDS?ri",
309 "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
310 "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri", "t2MVN",
311 "t2ORRri", "t2RSBS?ri", "t2SBCri")>;
313 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "AD(C|D)S?rr",
314 "ANDS?rr", "BICS?rr", "CRC", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
315 "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr", "t2SBCrr")>;
317 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi",
318 "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
319 "t2AD(C|D)S?rs", "t2ANDS?rs", "t2BICS?rs", "t2EORrs", "t2ORRrs", "t2RSBrs", "t2SBCrs")>;
321 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
322 (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
323 "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;
325 def : InstRW<[R52WriteALU_EX1],
326 (instregex "ADR", "MOVsi", "MVNS?s?i", "t2MOVS?si")>;
328 def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex "ASRi", "RORS?i")>;
329 def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
330 (instregex "ASRr", "RORS?r", "LSR", "LSL")>;
332 def : InstRW<[R52WriteCC, R52Read_EX1], (instregex "CMPri", "CMNri")>;
333 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1], (instregex "CMPrr", "CMNzrr")>;
334 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS], (instregex "CMPrsi", "CMNzrsi")>;
335 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS], (instregex "CMPrsr", "CMNzrsr")>;
337 def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
338 (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;
340 def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;
342 def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
343 def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;
345 // Integer Load, Multiple.
346 foreach Lat = 3-25 in {
347 def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
350 def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
355 foreach NAddr = 1-16 in {
356 def R52ILDMAddr#NAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
358 def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; }
359 def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>;
360 def R52WriteILDM : SchedWriteVariant<[ // one SchedVar per LDM address count (TII->getNumLDMAddresses); N addresses -> N writes, R52WriteILDM4Cy upward
361 SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,
363 SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
365 SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
366 R52WriteILDM6Cy, R52WriteILDM7Cy]>,
368 SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
369 R52WriteILDM6Cy, R52WriteILDM7Cy,
371 SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
372 R52WriteILDM6Cy, R52WriteILDM7Cy,
373 R52WriteILDM8Cy, R52WriteILDM9Cy]>,
375 SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
376 R52WriteILDM6Cy, R52WriteILDM7Cy,
377 R52WriteILDM8Cy, R52WriteILDM9Cy,
379 SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
380 R52WriteILDM6Cy, R52WriteILDM7Cy,
381 R52WriteILDM8Cy, R52WriteILDM9Cy,
382 R52WriteILDM10Cy, R52WriteILDM11Cy]>,
384 SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
385 R52WriteILDM6Cy, R52WriteILDM7Cy,
386 R52WriteILDM8Cy, R52WriteILDM9Cy,
387 R52WriteILDM10Cy, R52WriteILDM11Cy,
389 SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
390 R52WriteILDM6Cy, R52WriteILDM7Cy,
391 R52WriteILDM8Cy, R52WriteILDM9Cy,
392 R52WriteILDM10Cy, R52WriteILDM11Cy,
393 R52WriteILDM12Cy, R52WriteILDM13Cy]>,
395 SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
396 R52WriteILDM6Cy, R52WriteILDM7Cy,
397 R52WriteILDM8Cy, R52WriteILDM9Cy,
398 R52WriteILDM10Cy, R52WriteILDM11Cy,
399 R52WriteILDM12Cy, R52WriteILDM13Cy,
401 SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
402 R52WriteILDM6Cy, R52WriteILDM7Cy,
403 R52WriteILDM8Cy, R52WriteILDM9Cy,
404 R52WriteILDM10Cy, R52WriteILDM11Cy,
405 R52WriteILDM12Cy, R52WriteILDM13Cy,
406 R52WriteILDM14Cy, R52WriteILDM15Cy]>,
408 SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
409 R52WriteILDM6Cy, R52WriteILDM7Cy,
410 R52WriteILDM8Cy, R52WriteILDM9Cy,
411 R52WriteILDM10Cy, R52WriteILDM11Cy,
412 R52WriteILDM12Cy, R52WriteILDM13Cy,
413 R52WriteILDM14Cy, R52WriteILDM15Cy,
415 SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
416 R52WriteILDM6Cy, R52WriteILDM7Cy,
417 R52WriteILDM8Cy, R52WriteILDM9Cy,
418 R52WriteILDM10Cy, R52WriteILDM11Cy,
419 R52WriteILDM12Cy, R52WriteILDM13Cy,
420 R52WriteILDM14Cy, R52WriteILDM15Cy,
421 R52WriteILDM16Cy, R52WriteILDM17Cy]>,
423 SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
424 R52WriteILDM6Cy, R52WriteILDM7Cy,
425 R52WriteILDM8Cy, R52WriteILDM9Cy,
426 R52WriteILDM10Cy, R52WriteILDM11Cy,
427 R52WriteILDM12Cy, R52WriteILDM13Cy,
428 R52WriteILDM14Cy, R52WriteILDM15Cy,
429 R52WriteILDM16Cy, R52WriteILDM17Cy,
431 SchedVar<R52ILDMAddr16Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy, // 16 addresses -> 16 writes (4Cy..19Cy)
432 R52WriteILDM6Cy, R52WriteILDM7Cy,
433 R52WriteILDM8Cy, R52WriteILDM9Cy,
434 R52WriteILDM10Cy, R52WriteILDM11Cy,
435 R52WriteILDM12Cy, R52WriteILDM13Cy,
436 R52WriteILDM14Cy, R52WriteILDM15Cy,
437 R52WriteILDM16Cy, R52WriteILDM17Cy,
438 R52WriteILDM18Cy, R52WriteILDM19Cy]>,
440 // Unknown number of registers, just use resources for two registers.
441 SchedVar<NoSchedPred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
442 R52WriteILDM6CyNo, R52WriteILDM7CyNo,
443 R52WriteILDM8CyNo, R52WriteILDM9CyNo,
444 R52WriteILDM10CyNo, R52WriteILDM11CyNo,
445 R52WriteILDM12CyNo, R52WriteILDM13CyNo,
446 R52WriteILDM14CyNo, R52WriteILDM15CyNo,
447 R52WriteILDM16CyNo, R52WriteILDM17CyNo,
448 R52WriteILDM18Cy, R52WriteILDM19Cy]>
449 ]> { let Variadic=1; } // Variadic: the listed writes are matched against the instruction's variadic operands (see TargetSchedule.td)
451 // Integer Store, Multiple
452 def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
456 foreach NumAddr = 1-16 in {
457 def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
459 def R52WriteISTM : SchedWriteVariant<[
460 SchedVar<R52ILDMAddr2Pred, [R52WriteISTM2]>,
461 SchedVar<R52ILDMAddr3Pred, [R52WriteISTM3]>,
462 SchedVar<R52ILDMAddr4Pred, [R52WriteISTM4]>,
463 SchedVar<R52ILDMAddr5Pred, [R52WriteISTM5]>,
464 SchedVar<R52ILDMAddr6Pred, [R52WriteISTM6]>,
465 SchedVar<R52ILDMAddr7Pred, [R52WriteISTM7]>,
466 SchedVar<R52ILDMAddr8Pred, [R52WriteISTM8]>,
467 SchedVar<R52ILDMAddr9Pred, [R52WriteISTM9]>,
468 SchedVar<R52ILDMAddr10Pred,[R52WriteISTM10]>,
469 SchedVar<R52ILDMAddr11Pred,[R52WriteISTM11]>,
470 SchedVar<R52ILDMAddr12Pred,[R52WriteISTM12]>,
471 SchedVar<R52ILDMAddr13Pred,[R52WriteISTM13]>,
472 SchedVar<R52ILDMAddr14Pred,[R52WriteISTM14]>,
473 SchedVar<R52ILDMAddr15Pred,[R52WriteISTM15]>,
474 SchedVar<R52ILDMAddr16Pred,[R52WriteISTM16]>,
475 // Unknown number of registers, just use resources for two registers.
476 SchedVar<NoSchedPred, [R52WriteISTM2]>
479 def : InstRW<[R52WriteILDM, R52Read_ISS],
480 (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
481 "(t|sys)LDM(IA|DA|DB|IB)$")>;
482 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
483 (instregex "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
484 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
485 (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "tPOP")>;
487 // Integer Store, Single Element
488 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
489 (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", "SRS", "t2SRS",
490 "t2SRSDB", "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH", "t2STR(i12|i8|s)$",
491 "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
493 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
494 (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
495 "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
496 "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
497 "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
499 // Integer Store, Dual
500 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
501 (instregex "STRD$", "t2STRDi8", "STL", "t2STL")>;
502 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
503 (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
505 def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
506 (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
507 def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
508 (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
511 // LDRLIT pseudo instructions, they expand to LDR + PICADD
512 def : InstRW<[R52WriteLd],
513 (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel$")>;
514 // LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
515 def : InstRW<[R52WriteLd], (instregex "LDRLIT_ga_pcrel_ldr")>;
519 //===----------------------------------------------------------------------===//
520 // VFP, Floating Point Support
521 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fd|hd)")>;
522 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fq|hq)")>;
524 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(D|S|H)")>;
525 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(fd|hd)")>;
526 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VABS(fq|hq)")>;
528 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fd|hd)")>;
529 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fq|hq)")>;
531 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)$")>;
532 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
534 def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
535 def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
538 //===----------------------------------------------------------------------===//
541 // vector multiple load stores
542 foreach NumAddr = 1-16 in {
543 def R52LMAddrPred#NumAddr :
544 SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
546 foreach Lat = 1-32 in {
547 def R52WriteLM#Lat#Cy : SchedWriteRes<[]> {
551 foreach Num = 1-32 in { // reserve LdSt resource, no dual-issue
552 def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
554 let NumMicroOps = Num;
555 let ResourceCycles = [Num];
558 def R52WriteVLDM : SchedWriteVariant<[ // variants keyed on MI->getNumOperands() (R52LMAddrPred1..16), two consecutive operand counts per write list
560 SchedVar<R52LMAddrPred1, [R52WriteLM5Cy,
562 SchedVar<R52LMAddrPred2, [R52WriteLM5Cy,
566 SchedVar<R52LMAddrPred3, [R52WriteLM5Cy, R52WriteLM6Cy,
568 SchedVar<R52LMAddrPred4, [R52WriteLM5Cy, R52WriteLM6Cy,
572 SchedVar<R52LMAddrPred5, [R52WriteLM5Cy, R52WriteLM6Cy,
575 SchedVar<R52LMAddrPred6, [R52WriteLM5Cy, R52WriteLM6Cy,
580 SchedVar<R52LMAddrPred7, [R52WriteLM5Cy, R52WriteLM6Cy,
581 R52WriteLM7Cy, R52WriteLM8Cy,
583 SchedVar<R52LMAddrPred8, [R52WriteLM5Cy, R52WriteLM6Cy,
584 R52WriteLM7Cy, R52WriteLM8Cy,
588 SchedVar<R52LMAddrPred9, [R52WriteLM5Cy, R52WriteLM6Cy,
589 R52WriteLM7Cy, R52WriteLM8Cy,
592 SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
593 R52WriteLM7Cy, R52WriteLM8Cy,
598 SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
599 R52WriteLM7Cy, R52WriteLM8Cy,
600 R52WriteLM9Cy, R52WriteLM10Cy,
602 SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
603 R52WriteLM7Cy, R52WriteLM8Cy,
604 R52WriteLM9Cy, R52WriteLM10Cy,
608 SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
609 R52WriteLM7Cy, R52WriteLM8Cy,
610 R52WriteLM9Cy, R52WriteLM10Cy,
613 SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
614 R52WriteLM7Cy, R52WriteLM8Cy,
615 R52WriteLM9Cy, R52WriteLM10Cy,
620 SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy, // 15 operands: first entry of the pair that extends to R52WriteLM12Cy
621 R52WriteLM7Cy, R52WriteLM8Cy,
622 R52WriteLM9Cy, R52WriteLM10Cy,
623 R52WriteLM11Cy, R52WriteLM12Cy,
625 SchedVar<R52LMAddrPred16, [R52WriteLM5Cy, R52WriteLM6Cy, // 16 operands: second entry of that pair (matches the 15/16 pairing in R52WriteSTM)
626 R52WriteLM7Cy, R52WriteLM8Cy,
627 R52WriteLM9Cy, R52WriteLM10Cy,
628 R52WriteLM11Cy, R52WriteLM12Cy,
630 // unknown number of reg.
631 SchedVar<NoSchedPred, [R52WriteLM5Cy, R52WriteLM6Cy,
632 R52WriteLM7Cy, R52WriteLM8Cy,
633 R52WriteLM9Cy, R52WriteLM10Cy,
634 R52WriteLM11Cy, R52WriteLM12Cy,
636 ]> { let Variadic=1;}
638 // variable stores. Cannot dual-issue
639 def R52WriteSTM5 : SchedWriteRes<[R52UnitLd]> {
642 let ResourceCycles = [1];
644 def R52WriteSTM6 : SchedWriteRes<[R52UnitLd]> {
647 let ResourceCycles = [2];
649 def R52WriteSTM7 : SchedWriteRes<[R52UnitLd]> {
652 let ResourceCycles = [3];
654 def R52WriteSTM8 : SchedWriteRes<[R52UnitLd]> {
657 let ResourceCycles = [4];
659 def R52WriteSTM9 : SchedWriteRes<[R52UnitLd]> {
661 let NumMicroOps = 10;
662 let ResourceCycles = [5];
664 def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
666 let NumMicroOps = 12;
667 let ResourceCycles = [6];
669 def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
671 let NumMicroOps = 14;
672 let ResourceCycles = [7];
674 def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
676 let NumMicroOps = 16;
677 let ResourceCycles = [8];
679 def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
681 let NumMicroOps = 18;
682 let ResourceCycles = [9];
684 def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
686 let NumMicroOps = 20;
687 let ResourceCycles = [10];
689 def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
691 let NumMicroOps = 22;
692 let ResourceCycles = [11];
695 def R52WriteSTM : SchedWriteVariant<[
696 SchedVar<R52LMAddrPred1, [R52WriteSTM5]>,
697 SchedVar<R52LMAddrPred2, [R52WriteSTM5]>,
698 SchedVar<R52LMAddrPred3, [R52WriteSTM6]>,
699 SchedVar<R52LMAddrPred4, [R52WriteSTM6]>,
700 SchedVar<R52LMAddrPred5, [R52WriteSTM7]>,
701 SchedVar<R52LMAddrPred6, [R52WriteSTM7]>,
702 SchedVar<R52LMAddrPred7, [R52WriteSTM8]>,
703 SchedVar<R52LMAddrPred8, [R52WriteSTM8]>,
704 SchedVar<R52LMAddrPred9, [R52WriteSTM9]>,
705 SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,
706 SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
707 SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,
708 SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
709 SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,
710 SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
711 SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,
712 // unknown number of registers, just use resources for two
713 SchedVar<NoSchedPred, [R52WriteSTM6]>
716 // Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
717 // another instruction in slot-1, but only in the last issue.
718 def : WriteRes<WriteVLD1, [R52UnitLd]> { let Latency = 5;}
719 def : WriteRes<WriteVLD2, [R52UnitLd]> {
722 let ResourceCycles = [2];
725 def : WriteRes<WriteVLD3, [R52UnitLd]> {
728 let ResourceCycles = [3];
731 def : WriteRes<WriteVLD4, [R52UnitLd]> {
734 let ResourceCycles = [4];
737 def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
740 let ResourceCycles = [1];
742 def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]> {
745 let ResourceCycles = [2];
747 def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]> {
750 let ResourceCycles = [3];
752 def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]> {
755 let ResourceCycles = [4];
757 def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]> {
760 let ResourceCycles = [5];
764 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
765 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
766 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;
768 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
769 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
770 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABDL(u|s)(v16i8|v8i16|v4i32)")>;
772 def : InstRW<[R52Write2FPALU_F4, R52Read_F1], (instregex "VABS(v16i8|v8i16|v4i32)")>;
774 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
775 (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
776 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
777 (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
778 def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
779 (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;
781 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
782 (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;
784 def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)d")>;
785 def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)q")>;
787 def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
788 def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
790 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>;
791 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>;
793 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
794 (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
795 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
796 (instregex "VCVT", "VSITO", "VUITO", "VTO")>;
798 def : InstRW<[R52WriteFPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)d")>;
799 def : InstRW<[R52Write2FPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)q")>;
800 def : InstRW<[R52WriteFPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)d")>;
801 def : InstRW<[R52Write2FPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)q")>;
803 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTd(8|16|32)", "VSEL")>;
804 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTq(8|16|32|64)")>;
806 def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)d")>;
807 def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)q")>;
809 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
810 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
// Per-opcode scheduling overrides (InstRW) covering VLDM..VZIP. Each record
// lists its scheduling writes first, followed by its reads, and applies them
// to every opcode selected by the instregex patterns.
// NOTE(review): most patterns here are bare name prefixes with no d/q or type
// suffix; assuming instregex matches from the start of the opcode name, each
// one selects every opcode beginning with that text - confirm in
// TargetSchedule.td.

// VLDM S/D, IA/DB forms only; the trailing '$' excludes longer opcode names.
812 def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
813 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
814 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VORR", "VORN", "VREV")>;
// VMRS is given a single write and no reads.
815 def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
816 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
// VPADDi uses the single write; VPADAL/VPADDL use the Write2 variant.
817 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
818 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
// VQABS and VQADD/VQSUB are split by vector type: the first record of each
// pair (v8i8..v1i64) gets the single write, the second (v16i8..v2i64) gets
// the Write2 variant.
819 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
820 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
821 def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
822 (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
823 def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
824 (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
// VQDMLAL/VQDMLSL use the Write2 FPMAC write; VQDMUL/VQRDMUL use the FPMUL
// write. Both records list three F1 reads.
825 def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMLAL", "VQDMLSL")>;
826 def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMUL","VQRDMUL")>;
827 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
828 (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;
// NOTE(review): if matching is prefix-anchored, "VRSHR" already covers
// "VRSHRN"; the extra pattern looks redundant but harmless.
829 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
830 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
833 // VSTx. Vector Stores
// Every store record below uses one R52WriteVST<n>Mem write followed by the
// reads R52Read_ISS and R52Read_F2. Records for writeback opcodes (names
// containing "wb", "WB" or "_UPD") insert R52WriteAdr as a second write.
// NOTE(review): R52WriteVST<n>Mem and R52WriteAdr are defined outside this
// excerpt; <n> presumably scales the cost of the store - confirm there.
// NOTE(review): the trailing '$' on the non-writeback patterns appears to be
// what keeps them from also selecting the longer writeback opcode names,
// assuming instregex matches from the start of the opcode name.
836 // 1-element structure store
// Whole-register forms: d -> VST1Mem, q -> VST2Mem, T -> VST3Mem, Q -> VST4Mem.
837 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)$")>;
838 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)$")>;
839 def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)T$")>;
840 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Q$")>;
841 def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudo$")>;
842 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudo$")>;
// Lane (LN) forms, including the Asm and Pseudo opcodes: all VST1Mem.
843 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)$")>;
844 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNdAsm_(8|16|32)$")>;
845 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo$")>;
// Writeback counterparts of the whole-register forms above.
847 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)wb")>;
848 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)wb")>;
849 def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Twb")>;
850 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Qwb")>;
851 def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudoWB")>;
852 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudoWB")>;
// Writeback counterparts of the lane forms above.
854 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)_UPD")>;
855 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
856 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo_UPD")>;
858 // 2-element structure store
// Each record: one R52WriteVST<n>Mem write, then the reads R52Read_ISS and
// R52Read_F2; writeback opcodes insert R52WriteAdr as a second write.
// Whole-register forms: d/b -> VST2Mem, q (and its Pseudo) -> VST4Mem.
859 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)$")>;
860 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)$")>;
861 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)Pseudo$")>;
// Lane (LN) forms: all VST1Mem. The q-lane patterns list only 16 and 32.
863 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)$")>;
864 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNdAsm_(8|16|32)$")>;
865 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo$")>;
866 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)$")>;
867 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNqAsm_(16|32)$")>;
868 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo$")>;
// Writeback counterparts of the whole-register forms above.
870 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)wb")>;
871 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)wb")>;
872 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)PseudoWB")>;
// Writeback counterparts of the lane forms above.
874 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)_UPD")>;
875 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
876 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo_UPD")>;
877 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)_UPD")>;
878 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>;
879 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo_UPD")>;
881 // 3-element structure store
// Each record: one R52WriteVST<n>Mem write, then the reads R52Read_ISS and
// R52Read_F2; writeback opcodes insert R52WriteAdr as a second write.
// Whole-register forms (d and q, plus Asm and Pseudo opcodes): all VST4Mem.
882 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)$")>;
883 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)Asm_(8|16|32)$")>;
// NOTE(review): this pattern is restricted to "VST3d", whereas the matching
// writeback pattern further down accepts "VST3(d|q)". If the odd pseudos are
// q-register opcodes, they are not selected here - verify against the opcode
// names in ARMInstrNEON.td before changing.
884 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3d(8|16|32)(oddP|P)seudo$")>;
// Lane (LN) forms: all VST2Mem. The q-lane patterns list only 16 and 32.
886 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)$")>;
887 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNdAsm_(8|16|32)$")>;
888 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo$")>;
889 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)$")>;
890 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNqAsm_(16|32)$")>;
891 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo$")>;
// Writeback counterparts of the whole-register forms above.
893 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)_UPD$")>;
894 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
895 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
// Writeback counterparts of the lane forms above. Most of these patterns are
// '$'-terminated; the VST3LNdWB_..._Asm pattern is not.
897 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)_UPD$")>;
898 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
899 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
900 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)_UPD$")>;
901 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
902 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
904 // 4-element structure store
// Each record: one R52WriteVST<n>Mem write, then the reads R52Read_ISS and
// R52Read_F2; writeback opcodes insert R52WriteAdr as a second write.
// Whole-register forms (d and q, plus Asm and Pseudo opcodes): all VST5Mem.
905 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)$")>;
906 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)Asm_(8|16|32)$")>;
// NOTE(review): this pattern is restricted to "VST4d...Pseudo", whereas the
// matching writeback pattern further down accepts "VST4(d|q)" and
// "(oddP|P)seudo". Any non-writeback q/odd pseudo opcodes are not selected
// here - verify against the opcode names in ARMInstrNEON.td before changing.
907 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4d(8|16|32)Pseudo$")>;
// Lane (LN) forms: all VST2Mem. The q-lane patterns list only 16 and 32.
909 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)$")>;
910 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNdAsm_(8|16|32)$")>;
911 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo$")>;
912 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)$")>;
913 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNqAsm_(16|32)$")>;
914 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo$")>;
// Writeback counterparts of the whole-register forms above.
916 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)_UPD")>;
917 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
918 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
// Writeback counterparts of the lane forms above.
920 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)_UPD")>;
921 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
922 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
923 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)_UPD")>;
924 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
925 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo_UPD")>;