1 //=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the machine model for ARM Cortex-A57 to support
10 // instruction scheduling and other instruction cost heuristics.
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
15 // *** Common description and scheduling model parameters taken from AArch64 ***
16 // The Cortex-A57 is a traditional superscalar microprocessor with a
17 // conservative 3-wide in-order stage for decode and dispatch. Combined with the
18 // much wider out-of-order issue stage, this produced a need to carefully
19 // schedule micro-ops so that all three decoded each cycle are successfully
20 // issued as the reservation station(s) simply don't stay occupied for long.
21 // Therefore, IssueWidth is set to the narrower of the two at three, while still
22 // modeling the machine as out-of-order.
// Scheduling predicates. Each SchedPredicate wraps a C++ expression that is
// evaluated on the instruction (*MI) through the target instruction info (TII).
// Holds when TII->isCPSRDefined(*MI) returns true.
24 def IsCPSRDefinedPred : SchedPredicate<[{TII->isCPSRDefined(*MI)}]>;
// Holds when the instruction both defines CPSR and is predicated.
25 def IsCPSRDefinedAndPredicatedPred :
26 SchedPredicate<[{TII->isCPSRDefined(*MI) && TII->isPredicated(*MI)}]>;
28 // Cortex-A57 rev. r1p0 or later (false = r0px).
// The predicate body is the constant `false`, so any variant guarded by it is
// never selected and the r0px timings are always the ones modelled.
29 def IsR1P0AndLaterPred : SchedPredicate<[{false}]>;
31 // If Addrmode3 contains a register offset (not an immediate).
// Implemented as the negation of TII->isAddrMode3OpImm; the integer argument is
// the operand offset at which the addressing-mode operands are looked up.
32 def IsLdrAm3RegOffPred :
33 SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 1)}]>;
34 // The same predicate with operand offset 2 and 3:
35 def IsLdrAm3RegOffPredX2 :
36 SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 2)}]>;
37 def IsLdrAm3RegOffPredX3 :
38 SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 3)}]>;
40 // If Addrmode3 contains "minus register".
// Delegates to TII->isAddrMode3OpMinusReg with the same operand-offset argument.
41 def IsLdrAm3NegRegOffPred :
42 SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 1)}]>;
43 // The same predicate with operand offset 2 and 3:
44 def IsLdrAm3NegRegOffPredX2 :
45 SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 2)}]>;
46 def IsLdrAm3NegRegOffPredX3 :
47 SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 3)}]>;
// The predicates in this group are used by both the load and the store
// variants defined later in this file. The unsuffixed name passes operand
// offset 1 to the TII helper; the X0 and X2 suffixes pass 0 and 2.
49 // Load, scaled register offset, not plus LSL2
50 def IsLdstsoScaledNotOptimalPredX0 :
51 SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 0)}]>;
52 def IsLdstsoScaledNotOptimalPred :
53 SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 1)}]>;
54 def IsLdstsoScaledNotOptimalPredX2 :
55 SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 2)}]>;
57 // Load, scaled register offset
// (only offsets 1 and 2 are defined for this helper; there is no X0 form)
58 def IsLdstsoScaledPred :
59 SchedPredicate<[{TII->isLdstScaledReg(*MI, 1)}]>;
60 def IsLdstsoScaledPredX2 :
61 SchedPredicate<[{TII->isLdstScaledReg(*MI, 2)}]>;
// Load/store, register offset, "minus register" form.
// NOTE(review): meaning inferred from the helper name isLdstSoMinusReg and from
// the "for minus reg" comments on the variants that use it -- confirm in TII.
63 def IsLdstsoMinusRegPredX0 :
64 SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 0)}]>;
65 def IsLdstsoMinusRegPred :
66 SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 1)}]>;
67 def IsLdstsoMinusRegPredX2 :
68 SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 2)}]>;
70 // Load, scaled register offset (checked via TII->isAm2ScaledReg, operand
// offset 1). NOTE(review): presumably the addressing-mode-2 counterpart of
// IsLdstsoScaledPred -- confirm against the TII helper.
71 def IsLdrAm2ScaledPred :
72 SchedPredicate<[{TII->isAm2ScaledReg(*MI, 1)}]>;
74 // LDM, base reg in list
// Takes no operand offset; the helper inspects the whole instruction.
75 def IsLdmBaseRegInList :
76 SchedPredicate<[{TII->isLDMBaseRegInList(*MI)}]>;
78 class A57WriteLMOpsListType<list<SchedWriteRes> writes> {
79 list <SchedWriteRes> Writes = writes;
80 SchedMachineModel SchedModel = ?;
83 // *** Common description and scheduling model parameters taken from AArch64 ***
84 // (AArch64SchedA57.td)
85 def CortexA57Model : SchedMachineModel {
86 let IssueWidth = 3; // 3-way decode and dispatch
87 let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
88 let LoadLatency = 4; // Optimistic load latency
89 let MispredictPenalty = 16; // Fetch + Decode/Rename/Dispatch + Branch
91 // Enable partial & runtime unrolling.
92 let LoopMicroOpBufferSize = 16;
93 let CompleteModel = 1;
95 // FIXME: Remove when all errors have been fixed.
96 let FullInstRWOverlapCheck = 0;
98 let UnsupportedFeatures = [HasV8_1MMainline, HasMVEInt, HasMVEFloat,
102 //===----------------------------------------------------------------------===//
103 // Define each kind of processor resource and number available on Cortex-A57.
104 // Cortex-A57 has 8 pipelines, each of which has its own 8-entry queue where
105 // micro-ops wait for their operands and then issue out-of-order.
// Processor resources, one record per micro-op type. The argument of
// ProcResource is the number of units of that kind; the counts below sum to 8
// (A57UnitI is the only resource declared with two units).
107 def A57UnitB : ProcResource<1>; // Type B micro-ops
108 def A57UnitI : ProcResource<2>; // Type I micro-ops
109 def A57UnitM : ProcResource<1>; // Type M micro-ops
110 def A57UnitL : ProcResource<1>; // Type L micro-ops
111 def A57UnitS : ProcResource<1>; // Type S micro-ops
// X and W are the two units that are later combined into the A57UnitV group.
113 def A57UnitX : ProcResource<1>; // Type X micro-ops (F1)
114 def A57UnitW : ProcResource<1>; // Type W micro-ops (F0)
116 let SchedModel = CortexA57Model in {
117 def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops
120 let SchedModel = CortexA57Model in {
122 //===----------------------------------------------------------------------===//
123 // Define customized scheduler read/write types specific to the Cortex-A57.
125 include "ARMScheduleA57WriteRes.td"
127 // To allow "CompleteModel = 1", pseudos and special instructions are covered too:
128 def : InstRW<[WriteNoop], (instregex "(t)?BKPT$", "(t2)?CDP(2)?$",
129 "(t2)?CLREX$", "CONSTPOOL_ENTRY$", "COPY_STRUCT_BYVAL_I32$",
130 "(t2)?CPS[123]p$", "(t2)?DBG$", "(t2)?DMB$", "(t2)?DSB$", "ERET$",
131 "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$", "ITasm$",
132 "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC", "SPACE",
133 "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "(t2|t)?UDF$", "t2DCPS", "t2SG",
134 "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "CompilerBarrier",
// The records below bind system, coprocessor and deprecated instructions to
// WriteNoop instead of an A57-specific write. WriteNoop is defined outside
// this file chunk. NOTE(review): presumably a zero-cost placeholder so that
// these opcodes still have scheduling info -- confirm.
// FP status/control register moves.
137 def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>;
139 // Specific memory instrs
// Two writes are listed per instruction (one WriteNoop for each of two defs).
140 def : InstRW<[WriteNoop, WriteNoop], (instregex "(t2)?LDA", "(t2)?LDC", "(t2)?STC",
141 "(t2)?STL", "(t2)?LDREX", "(t2)?STREX", "MEMCPY")>;
// Coprocessor and status-register transfers (MCR/MRC/MRRC/MRS/MSR families).
144 def : InstRW<[WriteNoop, WriteNoop], (instregex
145 "(t2)?MCR(2|R|R2)?$", "(t2)?MRC(2)?$",
146 "(t2)?MRRC(2)?$", "(t2)?MRS(banked|sys|_AR|_M|sys_AR)?$",
147 "(t2)?MSR(banked|i|_AR|_M)?$")>;
149 // Deprecated instructions
150 def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>;
153 def : InstRW<[WriteNoop], (instregex "(t2)?ABS$",
154 "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj",
155 "tLDRpci_pic", "(t2)?SUBS_PC_LR",
156 "JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp",
157 "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
158 "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
159 "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
160 "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
161 "WIN__CHKSTK", "WIN__DBZCHK")>;
164 // -----------------------------------------------------------------------------
166 def : InstRW<[A57Write_1cyc_1I], (instrs COPY)>;
168 // --- 3.2 Branch Instructions ---
169 // B, BX, BL, BLX (imm, reg != LR, reg == LR), CBZ, CBNZ
// Branch bindings. The A57Write_<N>cyc_<units> records come from the included
// ARMScheduleA57WriteRes.td. NOTE(review): the names suggest "N-cycle latency
// on the listed units" -- confirm there, the definitions are not visible here.
// Plain and conditional branches, tail calls, CBZ/CBNZ.
171 def : InstRW<[A57Write_1cyc_1B], (instregex "(t2|t)?B$", "t?BX", "(t2|t)?Bcc$",
172 "t?TAILJMP(d|r)", "TCRETURN(d|r)i", "tBfar", "tCBN?Z")>;
// Branch-and-link forms with an immediate target.
173 def : InstRW<[A57Write_1cyc_1B_1I],
174 (instregex "t?BL$", "BL_pred$", "t?BLXi", "t?TPsoft")>;
// Register-target BLX forms.
175 def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BLX", "tBLX(NS)?r")>;
// 64-bit compare-and-branch pseudos.
177 def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BCCi64", "BCCZi64")>;
// Jump-table and indirect branches.
178 def : InstRW<[A57Write_3cyc_1B_1I], (instregex "BR_JTadd", "t?BR_JTr",
179 "t2BR_JT", "t2BXJ", "(t2)?TB(B|H)(_JT)?$", "tBRIND")>;
// Jump-table branch bound to a write that also names the L unit.
180 def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>;
182 // --- 3.3 Arithmetic and Logical Instructions ---
183 // ADD{S}, ADC{S}, ADR, AND{S}, BIC{S}, CMN, CMP, EOR{S}, ORN{S}, ORR{S},
184 // RSB{S}, RSC{S}, SUB{S}, SBC{S}, TEQ, TST
186 def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>;
188 // shift by register, conditional or unconditional
189 // TODO: according to the doc, conditional uses I0/I1, unconditional uses M.
190 // Why would the more complex instruction use the simpler pipeline?
191 // This may be an error in the doc.
192 def A57WriteALUsi : SchedWriteVariant<[
193 // lsl #2, lsl #1, or lsr #1.
194 SchedVar<IsPredicatedPred, [A57Write_2cyc_1M]>,
195 SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
197 def A57WriteALUsr : SchedWriteVariant<[
198 SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
199 SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
201 def A57WriteALUSsr : SchedWriteVariant<[
202 SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
203 SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
205 def A57ReadALUsr : SchedReadVariant<[
206 SchedVar<IsPredicatedPred, [ReadDefault]>,
207 SchedVar<NoSchedPred, [ReadDefault]>
209 def : SchedAlias<WriteALUsi, A57WriteALUsi>;
210 def : SchedAlias<WriteALUsr, A57WriteALUsr>;
211 def : SchedAlias<WriteALUSsr, A57WriteALUSsr>;
212 def : SchedAlias<ReadALUsr, A57ReadALUsr>;
214 def A57WriteCMPsr : SchedWriteVariant<[
215 SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
216 SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
218 def : SchedAlias<WriteCMP, A57Write_1cyc_1I>;
219 def : SchedAlias<WriteCMPsi, A57Write_2cyc_1M>;
220 def : SchedAlias<WriteCMPsr, A57WriteCMPsr>;
222 // --- 3.4 Move and Shift Instructions ---
224 // MOV{S}, MOVW, MVN{S}
225 def : InstRW<[A57Write_1cyc_1I], (instregex "MOV(r|i|i16|r_TC)",
226 "(t2)?MVN(CC)?(r|i)", "BMOVPCB_CALL", "BMOVPCRX_CALL",
227 "MOVCC(r|i|i16|i32imm)", "tMOV", "tMVN")>;
229 // Move, shift by immed, setflags/no setflags
230 // (ASR, LSL, LSR, ROR, RRX)=MOVsi, MVN
231 // setflags = isCPSRDefined
232 def A57WriteMOVsi : SchedWriteVariant<[
233 SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>,
234 SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
236 def : InstRW<[A57WriteMOVsi], (instregex "MOV(CC)?si", "MVNsi",
237 "ASRi", "(t2|t)ASRri", "LSRi", "(t2|t)LSRri", "LSLi", "(t2|t)LSLri", "RORi",
238 "(t2|t)RORri", "(t2)?RRX", "t2MOV", "tROR")>;
240 // shift by register, conditional or unconditional, setflags/no setflags
241 def A57WriteMOVsr : SchedWriteVariant<[
242 SchedVar<IsCPSRDefinedAndPredicatedPred, [A57Write_2cyc_1I]>,
243 SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>,
244 SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
245 SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
247 def : InstRW<[A57WriteMOVsr], (instregex "MOV(CC)?sr", "MVNsr", "t2MVNs",
248 "ASRr", "(t2|t)ASRrr", "LSRr", "(t2|t)LSRrr", "LSLr", "(t2|t)?LSLrr", "RORr",
252 // MOVT - A57Write_2cyc_1M for r0px, A57Write_1cyc_1I for r1p0 and later
253 def A57WriteMOVT : SchedWriteVariant<[
254 SchedVar<IsR1P0AndLaterPred, [A57Write_1cyc_1I]>,
255 SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
257 def : InstRW<[A57WriteMOVT], (instregex "MOVTi16")>;
260 WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_1cyc_1I]>;
262 WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_4cyc_1L]>;
263 def : InstRW< [A57WriteI2pc], (instregex "MOV_ga_pcrel")>;
264 def : InstRW< [A57WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
266 // +2cyc for branch forms
267 def : InstRW<[A57Write_3cyc_1I], (instregex "MOVPC(LR|RX)")>;
269 // --- 3.5 Divide and Multiply Instructions ---
270 // Divide: SDIV, UDIV
271 // Latency from the documentation: 4-20 cycles; the maximum is used.
272 def : SchedAlias<WriteDIV, A57Write_20cyc_1M>;
273 // Multiply: tMUL is not bound to the common WriteRes types, so it is given an
// explicit InstRW; the generic 16- and 32-bit multiply writes are aliased to
// the same A57 write.
274 def : InstRW<[A57Write_3cyc_1M], (instregex "tMUL")>;
275 def : SchedAlias<WriteMUL16, A57Write_3cyc_1M>;
276 def : SchedAlias<WriteMUL32, A57Write_3cyc_1M>;
// Multiply source operands get a read advance of 0.
277 def : ReadAdvance<ReadMUL, 0>;
279 // Multiply accumulate: MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB,
280 // SMLAWT, SMLAD{X}, SMLSD{X}, SMMLA{R}, SMMLS{R}
281 // Multiply-accumulate pipelines support late-forwarding of accumulate operands
282 // from similar μops, allowing a typical sequence of multiply-accumulate μops
283 // to issue one every 1 cycle (sched advance = 2).
// Multiply-accumulate writes, both on A57UnitM: latency 3 for MLA, latency 4
// for the long (MLAL) form.
284 def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
285 def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
// Read advance of 2 cycles, applied only when the producer is one of the two
// multiply-accumulate writes listed.
286 def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>;
// Thumb2 dual multiply-accumulate opcodes bound explicitly to A57WriteMLA.
288 def : InstRW<[A57WriteMLA],
289 (instregex "t2SMLAD", "t2SMLADX", "t2SMLSD", "t2SMLSDX")>;
// Generic 16- and 32-bit MAC writes and the MAC read map to the MLA records.
291 def : SchedAlias<WriteMAC16, A57WriteMLA>;
292 def : SchedAlias<WriteMAC32, A57WriteMLA>;
293 def : SchedAlias<ReadMAC, A57ReadMLA>;
// Both halves of a 64-bit MAC result use the long-form write.
295 def : SchedAlias<WriteMAC64Lo, A57WriteMLAL>;
296 def : SchedAlias<WriteMAC64Hi, A57WriteMLAL>;
298 // Multiply long: SMULL, UMULL
299 def : SchedAlias<WriteMUL64Lo, A57Write_4cyc_1M>;
300 def : SchedAlias<WriteMUL64Hi, A57Write_4cyc_1M>;
302 // --- 3.6 Saturating and Parallel Arithmetic Instructions ---
304 // SADD16, SADD8, SSUB16, SSUB8, UADD16, UADD8, USUB16, USUB8
305 // Conditional GE-setting instructions require three extra μops
306 // and two additional cycles to conditionally update the GE field.
307 def A57WriteParArith : SchedWriteVariant<[
308 SchedVar<IsPredicatedPred, [A57Write_4cyc_1I_1M]>,
309 SchedVar<NoSchedPred, [A57Write_2cyc_1I_1M]>
311 def : InstRW< [A57WriteParArith], (instregex
312 "(t2)?SADD(16|8)", "(t2)?SSUB(16|8)",
313 "(t2)?UADD(16|8)", "(t2)?USUB(16|8)")>;
315 // Parallel arith with exchange: SASX, SSAX, UASX, USAX
316 def A57WriteParArithExch : SchedWriteVariant<[
317 SchedVar<IsPredicatedPred, [A57Write_5cyc_1I_1M]>,
318 SchedVar<NoSchedPred, [A57Write_3cyc_1I_1M]>
320 def : InstRW<[A57WriteParArithExch],
321 (instregex "(t2)?SASX", "(t2)?SSAX", "(t2)?UASX", "(t2)?USAX")>;
323 // Parallel halving arith
324 // SHADD16, SHADD8, SHSUB16, SHSUB8, UHADD16, UHADD8, UHSUB16, UHSUB8
325 def : InstRW<[A57Write_2cyc_1M], (instregex
326 "(t2)?SHADD(16|8)", "(t2)?SHSUB(16|8)",
327 "(t2)?UHADD(16|8)", "(t2)?UHSUB(16|8)")>;
329 // Parallel halving arith with exchange
330 // SHASX, SHSAX, UHASX, UHSAX
331 def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?SHASX", "(t2)?SHSAX",
332 "(t2)?UHASX", "(t2)?UHSAX")>;
334 // Parallel saturating arith
335 // QADD16, QADD8, QSUB16, QSUB8, UQADD16, UQADD8, UQSUB16, UQSUB8
336 def : InstRW<[A57Write_2cyc_1M], (instregex "QADD(16|8)", "QSUB(16|8)",
337 "UQADD(16|8)", "UQSUB(16|8)", "t2(U?)QADD", "t2(U?)QSUB")>;
339 // Parallel saturating arith with exchange
340 // QASX, QSAX, UQASX, UQSAX
341 def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QASX", "(t2)?QSAX",
342 "(t2)?UQASX", "(t2)?UQSAX")>;
344 // Saturate: SSAT, SSAT16, USAT, USAT16
345 def : InstRW<[A57Write_2cyc_1M],
346 (instregex "(t2)?SSAT(16)?", "(t2)?USAT(16)?")>;
348 // Saturating arith: QADD, QSUB
349 def : InstRW<[A57Write_2cyc_1M], (instregex "QADD$", "QSUB$")>;
351 // Saturating doubling arith: QDADD, QDSUB
352 def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QDADD", "(t2)?QDSUB")>;
354 // --- 3.7 Miscellaneous Data-Processing Instructions ---
355 // Bit field extract: SBFX, UBFX
356 def : InstRW<[A57Write_1cyc_1I], (instregex "(t2)?SBFX", "(t2)?UBFX")>;
358 // Bit field insert/clear: BFI, BFC
359 def : InstRW<[A57Write_2cyc_1M], (instregex "(t2)?BFI", "(t2)?BFC")>;
361 // Select bytes, conditional/unconditional
362 def A57WriteSEL : SchedWriteVariant<[
363 SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
364 SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
366 def : InstRW<[A57WriteSEL], (instregex "(t2)?SEL")>;
368 // Sign/zero extend, normal: SXTB, SXTH, UXTB, UXTH
369 def : InstRW<[A57Write_1cyc_1I],
370 (instregex "(t2|t)?SXT(B|H)$", "(t2|t)?UXT(B|H)$")>;
372 // Sign/zero extend and add, normal: SXTAB, SXTAH, UXTAB, UXTAH
373 def : InstRW<[A57Write_2cyc_1M],
374 (instregex "(t2)?SXTA(B|H)$", "(t2)?UXTA(B|H)$")>;
376 // Sign/zero extend and add, parallel: SXTAB16, UXTAB16
377 def : InstRW<[A57Write_4cyc_1M], (instregex "(t2)?SXTAB16", "(t2)?UXTAB16")>;
379 // Sum of absolute differences: USAD8, USADA8
380 def : InstRW<[A57Write_3cyc_1M], (instregex "(t2)?USAD8", "(t2)?USADA8")>;
382 // --- 3.8 Load Instructions ---
384 // Load, immed offset
385 // LDR and LDRB have LDRi12 and LDRBi12 forms for immediate
386 def : InstRW<[A57Write_4cyc_1L], (instregex "LDRi12", "LDRBi12",
387 "LDRcp", "(t2|t)?LDRConstPool", "LDRLIT_ga_(pcrel|abs)",
390 def : InstRW<[A57Write_4cyc_1L],
391 (instregex "t2LDRS?(B|H)?(pcrel|T|i8|i12|pci|pci_pic|s)?$")>;
393 // For "Load, register offset, minus" we need +1cyc, +1I
394 def A57WriteLdrAm3 : SchedWriteVariant<[
395 SchedVar<IsLdrAm3NegRegOffPred, [A57Write_5cyc_1I_1L]>,
396 SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
398 def : InstRW<[A57WriteLdrAm3], (instregex "LDR(H|SH|SB)$")>;
399 def A57WriteLdrAm3X2 : SchedWriteVariant<[
400 SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_5cyc_1I_1L]>,
401 SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
403 def : InstRW<[A57WriteLdrAm3X2, A57WriteLdrAm3X2], (instregex "LDRD$")>;
404 def : InstRW<[A57Write_4cyc_1L, A57Write_4cyc_1L], (instregex "t2LDRDi8")>;
406 def A57WriteLdrAmLDSTSO : SchedWriteVariant<[
407 SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_5cyc_1I_1L]>,
408 SchedVar<IsLdstsoMinusRegPred, [A57Write_5cyc_1I_1L]>,
409 SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
411 def : InstRW<[A57WriteLdrAmLDSTSO], (instregex "LDRrs", "LDRBrs")>;
413 def A57WrBackOne : SchedWriteRes<[]> {
417 def A57WrBackTwo : SchedWriteRes<[]> {
421 def A57WrBackThree : SchedWriteRes<[]> {
426 // --- LDR pre-indexed ---
427 // Load, immed pre-indexed (4 cyc for load result, 1 cyc for Base update)
428 def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR_PRE_IMM",
429 "LDRB_PRE_IMM", "t2LDRB_PRE")>;
431 // Load, register pre-indexed (4 cyc for load result, 2 cyc for Base update)
432 // (5 cyc load result for not-lsl2 scaled)
433 def A57WriteLdrAmLDSTSOPre : SchedWriteVariant<[
434 SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_5cyc_1I_1L]>,
435 SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]>
437 def : InstRW<[A57WriteLdrAmLDSTSOPre, A57WrBackTwo],
438 (instregex "LDR_PRE_REG", "LDRB_PRE_REG")>;
440 def A57WriteLdrAm3PreWrBack : SchedWriteVariant<[
441 SchedVar<IsLdrAm3RegOffPredX2, [A57WrBackTwo]>,
442 SchedVar<NoSchedPred, [A57WrBackOne]>
444 def : InstRW<[A57Write_4cyc_1L, A57WriteLdrAm3PreWrBack],
445 (instregex "LDR(H|SH|SB)_PRE")>;
446 def : InstRW<[A57Write_4cyc_1L, A57WrBackOne],
447 (instregex "t2LDR(H|SH|SB)?_PRE")>;
449 // LDRD pre-indexed: 5(2) cyc for reg, 4(1) cyc for imm.
450 def A57WriteLdrDAm3Pre : SchedWriteVariant<[
451 SchedVar<IsLdrAm3RegOffPredX3, [A57Write_5cyc_1I_1L]>,
452 SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]>
454 def A57WriteLdrDAm3PreWrBack : SchedWriteVariant<[
455 SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
456 SchedVar<NoSchedPred, [A57WrBackOne]>
458 def : InstRW<[A57WriteLdrDAm3Pre, A57WriteLdrDAm3Pre, A57WriteLdrDAm3PreWrBack],
459 (instregex "LDRD_PRE")>;
460 def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne],
461 (instregex "t2LDRD_PRE")>;
463 // --- LDR post-indexed ---
464 def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR(T?)_POST_IMM",
465 "LDRB(T?)_POST_IMM", "LDR(SB|H|SH)Ti", "t2LDRB_POST")>;
467 def A57WriteLdrAm3PostWrBack : SchedWriteVariant<[
468 SchedVar<IsLdrAm3RegOffPred, [A57WrBackTwo]>,
469 SchedVar<NoSchedPred, [A57WrBackOne]>
471 def : InstRW<[A57Write_4cyc_1L_1I, A57WriteLdrAm3PostWrBack],
472 (instregex "LDR(H|SH|SB)_POST")>;
473 def : InstRW<[A57Write_4cyc_1L, A57WrBackOne],
474 (instregex "t2LDR(H|SH|SB)?_POST")>;
476 def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR_POST_REG",
477 "LDRB_POST_REG", "LDR(B?)T_POST$")>;
479 def A57WriteLdrTRegPost : SchedWriteVariant<[
480 SchedVar<IsLdrAm2ScaledPred, [A57Write_4cyc_1I_1L_1M]>,
481 SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]>
483 def A57WriteLdrTRegPostWrBack : SchedWriteVariant<[
484 SchedVar<IsLdrAm2ScaledPred, [A57WrBackThree]>,
485 SchedVar<NoSchedPred, [A57WrBackTwo]>
487 // 4(3) "I0/I1,L,M" for scaled register, otherwise 4(2) "I0/I1,L"
488 def : InstRW<[A57WriteLdrTRegPost, A57WriteLdrTRegPostWrBack],
489 (instregex "LDRT_POST_REG", "LDRBT_POST_REG")>;
491 def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR(SB|H|SH)Tr")>;
493 def A57WriteLdrAm3PostWrBackX3 : SchedWriteVariant<[
494 SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
495 SchedVar<NoSchedPred, [A57WrBackOne]>
497 // LDRD post-indexed: 4(2) cyc for reg, 4(1) cyc for imm.
498 def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
499 A57WriteLdrAm3PostWrBackX3], (instregex "LDRD_POST")>;
500 def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne],
501 (instregex "t2LDRD_POST")>;
503 // --- Preload instructions ---
504 // Preload, immed offset
505 def : InstRW<[A57Write_4cyc_1L], (instregex "(t2)?PLDi12", "(t2)?PLDWi12",
506 "t2PLDW?(i8|pci|s)", "(t2)?PLI")>;
508 // Preload, register offset,
509 // 5cyc "I0/I1,L" for minus reg or scaled not plus lsl2
510 // otherwise 4cyc "L"
511 def A57WritePLD : SchedWriteVariant<[
512 SchedVar<IsLdstsoScaledNotOptimalPredX0, [A57Write_5cyc_1I_1L]>,
513 SchedVar<IsLdstsoMinusRegPredX0, [A57Write_5cyc_1I_1L]>,
514 SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
516 def : InstRW<[A57WritePLD], (instregex "PLDrs", "PLDWrs")>;
518 // --- Load multiple instructions ---
519 foreach NumAddr = 1-8 in {
520 def A57LMAddrPred#NumAddr :
521 SchedPredicate<"(TII->getLDMVariableDefsSize(*MI)+1)/2 == "#NumAddr>;
// Per-register write list for LDM when the base register is not in the
// register list: 16 entries on the L unit, arranged in pairs whose write name
// steps from 3cyc up to 10cyc.
524 def A57LDMOpsListNoregin : A57WriteLMOpsListType<
525 [A57Write_3cyc_1L, A57Write_3cyc_1L,
526 A57Write_4cyc_1L, A57Write_4cyc_1L,
527 A57Write_5cyc_1L, A57Write_5cyc_1L,
528 A57Write_6cyc_1L, A57Write_6cyc_1L,
529 A57Write_7cyc_1L, A57Write_7cyc_1L,
530 A57Write_8cyc_1L, A57Write_8cyc_1L,
531 A57Write_9cyc_1L, A57Write_9cyc_1L,
532 A57Write_10cyc_1L, A57Write_10cyc_1L]>;
// A57LMAddrPred<N> holds when (getLDMVariableDefsSize(*MI)+1)/2 == N, so case N
// takes the first 2*N entries of the list (slice [0-(2N-1)]).
533 def A57WriteLDMnoreginlist : SchedWriteVariant<[
534 SchedVar<A57LMAddrPred1, A57LDMOpsListNoregin.Writes[0-1]>,
535 SchedVar<A57LMAddrPred2, A57LDMOpsListNoregin.Writes[0-3]>,
536 SchedVar<A57LMAddrPred3, A57LDMOpsListNoregin.Writes[0-5]>,
537 SchedVar<A57LMAddrPred4, A57LDMOpsListNoregin.Writes[0-7]>,
538 SchedVar<A57LMAddrPred5, A57LDMOpsListNoregin.Writes[0-9]>,
539 SchedVar<A57LMAddrPred6, A57LDMOpsListNoregin.Writes[0-11]>,
540 SchedVar<A57LMAddrPred7, A57LDMOpsListNoregin.Writes[0-13]>,
541 SchedVar<A57LMAddrPred8, A57LDMOpsListNoregin.Writes[0-15]>,
// Fallback when none of the eight predicates matches: the full list, the same
// slice as the N == 8 case.
542 SchedVar<NoSchedPred, A57LDMOpsListNoregin.Writes[0-15]>
543 ]> { let Variadic=1; }
// Per-register write list for LDM when the base register is in the register
// list. Compared with A57LDMOpsListNoregin, every entry also names the I unit
// and the pairs run from 4cyc to 11cyc instead of 3cyc to 10cyc.
545 def A57LDMOpsListRegin : A57WriteLMOpsListType<
546 [A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
547 A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
548 A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
549 A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
550 A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
551 A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
552 A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
553 A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I]>;
// Same selection scheme as A57WriteLDMnoreginlist: predicate N picks the first
// 2*N entries, and the NoSchedPred fallback uses all 16.
554 def A57WriteLDMreginlist : SchedWriteVariant<[
555 SchedVar<A57LMAddrPred1, A57LDMOpsListRegin.Writes[0-1]>,
556 SchedVar<A57LMAddrPred2, A57LDMOpsListRegin.Writes[0-3]>,
557 SchedVar<A57LMAddrPred3, A57LDMOpsListRegin.Writes[0-5]>,
558 SchedVar<A57LMAddrPred4, A57LDMOpsListRegin.Writes[0-7]>,
559 SchedVar<A57LMAddrPred5, A57LDMOpsListRegin.Writes[0-9]>,
560 SchedVar<A57LMAddrPred6, A57LDMOpsListRegin.Writes[0-11]>,
561 SchedVar<A57LMAddrPred7, A57LDMOpsListRegin.Writes[0-13]>,
562 SchedVar<A57LMAddrPred8, A57LDMOpsListRegin.Writes[0-15]>,
563 SchedVar<NoSchedPred, A57LDMOpsListRegin.Writes[0-15]>
564 ]> { let Variadic=1; }
566 def A57LDMOpsList_Upd : A57WriteLMOpsListType<
568 A57Write_3cyc_1L_1I, A57Write_3cyc_1L_1I,
569 A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
570 A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
571 A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
572 A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
573 A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
574 A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
575 A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I]>;
// Write variant for the base-updating LDM forms. Each case takes one entry
// more than the non-updating variants: predicate N selects slice [0-2N], i.e.
// 2*N+1 entries of A57LDMOpsList_Upd.
// NOTE(review): the extra leading entry is presumably the base-register
// writeback; the head of A57LDMOpsList_Upd is not visible in this chunk --
// confirm.
576 def A57WriteLDM_Upd : SchedWriteVariant<[
577 SchedVar<A57LMAddrPred1, A57LDMOpsList_Upd.Writes[0-2]>,
578 SchedVar<A57LMAddrPred2, A57LDMOpsList_Upd.Writes[0-4]>,
579 SchedVar<A57LMAddrPred3, A57LDMOpsList_Upd.Writes[0-6]>,
580 SchedVar<A57LMAddrPred4, A57LDMOpsList_Upd.Writes[0-8]>,
581 SchedVar<A57LMAddrPred5, A57LDMOpsList_Upd.Writes[0-10]>,
582 SchedVar<A57LMAddrPred6, A57LDMOpsList_Upd.Writes[0-12]>,
583 SchedVar<A57LMAddrPred7, A57LDMOpsList_Upd.Writes[0-14]>,
584 SchedVar<A57LMAddrPred8, A57LDMOpsList_Upd.Writes[0-16]>,
// Fallback: the same slice as the N == 8 case.
585 SchedVar<NoSchedPred, A57LDMOpsList_Upd.Writes[0-16]>
586 ]> { let Variadic=1; }
// Top-level write for non-updating LDM: dispatches to the "base register in
// list" variant when IsLdmBaseRegInList holds, otherwise to the
// "not in list" variant. Each of those is itself a SchedWriteVariant keyed on
// the A57LMAddrPred<N> predicates.
588 def A57WriteLDM : SchedWriteVariant<[
589 SchedVar<IsLdmBaseRegInList, [A57WriteLDMreginlist]>,
590 SchedVar<NoSchedPred, [A57WriteLDMnoreginlist]>
591 ]> { let Variadic=1; }
593 def : InstRW<[A57WriteLDM], (instregex "(t|t2|sys)?LDM(IA|DA|DB|IB)$")>;
595 // TODO: no writeback latency defined in documentation (implemented as 1 cyc)
596 def : InstRW<[A57WriteLDM_Upd],
597 (instregex "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)", "tPOP")>;
599 def : InstRW<[A57Write_5cyc_1L], (instregex "VLLDM")>;
601 // --- 3.9 Store Instructions ---
603 // Store, immed offset
604 def : InstRW<[A57Write_1cyc_1S], (instregex "STRi12", "STRBi12", "PICSTR",
605 "t2STR(B?)(T|i12|i8|s)", "t2STRDi8", "t2STRH(i12|i8|s)", "tSTR")>;
607 // Store, register offset
608 // For minus or for not plus lsl2 scaled we need 3cyc "I0/I1, S",
610 def A57WriteStrAmLDSTSO : SchedWriteVariant<[
611 SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_3cyc_1I_1S]>,
612 SchedVar<IsLdstsoMinusRegPred, [A57Write_3cyc_1I_1S]>,
613 SchedVar<NoSchedPred, [A57Write_1cyc_1S]>
615 def : InstRW<[A57WriteStrAmLDSTSO], (instregex "STRrs", "STRBrs")>;
617 // STRH,STRD: 3cyc "I0/I1, S" for minus reg, 1cyc S for imm or for plus reg.
618 def A57WriteStrAm3 : SchedWriteVariant<[
619 SchedVar<IsLdrAm3NegRegOffPred, [A57Write_3cyc_1I_1S]>,
620 SchedVar<NoSchedPred, [A57Write_1cyc_1S]>
622 def : InstRW<[A57WriteStrAm3], (instregex "STRH$")>;
623 def A57WriteStrAm3X2 : SchedWriteVariant<[
624 SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>,
625 SchedVar<NoSchedPred, [A57Write_1cyc_1S]>
627 def : InstRW<[A57WriteStrAm3X2], (instregex "STRD$")>;
629 // Store, immed pre-indexed (1cyc "S, I0/I1", 1cyc writeback)
630 def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR_PRE_IMM",
631 "STRB_PRE_IMM", "STR(B)?(r|i)_preidx", "(t2)?STRH_(preidx|PRE)",
632 "t2STR(B?)_(PRE|preidx)", "t2STRD_PRE")>;
634 // Store, register pre-indexed:
635 // 1(1) "S, I0/I1" for plus reg
636 // 3(2) "I0/I1, S" for minus reg
637 // 1(2) "S, M" for scaled plus lsl2
638 // 3(2) "I0/I1, S" for other scaled
639 def A57WriteStrAmLDSTSOPre : SchedWriteVariant<[
640 SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_3cyc_1I_1S]>,
641 SchedVar<IsLdstsoMinusRegPredX2, [A57Write_3cyc_1I_1S]>,
642 SchedVar<IsLdstsoScaledPredX2, [A57Write_1cyc_1S_1M]>,
643 SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]>
645 def A57WriteStrAmLDSTSOPreWrBack : SchedWriteVariant<[
646 SchedVar<IsLdstsoScaledPredX2, [A57WrBackTwo]>,
647 SchedVar<IsLdstsoMinusRegPredX2, [A57WrBackTwo]>,
648 SchedVar<NoSchedPred, [A57WrBackOne]>
650 def : InstRW<[A57WriteStrAmLDSTSOPreWrBack, A57WriteStrAmLDSTSOPre],
651 (instregex "STR_PRE_REG", "STRB_PRE_REG")>;
653 // pre-indexed STRH/STRD (STRH_PRE, STRD_PRE)
654 // 1(1) "S, I0/I1" for imm or reg plus
655 // 3(2) "I0/I1, S" for reg minus
656 def A57WriteStrAm3PreX2 : SchedWriteVariant<[
657 SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>,
658 SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]>
660 def A57WriteStrAm3PreWrBackX2 : SchedWriteVariant<[
661 SchedVar<IsLdrAm3NegRegOffPredX2, [A57WrBackTwo]>,
662 SchedVar<NoSchedPred, [A57WrBackOne]>
664 def : InstRW<[A57WriteStrAm3PreWrBackX2, A57WriteStrAm3PreX2],
665 (instregex "STRH_PRE")>;
667 def A57WriteStrAm3PreX3 : SchedWriteVariant<[
668 SchedVar<IsLdrAm3NegRegOffPredX3, [A57Write_3cyc_1I_1S]>,
669 SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]>
671 def A57WriteStrAm3PreWrBackX3 : SchedWriteVariant<[
672 SchedVar<IsLdrAm3NegRegOffPredX3, [A57WrBackTwo]>,
673 SchedVar<NoSchedPred, [A57WrBackOne]>
675 def : InstRW<[A57WriteStrAm3PreWrBackX3, A57WriteStrAm3PreX3],
676 (instregex "STRD_PRE")>;
678 def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR(T?)_POST_IMM",
679 "STRB(T?)_POST_IMM", "t2STR(B?)_POST")>;
681 // 1(2) "S, M" for STR/STRB register post-indexed (both scaled or not)
682 def : InstRW<[A57WrBackTwo, A57Write_1cyc_1S_1M], (instregex "STR(T?)_POST_REG",
683 "STRB(T?)_POST_REG", "STR(B?)T_POST$")>;
685 // post-indexed STRH/STRD(STRH_POST, STRD_POST), STRHTi, STRHTr
686 // 1(1) "S, I0/I1" both for reg or imm
687 def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
688 (instregex "(t2)?STR(H|D)_POST", "STRHT(i|r)", "t2STRHT")>;
690 // --- Store multiple instructions ---
691 // TODO: no writeback latency defined in documentation
692 def A57WriteSTM : SchedWriteVariant<[
693 SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>,
694 SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>,
695 SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>,
696 SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>,
697 SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>,
698 SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>,
699 SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>,
700 SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>,
701 SchedVar<NoSchedPred, [A57Write_2cyc_1S]>
703 def A57WriteSTM_Upd : SchedWriteVariant<[
704 SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>,
705 SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>,
706 SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>,
707 SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>,
708 SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>,
709 SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>,
710 SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>,
711 SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>,
712 SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]>
715 def : InstRW<[A57WriteSTM], (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>;
716 def : InstRW<[A57WrBackOne, A57WriteSTM_Upd],
717 (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)", "tPUSH")>;
719 def : InstRW<[A57Write_5cyc_1S], (instregex "VLSTM")>;
721 // --- 3.10 FP Data Processing Instructions ---
722 def : SchedAlias<WriteFPALU32, A57Write_5cyc_1V>;
723 def : SchedAlias<WriteFPALU64, A57Write_5cyc_1V>;
725 def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(S|D|H)")>;
727 // fp compare - 3cyc F1 for unconditional, 6cyc "F0/F1, F1" for conditional
728 def A57WriteVcmp : SchedWriteVariant<[
729 SchedVar<IsPredicatedPred, [A57Write_6cyc_1V_1X]>,
730 SchedVar<NoSchedPred, [A57Write_3cyc_1X]>
732 def : InstRW<[A57WriteVcmp],
733 (instregex "VCMP(D|S|H|ZD|ZS|ZH)$", "VCMPE(D|S|H|ZD|ZS|ZH)")>;
736 def : InstRW<[A57Write_5cyc_1V], (instregex
737 "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)", "VCVT(BDH|THD|TDH)")>;
738 def : InstRW<[A57Write_5cyc_1V], (instregex "VTOSLS", "VTOUHS", "VTOULS")>;
739 def : SchedAlias<WriteFPCVT, A57Write_5cyc_1V>;
741 def : InstRW<[A57Write_5cyc_1V], (instregex "VJCVT")>;
743 // FP round to integral
744 def : InstRW<[A57Write_5cyc_1V], (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>;
746 // FP divide, FP square root
// Divide and square root share writes per width: the 32-bit forms alias to the
// 17cyc write and the 64-bit forms to the 32cyc write, all naming the W unit.
747 def : SchedAlias<WriteFPDIV32, A57Write_17cyc_1W>;
748 def : SchedAlias<WriteFPDIV64, A57Write_32cyc_1W>;
749 def : SchedAlias<WriteFPSQRT32, A57Write_17cyc_1W>;
750 def : SchedAlias<WriteFPSQRT64, A57Write_32cyc_1W>;
// Half-precision square root is bound explicitly and reuses the 32-bit write.
752 def : InstRW<[A57Write_17cyc_1W], (instregex "VSQRTH")>;
755 def : InstRW<[A57Write_5cyc_1V], (instregex "VMAX", "VMIN")>;
757 // FP multiply-accumulate pipelines support late forwarding of the result
758 // from FP multiply μops to the accumulate operands of an
759 // FP multiply-accumulate μop. The latter can potentially be issued 1 cycle
760 // after the FP multiply μop has been issued
762 def A57WriteVMUL : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
764 def : SchedAlias<WriteFPMUL32, A57WriteVMUL>;
765 def : SchedAlias<WriteFPMUL64, A57WriteVMUL>;
766 def : ReadAdvance<ReadFPMUL, 0>;
// FP multiply accumulate, FZ: 9cyc "F0/F1" or 4 cyc for sequenced accumulate
// VFMA, VFMS, VFNMA, VFNMS, VMLA, VMLS, VNMLA, VNMLS
def A57WriteVFMA : SchedWriteRes<[A57UnitV]> {
  let Latency = 9;
}

// VFMA takes 9 cyc for common case and 4 cyc for VFMA->VFMA chain (5 read adv.)
// VMUL takes 5 cyc for common case and 1 cyc for VMUL->VFMA chain (4 read adv.)
// Currently, there is no way to define different read advances for a VFMA
// operand coming from VFMA versus VMUL, so a single read advance of 5 is used
// for both producers. The resulting zero latency (instead of one) for
// VMUL->VFMA is not expected to break anything.
// The same situation applies to the ASIMD VMUL/VFMA instructions.
//   def A57ReadVFMA : SchedRead;
//   def : ReadAdvance<A57ReadVFMA, 5, [A57WriteVFMA]>;
//   def : ReadAdvance<A57ReadVFMA, 4, [A57WriteVMUL]>;
def A57ReadVFMA5 : SchedReadAdvance<5, [A57WriteVFMA, A57WriteVMUL]>;

def : SchedAlias<WriteFPMAC32, A57WriteVFMA>;
def : SchedAlias<WriteFPMAC64, A57WriteVFMA>;
def : SchedAlias<ReadFPMAC,    A57ReadVFMA5>;
// VMLAH/VMLSH are not bound to scheduling classes by default, so they are
// mapped explicitly here.
def : InstRW<[A57WriteVFMA, A57ReadVFMA5, ReadFPMUL, ReadFPMUL],
  (instregex "VMLAH", "VMLSH", "VNMLAH", "VNMLSH")>;

// Dot-product forms reuse the FP multiply write.
def : InstRW<[A57WriteVMUL],
  (instregex "VUDOTD", "VSDOTD", "VUDOTQ", "VSDOTQ")>;

def : InstRW<[A57Write_3cyc_1V],
  (instregex "VNEG")>;
def : InstRW<[A57Write_3cyc_1V],
  (instregex "VSEL")>;
// --- 3.11 FP Miscellaneous Instructions ---
// VMOV: 3cyc "F0/F1" for imm/reg
def : InstRW<[A57Write_3cyc_1V],
  (instregex "FCONST(D|S|H)")>;
def : InstRW<[A57Write_3cyc_1V],
  (instregex "VMOV(D|S|H)(cc)?$")>;

def : InstRW<[A57Write_3cyc_1V],
  (instregex "VINSH")>;

// 5cyc L for FP transfer, vfp to core reg,
// 5cyc L for FP transfer, core reg to vfp
def : SchedAlias<WriteFPMOV, A57Write_5cyc_1L>;
// VMOVRRS/VMOVRRD are declared in common code with one WriteFPMOV (instead
// of 2), so two writes are supplied here.
def : InstRW<[A57Write_5cyc_1L, A57Write_5cyc_1L],
  (instregex "VMOV(RRS|RRD)")>;
// 8cyc "L,F0/F1" for FP transfer, core reg to upper or lower half of vfp D-reg
// F0/F1 are modelled by the V unit elsewhere in this file (e.g. the 8cyc
// "L, F0/F1" VDUP and VSETLN entries use A57Write_8cyc_1L_1V), so the V-unit
// write is used here rather than the integer (I) one.
// NOTE(review): confirm against the Cortex-A57 optimization guide.
def : InstRW<[A57Write_8cyc_1L_1V],
  (instregex "VMOVDRR")>;
// --- 3.12 FP Load Instructions ---
def : InstRW<[A57Write_5cyc_1L],
  (instregex "VLDR(D|S|H)")>;

def : InstRW<[A57Write_5cyc_1L],
  (instregex "VLDMQIA$")>;
// FP load multiple (VLDM)

// Unconditional form: 16 writes whose latency grows by one cycle every two
// entries (5,5,6,6,...,12,12).
def A57VLDMOpsListUncond : A57WriteLMOpsListType<[
    A57Write_5cyc_1L,  A57Write_5cyc_1L,
    A57Write_6cyc_1L,  A57Write_6cyc_1L,
    A57Write_7cyc_1L,  A57Write_7cyc_1L,
    A57Write_8cyc_1L,  A57Write_8cyc_1L,
    A57Write_9cyc_1L,  A57Write_9cyc_1L,
    A57Write_10cyc_1L, A57Write_10cyc_1L,
    A57Write_11cyc_1L, A57Write_11cyc_1L,
    A57Write_12cyc_1L, A57Write_12cyc_1L]>;

// A57LMAddrPredN selects the first 2*N entries of the list; the NoSchedPred
// fallback takes all 16.
def A57WriteVLDMuncond : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond.Writes[0-13]>,
  SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond.Writes[0-15]>,
  SchedVar<NoSchedPred,    A57VLDMOpsListUncond.Writes[0-15]>
]> {
  let Variadic = 1;
}
// Conditional form: 16 writes whose latency grows by one cycle per entry
// (5 through 20).
def A57VLDMOpsListCond : A57WriteLMOpsListType<[
    A57Write_5cyc_1L,  A57Write_6cyc_1L,
    A57Write_7cyc_1L,  A57Write_8cyc_1L,
    A57Write_9cyc_1L,  A57Write_10cyc_1L,
    A57Write_11cyc_1L, A57Write_12cyc_1L,
    A57Write_13cyc_1L, A57Write_14cyc_1L,
    A57Write_15cyc_1L, A57Write_16cyc_1L,
    A57Write_17cyc_1L, A57Write_18cyc_1L,
    A57Write_19cyc_1L, A57Write_20cyc_1L]>;

// A57LMAddrPredN selects the first 2*N entries of the list; the NoSchedPred
// fallback takes all 16.
def A57WriteVLDMcond : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57VLDMOpsListCond.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57VLDMOpsListCond.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57VLDMOpsListCond.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57VLDMOpsListCond.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57VLDMOpsListCond.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57VLDMOpsListCond.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57VLDMOpsListCond.Writes[0-13]>,
  SchedVar<A57LMAddrPred8, A57VLDMOpsListCond.Writes[0-15]>,
  SchedVar<NoSchedPred,    A57VLDMOpsListCond.Writes[0-15]>
]> {
  let Variadic = 1;
}
// Top-level VLDM write: predicated instructions take the conditional
// variant, everything else the unconditional one.
def A57WriteVLDM : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57WriteVLDMcond]>,
  SchedVar<NoSchedPred,      [A57WriteVLDMuncond]>
]> {
  let Variadic = 1;
}

def : InstRW<[A57WriteVLDM],
  (instregex "VLDM(DIA|SIA)$")>;
// Writeback (_UPD) VLDM, unconditional: same latency ladder as the
// non-writeback list (5,5,6,6,...,12,12), using the *_1L_1I writes.
def A57VLDMOpsListUncond_Upd : A57WriteLMOpsListType<[
    A57Write_5cyc_1L_1I,  A57Write_5cyc_1L_1I,
    A57Write_6cyc_1L_1I,  A57Write_6cyc_1L_1I,
    A57Write_7cyc_1L_1I,  A57Write_7cyc_1L_1I,
    A57Write_8cyc_1L_1I,  A57Write_8cyc_1L_1I,
    A57Write_9cyc_1L_1I,  A57Write_9cyc_1L_1I,
    A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
    A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I,
    A57Write_12cyc_1L_1I, A57Write_12cyc_1L_1I]>;

// A57LMAddrPredN selects the first 2*N entries; NoSchedPred takes all 16.
def A57WriteVLDMuncond_UPD : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond_Upd.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond_Upd.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond_Upd.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond_Upd.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond_Upd.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond_Upd.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond_Upd.Writes[0-13]>,
  SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond_Upd.Writes[0-15]>,
  SchedVar<NoSchedPred,    A57VLDMOpsListUncond_Upd.Writes[0-15]>
]> {
  let Variadic = 1;
}
// Writeback (_UPD) VLDM, conditional: latency grows by one cycle per entry
// (5 through 20), using the *_1L_1I writes.
def A57VLDMOpsListCond_Upd : A57WriteLMOpsListType<[
    A57Write_5cyc_1L_1I,  A57Write_6cyc_1L_1I,
    A57Write_7cyc_1L_1I,  A57Write_8cyc_1L_1I,
    A57Write_9cyc_1L_1I,  A57Write_10cyc_1L_1I,
    A57Write_11cyc_1L_1I, A57Write_12cyc_1L_1I,
    A57Write_13cyc_1L_1I, A57Write_14cyc_1L_1I,
    A57Write_15cyc_1L_1I, A57Write_16cyc_1L_1I,
    A57Write_17cyc_1L_1I, A57Write_18cyc_1L_1I,
    A57Write_19cyc_1L_1I, A57Write_20cyc_1L_1I]>;

// A57LMAddrPredN selects the first 2*N entries; NoSchedPred takes all 16.
def A57WriteVLDMcond_UPD : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57VLDMOpsListCond_Upd.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57VLDMOpsListCond_Upd.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57VLDMOpsListCond_Upd.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57VLDMOpsListCond_Upd.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57VLDMOpsListCond_Upd.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57VLDMOpsListCond_Upd.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57VLDMOpsListCond_Upd.Writes[0-13]>,
  SchedVar<A57LMAddrPred8, A57VLDMOpsListCond_Upd.Writes[0-15]>,
  SchedVar<NoSchedPred,    A57VLDMOpsListCond_Upd.Writes[0-15]>
]> {
  let Variadic = 1;
}
// Top-level writeback VLDM write: predicated instructions take the
// conditional variant, everything else the unconditional one.
def A57WriteVLDM_UPD : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57WriteVLDMcond_UPD]>,
  SchedVar<NoSchedPred,      [A57WriteVLDMuncond_UPD]>
]> {
  let Variadic = 1;
}

// The writeback result is listed first, followed by the variadic loads.
def : InstRW<[A57WrBackOne, A57WriteVLDM_UPD],
  (instregex "VLDM(DIA_UPD|DDB_UPD|SIA_UPD|SDB_UPD)")>;
// --- 3.13 FP Store Instructions ---
def : InstRW<[A57Write_1cyc_1S],
  (instregex "VSTR(D|S|H)")>;

def : InstRW<[A57Write_2cyc_1S],
  (instregex "VSTMQIA$")>;
// VSTM, S-register form: A57LMAddrPredN maps to an N-cycle store write;
// the NoSchedPred fallback uses the 2-cycle write.
def A57WriteVSTMs : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>,
  SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>,
  SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>,
  SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>,
  SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>,
  SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>,
  SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>,
  SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>,
  SchedVar<NoSchedPred,    [A57Write_2cyc_1S]>
]>;
// VSTM, D-register form: A57LMAddrPredN maps to a 2*N-cycle store write;
// the NoSchedPred fallback uses the 4-cycle write.
def A57WriteVSTMd : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S]>,
  SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S]>,
  SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S]>,
  SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S]>,
  SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S]>,
  SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S]>,
  SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S]>,
  SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S]>,
  SchedVar<NoSchedPred,    [A57Write_4cyc_1S]>
]>;
// VSTM with writeback, S-register form: A57LMAddrPredN maps to an N-cycle
// *_1S_1I write; the NoSchedPred fallback uses the 2-cycle write.
def A57WriteVSTMs_Upd : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>,
  SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>,
  SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>,
  SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>,
  SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>,
  SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>,
  SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>,
  SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>,
  SchedVar<NoSchedPred,    [A57Write_2cyc_1S_1I]>
]>;
// VSTM with writeback, D-register form: A57LMAddrPredN maps to a 2*N-cycle
// *_1S_1I write.
// The NoSchedPred fallback uses the 4-cycle write so that it agrees with the
// non-writeback D-register variant (A57WriteVSTMd), whose fallback is also
// 4 cycles.
// NOTE(review): the fallback was 2cyc here; confirm 4cyc is the intended
// default for the D-register form.
def A57WriteVSTMd_Upd : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S_1I]>,
  SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S_1I]>,
  SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S_1I]>,
  SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S_1I]>,
  SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S_1I]>,
  SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S_1I]>,
  SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S_1I]>,
  SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S_1I]>,
  SchedVar<NoSchedPred,    [A57Write_4cyc_1S_1I]>
]>;
// Bind the VSTM variants: non-writeback forms first, then the _UPD forms,
// which list the writeback result before the store write.
def : InstRW<[A57WriteVSTMs],
  (instregex "VSTMSIA$")>;
def : InstRW<[A57WriteVSTMd],
  (instregex "VSTMDIA$")>;
def : InstRW<[A57WrBackOne, A57WriteVSTMs_Upd],
  (instregex "VSTM(SIA_UPD|SDB_UPD)")>;
def : InstRW<[A57WrBackOne, A57WriteVSTMd_Upd],
  (instregex "VSTM(DIA_UPD|DDB_UPD)")>;
// --- 3.14 ASIMD Integer Instructions ---

// ASIMD absolute diff, 3cyc F0/F1 for integer VABD
def : InstRW<[A57Write_3cyc_1V],
  (instregex "VABD(s|u)")>;

// ASIMD absolute diff accum: 4(1) F1 for D-form, 5(2) F1 for Q-form
// (the parenthesised figure is latency minus the read advance of 3).
def A57WriteVABAD : SchedWriteRes<[A57UnitX]> {
  let Latency = 4;
}
def A57ReadVABAD : SchedReadAdvance<3, [A57WriteVABAD]>;
def : InstRW<[A57WriteVABAD, A57ReadVABAD],
  (instregex "VABA(s|u)(v8i8|v4i16|v2i32)")>;

def A57WriteVABAQ : SchedWriteRes<[A57UnitX]> {
  let Latency = 5;
}
def A57ReadVABAQ : SchedReadAdvance<3, [A57WriteVABAQ]>;
def : InstRW<[A57WriteVABAQ, A57ReadVABAQ],
  (instregex "VABA(s|u)(v16i8|v8i16|v4i32)")>;

// ASIMD absolute diff accum long: 4(1) F1 for VABAL
def A57WriteVABAL : SchedWriteRes<[A57UnitX]> {
  let Latency = 4;
}
def A57ReadVABAL : SchedReadAdvance<3, [A57WriteVABAL]>;
def : InstRW<[A57WriteVABAL, A57ReadVABAL],
  (instregex "VABAL(s|u)")>;

// ASIMD absolute diff long: 3cyc F0/F1 for VABDL
def : InstRW<[A57Write_3cyc_1V],
  (instregex "VABDL(s|u)")>;
// ASIMD arith, basic
def : InstRW<[A57Write_3cyc_1V],
  (instregex "VADDv", "VADDL", "VADDW",
             "VNEG(s8d|s16d|s32d|s8q|s16q|s32q|d|q)",
             "VPADDi", "VPADDL", "VSUBv", "VSUBL", "VSUBW")>;

// ASIMD arith, complex
def : InstRW<[A57Write_3cyc_1V],
  (instregex "VABS", "VADDHN", "VHADD", "VHSUB",
             "VQABS", "VQADD", "VQNEG", "VQSUB",
             "VRADDHN", "VRHADD", "VRSUBHN", "VSUBHN")>;

// ASIMD compare
def : InstRW<[A57Write_3cyc_1V],
  (instregex "VCEQ", "VCGE", "VCGT", "VCLE", "VTST", "VCLT")>;

// ASIMD logical
def : InstRW<[A57Write_3cyc_1V],
  (instregex "VAND", "VBIC", "VMVN", "VORR", "VORN", "VEOR")>;

// ASIMD max/min (integer)
def : InstRW<[A57Write_3cyc_1V],
  (instregex "(VMAX|VMIN)(s|u)", "(VPMAX|VPMIN)(s8|s16|s32|u8|u16|u32)")>;
// ASIMD multiply, D-form: 5cyc F0 for r0px, 4cyc F0 for r1p0 and later
// Cortex-A57 r1p0 and later reduce the latency of ASIMD multiply
// and multiply-with-accumulate instructions relative to r0pX.
def A57WriteVMULD_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
def : InstRW<[A57WriteVMULD_VecInt],
  (instregex "VMUL(v8i8|v4i16|v2i32|pd)", "VMULsl(v4i16|v2i32)",
             "VQDMULH(sl)?(v4i16|v2i32)", "VQRDMULH(sl)?(v4i16|v2i32)")>;

// ASIMD multiply, Q-form: 6cyc F0 for r0px, 5cyc F0 for r1p0 and later
def A57WriteVMULQ_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_6cyc_1W]>
]>;
def : InstRW<[A57WriteVMULQ_VecInt],
  (instregex "VMUL(v16i8|v8i16|v4i32|pq)", "VMULsl(v8i16|v4i32)",
             "VQDMULH(sl)?(v8i16|v4i32)", "VQRDMULH(sl)?(v8i16|v4i32)")>;
// ASIMD multiply accumulate, D-form
// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence
// (4 or 3 ReadAdvance)
def A57WriteVMLAD_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
// The read advance tracks the write latency so that a back-to-back
// accumulate sees 1 cycle in either revision (4-3 or 5-4).
def A57ReadVMLAD_VecInt : SchedReadVariant<[
  SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAD_VecInt]>]>,
  SchedVar<NoSchedPred,        [SchedReadAdvance<4, [A57WriteVMLAD_VecInt]>]>
]>;
def : InstRW<[A57WriteVMLAD_VecInt, A57ReadVMLAD_VecInt],
  (instregex "VMLA(sl)?(v8i8|v4i16|v2i32)", "VMLS(sl)?(v8i8|v4i16|v2i32)")>;
// ASIMD multiply accumulate, Q-form
// 6cyc F0 for r0px, 5cyc F0 for r1p0 and later, 2cyc for accumulate sequence
// (4 or 3 ReadAdvance)
def A57WriteVMLAQ_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_6cyc_1W]>
]>;
// The read advance tracks the write latency so that a back-to-back
// accumulate sees 2 cycles in either revision (5-3 or 6-4).
def A57ReadVMLAQ_VecInt : SchedReadVariant<[
  SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAQ_VecInt]>]>,
  SchedVar<NoSchedPred,        [SchedReadAdvance<4, [A57WriteVMLAQ_VecInt]>]>
]>;
def : InstRW<[A57WriteVMLAQ_VecInt, A57ReadVMLAQ_VecInt],
  (instregex "VMLA(sl)?(v16i8|v8i16|v4i32)", "VMLS(sl)?(v16i8|v8i16|v4i32)")>;
// ASIMD multiply accumulate long
// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence
// (4 or 3 ReadAdvance)
def A57WriteVMLAL_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
// The read advance tracks the write latency so that a back-to-back
// accumulate sees 1 cycle in either revision (4-3 or 5-4).
def A57ReadVMLAL_VecInt : SchedReadVariant<[
  SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAL_VecInt]>]>,
  SchedVar<NoSchedPred,        [SchedReadAdvance<4, [A57WriteVMLAL_VecInt]>]>
]>;
def : InstRW<[A57WriteVMLAL_VecInt, A57ReadVMLAL_VecInt],
  (instregex "VMLAL(s|u)", "VMLSL(s|u)")>;
// ASIMD multiply accumulate saturating long
// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 2cyc for accumulate sequence
// (3 or 2 ReadAdvance)
def A57WriteVQDMLAL_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
// The read advance tracks the write latency so that a back-to-back
// accumulate sees 2 cycles in either revision (4-2 or 5-3).
def A57ReadVQDMLAL_VecInt : SchedReadVariant<[
  SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<2, [A57WriteVQDMLAL_VecInt]>]>,
  SchedVar<NoSchedPred,        [SchedReadAdvance<3, [A57WriteVQDMLAL_VecInt]>]>
]>;
def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt],
  (instregex "VQDMLAL", "VQDMLSL")>;

// Vector Saturating Rounding Doubling Multiply Accumulate/Subtract Long
// Scheduling info from VQDMLAL/VQDMLSL
def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt],
  (instregex "VQRDMLAH", "VQRDMLSH")>;
// ASIMD multiply long
// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later
def A57WriteVMULL_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
def : InstRW<[A57WriteVMULL_VecInt],
  (instregex "VMULL(s|u|p8|sls|slu)", "VQDMULL")>;
// ASIMD pairwise add and accumulate
// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance)
def A57WriteVPADAL : SchedWriteRes<[A57UnitX]> {
  let Latency = 4;
}
def A57ReadVPADAL : SchedReadAdvance<3, [A57WriteVPADAL]>;
def : InstRW<[A57WriteVPADAL, A57ReadVPADAL],
  (instregex "VPADAL(s|u)")>;

// ASIMD shift accumulate
// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance)
def A57WriteVSRA : SchedWriteRes<[A57UnitX]> {
  let Latency = 4;
}
def A57ReadVSRA : SchedReadAdvance<3, [A57WriteVSRA]>;
def : InstRW<[A57WriteVSRA, A57ReadVSRA],
  (instregex "VSRA", "VRSRA")>;
// ASIMD shift by immed, basic
def : InstRW<[A57Write_3cyc_1X], (instregex
  "VMOVL", "VSHLi", "VSHLL", "VSHR(s|u)", "VSHRN")>;
// ASIMD shift by immed, complex
// NOTE(review): the regex list ended on a dangling comma with no closing
// `)>;`. "VRSHRN" is supplied as the final entry: it is a rounding narrowing
// shift that no other pattern in this section matches ("VSHRN" does not match
// it). Confirm against the reference model.
def : InstRW<[A57Write_4cyc_1X], (instregex
  "VQRSHRN", "VQRSHRUN", "VQSHL(si|ui|su)", "VQSHRN", "VQSHRUN", "VRSHR(s|u)",
  "VRSHRN")>;
// ASIMD shift by immed and insert, basic, D-form
def : InstRW<[A57Write_4cyc_1X],
  (instregex "VSLI(v8i8|v4i16|v2i32|v1i64)", "VSRI(v8i8|v4i16|v2i32|v1i64)")>;

// ASIMD shift by immed and insert, basic, Q-form
def : InstRW<[A57Write_5cyc_1X],
  (instregex "VSLI(v16i8|v8i16|v4i32|v2i64)", "VSRI(v16i8|v8i16|v4i32|v2i64)")>;

// ASIMD shift by register, basic, D-form
def : InstRW<[A57Write_3cyc_1X],
  (instregex "VSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;

// ASIMD shift by register, basic, Q-form
def : InstRW<[A57Write_4cyc_1X],
  (instregex "VSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;

// ASIMD shift by register, complex, D-form
// VQRSHL, VQSHL, VRSHL
def : InstRW<[A57Write_4cyc_1X],
  (instregex "VQRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)",
             "VQSHL(s|u)(v8i8|v4i16|v2i32|v1i64)",
             "VRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;

// ASIMD shift by register, complex, Q-form
def : InstRW<[A57Write_5cyc_1X],
  (instregex "VQRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)",
             "VQSHL(s|u)(v16i8|v8i16|v4i32|v2i64)",
             "VRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;
// --- 3.15 ASIMD Floating-Point Instructions ---
// ASIMD FP absolute value
def : InstRW<[A57Write_3cyc_1V],
  (instregex "VABS(fd|fq|hd|hq)")>;

// ASIMD FP arith
def : InstRW<[A57Write_5cyc_1V],
  (instregex "VABD(fd|fq|hd|hq)",
             "VADD(fd|fq|hd|hq)", "VPADD(f|h)", "VSUB(fd|fq|hd|hq)")>;

def : InstRW<[A57Write_5cyc_1V],
  (instregex "VCADD", "VCMLA")>;

// ASIMD FP compare
def : InstRW<[A57Write_5cyc_1V],
  (instregex "VAC(GE|GT|LE|LT)",
             "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>;

// ASIMD FP convert, integer
def : InstRW<[A57Write_5cyc_1V],
  (instregex
    "VCVT(f2sd|f2ud|s2fd|u2fd|f2sq|f2uq|s2fq|u2fq|f2xsd|f2xud|xs2fd|xu2fd)",
    "VCVT(f2xsq|f2xuq|xs2fq|xu2fq)",
    "VCVT(AN|MN|NN|PN)(SDf|SQf|UDf|UQf|SDh|SQh|UDh|UQh)")>;
// ASIMD FP convert, half-precision: 8cyc F0/F1
// NOTE(review): the regex list ended on a dangling comma with no closing
// `)>;`. The single<->half conversions "VCVT(f2h|h2f)" are supplied as the
// final entry since no other pattern in this section matches them. Confirm
// against the reference model.
def : InstRW<[A57Write_8cyc_1V], (instregex
  "VCVT(h2sd|h2ud|s2hd|u2hd|h2sq|h2uq|s2hq|u2hq|h2xsd|h2xud|xs2hd|xu2hd)",
  "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)",
  "VCVT(f2h|h2f)")>;
// ASIMD FP max/min
def : InstRW<[A57Write_5cyc_1V],
  (instregex "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)",
             "(NEON|VFP)_VMAXNM",
             "(NEON|VFP)_VMINNM")>;

// ASIMD FP multiply
def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]> {
  let Latency = 5;
}
def : InstRW<[A57WriteVMUL_VecFP],
  (instregex "VMUL(sl)?(fd|fq|hd|hq)")>;

// ASIMD FP multiply accumulate: 9cyc F0/F1, 4cyc for accumulate sequence
// (read advance of 5 from either an FP multiply-accumulate or an FP multiply).
def A57WriteVMLA_VecFP : SchedWriteRes<[A57UnitV]> {
  let Latency = 9;
}
def A57ReadVMLA_VecFP :
  SchedReadAdvance<5, [A57WriteVMLA_VecFP, A57WriteVMUL_VecFP]>;
def : InstRW<[A57WriteVMLA_VecFP, A57ReadVMLA_VecFP],
  (instregex "(VMLA|VMLS)(sl)?(fd|fq|hd|hq)", "(VFMA|VFMS)(fd|fq|hd|hq)")>;

// ASIMD FP negate
def : InstRW<[A57Write_3cyc_1V],
  (instregex "VNEG(fd|f32q|hd|hq)")>;

// ASIMD FP round to integral
def : InstRW<[A57Write_5cyc_1V],
  (instregex "VRINT(AN|MN|NN|PN|XN|ZN)(Df|Qf|Dh|Qh)")>;
// --- 3.16 ASIMD Miscellaneous Instructions ---

// ASIMD bitwise insert
def : InstRW<[A57Write_3cyc_1V],
  (instregex "VBIF", "VBIT", "VBSL")>;

// ASIMD count
def : InstRW<[A57Write_3cyc_1V],
  (instregex "VCLS", "VCLZ", "VCNT")>;

// ASIMD duplicate, core reg: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V],
  (instregex "VDUP(8|16|32)(d|q)")>;

// ASIMD duplicate, scalar: 3cyc "F0/F1"
def : InstRW<[A57Write_3cyc_1V],
  (instregex "VDUPLN(8|16|32)(d|q)")>;

// ASIMD extract
def : InstRW<[A57Write_3cyc_1V],
  (instregex "VEXT(d|q)(8|16|32|64)")>;

// ASIMD move, immed
def : InstRW<[A57Write_3cyc_1V],
  (instregex
    "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)",
    "VMOVD0", "VMOVQ0")>;

// ASIMD move, narrowing
def : InstRW<[A57Write_3cyc_1V],
  (instregex "VMOVN")>;

// ASIMD move, saturating
def : InstRW<[A57Write_4cyc_1X],
  (instregex "VQMOVN")>;
// ASIMD reciprocal estimate
def : InstRW<[A57Write_5cyc_1V],
  (instregex "VRECPE", "VRSQRTE")>;

// ASIMD reciprocal step, FZ
def : InstRW<[A57Write_9cyc_1V],
  (instregex "VRECPS", "VRSQRTS")>;

// ASIMD reverse, swap, table lookup (1-2 reg)
def : InstRW<[A57Write_3cyc_1V],
  (instregex "VREV", "VSWP", "VTB(L|X)(1|2)")>;

// ASIMD table lookup (3-4 reg)
def : InstRW<[A57Write_6cyc_1V],
  (instregex "VTBL(3|4)", "VTBX(3|4)")>;

// ASIMD transfer, scalar to core reg: 6cyc "L, I0/I1"
def : InstRW<[A57Write_6cyc_1L_1I],
  (instregex "VGETLN")>;

// ASIMD transfer, core reg to scalar: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V],
  (instregex "VSETLN")>;

// ASIMD transpose (one write per result)
def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V],
  (instregex "VTRN")>;

// ASIMD unzip/zip, D-form
def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V],
  (instregex "VUZPd", "VZIPd")>;

// ASIMD unzip/zip, Q-form
def : InstRW<[A57Write_6cyc_1V, A57Write_6cyc_1V],
  (instregex "VUZPq", "VZIPq")>;
// --- 3.17 ASIMD Load Instructions ---

// The generic vector load/store writes get empty resource lists here; they
// are overridden via InstRW for this processor.
def : WriteRes<WriteVLD1, []>;
def : WriteRes<WriteVLD2, []>;
def : WriteRes<WriteVLD3, []>;
def : WriteRes<WriteVLD4, []>;

def : WriteRes<WriteVST1, []>;
def : WriteRes<WriteVST2, []>;
def : WriteRes<WriteVST3, []>;
def : WriteRes<WriteVST4, []>;
// 1-2 reg: 5cyc L, +I for writeback, 1 cyc wb latency
def : InstRW<[A57Write_5cyc_1L],
  (instregex "VLD1(d|q)(8|16|32|64)$")>;
def : InstRW<[A57Write_5cyc_1L_1I, A57WrBackOne],
  (instregex "VLD1(d|q)(8|16|32|64)wb")>;

// 3-4 reg: 6cyc L, +I for writeback, 1 cyc wb latency
def : InstRW<[A57Write_6cyc_1L],
  (instregex "VLD1(d|q)(8|16|32|64)(T|Q)$", "VLD1d64(T|Q)Pseudo")>;

def : InstRW<[A57Write_6cyc_1L_1I, A57WrBackOne],
  (instregex "VLD1(d|q)(8|16|32|64)(T|Q)wb")>;

// ASIMD load, 1 element, one lane and all lanes: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V],
  (instregex "VLD1(LN|DUP)(d|q)(8|16|32)$",
             "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
  (instregex "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)",
             "VLD1LNq(8|16|32)Pseudo_UPD")>;
// ASIMD load, 2 element, multiple, 2 reg: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V],
  (instregex "VLD2(d|q)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
  (instregex "VLD2(d|q)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>;

// ASIMD load, 2 element, multiple, 4 reg: 9cyc "L, F0/F1"
def : InstRW<[A57Write_9cyc_1L_1V],
  (instregex "VLD2b(8|16|32)$")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
  (instregex "VLD2b(8|16|32)wb")>;

// ASIMD load, 2 element, one lane and all lanes: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V],
  (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
             "VLD2LN(d|q)(8|16|32)Pseudo$")>;
// 2 results + wb result
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V, A57WrBackOne],
  (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;
// 1 result + wb result
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
  (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb",
             "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;
// ASIMD load, 3 element, multiple, 3 reg: 9cyc "L, F0/F1"
// Real instructions list one write per result; pseudos list a single write.
def : InstRW<[A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V],
  (instregex "VLD3(d|q)(8|16|32)$")>;

def : InstRW<[A57Write_9cyc_1L_1V],
  (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;

// Writeback forms append A57WrBackOne after the result writes.
def : InstRW<[A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57WrBackOne],
  (instregex "VLD3(d|q)(8|16|32)_UPD$")>;

def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
  (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
// ASIMD load, 3 element, one lane, size 32: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V],
  (instregex "VLD3LN(d|q)32$",
             "VLD3LN(d|q)32Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57WrBackOne],
  (instregex "VLD3LN(d|q)32_UPD")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
  (instregex "VLD3LN(d|q)32Pseudo_UPD")>;

// ASIMD load, 3 element, one lane, size 8/16: 9cyc "L, F0/F1"
def : InstRW<[A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V],
  (instregex "VLD3LN(d|q)(8|16)$",
             "VLD3LN(d|q)(8|16)Pseudo$")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57WrBackOne],
  (instregex "VLD3LN(d|q)(8|16)_UPD")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
  (instregex "VLD3LN(d|q)(8|16)Pseudo_UPD")>;

// ASIMD load, 3 element, all lanes: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V],
  (instregex "VLD3DUP(d|q)(8|16|32)$",
             "VLD3DUP(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57WrBackOne],
  (instregex "VLD3DUP(d|q)(8|16|32)_UPD")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
  (instregex "VLD3DUP(d|q)(8|16|32)Pseudo_UPD")>;
// ASIMD load, 4 element, multiple, 4 reg: 9cyc "L, F0/F1"
def : InstRW<[A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V],
  (instregex "VLD4(d|q)(8|16|32)$")>;
def : InstRW<[A57Write_9cyc_1L_1V],
  (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57WrBackOne],
  (instregex "VLD4(d|q)(8|16|32)_UPD")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
  (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
// ASIMD load, 4 element, one lane, size 32: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V],
  (instregex "VLD4LN(d|q)32$",
             "VLD4LN(d|q)32Pseudo$")>;
// Writeback form: four result writes followed by the writeback result. The
// list is closed with A57WrBackOne, as in the other _UPD load entries
// (e.g. VLD3LN(d|q)32_UPD and VLD4(d|q)(8|16|32)_UPD).
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
              A57WrBackOne],
  (instregex "VLD4LN(d|q)32_UPD")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
  (instregex "VLD4LN(d|q)32Pseudo_UPD")>;
// ASIMD load, 4 element, one lane, size 8/16: 9cyc "L, F0/F1"
def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V],
  (instregex "VLD4LN(d|q)(8|16)$",
             "VLD4LN(d|q)(8|16)Pseudo$")>;
// Writeback form: four result writes followed by the writeback result. The
// list is closed with A57WrBackOne, as in the other _UPD load entries
// (e.g. VLD3LN(d|q)(8|16)_UPD and VLD4(d|q)(8|16|32)_UPD).
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
              A57WrBackOne],
  (instregex "VLD4LN(d|q)(8|16)_UPD")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
  (instregex "VLD4LN(d|q)(8|16)Pseudo_UPD")>;
// ASIMD load, 4 element, all lanes: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V],
  (instregex "VLD4DUP(d|q)(8|16|32)$",
             "VLD4DUP(d|q)(8|16|32)Pseudo$")>;
// Writeback form: four result writes followed by the writeback result. The
// list is closed with A57WrBackOne, as in the other _UPD load entries
// (e.g. VLD3DUP(d|q)(8|16|32)_UPD and VLD4(d|q)(8|16|32)_UPD).
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
              A57WrBackOne],
  (instregex "VLD4DUP(d|q)(8|16|32)_UPD")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
  (instregex "VLD4DUP(d|q)(8|16|32)Pseudo_UPD")>;
// --- 3.18 ASIMD Store Instructions ---
// Writeback forms list A57WrBackOne first, then the store write.

// ASIMD store, 1 element, multiple, 1 reg: 1cyc S
def : InstRW<[A57Write_1cyc_1S],
  (instregex "VST1d(8|16|32|64)$")>;
def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
  (instregex "VST1d(8|16|32|64)wb")>;

// ASIMD store, 1 element, multiple, 2 reg: 2cyc S
def : InstRW<[A57Write_2cyc_1S],
  (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[A57WrBackOne, A57Write_2cyc_1S_1I],
  (instregex "VST1q(8|16|32|64)wb")>;

// ASIMD store, 1 element, multiple, 3 reg: 3cyc S
def : InstRW<[A57Write_3cyc_1S],
  (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1I],
  (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>;

// ASIMD store, 1 element, multiple, 4 reg: 4cyc S
def : InstRW<[A57Write_4cyc_1S],
  (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;
def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1I],
  (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;
// ASIMD store, 1 element, one lane: 3cyc "F0/F1, S"
def : InstRW<[A57Write_3cyc_1S_1V],
  (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
  (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>;

// ASIMD store, 2 element, multiple, 2 reg: 3cyc "F0/F1, S"
def : InstRW<[A57Write_3cyc_1S_1V],
  (instregex "VST2(d|b)(8|16|32)$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
  (instregex "VST2(b|d)(8|16|32)wb")>;

// ASIMD store, 2 element, multiple, 4 reg: 4cyc "F0/F1, S"
def : InstRW<[A57Write_4cyc_1S_1V],
  (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
  (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>;

// ASIMD store, 2 element, one lane: 3cyc "F0/F1, S"
def : InstRW<[A57Write_3cyc_1S_1V],
  (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
  (instregex "VST2LN(d|q)(8|16|32)_UPD",
             "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
// ASIMD store, 3 element, multiple, 3 reg
def : InstRW<[A57Write_3cyc_1S_1V],
  (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
  (instregex "VST3(d|q)(8|16|32)_UPD",
             "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;

// ASIMD store, 3 element, one lane
def : InstRW<[A57Write_3cyc_1S_1V],
  (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
  (instregex "VST3LN(d|q)(8|16|32)_UPD",
             "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;

// ASIMD store, 4 element, multiple, 4 reg
def : InstRW<[A57Write_4cyc_1S_1V],
  (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
  (instregex "VST4(d|q)(8|16|32)_UPD",
             "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;

// ASIMD store, 4 element, one lane
def : InstRW<[A57Write_3cyc_1S_1V],
  (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
  (instregex "VST4LN(d|q)(8|16|32)_UPD",
             "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;
// --- 3.19 Cryptography Extensions ---

// AESD, AESE, AESIMC, AESMC: 3cyc F0
def : InstRW<[A57Write_3cyc_1W],
  (instregex "^AES")>;

// Crypto polynomial (64x64) multiply long (VMULL.P64): 3cyc F0
def : InstRW<[A57Write_3cyc_1W],
  (instregex "^VMULLp64")>;

// Crypto SHA1 xor ops: 6cyc F0/F1
def : InstRW<[A57Write_6cyc_2V],
  (instregex "^SHA1SU0")>;

// Crypto SHA1 fast ops: 3cyc F0
def : InstRW<[A57Write_3cyc_1W],
  (instregex "^SHA1(H|SU1)")>;

// Crypto SHA1 slow ops: 6cyc F0
def : InstRW<[A57Write_6cyc_2W],
  (instregex "^SHA1[CMP]")>;

// Crypto SHA256 fast ops: 3cyc F0
def : InstRW<[A57Write_3cyc_1W],
  (instregex "^SHA256SU0")>;

// Crypto SHA256 slow ops: 6cyc F0
def : InstRW<[A57Write_6cyc_2W],
  (instregex "^SHA256(H|H2|SU1)")>;

// CRC32 (ARM and Thumb2 encodings)
def : InstRW<[A57Write_3cyc_1W],
  (instregex "^(t2)?CRC32")>;
// -----------------------------------------------------------------------------
// Common definitions

// No-ops take no resources, no latency and no micro-ops.
def : WriteRes<WriteNoop, []> {
  let Latency = 0;
  let NumMicroOps = 0;
}

// Map the generic ARM scheduling writes onto the A57 write resources.
def : SchedAlias<WriteALU,   A57Write_1cyc_1I>;

def : SchedAlias<WriteBr,    A57Write_1cyc_1B>;
def : SchedAlias<WriteBrL,   A57Write_1cyc_1B_1I>;
def : SchedAlias<WriteBrTbl, A57Write_1cyc_1B_1I>;
def : SchedAlias<WritePreLd, A57Write_4cyc_1L>;

def : SchedAlias<WriteLd,    A57Write_4cyc_1L>;
def : SchedAlias<WriteST,    A57Write_1cyc_1S>;

// ALU source operands get no read advance.
def : ReadAdvance<ReadALU, 0>;
1504 } // SchedModel = CortexA57Model