1 //=- ARMScheduleM85.td - ARM Cortex-M85 Scheduling Definitions -*- tablegen -*-=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the machine model for the ARM Cortex-M85 processor.
11 // All timing is referred to EX2. Thus, operands which are needed at EX1 are
12 // stated to have a ReadAdvance of -1. The FP/MVE pipe actually begins at EX3
13 // but is described as if it were in EX2 to avoid having unnaturally long latencies
14 // with delayed inputs on every instruction. Instead, whenever an FP instruction
15 // must access a GP register or a non-FP instruction (which includes loads/stores)
16 // must access an FP register, the operand timing is adjusted:
17 // FP accessing GPR: read one cycle later, write one cycle later
18 // NOTE: absolute spec timing already includes this if
20 // non-FP accessing FPR: read one cycle earlier, write one cycle earlier
21 //===----------------------------------------------------------------------===//
23 def CortexM85Model : SchedMachineModel {
24 let IssueWidth = 2; // Dual issue for most instructions.
25 let MicroOpBufferSize = 0; // M85 is in-order.
26 let LoadLatency = 2; // Best case for load-use case.
27 let MispredictPenalty = 4; // Mispredict cost for forward branches is 7,
29 let CompleteModel = 0;
32 let SchedModel = CortexM85Model in {
34 //===--------------------------------------------------------------------===//
35 // CortexM85 has two ALU, two LOAD, two STORE, a MAC, a BRANCH and two VFP
36 // pipes (with three units). There are three shifters available: one per
39 def M85UnitLoadL : ProcResource<1> { let BufferSize = 0; } // Every unit in this list sets BufferSize = 0.
40 def M85UnitLoadH : ProcResource<1> { let BufferSize = 0; }
41 def M85UnitLoad : ProcResGroup<[M85UnitLoadL,M85UnitLoadH]> { let BufferSize = 0; } // Group made of the two load units.
42 def M85UnitStoreL : ProcResource<1> { let BufferSize = 0; }
43 def M85UnitStoreH : ProcResource<1> { let BufferSize = 0; }
44 def M85UnitStore : ProcResGroup<[M85UnitStoreL,M85UnitStoreH]> { let BufferSize = 0; } // Group made of the two store units.
45 def M85UnitALU : ProcResource<2> { let BufferSize = 0; } // One resource with two instances.
46 def M85UnitShift1 : ProcResource<1> { let BufferSize = 0; } // Claimed by the shifted-operand ALU/CMP writes.
47 def M85UnitShift2 : ProcResource<1> { let BufferSize = 0; } // Claimed by M85WriteShift2 (used for t2RRX).
48 def M85UnitMAC : ProcResource<1> { let BufferSize = 0; }
49 def M85UnitBranch : ProcResource<1> { let BufferSize = 0; }
50 def M85UnitVFPAL : ProcResource<1> { let BufferSize = 0; }
51 def M85UnitVFPAH : ProcResource<1> { let BufferSize = 0; }
52 def M85UnitVFPA : ProcResGroup<[M85UnitVFPAL,M85UnitVFPAH]> { let BufferSize = 0; } // Group: VFPAL + VFPAH.
53 def M85UnitVFPBL : ProcResource<1> { let BufferSize = 0; }
54 def M85UnitVFPBH : ProcResource<1> { let BufferSize = 0; }
55 def M85UnitVFPB : ProcResGroup<[M85UnitVFPBL,M85UnitVFPBH]> { let BufferSize = 0; } // Group: VFPBL + VFPBH.
56 def M85UnitVFPCL : ProcResource<1> { let BufferSize = 0; }
57 def M85UnitVFPCH : ProcResource<1> { let BufferSize = 0; }
58 def M85UnitVFPC : ProcResGroup<[M85UnitVFPCL,M85UnitVFPCH]> { let BufferSize = 0; } // Group: VFPCL + VFPCH.
59 def M85UnitVFPD : ProcResource<1> { let BufferSize = 0; } // Single unit; unlike VFPA/B/C it has no L/H pair.
60 def M85UnitVPortL : ProcResource<1> { let BufferSize = 0; }
61 def M85UnitVPortH : ProcResource<1> { let BufferSize = 0; }
62 def M85UnitVPort : ProcResGroup<[M85UnitVPortL,M85UnitVPortH]> { let BufferSize = 0; } // Group: VPortL + VPortH.
63 def M85UnitSIMD : ProcResource<1> { let BufferSize = 0; }
64 def M85UnitLShift : ProcResource<1> { let BufferSize = 0; }
65 def M85UnitDiv : ProcResource<1> { let BufferSize = 0; }
67 def M85UnitSlot0 : ProcResource<1> { let BufferSize = 0; } // Claimed by M85Slot0Only and by the FP/MVE/DSP writes; presumably models issue slot 0.
69 //===---------------------------------------------------------------------===//
70 // Subtarget-specific SchedWrite types which map ProcResources and set latency.
72 def : WriteRes<WriteALU, [M85UnitALU]> { let Latency = 1; } // Plain ALU write: one ALU instance, latency 1.
74 // Basic ALU with shifts.
76 def : WriteRes<WriteALUsi, [M85UnitALU, M85UnitShift1]>; // Shifted-operand forms claim M85UnitShift1 as well as the ALU.
77 def : WriteRes<WriteALUsr, [M85UnitALU, M85UnitShift1]>;
78 def : WriteRes<WriteALUSsr, [M85UnitALU, M85UnitShift1]>;
82 def : WriteRes<WriteCMP, [M85UnitALU]> { let Latency = 1; } // Plain compare: ALU only, latency 1.
83 def : WriteRes<WriteCMPsi, [M85UnitALU, M85UnitShift1]> { let Latency = 2; } // Shifted compares: ALU + Shift1, latency 2.
84 def : WriteRes<WriteCMPsr, [M85UnitALU, M85UnitShift1]> { let Latency = 2; }
88 def : WriteRes<WriteMUL16, [M85UnitMAC]>; // All multiply writes claim the single MAC unit.
89 def : WriteRes<WriteMUL32, [M85UnitMAC]>;
90 def : WriteRes<WriteMUL64Lo, [M85UnitMAC]>;
91 def : WriteRes<WriteMUL64Hi, []> { let NumMicroOps = 0; } // No resources and zero micro-ops; presumably the cost is carried by WriteMUL64Lo.
94 // Multiply-accumulates.
96 def : WriteRes<WriteMAC16, [M85UnitMAC]>; // All multiply-accumulate writes claim the single MAC unit.
97 def : WriteRes<WriteMAC32, [M85UnitMAC]>;
98 def : WriteRes<WriteMAC64Lo, [M85UnitMAC]>;
99 def : WriteRes<WriteMAC64Hi, []> { let NumMicroOps = 0; } // No resources and zero micro-ops; presumably the cost is carried by WriteMAC64Lo.
103 def : WriteRes<WriteDIV, [M85UnitDiv]> {
108 def : WriteRes<WriteLd, [M85UnitLoad]> { let Latency = 1; } // Uses the load group (either load unit), latency 1.
109 def : WriteRes<WritePreLd, [M85UnitLoad]> { let Latency = 2; }
110 def : WriteRes<WriteST, [M85UnitStore]> { let Latency = 2; } // Uses the store group (either store unit), latency 2.
111 def M85WriteLdWide : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH]> { let Latency = 1; } // "Wide" load: lists both load units explicitly.
112 def M85WriteStWide : SchedWriteRes<[M85UnitStoreL, M85UnitStoreH]> { let Latency = 2; } // "Wide" store: lists both store units explicitly.
115 def : WriteRes<WriteBr, [M85UnitBranch]> { let Latency = 2; } // All three branch writes: branch unit, latency 2.
116 def : WriteRes<WriteBrL, [M85UnitBranch]> { let Latency = 2; }
117 def : WriteRes<WriteBrTbl, [M85UnitBranch]> { let Latency = 2; }
120 def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } // No resources, no latency, no micro-ops.
122 //===---------------------------------------------------------------------===//
123 // Sched definitions for floating-point instructions
125 // Floating point conversions.
126 def : WriteRes<WriteFPCVT, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
129 def : WriteRes<WriteFPMOV, [M85UnitVPort, M85UnitSlot0]> { let Latency = 1; } // FP move: one VPort from the group plus Slot0, latency 1.
130 def M85WriteFPMOV64 : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> { let Latency = 1; } // 64-bit variant: both VPort units plus Slot0, latency 1.
132 // ALU operations (32/64-bit). These go down the FP pipeline.
133 def : WriteRes<WriteFPALU32, [M85UnitVFPA, M85UnitVPort, M85UnitSlot0]> {
136 def : WriteRes<WriteFPALU64, [M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
141 def : WriteRes<WriteFPMUL32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
144 def : WriteRes<WriteFPMUL64, [M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
148 // Multiply-accumulate. FPMAC goes down the FP Pipeline.
149 def : WriteRes<WriteFPMAC32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
152 def : WriteRes<WriteFPMAC64, [M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
156 // Division. Effective scheduling latency is 3, though real latency is larger
157 def : WriteRes<WriteFPDIV32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
160 def : WriteRes<WriteFPDIV64, [M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
164 // Square-root. Effective scheduling latency is 3, though real latency is larger
165 def : WriteRes<WriteFPSQRT32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
168 def : WriteRes<WriteFPSQRT64, [M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
172 let NumMicroOps = 0 in {
173 def M85SingleIssue : SchedWriteRes<[]> { let SingleIssue = 1; } // Resource-free marker write that sets SingleIssue.
174 def M85Slot0Only : SchedWriteRes<[M85UnitSlot0]> { } // Marker write whose only effect is to claim M85UnitSlot0.
177 // What pipeline stage operands need to be ready for depending on
178 // where they come from.
179 def : ReadAdvance<ReadALUsr, 0>; // ReadALUsr, ReadMUL, ReadALU and ReadFPMUL get no advance.
180 def : ReadAdvance<ReadMUL, 0>;
181 def : ReadAdvance<ReadMAC, 1>; // ReadMAC is advanced by 1 cycle.
182 def : ReadAdvance<ReadALU, 0>;
183 def : ReadAdvance<ReadFPMUL, 0>;
184 def : ReadAdvance<ReadFPMAC, 3>; // ReadFPMAC is advanced by 3 cycles.
185 def M85Read_ISSm1 : SchedReadAdvance<-2>; // operands needed at ISS
186 def M85Read_ISS : SchedReadAdvance<-1>; // operands needed at EX1 (NOTE(review): the def names are one stage earlier than the stage in each comment - confirm intended)
187 def M85Read_EX1 : SchedReadAdvance<0>; // operands needed at EX2
188 def M85Read_EX2 : SchedReadAdvance<1>; // operands needed at EX3
189 def M85Read_EX3 : SchedReadAdvance<2>; // operands needed at EX4
190 def M85Read_EX4 : SchedReadAdvance<3>; // operands needed at EX5
191 def M85Write1 : SchedWriteRes<[]> {
195 def M85Write2 : SchedWriteRes<[]> {
199 def M85WriteShift2 : SchedWriteRes<[M85UnitALU, M85UnitShift2]> {} // ALU plus the second shifter; assigned to t2RRX.
201 // Non general purpose instructions may not be dual issued. These
202 // use both issue units.
203 def M85NonGeneralPurpose : SchedWriteRes<[]> {
204 // Assume that these will go down the main ALU pipeline.
205 // In reality, many look likely to stall the whole pipeline.
210 // List the non general purpose instructions.
211 def : InstRW<[M85NonGeneralPurpose], // Each listed name is an instregex pattern; every match gets M85NonGeneralPurpose.
212 (instregex "t2MRS", "tSVC", "tBKPT", "t2MSR", "t2DMB", "t2DSB",
213 "t2ISB", "t2HVC", "t2SMC", "t2UDF", "ERET", "tHINT",
214 "t2HINT", "t2CLREX", "t2CLRM", "BUNDLE")>;
216 //===---------------------------------------------------------------------===//
217 // Sched definitions for load/store
219 // Mark whether the loads/stores must be single-issue
220 // Address operands are needed earlier
221 // Data operands are needed later
223 let NumMicroOps = 0 in {
224 def M85BaseUpdate : SchedWriteRes<[]> {
225 // Update is bypassable out of EX1
228 def M85MVERBaseUpdate : SchedWriteRes<[]> { let Latency = 1; } // Resource-free write, latency 1; used for the write-back result of MVE loads/stores.
229 // Q register base update is available in EX3 to bypass into EX2/ISS.
230 // Latency=2 matches what we want for ISS, Latency=1 for EX2. Going
231 // with 2, as base update into another load/store is most likely. Could
232 // change later in an override.
233 def M85MVEQBaseUpdate : SchedWriteRes<[]> { let Latency = 2; }
234 def M85LoadLatency1 : SchedWriteRes<[]> { let Latency = 1; } // Resource-free write, latency 1; paired with M85WriteLdWide on LDRD/LDAEXD.
236 def M85SlowLoad : SchedWriteRes<[M85UnitLoad]> { let Latency = 2; } // Load group, latency 2 (WriteLd is latency 1).
238 // Byte and half-word loads should have greater latency than other loads.
239 // So should load exclusive?
241 def : InstRW<[M85SlowLoad], // The "...pc" forms: write only, no read override.
242 (instregex "t2LDR(B|H|SB|SH)pc")>;
243 def : InstRW<[M85SlowLoad, M85Read_ISS], // One read, overridden with M85Read_ISS (advance -1).
244 (instregex "t2LDR(B|H|SB|SH)T", "t2LDR(B|H|SB|SH)i",
245 "tLDRspi", "tLDR(B|H)i")>;
246 def : InstRW<[M85SlowLoad, M85Read_ISS, M85Read_ISS], // Two reads, both M85Read_ISS.
247 (instregex "t2LDR(B|H|SB|SH)s")>;
248 def : InstRW<[M85SlowLoad, M85Read_ISS, M85Read_ISS],
249 (instregex "tLDR(B|H)r", "tLDR(SB|SH)")>;
250 def : InstRW<[M85SlowLoad, M85BaseUpdate, M85Read_ISS], // Pre/post-indexed forms: M85BaseUpdate is the second write.
251 (instregex "t2LDR(B|H|SB|SH)_(POST|PRE)")>;
253 // Exclusive/acquire/release loads/stores cannot be dual-issued
254 def : InstRW<[WriteLd, M85SingleIssue, M85Read_ISS], // M85SingleIssue (SingleIssue = 1) is added to every entry in this group.
255 (instregex "t2LDREX$", "t2LDA(EX)?$")>;
256 def : InstRW<[M85WriteLdWide, M85LoadLatency1, M85SingleIssue, M85Read_ISS], // Doubleword form: wide load plus a latency-1 second write.
257 (instregex "t2LDAEXD$")>;
258 def : InstRW<[M85SlowLoad, M85SingleIssue, M85Read_ISS], // Byte/half-word forms use M85SlowLoad (latency 2).
259 (instregex "t2LDREX(B|H)", "t2LDA(EX)?(B|H)$")>;
260 def : InstRW<[WriteST, M85SingleIssue, M85Read_EX2, M85Read_ISS], // Stores: first read M85Read_EX2 (+1), second M85Read_ISS (-1).
261 (instregex "t2STREX(B|H)?$", "t2STL(EX)?(B|H)?$")>;
262 def : InstRW<[M85WriteStWide, M85SingleIssue, M85Read_EX2, M85Read_EX2, M85Read_ISS], // Doubleword store: two M85Read_EX2 reads, then M85Read_ISS.
263 (instregex "t2STLEXD$")>;
265 // Load/store multiples end issue groups.
267 def : InstRW<[M85WriteLdWide, M85SingleIssue, M85Read_ISS], // Load multiple: wide load, single-issue.
268 (instregex "(t|t2)LDM(DB|IA)$")>;
269 def : InstRW<[M85WriteStWide, M85SingleIssue, M85Read_ISS], // Store multiple: wide store, single-issue.
270 (instregex "(t|t2)STM(DB|IA)$")>;
271 def : InstRW<[M85BaseUpdate, M85WriteLdWide, M85SingleIssue, M85Read_ISS], // _UPD forms and tPOP: M85BaseUpdate is listed first.
272 (instregex "(t|t2)LDM(DB|IA)_UPD$", "tPOP")>;
273 def : InstRW<[M85BaseUpdate, M85WriteStWide, M85SingleIssue, M85Read_ISS], // _UPD forms and tPUSH: M85BaseUpdate is listed first.
274 (instregex "(t|t2)STM(DB|IA)_UPD$", "tPUSH")>;
276 // Load/store doubles
278 def : InstRW<[M85BaseUpdate, M85WriteStWide, // Indexed STRD: base-update write first, then the wide store.
279 M85Read_EX2, M85Read_EX2, M85Read_ISS], // Two M85Read_EX2 (+1) reads, then one M85Read_ISS (-1).
280 (instregex "t2STRD_(PRE|POST)")>;
281 def : InstRW<[M85WriteStWide, M85Read_EX2, M85Read_EX2, M85Read_ISS],
282 (instregex "t2STRDi")>;
283 def : InstRW<[M85WriteLdWide, M85LoadLatency1, M85BaseUpdate, M85Read_ISS], // Indexed LDRD: two load writes, then the base-update write.
284 (instregex "t2LDRD_(PRE|POST)")>;
285 def : InstRW<[M85WriteLdWide, M85LoadLatency1, M85Read_ISS],
286 (instregex "t2LDRDi")>;
288 // Word load / preload
289 def : InstRW<[WriteLd], // The "pc"/"pci" forms: write only, no read override.
290 (instregex "t2LDRpc", "t2PL[DI]pci", "tLDRpci")>;
291 def : InstRW<[WriteLd, M85Read_ISS], // Immediate-offset forms: one read, M85Read_ISS (advance -1).
292 (instregex "t2LDR(i|T)", "t2PL[DI](W)?i", "tLDRi")>;
293 def : InstRW<[WriteLd, M85Read_ISS, M85Read_ISS], // Register-offset word loads/preloads: two reads, both M85Read_ISS.
294 (instregex "t2LDRs", "t2PL[DI](W)?s", "tLDRr")>; // "(W)?" is upper-case (instregex is case-sensitive) so t2PLDWs is matched too.
295 def : InstRW<[WriteLd, M85BaseUpdate, M85Read_ISS], // Pre/post-indexed word load: M85BaseUpdate is the second write.
296 (instregex "t2LDR_(POST|PRE)")>;
299 def : InstRW<[M85BaseUpdate, WriteST, M85Read_EX2, M85Read_ISS], // Pre/post-indexed stores: base-update write is listed first.
300 (instregex "t2STR(B|H)?_(POST|PRE)")>;
301 def : InstRW<[WriteST, M85Read_EX2, M85Read_ISS, M85Read_ISS], // Register-offset stores: first read M85Read_EX2, then two M85Read_ISS.
302 (instregex "t2STR(B|H)?s$", "tSTR(B|H)?r$")>;
303 def : InstRW<[WriteST, M85Read_EX2, M85Read_ISS], // Immediate-offset stores: first read M85Read_EX2, then one M85Read_ISS.
304 (instregex "t2STR(B|H)?(i|T)", "tSTR(B|H)?i$", "tSTRspi")>;
306 // TBB/TBH - single-issue only
308 def M85TableLoad : SchedWriteRes<[M85UnitLoad]> { let SingleIssue = 1; } // Load-group write that must issue alone.
310 def : InstRW<[M85TableLoad, M85Read_ISS, M85Read_ISS],
313 // VFP/MVE loads and stores
314 // Note: timing for VLDR/VSTR special has not been broken out
315 // Note 2: see notes at top of file for the reason load latency is 1 and
316 // store data is in EX3.
318 def M85LoadSP : SchedWriteRes<[M85UnitLoad, M85UnitVPort]>; // One load unit and one VPort, each taken from its group.
319 def M85LoadDP : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH, // Both load units and both VPort units.
320 M85UnitVPortL, M85UnitVPortH]>;
321 def M85LoadSys : SchedWriteRes<[M85UnitLoad, M85UnitVPort,
322 M85UnitVFPA, M85UnitVFPB, M85UnitVFPC, M85UnitVFPD]> {
325 def M85StoreSP : SchedWriteRes<[M85UnitStore, M85UnitVPort]>; // One store unit and one VPort, each taken from its group.
326 def M85StoreDP : SchedWriteRes<[M85UnitStoreL, M85UnitStoreH, // Both store units and both VPort units.
327 M85UnitVPortL, M85UnitVPortH]>;
328 def M85StoreSys : SchedWriteRes<[M85UnitStore, M85UnitVPort, // Also claims a unit from VFPA, VFPB and VFPC, plus VFPD.
329 M85UnitVFPA, M85UnitVFPB, M85UnitVFPC, M85UnitVFPD]>;
330 let ReleaseAtCycles = [2,2,1,1], EndGroup = 1 in {
331 def M85LoadMVE : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH,
332 M85UnitVPortL, M85UnitVPortH]>;
333 def M85LoadMVELate : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH,
334 M85UnitVPortL, M85UnitVPortH]> {
335 let Latency = 4; // 3 cycles later
337 def M85StoreMVE : SchedWriteRes<[M85UnitStoreL, M85UnitStoreH,
338 M85UnitVPortL, M85UnitVPortH]>;
341 def : InstRW<[M85LoadSP, M85Read_ISS], (instregex "VLDR(S|H)$")>; // Loads: single read, M85Read_ISS (advance -1).
342 def : InstRW<[M85LoadSys, M85Read_ISS], (instregex "VLDR_")>;
343 def : InstRW<[M85LoadDP, M85Read_ISS], (instregex "VLDRD$")>;
344 def : InstRW<[M85StoreSP, M85Read_EX3, M85Read_ISS], (instregex "VSTR(S|H)$")>; // Stores: first read M85Read_EX3 (+2), second M85Read_ISS.
345 def : InstRW<[M85StoreSys, M85Read_EX1, M85Read_ISS], (instregex "VSTR_")>; // NOTE(review): first read is M85Read_EX1 here but M85Read_EX3 for VSTR(S|H|D) - confirm intended.
346 def : InstRW<[M85StoreDP, M85Read_EX3, M85Read_ISS], (instregex "VSTRD$")>;
348 def : InstRW<[M85LoadMVELate, M85Read_ISS], // VLD2/VLD4 without write-back use the later-latency load write.
349 (instregex "MVE_VLD[24]._[0-9]+$")>;
350 def : InstRW<[M85LoadMVELate, M85MVERBaseUpdate, M85Read_ISS], // Write-back forms add M85MVERBaseUpdate as the second write.
351 (instregex "MVE_VLD[24].*wb")>;
352 def : InstRW<[M85LoadMVE, M85Read_ISS], // VLDR names that end in the element size, i.e. with no addressing-mode suffix.
353 (instregex "MVE_VLDR.*(8|16|32|64)$")>;
354 def : InstRW<[M85LoadMVE, M85SingleIssue, M85Read_ISS, M85Read_ISS], // "_rq"/"_rq_u" loads: single-issue, two reads at M85Read_ISS.
355 (instregex "MVE_VLDR.*(_rq|_rq_u)$")>; // Duplicated "_rq" alternative removed; the matched set is unchanged.
356 def : InstRW<[M85LoadMVE, M85SingleIssue, M85Read_ISS], // "_qi" forms: single-issue.
357 (instregex "MVE_VLDR.*_qi$")>;
358 def : InstRW<[M85MVERBaseUpdate, M85LoadMVE, M85Read_ISS], // "_post" and "_pre" forms; "[^i]_pre" excludes the "qi_pre" names.
359 (instregex "MVE_VLDR.*(_post|[^i]_pre)$")>;
360 def : InstRW<[M85MVEQBaseUpdate, M85SingleIssue, M85LoadMVE, M85Read_ISS], // "qi_pre" forms: M85MVEQBaseUpdate (latency 2) and single-issue.
361 (instregex "MVE_VLDR.*(qi_pre)$")>;
363 def : InstRW<[M85StoreMVE, M85Read_EX3, M85Read_ISS], // VST2/VST4 without write-back: first read M85Read_EX3 (+2), then M85Read_ISS (-1).
364 (instregex "MVE_VST[24]._[0-9]+$")>;
365 def : InstRW<[M85StoreMVE, M85Read_EX3, M85MVERBaseUpdate, M85Read_ISS], // NOTE(review): the base-update write follows a read in this list - confirm operand order.
366 (instregex "MVE_VST[24].*wb")>;
367 def : InstRW<[M85StoreMVE, M85Read_EX3, M85Read_ISS], // VSTR names that end in the element size, i.e. with no addressing-mode suffix.
368 (instregex "MVE_VSTR.*(8|16|32|64)$")>;
369 def : InstRW<[M85StoreMVE, M85SingleIssue, M85Read_EX3, M85Read_ISS, M85Read_ISS], // "_rq"/"_rq_u" stores: single-issue; first read M85Read_EX3, then two M85Read_ISS.
370 (instregex "MVE_VSTR.*(_rq|_rq_u)$")>; // Duplicated "_rq" alternative removed; the matched set is unchanged.
371 def : InstRW<[M85StoreMVE, M85SingleIssue, M85Read_EX3, M85Read_ISS], // "_qi" forms: single-issue.
372 (instregex "MVE_VSTR.*_qi$")>;
373 def : InstRW<[M85MVERBaseUpdate, M85StoreMVE, M85Read_EX3, M85Read_ISS], // "_post" and "_pre" forms; "[^i]_pre" excludes the "qi_pre" names.
374 (instregex "MVE_VSTR.*(_post|[^i]_pre)$")>;
375 def : InstRW<[M85MVEQBaseUpdate, M85SingleIssue, M85StoreMVE, // "qi_pre" forms: M85MVEQBaseUpdate (latency 2) and single-issue.
376 M85Read_EX3, M85Read_ISS],
377 (instregex "MVE_VSTR.*(qi_pre)$")>;
379 // Load/store multiples end issue groups.
381 def : InstRW<[M85WriteLdWide, M85SingleIssue, M85Read_ISS], // FP load multiple: wide load, single-issue.
382 (instregex "VLDM(S|D|Q)(DB|IA)$")>;
383 def : InstRW<[M85WriteStWide, M85SingleIssue, M85Read_ISS, M85Read_EX3], // FP store multiple: reads are M85Read_ISS then M85Read_EX3.
384 (instregex "VSTM(S|D|Q)(DB|IA)$")>;
385 def : InstRW<[M85BaseUpdate, M85WriteLdWide, M85SingleIssue, M85Read_ISS], // _UPD forms and VLLDM: M85BaseUpdate is listed first.
386 (instregex "VLDM(S|D|Q)(DB|IA)_UPD$", "VLLDM")>;
387 def : InstRW<[M85BaseUpdate, M85WriteStWide, M85SingleIssue, // _UPD forms and VLSTM: M85BaseUpdate is listed first.
388 M85Read_ISS, M85Read_EX3],
389 (instregex "VSTM(S|D|Q)(DB|IA)_UPD$", "VLSTM")>;
391 //===---------------------------------------------------------------------===//
392 // Sched definitions for ALU
395 // Non-small shifted ALU operands are read a cycle early; small LSLs
396 // aren't, as they don't require the shifter.
398 def M85NonsmallShiftWrite : SchedWriteRes<[M85UnitALU,M85UnitShift1]> {
402 def M85WriteALUsi : SchedWriteVariant<[
403 SchedVar<NoSchedPred, [M85NonsmallShiftWrite]>
405 def M85Ex1ReadNoFastBypass : SchedReadAdvance<-1,
406 [WriteLd, M85WriteLdWide, M85LoadLatency1]>;
407 def M85ReadALUsi : SchedReadVariant<[
408 SchedVar<NoSchedPred, [M85Read_ISS]>
411 def : InstRW<[M85WriteALUsi, M85Read_EX1, M85ReadALUsi], // "rs" (shifted-register) forms: first read M85Read_EX1 (advance 0), second the M85ReadALUsi variant.
412 (instregex "t2(ADC|ADDS|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|"
413 "SUBS|CMP|CMNz|TEQ|TST)rs$")>;
414 def : InstRW<[M85WriteALUsi, M85ReadALUsi], // t2MVNs has a single read, so only M85ReadALUsi is listed.
415 (instregex "t2MVNs")>;
417 // CortexM85 treats LSL #0 as needing a shifter. In practice the throughput
418 // seems to reliably be 2 when run on a cyclemodel, so we don't require a
420 def : InstRW<[M85WriteALUsi, M85Read_EX1, M85ReadALUsi], // "rr" forms get the same write and reads as the "rs" forms.
421 (instregex "t2(ADC|ADDS|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|"
422 "SUBS|CMP|CMNz|TEQ|TST)rr$")>;
423 def : InstRW<[M85WriteALUsi, M85ReadALUsi], // t2MVNr has a single read, so only M85ReadALUsi is listed.
424 (instregex "t2MVNr")>;
426 // Shift instructions: most pure shifts (i.e. MOV w/ shift) will use whichever
427 // shifter is free, thus it is possible to dual-issue them freely with anything
428 // else. As a result, they are not modeled as needing a shifter.
429 // RRX is odd because it must use the EX2 shifter, so it cannot dual-issue with
432 // Note that pure shifts which use the EX1 shifter would need their operands
433 // a cycle earlier. However, they are only forced to use the EX1 shifter
434 // when issuing against an RRX instruction, which should be rare.
436 def : InstRW<[M85WriteShift2], // RRX: ALU plus M85UnitShift2.
437 (instregex "t2RRX$")>;
438 def : InstRW<[WriteALU], // Pure shifts and bit-field extracts are given the plain ALU write (no shifter resource).
439 (instregex "(t|t2)(LSL|LSR|ASR|ROR|SBFX|UBFX)")>;
441 // Instructions that use the shifter, but have normal timing
443 def : InstRW<[WriteALUsi,M85Slot0Only], (instregex "t2(BFC|BFI)$")>; // M85Slot0Only additionally claims M85UnitSlot0.
445 // Stack pointer add/sub happens in EX1 with checks in EX2
447 def M85WritesToSPPred : MCSchedPredicate<CheckRegOperand<0, SP>>; // True when operand 0 is the SP register.
449 def M85ReadForSP : SchedReadVariant<[
450 SchedVar<M85WritesToSPPred, [M85Read_ISS]>,
451 SchedVar<NoSchedPred, [M85Read_EX1]>
453 def M85ReadForSPShift : SchedReadVariant<[
454 SchedVar<M85WritesToSPPred, [M85Read_ISS]>,
455 SchedVar<NoSchedPred, [M85Read_ISS]>
458 def : InstRW<[WriteALU, M85Read_ISS], // SP +/- immediate: the read is always M85Read_ISS (advance -1).
459 (instregex "tADDspi", "tSUBspi")>;
460 def : InstRW<[WriteALU, M85ReadForSP], // M85ReadForSP selects M85Read_ISS when operand 0 is SP, otherwise M85Read_EX1.
461 (instregex "t2(ADD|SUB)ri", "t2MOVr", "tMOVr")>;
462 def : InstRW<[WriteALU, M85ReadForSP, M85ReadForSP],
463 (instregex "tADDrSP", "tADDspr", "tADDhirr")>;
464 def : InstRW<[M85WriteALUsi, M85ReadForSP, M85ReadForSPShift], // M85ReadForSPShift resolves to M85Read_ISS in both of its variants.
465 (instregex "t2(ADD|SUB)rs")>;
467 def : InstRW<[WriteALU, M85Slot0Only], (instregex "t2CLZ")>; // CLZ: plain ALU write that also claims M85UnitSlot0.
469 // MAC operations that don't have SchedRW set
471 def : InstRW<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC], (instregex "t2SML[AS]D")>; // Two ReadMUL reads, then one ReadMAC read.
473 // Divides are special because they stall for their latency, and so look like
474 // two cycles as far as scheduling opportunities go. By putting M85Write2
475 // first, we make the operand latency 2, but keep the instruction latency 7.
476 // Divide operands are read early.
478 def : InstRW<[M85Write2, WriteDIV, M85Read_ISS, M85Read_ISS, WriteALU], // NOTE(review): WriteALU is listed after the reads - confirm intended.
479 (instregex "t2(S|U)DIV")>;
481 // DSP extension operations
483 def M85WriteSIMD1 : SchedWriteRes<[M85UnitSIMD, M85UnitALU, M85UnitSlot0]> {
486 def M85WriteSIMD2 : SchedWriteRes<[M85UnitSIMD, M85UnitALU, M85UnitSlot0]> {
489 def M85WriteShSIMD0 : SchedWriteRes<[M85UnitSIMD, M85UnitALU,
490 M85UnitShift1, M85UnitSlot0]> {
491 let Latency = 0; // Finishes at EX1
493 def M85WriteShSIMD1 : SchedWriteRes<[M85UnitSIMD, M85UnitALU,
494 M85UnitShift1, M85UnitSlot0]> {
497 def M85WriteShSIMD2 : SchedWriteRes<[M85UnitSIMD, M85UnitALU,
498 M85UnitShift1, M85UnitSlot0]> {
502 def : InstRW<[M85WriteShSIMD2, M85Read_ISS], // Saturates: SIMD + ALU + Shift1 + Slot0 write, read at M85Read_ISS.
503 (instregex "t2(S|U)SAT")>;
504 def : InstRW<[M85WriteSIMD1, ReadALU], // Sign/zero extends: SIMD + ALU + Slot0 write, generic ReadALU.
505 (instregex "(t|t2)(S|U)XT(B|H)")>;
506 def : InstRW<[M85WriteSIMD1, ReadALU, ReadALU],
507 (instregex "t2(S|SH|U|UH)(ADD16|ADD8|ASX|SAX|SUB16|SUB8)",
509 def : InstRW<[M85WriteSIMD2, ReadALU, ReadALU], // Saturating add/sub family and USAD8.
510 (instregex "t2(Q|UQ)(ADD|ASX|SAX|SUB)", "t2USAD8")>;
511 def : InstRW<[M85WriteShSIMD2, M85Read_ISS, M85Read_ISS], // QDADD/QDSUB: shifter-using write, both reads at M85Read_ISS.
512 (instregex "t2QD(ADD|SUB)")>;
513 def : InstRW<[M85WriteShSIMD0, M85Read_ISS], // RBIT/REV family: shifter-using write, read at M85Read_ISS.
514 (instregex "t2(RBIT|REV)", "tREV")>;
515 def : InstRW<[M85WriteShSIMD1, ReadALU, M85Read_ISS], // First read generic ReadALU, second read M85Read_ISS.
516 (instregex "t2PKH(BT|TB)", "t2(S|U)XTA")>;
517 def : InstRW<[M85WriteSIMD2, ReadALU, ReadALU, M85Read_EX2], // USADA8: third read is M85Read_EX2 (advance +1).
518 (instregex "t2USADA8")>;
521 def : InstRW<[M85NonGeneralPurpose], (instregex "MSR", "MRS")>; // Names beginning with MSR/MRS get the non-general-purpose write.
523 // 64-bit shift operations in EX3
525 def M85WriteLShift : SchedWriteRes<[M85UnitLShift, M85UnitALU]> {
528 def M85WriteLat2 : SchedWriteRes<[]> { let Latency = 2; let NumMicroOps = 0; } // Resource-free, zero-micro-op write, latency 2.
530 def : InstRW<[M85WriteLShift, M85WriteLat2, M85Read_EX2, M85Read_EX2], // Two writes, two reads at M85Read_EX2 (advance +1).
531 (instregex "MVE_(ASRLi|LSLLi|LSRL|SQSHLL|SRSHRL|UQSHLL|URSHRL)$")>;
532 def : InstRW<[M85WriteLShift, M85WriteLat2, // Two writes, three reads at M85Read_EX2.
533 M85Read_EX2, M85Read_EX2, M85Read_EX2],
534 (instregex "MVE_(ASRLr|LSLLr|SQRSHRL|UQRSHLL)$")>;
535 def : InstRW<[M85WriteLShift, M85Read_EX2, M85Read_EX2], // One write, two reads.
536 (instregex "MVE_(SQRSHR|UQRSHL)$")>;
537 def : InstRW<[M85WriteLShift, M85Read_EX2], // One write, one read.
538 (instregex "MVE_(SQSHL|SRSHR|UQSHL|URSHR)$")>;
540 // Loop control/branch future instructions
542 def M85LE : SchedWriteRes<[]> { let NumMicroOps = 0; let Latency = -2; } // Resource-free, zero-micro-op write with a latency of -2.
544 def : InstRW<[WriteALU], (instregex "t2BF(_|Lr|i|Li|r)")>; // Branch-future instructions get the plain ALU write.
546 def : InstRW<[WriteALU], (instregex "MVE_LCTP")>;
547 def : InstRW<[WriteALU], // Loop-start instructions get the plain ALU write.
548 (instregex "t2DLS", "t2WLS", "MVE_DLSTP", "MVE_WLSTP")>;
549 def : InstRW<[M85LE], (instregex "t2LE$")>;
550 def : InstRW<[M85LE, M85Read_ISSm1], // The read uses M85Read_ISSm1 (advance -2).
551 (instregex "t2LEUpdate", "MVE_LETP")>; // LE is executed at ISS
553 // Conditional selects
555 def : InstRW<[M85WriteLShift, M85Read_EX2, M85Read_EX2, M85Read_EX2], // Same write as the 64-bit shifts; three reads at M85Read_EX2.
556 (instregex "t2(CSEL|CSINC|CSINV|CSNEG)")>;
558 //===---------------------------------------------------------------------===//
559 // Sched definitions for FP and MVE operations
561 let NumMicroOps = 0 in {
562 def M85OverrideVFPLat5 : SchedWriteRes<[]> { let Latency = 5; } // Resource-free writes that differ only in latency (5, 4, 3, 2).
563 def M85OverrideVFPLat4 : SchedWriteRes<[]> { let Latency = 4; }
564 def M85OverrideVFPLat3 : SchedWriteRes<[]> { let Latency = 3; }
565 def M85OverrideVFPLat2 : SchedWriteRes<[]> { let Latency = 2; }
569 def M85GroupALat1S : SchedWriteRes<[M85UnitVFPA, M85UnitVPort, M85UnitSlot0]>; // "S" variants: one VFP unit and one VPort from their groups, plus Slot0.
570 def M85GroupBLat1S : SchedWriteRes<[M85UnitVFPB, M85UnitVPort, M85UnitSlot0]>;
571 def M85GroupCLat1S : SchedWriteRes<[M85UnitVFPC, M85UnitVPort, M85UnitSlot0]>;
572 def M85GroupALat1D : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>; // "D" variants: both VFP units and both VPort units, plus Slot0.
573 def M85GroupBLat1D : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
574 def M85GroupCLat1D : SchedWriteRes<[M85UnitVFPCL, M85UnitVFPCH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
575 def M85GroupABLat1S : SchedWriteRes<[M85UnitVPort, M85UnitSlot0]>; // "AB" variants claim no VFP unit, only VPort and Slot0.
578 def M85GroupBLat2S : SchedWriteRes<[M85UnitVFPB, M85UnitVPort, M85UnitSlot0]>;
579 def M85GroupBLat2D : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
580 def M85GroupABLat2S : SchedWriteRes<[M85UnitVPort, M85UnitSlot0]>;
581 def M85GroupABLat2D : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
584 // Instructions which are missing default schedules
585 def : InstRW<[M85GroupALat1S], (instregex "V(FP_VMAXNM|FP_VMINNM)(H|S)$")>; // Half/single min/max: group-A "S" write.
586 def : InstRW<[M85GroupALat1D], (instregex "V(FP_VMAXNM|FP_VMINNM)D$")>; // Double min/max: group-A "D" write.
587 def : InstRW<[M85GroupCLat1S], (instregex "VCMPE?Z?(H|S)$")>; // Compares use the group-C writes.
588 def : InstRW<[M85GroupCLat1D], (instregex "VCMPE?Z?D$")>;
589 def : InstRW<[M85GroupBLat2S], // Half/single conversions and roundings: group-B "S" write.
590 (instregex "VCVT(A|M|N|P|R|X|Z)(S|U)(H|S)",
591 "VRINT(A|M|N|P|R|X|Z)(H|S)")>;
592 def : InstRW<[M85GroupBLat2D], // Conversions and roundings involving doubles: group-B "D" write.
593 (instregex "VCVT(B|T)(DH|HD)", "VCVT(A|M|N|P|R|X|Z)(S|U)D",
594 "V.*TOD", "VTO.*D", "VCVTDS", "VCVTSD",
595 "VRINT(A|M|N|P|R|X|Z)D")>;
596 def : InstRW<[M85GroupABLat1S], (instregex "VINSH")>; // VINSH claims only VPort and Slot0.
597 def : InstRW<[M85GroupBLat1S], (instregex "V(ABS|NEG)(H|S)$")>; // Abs/neg use the group-B writes.
598 def : InstRW<[M85GroupBLat1D], (instregex "V(ABS|NEG)D$")>;
601 let SingleIssue = 1 in {
602 def M85VMRSEarly : SchedWriteRes<[M85UnitVPort]> { let Latency = 2;} // All four claim one VPort; Early/Late variants differ only in latency.
603 def M85VMRSLate : SchedWriteRes<[M85UnitVPort]> { let Latency = 4; }
604 def M85VMSREarly : SchedWriteRes<[M85UnitVPort]> { let Latency = 1; }
605 def M85VMSRLate : SchedWriteRes<[M85UnitVPort]> { let Latency = 3; } // Only referenced from the commented-out VMSR_FPCTX entry in this view.
608 def M85FPSCRFlagPred : MCSchedPredicate< // True when operand 0 is a register operand and that register is PC.
609 CheckAll<[CheckIsRegOperand<0>,
610 CheckRegOperand<0, PC>]>>; // Used by M85VMRSFPSCR to select M85VMRSEarly.
612 def M85VMRSFPSCR : SchedWriteVariant<[
613 SchedVar<M85FPSCRFlagPred, [M85VMRSEarly]>,
614 SchedVar<NoSchedPred, [M85VMRSLate]>
617 def : InstRW<[M85VMSREarly, M85Read_EX2], // VMSR forms: latency-1 write, read at M85Read_EX2 (advance +1).
618 (instregex "VMSR$", "VMSR_FPSCR_NZCVQC", "VMSR_P0", "VMSR_VPR")>;
619 def : InstRW<[M85VMRSEarly], (instregex "VMRS_P0", "VMRS_VPR", "FMSTAT")>; // Latency-2 write.
620 def : InstRW<[M85VMRSLate], (instregex "VMRS_FPSCR_NZCVQC")>; // Latency-4 write.
621 def : InstRW<[M85VMRSFPSCR], (instregex "VMRS$")>; // Variant: Early when M85FPSCRFlagPred holds, otherwise Late.
622 // Not matching properly
623 //def : InstRW<[M85VMSRLate, M85Read_EX2], (instregex "VMSR_FPCTX(NS|S)")>;
624 //def : InstRW<[M85VMRSLate], (instregex "VMRS_FPCTX(NS|S)")>;
626 // VSEL cannot bypass in its implied $cpsr operand; model as earlier read
627 def : InstRW<[M85GroupBLat1S, ReadALU, ReadALU, M85Read_ISS], // Third read is M85Read_ISS (advance -1); presumably the implied $cpsr operand.
628 (instregex "VSEL.*(S|H)$")>;
629 def : InstRW<[M85GroupBLat1D, ReadALU, ReadALU, M85Read_ISS], // Double-precision form: same reads, group-B "D" write.
630 (instregex "VSEL.*D$")>;
633 def : InstRW<[WriteFPMOV], // Moves within the FP side and constants: no read override.
634 (instregex "VMOV(H|S)$", "FCONST(H|S)")>;
635 def : InstRW<[WriteFPMOV, M85Read_EX2], // VMOVHR/VMOVSR: read at M85Read_EX2 (advance +1).
636 (instregex "VMOVHR$", "VMOVSR$")>;
637 def : InstRW<[M85GroupABLat2S], // VMOVRH/VMOVRS use the M85GroupABLat2S write.
638 (instregex "VMOVRH$", "VMOVRS$")>;
639 def : InstRW<[M85WriteFPMOV64], // 64-bit move claims both VPort units.
640 (instregex "VMOVD$")>;
641 def : InstRW<[M85WriteFPMOV64],
642 (instregex "FCONSTD")>;
643 def : InstRW<[M85WriteFPMOV64, M85Read_EX2, M85Read_EX2], // Two reads, each at M85Read_EX2.
644 (instregex "VMOVDRR")>;
645 def : InstRW<[M85WriteFPMOV64, M85Write1, M85Read_EX2, M85Read_EX2], // Two writes (second is M85Write1), two reads.
646 (instregex "VMOVSRR")>;
647 def : InstRW<[M85GroupABLat2D, M85Write2], // Two writes (second is M85Write2).
648 (instregex "VMOV(RRD|RRS)")>;
650 // These shouldn't even exist, but Cortex-M55 defines them, so here they are.
651 def : InstRW<[WriteFPMOV, M85Read_EX2], // Same write/read pair as VMOVHR/VMOVSR.
652 (instregex "VGETLNi32$")>;
653 def : InstRW<[M85GroupABLat2S], // Same write as VMOVRH/VMOVRS.
654 (instregex "VSETLNi32")>;
656 // Larger-latency overrides
658 def M85FPDIV16 : SchedWriteRes<[M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
661 def : InstRW<[M85OverrideVFPLat2, M85FPDIV16], (instregex "VDIVH")>; // Each entry lists a resource-free latency override first, then the resource-carrying write.
662 def : InstRW<[M85OverrideVFPLat2, WriteFPDIV32], (instregex "VDIVS")>;
663 def : InstRW<[M85OverrideVFPLat2, WriteFPDIV64], (instregex "VDIVD")>;
664 def : InstRW<[M85OverrideVFPLat2, M85FPDIV16], (instregex "VSQRTH")>; // Half-precision sqrt reuses the M85FPDIV16 write.
665 def : InstRW<[M85OverrideVFPLat2, WriteFPSQRT32], (instregex "VSQRTS")>;
666 def : InstRW<[M85OverrideVFPLat2, WriteFPSQRT64], (instregex "VSQRTD")>;
667 def : InstRW<[M85OverrideVFPLat3, WriteFPMUL64], (instregex "V(MUL|NMUL)D")>; // Double multiply: latency override of 3.
668 def : InstRW<[M85OverrideVFPLat2, WriteFPALU64], (instregex "V(ADD|SUB)D")>; // Double add/sub: latency override of 2.
670 // Multiply-accumulate. Chained SP timing is correct; rest need overrides
671 // Double-precision chained MAC should also be seen as having latency of 5,
672 // as stalls stall everything.
674 def : InstRW<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL], // First read is ReadFPMAC (advance 3), then two ReadFPMUL reads.
675 (instregex "VN?ML(A|S)H")>;
677 def : InstRW<[M85OverrideVFPLat5, WriteFPMAC64, // Double chained MAC: latency override of 5.
678 ReadFPMUL, ReadFPMUL, ReadFPMUL], // All three reads are ReadFPMUL (advance 0).
679 (instregex "VN?ML(A|S)D$")>;
681 // Single-precision fused MACs look like latency 4 with advance of 2.
683 def M85ReadFPMAC2 : SchedReadAdvance<2>; // Read advanced by 2 cycles; used for the first read of the fused MACs.
685 def : InstRW<[M85OverrideVFPLat4, WriteFPMAC32,
686 M85ReadFPMAC2, ReadFPMUL, ReadFPMUL],
687 (instregex "VF(N)?M(A|S)(H|S)$")>;
689 // Double-precision fused MAC looks like latency 4.
691 def : InstRW<[M85OverrideVFPLat4, WriteFPMAC64,
692 ReadFPMUL, ReadFPMUL, ReadFPMUL], // No advanced read for the double-precision form.
693 (instregex "VF(N)?M(A|S)D$")>;
695 // MVE beatwise instructions
696 // NOTE: Q-register timing for the 2nd beat is off by a cycle and needs
697 // DAG overrides to correctly set latencies.
698 // NOTE2: MVE integer MAC->MAC accumulate latencies are set as if the
699 // accumulate value arrives from an unmatching MAC instruction;
700 // matching ones are handled via DAG mutation. These are marked as
701 // "limited accumulate bypass"
703 let Latency = 4, EndGroup = 1 in {
704 def M85GrpALat2MveR : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
705 let ReleaseAtCycles = [2,2,1,1,1];
707 def M85GrpABLat2MveR : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
708 def M85GrpBLat2MveR : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
709 let ReleaseAtCycles = [2,2,1,1,1];
711 def M85Lat2MveR : SchedWriteRes<[]> { let NumMicroOps = 0; }
712 def M85GrpBLat4Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
713 let ReleaseAtCycles = [2,2,1,1,1];
716 let Latency = 3, EndGroup = 1 in {
717 def M85GrpBLat3Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
718 let ReleaseAtCycles = [2,2,1,1,1];
720 def M85GrpBLat1MveR : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
721 let ReleaseAtCycles = [2,2,1,1,1];
723 def M85Lat1MveR : SchedWriteRes<[]> { let NumMicroOps = 0; }
725 let Latency = 2, EndGroup = 1 in {
726 def M85GrpALat2Mve : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
727 let ReleaseAtCycles = [2,2,1,1,1];
729 def M85GrpABLat2Mve : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
730 def M85GrpBLat2Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
731 let ReleaseAtCycles = [2,2,1,1,1];
733 def M85Lat2Mve : SchedWriteRes<[]> { let NumMicroOps = 0; }
735 let Latency = 1, EndGroup = 1 in {
736 def M85GrpALat1Mve : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
737 let ReleaseAtCycles = [2,2,1,1,1];
739 def M85GrpABLat1Mve : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
740 def M85GrpBLat1Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
741 let ReleaseAtCycles = [2,2,1,1,1];
743 def M85GrpCLat1Mve : SchedWriteRes<[M85UnitVFPCL, M85UnitVFPCH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
744 let ReleaseAtCycles = [2,2,1,1,1];
746 def M85GrpDLat1Mve : SchedWriteRes<[M85UnitVFPD, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
747 let ReleaseAtCycles = [2,1,1,1];
751 def : InstRW<[M85GrpABLat1Mve, M85Read_EX1, M85Read_EX2, M85Read_EX2], // First read M85Read_EX1 (0), then two M85Read_EX2 (+1).
752 (instregex "MVE_VMOV_q_rr")>;
754 def : InstRW<[M85GrpABLat1Mve, M85Read_EX2], // Single read at M85Read_EX2.
755 (instregex "MVE_VMOV_to_lane_(8|16|32)")>;
757 def : InstRW<[M85GrpABLat1Mve],
758 (instregex "MVE_VAND$",
759 "MVE_VBIC$", "MVE_VBICimm",
760 "MVE_VCLSs(8|16|32)",
761 "MVE_VCLZs(8|16|32)",
763 "MVE_VMOVimmf32", "MVE_VMOVimmi(8|16|32|64)",
764 "MVE_VMVN$", "MVE_VMVNimmi(16|32)",
766 "MVE_VORR$", "MVE_VORRimm", "MQPRCopy",
768 "MVE_VREV(16|32|64)_(8|16|32)"
// Moves out of a vector register. Both write types used here
// (M85GrpABLat2MveR, M85Lat2MveR) are declared outside this section.
// MVE_VMOV_rr_q lists two write entries.
// NOTE(review): presumably one write per destination register -- confirm
// against the instruction definition.
771 def : InstRW<[M85GrpABLat2MveR, M85Lat2MveR],
772 (instregex "MVE_VMOV_rr_q")>;
// MVE_VMOV_from_lane_*: a single write entry, no explicit reads.
774 def : InstRW<[M85GrpABLat2MveR],
775 (instregex "MVE_VMOV_from_lane_(32|u8|s8|u16|s16)")>;
// Add-with-carry opcodes on M85GrpALat1Mve. Each entry lists a second,
// resource-free write (M85Lat1MveR) after the main write.
// NOTE(review): the second write presumably models the carry result --
// confirm against the instruction definitions.

// "MVE_VADC$": the `$` ends the opcode name here, so MVE_VADCI is left to its
// own entry below. Three read entries: EX1, EX1, EX2.
777 def : InstRW<[M85GrpALat1Mve, M85Lat1MveR,
778 M85Read_EX1, M85Read_EX1, M85Read_EX2],
779 (instregex "MVE_VADC$")>;
// MVE_VADCI: same two writes, no explicit read entries.
781 def : InstRW<[M85GrpALat1Mve, M85Lat1MveR],
782 (instregex "MVE_VADCI")>;
// `_qr` opcode forms (plus MVE_VBRSR) scheduled with M85GrpALat1Mve: one write
// followed by two read entries (EX1, then EX2).
// NOTE(review): `_qr` presumably denotes the vector-by-scalar-register
// encodings, with the EX2 read being the scalar -- confirm.
784 def : InstRW<[M85GrpALat1Mve, M85Read_EX1, M85Read_EX2],
785 (instregex "MVE_VADD_qr_i(8|16|32)",
786 "MVE_VBRSR(16|32|8)",
787 "MVE_VHADD_qr_[su](8|16|32)",
788 "MVE_VHSUB_qr_[su](8|16|32)",
789 "MVE_VQADD_qr_[su](8|16|32)",
790 "MVE_VQSUB_qr_[su](8|16|32)",
791 "MVE_VSHL_qr[su](8|16|32)",
792 "MVE_VSUB_qr_i(8|16|32)"
// Opcodes scheduled with M85GrpALat1Mve and no explicit read entries: integer
// add/sub/abs/neg/min/max variants, saturating and halving forms, VMOVL/VMOVN,
// polynomial VMULL, VSBC/VSBCI, and the non-saturating shift and shift-insert
// forms listed below.
// NOTE(review): MVE_VSBC$/MVE_VSBCI are given a single write and no reads here,
// whereas MVE_VADC/MVE_VADCI elsewhere in this file also carry an extra
// M85Lat1MveR write (and reads for VADC) -- confirm the asymmetry is intended.
795 def : InstRW<[M85GrpALat1Mve],
796 (instregex "MVE_VABD(s|u)(8|16|32)",
797 "MVE_VABS(s|u)(8|16|32)",
798 "MVE_V(MAX|MIN)A?[us](8|16|32)",
799 "MVE_VADDi(8|16|32)",
800 "MVE_VCADDi(8|16|32)",
801 "MVE_VHCADDs(8|16|32)",
802 "MVE_VHSUB[su](8|16|32)",
803 "MVE_VMOVL[su](8|16)[tb]h",
804 "MVE_VMOVNi(16|32)[tb]h",
805 "MVE_VMULL[BT]?[p](8|16|32)(bh|th)?",
806 "MVE_VNEGs(8|16|32)",
807 "MVE_VQABSs(8|16|32)",
808 "MVE_VQADD[su](8|16|32)",
809 "MVE_VQNEGs(8|16|32)",
810 "MVE_VQSUB[su](8|16|32)",
811 "MVE_VR?HADD[su](8|16|32)",
812 "MVE_VSBC$", "MVE_VSBCI",
813 "MVE_VSHL_by_vec[su](8|16|32)",
814 "MVE_VSHL_immi(8|16|32)",
815 "MVE_VSHLL_imm[su](8|16)[bt]h",
816 "MVE_VSHLL_lw[su](8|16)[bt]h",
817 "MVE_VSHRNi(16|32)[bt]h",
818 "MVE_VSHR_imm[su](8|16|32)",
819 "MVE_VSLIimm[su]?(8|16|32)",
820 "MVE_VSRIimm[su]?(8|16|32)",
// VDWDUP/VIWDUP and VDDUP/VIDUP opcodes: M85GrpALat2Mve (latency 2) plus a
// second, resource-free write M85Lat2MveR (declared outside this section).
// NOTE(review): the second write presumably covers the updated scalar
// register result -- confirm against the instruction definitions.

// Wrapping forms: two read entries, both EX2.
824 def : InstRW<[M85GrpALat2Mve, M85Lat2MveR, M85Read_EX2, M85Read_EX2],
825 (instregex "MVE_V(D|I)WDUPu(8|16|32)")>;
// Non-wrapping forms: one read entry (EX2).
827 def : InstRW<[M85GrpALat2Mve, M85Lat2MveR, M85Read_EX2],
828 (instregex "MVE_V(D|I)DUPu(8|16|32)")>;
// `_qr` forms scheduled with M85GrpALat2Mve (latency 2): VQSHL/VRSHL/VQRSHL
// by scalar and floating-point VADD/VSUB by scalar. One write, then two read
// entries (EX1, EX2).
830 def : InstRW<[M85GrpALat2Mve, M85Read_EX1, M85Read_EX2],
831 (instregex "MVE_V(Q|R|QR)SHL_qr[su](8|16|32)",
832 "MVE_VADD_qr_f(16|32)",
833 "MVE_VSUB_qr_f(16|32)"
// MVE_VDUP(8|16|32): M85GrpALat1Mve (latency 1) with a single read entry (EX2).
836 def : InstRW<[M85GrpALat1Mve, M85Read_EX2],
837 (instregex "MVE_VDUP(8|16|32)")>;
// Floating-point opcodes scheduled with M85GrpBLat1Mve (latency 1, VFP "B"
// units): VABS and the VMAXNM/VMINNM family, including the optional "A"
// variants matched by `A?`. No explicit read entries.
839 def : InstRW<[M85GrpBLat1Mve],
840 (instregex "MVE_VABSf(16|32)",
841 "MVE_V(MAX|MIN)NMA?f(16|32)",
// Across-vector reduction opcodes. The "...MveR" write types used here
// (M85GrpBLat2MveR, M85GrpALat2MveR, M85Lat2MveR) and M85Read_EX3 are declared
// outside this section.
// NOTE(review): the EX3 reads presumably correspond to the incoming
// accumulator operand(s) of the accumulating forms -- confirm.

// VADDLV accumulating: two writes, two EX3 reads.
845 def : InstRW<[M85GrpBLat2MveR, M85Lat2MveR, M85Read_EX3, M85Read_EX3],
846 (instregex "MVE_VADDLV[us]32acc")>;
// VADDLV non-accumulating: same two writes, no explicit reads.
848 def : InstRW<[M85GrpBLat2MveR, M85Lat2MveR],
849 (instregex "MVE_VADDLV[us]32no_acc")>;
// VADDV accumulating: one write, one EX3 read.
851 def : InstRW<[M85GrpBLat2MveR, M85Read_EX3],
852 (instregex "MVE_VADDV[us](8|16|32)acc"
// VMAXV/VMINV (with optional "A") and VABAV: A-group write, one EX3 read.
855 def : InstRW<[M85GrpALat2MveR, M85Read_EX3],
856 (instregex "MVE_V(MAX|MIN)A?V[us](8|16|32)",
857 "MVE_VABAV(s|u)(8|16|32)"
// VADDV non-accumulating: A-group write, no explicit reads.
860 def : InstRW<[M85GrpALat2MveR],
861 (instregex "MVE_VADDV[us](8|16|32)no_acc")>;
// Opcodes scheduled with M85GrpALat2Mve (latency 2) and no explicit read
// entries: saturating/rounding shifts by vector or immediate, saturating
// narrowing moves (VQMOVN/VQMOVUN) and saturating/rounding narrowing shifts.
863 def : InstRW<[M85GrpALat2Mve],
864 (instregex "MVE_V(Q|R|QR)SHL_by_vec[su](8|16|32)",
868 "MVE_VQMOVU?N[su](8|16|32)[tb]h",
869 "MVE_VQR?SHL(U_)?imm[su](8|16|32)",
870 "MVE_VQR?SHRN[bt]h[su](16|32)",
871 "MVE_VQR?SHRUNs(16|32)[bt]h",
872 "MVE_VRSHR_imm[su](8|16|32)",
873 "MVE_VRSHRNi(16|32)[bt]h",
// Floating-point max/min reductions (VMAXNMV/VMINNMV, optional "A" variant):
// M85GrpBLat2MveR (declared outside this section) with one EX2 read entry.
877 def : InstRW<[M85GrpBLat2MveR, M85Read_EX2],
878 (instregex "MVE_V(MAX|MIN)NMA?Vf(16|32)")>;
// Integer multiply and multiply-accumulate opcodes scheduled with
// M85GrpBLat2Mve (latency 2, VFP "B" units).

// The next three entries use the same list: one write, reads at EX1 and EX2.
880 def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX2],
881 (instregex "MVE_VMUL_qr_i(8|16|32)")>;
883 def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX2],
884 (instregex "MVE_VQDMULL_qr_s(16|32)[tb]h")>;
886 def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX2],
887 (instregex "MVE_VQR?DMULH_qr_s(8|16|32)")>;
// VMLA/VMLAS by scalar: three read entries, the last one at EX3
// (M85Read_EX3 is declared outside this section).
889 def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX1, M85Read_EX3],
890 // limited accumulate bypass
891 (instregex "MVE_VMLAS?_qr_i(8|16|32)")>;
// VQDMLAH/VQRDMLAH/VQDMLASH/VQRDMLASH by scalar: three read entries, the last
// one at EX2.
893 def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX1, M85Read_EX2],
894 // limited accumulate bypass
895 (instregex "MVE_VQR?DMLAS?H_qrs(8|16|32)")>;
// VQDMLADH/VQDMLSDH family (optional R and X variants): no explicit reads.
897 def : InstRW<[M85GrpBLat2Mve],
898 // limited accumulate bypass
899 (instregex "MVE_VQR?DML[AS]DHX?s(8|16|32)")>;
// Multiply-accumulate-across-vector opcodes on M85GrpBLat2MveR (declared
// outside this section). The patterns containing a literal `a` before the
// optional `x` select the accumulating forms; those forms carry EX3 read
// entries, the others have no explicit reads.

// Long accumulating forms: two writes, two EX3 reads.
901 def : InstRW<[M85GrpBLat2MveR, M85Lat2MveR, M85Read_EX3, M85Read_EX3],
902 (instregex "MVE_VR?ML[AS]LDAVH?ax?[su](8|16|32)")>;
// Long non-accumulating forms: two writes.
904 def : InstRW<[M85GrpBLat2MveR, M85Lat2MveR],
905 (instregex "MVE_VR?ML[AS]LDAVH?x?[su](8|16|32)")>;
// Accumulating forms: one write, one EX3 read.
907 def : InstRW<[M85GrpBLat2MveR, M85Read_EX3],
908 (instregex "MVE_VML[AS]DAVax?[su](8|16|32)")>;
// Non-accumulating forms: one write.
910 def : InstRW<[M85GrpBLat2MveR],
911 (instregex "MVE_VML[AS]DAVx?[su](8|16|32)")>;
// Opcodes scheduled with M85GrpBLat2Mve (latency 2) and no explicit read
// entries: int<->float and f16<->f32 conversions, integer VMULL/VMUL,
// VQDMULL, VQDMULH/VQRDMULH and VMULH/VRMULH.
913 def : InstRW<[M85GrpBLat2Mve],
914 (instregex "MVE_VCVTf16(u|s)16", "MVE_VCVTf32(u|s)32",
915 "MVE_VCVT(u|s)16f16", "MVE_VCVT(u|s)32f32",
916 "MVE_VCVTf16f32", "MVE_VCVTf32f16",
917 "MVE_VMULL[BT]?[su](8|16|32)(bh|th)?",
918 "MVE_VMUL(t1)*i(8|16|32)",
919 "MVE_VQDMULLs(16|32)[tb]h",
920 "MVE_VQR?DMULHi(8|16|32)",
921 "MVE_VR?MULH[su](8|16|32)",
// Opcodes scheduled with M85GrpBLat3Mve (declared outside this section).
// NOTE(review): the name suggests a 3-cycle latency on the "B" units --
// confirm at its definition.

// Floating-point multiply by scalar: reads at EX1 and EX2.
925 def : InstRW<[M85GrpBLat3Mve, M85Read_EX1, M85Read_EX2],
926 (instregex "MVE_VMUL_qr_f(16|32)")>;
// VCMUL and the remaining patterns of this list: no explicit reads.
928 def : InstRW<[M85GrpBLat3Mve],
929 (instregex "MVE_VCMULf(16|32)",
// Floating-point fused multiply-accumulate opcodes scheduled with
// M85GrpBLat4Mve (declared outside this section). Every entry lists
// M85Read_EX3 as its first read.
// NOTE(review): the first read is presumably the accumulator operand --
// confirm against the instruction operand order.

// Scalar-operand FMA forms: reads at EX3, EX1, EX2.
933 def : InstRW<[M85GrpBLat4Mve, M85Read_EX3, M85Read_EX1, M85Read_EX2],
934 (instregex "MVE_VFMA_qr_Sf(16|32)", // VFMAS
935 "MVE_VFMA_qr_f(16|32)" // VFMA
// VCMLA: single EX3 read entry.
938 def : InstRW<[M85GrpBLat4Mve, M85Read_EX3],
939 (instregex "MVE_VCMLAf(16|32)")>;
// Vector VFMA/VFMS: single EX3 read entry.
941 def : InstRW<[M85GrpBLat4Mve, M85Read_EX3],
942 (instregex "MVE_VFM(A|S)f(16|32)")>;
// Compare and predication opcodes. Patterns ending in `r` (or `r$`) select the
// opcode names with a trailing "r"; the `$`-terminated patterns without it
// select the remaining forms.
// NOTE(review): the trailing "r" presumably marks the vector-vs-scalar-register
// variants -- confirm.

// VPT "r" forms, floating-point: M85GrpCLat1Mve, reads at EX1, EX1, EX2.
944 def : InstRW<[M85GrpCLat1Mve, M85Read_EX1, M85Read_EX1, M85Read_EX2],
945 (instregex "MVE_VPTv(4|8)f(16|32)r")>;
// VPT "r" forms, integer: same list as above.
947 def : InstRW<[M85GrpCLat1Mve, M85Read_EX1, M85Read_EX1, M85Read_EX2],
948 (instregex "MVE_VPTv(4|8|16)(i|s|u)(8|16|32)r")>;
// VCMP "r" forms: reads at EX1 and EX2.
950 def : InstRW<[M85GrpCLat1Mve, M85Read_EX1, M85Read_EX2],
951 (instregex "MVE_VCMP[isu](8|16|32)r$", "MVE_VCMPf(16|32)r$")>;
// VCTP uses the "D" write (M85GrpDLat1Mve) with one EX2 read.
953 def : InstRW<[M85GrpDLat1Mve, M85Read_EX2],
954 (instregex "MVE_VCTP(8|16|32|64)")>;
// VCMP/VPT forms without the trailing "r": no explicit reads.
956 def : InstRW<[M85GrpCLat1Mve],
957 (instregex "MVE_VCMPf(16|32)$", "MVE_VCMP[isu](8|16|32)$",
958 "MVE_VPTv(4|8)f(16|32)$",
959 "MVE_VPTv(4|8|16)(i|s|u)(8|16|32)$"
// MVE_VPNOT (and any further patterns of this list) scheduled with
// M85GrpDLat1Mve: latency 1, VFPD held for 2 cycles; no explicit reads.
962 def : InstRW<[M85GrpDLat1Mve],
963 (instregex "MVE_VPNOT",
// MVE_VSHLC has two write entries: the resource-free M85Lat2MveR comes first,
// followed by the resource-consuming M85GrpALat2Mve, then reads at EX1 and EX2.
// NOTE(review): the write order presumably mirrors the instruction's result
// operand order (scalar result before the vector result) -- confirm against
// the MVE_VSHLC definition.
967 def : InstRW<[M85Lat2MveR, M85GrpALat2Mve, M85Read_EX1, M85Read_EX2],
968 (instregex "MVE_VSHLC")>;
// Bind the generic WriteVLD1-4 / WriteVST1-4 SchedWrite types to an empty
// processor-resource list for this model.
// NOTE(review): presumably these generic vector load/store write types are not
// used by any instruction this core executes and are bound only so that the
// model provides a definition for them -- confirm.
972 def : WriteRes<WriteVLD1, []>;
973 def : WriteRes<WriteVLD2, []>;
974 def : WriteRes<WriteVLD3, []>;
975 def : WriteRes<WriteVLD4, []>;
976 def : WriteRes<WriteVST1, []>;
977 def : WriteRes<WriteVST2, []>;
978 def : WriteRes<WriteVST3, []>;
979 def : WriteRes<WriteVST4, []>;
981 } // SchedModel = CortexM85Model