1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //===----------------------------------------------------------------------===//
13 // ===---------------------------------------------------------------------===//
14 // This section contains legacy support for itineraries. This is
15 // required until SD and PostRA schedulers are replaced by MachineScheduler.
18 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
22 def A9_Issue0 : FuncUnit; // Issue 0 (IIC_Br occupies both Issue0 and Issue1)
23 def A9_Issue1 : FuncUnit; // Issue 1 (IIC_Br occupies both Issue0 and Issue1)
24 def A9_Branch : FuncUnit; // Branch (last stage of the branch itineraries)
25 def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0 (the only unit used by the integer multiply itineraries)
26 def A9_ALU1 : FuncUnit; // ALU pipeline 1
27 def A9_AGU : FuncUnit; // Address generation unit for ld / st
28 def A9_NPipe : FuncUnit; // NEON pipeline (also used by the VFP itineraries)
29 def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer
30 def A9_LSUnit : FuncUnit; // L/S Unit
31 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial FU modeling VFP/NEON cross-domain stalls)
32 def A9_DRegsN : FuncUnit; // FP register set, NEON side (artificial FU modeling VFP/NEON cross-domain stalls)
35 def A9_LdBypass : Bypass; // Bypass named in the operand bypass lists of the load and ALU itineraries
37 def CortexA9Itineraries : ProcessorItineraries<
38 [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
39 A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
41 // Two fully-pipelined integer ALU pipelines
44 // Move instructions, unconditional
45 InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
46 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
47 InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
48 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
49 InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
50 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
51 InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
52 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
53 InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
54 InstrStage<1, [A9_ALU0, A9_ALU1]>,
55 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
56 InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
57 InstrStage<1, [A9_ALU0, A9_ALU1]>,
58 InstrStage<1, [A9_ALU0, A9_ALU1]>,
59 InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
60 InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
61 InstrStage<1, [A9_ALU0, A9_ALU1]>,
62 InstrStage<1, [A9_ALU0, A9_ALU1]>,
63 InstrStage<1, [A9_MUX0], 0>,
64 InstrStage<1, [A9_AGU], 0>,
65 InstrStage<1, [A9_LSUnit]>], [5]>,
68 InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
69 InstrStage<1, [A9_ALU0, A9_ALU1]>],
71 InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
72 InstrStage<1, [A9_ALU0, A9_ALU1]>],
73 [1, 1], [NoBypass, A9_LdBypass]>,
74 InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
75 InstrStage<2, [A9_ALU0, A9_ALU1]>],
77 InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
78 InstrStage<3, [A9_ALU0, A9_ALU1]>],
82 InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
83 InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
85 // Binary Instructions that produce a result
86 InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
87 InstrStage<1, [A9_ALU0, A9_ALU1]>],
88 [1, 1], [NoBypass, A9_LdBypass]>,
89 InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
90 InstrStage<1, [A9_ALU0, A9_ALU1]>],
91 [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
92 InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
93 InstrStage<2, [A9_ALU0, A9_ALU1]>],
94 [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
95 InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
96 InstrStage<2, [A9_ALU0, A9_ALU1]>],
97 [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
98 InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
99 InstrStage<3, [A9_ALU0, A9_ALU1]>],
101 [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
103 // Bitwise Instructions that produce a result
104 InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
105 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
106 InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
107 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
108 InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
109 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
110 InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
111 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
113 // Unary Instructions that produce a result
116 InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
117 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
119 // BFC, BFI, UBFX, SBFX
120 InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
121 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
124 // Zero and sign extension instructions
125 InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
126 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
127 InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
128 InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
129 InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
130 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
132 // Compare instructions
133 InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
134 InstrStage<1, [A9_ALU0, A9_ALU1]>],
136 InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
137 InstrStage<1, [A9_ALU0, A9_ALU1]>],
138 [1, 1], [A9_LdBypass, A9_LdBypass]>,
139 InstrItinData<IIC_iCMPsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
140 InstrStage<2, [A9_ALU0, A9_ALU1]>],
141 [1, 1], [A9_LdBypass, NoBypass]>,
142 InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
143 InstrStage<3, [A9_ALU0, A9_ALU1]>],
144 [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
147 InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
148 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
149 InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
150 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
151 InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
152 InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
153 InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
154 InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
156 // Move instructions, conditional
157 // FIXME: Correctly model the extra input dep on the destination.
158 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
159 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
160 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
161 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
162 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
163 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
164 InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
165 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
166 InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
167 InstrStage<1, [A9_ALU0, A9_ALU1]>,
168 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
169 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
171 // Integer multiply pipeline
173 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
174 InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
175 InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
176 InstrStage<2, [A9_ALU0]>],
178 InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
179 InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
180 InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
181 InstrStage<2, [A9_ALU0]>],
183 InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
184 InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
185 InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
186 InstrStage<3, [A9_ALU0]>],
188 // Integer load pipeline
189 // FIXME: The timings are some rough approximations
192 InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
193 InstrStage<1, [A9_MUX0], 0>,
194 InstrStage<1, [A9_AGU], 0>,
195 InstrStage<1, [A9_LSUnit]>],
196 [3, 1], [A9_LdBypass]>,
197 InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
198 InstrStage<1, [A9_MUX0], 0>,
199 InstrStage<2, [A9_AGU], 0>,
200 InstrStage<1, [A9_LSUnit]>],
201 [4, 1], [A9_LdBypass]>,
202 // FIXME: If address is 64-bit aligned, AGU cycles is 1.
203 InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
204 InstrStage<1, [A9_MUX0], 0>,
205 InstrStage<2, [A9_AGU], 0>,
206 InstrStage<1, [A9_LSUnit]>],
207 [3, 3, 1], [A9_LdBypass]>,
210 InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
211 InstrStage<1, [A9_MUX0], 0>,
212 InstrStage<1, [A9_AGU], 0>,
213 InstrStage<1, [A9_LSUnit]>],
214 [3, 1, 1], [A9_LdBypass]>,
215 InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
216 InstrStage<1, [A9_MUX0], 0>,
217 InstrStage<2, [A9_AGU], 0>,
218 InstrStage<1, [A9_LSUnit]>],
219 [4, 1, 1], [A9_LdBypass]>,
220 InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
221 InstrStage<1, [A9_MUX0], 0>,
222 InstrStage<2, [A9_AGU], 0>,
223 InstrStage<1, [A9_LSUnit]>],
224 [3, 3, 1, 1], [A9_LdBypass]>,
226 // Scaled register offset
227 InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
228 InstrStage<1, [A9_MUX0], 0>,
229 InstrStage<1, [A9_AGU], 0>,
230 InstrStage<1, [A9_LSUnit], 0>],
231 [4, 1, 1], [A9_LdBypass]>,
232 InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
233 InstrStage<1, [A9_MUX0], 0>,
234 InstrStage<2, [A9_AGU], 0>,
235 InstrStage<1, [A9_LSUnit]>],
236 [5, 1, 1], [A9_LdBypass]>,
238 // Immediate offset with update
239 InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
240 InstrStage<1, [A9_MUX0], 0>,
241 InstrStage<1, [A9_AGU], 0>,
242 InstrStage<1, [A9_LSUnit]>],
243 [3, 2, 1], [A9_LdBypass]>,
244 InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
245 InstrStage<1, [A9_MUX0], 0>,
246 InstrStage<2, [A9_AGU], 0>,
247 InstrStage<1, [A9_LSUnit]>],
248 [4, 3, 1], [A9_LdBypass]>,
250 // Register offset with update
251 InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
252 InstrStage<1, [A9_MUX0], 0>,
253 InstrStage<1, [A9_AGU], 0>,
254 InstrStage<1, [A9_LSUnit]>],
255 [3, 2, 1, 1], [A9_LdBypass]>,
256 InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
257 InstrStage<1, [A9_MUX0], 0>,
258 InstrStage<2, [A9_AGU], 0>,
259 InstrStage<1, [A9_LSUnit]>],
260 [4, 3, 1, 1], [A9_LdBypass]>,
261 InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
262 InstrStage<1, [A9_MUX0], 0>,
263 InstrStage<2, [A9_AGU], 0>,
264 InstrStage<1, [A9_LSUnit]>],
265 [3, 3, 1, 1], [A9_LdBypass]>,
267 // Scaled register offset with update
268 InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
269 InstrStage<1, [A9_MUX0], 0>,
270 InstrStage<1, [A9_AGU], 0>,
271 InstrStage<1, [A9_LSUnit]>],
272 [4, 3, 1, 1], [A9_LdBypass]>,
273 InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
274 InstrStage<1, [A9_MUX0], 0>,
275 InstrStage<2, [A9_AGU], 0>,
276 InstrStage<1, [A9_LSUnit]>],
277 [5, 4, 1, 1], [A9_LdBypass]>,
279 // Load multiple, def is the 5th operand.
280 // FIXME: This assumes 3 to 4 registers.
281 InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
282 InstrStage<1, [A9_MUX0], 0>,
283 InstrStage<2, [A9_AGU], 1>,
284 InstrStage<2, [A9_LSUnit]>],
286 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
289 // Load multiple + update, defs are the 1st and 5th operands.
290 InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
291 InstrStage<1, [A9_MUX0], 0>,
292 InstrStage<2, [A9_AGU], 1>,
293 InstrStage<2, [A9_LSUnit]>],
295 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
298 // Load multiple plus branch
299 InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
300 InstrStage<1, [A9_MUX0], 0>,
301 InstrStage<1, [A9_AGU], 1>,
302 InstrStage<2, [A9_LSUnit]>,
303 InstrStage<1, [A9_Branch]>],
305 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
308 // Pop, def is the 3rd operand.
309 InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
310 InstrStage<1, [A9_MUX0], 0>,
311 InstrStage<2, [A9_AGU], 1>,
312 InstrStage<2, [A9_LSUnit]>],
314 [NoBypass, NoBypass, A9_LdBypass],
317 // Pop + branch, def is the 3rd operand.
318 InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
319 InstrStage<1, [A9_MUX0], 0>,
320 InstrStage<2, [A9_AGU], 1>,
321 InstrStage<2, [A9_LSUnit]>,
322 InstrStage<1, [A9_Branch]>],
324 [NoBypass, NoBypass, A9_LdBypass],
327 // iLoadi + iALUr for t2LDRpci_pic.
328 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
329 InstrStage<1, [A9_MUX0], 0>,
330 InstrStage<1, [A9_AGU], 0>,
331 InstrStage<1, [A9_LSUnit]>,
332 InstrStage<1, [A9_ALU0, A9_ALU1]>],
335 // Integer store pipeline
338 InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
339 InstrStage<1, [A9_MUX0], 0>,
340 InstrStage<1, [A9_AGU], 0>,
341 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
342 InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
343 InstrStage<1, [A9_MUX0], 0>,
344 InstrStage<2, [A9_AGU], 1>,
345 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
346 // FIXME: If address is 64-bit aligned, AGU cycles is 1.
347 InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
348 InstrStage<1, [A9_MUX0], 0>,
349 InstrStage<2, [A9_AGU], 1>,
350 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
353 InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
354 InstrStage<1, [A9_MUX0], 0>,
355 InstrStage<1, [A9_AGU], 0>,
356 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
357 InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
358 InstrStage<1, [A9_MUX0], 0>,
359 InstrStage<2, [A9_AGU], 1>,
360 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
361 InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
362 InstrStage<1, [A9_MUX0], 0>,
363 InstrStage<2, [A9_AGU], 1>,
364 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
366 // Scaled register offset
367 InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
368 InstrStage<1, [A9_MUX0], 0>,
369 InstrStage<1, [A9_AGU], 0>,
370 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
371 InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
372 InstrStage<1, [A9_MUX0], 0>,
373 InstrStage<2, [A9_AGU], 1>,
374 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
376 // Immediate offset with update
377 InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
378 InstrStage<1, [A9_MUX0], 0>,
379 InstrStage<1, [A9_AGU], 0>,
380 InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
381 InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
382 InstrStage<1, [A9_MUX0], 0>,
383 InstrStage<2, [A9_AGU], 1>,
384 InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
386 // Register offset with update
387 InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
388 InstrStage<1, [A9_MUX0], 0>,
389 InstrStage<1, [A9_AGU], 0>,
390 InstrStage<1, [A9_LSUnit]>],
392 InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
393 InstrStage<1, [A9_MUX0], 0>,
394 InstrStage<2, [A9_AGU], 1>,
395 InstrStage<1, [A9_LSUnit]>],
397 InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
398 InstrStage<1, [A9_MUX0], 0>,
399 InstrStage<2, [A9_AGU], 1>,
400 InstrStage<1, [A9_LSUnit]>],
403 // Scaled register offset with update
404 InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
405 InstrStage<1, [A9_MUX0], 0>,
406 InstrStage<1, [A9_AGU], 0>,
407 InstrStage<1, [A9_LSUnit]>],
409 InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
410 InstrStage<1, [A9_MUX0], 0>,
411 InstrStage<2, [A9_AGU], 1>,
412 InstrStage<1, [A9_LSUnit]>],
416 InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
417 InstrStage<1, [A9_MUX0], 0>,
418 InstrStage<1, [A9_AGU], 0>,
419 InstrStage<2, [A9_LSUnit]>],
420 [], [], -1>, // dynamic uops
422 // Store multiple + update
423 InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
424 InstrStage<1, [A9_MUX0], 0>,
425 InstrStage<1, [A9_AGU], 0>,
426 InstrStage<2, [A9_LSUnit]>],
427 [2], [], -1>, // dynamic uops
430 InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
434 // no delay slots, so the latency of a branch is unimportant
435 InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>,
436 InstrStage<1, [A9_Issue1], 0>,
437 InstrStage<1, [A9_Branch]>]>,
439 // VFP and NEON share the same register file. This means that every VFP
440 // instruction should wait for full completion of the preceding NEON
441 // instruction and vice-versa. We model this behavior with two artificial FUs:
442 // DRegsVFP and DRegsN.
444 // Every VFP instruction:
445 // - Acquires DRegsVFP resource for 1 cycle
446 // - Reserves DRegsN resource for the whole duration (including time to
447 // register file writeback!).
448 // Every NEON instruction does the same but with FUs swapped.
450 // Since the reserved FU cannot be acquired, this models precisely
451 // "cross-domain" stalls.
454 // Issue through integer pipeline, and execute in NEON unit.
456 // FP Special Register to Integer Register File Move
457 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
458 InstrStage<1, [A9_MUX0], 0>,
459 InstrStage<1, [A9_DRegsVFP], 0, Required>,
460 InstrStage<2, [A9_DRegsN], 0, Reserved>,
461 InstrStage<1, [A9_NPipe]>],
464 // Single-precision FP Unary
465 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
466 InstrStage<1, [A9_MUX0], 0>,
467 InstrStage<1, [A9_DRegsVFP], 0, Required>,
468 // Extra latency cycles since wbck is 2 cycles
469 InstrStage<3, [A9_DRegsN], 0, Reserved>,
470 InstrStage<1, [A9_NPipe]>],
473 // Double-precision FP Unary
474 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
475 InstrStage<1, [A9_MUX0], 0>,
476 InstrStage<1, [A9_DRegsVFP], 0, Required>,
477 // Extra latency cycles since wbck is 2 cycles
478 InstrStage<3, [A9_DRegsN], 0, Reserved>,
479 InstrStage<1, [A9_NPipe]>],
483 // Single-precision FP Compare
484 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
485 InstrStage<1, [A9_MUX0], 0>,
486 InstrStage<1, [A9_DRegsVFP], 0, Required>,
487 // Extra latency cycles since wbck is 4 cycles
488 InstrStage<5, [A9_DRegsN], 0, Reserved>,
489 InstrStage<1, [A9_NPipe]>],
492 // Double-precision FP Compare
493 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
494 InstrStage<1, [A9_MUX0], 0>,
495 InstrStage<1, [A9_DRegsVFP], 0, Required>,
496 // Extra latency cycles since wbck is 4 cycles
497 InstrStage<5, [A9_DRegsN], 0, Reserved>,
498 InstrStage<1, [A9_NPipe]>],
501 // Single to Double FP Convert
502 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
503 InstrStage<1, [A9_MUX0], 0>,
504 InstrStage<1, [A9_DRegsVFP], 0, Required>,
505 InstrStage<5, [A9_DRegsN], 0, Reserved>,
506 InstrStage<1, [A9_NPipe]>],
509 // Double to Single FP Convert
510 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
511 InstrStage<1, [A9_MUX0], 0>,
512 InstrStage<1, [A9_DRegsVFP], 0, Required>,
513 InstrStage<5, [A9_DRegsN], 0, Reserved>,
514 InstrStage<1, [A9_NPipe]>],
518 // Single to Half FP Convert
519 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
520 InstrStage<1, [A9_MUX0], 0>,
521 InstrStage<1, [A9_DRegsVFP], 0, Required>,
522 InstrStage<5, [A9_DRegsN], 0, Reserved>,
523 InstrStage<1, [A9_NPipe]>],
526 // Half to Single FP Convert
527 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
528 InstrStage<1, [A9_MUX0], 0>,
529 InstrStage<1, [A9_DRegsVFP], 0, Required>,
530 InstrStage<3, [A9_DRegsN], 0, Reserved>,
531 InstrStage<1, [A9_NPipe]>],
535 // Single-Precision FP to Integer Convert
536 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
537 InstrStage<1, [A9_MUX0], 0>,
538 InstrStage<1, [A9_DRegsVFP], 0, Required>,
539 InstrStage<5, [A9_DRegsN], 0, Reserved>,
540 InstrStage<1, [A9_NPipe]>],
543 // Double-Precision FP to Integer Convert
544 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
545 InstrStage<1, [A9_MUX0], 0>,
546 InstrStage<1, [A9_DRegsVFP], 0, Required>,
547 InstrStage<5, [A9_DRegsN], 0, Reserved>,
548 InstrStage<1, [A9_NPipe]>],
551 // Integer to Single-Precision FP Convert
552 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
553 InstrStage<1, [A9_MUX0], 0>,
554 InstrStage<1, [A9_DRegsVFP], 0, Required>,
555 InstrStage<5, [A9_DRegsN], 0, Reserved>,
556 InstrStage<1, [A9_NPipe]>],
559 // Integer to Double-Precision FP Convert
560 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
561 InstrStage<1, [A9_MUX0], 0>,
562 InstrStage<1, [A9_DRegsVFP], 0, Required>,
563 InstrStage<5, [A9_DRegsN], 0, Reserved>,
564 InstrStage<1, [A9_NPipe]>],
567 // Single-precision FP ALU
568 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
569 InstrStage<1, [A9_MUX0], 0>,
570 InstrStage<1, [A9_DRegsVFP], 0, Required>,
571 InstrStage<5, [A9_DRegsN], 0, Reserved>,
572 InstrStage<1, [A9_NPipe]>],
575 // Double-precision FP ALU
576 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
577 InstrStage<1, [A9_MUX0], 0>,
578 InstrStage<1, [A9_DRegsVFP], 0, Required>,
579 InstrStage<5, [A9_DRegsN], 0, Reserved>,
580 InstrStage<1, [A9_NPipe]>],
583 // Single-precision FP Multiply
584 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
585 InstrStage<1, [A9_MUX0], 0>,
586 InstrStage<1, [A9_DRegsVFP], 0, Required>,
587 InstrStage<6, [A9_DRegsN], 0, Reserved>,
588 InstrStage<1, [A9_NPipe]>],
591 // Double-precision FP Multiply
592 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
593 InstrStage<1, [A9_MUX0], 0>,
594 InstrStage<1, [A9_DRegsVFP], 0, Required>,
595 InstrStage<7, [A9_DRegsN], 0, Reserved>,
596 InstrStage<2, [A9_NPipe]>],
599 // Single-precision FP MAC
600 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
601 InstrStage<1, [A9_MUX0], 0>,
602 InstrStage<1, [A9_DRegsVFP], 0, Required>,
603 InstrStage<9, [A9_DRegsN], 0, Reserved>,
604 InstrStage<1, [A9_NPipe]>],
607 // Double-precision FP MAC
608 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
609 InstrStage<1, [A9_MUX0], 0>,
610 InstrStage<1, [A9_DRegsVFP], 0, Required>,
611 InstrStage<10, [A9_DRegsN], 0, Reserved>,
612 InstrStage<2, [A9_NPipe]>],
615 // Single-precision Fused FP MAC
616 InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
617 InstrStage<1, [A9_MUX0], 0>,
618 InstrStage<1, [A9_DRegsVFP], 0, Required>,
619 InstrStage<9, [A9_DRegsN], 0, Reserved>,
620 InstrStage<1, [A9_NPipe]>],
623 // Double-precision Fused FP MAC
624 InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
625 InstrStage<1, [A9_MUX0], 0>,
626 InstrStage<1, [A9_DRegsVFP], 0, Required>,
627 InstrStage<10, [A9_DRegsN], 0, Reserved>,
628 InstrStage<2, [A9_NPipe]>],
631 // Single-precision FP DIV
632 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
633 InstrStage<1, [A9_MUX0], 0>,
634 InstrStage<1, [A9_DRegsVFP], 0, Required>,
635 InstrStage<16, [A9_DRegsN], 0, Reserved>,
636 InstrStage<10, [A9_NPipe]>],
639 // Double-precision FP DIV
640 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
641 InstrStage<1, [A9_MUX0], 0>,
642 InstrStage<1, [A9_DRegsVFP], 0, Required>,
643 InstrStage<26, [A9_DRegsN], 0, Reserved>,
644 InstrStage<20, [A9_NPipe]>],
647 // Single-precision FP SQRT
648 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
649 InstrStage<1, [A9_MUX0], 0>,
650 InstrStage<1, [A9_DRegsVFP], 0, Required>,
651 InstrStage<18, [A9_DRegsN], 0, Reserved>,
652 InstrStage<13, [A9_NPipe]>],
655 // Double-precision FP SQRT
656 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
657 InstrStage<1, [A9_MUX0], 0>,
658 InstrStage<1, [A9_DRegsVFP], 0, Required>,
659 InstrStage<33, [A9_DRegsN], 0, Reserved>,
660 InstrStage<28, [A9_NPipe]>],
664 // Integer to Single-precision Move
665 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
666 InstrStage<1, [A9_MUX0], 0>,
667 InstrStage<1, [A9_DRegsVFP], 0, Required>,
668 // Extra 1 latency cycle since wbck is 2 cycles
669 InstrStage<3, [A9_DRegsN], 0, Reserved>,
670 InstrStage<1, [A9_NPipe]>],
673 // Integer to Double-precision Move
674 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
675 InstrStage<1, [A9_MUX0], 0>,
676 InstrStage<1, [A9_DRegsVFP], 0, Required>,
677 // Extra 1 latency cycle since wbck is 2 cycles
678 InstrStage<3, [A9_DRegsN], 0, Reserved>,
679 InstrStage<1, [A9_NPipe]>],
682 // Single-precision to Integer Move
684 // On A9 move-from-VFP is free to issue with no stall if other VFP
685 // operations are in flight. I assume it still can't dual-issue though.
686 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
687 InstrStage<1, [A9_MUX0], 0>],
690 // Double-precision to Integer Move
692 // On A9 move-from-VFP is free to issue with no stall if other VFP
693 // operations are in flight. I assume it still can't dual-issue though.
694 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
695 InstrStage<1, [A9_MUX0], 0>],
698 // Single-precision FP Load
699 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
700 InstrStage<1, [A9_MUX0], 0>,
701 InstrStage<1, [A9_DRegsVFP], 0, Required>,
702 InstrStage<2, [A9_DRegsN], 0, Reserved>,
703 InstrStage<1, [A9_NPipe], 0>,
704 InstrStage<1, [A9_LSUnit]>],
707 // Double-precision FP Load
708 // FIXME: Result latency is 1 if address is 64-bit aligned.
709 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
710 InstrStage<1, [A9_MUX0], 0>,
711 InstrStage<1, [A9_DRegsVFP], 0, Required>,
712 InstrStage<2, [A9_DRegsN], 0, Reserved>,
713 InstrStage<1, [A9_NPipe], 0>,
714 InstrStage<1, [A9_LSUnit]>],
718 // FIXME: assumes 2 doubles which requires 2 LS cycles.
719 InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
720 InstrStage<1, [A9_MUX0], 0>,
721 InstrStage<1, [A9_DRegsVFP], 0, Required>,
722 InstrStage<2, [A9_DRegsN], 0, Reserved>,
723 InstrStage<1, [A9_NPipe], 0>,
724 InstrStage<2, [A9_LSUnit]>],
725 [1, 1, 1, 1], [], -1>, // dynamic uops
727 // FP Load Multiple + update
728 // FIXME: assumes 2 doubles which requires 2 LS cycles.
729 InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
730 InstrStage<1, [A9_MUX0], 0>,
731 InstrStage<1, [A9_DRegsVFP], 0, Required>,
732 InstrStage<2, [A9_DRegsN], 0, Reserved>,
733 InstrStage<1, [A9_NPipe], 0>,
734 InstrStage<2, [A9_LSUnit]>],
735 [2, 1, 1, 1], [], -1>, // dynamic uops
737 // Single-precision FP Store
738 InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
739 InstrStage<1, [A9_MUX0], 0>,
740 InstrStage<1, [A9_DRegsVFP], 0, Required>,
741 InstrStage<2, [A9_DRegsN], 0, Reserved>,
742 InstrStage<1, [A9_NPipe], 0>,
743 InstrStage<1, [A9_LSUnit]>],
746 // Double-precision FP Store
747 InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
748 InstrStage<1, [A9_MUX0], 0>,
749 InstrStage<1, [A9_DRegsVFP], 0, Required>,
750 InstrStage<2, [A9_DRegsN], 0, Reserved>,
751 InstrStage<1, [A9_NPipe], 0>,
752 InstrStage<1, [A9_LSUnit]>],
756 // FIXME: assumes 2 doubles which requires 2 LS cycles.
757 InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
758 InstrStage<1, [A9_MUX0], 0>,
759 InstrStage<1, [A9_DRegsVFP], 0, Required>,
760 InstrStage<2, [A9_DRegsN], 0, Reserved>,
761 InstrStage<1, [A9_NPipe], 0>,
762 InstrStage<2, [A9_LSUnit]>],
763 [1, 1, 1, 1], [], -1>, // dynamic uops
765 // FP Store Multiple + update
766 // FIXME: assumes 2 doubles which requires 2 LS cycles.
767 InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
768 InstrStage<1, [A9_MUX0], 0>,
769 InstrStage<1, [A9_DRegsVFP], 0, Required>,
770 InstrStage<2, [A9_DRegsN], 0, Reserved>,
771 InstrStage<1, [A9_NPipe], 0>,
772 InstrStage<2, [A9_LSUnit]>],
773 [2, 1, 1, 1], [], -1>, // dynamic uops
776 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
777 InstrStage<1, [A9_MUX0], 0>,
778 InstrStage<1, [A9_DRegsN], 0, Required>,
779 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
780 InstrStage<1, [A9_NPipe], 0>,
781 InstrStage<1, [A9_LSUnit]>],
784 InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
785 InstrStage<1, [A9_MUX0], 0>,
786 InstrStage<1, [A9_DRegsN], 0, Required>,
787 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
788 InstrStage<1, [A9_NPipe], 0>,
789 InstrStage<1, [A9_LSUnit]>],
792 InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
793 InstrStage<1, [A9_MUX0], 0>,
794 InstrStage<1, [A9_DRegsN], 0, Required>,
795 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
796 InstrStage<2, [A9_NPipe], 0>,
797 InstrStage<2, [A9_LSUnit]>],
800 InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
801 InstrStage<1, [A9_MUX0], 0>,
802 InstrStage<1, [A9_DRegsN], 0, Required>,
803 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
804 InstrStage<2, [A9_NPipe], 0>,
805 InstrStage<2, [A9_LSUnit]>],
808 InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
809 InstrStage<1, [A9_MUX0], 0>,
810 InstrStage<1, [A9_DRegsN], 0, Required>,
811 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
812 InstrStage<1, [A9_NPipe], 0>,
813 InstrStage<1, [A9_LSUnit]>],
816 InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
817 InstrStage<1, [A9_MUX0], 0>,
818 InstrStage<1, [A9_DRegsN], 0, Required>,
819 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
820 InstrStage<1, [A9_NPipe], 0>,
821 InstrStage<1, [A9_LSUnit]>],
824 InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
825 InstrStage<1, [A9_MUX0], 0>,
826 InstrStage<1, [A9_DRegsN], 0, Required>,
827 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
828 InstrStage<2, [A9_NPipe], 0>,
829 InstrStage<2, [A9_LSUnit]>],
832 InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
833 InstrStage<1, [A9_MUX0], 0>,
834 InstrStage<1, [A9_DRegsN], 0, Required>,
835 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
836 InstrStage<2, [A9_NPipe], 0>,
837 InstrStage<2, [A9_LSUnit]>],
841 InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
842 InstrStage<1, [A9_MUX0], 0>,
843 InstrStage<1, [A9_DRegsN], 0, Required>,
844 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
845 InstrStage<2, [A9_NPipe], 0>,
846 InstrStage<2, [A9_LSUnit]>],
850 InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
851 InstrStage<1, [A9_MUX0], 0>,
852 InstrStage<1, [A9_DRegsN], 0, Required>,
853 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
854 InstrStage<2, [A9_NPipe], 0>,
855 InstrStage<2, [A9_LSUnit]>],
859 InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
860 InstrStage<1, [A9_MUX0], 0>,
861 InstrStage<1, [A9_DRegsN], 0, Required>,
862 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
863 InstrStage<1, [A9_NPipe], 0>,
864 InstrStage<1, [A9_LSUnit]>],
868 InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
869 InstrStage<1, [A9_MUX0], 0>,
870 InstrStage<1, [A9_DRegsN], 0, Required>,
871 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
872 InstrStage<1, [A9_NPipe], 0>,
873 InstrStage<1, [A9_LSUnit]>],
877 InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
878 InstrStage<1, [A9_MUX0], 0>,
879 InstrStage<1, [A9_DRegsN], 0, Required>,
880 // Extra latency cycles since wbck is 7 cycles
881 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
882 InstrStage<1, [A9_NPipe], 0>,
883 InstrStage<1, [A9_LSUnit]>],
887 InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
888 InstrStage<1, [A9_MUX0], 0>,
889 InstrStage<1, [A9_DRegsN], 0, Required>,
890 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
891 InstrStage<2, [A9_NPipe], 0>,
892 InstrStage<2, [A9_LSUnit]>],
896 InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
897 InstrStage<1, [A9_MUX0], 0>,
898 InstrStage<1, [A9_DRegsN], 0, Required>,
899 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
900 InstrStage<2, [A9_NPipe], 0>,
901 InstrStage<2, [A9_LSUnit]>],
905 InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
906 InstrStage<1, [A9_MUX0], 0>,
907 InstrStage<1, [A9_DRegsN], 0, Required>,
908 // Extra latency cycles since wbck is 7 cycles
909 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
910 InstrStage<1, [A9_NPipe], 0>,
911 InstrStage<1, [A9_LSUnit]>],
915 InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
916 InstrStage<1, [A9_MUX0], 0>,
917 InstrStage<1, [A9_DRegsN], 0, Required>,
918 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
919 InstrStage<2, [A9_NPipe], 0>,
920 InstrStage<2, [A9_LSUnit]>],
924 InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
925 InstrStage<1, [A9_MUX0], 0>,
926 InstrStage<1, [A9_DRegsN], 0, Required>,
927 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
928 InstrStage<2, [A9_NPipe], 0>,
929 InstrStage<2, [A9_LSUnit]>],
930 [3, 3, 2, 1, 1, 1, 1, 1]>,
933 InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
934 InstrStage<1, [A9_MUX0], 0>,
935 InstrStage<1, [A9_DRegsN], 0, Required>,
936 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
937 InstrStage<1, [A9_NPipe], 0>,
938 InstrStage<1, [A9_LSUnit]>],
942 InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
943 InstrStage<1, [A9_MUX0], 0>,
944 InstrStage<1, [A9_DRegsN], 0, Required>,
945 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
946 InstrStage<1, [A9_NPipe], 0>,
947 InstrStage<1, [A9_LSUnit]>],
951 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
952 InstrStage<1, [A9_MUX0], 0>,
953 InstrStage<1, [A9_DRegsN], 0, Required>,
954 InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
955 InstrStage<3, [A9_NPipe], 0>,
956 InstrStage<3, [A9_LSUnit]>],
960 InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
961 InstrStage<1, [A9_MUX0], 0>,
962 InstrStage<1, [A9_DRegsN], 0, Required>,
963 InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
964 InstrStage<5, [A9_NPipe], 0>,
965 InstrStage<5, [A9_LSUnit]>],
966 [5, 5, 6, 1, 1, 1, 1, 2]>,
969 InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
970 InstrStage<1, [A9_MUX0], 0>,
971 InstrStage<1, [A9_DRegsN], 0, Required>,
972 InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
973 InstrStage<3, [A9_NPipe], 0>,
974 InstrStage<3, [A9_LSUnit]>],
978 InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
979 InstrStage<1, [A9_MUX0], 0>,
980 InstrStage<1, [A9_DRegsN], 0, Required>,
981 InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
982 InstrStage<5, [A9_NPipe], 0>,
983 InstrStage<5, [A9_LSUnit]>],
984 [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
987 InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
988 InstrStage<1, [A9_MUX0], 0>,
989 InstrStage<1, [A9_DRegsN], 0, Required>,
990 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
991 InstrStage<3, [A9_NPipe], 0>,
992 InstrStage<3, [A9_LSUnit]>],
996 InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
997 InstrStage<1, [A9_MUX0], 0>,
998 InstrStage<1, [A9_DRegsN], 0, Required>,
999 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1000 InstrStage<3, [A9_NPipe], 0>,
1001 InstrStage<3, [A9_LSUnit]>],
1002 [3, 3, 4, 2, 1, 1]>,
1005 InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1006 InstrStage<1, [A9_MUX0], 0>,
1007 InstrStage<1, [A9_DRegsN], 0, Required>,
1008 InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
1009 InstrStage<3, [A9_NPipe], 0>,
1010 InstrStage<3, [A9_LSUnit]>],
1014 InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1015 InstrStage<1, [A9_MUX0], 0>,
1016 InstrStage<1, [A9_DRegsN], 0, Required>,
1017 InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1018 InstrStage<4, [A9_NPipe], 0>,
1019 InstrStage<4, [A9_LSUnit]>],
1020 [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
1023 InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1024 InstrStage<1, [A9_MUX0], 0>,
1025 InstrStage<1, [A9_DRegsN], 0, Required>,
1026 InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
1027 InstrStage<3, [A9_NPipe], 0>,
1028 InstrStage<3, [A9_LSUnit]>],
1029 [3, 3, 4, 4, 2, 1]>,
1032 InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1033 InstrStage<1, [A9_MUX0], 0>,
1034 InstrStage<1, [A9_DRegsN], 0, Required>,
1035 InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1036 InstrStage<4, [A9_NPipe], 0>,
1037 InstrStage<4, [A9_LSUnit]>],
1038 [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
1041 InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1042 InstrStage<1, [A9_MUX0], 0>,
1043 InstrStage<1, [A9_DRegsN], 0, Required>,
1044 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1045 InstrStage<2, [A9_NPipe], 0>,
1046 InstrStage<2, [A9_LSUnit]>],
1050 InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1051 InstrStage<1, [A9_MUX0], 0>,
1052 InstrStage<1, [A9_DRegsN], 0, Required>,
1053 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1054 InstrStage<2, [A9_NPipe], 0>,
1055 InstrStage<2, [A9_LSUnit]>],
1056 [2, 2, 3, 3, 2, 1, 1]>,
1059 InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1060 InstrStage<1, [A9_MUX0], 0>,
1061 InstrStage<1, [A9_DRegsN], 0, Required>,
1062 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1063 InstrStage<1, [A9_NPipe], 0>,
1064 InstrStage<1, [A9_LSUnit]>],
1068 InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1069 InstrStage<1, [A9_MUX0], 0>,
1070 InstrStage<1, [A9_DRegsN], 0, Required>,
1071 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1072 InstrStage<1, [A9_NPipe], 0>,
1073 InstrStage<1, [A9_LSUnit]>],
1077 InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1078 InstrStage<1, [A9_MUX0], 0>,
1079 InstrStage<1, [A9_DRegsN], 0, Required>,
1080 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1081 InstrStage<2, [A9_NPipe], 0>,
1082 InstrStage<2, [A9_LSUnit]>],
1086 InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1087 InstrStage<1, [A9_MUX0], 0>,
1088 InstrStage<1, [A9_DRegsN], 0, Required>,
1089 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1090 InstrStage<2, [A9_NPipe], 0>,
1091 InstrStage<2, [A9_LSUnit]>],
1092 [1, 1, 1, 1, 2, 2]>,
1095 InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1096 InstrStage<1, [A9_MUX0], 0>,
1097 InstrStage<1, [A9_DRegsN], 0, Required>,
1098 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1099 InstrStage<1, [A9_NPipe], 0>,
1100 InstrStage<1, [A9_LSUnit]>],
1104 InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1105 InstrStage<1, [A9_MUX0], 0>,
1106 InstrStage<1, [A9_DRegsN], 0, Required>,
1107 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1108 InstrStage<1, [A9_NPipe], 0>,
1109 InstrStage<1, [A9_LSUnit]>],
1110 [2, 1, 1, 1, 1, 1]>,
1113 InstrItinData<IIC_VST1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1114 InstrStage<1, [A9_MUX0], 0>,
1115 InstrStage<1, [A9_DRegsN], 0, Required>,
1116 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1117 InstrStage<2, [A9_NPipe], 0>,
1118 InstrStage<2, [A9_LSUnit]>],
1119 [2, 1, 1, 1, 1, 1, 2]>,
1122 InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1123 InstrStage<1, [A9_MUX0], 0>,
1124 InstrStage<1, [A9_DRegsN], 0, Required>,
1125 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1126 InstrStage<2, [A9_NPipe], 0>,
1127 InstrStage<2, [A9_LSUnit]>],
1128 [2, 1, 1, 1, 1, 1, 2, 2]>,
1131 InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1132 InstrStage<1, [A9_MUX0], 0>,
1133 InstrStage<1, [A9_DRegsN], 0, Required>,
1134 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1135 InstrStage<1, [A9_NPipe], 0>,
1136 InstrStage<1, [A9_LSUnit]>],
1140 InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1141 InstrStage<1, [A9_MUX0], 0>,
1142 InstrStage<1, [A9_DRegsN], 0, Required>,
1143 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1144 InstrStage<1, [A9_NPipe], 0>,
1145 InstrStage<1, [A9_LSUnit]>],
1149 InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1150 InstrStage<1, [A9_MUX0], 0>,
1151 InstrStage<1, [A9_DRegsN], 0, Required>,
1152 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1153 InstrStage<1, [A9_NPipe], 0>,
1154 InstrStage<1, [A9_LSUnit]>],
1158 InstrItinData<IIC_VST2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1159 InstrStage<1, [A9_MUX0], 0>,
1160 InstrStage<1, [A9_DRegsN], 0, Required>,
1161 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1162 InstrStage<3, [A9_NPipe], 0>,
1163 InstrStage<3, [A9_LSUnit]>],
1164 [1, 1, 1, 1, 2, 2]>,
1167 InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1168 InstrStage<1, [A9_MUX0], 0>,
1169 InstrStage<1, [A9_DRegsN], 0, Required>,
1170 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1171 InstrStage<1, [A9_NPipe], 0>,
1172 InstrStage<1, [A9_LSUnit]>],
1173 [2, 1, 1, 1, 1, 1]>,
1176 InstrItinData<IIC_VST2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1177 InstrStage<1, [A9_MUX0], 0>,
1178 InstrStage<1, [A9_DRegsN], 0, Required>,
1179 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1180 InstrStage<3, [A9_NPipe], 0>,
1181 InstrStage<3, [A9_LSUnit]>],
1182 [2, 1, 1, 1, 1, 1, 2, 2]>,
1185 InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1186 InstrStage<1, [A9_MUX0], 0>,
1187 InstrStage<1, [A9_DRegsN], 0, Required>,
1188 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1189 InstrStage<1, [A9_NPipe], 0>,
1190 InstrStage<1, [A9_LSUnit]>],
1194 InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1195 InstrStage<1, [A9_MUX0], 0>,
1196 InstrStage<1, [A9_DRegsN], 0, Required>,
1197 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1198 InstrStage<1, [A9_NPipe], 0>,
1199 InstrStage<1, [A9_LSUnit]>],
1200 [2, 1, 1, 1, 1, 1]>,
1203 InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1204 InstrStage<1, [A9_MUX0], 0>,
1205 InstrStage<1, [A9_DRegsN], 0, Required>,
1206 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1207 InstrStage<2, [A9_NPipe], 0>,
1208 InstrStage<2, [A9_LSUnit]>],
1212 InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1213 InstrStage<1, [A9_MUX0], 0>,
1214 InstrStage<1, [A9_DRegsN], 0, Required>,
1215 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1216 InstrStage<2, [A9_NPipe], 0>,
1217 InstrStage<2, [A9_LSUnit]>],
1218 [2, 1, 1, 1, 1, 1, 2]>,
1221 InstrItinData<IIC_VST3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1222 InstrStage<1, [A9_MUX0], 0>,
1223 InstrStage<1, [A9_DRegsN], 0, Required>,
1224 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1225 InstrStage<3, [A9_NPipe], 0>,
1226 InstrStage<3, [A9_LSUnit]>],
1230 InstrItinData<IIC_VST3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1231 InstrStage<1, [A9_MUX0], 0>,
1232 InstrStage<1, [A9_DRegsN], 0, Required>,
1233 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1234 InstrStage<3, [A9_NPipe], 0>,
1235 InstrStage<3, [A9_LSUnit]>],
1236 [2, 1, 1, 1, 1, 1, 2]>,
1239 InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1240 InstrStage<1, [A9_MUX0], 0>,
1241 InstrStage<1, [A9_DRegsN], 0, Required>,
1242 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1243 InstrStage<2, [A9_NPipe], 0>,
1244 InstrStage<2, [A9_LSUnit]>],
1245 [1, 1, 1, 1, 2, 2]>,
1248 InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1249 InstrStage<1, [A9_MUX0], 0>,
1250 InstrStage<1, [A9_DRegsN], 0, Required>,
1251 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1252 InstrStage<2, [A9_NPipe], 0>,
1253 InstrStage<2, [A9_LSUnit]>],
1254 [2, 1, 1, 1, 1, 1, 2, 2]>,
1257 InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1258 InstrStage<1, [A9_MUX0], 0>,
1259 InstrStage<1, [A9_DRegsN], 0, Required>,
1260 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1261 InstrStage<2, [A9_NPipe], 0>,
1262 InstrStage<2, [A9_LSUnit]>],
1263 [1, 1, 1, 1, 2, 2]>,
1266 InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1267 InstrStage<1, [A9_MUX0], 0>,
1268 InstrStage<1, [A9_DRegsN], 0, Required>,
1269 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1270 InstrStage<2, [A9_NPipe], 0>,
1271 InstrStage<2, [A9_LSUnit]>],
1272 [2, 1, 1, 1, 1, 1, 2, 2]>,
1275 // Double-register Integer Unary
1276 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1277 InstrStage<1, [A9_MUX0], 0>,
1278 InstrStage<1, [A9_DRegsN], 0, Required>,
1279 // Extra latency cycles since wbck is 6 cycles
1280 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1281 InstrStage<1, [A9_NPipe]>],
1284 // Quad-register Integer Unary
1285 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1286 InstrStage<1, [A9_MUX0], 0>,
1287 InstrStage<1, [A9_DRegsN], 0, Required>,
1288 // Extra latency cycles since wbck is 6 cycles
1289 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1290 InstrStage<1, [A9_NPipe]>],
1293 // Double-register Integer Q-Unary
1294 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1295 InstrStage<1, [A9_MUX0], 0>,
1296 InstrStage<1, [A9_DRegsN], 0, Required>,
1297 // Extra latency cycles since wbck is 6 cycles
1298 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1299 InstrStage<1, [A9_NPipe]>],
1302 // Quad-register Integer Q-Unary
1303 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1304 InstrStage<1, [A9_MUX0], 0>,
1305 InstrStage<1, [A9_DRegsN], 0, Required>,
1306 // Extra latency cycles since wbck is 6 cycles
1307 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1308 InstrStage<1, [A9_NPipe]>],
1311 // Double-register Integer Binary
1312 InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1313 InstrStage<1, [A9_MUX0], 0>,
1314 InstrStage<1, [A9_DRegsN], 0, Required>,
1315 // Extra latency cycles since wbck is 6 cycles
1316 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1317 InstrStage<1, [A9_NPipe]>],
1320 // Quad-register Integer Binary
1321 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1322 InstrStage<1, [A9_MUX0], 0>,
1323 InstrStage<1, [A9_DRegsN], 0, Required>,
1324 // Extra latency cycles since wbck is 6 cycles
1325 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1326 InstrStage<1, [A9_NPipe]>],
1329 // Double-register Integer Subtract
1330 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1331 InstrStage<1, [A9_MUX0], 0>,
1332 InstrStage<1, [A9_DRegsN], 0, Required>,
1333 // Extra latency cycles since wbck is 6 cycles
1334 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1335 InstrStage<1, [A9_NPipe]>],
1338 // Quad-register Integer Subtract
1339 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1340 InstrStage<1, [A9_MUX0], 0>,
1341 InstrStage<1, [A9_DRegsN], 0, Required>,
1342 // Extra latency cycles since wbck is 6 cycles
1343 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1344 InstrStage<1, [A9_NPipe]>],
1347 // Double-register Integer Shift
1348 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1349 InstrStage<1, [A9_MUX0], 0>,
1350 InstrStage<1, [A9_DRegsN], 0, Required>,
1351 // Extra latency cycles since wbck is 6 cycles
1352 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1353 InstrStage<1, [A9_NPipe]>],
1356 // Quad-register Integer Shift
1357 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1358 InstrStage<1, [A9_MUX0], 0>,
1359 InstrStage<1, [A9_DRegsN], 0, Required>,
1360 // Extra latency cycles since wbck is 6 cycles
1361 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1362 InstrStage<1, [A9_NPipe]>],
1365 // Double-register Integer Shift (4 cycle)
1366 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1367 InstrStage<1, [A9_MUX0], 0>,
1368 InstrStage<1, [A9_DRegsN], 0, Required>,
1369 // Extra latency cycles since wbck is 6 cycles
1370 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1371 InstrStage<1, [A9_NPipe]>],
1374 // Quad-register Integer Shift (4 cycle)
1375 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1376 InstrStage<1, [A9_MUX0], 0>,
1377 InstrStage<1, [A9_DRegsN], 0, Required>,
1378 // Extra latency cycles since wbck is 6 cycles
1379 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1380 InstrStage<1, [A9_NPipe]>],
1383 // Double-register Integer Binary (4 cycle)
1384 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1385 InstrStage<1, [A9_MUX0], 0>,
1386 InstrStage<1, [A9_DRegsN], 0, Required>,
1387 // Extra latency cycles since wbck is 6 cycles
1388 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1389 InstrStage<1, [A9_NPipe]>],
1392 // Quad-register Integer Binary (4 cycle)
1393 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1394 InstrStage<1, [A9_MUX0], 0>,
1395 InstrStage<1, [A9_DRegsN], 0, Required>,
1396 // Extra latency cycles since wbck is 6 cycles
1397 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1398 InstrStage<1, [A9_NPipe]>],
1401 // Double-register Integer Subtract (4 cycle)
1402 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1403 InstrStage<1, [A9_MUX0], 0>,
1404 InstrStage<1, [A9_DRegsN], 0, Required>,
1405 // Extra latency cycles since wbck is 6 cycles
1406 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1407 InstrStage<1, [A9_NPipe]>],
1410 // Quad-register Integer Subtract (4 cycle)
1411 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1412 InstrStage<1, [A9_MUX0], 0>,
1413 InstrStage<1, [A9_DRegsN], 0, Required>,
1414 // Extra latency cycles since wbck is 6 cycles
1415 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1416 InstrStage<1, [A9_NPipe]>],
1420 // Double-register Integer Count
1421 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1422 InstrStage<1, [A9_MUX0], 0>,
1423 InstrStage<1, [A9_DRegsN], 0, Required>,
1424 // Extra latency cycles since wbck is 6 cycles
1425 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1426 InstrStage<1, [A9_NPipe]>],
1429 // Quad-register Integer Count
1430 // Result written in N3, but that is relative to the last cycle of multicycle,
1431 // so we use 4 for those cases
1432 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1433 InstrStage<1, [A9_MUX0], 0>,
1434 InstrStage<1, [A9_DRegsN], 0, Required>,
1435 // Extra latency cycles since wbck is 7 cycles
1436 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1437 InstrStage<2, [A9_NPipe]>],
1440 // Double-register Absolute Difference and Accumulate
1441 InstrItinData<IIC_VABAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1442 InstrStage<1, [A9_MUX0], 0>,
1443 InstrStage<1, [A9_DRegsN], 0, Required>,
1444 // Extra latency cycles since wbck is 6 cycles
1445 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1446 InstrStage<1, [A9_NPipe]>],
1449 // Quad-register Absolute Difference and Accumulate
1450 InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1451 InstrStage<1, [A9_MUX0], 0>,
1452 InstrStage<1, [A9_DRegsN], 0, Required>,
1453 // Extra latency cycles since wbck is 6 cycles
1454 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1455 InstrStage<2, [A9_NPipe]>],
1458 // Double-register Integer Pair Add Long
1459 InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1460 InstrStage<1, [A9_MUX0], 0>,
1461 InstrStage<1, [A9_DRegsN], 0, Required>,
1462 // Extra latency cycles since wbck is 6 cycles
1463 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1464 InstrStage<1, [A9_NPipe]>],
1467 // Quad-register Integer Pair Add Long
1468 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1469 InstrStage<1, [A9_MUX0], 0>,
1470 InstrStage<1, [A9_DRegsN], 0, Required>,
1471 // Extra latency cycles since wbck is 6 cycles
1472 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1473 InstrStage<2, [A9_NPipe]>],
1477 // Double-register Integer Multiply (.8, .16)
1478 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1479 InstrStage<1, [A9_MUX0], 0>,
1480 InstrStage<1, [A9_DRegsN], 0, Required>,
1481 // Extra latency cycles since wbck is 6 cycles
1482 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1483 InstrStage<1, [A9_NPipe]>],
1486 // Quad-register Integer Multiply (.8, .16)
1487 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1488 InstrStage<1, [A9_MUX0], 0>,
1489 InstrStage<1, [A9_DRegsN], 0, Required>,
1490 // Extra latency cycles since wbck is 7 cycles
1491 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1492 InstrStage<2, [A9_NPipe]>],
1496 // Double-register Integer Multiply (.32)
1497 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1498 InstrStage<1, [A9_MUX0], 0>,
1499 InstrStage<1, [A9_DRegsN], 0, Required>,
1500 // Extra latency cycles since wbck is 7 cycles
1501 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1502 InstrStage<2, [A9_NPipe]>],
1505 // Quad-register Integer Multiply (.32)
1506 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1507 InstrStage<1, [A9_MUX0], 0>,
1508 InstrStage<1, [A9_DRegsN], 0, Required>,
1509 // Extra latency cycles since wbck is 9 cycles
1510 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1511 InstrStage<4, [A9_NPipe]>],
1514 // Double-register Integer Multiply-Accumulate (.8, .16)
1515 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1516 InstrStage<1, [A9_MUX0], 0>,
1517 InstrStage<1, [A9_DRegsN], 0, Required>,
1518 // Extra latency cycles since wbck is 6 cycles
1519 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1520 InstrStage<1, [A9_NPipe]>],
1523 // Double-register Integer Multiply-Accumulate (.32)
1524 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1525 InstrStage<1, [A9_MUX0], 0>,
1526 InstrStage<1, [A9_DRegsN], 0, Required>,
1527 // Extra latency cycles since wbck is 7 cycles
1528 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1529 InstrStage<2, [A9_NPipe]>],
1532 // Quad-register Integer Multiply-Accumulate (.8, .16)
1533 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1534 InstrStage<1, [A9_MUX0], 0>,
1535 InstrStage<1, [A9_DRegsN], 0, Required>,
1536 // Extra latency cycles since wbck is 7 cycles
1537 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1538 InstrStage<2, [A9_NPipe]>],
1541 // Quad-register Integer Multiply-Accumulate (.32)
1542 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1543 InstrStage<1, [A9_MUX0], 0>,
1544 InstrStage<1, [A9_DRegsN], 0, Required>,
1545 // Extra latency cycles since wbck is 9 cycles
1546 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1547 InstrStage<4, [A9_NPipe]>],
1552 InstrItinData<IIC_VMOV, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1553 InstrStage<1, [A9_MUX0], 0>,
1554 InstrStage<1, [A9_DRegsN], 0, Required>,
1555 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1556 InstrStage<1, [A9_NPipe]>],
1560 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1561 InstrStage<1, [A9_MUX0], 0>,
1562 InstrStage<1, [A9_DRegsN], 0, Required>,
1563 // Extra latency cycles since wbck is 6 cycles
1564 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1565 InstrStage<1, [A9_NPipe]>],
1568 // Double-register Permute Move
1569 InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1570 InstrStage<1, [A9_MUX0], 0>,
1571 InstrStage<1, [A9_DRegsN], 0, Required>,
1572 // Extra latency cycles since wbck is 6 cycles
1573 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1574 InstrStage<1, [A9_NPipe]>],
1577 // Quad-register Permute Move
1578 InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1579 InstrStage<1, [A9_MUX0], 0>,
1580 InstrStage<1, [A9_DRegsN], 0, Required>,
1581 // Extra latency cycles since wbck is 6 cycles
1582 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1583 InstrStage<1, [A9_NPipe]>],
1586 // Integer to Single-precision Move
1587 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1588 InstrStage<1, [A9_MUX0], 0>,
1589 InstrStage<1, [A9_DRegsN], 0, Required>,
1590 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1591 InstrStage<1, [A9_NPipe]>],
1594 // Integer to Double-precision Move
1595 InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1596 InstrStage<1, [A9_MUX0], 0>,
1597 InstrStage<1, [A9_DRegsN], 0, Required>,
1598 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1599 InstrStage<1, [A9_NPipe]>],
1602 // Single-precision to Integer Move
1603 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1604 InstrStage<1, [A9_MUX0], 0>,
1605 InstrStage<1, [A9_DRegsN], 0, Required>,
1606 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1607 InstrStage<1, [A9_NPipe]>],
1610 // Double-precision to Integer Move
1611 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1612 InstrStage<1, [A9_MUX0], 0>,
1613 InstrStage<1, [A9_DRegsN], 0, Required>,
1614 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1615 InstrStage<1, [A9_NPipe]>],
1618 // Integer to Lane Move
1619 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1620 InstrStage<1, [A9_MUX0], 0>,
1621 InstrStage<1, [A9_DRegsN], 0, Required>,
1622 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1623 InstrStage<2, [A9_NPipe]>],
1627 // Vector narrow move
1628 InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1629 InstrStage<1, [A9_MUX0], 0>,
1630 InstrStage<1, [A9_DRegsN], 0, Required>,
1631 // Extra latency cycles since wbck is 6 cycles
1632 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1633 InstrStage<1, [A9_NPipe]>],
1636 // Double-register FP Unary
1637 InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1638 InstrStage<1, [A9_MUX0], 0>,
1639 InstrStage<1, [A9_DRegsN], 0, Required>,
1640 // Extra latency cycles since wbck is 6 cycles
1641 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1642 InstrStage<1, [A9_NPipe]>],
1645 // Quad-register FP Unary
1646 // Result written in N5, but that is relative to the last cycle of multicycle,
1647 // so we use 6 for those cases
1648 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1649 InstrStage<1, [A9_MUX0], 0>,
1650 InstrStage<1, [A9_DRegsN], 0, Required>,
1651 // Extra latency cycles since wbck is 7 cycles
1652 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1653 InstrStage<2, [A9_NPipe]>],
1656 // Double-register FP Binary
1657 // FIXME: We're using this itin for many instructions and [2, 2] here is too
1659 InstrItinData<IIC_VBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1660 InstrStage<1, [A9_MUX0], 0>,
1661 InstrStage<1, [A9_DRegsN], 0, Required>,
1662 // Extra latency cycles since wbck is 6 cycles
1663 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1664 InstrStage<1, [A9_NPipe]>],
1669 InstrItinData<IIC_VPBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1670 InstrStage<1, [A9_MUX0], 0>,
1671 InstrStage<1, [A9_DRegsN], 0, Required>,
1672 // Extra latency cycles since wbck is 6 cycles
1673 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1674 InstrStage<1, [A9_NPipe]>],
1677 // Double-register FP VMUL
1678 InstrItinData<IIC_VFMULD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1679 InstrStage<1, [A9_MUX0], 0>,
1680 InstrStage<1, [A9_DRegsN], 0, Required>,
1681 // Extra latency cycles since wbck is 6 cycles
1682 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1683 InstrStage<1, [A9_NPipe]>],
1686 // Quad-register FP Binary
1687 // Result written in N5, but that is relative to the last cycle of multicycle,
1688 // so we use 6 for those cases
1689 // FIXME: We're using this itin for many instructions and [2, 2] here is too
1691 InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1692 InstrStage<1, [A9_MUX0], 0>,
1693 InstrStage<1, [A9_DRegsN], 0, Required>,
1694 // Extra latency cycles since wbck is 7 cycles
1695 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1696 InstrStage<2, [A9_NPipe]>],
1699 // Quad-register FP VMUL
1700 InstrItinData<IIC_VFMULQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1701 InstrStage<1, [A9_MUX0], 0>,
1702 InstrStage<1, [A9_DRegsN], 0, Required>,
1703 // Extra latency cycles since wbck is 7 cycles
1704 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1705 InstrStage<1, [A9_NPipe]>],
1708 // Double-register FP Multiply-Accumulate
1709 InstrItinData<IIC_VMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1710 InstrStage<1, [A9_MUX0], 0>,
1711 InstrStage<1, [A9_DRegsN], 0, Required>,
1712 // Extra latency cycles since wbck is 7 cycles
1713 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1714 InstrStage<2, [A9_NPipe]>],
1717 // Quad-register FP Multiply-Accumulate
1718 // Result written in N9, but that is relative to the last cycle of multicycle,
1719 // so we use 10 for those cases
1720 InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1721 InstrStage<1, [A9_MUX0], 0>,
1722 InstrStage<1, [A9_DRegsN], 0, Required>,
1723 // Extra latency cycles since wbck is 9 cycles
1724 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1725 InstrStage<4, [A9_NPipe]>],
1728 // Double-register Fused FP Multiply-Accumulate
1729 InstrItinData<IIC_VFMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1730 InstrStage<1, [A9_MUX0], 0>,
1731 InstrStage<1, [A9_DRegsN], 0, Required>,
1732 // Extra latency cycles since wbck is 7 cycles
1733 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1734 InstrStage<2, [A9_NPipe]>],
1737 // Quad-register Fused FP Multiply-Accumulate
1738 // Result written in N9, but that is relative to the last cycle of multicycle,
1739 // so we use 10 for those cases
1740 InstrItinData<IIC_VFMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1741 InstrStage<1, [A9_MUX0], 0>,
1742 InstrStage<1, [A9_DRegsN], 0, Required>,
1743 // Extra latency cycles since wbck is 9 cycles
1744 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1745 InstrStage<4, [A9_NPipe]>],
1748 // Double-register Reciprocal Step
1749 InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1750 InstrStage<1, [A9_MUX0], 0>,
1751 InstrStage<1, [A9_DRegsN], 0, Required>,
1752 // Extra latency cycles since wbck is 10 cycles
1753 InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
1754 InstrStage<1, [A9_NPipe]>],
1757 // Quad-register Reciprocal Step
1758 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1759 InstrStage<1, [A9_MUX0], 0>,
1760 InstrStage<1, [A9_DRegsN], 0, Required>,
1761 // Extra latency cycles since wbck is 11 cycles
1762 InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
1763 InstrStage<2, [A9_NPipe]>],
1766 // Double-register Permute
1767 InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1768 InstrStage<1, [A9_MUX0], 0>,
1769 InstrStage<1, [A9_DRegsN], 0, Required>,
1770 // Extra latency cycles since wbck is 6 cycles
1771 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1772 InstrStage<1, [A9_NPipe]>],
1775 // Quad-register Permute
1776 // Result written in N2, but that is relative to the last cycle of multicycle,
1777 // so we use 3 for those cases
1778 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1779 InstrStage<1, [A9_MUX0], 0>,
1780 InstrStage<1, [A9_DRegsN], 0, Required>,
1781 // Extra latency cycles since wbck is 7 cycles
1782 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1783 InstrStage<2, [A9_NPipe]>],
1786 // Quad-register Permute (3 cycle issue)
1787 // Result written in N2, but that is relative to the last cycle of multicycle,
1788 // so we use 4 for those cases
1789 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1790 InstrStage<1, [A9_MUX0], 0>,
1791 InstrStage<1, [A9_DRegsN], 0, Required>,
1792 // Extra latency cycles since wbck is 8 cycles
1793 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1794 InstrStage<3, [A9_NPipe]>],
1798 // Double-register VEXT
1799 InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1800 InstrStage<1, [A9_MUX0], 0>,
1801 InstrStage<1, [A9_DRegsN], 0, Required>,
1802 // Extra latency cycles since wbck is 6 cycles
1803 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1804 InstrStage<1, [A9_NPipe]>],
1807 // Quad-register VEXT
1808 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1809 InstrStage<1, [A9_MUX0], 0>,
1810 InstrStage<1, [A9_DRegsN], 0, Required>,
1811 // Extra latency cycles since wbck is 7 cycles
1812 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1813 InstrStage<2, [A9_NPipe]>],
1817 InstrItinData<IIC_VTB1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1818 InstrStage<1, [A9_MUX0], 0>,
1819 InstrStage<1, [A9_DRegsN], 0, Required>,
1820 // Extra latency cycles since wbck is 7 cycles
1821 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1822 InstrStage<2, [A9_NPipe]>],
1824 InstrItinData<IIC_VTB2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1825 InstrStage<1, [A9_MUX0], 0>,
1826 InstrStage<2, [A9_DRegsN], 0, Required>,
1827 // Extra latency cycles since wbck is 7 cycles
1828 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1829 InstrStage<2, [A9_NPipe]>],
1831 InstrItinData<IIC_VTB3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1832 InstrStage<1, [A9_MUX0], 0>,
1833 InstrStage<2, [A9_DRegsN], 0, Required>,
1834 // Extra latency cycles since wbck is 8 cycles
1835 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1836 InstrStage<3, [A9_NPipe]>],
1838 InstrItinData<IIC_VTB4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1839 InstrStage<1, [A9_MUX0], 0>,
1840 InstrStage<1, [A9_DRegsN], 0, Required>,
1841 // Extra latency cycles since wbck is 8 cycles
1842 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1843 InstrStage<3, [A9_NPipe]>],
1844 [4, 2, 2, 3, 3, 1]>,
1847 InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1848 InstrStage<1, [A9_MUX0], 0>,
1849 InstrStage<1, [A9_DRegsN], 0, Required>,
1850 // Extra latency cycles since wbck is 7 cycles
1851 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1852 InstrStage<2, [A9_NPipe]>],
1854 InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1855 InstrStage<1, [A9_MUX0], 0>,
1856 InstrStage<1, [A9_DRegsN], 0, Required>,
1857 // Extra latency cycles since wbck is 7 cycles
1858 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1859 InstrStage<2, [A9_NPipe]>],
1861 InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1862 InstrStage<1, [A9_MUX0], 0>,
1863 InstrStage<1, [A9_DRegsN], 0, Required>,
1864 // Extra latency cycles since wbck is 8 cycles
1865 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1866 InstrStage<3, [A9_NPipe]>],
1867 [4, 1, 2, 2, 3, 1]>,
// Itinerary entry for IIC_VTBX4.
// NOTE(review): the last stage reserves A9_NPipe for 2 cycles, whereas the
// IIC_VTB4 and IIC_VTBX3 entries reserve it for 3 cycles -- confirm intended.
1868 InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1869 InstrStage<1, [A9_MUX0], 0>,
1870 InstrStage<1, [A9_DRegsN], 0, Required>,
1871 // Extra latency cycles since wbck is 8 cycles
1872 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1873 InstrStage<2, [A9_NPipe]>],
1874 [4, 1, 2, 2, 3, 3, 1]>
1877 // ===---------------------------------------------------------------------===//
1878 // The following definitions describe the simpler per-operand machine model.
1879 // This works with MachineScheduler and will eventually replace itineraries.
// Helper class that stores a list of WriteSequence records in its Writes
// field so that users can take slices of it (e.g. Writes[0-3]).
// SchedModel is declared here but left unset ('?').
1881 class A9WriteLMOpsListType<list<WriteSequence> writes> {
1882 list <WriteSequence> Writes = writes;
1883 SchedMachineModel SchedModel = ?;
1886 // Cortex-A9 machine model for scheduling and other instruction cost heuristics.
1887 def CortexA9Model : SchedMachineModel {
1888 let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
1889 let MicroOpBufferSize = 56; // Based on available renamed registers.
1890 let LoadLatency = 2; // Optimistic load latency assuming bypass.
1891 // This is overridden by OperandCycles if the
1892 // Itineraries are queried instead.
1893 let MispredictPenalty = 8; // Based on estimate of pipeline depth.
// Keep the legacy CortexA9Itineraries attached to this model.
1895 let Itineraries = CortexA9Itineraries;
1897 // FIXME: Many vector operations were never given an itinerary. We
1898 // haven't mapped these to the new model either.
1899 let CompleteModel = 0;
1901 // FIXME: Remove when all errors have been fixed.
1902 let FullInstRWOverlapCheck = 0;
1905 //===----------------------------------------------------------------------===//
1906 // Define each kind of processor resource and number available.
1908 // The AGU unit has BufferSize=1 so that the latency between operations
1909 // that use it is considered to stall other operations.
1911 // The FP unit has BufferSize=0 so that it is a hard dispatch
1912 // hazard. No instruction may be dispatched while the unit is reserved.
1914 let SchedModel = CortexA9Model in {
// Processor resources; the template argument is the number of units available.
1916 def A9UnitALU : ProcResource<2>;
// A9UnitMul is declared with A9UnitALU as its Super resource.
1917 def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; }
// Address generation unit, BufferSize = 1.
1918 def A9UnitAGU : ProcResource<1> { let BufferSize = 1; }
// Load/store unit.
1919 def A9UnitLS : ProcResource<1>;
// FP unit, BufferSize = 0.
1920 def A9UnitFP : ProcResource<1> { let BufferSize = 0; }
// Branch unit.
1921 def A9UnitB : ProcResource<1>;
1923 //===----------------------------------------------------------------------===//
1924 // Define scheduler read/write types with their resources and latency on A9.
1926 // Consume an issue slot, but no processor resources. This is useful when all
1927 // other writes associated with the operand have NumMicroOps = 0.
1928 def A9WriteIssue : SchedWriteRes<[]> { let Latency = 0; }
1930 // Write an integer register.
1931 def A9WriteI : SchedWriteRes<[A9UnitALU]>;
1932 // Write an integer shifted-by register
1933 def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
// Basic ALU write: one A9UnitALU, no Latency override.
1936 def A9WriteALU : SchedWriteRes<[A9UnitALU]>;
1937 // ALU with operand shifted by immediate.
1938 def : WriteRes<WriteALUsi, [A9UnitALU]> { let Latency = 2; }
1939 // ALU with operand shifted by register.
1940 def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
// Multiply writes. A9WriteM lists A9UnitMul twice in its resource list.
// The *Hi writes (used for the high half of 64-bit results, see the
// WriteMUL64Hi/WriteMAC64Hi aliases) have one more cycle of latency than
// their counterpart and NumMicroOps = 0.
1943 def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; }
1944 def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5;
1945 let NumMicroOps = 0; }
1946 def A9WriteM16 : SchedWriteRes<[A9UnitMul]> { let Latency = 3; }
1947 def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4;
1948 let NumMicroOps = 0; }
// Map the target-level multiply / multiply-accumulate writes onto the A9
// writes above. The 32-bit and 64-bit-low forms share A9WriteM.
1949 def : SchedAlias<WriteMUL16, A9WriteM16>;
1950 def : SchedAlias<WriteMUL32, A9WriteM>;
1951 def : SchedAlias<WriteMUL64Lo, A9WriteM>;
1952 def : SchedAlias<WriteMUL64Hi, A9WriteMHi>;
1953 def : SchedAlias<WriteMAC16, A9WriteM16>;
1954 def : SchedAlias<WriteMAC32, A9WriteM>;
1955 def : SchedAlias<WriteMAC64Lo, A9WriteM>;
1956 def : SchedAlias<WriteMAC64Hi, A9WriteMHi>;
// ReadMUL and ReadMAC are given a read advance of 0 cycles.
1957 def : ReadAdvance<ReadMUL, 0>;
1958 def : ReadAdvance<ReadMAC, 0>;
1961 // Only one FP or AGU instruction may issue per cycle. We model this
1962 // by having FP instructions consume the AGU resource.
// All writes below use the same [A9UnitFP, A9UnitAGU] resource list and
// differ only in Latency.
1963 def A9WriteF : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
1964 def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
1965 def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
1966 def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
1967 def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; }
1969 def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
1970 def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; }
1971 def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; }
1972 def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 17; }
1973 def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 32; }
1975 // NEON has an odd mix of latencies. Simply name the write types by latency.
// A9WriteV<N> has Latency = N; latencies 1-7, 9 and 10 are defined (there is
// no A9WriteV8 here).
1976 def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
1977 def A9WriteV2 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 2; }
1978 def A9WriteV3 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 3; }
1979 def A9WriteV4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
1980 def A9WriteV5 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
1981 def A9WriteV6 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
1982 def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; }
1983 def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
1984 def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; }
// The target-level WriteVLD1-4 / WriteVST1-4 writes are defined for this
// model with an empty resource list and no overrides.
1986 def : WriteRes<WriteVLD1, []>;
1987 def : WriteRes<WriteVLD2, []>;
1988 def : WriteRes<WriteVLD3, []>;
1989 def : WriteRes<WriteVLD4, []>;
1990 def : WriteRes<WriteVST1, []>;
1991 def : WriteRes<WriteVST2, []>;
1992 def : WriteRes<WriteVST3, []>;
1993 def : WriteRes<WriteVST4, []>;
1995 // Reserve A9UnitFP for 2 consecutive cycles.
// ResourceCycles = [2, 1] pairs positionally with [A9UnitFP, A9UnitAGU]:
// 2 cycles on A9UnitFP, 1 cycle on A9UnitAGU.
// NOTE(review): the remaining fields of these records (e.g. Latency) are not
// visible in this listing -- confirm against the full definition.
1996 def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
1998 let ResourceCycles = [2, 1];
2000 def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
2002 let ResourceCycles = [2, 1];
2004 def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
2006 let ResourceCycles = [2, 1];
2009 // Branches don't have a def operand but still consume resources.
2010 def A9WriteB : SchedWriteRes<[A9UnitB]>;
2012 // Address generation.
// Uses the AGU but contributes no micro-op (NumMicroOps = 0).
2013 def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; }
// Integer load: one A9UnitLS, latency 3. Also used as the model's WriteLd.
2016 def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; }
2017 def : SchedAlias<WriteLd, A9WriteL>;
2018 // Load the upper 32-bits using the same micro-op.
2019 def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3;
2020 let NumMicroOps = 0; }
2021 // Offset shifted by register.
2022 def A9WriteLsi : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
2023 // Load (and zero extend) a byte.
2024 def A9WriteLb : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
// Byte load with shifted offset: one cycle more than A9WriteLb.
2025 def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> { let Latency = 5; }
2027 // Load or Store Float, aligned.
2028 def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 1; }
// Integer store: one A9UnitLS, no Latency override.
2031 def A9WriteS : SchedWriteRes<[A9UnitLS]>;
2033 //===----------------------------------------------------------------------===//
2034 // Define resources dynamically for load multiple variants.
2036 // Define helpers for extra latency without consuming resources.
2037 def A9WriteCycle1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; }
// A9WriteCycle2-8: A9WriteCycle1 repeated NumCycles times.
2038 foreach NumCycles = 2-8 in {
2039 def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
2040 } // foreach NumCycles
2042 // Define address generation sequences and predicates for 8 flavors of LDMs.
2043 foreach NumAddr = 1-8 in {
2045 // Define A9WriteAdr1-8 as a sequence of A9WriteAdr with additive
2046 // latency for instructions that generate multiple loads or stores.
2047 def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;
2049 // Define a predicate to select the LDM based on number of memory addresses.
// The predicate text compares (getNumLDMAddresses(*MI) + 1) / 2 with NumAddr,
// i.e. the address count halved and rounded up (assuming integer division).
2050 def A9LMAdr#NumAddr#Pred :
2051 SchedPredicate<"(TII->getNumLDMAddresses(*MI)+1)/2 == "#NumAddr>;
2053 } // foreach NumAddr
2055 // Fall-back for unknown LDMs.
// Matches when getNumLDMAddresses(*MI) returns 0.
2056 def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == 0">;
2058 // LDM/VLDM/VLDn address generation latency & resources.
2059 // Dynamically select the A9WriteAdrN sequence using a predicate.
2060 def A9WriteLMAdr : SchedWriteVariant<[
2061 SchedVar<A9LMAdr1Pred, [A9WriteAdr1]>,
2062 SchedVar<A9LMAdr2Pred, [A9WriteAdr2]>,
2063 SchedVar<A9LMAdr3Pred, [A9WriteAdr3]>,
2064 SchedVar<A9LMAdr4Pred, [A9WriteAdr4]>,
2065 SchedVar<A9LMAdr5Pred, [A9WriteAdr5]>,
2066 SchedVar<A9LMAdr6Pred, [A9WriteAdr6]>,
2067 SchedVar<A9LMAdr7Pred, [A9WriteAdr7]>,
2068 SchedVar<A9LMAdr8Pred, [A9WriteAdr8]>,
2069 // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers.
2070 SchedVar<A9LMUnknownPred, [A9WriteAdr2]>]>;
2072 // Define LDM Resources.
2073 // These take no issue resource, so they can be combined with other
2074 // writes like WriteB.
2075 // A9WriteLMLo takes a single LS resource and 2 cycles.
2076 def A9WriteLMLo : SchedWriteRes<[A9UnitLS]> { let Latency = 2;
2077 let NumMicroOps = 0; }
2078 // Assuming aligned access, the upper half of each pair is free with
2079 // the same latency.
2080 def A9WriteLMHi : SchedWriteRes<[]> { let Latency = 2;
2081 let NumMicroOps = 0; }
2082 // Each A9WriteL#N variant adds N cycles of latency without consuming
2083 // additional resources.
// A9WriteL<N> is A9WriteLMLo followed by A9WriteCycle<N>; A9WriteL<N>Hi is
// A9WriteLMHi followed by the same A9WriteCycle<N>.
2084 foreach NumAddr = 1-8 in {
2085 def A9WriteL#NumAddr : WriteSequence<
2086 [A9WriteLMLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
2087 def A9WriteL#NumAddr#Hi : WriteSequence<
2088 [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
2091 //===----------------------------------------------------------------------===//
2092 // LDM: Load multiple into 32-bit integer registers.
// Flat list of (A9WriteL<n>, A9WriteL<n>Hi) pairs for n = 1..8. A9WriteLM
// below selects prefixes of this list by index range.
2094 def A9WriteLMOpsList : A9WriteLMOpsListType<
2095 [A9WriteL1, A9WriteL1Hi,
2096 A9WriteL2, A9WriteL2Hi,
2097 A9WriteL3, A9WriteL3Hi,
2098 A9WriteL4, A9WriteL4Hi,
2099 A9WriteL5, A9WriteL5Hi,
2100 A9WriteL6, A9WriteL6Hi,
2101 A9WriteL7, A9WriteL7Hi,
2102 A9WriteL8, A9WriteL8Hi]>;
2104 // A9WriteLM variants expand into a pair of writes for each 64-bit
2105 // value loaded. When the number of registers is odd, the last
2106 // A9WriteLnHi is naturally ignored because the instruction has no
2107 // following def operands. These variants take no issue resource, so
2108 // they may need to be part of a WriteSequence that includes A9WriteIssue.
// The variant for A9LMAdr<N>Pred takes the first 2*N entries of the list.
2109 def A9WriteLM : SchedWriteVariant<[
2110 SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>,
2111 SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>,
2112 SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>,
2113 SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>,
2114 SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>,
2115 SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>,
2116 SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>,
2117 SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>,
2118 // For unknown LDMs, define the maximum number of writes, but only
2119 // make the first two consume resources.
// (From the third pair onward both slots use the resource-free *Hi write.)
2120 SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi,
2121 A9WriteL2, A9WriteL2Hi,
2122 A9WriteL3Hi, A9WriteL3Hi,
2123 A9WriteL4Hi, A9WriteL4Hi,
2124 A9WriteL5Hi, A9WriteL5Hi,
2125 A9WriteL6Hi, A9WriteL6Hi,
2126 A9WriteL7Hi, A9WriteL7Hi,
2127 A9WriteL8Hi, A9WriteL8Hi]>]> {
2131 //===----------------------------------------------------------------------===//
2132 // VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support.
2134 // A9WriteLfpOp is the same as A9WriteLSfp but takes no issue resources
2135 // so can be used in WriteSequences for single-issue instructions that
2136 // encapsulate multiple loads.
// Same resource list as A9WriteLSfp ([A9UnitLS, A9UnitFP]) with
// NumMicroOps = 0.
// NOTE(review): other fields of this record are not visible in this listing.
2137 def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
2139 let NumMicroOps = 0;
2142 foreach NumAddr = 1-8 in {
2144 // Helper for A9WriteLfp1-8: A sequence of fp loads with no micro-ops.
2145 def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>;
2147 // A9WriteLfp1-8 definitions are statically expanded into a sequence of
2148 // A9WriteLfpOps with additive latency that takes a single issue slot.
2149 // Used directly to describe NEON VLDn.
// The issue slot comes from the leading A9WriteIssue.
2150 def A9WriteLfp#NumAddr : WriteSequence<
2151 [A9WriteIssue, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
2153 // A9WriteLfp1-8Mov adds a cycle of latency and FP resource for
2154 // permuting loaded values.
// Here the sequence is led by A9WriteF instead of A9WriteIssue.
2155 def A9WriteLfp#NumAddr#Mov : WriteSequence<
2156 [A9WriteF, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
2158 } // foreach NumAddr
2160 // Define VLDM/VSTM PreRA resources.
2161 // A9WriteLMfpPreRA are dynamically expanded into the correct
2162 // A9WriteLfp1-8 sequence based on a predicate. This supports the
2163 // preRA VLDM variants in which all 64-bit loads are written to the
2164 // same tuple of either single or double precision registers.
// Each A9LMAdr<N>Pred selects the single write A9WriteLfp<N>.
2165 def A9WriteLMfpPreRA : SchedWriteVariant<[
2166 SchedVar<A9LMAdr1Pred, [A9WriteLfp1]>,
2167 SchedVar<A9LMAdr2Pred, [A9WriteLfp2]>,
2168 SchedVar<A9LMAdr3Pred, [A9WriteLfp3]>,
2169 SchedVar<A9LMAdr4Pred, [A9WriteLfp4]>,
2170 SchedVar<A9LMAdr5Pred, [A9WriteLfp5]>,
2171 SchedVar<A9LMAdr6Pred, [A9WriteLfp6]>,
2172 SchedVar<A9LMAdr7Pred, [A9WriteLfp7]>,
2173 SchedVar<A9LMAdr8Pred, [A9WriteLfp8]>,
2174 // For unknown VLDM/VSTM PreRA, assume 2xS registers.
2175 SchedVar<A9LMUnknownPred, [A9WriteLfp2]>]>;
2177 // Define VLDM/VSTM PostRA Resources.
2178 // A9WriteLMfpLo takes a LS and FP resource and one issue slot but no latency.
2179 def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 0; }
2181 foreach NumAddr = 1-8 in {
2183 // Each A9WriteLMfp#N variant adds N cycles of latency without consuming
2184 // additional resources.
2185 def A9WriteLMfp#NumAddr : WriteSequence<
2186 [A9WriteLMfpLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
2188 // Assuming aligned access, the upper half of each pair is free with
2189 // the same latency.
// Note the Hi sequence starts with the integer A9WriteLMHi write.
2190 def A9WriteLMfp#NumAddr#Hi : WriteSequence<
2191 [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
2193 } // foreach NumAddr
2195 // VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a
2196 // pair of writes for each 64-bit data loaded. When the number of
2197 // registers is odd, the last WriteLMfpnHi is naturally ignored because
2198 // the instruction has no following def operands.
// List layout: indices 0-7 hold A9WriteLMfp1-8; indices 8-22 hold the Hi
// writes (A9WriteLMfp1Hi once, A9WriteLMfp2Hi..8Hi twice each). The trailing
// comments give the index range of each row.
2200 def A9WriteLMfpPostRAOpsList : A9WriteLMOpsListType<
2201 [A9WriteLMfp1, A9WriteLMfp2, // 0-1
2202 A9WriteLMfp3, A9WriteLMfp4, // 2-3
2203 A9WriteLMfp5, A9WriteLMfp6, // 4-5
2204 A9WriteLMfp7, A9WriteLMfp8, // 6-7
2205 A9WriteLMfp1Hi, // 8-8
2206 A9WriteLMfp2Hi, A9WriteLMfp2Hi, // 9-10
2207 A9WriteLMfp3Hi, A9WriteLMfp3Hi, // 11-12
2208 A9WriteLMfp4Hi, A9WriteLMfp4Hi, // 13-14
2209 A9WriteLMfp5Hi, A9WriteLMfp5Hi, // 15-16
2210 A9WriteLMfp6Hi, A9WriteLMfp6Hi, // 17-18
2211 A9WriteLMfp7Hi, A9WriteLMfp7Hi, // 19-20
2212 A9WriteLMfp8Hi, A9WriteLMfp8Hi]>; // 21-22
// For A9LMAdr<N>Pred the slice is the first N entries (0 .. N-1) followed by
// N Hi entries (N+7 .. 2N+6), the last of which is A9WriteLMfp<N>Hi.
2214 def A9WriteLMfpPostRA : SchedWriteVariant<[
2215 SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>,
2216 SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>,
2217 SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>,
2218 SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>,
2219 SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>,
2220 SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>,
2221 SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>,
2222 SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>,
2223 // For unknown LDMs, define the maximum number of writes, but only
2224 // make the first two consume resources. We are optimizing for the case
2225 // where the operands are DPRs, and this determines the first eight
2226 // types. The remaining eight types are filled to cover the case
2227 // where the operands are SPRs.
2228 SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp2,
2229 A9WriteLMfp3Hi, A9WriteLMfp4Hi,
2230 A9WriteLMfp5Hi, A9WriteLMfp6Hi,
2231 A9WriteLMfp7Hi, A9WriteLMfp8Hi,
2232 A9WriteLMfp5Hi, A9WriteLMfp5Hi,
2233 A9WriteLMfp6Hi, A9WriteLMfp6Hi,
2234 A9WriteLMfp7Hi, A9WriteLMfp7Hi,
2235 A9WriteLMfp8Hi, A9WriteLMfp8Hi]>]> {
2239 // Distinguish between our multiple MI-level forms of the same
2240 // VLDM/VSTM instructions.
// Both predicates inspect the register of operand 0: virtual selects the
// PreRA form, physical selects the PostRA form.
2241 def A9PreRA : SchedPredicate<
2242 "Register::isVirtualRegister(MI->getOperand(0).getReg())">;
2243 def A9PostRA : SchedPredicate<
2244 "Register::isPhysicalRegister(MI->getOperand(0).getReg())">;
2246 // VLDM represents all destination registers as a single register
2247 // tuple, unlike LDM. So the number of write operands is not variadic.
2248 def A9WriteLMfp : SchedWriteVariant<[
2249 SchedVar<A9PreRA, [A9WriteLMfpPreRA]>,
2250 SchedVar<A9PostRA, [A9WriteLMfpPostRA]>]>;
2252 //===----------------------------------------------------------------------===//
2253 // Resources for other (non-LDM/VLDM) Variants.
2255 // These mov immediate writers are unconditionally expanded with
2256 // additive latency.
// Each is two A9WriteI writes, optionally followed by WriteALU (pc) or
// A9WriteL (ld).
2257 def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>;
2258 def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
2259 def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;
2261 // Some ALU operations can read loaded integer values one cycle early.
// The 1-cycle advance applies only to the load writes listed here.
2262 def A9ReadALU : SchedReadAdvance<1,
2263 [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
2264 A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
2265 A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
2266 A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi,
2267 A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi]>;
2269 // Read types for operands that are unconditionally read in cycle N
2270 // after the instruction issues, decreases producer latency by N-1.
2271 def A9Read2 : SchedReadAdvance<1>;
2272 def A9Read3 : SchedReadAdvance<2>;
2273 def A9Read4 : SchedReadAdvance<3>;
2275 //===----------------------------------------------------------------------===//
2276 // Map itinerary classes to scheduler read/write resources per operand.
2278 // For ARM, we piggyback scheduler resources on the Itinerary classes
2279 // to avoid perturbing the existing instruction definitions.
2281 // This table follows the ARM Cortex-A9 Technical Reference Manuals,
// Integer moves. Each ItinRW maps the itinerary classes in its second list
// to the write/read types in its first list.
2284 def :ItinRW<[WriteALU], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
2285 IIC_iMVNi,IIC_iMVNsi,
2286 IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>;
2287 def :ItinRW<[WriteALU, A9ReadALU],[IIC_iMVNr]>;
2288 def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
// Two-instruction move-immediate forms use the A9WriteI2* sequences.
2290 def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>;
2291 def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
2292 def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;
// Integer ALU, bit, compare and test classes.
2294 def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
2295 def :ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
2296 def :ItinRW<[WriteALU, A9ReadALU, A9ReadALU],[IIC_iALUr,IIC_iCMPr]>;
2297 def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
2298 def :ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>;
2299 def :ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB
2300 def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
2301 def :ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr,IIC_iCMPsr]>;
2303 // A9WriteHi ignored for MUL32.
2304 def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32,
2305 IIC_iMUL64,IIC_iMAC64]>;
2306 // FIXME: SMLALxx needs itin classes
2307 def :ItinRW<[A9WriteM16, A9WriteM16Hi], [IIC_iMUL16,IIC_iMAC16]>;
2309 // TODO: For floating-point ops, we model the pipeline forwarding
2310 // latencies here. WAW latencies are sometimes longer.
// FP status/move/unary/compare classes share A9WriteFMov.
2312 def :ItinRW<[A9WriteFMov], [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI,
2313 IIC_fpUNA32, IIC_fpUNA64,
2314 IIC_fpCMP32, IIC_fpCMP64]>;
// IIC_fpMOVDI lists A9WriteFMov twice (one per write operand).
2315 def :ItinRW<[A9WriteFMov, A9WriteFMov], [IIC_fpMOVDI]>;
// Conversions and FP ALU classes share A9WriteF.
2316 def :ItinRW<[A9WriteF], [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS,
2317 IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI,
2318 IIC_fpALU32, IIC_fpALU64]>;
2319 def :ItinRW<[A9WriteFMulS], [IIC_fpMUL32]>;
2320 def :ItinRW<[A9WriteFMulD], [IIC_fpMUL64]>;
2321 def :ItinRW<[A9WriteFMAS], [IIC_fpMAC32]>;
2322 def :ItinRW<[A9WriteFMAD], [IIC_fpMAC64]>;
2323 def :ItinRW<[A9WriteFDivS], [IIC_fpDIV32]>;
2324 def :ItinRW<[A9WriteFDivD], [IIC_fpDIV64]>;
2325 def :ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>;
2326 def :ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>;
// Branches.
2328 def :ItinRW<[A9WriteB], [IIC_Br]>;
2330 // A9 PLD is processed in a dedicated unit.
// Hence IIC_Preload is mapped to an empty write list.
2331 def :ItinRW<[], [IIC_Preload]>;
2333 // Note: We must assume that loads are aligned, since the machine
2334 // model cannot know this statically and A9 ignores alignment hints.
2336 // A9WriteAdr consumes AGU regardless of address writeback. But its
2337 // latency is only relevant for users of an updated address.
// Integer loads: the load write comes first, the address write second.
// Byte/halfword classes use the two-step A9WriteAdr2 address sequence.
2338 def :ItinRW<[A9WriteL, A9WriteAdr], [IIC_iLoad_i,IIC_iLoad_r,
2339 IIC_iLoad_iu,IIC_iLoad_ru]>;
2340 def :ItinRW<[A9WriteLsi, A9WriteAdr], [IIC_iLoad_si,IIC_iLoad_siu]>;
2341 def :ItinRW<[A9WriteLb, A9WriteAdr2], [IIC_iLoad_bh_i,IIC_iLoad_bh_r,
2342 IIC_iLoad_bh_iu,IIC_iLoad_bh_ru]>;
2343 def :ItinRW<[A9WriteLbsi, A9WriteAdr2], [IIC_iLoad_bh_si,IIC_iLoad_bh_siu]>;
2344 def :ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr], [IIC_iLoad_d_i,IIC_iLoad_d_r,
2346 // Store either has no def operands, or the one def for address writeback.
2347 def :ItinRW<[A9WriteAdr, A9WriteS], [IIC_iStore_i, IIC_iStore_r,
2348 IIC_iStore_iu, IIC_iStore_ru,
2349 IIC_iStore_d_i, IIC_iStore_d_r,
2351 def :ItinRW<[A9WriteAdr2, A9WriteS], [IIC_iStore_si, IIC_iStore_siu,
2352 IIC_iStore_bh_i, IIC_iStore_bh_r,
2353 IIC_iStore_bh_iu, IIC_iStore_bh_ru]>;
2354 def :ItinRW<[A9WriteAdr3, A9WriteS], [IIC_iStore_bh_si, IIC_iStore_bh_siu]>;
2356 // A9WriteLM will be expanded into a separate write for each def
2357 // operand. Address generation consumes resources, but A9WriteLMAdr
2358 // is listed after all def operands, so has no effective latency.
2360 // Note: A9WriteLM expands into an even number of def operands. The
2361 // actual number of def operands may be less by one.
2362 def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue], [IIC_iLoad_m, IIC_iPop]>;
2364 // Load multiple with address writeback has an extra def operand in
2365 // front of the loaded registers.
2367 // Reuse the load-multiple variants for store-multiple because the
2368 // resources are identical. For stores only the address writeback
2369 // has a def operand so the WriteL latencies are unused.
2370 def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu,
// Load-multiple / pop that also branches: A9WriteB is appended in place of
// A9WriteIssue.
2373 def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>;
// Load combined with an ALU operation.
2374 def :ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>;
// Scalar FP loads.
2376 def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;
// FP load-multiple; the address write comes first in the updating (_mu) form.
2378 def :ItinRW<[A9WriteLMfp, A9WriteLMAdr], [IIC_fpLoad_m]>;
2379 def :ItinRW<[A9WriteLMAdr, A9WriteLMfp], [IIC_fpLoad_mu]>;
// FP stores, single and multiple, list the address write first.
2380 def :ItinRW<[A9WriteAdr, A9WriteLSfp], [IIC_fpStore32, IIC_fpStore64,
2381 IIC_fpStore_m, IIC_fpStore_mu]>;
2383 // Note: Unlike VLDM, VLD1 expects the writeback operand after the
// NEON VLDn classes: the load sequence is listed first, the address sequence
// second. The *Mov sequences are used for the dup, lane, VLD2/3/4 and VLD2x2
// classes.
2385 def :ItinRW<[A9WriteLfp1, A9WriteAdr1], [IIC_VLD1, IIC_VLD1u,
2386 IIC_VLD1x2, IIC_VLD1x2u]>;
2387 def :ItinRW<[A9WriteLfp2, A9WriteAdr2], [IIC_VLD1x3, IIC_VLD1x3u,
2388 IIC_VLD1x4, IIC_VLD1x4u,
2389 IIC_VLD4dup, IIC_VLD4dupu]>;
2390 def :ItinRW<[A9WriteLfp1Mov, A9WriteAdr1], [IIC_VLD1dup, IIC_VLD1dupu,
2391 IIC_VLD2, IIC_VLD2u,
2392 IIC_VLD2dup, IIC_VLD2dupu]>;
// NOTE(review): this entry pairs A9WriteLfp2Mov with A9WriteAdr1, while every
// other entry in this group uses matching counts -- confirm intended.
2393 def :ItinRW<[A9WriteLfp2Mov, A9WriteAdr1], [IIC_VLD1ln, IIC_VLD1lnu,
2394 IIC_VLD2x2, IIC_VLD2x2u,
2395 IIC_VLD2ln, IIC_VLD2lnu]>;
2396 def :ItinRW<[A9WriteLfp3Mov, A9WriteAdr3], [IIC_VLD3, IIC_VLD3u,
2397 IIC_VLD3dup, IIC_VLD3dupu]>;
2398 def :ItinRW<[A9WriteLfp4Mov, A9WriteAdr4], [IIC_VLD4, IIC_VLD4u,
2399 IIC_VLD4ln, IIC_VLD4lnu]>;
2400 def :ItinRW<[A9WriteLfp5Mov, A9WriteAdr5], [IIC_VLD3ln, IIC_VLD3lnu]>;
2402 // Vector stores use similar resources to vector loads, so use the
2403 // same write types. The address write must be first for stores with
2404 // address writeback.
// NEON store classes mapped to the one-step address/load-style write pair.
2405 def :ItinRW<[A9WriteAdr1, A9WriteLfp1], [IIC_VST1, IIC_VST1u,
2406 IIC_VST1x2, IIC_VST1x2u,
2407 IIC_VST1ln, IIC_VST1lnu,
2408 IIC_VST2, IIC_VST2u,
2409 IIC_VST2x2, IIC_VST2x2u,
2410 IIC_VST2ln, IIC_VST2lnu]>;
// NEON store classes mapped to the two-step write pair.
2411 def :ItinRW<[A9WriteAdr2, A9WriteLfp2], [IIC_VST1x3, IIC_VST1x3u,
2412 IIC_VST1x4, IIC_VST1x4u,
2413 IIC_VST3, IIC_VST3u,
2414 IIC_VST3ln, IIC_VST3lnu,
2415 IIC_VST4, IIC_VST4u,
2416 IIC_VST4ln, IIC_VST4lnu]>;
// NEON move classes, grouped by result latency (2, 1 and 3 cycles).
2419 def :ItinRW<[A9WriteV2], [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>;
2420 def :ItinRW<[A9WriteV1], [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>;
2421 def :ItinRW<[A9WriteV3], [IIC_VMOVISL, IIC_VMOVN]>;
2423 // NEON integer arithmetic
// In the entries below the first list element is the result write; the
// following A9Read<N> elements are read advances for the source operands.
2425 // VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL
2426 def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VBINiD, IIC_VBINiQ]>;
2427 // VSUB/VMVN/VCLSD/VCLZD/VCNTD
2428 def :ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>;
2429 // VADDL/VSUBL/VNEG are mapped later under IIC_SHLi.
2431 // VHADD/VRHADD/VQADD/VTST/VADH/VRADH
2432 def :ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>;
2434 // VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL
2435 def :ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>;
2437 def :ItinRW<[A9WriteV4], [IIC_VQUNAiD, IIC_VQUNAiQ]>;
2439 def :ItinRW<[A9WriteV4, A9Read2], [IIC_VUNAiD, IIC_VUNAiQ]>;
2440 // VPADD/VPADDL are mapped later under IIC_SHLi.
2442 // VCLSQ/VCLZQ/VCNTQ, takes two cycles.
2443 def :ItinRW<[A9Write2V4, A9Read3], [IIC_VCNTiQ]>;
2444 // VMOVimm/VMVNimm/VORRimm/VBICimm
2445 def :ItinRW<[A9WriteV3], [IIC_VMOVImm]>;
2446 def :ItinRW<[A9WriteV6, A9Read3, A9Read2], [IIC_VABAD, IIC_VABAQ]>;
2447 def :ItinRW<[A9WriteV6, A9Read3], [IIC_VPALiD, IIC_VPALiQ]>;
2449 // NEON integer multiply
2451 // Note: these don't quite match the timing docs, but they do match
2452 // the original A9 itinerary.
// The 32-bit element forms use the A9Write2V* writes, which reserve
// A9UnitFP for 2 cycles.
2453 def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VMULi16D]>;
2454 def :ItinRW<[A9WriteV7, A9Read2, A9Read2], [IIC_VMULi16Q]>;
2455 def :ItinRW<[A9Write2V7, A9Read2], [IIC_VMULi32D]>;
2456 def :ItinRW<[A9Write2V9, A9Read2], [IIC_VMULi32Q]>;
2457 def :ItinRW<[A9WriteV6, A9Read3, A9Read2, A9Read2], [IIC_VMACi16D]>;
2458 def :ItinRW<[A9WriteV7, A9Read3, A9Read2, A9Read2], [IIC_VMACi16Q]>;
2459 def :ItinRW<[A9Write2V7, A9Read3, A9Read2], [IIC_VMACi32D]>;
2460 def :ItinRW<[A9Write2V9, A9Read3, A9Read2], [IIC_VMACi32Q]>;
2462 // NEON integer shift
2463 // TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles.
2464 def :ItinRW<[A9WriteV3], [IIC_VSHLiD, IIC_VSHLiQ]>;
2465 def :ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>;
// NEON permute, VEXT and table-lookup classes. Entries with two leading
// writes describe two result operands. ReadDefault is used as a placeholder
// for a source operand that gets no read advance.
2468 def :ItinRW<[A9WriteV2, A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
2469 def :ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2],
2470 [IIC_VPERMQ3, IIC_VEXTQ]>;
2471 def :ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>;
2472 def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VTB2]>;
2473 def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3], [IIC_VTB3]>;
2474 def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3], [IIC_VTB4]>;
2475 def :ItinRW<[A9WriteV3, ReadDefault, A9Read2], [IIC_VTBX1]>;
2476 def :ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2], [IIC_VTBX2]>;
2477 def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3], [IIC_VTBX3]>;
2478 def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3],
2481 // NEON floating-point
// In each D/Q pair below, the Q-register class uses a write one cycle longer
// than the D-register class.
2482 def :ItinRW<[A9WriteV5, A9Read2, A9Read2], [IIC_VBIND]>;
2483 def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VBINQ]>;
2484 def :ItinRW<[A9WriteV5, A9Read2], [IIC_VUNAD, IIC_VFMULD]>;
2485 def :ItinRW<[A9WriteV6, A9Read2], [IIC_VUNAQ, IIC_VFMULQ]>;
2486 def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
2487 def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
2488 def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
2489 def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;
2491 // Map SchedRWs that are identical for cortexa9 to existing resources.
// Both WriteALUsr and WriteALUSsr alias A9WriteALUsr; both ReadALU and
// ReadALUsr alias A9ReadALU.
2492 def : SchedAlias<WriteALU, A9WriteALU>;
2493 def : SchedAlias<WriteALUsr, A9WriteALUsr>;
2494 def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
2495 def : SchedAlias<ReadALU, A9ReadALU>;
2496 def : SchedAlias<ReadALUsr, A9ReadALU>;
2497 def : SchedAlias<WriteST, A9WriteS>;
2499 // ===---------------------------------------------------------------------===//
2500 // Floating-point. Map target defined SchedReadWrite to processor specific ones
// WriteFPCVT is defined directly with the same resources and latency as
// A9WriteF; the remaining FP writes alias the A9 FP writes.
2502 def : WriteRes<WriteFPCVT, [A9UnitFP, A9UnitAGU]> { let Latency = 4; }
2503 def : SchedAlias<WriteFPMOV, A9WriteFMov>;
2505 def : SchedAlias<WriteFPALU32, A9WriteF>;
2506 def : SchedAlias<WriteFPALU64, A9WriteF>;
2508 def : SchedAlias<WriteFPMUL32, A9WriteFMulS>;
2509 def : SchedAlias<WriteFPMUL64, A9WriteFMulD>;
2511 def : SchedAlias<WriteFPMAC32, A9WriteFMAS>;
2512 def : SchedAlias<WriteFPMAC64, A9WriteFMAD>;
2514 def : SchedAlias<WriteFPDIV32, A9WriteFDivS>;
2515 def : SchedAlias<WriteFPDIV64, A9WriteFDivD>;
2516 def : SchedAlias<WriteFPSQRT32, A9WriteFSqrtS>;
2517 def : SchedAlias<WriteFPSQRT64, A9WriteFSqrtD>;
// ReadFPMUL and ReadFPMAC are given a read advance of 0 cycles.
2519 def : ReadAdvance<ReadFPMUL, 0>;
2520 def : ReadAdvance<ReadFPMAC, 0>;
2522 // ===---------------------------------------------------------------------===//
2523 // Subtarget-specific overrides. Map opcodes to list of SchedReadWrite types.
2525 def : InstRW< [WriteALU],
2526 (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr",
2528 def : InstRW< [WriteALUsi], (instrs ANDrsi, ORRrsi, EORrsi, BICrsi)>;
2529 def : InstRW< [WriteALUsr], (instrs ANDrsr, ORRrsr, EORrsr, BICrsr)>;
2532 def : SchedAlias<WriteCMP, A9WriteALU>;
2533 def : SchedAlias<WriteCMPsi, A9WriteALU>;
2534 def : SchedAlias<WriteCMPsr, A9WriteALU>;
2536 def : InstRW< [A9WriteIsr], (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi",
2538 def : InstRW< [WriteALU, A9ReadALU], (instregex "MVNr")>;
2539 def : InstRW< [A9WriteI2], (instregex "MOVCCi32imm", "MOVi32imm")>;
2540 def : InstRW< [A9WriteI2pc], (instregex "MOV_ga_pcrel")>;
2541 def : InstRW< [A9WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
2543 def : InstRW< [WriteALU], (instregex "SEL")>;
2545 def : InstRW< [WriteALUsi], (instregex "BFC", "BFI", "UBFX", "SBFX")>;
2547 def : InstRW< [A9WriteM],
2548 (instregex "MUL", "MULv5", "SMMUL", "SMMULR", "MLA", "MLAv5", "MLS",
2549 "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>;
2550 def : InstRW< [A9WriteM, A9WriteMHi],
2551 (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$", "UMLAL",
2552 "UMAAL", "SMLALv5", "UMLALv5", "SMLALBB", "SMLALBT", "SMLALTB",
2554 // FIXME: These instructions used to have NoItinerary. Just copied the one from above.
2555 def : InstRW< [A9WriteM, A9WriteMHi],
2556 (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD", "SMLSDX",
2557 "SMLSLD", "SMLSLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>;
2559 def : InstRW<[A9WriteM16, A9WriteM16Hi],
2560 (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT", "SMULWB", "SMULWT")>;
2561 def : InstRW<[A9WriteM16, A9WriteM16Hi],
2562 (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLAWB", "SMLAWT")>;
// Integer load overrides, selected by opcode-name regex.
// Word loads: immediate offset and PIC forms.
2564 def : InstRW<[A9WriteL], (instregex "LDRi12", "PICLDR$")>;
// Word load with shifted-register offset.
2565 def : InstRW<[A9WriteLsi], (instregex "LDRrs")>;
// Byte/halfword loads.
2566 def : InstRW<[A9WriteLb],
2567 (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH", "PICLDRSB",
2568 "LDRH", "LDRSH", "LDRSB")>;
// Byte load with shifted-register offset. The pattern must name the byte
// form (LDRBrs): matching "LDRrs" here would give the word load two
// conflicting InstRW mappings (A9WriteLsi above and A9WriteLbsi).
2569 def : InstRW<[A9WriteLbsi], (instregex "LDRBrs")>;
// WriteDIV is given no resources and zero latency in this model.
2571 def : WriteRes<WriteDIV, []> { let Latency = 0; }
// All three branch writes use the branch unit.
2573 def : WriteRes<WriteBr, [A9UnitB]>;
2574 def : WriteRes<WriteBrL, [A9UnitB]>;
2575 def : WriteRes<WriteBrTbl, [A9UnitB]>;
// Preload consumes no modelled resource.
2576 def : WriteRes<WritePreLd, []>;
// No-ops: zero latency and zero micro-ops.
2577 def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
2578 } // SchedModel = CortexA9Model