[InstCombine] Signed saturation patterns
[llvm-complete.git] / lib / Target / ARM / ARMScheduleA9.td
blob3f0b71afd9779d533337511eb911951e6a54c15e
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //===----------------------------------------------------------------------===//
13 // ===---------------------------------------------------------------------===//
14 // This section contains legacy support for itineraries. This is
15 // required until SD and PostRA schedulers are replaced by MachineScheduler.
18 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
19 // Technical Reference Manual".
21 // Functional units
22 def A9_Issue0  : FuncUnit; // Issue 0
23 def A9_Issue1  : FuncUnit; // Issue 1
24 def A9_Branch  : FuncUnit; // Branch
25 def A9_ALU0    : FuncUnit; // ALU / MUL pipeline 0 (only unit used by IIC_iMUL*/iMAC*)
26 def A9_ALU1    : FuncUnit; // ALU pipeline 1
27 def A9_AGU     : FuncUnit; // Address generation unit for ld / st
28 def A9_NPipe   : FuncUnit; // NEON pipeline (the VFP itineraries below use it too)
29 def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer
30 def A9_LSUnit  : FuncUnit; // Load / store unit
31 def A9_DRegsVFP: FuncUnit; // Artificial FU: FP register set, VFP side
32 def A9_DRegsN  : FuncUnit; // Artificial FU: FP register set, NEON side
34 // Bypasses
35 def A9_LdBypass : Bypass; // Named in the per-operand bypass lists of the load and ALU/compare itineraries
37 def CortexA9Itineraries : ProcessorItineraries<
38   [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
39    A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
40   [A9_LdBypass], [
41   // Two fully-pipelined integer ALU pipelines
43   //
44   // Move instructions, unconditional
45   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
46                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
47   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
48                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
49   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
50                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
51   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
52                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
53   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
54                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
55                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
56   InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
57                                   InstrStage<1, [A9_ALU0, A9_ALU1]>,
58                                   InstrStage<1, [A9_ALU0, A9_ALU1]>,
59                                   InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
60   InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
61                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
62                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
63                                InstrStage<1, [A9_MUX0], 0>,
64                                InstrStage<1, [A9_AGU], 0>,
65                                InstrStage<1, [A9_LSUnit]>], [5]>,
66   //
67   // MVN instructions
68   InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
69                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
70                               [1]>,
71   InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
72                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
73                               [1, 1], [NoBypass, A9_LdBypass]>,
74   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
75                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
76                               [2, 1]>,
77   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
78                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
79                               [3, 1, 1]>,
80   //
81   // No operand cycles
82   InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
83                                InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
84   //
85   // Binary Instructions that produce a result
86   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
87                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
88                             [1, 1], [NoBypass, A9_LdBypass]>,
89   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
90                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
91                             [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
92   InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
93                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
94                             [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
95   InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
96                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
97                             [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
98   InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
99                              InstrStage<3, [A9_ALU0, A9_ALU1]>],
100                             [3, 1, 1, 1],
101                             [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
102   //
103   // Bitwise Instructions that produce a result
104   InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
105                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
106   InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
107                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
108   InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
109                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
110   InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
111                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
112   //
113   // Unary Instructions that produce a result
115   // CLZ, RBIT, etc.
116   InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
117                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
119   // BFC, BFI, UBFX, SBFX
120   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
121                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
123   //
124   // Zero and sign extension instructions
125   InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
126                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
127   InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
128                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
129   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
130                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
131   //
132   // Compare instructions
133   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
134                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
135                                [1], [A9_LdBypass]>,
136   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
137                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
138                                [1, 1], [A9_LdBypass, A9_LdBypass]>,
139   InstrItinData<IIC_iCMPsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
140                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
141                                 [1, 1], [A9_LdBypass, NoBypass]>,
142   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
143                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
144                               [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
145   //
146   // Test instructions
147   InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
148                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
149   InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
150                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
151   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
152                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
153   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
154                                InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
155   //
156   // Move instructions, conditional
157   // FIXME: Correctly model the extra input dep on the destination.
158   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
159                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
160   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
161                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
162   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
163                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
164   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
165                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
166   InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
167                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
168                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
169                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
171   // Integer multiply pipeline
172   //
173   InstrItinData<IIC_iMUL16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
174                                InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
175   InstrItinData<IIC_iMAC16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
176                                InstrStage<2, [A9_ALU0]>],
177                               [3, 1, 1, 1]>,
178   InstrItinData<IIC_iMUL32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
179                                InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
180   InstrItinData<IIC_iMAC32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
181                                InstrStage<2, [A9_ALU0]>],
182                               [4, 1, 1, 1]>,
183   InstrItinData<IIC_iMUL64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
184                                InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
185   InstrItinData<IIC_iMAC64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
186                                InstrStage<3, [A9_ALU0]>],
187                               [4, 5, 1, 1]>,
188   // Integer load pipeline
189   // FIXME: The timings are some rough approximations
190   //
191   // Immediate offset
192   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
193                                  InstrStage<1, [A9_MUX0], 0>,
194                                  InstrStage<1, [A9_AGU], 0>,
195                                  InstrStage<1, [A9_LSUnit]>],
196                                 [3, 1], [A9_LdBypass]>,
197   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
198                                  InstrStage<1, [A9_MUX0], 0>,
199                                  InstrStage<2, [A9_AGU], 0>,
200                                  InstrStage<1, [A9_LSUnit]>],
201                                 [4, 1], [A9_LdBypass]>,
202   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
203   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
204                                  InstrStage<1, [A9_MUX0], 0>,
205                                  InstrStage<2, [A9_AGU], 0>,
206                                  InstrStage<1, [A9_LSUnit]>],
207                                 [3, 3, 1], [A9_LdBypass]>,
208   //
209   // Register offset
210   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
211                                  InstrStage<1, [A9_MUX0], 0>,
212                                  InstrStage<1, [A9_AGU], 0>,
213                                  InstrStage<1, [A9_LSUnit]>],
214                                 [3, 1, 1], [A9_LdBypass]>,
215   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
216                                  InstrStage<1, [A9_MUX0], 0>,
217                                  InstrStage<2, [A9_AGU], 0>,
218                                  InstrStage<1, [A9_LSUnit]>],
219                                 [4, 1, 1], [A9_LdBypass]>,
220   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
221                                  InstrStage<1, [A9_MUX0], 0>,
222                                  InstrStage<2, [A9_AGU], 0>,
223                                  InstrStage<1, [A9_LSUnit]>],
224                                 [3, 3, 1, 1], [A9_LdBypass]>,
225   //
226   // Scaled register offset
227   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
228                                  InstrStage<1, [A9_MUX0], 0>,
229                                  InstrStage<1, [A9_AGU], 0>,
230                                  InstrStage<1, [A9_LSUnit], 0>],
231                                 [4, 1, 1], [A9_LdBypass]>,
232   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
233                                  InstrStage<1, [A9_MUX0], 0>,
234                                  InstrStage<2, [A9_AGU], 0>,
235                                  InstrStage<1, [A9_LSUnit]>],
236                                 [5, 1, 1], [A9_LdBypass]>,
237   //
238   // Immediate offset with update
239   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
240                                  InstrStage<1, [A9_MUX0], 0>,
241                                  InstrStage<1, [A9_AGU], 0>,
242                                  InstrStage<1, [A9_LSUnit]>],
243                                 [3, 2, 1], [A9_LdBypass]>,
244   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
245                                  InstrStage<1, [A9_MUX0], 0>,
246                                  InstrStage<2, [A9_AGU], 0>,
247                                  InstrStage<1, [A9_LSUnit]>],
248                                 [4, 3, 1], [A9_LdBypass]>,
249   //
250   // Register offset with update
251   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
252                                  InstrStage<1, [A9_MUX0], 0>,
253                                  InstrStage<1, [A9_AGU], 0>,
254                                  InstrStage<1, [A9_LSUnit]>],
255                                 [3, 2, 1, 1], [A9_LdBypass]>,
256   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
257                                  InstrStage<1, [A9_MUX0], 0>,
258                                  InstrStage<2, [A9_AGU], 0>,
259                                  InstrStage<1, [A9_LSUnit]>],
260                                 [4, 3, 1, 1], [A9_LdBypass]>,
261   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
262                                  InstrStage<1, [A9_MUX0], 0>,
263                                  InstrStage<2, [A9_AGU], 0>,
264                                  InstrStage<1, [A9_LSUnit]>],
265                                 [3, 3, 1, 1], [A9_LdBypass]>,
266   //
267   // Scaled register offset with update
268   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
269                                  InstrStage<1, [A9_MUX0], 0>,
270                                  InstrStage<1, [A9_AGU], 0>,
271                                  InstrStage<1, [A9_LSUnit]>],
272                                 [4, 3, 1, 1], [A9_LdBypass]>,
273   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
274                                   InstrStage<1, [A9_MUX0], 0>,
275                                   InstrStage<2, [A9_AGU], 0>,
276                                   InstrStage<1, [A9_LSUnit]>],
277                                  [5, 4, 1, 1], [A9_LdBypass]>,
278   //
279   // Load multiple, def is the 5th operand.
280   // FIXME: This assumes 3 to 4 registers.
281   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
282                                 InstrStage<1, [A9_MUX0], 0>,
283                                 InstrStage<2, [A9_AGU], 1>,
284                                 InstrStage<2, [A9_LSUnit]>],
285                                [1, 1, 1, 1, 3],
286                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
287                          -1>, // dynamic uops
288   //
289   // Load multiple + update, defs are the 1st and 5th operands.
290   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
291                                 InstrStage<1, [A9_MUX0], 0>,
292                                 InstrStage<2, [A9_AGU], 1>,
293                                 InstrStage<2, [A9_LSUnit]>],
294                                [2, 1, 1, 1, 3],
295                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
296                          -1>, // dynamic uops
297   //
298   // Load multiple plus branch
299   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
300                                 InstrStage<1, [A9_MUX0], 0>,
301                                 InstrStage<1, [A9_AGU], 1>,
302                                 InstrStage<2, [A9_LSUnit]>,
303                                 InstrStage<1, [A9_Branch]>],
304                                [1, 2, 1, 1, 3],
305                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
306                          -1>, // dynamic uops
307   //
308   // Pop, def is the 3rd operand.
309   InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
310                                 InstrStage<1, [A9_MUX0], 0>,
311                                 InstrStage<2, [A9_AGU], 1>,
312                                 InstrStage<2, [A9_LSUnit]>],
313                                [1, 1, 3],
314                                [NoBypass, NoBypass, A9_LdBypass],
315                                -1>, // dynamic uops
316   //
317   // Pop + branch, def is the 3rd operand.
318   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
319                                 InstrStage<1, [A9_MUX0], 0>,
320                                 InstrStage<2, [A9_AGU], 1>,
321                                 InstrStage<2, [A9_LSUnit]>,
322                                 InstrStage<1, [A9_Branch]>],
323                                [1, 1, 3],
324                                [NoBypass, NoBypass, A9_LdBypass],
325                                -1>, // dynamic uops
326   //
327   // iLoadi + iALUr for t2LDRpci_pic.
328   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
329                                 InstrStage<1, [A9_MUX0], 0>,
330                                 InstrStage<1, [A9_AGU], 0>,
331                                 InstrStage<1, [A9_LSUnit]>,
332                                 InstrStage<1, [A9_ALU0, A9_ALU1]>],
333                                [2, 1]>,
335   // Integer store pipeline
336   ///
337   // Immediate offset
338   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
339                                  InstrStage<1, [A9_MUX0], 0>,
340                                  InstrStage<1, [A9_AGU], 0>,
341                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
342   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
343                                  InstrStage<1, [A9_MUX0], 0>,
344                                  InstrStage<2, [A9_AGU], 1>,
345                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
346   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
347   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
348                                  InstrStage<1, [A9_MUX0], 0>,
349                                  InstrStage<2, [A9_AGU], 1>,
350                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
351   //
352   // Register offset
353   InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
354                                  InstrStage<1, [A9_MUX0], 0>,
355                                  InstrStage<1, [A9_AGU], 0>,
356                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
357   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
358                                  InstrStage<1, [A9_MUX0], 0>,
359                                  InstrStage<2, [A9_AGU], 1>,
360                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
361   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
362                                  InstrStage<1, [A9_MUX0], 0>,
363                                  InstrStage<2, [A9_AGU], 1>,
364                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
365   //
366   // Scaled register offset
367   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
368                                   InstrStage<1, [A9_MUX0], 0>,
369                                   InstrStage<1, [A9_AGU], 0>,
370                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
371   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
372                                   InstrStage<1, [A9_MUX0], 0>,
373                                   InstrStage<2, [A9_AGU], 1>,
374                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
375   //
376   // Immediate offset with update
377   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
378                                   InstrStage<1, [A9_MUX0], 0>,
379                                   InstrStage<1, [A9_AGU], 0>,
380                                   InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
381   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
382                                   InstrStage<1, [A9_MUX0], 0>,
383                                   InstrStage<2, [A9_AGU], 1>,
384                                   InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
385   //
386   // Register offset with update
387   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
388                                   InstrStage<1, [A9_MUX0], 0>,
389                                   InstrStage<1, [A9_AGU], 0>,
390                                   InstrStage<1, [A9_LSUnit]>],
391                                  [2, 1, 1, 1]>,
392   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
393                                   InstrStage<1, [A9_MUX0], 0>,
394                                   InstrStage<2, [A9_AGU], 1>,
395                                   InstrStage<1, [A9_LSUnit]>],
396                                  [3, 1, 1, 1]>,
397   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
398                                   InstrStage<1, [A9_MUX0], 0>,
399                                   InstrStage<2, [A9_AGU], 1>,
400                                   InstrStage<1, [A9_LSUnit]>],
401                                  [3, 1, 1, 1]>,
402   //
403   // Scaled register offset with update
404   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
405                                     InstrStage<1, [A9_MUX0], 0>,
406                                     InstrStage<1, [A9_AGU], 0>,
407                                     InstrStage<1, [A9_LSUnit]>],
408                                    [2, 1, 1, 1]>,
409   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
410                                     InstrStage<1, [A9_MUX0], 0>,
411                                     InstrStage<2, [A9_AGU], 1>,
412                                     InstrStage<1, [A9_LSUnit]>],
413                                    [3, 1, 1, 1]>,
414   //
415   // Store multiple
416   InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
417                                 InstrStage<1, [A9_MUX0], 0>,
418                                 InstrStage<1, [A9_AGU], 0>,
419                                 InstrStage<2, [A9_LSUnit]>],
420                 [], [], -1>, // dynamic uops
421   //
422   // Store multiple + update
423   InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
424                                 InstrStage<1, [A9_MUX0], 0>,
425                                 InstrStage<1, [A9_AGU], 0>,
426                                 InstrStage<2, [A9_LSUnit]>],
427                 [2], [], -1>, // dynamic uops
428   //
429   // Preload
430   InstrItinData<IIC_Preload,   [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
432   // Branch
433   //
434   // no delay slots, so the latency of a branch is unimportant
435   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Issue0], 0>,
436                                 InstrStage<1, [A9_Issue1], 0>,
437                                 InstrStage<1, [A9_Branch]>]>,
439   // VFP and NEON share the same register file. This means that every VFP
440   // instruction should wait for full completion of the consecutive NEON
441   // instruction and vice-versa. We model this behavior with two artificial FUs:
442   // DRegsVFP and DRegsN.
443   //
444   // Every VFP instruction:
445   //  - Acquires DRegsVFP resource for 1 cycle
446   //  - Reserves DRegsN resource for the whole duration (including time to
447   //    register file writeback!).
448   // Every NEON instruction does the same but with FUs swapped.
449   //
450   // Since the reserved FU cannot be acquired, this models precisely
451   // "cross-domain" stalls.
453   // VFP
454   // Issue through integer pipeline, and execute in NEON unit.
456   // FP Special Register to Integer Register File Move
457   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
458                               InstrStage<1, [A9_MUX0], 0>,
459                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
460                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
461                               InstrStage<1, [A9_NPipe]>],
462                              [1]>,
463   //
464   // Single-precision FP Unary
465   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
466                                InstrStage<1, [A9_MUX0], 0>,
467                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
468                                // Extra latency cycles since wbck is 2 cycles
469                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
470                                InstrStage<1, [A9_NPipe]>],
471                               [1, 1]>,
472   //
473   // Double-precision FP Unary
474   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
475                                InstrStage<1, [A9_MUX0], 0>,
476                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
477                                // Extra latency cycles since wbck is 2 cycles
478                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
479                                InstrStage<1, [A9_NPipe]>],
480                               [1, 1]>,
482   //
483   // Single-precision FP Compare
484   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
485                                InstrStage<1, [A9_MUX0], 0>,
486                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
487                                // Extra latency cycles since wbck is 4 cycles
488                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
489                                InstrStage<1, [A9_NPipe]>],
490                               [1, 1]>,
491   //
492   // Double-precision FP Compare
493   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
494                                InstrStage<1, [A9_MUX0], 0>,
495                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
496                                // Extra latency cycles since wbck is 4 cycles
497                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
498                                InstrStage<1, [A9_NPipe]>],
499                               [1, 1]>,
500   //
501   // Single to Double FP Convert
502   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
503                                InstrStage<1, [A9_MUX0], 0>,
504                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
505                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
506                                InstrStage<1, [A9_NPipe]>],
507                               [4, 1]>,
508   //
509   // Double to Single FP Convert
510   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
511                                InstrStage<1, [A9_MUX0], 0>,
512                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
513                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
514                                InstrStage<1, [A9_NPipe]>],
515                               [4, 1]>,
517   //
518   // Single to Half FP Convert
519   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
520                                InstrStage<1, [A9_MUX0], 0>,
521                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
522                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
523                                InstrStage<1, [A9_NPipe]>],
524                               [4, 1]>,
525   //
526   // Half to Single FP Convert
527   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
528                                InstrStage<1, [A9_MUX0], 0>,
529                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
530                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
531                                InstrStage<1, [A9_NPipe]>],
532                               [2, 1]>,
534   //
535   // Single-Precision FP to Integer Convert
536   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
537                                InstrStage<1, [A9_MUX0], 0>,
538                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
539                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
540                                InstrStage<1, [A9_NPipe]>],
541                               [4, 1]>,
542   //
543   // Double-Precision FP to Integer Convert
544   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
545                                InstrStage<1, [A9_MUX0], 0>,
546                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
547                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
548                                InstrStage<1, [A9_NPipe]>],
549                               [4, 1]>,
550   //
551   // Integer to Single-Precision FP Convert
552   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
553                                InstrStage<1, [A9_MUX0], 0>,
554                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
555                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
556                                InstrStage<1, [A9_NPipe]>],
557                               [4, 1]>,
558   //
559   // Integer to Double-Precision FP Convert
560   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
561                                InstrStage<1, [A9_MUX0], 0>,
562                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
563                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
564                                InstrStage<1, [A9_NPipe]>],
565                               [4, 1]>,
566   //
567   // Single-precision FP ALU
568   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
569                                InstrStage<1, [A9_MUX0], 0>,
570                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
571                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
572                                InstrStage<1, [A9_NPipe]>],
573                               [4, 1, 1]>,
574   //
575   // Double-precision FP ALU
576   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
577                                InstrStage<1, [A9_MUX0], 0>,
578                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
579                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
580                                InstrStage<1, [A9_NPipe]>],
581                               [4, 1, 1]>,
582   //
583   // Single-precision FP Multiply
584   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
585                                InstrStage<1, [A9_MUX0], 0>,
586                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
587                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
588                                InstrStage<1, [A9_NPipe]>],
589                               [5, 1, 1]>,
590   //
591   // Double-precision FP Multiply
592   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
593                                InstrStage<1, [A9_MUX0], 0>,
594                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
595                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
596                                InstrStage<2, [A9_NPipe]>],
597                               [6, 1, 1]>,
598   //
599   // Single-precision FP MAC
600   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
601                                InstrStage<1, [A9_MUX0], 0>,
602                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
603                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
604                                InstrStage<1, [A9_NPipe]>],
605                               [8, 1, 1, 1]>,
606   //
607   // Double-precision FP MAC
608   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
609                                InstrStage<1,  [A9_MUX0], 0>,
610                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
611                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
612                                InstrStage<2,  [A9_NPipe]>],
613                               [9, 1, 1, 1]>,
614   //
615   // Single-precision Fused FP MAC
616   InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
617                                InstrStage<1, [A9_MUX0], 0>,
618                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
619                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
620                                InstrStage<1, [A9_NPipe]>],
621                               [8, 1, 1, 1]>,
622   //
623   // Double-precision Fused FP MAC
624   InstrItinData<IIC_fpFMAC64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
625                                InstrStage<1,  [A9_MUX0], 0>,
626                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
627                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
628                                InstrStage<2,  [A9_NPipe]>],
629                               [9, 1, 1, 1]>,
630   //
631   // Single-precision FP DIV
632   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
633                                InstrStage<1,  [A9_MUX0], 0>,
634                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
635                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
636                                InstrStage<10, [A9_NPipe]>],
637                               [15, 1, 1]>,
638   //
639   // Double-precision FP DIV
640   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
641                                InstrStage<1,  [A9_MUX0], 0>,
642                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
643                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
644                                InstrStage<20, [A9_NPipe]>],
645                               [25, 1, 1]>,
646   //
647   // Single-precision FP SQRT
648   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
649                                InstrStage<1,  [A9_MUX0], 0>,
650                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
651                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
652                                InstrStage<13, [A9_NPipe]>],
653                               [17, 1]>,
654   //
655   // Double-precision FP SQRT
656   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
657                                InstrStage<1,  [A9_MUX0], 0>,
658                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
659                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
660                                InstrStage<28, [A9_NPipe]>],
661                               [32, 1]>,
663   //
664   // Integer to Single-precision Move
665   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
666                                InstrStage<1, [A9_MUX0], 0>,
667                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
668                                // 1 extra cycle since writeback takes 2 cycles
669                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
670                                InstrStage<1, [A9_NPipe]>],
671                               [1, 1]>,
672   //
673   // Integer to Double-precision Move
674   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
675                                InstrStage<1, [A9_MUX0], 0>,
676                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
677                                // 1 extra cycle since writeback takes 2 cycles
678                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
679                                InstrStage<1, [A9_NPipe]>],
680                               [1, 1, 1]>,
681   //
682   // Single-precision to Integer Move
683   //
684   // On A9 move-from-VFP is free to issue with no stall if other VFP
685   // operations are in flight. I assume it still can't dual-issue though.
686   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
687                                InstrStage<1, [A9_MUX0], 0>],
688                               [2, 1]>,
689   //
690   // Double-precision to Integer Move
691   //
692   // On A9 move-from-VFP is free to issue with no stall if other VFP
693   // operations are in flight. I assume it still can't dual-issue though.
694   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
695                                InstrStage<1, [A9_MUX0], 0>],
696                               [2, 1, 1]>,
697   //
698   // Single-precision FP Load
699   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
700                                InstrStage<1, [A9_MUX0], 0>,
701                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
702                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
703                                InstrStage<1, [A9_NPipe], 0>,
704                                InstrStage<1, [A9_LSUnit]>],
705                               [1, 1]>,
706   //
707   // Double-precision FP Load
708   // FIXME: Result latency is 1 if address is 64-bit aligned.
709   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
710                                InstrStage<1, [A9_MUX0], 0>,
711                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
712                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
713                                InstrStage<1, [A9_NPipe], 0>,
714                                InstrStage<1, [A9_LSUnit]>],
715                               [2, 1]>,
716   //
717   // FP Load Multiple
718   // FIXME: assumes 2 doubles which requires 2 LS cycles.
719   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
720                                InstrStage<1, [A9_MUX0], 0>,
721                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
722                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
723                                InstrStage<1, [A9_NPipe], 0>,
724                                InstrStage<2, [A9_LSUnit]>],
725                 [1, 1, 1, 1], [], -1>, // dynamic uops
726   //
727   // FP Load Multiple + update
728   // FIXME: assumes 2 doubles which requires 2 LS cycles.
729   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
730                                InstrStage<1, [A9_MUX0], 0>,
731                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
732                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
733                                InstrStage<1, [A9_NPipe], 0>,
734                                InstrStage<2, [A9_LSUnit]>],
735                 [2, 1, 1, 1], [], -1>, // dynamic uops
736   //
737   // Single-precision FP Store
738   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
739                                InstrStage<1, [A9_MUX0], 0>,
740                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
741                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
742                                InstrStage<1, [A9_NPipe], 0>,
743                                InstrStage<1, [A9_LSUnit]>],
744                               [1, 1]>,
745   //
746   // Double-precision FP Store
747   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
748                                InstrStage<1, [A9_MUX0], 0>,
749                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
750                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
751                                InstrStage<1, [A9_NPipe], 0>,
752                                InstrStage<1, [A9_LSUnit]>],
753                               [1, 1]>,
754   //
755   // FP Store Multiple
756   // FIXME: assumes 2 doubles which requires 2 LS cycles.
757   InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
758                                InstrStage<1, [A9_MUX0], 0>,
759                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
760                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
761                                InstrStage<1, [A9_NPipe], 0>,
762                                InstrStage<2, [A9_LSUnit]>],
763                 [1, 1, 1, 1], [], -1>, // dynamic uops
764   //
765   // FP Store Multiple + update
766   // FIXME: assumes 2 doubles which requires 2 LS cycles.
767   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
768                                 InstrStage<1, [A9_MUX0], 0>,
769                                 InstrStage<1, [A9_DRegsVFP], 0, Required>,
770                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
771                                 InstrStage<1, [A9_NPipe], 0>,
772                                 InstrStage<2, [A9_LSUnit]>],
773                 [2, 1, 1, 1], [], -1>, // dynamic uops
774   // NEON
775   // VLD1
776   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
777                                InstrStage<1, [A9_MUX0], 0>,
778                                InstrStage<1, [A9_DRegsN],   0, Required>,
779                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
780                                InstrStage<1, [A9_NPipe], 0>,
781                                InstrStage<1, [A9_LSUnit]>],
782                               [1, 1]>,
783   // VLD1x2
784   InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
785                                InstrStage<1, [A9_MUX0], 0>,
786                                InstrStage<1, [A9_DRegsN],   0, Required>,
787                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
788                                InstrStage<1, [A9_NPipe], 0>,
789                                InstrStage<1, [A9_LSUnit]>],
790                               [1, 1, 1]>,
791   // VLD1x3
792   InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
793                                InstrStage<1, [A9_MUX0], 0>,
794                                InstrStage<1, [A9_DRegsN],   0, Required>,
795                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
796                                InstrStage<2, [A9_NPipe], 0>,
797                                InstrStage<2, [A9_LSUnit]>],
798                               [1, 1, 2, 1]>,
799   // VLD1x4
800   InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
801                                InstrStage<1, [A9_MUX0], 0>,
802                                InstrStage<1, [A9_DRegsN],   0, Required>,
803                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
804                                InstrStage<2, [A9_NPipe], 0>,
805                                InstrStage<2, [A9_LSUnit]>],
806                               [1, 1, 2, 2, 1]>,
807   // VLD1u
808   InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
809                                InstrStage<1, [A9_MUX0], 0>,
810                                InstrStage<1, [A9_DRegsN],   0, Required>,
811                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
812                                InstrStage<1, [A9_NPipe], 0>,
813                                InstrStage<1, [A9_LSUnit]>],
814                               [1, 2, 1]>,
815   // VLD1x2u
816   InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
817                                InstrStage<1, [A9_MUX0], 0>,
818                                InstrStage<1, [A9_DRegsN],   0, Required>,
819                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
820                                InstrStage<1, [A9_NPipe], 0>,
821                                InstrStage<1, [A9_LSUnit]>],
822                               [1, 1, 2, 1]>,
823   // VLD1x3u
824   InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
825                                InstrStage<1, [A9_MUX0], 0>,
826                                InstrStage<1, [A9_DRegsN],   0, Required>,
827                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
828                                InstrStage<2, [A9_NPipe], 0>,
829                                InstrStage<2, [A9_LSUnit]>],
830                               [1, 1, 2, 2, 1]>,
831   // VLD1x4u
832   InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
833                                InstrStage<1, [A9_MUX0], 0>,
834                                InstrStage<1, [A9_DRegsN],   0, Required>,
835                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
836                                InstrStage<2, [A9_NPipe], 0>,
837                                InstrStage<2, [A9_LSUnit]>],
838                               [1, 1, 2, 2, 2, 1]>,
839   //
840   // VLD1ln
841   InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
842                                InstrStage<1, [A9_MUX0], 0>,
843                                InstrStage<1, [A9_DRegsN],   0, Required>,
844                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
845                                InstrStage<2, [A9_NPipe], 0>,
846                                InstrStage<2, [A9_LSUnit]>],
847                               [3, 1, 1, 1]>,
848   //
849   // VLD1lnu
850   InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
851                                InstrStage<1, [A9_MUX0], 0>,
852                                InstrStage<1, [A9_DRegsN],   0, Required>,
853                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
854                                InstrStage<2, [A9_NPipe], 0>,
855                                InstrStage<2, [A9_LSUnit]>],
856                               [3, 2, 1, 1, 1, 1]>,
857   //
858   // VLD1dup
859   InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
860                                InstrStage<1, [A9_MUX0], 0>,
861                                InstrStage<1, [A9_DRegsN],   0, Required>,
862                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
863                                InstrStage<1, [A9_NPipe], 0>,
864                                InstrStage<1, [A9_LSUnit]>],
865                               [2, 1]>,
866   //
867   // VLD1dupu
868   InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
869                                InstrStage<1, [A9_MUX0], 0>,
870                                InstrStage<1, [A9_DRegsN],   0, Required>,
871                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
872                                InstrStage<1, [A9_NPipe], 0>,
873                                InstrStage<1, [A9_LSUnit]>],
874                               [2, 2, 1, 1]>,
875   //
876   // VLD2
877   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
878                                InstrStage<1, [A9_MUX0], 0>,
879                                InstrStage<1, [A9_DRegsN],   0, Required>,
880                                // Extra latency: writeback takes 7 cycles
881                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
882                                InstrStage<1, [A9_NPipe], 0>,
883                                InstrStage<1, [A9_LSUnit]>],
884                               [2, 2, 1]>,
885   //
886   // VLD2x2
887   InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
888                                InstrStage<1, [A9_MUX0], 0>,
889                                InstrStage<1, [A9_DRegsN],   0, Required>,
890                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
891                                InstrStage<2, [A9_NPipe], 0>,
892                                InstrStage<2, [A9_LSUnit]>],
893                               [2, 3, 2, 3, 1]>,
894   //
895   // VLD2ln
896   InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
897                                InstrStage<1, [A9_MUX0], 0>,
898                                InstrStage<1, [A9_DRegsN],   0, Required>,
899                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
900                                InstrStage<2, [A9_NPipe], 0>,
901                                InstrStage<2, [A9_LSUnit]>],
902                               [3, 3, 1, 1, 1, 1]>,
903   //
904   // VLD2u
905   InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
906                                InstrStage<1, [A9_MUX0], 0>,
907                                InstrStage<1, [A9_DRegsN],   0, Required>,
908                                // Extra latency: writeback takes 7 cycles
909                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
910                                InstrStage<1, [A9_NPipe], 0>,
911                                InstrStage<1, [A9_LSUnit]>],
912                               [2, 2, 2, 1, 1, 1]>,
913   //
914   // VLD2x2u
915   InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
916                                InstrStage<1, [A9_MUX0], 0>,
917                                InstrStage<1, [A9_DRegsN],   0, Required>,
918                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
919                                InstrStage<2, [A9_NPipe], 0>,
920                                InstrStage<2, [A9_LSUnit]>],
921                               [2, 3, 2, 3, 2, 1]>,
922   //
923   // VLD2lnu
924   InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
925                                InstrStage<1, [A9_MUX0], 0>,
926                                InstrStage<1, [A9_DRegsN],   0, Required>,
927                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
928                                InstrStage<2, [A9_NPipe], 0>,
929                                InstrStage<2, [A9_LSUnit]>],
930                               [3, 3, 2, 1, 1, 1, 1, 1]>,
931   //
932   // VLD2dup
933   InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
934                                InstrStage<1, [A9_MUX0], 0>,
935                                InstrStage<1, [A9_DRegsN],   0, Required>,
936                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
937                                InstrStage<1, [A9_NPipe], 0>,
938                                InstrStage<1, [A9_LSUnit]>],
939                               [2, 2, 1]>,
940   //
941   // VLD2dupu
942   InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
943                                InstrStage<1, [A9_MUX0], 0>,
944                                InstrStage<1, [A9_DRegsN],   0, Required>,
945                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
946                                InstrStage<1, [A9_NPipe], 0>,
947                                InstrStage<1, [A9_LSUnit]>],
948                               [2, 2, 2, 1, 1]>,
949   //
950   // VLD3
951   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
952                                InstrStage<1, [A9_MUX0], 0>,
953                                InstrStage<1, [A9_DRegsN],   0, Required>,
954                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
955                                InstrStage<3, [A9_NPipe], 0>,
956                                InstrStage<3, [A9_LSUnit]>],
957                               [3, 3, 4, 1]>,
958   //
959   // VLD3ln
960   InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
961                                InstrStage<1, [A9_MUX0], 0>,
962                                InstrStage<1, [A9_DRegsN],   0, Required>,
963                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
964                                InstrStage<5, [A9_NPipe], 0>,
965                                InstrStage<5, [A9_LSUnit]>],
966                               [5, 5, 6, 1, 1, 1, 1, 2]>,
967   //
968   // VLD3u
969   InstrItinData<IIC_VLD3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
970                                InstrStage<1, [A9_MUX0], 0>,
971                                InstrStage<1, [A9_DRegsN],   0, Required>,
972                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
973                                InstrStage<3, [A9_NPipe], 0>,
974                                InstrStage<3, [A9_LSUnit]>],
975                               [3, 3, 4, 2, 1]>,
976   //
977   // VLD3lnu
978   InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
979                                InstrStage<1, [A9_MUX0], 0>,
980                                InstrStage<1, [A9_DRegsN],   0, Required>,
981                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
982                                InstrStage<5, [A9_NPipe], 0>,
983                                InstrStage<5, [A9_LSUnit]>],
984                               [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
985   //
986   // VLD3dup
987   InstrItinData<IIC_VLD3dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
988                                InstrStage<1, [A9_MUX0], 0>,
989                                InstrStage<1, [A9_DRegsN],   0, Required>,
990                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
991                                InstrStage<3, [A9_NPipe], 0>,
992                                InstrStage<3, [A9_LSUnit]>],
993                               [3, 3, 4, 1]>,
994   //
995   // VLD3dupu
996   InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
997                                InstrStage<1, [A9_MUX0], 0>,
998                                InstrStage<1, [A9_DRegsN],   0, Required>,
999                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1000                                InstrStage<3, [A9_NPipe], 0>,
1001                                InstrStage<3, [A9_LSUnit]>],
1002                               [3, 3, 4, 2, 1, 1]>,
1003   //
1004   // VLD4
1005   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1006                                InstrStage<1, [A9_MUX0], 0>,
1007                                InstrStage<1, [A9_DRegsN],   0, Required>,
1008                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
1009                                InstrStage<3, [A9_NPipe], 0>,
1010                                InstrStage<3, [A9_LSUnit]>],
1011                               [3, 3, 4, 4, 1]>,
1012   //
1013   // VLD4ln
1014   InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1015                                InstrStage<1, [A9_MUX0], 0>,
1016                                InstrStage<1, [A9_DRegsN],   0, Required>,
1017                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1018                                InstrStage<4, [A9_NPipe], 0>,
1019                                InstrStage<4, [A9_LSUnit]>],
1020                               [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
1021   //
1022   // VLD4u
1023   InstrItinData<IIC_VLD4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1024                                InstrStage<1, [A9_MUX0], 0>,
1025                                InstrStage<1, [A9_DRegsN],   0, Required>,
1026                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
1027                                InstrStage<3, [A9_NPipe], 0>,
1028                                InstrStage<3, [A9_LSUnit]>],
1029                               [3, 3, 4, 4, 2, 1]>,
1030   //
1031   // VLD4lnu
1032   InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1033                                InstrStage<1, [A9_MUX0], 0>,
1034                                InstrStage<1, [A9_DRegsN],   0, Required>,
1035                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1036                                InstrStage<4, [A9_NPipe], 0>,
1037                                InstrStage<4, [A9_LSUnit]>],
1038                               [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
1039   //
1040   // VLD4dup
1041   InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1042                                InstrStage<1, [A9_MUX0], 0>,
1043                                InstrStage<1, [A9_DRegsN],   0, Required>,
1044                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1045                                InstrStage<2, [A9_NPipe], 0>,
1046                                InstrStage<2, [A9_LSUnit]>],
1047                               [2, 2, 3, 3, 1]>,
1048   //
1049   // VLD4dupu
1050   InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1051                                InstrStage<1, [A9_MUX0], 0>,
1052                                InstrStage<1, [A9_DRegsN],   0, Required>,
1053                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1054                                InstrStage<2, [A9_NPipe], 0>,
1055                                InstrStage<2, [A9_LSUnit]>],
1056                               [2, 2, 3, 3, 2, 1, 1]>,
1057   //
1058   // VST1
1059   InstrItinData<IIC_VST1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1060                                InstrStage<1, [A9_MUX0], 0>,
1061                                InstrStage<1, [A9_DRegsN],   0, Required>,
1062                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1063                                InstrStage<1, [A9_NPipe], 0>,
1064                                InstrStage<1, [A9_LSUnit]>],
1065                               [1, 1, 1]>,
1066   //
1067   // VST1x2
1068   InstrItinData<IIC_VST1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1069                                InstrStage<1, [A9_MUX0], 0>,
1070                                InstrStage<1, [A9_DRegsN],   0, Required>,
1071                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1072                                InstrStage<1, [A9_NPipe], 0>,
1073                                InstrStage<1, [A9_LSUnit]>],
1074                               [1, 1, 1, 1]>,
1075   //
1076   // VST1x3
1077   InstrItinData<IIC_VST1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1078                                InstrStage<1, [A9_MUX0], 0>,
1079                                InstrStage<1, [A9_DRegsN],   0, Required>,
1080                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1081                                InstrStage<2, [A9_NPipe], 0>,
1082                                InstrStage<2, [A9_LSUnit]>],
1083                               [1, 1, 1, 1, 2]>,
1084   //
1085   // VST1x4
1086   InstrItinData<IIC_VST1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1087                                InstrStage<1, [A9_MUX0], 0>,
1088                                InstrStage<1, [A9_DRegsN],   0, Required>,
1089                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1090                                InstrStage<2, [A9_NPipe], 0>,
1091                                InstrStage<2, [A9_LSUnit]>],
1092                               [1, 1, 1, 1, 2, 2]>,
1093   //
1094   // VST1u
1095   InstrItinData<IIC_VST1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1096                                InstrStage<1, [A9_MUX0], 0>,
1097                                InstrStage<1, [A9_DRegsN],   0, Required>,
1098                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1099                                InstrStage<1, [A9_NPipe], 0>,
1100                                InstrStage<1, [A9_LSUnit]>],
1101                               [2, 1, 1, 1, 1]>,
1102   //
1103   // VST1x2u
1104   InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1105                                InstrStage<1, [A9_MUX0], 0>,
1106                                InstrStage<1, [A9_DRegsN],   0, Required>,
1107                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1108                                InstrStage<1, [A9_NPipe], 0>,
1109                                InstrStage<1, [A9_LSUnit]>],
1110                               [2, 1, 1, 1, 1, 1]>,
1111   //
1112   // VST1x3u
1113   InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1114                                InstrStage<1, [A9_MUX0], 0>,
1115                                InstrStage<1, [A9_DRegsN],   0, Required>,
1116                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1117                                InstrStage<2, [A9_NPipe], 0>,
1118                                InstrStage<2, [A9_LSUnit]>],
1119                               [2, 1, 1, 1, 1, 1, 2]>,
1120   //
1121   // VST1x4u
1122   InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1123                                InstrStage<1, [A9_MUX0], 0>,
1124                                InstrStage<1, [A9_DRegsN],   0, Required>,
1125                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1126                                InstrStage<2, [A9_NPipe], 0>,
1127                                InstrStage<2, [A9_LSUnit]>],
1128                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1129   //
1130   // VST1ln
1131   InstrItinData<IIC_VST1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1132                                InstrStage<1, [A9_MUX0], 0>,
1133                                InstrStage<1, [A9_DRegsN],   0, Required>,
1134                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1135                                InstrStage<1, [A9_NPipe], 0>,
1136                                InstrStage<1, [A9_LSUnit]>],
1137                               [1, 1, 1]>,
1138   //
1139   // VST1lnu
1140   InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1141                                InstrStage<1, [A9_MUX0], 0>,
1142                                InstrStage<1, [A9_DRegsN],   0, Required>,
1143                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1144                                InstrStage<1, [A9_NPipe], 0>,
1145                                InstrStage<1, [A9_LSUnit]>],
1146                               [2, 1, 1, 1, 1]>,
1147   //
1148   // VST2
1149   InstrItinData<IIC_VST2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1150                                InstrStage<1, [A9_MUX0], 0>,
1151                                InstrStage<1, [A9_DRegsN],   0, Required>,
1152                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1153                                InstrStage<1, [A9_NPipe], 0>,
1154                                InstrStage<1, [A9_LSUnit]>],
1155                               [1, 1, 1, 1]>,
1156   //
1157   // VST2x2
1158   InstrItinData<IIC_VST2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1159                                InstrStage<1, [A9_MUX0], 0>,
1160                                InstrStage<1, [A9_DRegsN],   0, Required>,
1161                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1162                                InstrStage<3, [A9_NPipe], 0>,
1163                                InstrStage<3, [A9_LSUnit]>],
1164                               [1, 1, 1, 1, 2, 2]>,
1165   //
1166   // VST2u
1167   InstrItinData<IIC_VST2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1168                                InstrStage<1, [A9_MUX0], 0>,
1169                                InstrStage<1, [A9_DRegsN],   0, Required>,
1170                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1171                                InstrStage<1, [A9_NPipe], 0>,
1172                                InstrStage<1, [A9_LSUnit]>],
1173                               [2, 1, 1, 1, 1, 1]>,
1174   //
1175   // VST2x2u
1176   InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1177                                InstrStage<1, [A9_MUX0], 0>,
1178                                InstrStage<1, [A9_DRegsN],   0, Required>,
1179                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1180                                InstrStage<3, [A9_NPipe], 0>,
1181                                InstrStage<3, [A9_LSUnit]>],
1182                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1183   //
1184   // VST2ln
1185   InstrItinData<IIC_VST2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1186                                InstrStage<1, [A9_MUX0], 0>,
1187                                InstrStage<1, [A9_DRegsN],   0, Required>,
1188                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1189                                InstrStage<1, [A9_NPipe], 0>,
1190                                InstrStage<1, [A9_LSUnit]>],
1191                               [1, 1, 1, 1]>,
1192   //
1193   // VST2lnu
1194   InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1195                                InstrStage<1, [A9_MUX0], 0>,
1196                                InstrStage<1, [A9_DRegsN],   0, Required>,
1197                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1198                                InstrStage<1, [A9_NPipe], 0>,
1199                                InstrStage<1, [A9_LSUnit]>],
1200                               [2, 1, 1, 1, 1, 1]>,
1201   //
1202   // VST3
1203   InstrItinData<IIC_VST3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1204                                InstrStage<1, [A9_MUX0], 0>,
1205                                InstrStage<1, [A9_DRegsN],   0, Required>,
1206                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1207                                InstrStage<2, [A9_NPipe], 0>,
1208                                InstrStage<2, [A9_LSUnit]>],
1209                               [1, 1, 1, 1, 2]>,
1210   //
1211   // VST3u
1212   InstrItinData<IIC_VST3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1213                                InstrStage<1, [A9_MUX0], 0>,
1214                                InstrStage<1, [A9_DRegsN],   0, Required>,
1215                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1216                                InstrStage<2, [A9_NPipe], 0>,
1217                                InstrStage<2, [A9_LSUnit]>],
1218                               [2, 1, 1, 1, 1, 1, 2]>,
1219   //
1220   // VST3ln
1221   InstrItinData<IIC_VST3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1222                                InstrStage<1, [A9_MUX0], 0>,
1223                                InstrStage<1, [A9_DRegsN],   0, Required>,
1224                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1225                                InstrStage<3, [A9_NPipe], 0>,
1226                                InstrStage<3, [A9_LSUnit]>],
1227                               [1, 1, 1, 1, 2]>,
1228   //
1229   // VST3lnu
1230   InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1231                                InstrStage<1, [A9_MUX0], 0>,
1232                                InstrStage<1, [A9_DRegsN],   0, Required>,
1233                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1234                                InstrStage<3, [A9_NPipe], 0>,
1235                                InstrStage<3, [A9_LSUnit]>],
1236                               [2, 1, 1, 1, 1, 1, 2]>,
1237   //
1238   // VST4
1239   InstrItinData<IIC_VST4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1240                                InstrStage<1, [A9_MUX0], 0>,
1241                                InstrStage<1, [A9_DRegsN],   0, Required>,
1242                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1243                                InstrStage<2, [A9_NPipe], 0>,
1244                                InstrStage<2, [A9_LSUnit]>],
1245                               [1, 1, 1, 1, 2, 2]>,
1246   //
1247   // VST4u
1248   InstrItinData<IIC_VST4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1249                                InstrStage<1, [A9_MUX0], 0>,
1250                                InstrStage<1, [A9_DRegsN],   0, Required>,
1251                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1252                                InstrStage<2, [A9_NPipe], 0>,
1253                                InstrStage<2, [A9_LSUnit]>],
1254                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1255   //
1256   // VST4ln
1257   InstrItinData<IIC_VST4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1258                                InstrStage<1, [A9_MUX0], 0>,
1259                                InstrStage<1, [A9_DRegsN],   0, Required>,
1260                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1261                                InstrStage<2, [A9_NPipe], 0>,
1262                                InstrStage<2, [A9_LSUnit]>],
1263                               [1, 1, 1, 1, 2, 2]>,
1264   //
1265   // VST4lnu
1266   InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1267                                InstrStage<1, [A9_MUX0], 0>,
1268                                InstrStage<1, [A9_DRegsN],   0, Required>,
1269                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1270                                InstrStage<2, [A9_NPipe], 0>,
1271                                InstrStage<2, [A9_LSUnit]>],
1272                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1274   //
1275   // Double-register Integer Unary
1276   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1277                                InstrStage<1, [A9_MUX0], 0>,
1278                                InstrStage<1, [A9_DRegsN],   0, Required>,
1279                                // Extra latency cycles since wbck is 6 cycles
1280                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1281                                InstrStage<1, [A9_NPipe]>],
1282                               [4, 2]>,
1283   //
1284   // Quad-register Integer Unary
1285   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1286                                InstrStage<1, [A9_MUX0], 0>,
1287                                InstrStage<1, [A9_DRegsN],   0, Required>,
1288                                // Extra latency cycles since wbck is 6 cycles
1289                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1290                                InstrStage<1, [A9_NPipe]>],
1291                               [4, 2]>,
1292   //
1293   // Double-register Integer Q-Unary
1294   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1295                                InstrStage<1, [A9_MUX0], 0>,
1296                                InstrStage<1, [A9_DRegsN],   0, Required>,
1297                                // Extra latency cycles since wbck is 6 cycles
1298                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1299                                InstrStage<1, [A9_NPipe]>],
1300                               [4, 1]>,
1301   //
1302   // Quad-register Integer Q-Unary
1303   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1304                                InstrStage<1, [A9_MUX0], 0>,
1305                                InstrStage<1, [A9_DRegsN],   0, Required>,
1306                                // Extra latency cycles since wbck is 6 cycles
1307                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1308                                InstrStage<1, [A9_NPipe]>],
1309                               [4, 1]>,
1310   //
1311   // Double-register Integer Binary
1312   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1313                                InstrStage<1, [A9_MUX0], 0>,
1314                                InstrStage<1, [A9_DRegsN],   0, Required>,
1315                                // Extra latency cycles since wbck is 6 cycles
1316                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1317                                InstrStage<1, [A9_NPipe]>],
1318                               [3, 2, 2]>,
1319   //
1320   // Quad-register Integer Binary
1321   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1322                                InstrStage<1, [A9_MUX0], 0>,
1323                                InstrStage<1, [A9_DRegsN],   0, Required>,
1324                                // Extra latency cycles since wbck is 6 cycles
1325                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1326                                InstrStage<1, [A9_NPipe]>],
1327                               [3, 2, 2]>,
1328   //
1329   // Double-register Integer Subtract
1330   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1331                                InstrStage<1, [A9_MUX0], 0>,
1332                                InstrStage<1, [A9_DRegsN],   0, Required>,
1333                                // Extra latency cycles since wbck is 6 cycles
1334                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1335                                InstrStage<1, [A9_NPipe]>],
1336                               [3, 2, 1]>,
1337   //
1338   // Quad-register Integer Subtract
1339   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1340                                InstrStage<1, [A9_MUX0], 0>,
1341                                InstrStage<1, [A9_DRegsN],   0, Required>,
1342                                // Extra latency cycles since wbck is 6 cycles
1343                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1344                                InstrStage<1, [A9_NPipe]>],
1345                               [3, 2, 1]>,
1346   //
1347   // Double-register Integer Shift
1348   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1349                                InstrStage<1, [A9_MUX0], 0>,
1350                                InstrStage<1, [A9_DRegsN],   0, Required>,
1351                                // Extra latency cycles since wbck is 6 cycles
1352                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1353                                InstrStage<1, [A9_NPipe]>],
1354                               [3, 1, 1]>,
1355   //
1356   // Quad-register Integer Shift
1357   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1358                                InstrStage<1, [A9_MUX0], 0>,
1359                                InstrStage<1, [A9_DRegsN],   0, Required>,
1360                                // Extra latency cycles since wbck is 6 cycles
1361                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1362                                InstrStage<1, [A9_NPipe]>],
1363                               [3, 1, 1]>,
1364   //
1365   // Double-register Integer Shift (4 cycle)
1366   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1367                                InstrStage<1, [A9_MUX0], 0>,
1368                                InstrStage<1, [A9_DRegsN],   0, Required>,
1369                                // Extra latency cycles since wbck is 6 cycles
1370                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1371                                InstrStage<1, [A9_NPipe]>],
1372                               [4, 1, 1]>,
1373   //
1374   // Quad-register Integer Shift (4 cycle)
1375   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1376                                InstrStage<1, [A9_MUX0], 0>,
1377                                InstrStage<1, [A9_DRegsN],   0, Required>,
1378                                // Extra latency cycles since wbck is 6 cycles
1379                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1380                                InstrStage<1, [A9_NPipe]>],
1381                               [4, 1, 1]>,
1382   //
1383   // Double-register Integer Binary (4 cycle)
1384   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1385                                InstrStage<1, [A9_MUX0], 0>,
1386                                InstrStage<1, [A9_DRegsN],   0, Required>,
1387                                // Extra latency cycles since wbck is 6 cycles
1388                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1389                                InstrStage<1, [A9_NPipe]>],
1390                               [4, 2, 2]>,
1391   //
1392   // Quad-register Integer Binary (4 cycle)
1393   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1394                                InstrStage<1, [A9_MUX0], 0>,
1395                                InstrStage<1, [A9_DRegsN],   0, Required>,
1396                                // Extra latency cycles since wbck is 6 cycles
1397                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1398                                InstrStage<1, [A9_NPipe]>],
1399                               [4, 2, 2]>,
1400   //
1401   // Double-register Integer Subtract (4 cycle)
1402   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1403                                InstrStage<1, [A9_MUX0], 0>,
1404                                InstrStage<1, [A9_DRegsN],   0, Required>,
1405                                // Extra latency cycles since wbck is 6 cycles
1406                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1407                                InstrStage<1, [A9_NPipe]>],
1408                               [4, 2, 1]>,
1409   //
1410   // Quad-register Integer Subtract (4 cycle)
1411   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1412                                InstrStage<1, [A9_MUX0], 0>,
1413                                InstrStage<1, [A9_DRegsN],   0, Required>,
1414                                // Extra latency cycles since wbck is 6 cycles
1415                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1416                                InstrStage<1, [A9_NPipe]>],
1417                               [4, 2, 1]>,
1419   //
1420   // Double-register Integer Count
1421   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1422                                InstrStage<1, [A9_MUX0], 0>,
1423                                InstrStage<1, [A9_DRegsN],   0, Required>,
1424                                // Extra latency cycles since wbck is 6 cycles
1425                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1426                                InstrStage<1, [A9_NPipe]>],
1427                               [3, 2, 2]>,
1428   //
1429   // Quad-register Integer Count
1430   // Result is written in N3, but that is relative to the last cycle of a
1431   // multicycle instruction, so we use 4 for those cases.
1432   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1433                                InstrStage<1, [A9_MUX0], 0>,
1434                                InstrStage<1, [A9_DRegsN],   0, Required>,
1435                                // Extra latency cycles since wbck is 7 cycles
1436                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1437                                InstrStage<2, [A9_NPipe]>],
1438                               [4, 2, 2]>,
1439   //
1440   // Double-register Absolute Difference and Accumulate
1441   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1442                                InstrStage<1, [A9_MUX0], 0>,
1443                                InstrStage<1, [A9_DRegsN],   0, Required>,
1444                                // Extra latency cycles since wbck is 6 cycles
1445                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1446                                InstrStage<1, [A9_NPipe]>],
1447                               [6, 3, 2, 1]>,
1448   //
1449   // Quad-register Absolute Difference and Accumulate
1450   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1451                                InstrStage<1, [A9_MUX0], 0>,
1452                                InstrStage<1, [A9_DRegsN],   0, Required>,
1453                                // Extra latency cycles since wbck is 6 cycles
1454                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1455                                InstrStage<2, [A9_NPipe]>],
1456                               [6, 3, 2, 1]>,
1457   //
1458   // Double-register Integer Pair Add Long
1459   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1460                                InstrStage<1, [A9_MUX0], 0>,
1461                                InstrStage<1, [A9_DRegsN],   0, Required>,
1462                                // Extra latency cycles since wbck is 6 cycles
1463                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1464                                InstrStage<1, [A9_NPipe]>],
1465                               [6, 3, 1]>,
1466   //
1467   // Quad-register Integer Pair Add Long
1468   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1469                                InstrStage<1, [A9_MUX0], 0>,
1470                                InstrStage<1, [A9_DRegsN],   0, Required>,
1471                                // Extra latency cycles since wbck is 6 cycles
1472                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1473                                InstrStage<2, [A9_NPipe]>],
1474                               [6, 3, 1]>,
1476   //
1477   // Double-register Integer Multiply (.8, .16)
1478   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1479                                InstrStage<1, [A9_MUX0], 0>,
1480                                InstrStage<1, [A9_DRegsN],   0, Required>,
1481                                // Extra latency cycles since wbck is 6 cycles
1482                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1483                                InstrStage<1, [A9_NPipe]>],
1484                               [6, 2, 2]>,
1485   //
1486   // Quad-register Integer Multiply (.8, .16)
1487   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1488                                InstrStage<1, [A9_MUX0], 0>,
1489                                InstrStage<1, [A9_DRegsN],   0, Required>,
1490                                // Extra latency cycles since wbck is 7 cycles
1491                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1492                                InstrStage<2, [A9_NPipe]>],
1493                               [7, 2, 2]>,
1495   //
1496   // Double-register Integer Multiply (.32)
1497   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1498                                InstrStage<1, [A9_MUX0], 0>,
1499                                InstrStage<1, [A9_DRegsN],   0, Required>,
1500                                // Extra latency cycles since wbck is 7 cycles
1501                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1502                                InstrStage<2, [A9_NPipe]>],
1503                               [7, 2, 1]>,
1504   //
1505   // Quad-register Integer Multiply (.32)
1506   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1507                                InstrStage<1, [A9_MUX0], 0>,
1508                                InstrStage<1, [A9_DRegsN],   0, Required>,
1509                                // Extra latency cycles since wbck is 9 cycles
1510                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1511                                InstrStage<4, [A9_NPipe]>],
1512                               [9, 2, 1]>,
1513   //
1514   // Double-register Integer Multiply-Accumulate (.8, .16)
1515   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1516                                InstrStage<1, [A9_MUX0], 0>,
1517                                InstrStage<1, [A9_DRegsN],   0, Required>,
1518                                // Extra latency cycles since wbck is 6 cycles
1519                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1520                                InstrStage<1, [A9_NPipe]>],
1521                               [6, 3, 2, 2]>,
1522   //
1523   // Double-register Integer Multiply-Accumulate (.32)
1524   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1525                                InstrStage<1, [A9_MUX0], 0>,
1526                                InstrStage<1, [A9_DRegsN],   0, Required>,
1527                                // Extra latency cycles since wbck is 7 cycles
1528                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1529                                InstrStage<2, [A9_NPipe]>],
1530                               [7, 3, 2, 1]>,
1531   //
1532   // Quad-register Integer Multiply-Accumulate (.8, .16)
1533   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1534                                InstrStage<1, [A9_MUX0], 0>,
1535                                InstrStage<1, [A9_DRegsN],   0, Required>,
1536                                // Extra latency cycles since wbck is 7 cycles
1537                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1538                                InstrStage<2, [A9_NPipe]>],
1539                               [7, 3, 2, 2]>,
1540   //
1541   // Quad-register Integer Multiply-Accumulate (.32)
1542   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1543                                InstrStage<1, [A9_MUX0], 0>,
1544                                InstrStage<1, [A9_DRegsN],   0, Required>,
1545                                // Extra latency cycles since wbck is 9 cycles
1546                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1547                                InstrStage<4, [A9_NPipe]>],
1548                               [9, 3, 2, 1]>,
1550   //
1551   // Move
1552   InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1553                                InstrStage<1, [A9_MUX0], 0>,
1554                                InstrStage<1, [A9_DRegsN],   0, Required>,
1555                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1556                                InstrStage<1, [A9_NPipe]>],
1557                               [1,1]>,
1558   //
1559   // Move Immediate
1560   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1561                                InstrStage<1, [A9_MUX0], 0>,
1562                                InstrStage<1, [A9_DRegsN],   0, Required>,
1563                                // Extra latency cycles since wbck is 6 cycles
1564                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1565                                InstrStage<1, [A9_NPipe]>],
1566                               [3]>,
1567   //
1568   // Double-register Permute Move
1569   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1570                                InstrStage<1, [A9_MUX0], 0>,
1571                                InstrStage<1, [A9_DRegsN],   0, Required>,
1572                                // Extra latency cycles since wbck is 6 cycles
1573                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1574                                InstrStage<1, [A9_NPipe]>],
1575                               [2, 1]>,
1576   //
1577   // Quad-register Permute Move
1578   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1579                                InstrStage<1, [A9_MUX0], 0>,
1580                                InstrStage<1, [A9_DRegsN],   0, Required>,
1581                                // Extra latency cycles since wbck is 6 cycles
1582                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1583                                InstrStage<1, [A9_NPipe]>],
1584                               [2, 1]>,
1585   //
1586   // Integer to Single-precision Move
1587   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1588                                InstrStage<1, [A9_MUX0], 0>,
1589                                InstrStage<1, [A9_DRegsN],   0, Required>,
1590                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1591                                InstrStage<1, [A9_NPipe]>],
1592                               [1, 1]>,
1593   //
1594   // Integer to Double-precision Move
1595   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1596                                InstrStage<1, [A9_MUX0], 0>,
1597                                InstrStage<1, [A9_DRegsN],   0, Required>,
1598                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1599                                InstrStage<1, [A9_NPipe]>],
1600                               [1, 1, 1]>,
1601   //
1602   // Single-precision to Integer Move
1603   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1604                                InstrStage<1, [A9_MUX0], 0>,
1605                                InstrStage<1, [A9_DRegsN],   0, Required>,
1606                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1607                                InstrStage<1, [A9_NPipe]>],
1608                               [2, 1]>,
1609   //
1610   // Double-precision to Integer Move
1611   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1612                                InstrStage<1, [A9_MUX0], 0>,
1613                                InstrStage<1, [A9_DRegsN],   0, Required>,
1614                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1615                                InstrStage<1, [A9_NPipe]>],
1616                               [2, 2, 1]>,
1617   //
1618   // Integer to Lane Move
1619   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1620                                InstrStage<1, [A9_MUX0], 0>,
1621                                InstrStage<1, [A9_DRegsN],   0, Required>,
1622                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1623                                InstrStage<2, [A9_NPipe]>],
1624                               [3, 1, 1]>,
1626   //
1627   // Vector narrow move
1628   InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1629                                InstrStage<1, [A9_MUX0], 0>,
1630                                InstrStage<1, [A9_DRegsN],   0, Required>,
1631                                // Extra latency cycles since wbck is 6 cycles
1632                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1633                                InstrStage<1, [A9_NPipe]>],
1634                               [3, 1]>,
1635   //
1636   // Double-register FP Unary
1637   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1638                                InstrStage<1, [A9_MUX0], 0>,
1639                                InstrStage<1, [A9_DRegsN],   0, Required>,
1640                                // Extra latency cycles since wbck is 6 cycles
1641                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1642                                InstrStage<1, [A9_NPipe]>],
1643                               [5, 2]>,
1644   //
1645   // Quad-register FP Unary
1646   // Result is written in N5, but that is relative to the last cycle of a
1647   // multicycle instruction, so we use 6 for those cases.
1648   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1649                                InstrStage<1, [A9_MUX0], 0>,
1650                                InstrStage<1, [A9_DRegsN],   0, Required>,
1651                                // Extra latency cycles since wbck is 7 cycles
1652                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1653                                InstrStage<2, [A9_NPipe]>],
1654                               [6, 2]>,
1655   //
1656   // Double-register FP Binary
1657   // FIXME: This itinerary is shared by many instructions, and the trailing
1658   // [2, 2] operand cycles used here are too optimistic.
1659   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1660                                InstrStage<1, [A9_MUX0], 0>,
1661                                InstrStage<1, [A9_DRegsN],   0, Required>,
1662                                // Extra latency cycles since wbck is 6 cycles
1663                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1664                                InstrStage<1, [A9_NPipe]>],
1665                               [5, 2, 2]>,
1667   //
1668   // VPADD, etc.
1669   InstrItinData<IIC_VPBIND,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1670                                InstrStage<1, [A9_MUX0], 0>,
1671                                InstrStage<1, [A9_DRegsN],   0, Required>,
1672                                // Extra latency cycles since wbck is 6 cycles
1673                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1674                                InstrStage<1, [A9_NPipe]>],
1675                               [5, 1, 1]>,
1676   //
1677   // Double-register FP VMUL
1678   InstrItinData<IIC_VFMULD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1679                                InstrStage<1, [A9_MUX0], 0>,
1680                                InstrStage<1, [A9_DRegsN],   0, Required>,
1681                                // Extra latency cycles since wbck is 6 cycles
1682                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1683                                InstrStage<1, [A9_NPipe]>],
1684                               [5, 2, 1]>,
1685   //
1686   // Quad-register FP Binary
1687   // Result written in N5, but that is relative to the last cycle of multicycle,
1688   // so we use 6 for those cases
1689   // FIXME: We're using this itin for many instructions and [2, 2] here is too
1690   // optimistic.
1691   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1692                                InstrStage<1, [A9_MUX0], 0>,
1693                                InstrStage<1, [A9_DRegsN],   0, Required>,
1694                                // Extra latency cycles since wbck is 7 cycles
1695                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1696                                InstrStage<2, [A9_NPipe]>],
1697                               [6, 2, 2]>,
1698   //
1699   // Quad-register FP VMUL
1700   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1701                                InstrStage<1, [A9_MUX0], 0>,
1702                                InstrStage<1, [A9_DRegsN],   0, Required>,
1703                                // Extra latency cycles since wbck is 7 cycles
1704                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1705                                InstrStage<1, [A9_NPipe]>],
1706                               [6, 2, 1]>,
1707   //
1708   // Double-register FP Multiply-Accumulate
1709   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1710                                InstrStage<1, [A9_MUX0], 0>,
1711                                InstrStage<1, [A9_DRegsN],   0, Required>,
1712                                // Extra latency cycles since wbck is 7 cycles
1713                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1714                                InstrStage<2, [A9_NPipe]>],
1715                               [6, 3, 2, 1]>,
1716   //
1717   // Quad-register FP Multiply-Accumulate
1718   // Result written in N9, but that is relative to the last cycle of multicycle,
1719   // so we use 10 for those cases
1720   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1721                                InstrStage<1, [A9_MUX0], 0>,
1722                                InstrStage<1, [A9_DRegsN],   0, Required>,
1723                                // Extra latency cycles since wbck is 9 cycles
1724                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1725                                InstrStage<4, [A9_NPipe]>],
1726                               [8, 4, 2, 1]>,
1727   //
1728   // Double-register Fused FP Multiply-Accumulate
1729   InstrItinData<IIC_VFMACD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1730                                InstrStage<1, [A9_MUX0], 0>,
1731                                InstrStage<1, [A9_DRegsN],   0, Required>,
1732                                // Extra latency cycles since wbck is 7 cycles
1733                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1734                                InstrStage<2, [A9_NPipe]>],
1735                               [6, 3, 2, 1]>,
1736   //
1737   // Quad-register Fused FP Multiply-Accumulate
1738   // Result written in N9, but that is relative to the last cycle of multicycle,
1739   // so we use 10 for those cases
1740   InstrItinData<IIC_VFMACQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1741                                InstrStage<1, [A9_MUX0], 0>,
1742                                InstrStage<1, [A9_DRegsN],   0, Required>,
1743                                // Extra latency cycles since wbck is 9 cycles
1744                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1745                                InstrStage<4, [A9_NPipe]>],
1746                               [8, 4, 2, 1]>,
1747   //
1748   // Double-register Reciprocal Step
1749   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1750                                InstrStage<1, [A9_MUX0], 0>,
1751                                InstrStage<1, [A9_DRegsN],   0, Required>,
1752                                // Extra latency cycles since wbck is 10 cycles
1753                                InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
1754                                InstrStage<1, [A9_NPipe]>],
1755                               [9, 2, 2]>,
1756   //
1757   // Quad-register Reciprocal Step
1758   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1759                                InstrStage<1, [A9_MUX0], 0>,
1760                                InstrStage<1, [A9_DRegsN],   0, Required>,
1761                                // Extra latency cycles since wbck is 11 cycles
1762                                InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
1763                                InstrStage<2, [A9_NPipe]>],
1764                               [10, 2, 2]>,
1765   //
1766   // Double-register Permute
1767   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1768                                InstrStage<1, [A9_MUX0], 0>,
1769                                InstrStage<1, [A9_DRegsN],   0, Required>,
1770                                // Extra latency cycles since wbck is 6 cycles
1771                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1772                                InstrStage<1, [A9_NPipe]>],
1773                               [2, 2, 1, 1]>,
1774   //
1775   // Quad-register Permute
1776   // Result written in N2, but that is relative to the last cycle of multicycle,
1777   // so we use 3 for those cases
1778   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1779                                InstrStage<1, [A9_MUX0], 0>,
1780                                InstrStage<1, [A9_DRegsN],   0, Required>,
1781                                // Extra latency cycles since wbck is 7 cycles
1782                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1783                                InstrStage<2, [A9_NPipe]>],
1784                               [3, 3, 1, 1]>,
1785   //
1786   // Quad-register Permute (3 cycle issue)
1787   // Result written in N2, but that is relative to the last cycle of multicycle,
1788   // so we use 4 for those cases
1789   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1790                                InstrStage<1, [A9_MUX0], 0>,
1791                                InstrStage<1, [A9_DRegsN],   0, Required>,
1792                                // Extra latency cycles since wbck is 8 cycles
1793                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1794                                InstrStage<3, [A9_NPipe]>],
1795                               [4, 4, 1, 1]>,
1797   //
1798   // Double-register VEXT
1799   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1800                                InstrStage<1, [A9_MUX0], 0>,
1801                                InstrStage<1, [A9_DRegsN],   0, Required>,
1802                                // Extra latency cycles since wbck is 6 cycles
1803                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1804                                InstrStage<1, [A9_NPipe]>],
1805                               [2, 1, 1]>,
1806   //
1807   // Quad-register VEXT
1808   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1809                                InstrStage<1, [A9_MUX0], 0>,
1810                                InstrStage<1, [A9_DRegsN],   0, Required>,
1811                                // Extra latency cycles since wbck is 7 cycles
1812                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1813                                InstrStage<2, [A9_NPipe]>],
1814                               [3, 1, 2]>,
1815   //
1816   // VTB
1817   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1818                                InstrStage<1, [A9_MUX0], 0>,
1819                                InstrStage<1, [A9_DRegsN],   0, Required>,
1820                                // Extra latency cycles since wbck is 7 cycles
1821                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1822                                InstrStage<2, [A9_NPipe]>],
1823                               [3, 2, 1]>,
1824   InstrItinData<IIC_VTB2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1825                                InstrStage<1, [A9_MUX0], 0>,
1826                                InstrStage<2, [A9_DRegsN],   0, Required>,
1827                                // Extra latency cycles since wbck is 7 cycles
1828                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1829                                InstrStage<2, [A9_NPipe]>],
1830                               [3, 2, 2, 1]>,
1831   InstrItinData<IIC_VTB3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1832                                InstrStage<1, [A9_MUX0], 0>,
1833                                InstrStage<2, [A9_DRegsN],   0, Required>,
1834                                // Extra latency cycles since wbck is 8 cycles
1835                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1836                                InstrStage<3, [A9_NPipe]>],
1837                               [4, 2, 2, 3, 1]>,
1838   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1839                                InstrStage<1, [A9_MUX0], 0>,
1840                                InstrStage<1, [A9_DRegsN],   0, Required>,
1841                                // Extra latency cycles since wbck is 8 cycles
1842                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1843                                InstrStage<3, [A9_NPipe]>],
1844                               [4, 2, 2, 3, 3, 1]>,
1845   //
1846   // VTBX
1847   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1848                                InstrStage<1, [A9_MUX0], 0>,
1849                                InstrStage<1, [A9_DRegsN],   0, Required>,
1850                                // Extra latency cycles since wbck is 7 cycles
1851                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1852                                InstrStage<2, [A9_NPipe]>],
1853                               [3, 1, 2, 1]>,
1854   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1855                                InstrStage<1, [A9_MUX0], 0>,
1856                                InstrStage<1, [A9_DRegsN],   0, Required>,
1857                                // Extra latency cycles since wbck is 7 cycles
1858                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1859                                InstrStage<2, [A9_NPipe]>],
1860                               [3, 1, 2, 2, 1]>,
1861   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1862                                InstrStage<1, [A9_MUX0], 0>,
1863                                InstrStage<1, [A9_DRegsN],   0, Required>,
1864                                // Extra latency cycles since wbck is 8 cycles
1865                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1866                                InstrStage<3, [A9_NPipe]>],
1867                               [4, 1, 2, 2, 3, 1]>,
1868   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1869                                InstrStage<1, [A9_MUX0], 0>,
1870                                InstrStage<1, [A9_DRegsN],   0, Required>,
1871                                // Extra latency cycles since wbck is 8 cycles
1872                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1873                                InstrStage<2, [A9_NPipe]>],
1874                               [4, 1, 2, 2, 3, 3, 1]>
1877 // ===---------------------------------------------------------------------===//
1878 // The following definitions describe the simpler per-operand machine model.
1879 // This works with MachineScheduler and will eventually replace itineraries.
1881 class A9WriteLMOpsListType<list<WriteSequence> writes> { // Named holder for a list of write sequences.
1882   list <WriteSequence> Writes = writes; // Sliced by index range (e.g. Writes[0-3]) in the LDM/VLDM variants.
1883   SchedMachineModel SchedModel = ?; // Unset here; presumably supplied by an enclosing `let SchedModel = ... in` - confirm.
1886 // Cortex-A9 machine model for scheduling and other instruction cost heuristics.
1887 def CortexA9Model : SchedMachineModel {
1888   let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
1889   let MicroOpBufferSize = 56; // Based on available renamed registers.
1890   let LoadLatency = 2; // Optimistic load latency assuming bypass.
1891                        // This is overridden by OperandCycles if the
1892                        // Itineraries are queried instead.
1893   let MispredictPenalty = 8; // Based on estimate of pipeline depth.
1895   let Itineraries = CortexA9Itineraries; // Legacy itinerary data defined earlier in this file.
1897   // FIXME: Many vector operations were never given an itinerary. We
1898   // haven't mapped these to the new model either.
1899   let CompleteModel = 0;
1901   // FIXME: Remove when all errors have been fixed.
1902   let FullInstRWOverlapCheck = 0;
1905 //===----------------------------------------------------------------------===//
1906 // Define each kind of processor resource and number available.
1908 // The AGU unit has BufferSize=1 so that the latency between operations
1909 // that use it is considered to stall other operations.
1911 // The FP unit has BufferSize=0 so that it is a hard dispatch
1912 // hazard. No instruction may be dispatched while the unit is reserved.
1914 let SchedModel = CortexA9Model in {
1916 def A9UnitALU : ProcResource<2>; // Two integer ALU units.
1917 def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; } // Single multiplier; Super ties it to the ALU resource.
1918 def A9UnitAGU : ProcResource<1> { let BufferSize = 1; } // Address generation; also consumed by FP/NEON writes.
1919 def A9UnitLS  : ProcResource<1>; // Used by the load and store writes.
1920 def A9UnitFP  : ProcResource<1> { let BufferSize = 0; } // FP/NEON; BufferSize=0 makes it a hard dispatch hazard.
1921 def A9UnitB   : ProcResource<1>; // Used by branch writes (A9WriteB).
1923 //===----------------------------------------------------------------------===//
1924 // Define scheduler read/write types with their resources and latency on A9.
1926 // Consume an issue slot, but no processor resources. This is useful when all
1927 // other writes associated with the operand have NumMicroOps = 0.
1928 def A9WriteIssue : SchedWriteRes<[]> { let Latency = 0; }
1930 // Write an integer register.
1931 def A9WriteI : SchedWriteRes<[A9UnitALU]>;
1932 // Write an integer shifted-by register
1933 def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
1935 // Basic ALU.
1936 def A9WriteALU : SchedWriteRes<[A9UnitALU]>;
1937 // ALU with operand shifted by immediate.
1938 def : WriteRes<WriteALUsi, [A9UnitALU]> { let Latency = 2; }
1939 // ALU with operand shifted by register.
1940 def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
1942 // Multiplication. The *Hi writes set NumMicroOps = 0, so they add latency but no micro-op of their own.
1943 def A9WriteM   : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; } // NOTE(review): A9UnitMul is listed twice - presumably intentional; confirm.
1944 def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5;
1945                                               let NumMicroOps = 0; }
1946 def A9WriteM16   : SchedWriteRes<[A9UnitMul]> { let Latency = 3; }
1947 def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4;
1948                                                 let NumMicroOps = 0; }
1949 def : SchedAlias<WriteMUL16, A9WriteM16>; // Map the generic multiply/MAC writes onto the A9 writes above.
1950 def : SchedAlias<WriteMUL32, A9WriteM>;
1951 def : SchedAlias<WriteMUL64Lo, A9WriteM>;
1952 def : SchedAlias<WriteMUL64Hi, A9WriteMHi>;
1953 def : SchedAlias<WriteMAC16, A9WriteM16>;
1954 def : SchedAlias<WriteMAC32, A9WriteM>;
1955 def : SchedAlias<WriteMAC64Lo, A9WriteM>;
1956 def : SchedAlias<WriteMAC64Hi, A9WriteMHi>;
1957 def : ReadAdvance<ReadMUL, 0>; // Advance of 0 cycles for multiply operand reads.
1958 def : ReadAdvance<ReadMAC, 0>; // Advance of 0 cycles for multiply-accumulate operand reads.
1960 // Floating-point
1961 // Only one FP or AGU instruction may issue per cycle. We model this
1962 // by having FP instructions consume the AGU resource.
1963 def A9WriteF      : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
1964 def A9WriteFMov   : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
1965 def A9WriteFMulS  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
1966 def A9WriteFMulD  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
1967 def A9WriteFMAS   : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; }
1969 def A9WriteFMAD   : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
1970 def A9WriteFDivS  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; }
1971 def A9WriteFDivD  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; }
1972 def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 17; }
1973 def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 32; }
1975 // NEON has an odd mix of latencies. Simply name the write types by latency.
1976 def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
1977 def A9WriteV2 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 2; }
1978 def A9WriteV3 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 3; }
1979 def A9WriteV4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
1980 def A9WriteV5 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
1981 def A9WriteV6 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
1982 def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; }
1983 def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
1984 def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; }
1986 def : WriteRes<WriteVLD1, []>; // WriteVLD1-4 and WriteVST1-4 are given an empty resource list here.
1987 def : WriteRes<WriteVLD2, []>;
1988 def : WriteRes<WriteVLD3, []>;
1989 def : WriteRes<WriteVLD4, []>;
1990 def : WriteRes<WriteVST1, []>;
1991 def : WriteRes<WriteVST2, []>;
1992 def : WriteRes<WriteVST3, []>;
1993 def : WriteRes<WriteVST4, []>;
1995 // Reserve A9UnitFP for 2 consecutive cycles. The variants differ only in Latency (4, 7, 9).
1996 def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
1997   let Latency = 4;
1998   let ResourceCycles = [2, 1]; // Positional: A9UnitFP for 2 cycles, A9UnitAGU for 1.
2000 def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
2001   let Latency = 7;
2002   let ResourceCycles = [2, 1]; // Positional: A9UnitFP for 2 cycles, A9UnitAGU for 1.
2004 def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
2005   let Latency = 9;
2006   let ResourceCycles = [2, 1]; // Positional: A9UnitFP for 2 cycles, A9UnitAGU for 1.
2009 // Branches don't have a def operand but still consume resources.
2010 def A9WriteB : SchedWriteRes<[A9UnitB]>;
2012 // Address generation.
2013 def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; }
2015 // Load Integer.
2016 def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; }
2017 def : SchedAlias<WriteLd, A9WriteL>;
2018 // Load the upper 32-bits using the same micro-op.
2019 def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3;
2020                                      let NumMicroOps = 0; }
2021 // Offset shifted by register.
2022 def A9WriteLsi : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
2023 // Load (and zero extend) a byte.
2024 def A9WriteLb : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
2025 def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> { let Latency = 5; }
2027 // Load or Store Float, aligned.
2028 def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 1; }
2030 // Store Integer.
2031 def A9WriteS : SchedWriteRes<[A9UnitLS]>;
2033 //===----------------------------------------------------------------------===//
2034 // Define resources dynamically for load multiple variants.
2036 // Define helpers for extra latency without consuming resources.
2037 def A9WriteCycle1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; } // One cycle, no resource, no micro-op.
2038 foreach NumCycles = 2-8 in {
2039 def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>; // A9WriteCycle1 repeated NumCycles times.
2040 } // foreach NumCycles
2042 // Define address generation sequences and predicates for 8 flavors of LDMs.
2043 foreach NumAddr = 1-8 in {
2045 // Define A9WriteAdr1-8 as a sequence of A9WriteAdr with additive
2046 // latency for instructions that generate multiple loads or stores.
2047 def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;
2049 // Define a predicate to select the LDM based on number of memory addresses.
2050 def A9LMAdr#NumAddr#Pred :
2051   SchedPredicate<"(TII->getNumLDMAddresses(*MI)+1)/2 == "#NumAddr>; // (n+1)/2: addresses counted in pairs, rounded up (assuming integer division).
2053 } // foreach NumAddr
2055 // Fall-back for unknown LDMs.
2056 def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == 0">;
2058 // LDM/VLDM/VLDn address generation latency & resources.
2059 // Dynamically select the A9WriteAdrN sequence using a predicate.
2060 def A9WriteLMAdr : SchedWriteVariant<[
2061   SchedVar<A9LMAdr1Pred, [A9WriteAdr1]>,
2062   SchedVar<A9LMAdr2Pred, [A9WriteAdr2]>,
2063   SchedVar<A9LMAdr3Pred, [A9WriteAdr3]>,
2064   SchedVar<A9LMAdr4Pred, [A9WriteAdr4]>,
2065   SchedVar<A9LMAdr5Pred, [A9WriteAdr5]>,
2066   SchedVar<A9LMAdr6Pred, [A9WriteAdr6]>,
2067   SchedVar<A9LMAdr7Pred, [A9WriteAdr7]>,
2068   SchedVar<A9LMAdr8Pred, [A9WriteAdr8]>,
2069   // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers.
2070   SchedVar<A9LMUnknownPred, [A9WriteAdr2]>]>;
2072 // Define LDM Resources.
2073 // These take no issue resource, so they can be combined with other
2074 // writes like WriteB.
2075 // A9WriteLMLo takes a single LS resource and 2 cycles.
2076 def A9WriteLMLo : SchedWriteRes<[A9UnitLS]> { let Latency = 2;
2077                                               let NumMicroOps = 0; }
2078 // Assuming aligned access, the upper half of each pair is free with
2079 // the same latency.
2080 def A9WriteLMHi : SchedWriteRes<[]> { let Latency = 2;
2081                                       let NumMicroOps = 0; }
2082 // Each A9WriteL#N variant adds N cycles of latency without consuming
2083 // additional resources.
2084 foreach NumAddr = 1-8 in {
2085 def A9WriteL#NumAddr : WriteSequence<
2086   [A9WriteLMLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
2087 def A9WriteL#NumAddr#Hi : WriteSequence<
2088   [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
2091 //===----------------------------------------------------------------------===//
2092 // LDM: Load multiple into 32-bit integer registers.
2094 def A9WriteLMOpsList : A9WriteLMOpsListType<
2095                  [A9WriteL1, A9WriteL1Hi,
2096                   A9WriteL2, A9WriteL2Hi,
2097                   A9WriteL3, A9WriteL3Hi,
2098                   A9WriteL4, A9WriteL4Hi,
2099                   A9WriteL5, A9WriteL5Hi,
2100                   A9WriteL6, A9WriteL6Hi,
2101                   A9WriteL7, A9WriteL7Hi,
2102                   A9WriteL8, A9WriteL8Hi]>;
2104 // A9WriteLM variants expand into a pair of writes for each 64-bit
2105 // value loaded. When the number of registers is odd, the last
2106 // A9WriteLnHi is naturally ignored because the instruction has no
2107 // following def operands.  These variants take no issue resource, so
2108 // they may need to be part of a WriteSequence that includes A9WriteIssue.
2109 def A9WriteLM : SchedWriteVariant<[
2110   SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>,
2111   SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>,
2112   SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>,
2113   SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>,
2114   SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>,
2115   SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>,
2116   SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>,
2117   SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>,
2118   // For unknown LDMs, define the maximum number of writes, but only
2119   // make the first two consume resources.
2120   SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi,
2121                              A9WriteL2, A9WriteL2Hi,
2122                              A9WriteL3Hi, A9WriteL3Hi,
2123                              A9WriteL4Hi, A9WriteL4Hi,
2124                              A9WriteL5Hi, A9WriteL5Hi,
2125                              A9WriteL6Hi, A9WriteL6Hi,
2126                              A9WriteL7Hi, A9WriteL7Hi,
2127                              A9WriteL8Hi, A9WriteL8Hi]>]> {
2128   let Variadic = 1;
2131 //===----------------------------------------------------------------------===//
2132 // VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support.
2134 // A9WriteLfpOp is the same as A9WriteLSfp but takes no issue resources
2135 // so it can be used in WriteSequences for single-issue instructions that
2136 // encapsulate multiple loads.
2137 def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
2138   let Latency = 1;
2139   let NumMicroOps = 0;
2142 foreach NumAddr = 1-8 in {
2144 // Helper for A9WriteLfp1-8: A sequence of fp loads with no micro-ops.
2145 def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>;
2147 // A9WriteLfp1-8 definitions are statically expanded into a sequence of
2148 // A9WriteLfpOps with additive latency that takes a single issue slot.
2149 // Used directly to describe NEON VLDn.
2150 def A9WriteLfp#NumAddr : WriteSequence<
2151   [A9WriteIssue, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
2153 // A9WriteLfp1-8Mov adds a cycle of latency and FP resource for
2154 // permuting loaded values.
2155 def A9WriteLfp#NumAddr#Mov : WriteSequence<
2156   [A9WriteF, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
2158 } // foreach NumAddr
2160 // Define VLDM/VSTM PreRA resources.
2161 // A9WriteLMfpPreRA are dynamically expanded into the correct
2162 // A9WriteLfp1-8 sequence based on a predicate. This supports the
2163 // preRA VLDM variants in which all 64-bit loads are written to the
2164 // same tuple of either single or double precision registers.
2165 def A9WriteLMfpPreRA : SchedWriteVariant<[
2166   SchedVar<A9LMAdr1Pred, [A9WriteLfp1]>,
2167   SchedVar<A9LMAdr2Pred, [A9WriteLfp2]>,
2168   SchedVar<A9LMAdr3Pred, [A9WriteLfp3]>,
2169   SchedVar<A9LMAdr4Pred, [A9WriteLfp4]>,
2170   SchedVar<A9LMAdr5Pred, [A9WriteLfp5]>,
2171   SchedVar<A9LMAdr6Pred, [A9WriteLfp6]>,
2172   SchedVar<A9LMAdr7Pred, [A9WriteLfp7]>,
2173   SchedVar<A9LMAdr8Pred, [A9WriteLfp8]>,
2174   // For unknown VLDM/VSTM PreRA, assume 2xS registers.
2175   SchedVar<A9LMUnknownPred, [A9WriteLfp2]>]>;
2177 // Define VLDM/VSTM PostRA Resources.
2178 // A9WriteLMfpLo takes a LS and FP resource and one issue slot but no latency.
2179 def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 0; }
2181 foreach NumAddr = 1-8 in {
2183 // Each A9WriteLMfp#N variant adds N cycles of latency without consuming
2184 // additional resources.
2185 def A9WriteLMfp#NumAddr : WriteSequence<
2186   [A9WriteLMfpLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
2188 // Assuming aligned access, the upper half of each pair is free with
2189 // the same latency.
2190 def A9WriteLMfp#NumAddr#Hi : WriteSequence<
2191   [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
2193 } // foreach NumAddr
2195 // VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a
2196 // pair of writes for each 64-bit data loaded. When the number of
2197 // registers is odd, the last WriteLMfpnHi is naturally ignored because
2198 // the instruction has no following def operands.
2200 def A9WriteLMfpPostRAOpsList : A9WriteLMOpsListType<
2201                  [A9WriteLMfp1, A9WriteLMfp2,       // 0-1
2202                   A9WriteLMfp3, A9WriteLMfp4,       // 2-3
2203                   A9WriteLMfp5, A9WriteLMfp6,       // 4-5
2204                   A9WriteLMfp7, A9WriteLMfp8,       // 6-7
2205                   A9WriteLMfp1Hi,                   // 8-8
2206                   A9WriteLMfp2Hi, A9WriteLMfp2Hi,   // 9-10
2207                   A9WriteLMfp3Hi, A9WriteLMfp3Hi,   // 11-12
2208                   A9WriteLMfp4Hi, A9WriteLMfp4Hi,   // 13-14
2209                   A9WriteLMfp5Hi, A9WriteLMfp5Hi,   // 15-16
2210                   A9WriteLMfp6Hi, A9WriteLMfp6Hi,   // 17-18
2211                   A9WriteLMfp7Hi, A9WriteLMfp7Hi,   // 19-20
2212                   A9WriteLMfp8Hi, A9WriteLMfp8Hi]>; // 21-22
2214 def A9WriteLMfpPostRA : SchedWriteVariant<[
2215   SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>,
2216   SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>,
2217   SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>,
2218   SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>,
2219   SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>,
2220   SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>,
2221   SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>,
2222   SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>,
2223   // For unknown LDMs, define the maximum number of writes, but only
2224   // make the first two consume resources. We are optimizing for the case
2225   // where the operands are DPRs, and this determines the first eight
2226   // types. The remaining eight types are filled to cover the case
2227   // where the operands are SPRs.
2228   SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp2,
2229                              A9WriteLMfp3Hi, A9WriteLMfp4Hi,
2230                              A9WriteLMfp5Hi, A9WriteLMfp6Hi,
2231                              A9WriteLMfp7Hi, A9WriteLMfp8Hi,
2232                              A9WriteLMfp5Hi, A9WriteLMfp5Hi,
2233                              A9WriteLMfp6Hi, A9WriteLMfp6Hi,
2234                              A9WriteLMfp7Hi, A9WriteLMfp7Hi,
2235                              A9WriteLMfp8Hi, A9WriteLMfp8Hi]>]> {
2236   let Variadic = 1;
2239 // Distinguish between our multiple MI-level forms of the same
2240 // VLDM/VSTM instructions.
2241 def A9PreRA : SchedPredicate<
2242   "Register::isVirtualRegister(MI->getOperand(0).getReg())">; // Holds when operand 0 is a virtual register.
2243 def A9PostRA : SchedPredicate<
2244   "Register::isPhysicalRegister(MI->getOperand(0).getReg())">; // Holds when operand 0 is a physical register.
2246 // VLDM represents all destination registers as a single register
2247 // tuple, unlike LDM. So the number of write operands is not variadic.
2248 def A9WriteLMfp : SchedWriteVariant<[
2249   SchedVar<A9PreRA, [A9WriteLMfpPreRA]>,
2250   SchedVar<A9PostRA, [A9WriteLMfpPostRA]>]>;
2252 //===----------------------------------------------------------------------===//
2253 // Resources for other (non-LDM/VLDM) Variants.
2255 // These mov immediate writers are unconditionally expanded with
2256 // additive latency.
2257 def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>;
2258 def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
2259 def A9WriteI2ld  : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;
2261 // Some ALU operations can read loaded integer values one cycle early.
// NOTE(review): the write list presumably restricts the one-cycle advance
// to values produced by these load writes -- confirm against the
// SchedReadAdvance definition.
2262 def A9ReadALU : SchedReadAdvance<1,
2263   [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
2264    A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
2265    A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
2266    A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi,
2267    A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi]>;
2269 // Read types for operands that are unconditionally read in cycle N
2270 // after the instruction issues, which decreases producer latency by N-1.
// Accordingly A9Read2, A9Read3 and A9Read4 are declared with advances of
// 1, 2 and 3 cycles.
2271 def A9Read2 : SchedReadAdvance<1>;
2272 def A9Read3 : SchedReadAdvance<2>;
2273 def A9Read4 : SchedReadAdvance<3>;
2275 //===----------------------------------------------------------------------===//
2276 // Map itinerary classes to scheduler read/write resources per operand.
2278 // For ARM, we piggyback scheduler resources on the Itinerary classes
2279 // to avoid perturbing the existing instruction definitions.
2281 // This table follows the ARM Cortex-A9 Technical Reference Manuals,
2282 // mostly in order.
// IIC_iMOV* / IIC_iMVN* / IIC_iCMOV* classes. The *ix2 classes use the
// A9WriteI2, A9WriteI2pc and A9WriteI2ld write sequences; IIC_iMVNr is the
// only class in this group that also lists a read (A9ReadALU).
2284 def :ItinRW<[WriteALU], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
2285                          IIC_iMVNi,IIC_iMVNsi,
2286                          IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>;
2287 def :ItinRW<[WriteALU, A9ReadALU],[IIC_iMVNr]>;
2288 def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
2290 def :ItinRW<[A9WriteI2],   [IIC_iMOVix2,IIC_iCMOVix2]>;
2291 def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
2292 def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;
// Only the IIC_iALU* and IIC_iCMP* classes list A9ReadALU reads here; the
// IIC_iBIT*, IIC_iUNA*, IIC_iTST* and IIC_iEXT* classes list a write only.
2294 def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
2295 def :ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
2296 def :ItinRW<[WriteALU, A9ReadALU, A9ReadALU],[IIC_iALUr,IIC_iCMPr]>;
2297 def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
2298 def :ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>;
2299 def :ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB
2300 def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
2301 def :ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr,IIC_iCMPsr]>;
2303 // A9WriteMHi is ignored for MUL32.
2304 def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32,
2305                                      IIC_iMUL64,IIC_iMAC64]>;
2306 // FIXME: SMLALxx needs itin classes
// The 16-bit multiply classes use the separate A9WriteM16/A9WriteM16Hi pair.
2307 def :ItinRW<[A9WriteM16, A9WriteM16Hi], [IIC_iMUL16,IIC_iMAC16]>;
2309 // TODO: For floating-point ops, we model the pipeline forwarding
2310 // latencies here. WAW latencies are sometimes longer.
// Moves, unary ops and compares share A9WriteFMov; conversions and
// IIC_fpALU* share A9WriteF. Multiply, multiply-accumulate, divide and
// square-root each map the 32-bit class to an *S write and the 64-bit
// class to a *D write.
2312 def :ItinRW<[A9WriteFMov], [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI,
2313                             IIC_fpUNA32, IIC_fpUNA64,
2314                             IIC_fpCMP32, IIC_fpCMP64]>;
2315 def :ItinRW<[A9WriteFMov, A9WriteFMov], [IIC_fpMOVDI]>;
2316 def :ItinRW<[A9WriteF], [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS,
2317                          IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI,
2318                          IIC_fpALU32, IIC_fpALU64]>;
2319 def :ItinRW<[A9WriteFMulS], [IIC_fpMUL32]>;
2320 def :ItinRW<[A9WriteFMulD], [IIC_fpMUL64]>;
2321 def :ItinRW<[A9WriteFMAS], [IIC_fpMAC32]>;
2322 def :ItinRW<[A9WriteFMAD], [IIC_fpMAC64]>;
2323 def :ItinRW<[A9WriteFDivS], [IIC_fpDIV32]>;
2324 def :ItinRW<[A9WriteFDivD], [IIC_fpDIV64]>;
2325 def :ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>;
2326 def :ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>;
2328 def :ItinRW<[A9WriteB], [IIC_Br]>;
2330 // A9 PLD is processed in a dedicated unit, so IIC_Preload is mapped to an
// empty list of scheduler resources.
2331 def :ItinRW<[], [IIC_Preload]>;
2333 // Note: We must assume that loads are aligned, since the machine
2334 // model cannot know this statically and A9 ignores alignment hints.
2336 // A9WriteAdr consumes AGU regardless of address writeback. But its
2337 // latency is only relevant for users of an updated address.
// For loads the loaded-value write(s) come first and the address write
// last; for stores the address write is listed first.
2338 def :ItinRW<[A9WriteL, A9WriteAdr], [IIC_iLoad_i,IIC_iLoad_r,
2339                                      IIC_iLoad_iu,IIC_iLoad_ru]>;
2340 def :ItinRW<[A9WriteLsi, A9WriteAdr], [IIC_iLoad_si,IIC_iLoad_siu]>;
2341 def :ItinRW<[A9WriteLb, A9WriteAdr2], [IIC_iLoad_bh_i,IIC_iLoad_bh_r,
2342                                        IIC_iLoad_bh_iu,IIC_iLoad_bh_ru]>;
2343 def :ItinRW<[A9WriteLbsi, A9WriteAdr2], [IIC_iLoad_bh_si,IIC_iLoad_bh_siu]>;
2344 def :ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr], [IIC_iLoad_d_i,IIC_iLoad_d_r,
2345                                             IIC_iLoad_d_ru]>;
2346 // Store either has no def operands, or the one def for address writeback.
2347 def :ItinRW<[A9WriteAdr, A9WriteS], [IIC_iStore_i, IIC_iStore_r,
2348                                      IIC_iStore_iu, IIC_iStore_ru,
2349                                      IIC_iStore_d_i, IIC_iStore_d_r,
2350                                      IIC_iStore_d_ru]>;
2351 def :ItinRW<[A9WriteAdr2, A9WriteS], [IIC_iStore_si, IIC_iStore_siu,
2352                                       IIC_iStore_bh_i, IIC_iStore_bh_r,
2353                                       IIC_iStore_bh_iu, IIC_iStore_bh_ru]>;
2354 def :ItinRW<[A9WriteAdr3, A9WriteS], [IIC_iStore_bh_si, IIC_iStore_bh_siu]>;
2356 // A9WriteLM will be expanded into a separate write for each def
2357 // operand. Address generation consumes resources, but A9WriteLMAdr
2358 // is listed after all def operands, so has no effective latency.
2360 // Note: A9WriteLM expands into an even number of def operands. The
2361 // actual number of def operands may be less by one.
2362 def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue], [IIC_iLoad_m, IIC_iPop]>;
2364 // Load multiple with address writeback has an extra def operand in
2365 // front of the loaded registers.
2367 // Reuse the load-multiple variants for store-multiple because the
2368 // resources are identical. For stores, only the address writeback
2369 // has a def operand, so the WriteL latencies are unused.
2370 def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu,
2371                                                       IIC_iStore_m,
2372                                                       IIC_iStore_mu]>;
// The branching load-multiple/pop classes list A9WriteB in place of
// A9WriteIssue.
2373 def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>;
2374 def :ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>;
// VFP loads and stores. As with the integer load-multiple rows, the
// writeback form (IIC_fpLoad_mu) lists A9WriteLMAdr before the load write.
// All VFP store classes, including the multiple forms, share one row.
2376 def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;
2378 def :ItinRW<[A9WriteLMfp, A9WriteLMAdr], [IIC_fpLoad_m]>;
2379 def :ItinRW<[A9WriteLMAdr, A9WriteLMfp], [IIC_fpLoad_mu]>;
2380 def :ItinRW<[A9WriteAdr, A9WriteLSfp], [IIC_fpStore32, IIC_fpStore64,
2381                                         IIC_fpStore_m, IIC_fpStore_mu]>;
2383 // Note: Unlike VLDM, VLD1 expects the writeback operand after the
2384 // normal writes.
2385 def :ItinRW<[A9WriteLfp1, A9WriteAdr1], [IIC_VLD1, IIC_VLD1u,
2386                                          IIC_VLD1x2, IIC_VLD1x2u]>;
2387 def :ItinRW<[A9WriteLfp2, A9WriteAdr2], [IIC_VLD1x3, IIC_VLD1x3u,
2388                                          IIC_VLD1x4, IIC_VLD1x4u,
2389                                          IIC_VLD4dup, IIC_VLD4dupu]>;
2390 def :ItinRW<[A9WriteLfp1Mov, A9WriteAdr1], [IIC_VLD1dup, IIC_VLD1dupu,
2391                                             IIC_VLD2, IIC_VLD2u,
2392                                             IIC_VLD2dup, IIC_VLD2dupu]>;
// NOTE(review): this row pairs A9WriteLfp2Mov with A9WriteAdr1, whereas
// every other row in this group pairs matching indices -- confirm that
// A9WriteAdr1 (rather than A9WriteAdr2) is intended.
2393 def :ItinRW<[A9WriteLfp2Mov, A9WriteAdr1], [IIC_VLD1ln, IIC_VLD1lnu,
2394                                             IIC_VLD2x2, IIC_VLD2x2u,
2395                                             IIC_VLD2ln, IIC_VLD2lnu]>;
2396 def :ItinRW<[A9WriteLfp3Mov, A9WriteAdr3], [IIC_VLD3, IIC_VLD3u,
2397                                             IIC_VLD3dup, IIC_VLD3dupu]>;
2398 def :ItinRW<[A9WriteLfp4Mov, A9WriteAdr4], [IIC_VLD4, IIC_VLD4u,
2399                                             IIC_VLD4ln, IIC_VLD4lnu]>;
2400 def :ItinRW<[A9WriteLfp5Mov, A9WriteAdr5], [IIC_VLD3ln, IIC_VLD3lnu]>;
2402 // Vector stores use similar resources to vector loads, so use the
2403 // same write types. The address write must be first for stores with
2404 // address writeback.
// Only two write pairs are used for all store classes:
// (A9WriteAdr1, A9WriteLfp1) and (A9WriteAdr2, A9WriteLfp2).
2405 def :ItinRW<[A9WriteAdr1, A9WriteLfp1], [IIC_VST1, IIC_VST1u,
2406                                          IIC_VST1x2, IIC_VST1x2u,
2407                                          IIC_VST1ln, IIC_VST1lnu,
2408                                          IIC_VST2, IIC_VST2u,
2409                                          IIC_VST2x2, IIC_VST2x2u,
2410                                          IIC_VST2ln, IIC_VST2lnu]>;
2411 def :ItinRW<[A9WriteAdr2, A9WriteLfp2], [IIC_VST1x3, IIC_VST1x3u,
2412                                          IIC_VST1x4, IIC_VST1x4u,
2413                                          IIC_VST3, IIC_VST3u,
2414                                          IIC_VST3ln, IIC_VST3lnu,
2415                                          IIC_VST4, IIC_VST4u,
2416                                          IIC_VST4ln, IIC_VST4lnu]>;
2418 // NEON moves.
// These rows list a single write (A9WriteV1..A9WriteV3) and no reads.
2419 def :ItinRW<[A9WriteV2], [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>;
2420 def :ItinRW<[A9WriteV1], [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>;
2421 def :ItinRW<[A9WriteV3], [IIC_VMOVISL, IIC_VMOVN]>;
2423 // NEON integer arithmetic
2425 // VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL
2426 def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VBINiD, IIC_VBINiQ]>;
2427 // VSUB/VMVN/VCLSD/VCLZD/VCNTD
2428 def :ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>;
2429 // VADDL/VSUBL/VNEG are mapped later under IIC_VSHLi*.
2430 // ...
2431 // VHADD/VRHADD/VQADD/VTST/VADH/VRADH
2432 def :ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>;
2434 // VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL
2435 def :ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>;
2436 // VQNEG/VQABS
2437 def :ItinRW<[A9WriteV4], [IIC_VQUNAiD, IIC_VQUNAiQ]>;
2438 // VABS
2439 def :ItinRW<[A9WriteV4, A9Read2], [IIC_VUNAiD, IIC_VUNAiQ]>;
2440 // VPADD/VPADDL are mapped later under IIC_VSHLi*.
2441 // ...
2442 // VCLSQ/VCLZQ/VCNTQ, takes two cycles.
2443 def :ItinRW<[A9Write2V4, A9Read3], [IIC_VCNTiQ]>;
2444 // VMOVimm/VMVNimm/VORRimm/VBICimm
2445 def :ItinRW<[A9WriteV3], [IIC_VMOVImm]>;
// IIC_VABA* and IIC_VPAL*: the first listed read is A9Read3.
2446 def :ItinRW<[A9WriteV6, A9Read3, A9Read2], [IIC_VABAD, IIC_VABAQ]>;
2447 def :ItinRW<[A9WriteV6, A9Read3], [IIC_VPALiD, IIC_VPALiQ]>;
2449 // NEON integer multiply
2451 // Note: these don't quite match the timing docs, but they do match
2452 // the original A9 itinerary.
// Each IIC_VMACi* row uses the same write as the matching IIC_VMULi* row
// and adds an A9Read3 as its first read.
2453 def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VMULi16D]>;
2454 def :ItinRW<[A9WriteV7, A9Read2, A9Read2], [IIC_VMULi16Q]>;
2455 def :ItinRW<[A9Write2V7, A9Read2], [IIC_VMULi32D]>;
2456 def :ItinRW<[A9Write2V9, A9Read2], [IIC_VMULi32Q]>;
2457 def :ItinRW<[A9WriteV6, A9Read3, A9Read2, A9Read2], [IIC_VMACi16D]>;
2458 def :ItinRW<[A9WriteV7, A9Read3, A9Read2, A9Read2], [IIC_VMACi16Q]>;
2459 def :ItinRW<[A9Write2V7, A9Read3, A9Read2], [IIC_VMACi32D]>;
2460 def :ItinRW<[A9Write2V9, A9Read3, A9Read2], [IIC_VMACi32Q]>;
2462 // NEON integer shift
2463 // TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles.
// Per the notes in the integer arithmetic section, these classes also
// cover VADDL/VSUBL/VNEG and VPADD/VPADDL.
2464 def :ItinRW<[A9WriteV3], [IIC_VSHLiD, IIC_VSHLiQ]>;
2465 def :ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>;
2467 // NEON permute
// NOTE(review): ReadDefault presumably stands for an operand with no read
// advance, so that the A9Read* entries after it line up with later source
// operands -- confirm against the operand order of these instructions.
2468 def :ItinRW<[A9WriteV2, A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
2469 def :ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2],
2470             [IIC_VPERMQ3, IIC_VEXTQ]>;
2471 def :ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>;
2472 def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VTB2]>;
2473 def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3], [IIC_VTB3]>;
2474 def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3], [IIC_VTB4]>;
2475 def :ItinRW<[A9WriteV3, ReadDefault, A9Read2], [IIC_VTBX1]>;
2476 def :ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2], [IIC_VTBX2]>;
2477 def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3], [IIC_VTBX3]>;
2478 def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3],
2479             [IIC_VTBX4]>;
2481 // NEON floating-point
// Each *Q class uses the write numbered one higher than its *D counterpart
// (A9WriteV5/A9WriteV6 and A9WriteV9/A9WriteV10).
2482 def :ItinRW<[A9WriteV5, A9Read2, A9Read2], [IIC_VBIND]>;
2483 def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VBINQ]>;
2484 def :ItinRW<[A9WriteV5, A9Read2], [IIC_VUNAD, IIC_VFMULD]>;
2485 def :ItinRW<[A9WriteV6, A9Read2], [IIC_VUNAQ, IIC_VFMULQ]>;
2486 def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
2487 def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
2488 def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
2489 def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;
2491 // Map SchedRWs that are identical for cortexa9 to existing resources.
// WriteALUsr and WriteALUSsr both alias A9WriteALUsr; ReadALU and ReadALUsr
// both alias A9ReadALU.
2492 def : SchedAlias<WriteALU, A9WriteALU>;
2493 def : SchedAlias<WriteALUsr, A9WriteALUsr>;
2494 def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
2495 def : SchedAlias<ReadALU, A9ReadALU>;
2496 def : SchedAlias<ReadALUsr, A9ReadALU>;
2497 def : SchedAlias<WriteST, A9WriteS>;
2499 // ===---------------------------------------------------------------------===//
2500 // Floating-point. Map target defined SchedReadWrite to processor specific ones
// WriteFPCVT is the only write given its own WriteRes here (A9UnitFP plus
// A9UnitAGU, latency 4); the remaining writes alias existing A9 writes.
2502 def : WriteRes<WriteFPCVT, [A9UnitFP, A9UnitAGU]> { let Latency = 4; }
2503 def : SchedAlias<WriteFPMOV, A9WriteFMov>;
2505 def : SchedAlias<WriteFPALU32, A9WriteF>;
2506 def : SchedAlias<WriteFPALU64, A9WriteF>;
2508 def : SchedAlias<WriteFPMUL32, A9WriteFMulS>;
2509 def : SchedAlias<WriteFPMUL64, A9WriteFMulD>;
2511 def : SchedAlias<WriteFPMAC32, A9WriteFMAS>;
2512 def : SchedAlias<WriteFPMAC64, A9WriteFMAD>;
2514 def : SchedAlias<WriteFPDIV32, A9WriteFDivS>;
2515 def : SchedAlias<WriteFPDIV64, A9WriteFDivD>;
2516 def : SchedAlias<WriteFPSQRT32, A9WriteFSqrtS>;
2517 def : SchedAlias<WriteFPSQRT64, A9WriteFSqrtD>;
// ReadFPMUL and ReadFPMAC are given a read advance of zero cycles.
2519 def : ReadAdvance<ReadFPMUL, 0>;
2520 def : ReadAdvance<ReadFPMAC, 0>;
2522 // ===---------------------------------------------------------------------===//
2523 // Subtarget-specific overrides. Map opcodes to list of SchedReadWrite types.
// AND/ORR/EOR/BIC: the ri and rr forms use WriteALU, the rsi forms use
// WriteALUsi and the rsr forms use WriteALUsr.
2525 def : InstRW< [WriteALU],
2526       (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr",
2527                  "BICrr")>;
2528 def : InstRW< [WriteALUsi], (instregex "ANDrsi", "ORRrsi", "EORrsi", "BICrsi")>;
2529 def : InstRW< [WriteALUsr], (instregex "ANDrsr", "ORRrsr", "EORrsr", "BICrsr")>;
// All three compare writes alias the same A9WriteALU write.
2532 def : SchedAlias<WriteCMP, A9WriteALU>;
2533 def : SchedAlias<WriteCMPsi, A9WriteALU>;
2534 def : SchedAlias<WriteCMPsr, A9WriteALU>;
// Move, select and bit-field opcodes.
// NOTE(review): the pattern "MOV_ga_pcrel" is a prefix of
// "MOV_ga_pcrel_ldr"; if instregex patterns are not anchored at the end,
// both rows would match the _ldr opcode -- confirm which row wins.
2536 def : InstRW< [A9WriteIsr], (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi",
2537                                        "MOVCCsr")>;
2538 def : InstRW< [WriteALU, A9ReadALU], (instregex "MVNr")>;
2539 def : InstRW< [A9WriteI2], (instregex "MOVCCi32imm", "MOVi32imm")>;
2540 def : InstRW< [A9WriteI2pc], (instregex "MOV_ga_pcrel")>;
2541 def : InstRW< [A9WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
2543 def : InstRW< [WriteALU], (instregex "SEL")>;
2545 def : InstRW< [WriteALUsi], (instregex "BFC", "BFI", "UBFX", "SBFX")>;
// Multiply opcodes. Rows listing A9WriteM alone are given a single write;
// the remaining rows add a second *Hi write.
2547 def : InstRW< [A9WriteM],
2548       (instregex "MUL", "MULv5", "SMMUL", "SMMULR", "MLA", "MLAv5", "MLS",
2549       "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>;
2550 def : InstRW< [A9WriteM, A9WriteMHi],
2551       (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$", "UMLAL",
2552       "UMAAL", "SMLALv5", "UMLALv5", "SMLALBB", "SMLALBT", "SMLALTB",
2553       "SMLALTT")>;
2554 // FIXME: These instructions used to have NoItinerary. Just copied the one from above.
2555 def : InstRW< [A9WriteM, A9WriteMHi],
2556       (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD", "SMLSDX",
2557       "SMLSLD", "SMLSLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>;
// The 16-bit multiply and multiply-accumulate opcodes use the
// A9WriteM16/A9WriteM16Hi pair.
2559 def : InstRW<[A9WriteM16, A9WriteM16Hi],
2560       (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT", "SMULWB", "SMULWT")>;
2561 def : InstRW<[A9WriteM16, A9WriteM16Hi],
2562       (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLAWB", "SMLAWT")>;
// Integer load opcodes.
2564 def : InstRW<[A9WriteL], (instregex "LDRi12", "PICLDR$")>;
2565 def : InstRW<[A9WriteLsi], (instregex "LDRrs")>;
2566 def : InstRW<[A9WriteLb],
2567       (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH", "PICLDRSB",
2568       "LDRH", "LDRSH", "LDRSB")>;
// NOTE(review): "LDRrs" is already matched by the A9WriteLsi row above, so
// this row repeats the same pattern with a different write. It was
// presumably meant to match a byte-load opcode (e.g. "LDRBrs") -- confirm
// the intended regex before changing it.
2569 def : InstRW<[A9WriteLbsi], (instregex "LDRrs")>;
// Remaining generic writes. WriteDIV and WriteNoop use no resources and
// have zero latency (WriteNoop also has zero micro-ops); the three branch
// writes use A9UnitB; WritePreLd uses no resources.
2571 def : WriteRes<WriteDIV, []> { let Latency = 0; }
2573 def : WriteRes<WriteBr, [A9UnitB]>;
2574 def : WriteRes<WriteBrL, [A9UnitB]>;
2575 def : WriteRes<WriteBrTbl, [A9UnitB]>;
2576 def : WriteRes<WritePreLd, []>;
2577 def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
2578 } // SchedModel = CortexA9Model