lib/Target/ARM/ARMScheduleA8.td

   1 //=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines the itinerary class data for the ARM Cortex A8 processors.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 //
  14 // Scheduling information derived from "Cortex-A8 Technical Reference Manual".
  15 // Functional Units.
  16 def A8_Pipe0   : FuncUnit; // integer pipeline 0; multiplies and load/store multiple issue here only
  17 def A8_Pipe1   : FuncUnit; // integer pipeline 1; alternative to pipeline 0 for dual issue
  18 def A8_LSPipe  : FuncUnit; // Load / store pipeline
  19 def A8_NPipe   : FuncUnit; // NEON ALU/MUL pipe; also reserved by the VFP itineraries below
  20 def A8_NLSPipe : FuncUnit; // NEON LS pipe; also reserved alongside A8_NPipe by double-precision VFP ops
  21 //
  22 // Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
  23 //
  24 def CortexA8Itineraries : ProcessorItineraries<
  25   [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe],
  26   [], [
  27   // Two fully-pipelined integer ALU pipelines
  28   //
  29   // No operand cycles
  30   InstrItinData<IIC_iALUx    , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
  31   //
  32   // Binary Instructions that produce a result
  33   InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
  34   InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
  35   InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
  36   InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>,
  37   InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
  38   //
  39   // Bitwise Instructions that produce a result
  40   InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
  41   InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
  42   InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
  43   InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
  44   //
  45   // Unary Instructions that produce a result
  46   InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
  47   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
  48   //
  49   // Zero and sign extension instructions
  50   InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
  51   InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
  52   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>,
  53   //
  54   // Compare instructions
  55   InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
  56   InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
  57   InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
  58   InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
  59   //
  60   // Test instructions
  61   InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
  62   InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
  63   InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
  64   InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
  65   //
  66   // Move instructions, unconditional
  67   InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
  68   InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
  69   InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
  70   InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
  71   InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  72                              InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
  73   InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  74                                   InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  75                                   InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>,
  76   InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  77                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  78                                InstrStage<1, [A8_LSPipe]>], [5]>,
  79   //
  80   // Move instructions, conditional
  81   InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
  82   InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
  83   InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
  84   InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
  85   InstrItinData<IIC_iCMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  86                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3, 1]>,
  87   //
  88   // MVN instructions
  89   InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
  90   InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
  91   InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
  92   InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
  93
  94   // Integer multiply pipeline
  95   // Result written in E5, but that is relative to the last cycle of a
  96   // multicycle instruction, so we use 6 for those cases
  97   //
  98   InstrItinData<IIC_iMUL16   , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
  99   InstrItinData<IIC_iMAC16   , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
 100   InstrItinData<IIC_iMUL32   , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
 101   InstrItinData<IIC_iMAC32   , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
 102   InstrItinData<IIC_iMUL64   , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
 103   InstrItinData<IIC_iMAC64   , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
 104
 105   // Integer load pipeline
 106   //
 107   // Immediate offset
 108   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 109                                  InstrStage<1, [A8_LSPipe]>], [3, 1]>,
 110   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 111                                  InstrStage<1, [A8_LSPipe]>], [3, 1]>,
 112   InstrItinData<IIC_iLoad_d_i,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 113                                  InstrStage<1, [A8_LSPipe]>], [3, 1]>,
 114   //
 115   // Register offset
 116   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 117                                  InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
 118   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 119                                  InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
 120   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 121                                  InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
 122   //
 123   // Scaled register offset, issues over 2 cycles
 124   // FIXME: lsl by 2 takes 1 cycle.
 125   InstrItinData<IIC_iLoad_si  , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
 126                                  InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
 127   InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
 128                                  InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
 129   //
 130   // Immediate offset with update
 131   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 132                                  InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
 133   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 134                                  InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
 135   //
 136   // Register offset with update
 137   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 138                                  InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
 139   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 140                                  InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
 141   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 142                                  InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
 143   //
 144   // Scaled register offset with update, issues over 2 cycles
 145   InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
 146                                  InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
 147   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
 148                                   InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
 149   //
 150   // Load multiple, def is the 5th operand. Pipeline 0 only.
 151   // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
 152   InstrItinData<IIC_iLoad_m  , [InstrStage<2, [A8_Pipe0], 0>,
 153                                 InstrStage<2, [A8_LSPipe]>],
 154                 [1, 1, 1, 1, 3], [], -1>, // dynamic uops
 155   //
 156   // Load multiple + update, defs are the 1st and 5th operands.
 157   InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
 158                                 InstrStage<3, [A8_LSPipe]>],
 159                 [2, 1, 1, 1, 3], [], -1>, // dynamic uops
 160   //
 161   // Load multiple plus branch
 162   InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
 163                                 InstrStage<3, [A8_LSPipe]>,
 164                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
 165                               [1, 2, 1, 1, 3], [], -1>, // dynamic uops
 166   //
 167   // Pop, def is the 3rd operand.
 168   InstrItinData<IIC_iPop  ,    [InstrStage<3, [A8_Pipe0], 0>,
 169                                 InstrStage<3, [A8_LSPipe]>],
 170                 [1, 1, 3], [], -1>, // dynamic uops
 171   //
 172   // Pop plus branch, def is the 3rd operand.
 173   InstrItinData<IIC_iPop_Br,   [InstrStage<3, [A8_Pipe0], 0>,
 174                                 InstrStage<3, [A8_LSPipe]>,
 175                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
 176                                [1, 1, 3], [], -1>, // dynamic uops
 177   //
 178   // iLoadi + iALUr for t2LDRpci_pic.
 179   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 180                                 InstrStage<1, [A8_LSPipe]>,
 181                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>,
 182
 183
 184   // Integer store pipeline
 185   //
 186   // Immediate offset
 187   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 188                                  InstrStage<1, [A8_LSPipe]>], [3, 1]>,
 189   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 190                                  InstrStage<1, [A8_LSPipe]>], [3, 1]>,
 191   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 192                                  InstrStage<1, [A8_LSPipe]>], [3, 1]>,
 193   //
 194   // Register offset
 195   InstrItinData<IIC_iStore_r  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 196                                  InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
 197   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 198                                  InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
 199   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 200                                  InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
 201   //
 202   // Scaled register offset, issues over 2 cycles
 203   InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
 204                                  InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
 205   InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
 206                                   InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
 207   //
 208   // Immediate offset with update
 209   InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 210                                  InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
 211   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 212                                  InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
 213   //
 214   // Register offset with update
 215   InstrItinData<IIC_iStore_ru  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 216                                   InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
 217   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 218                                   InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
 219   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 220                                   InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
 221   //
 222   // Scaled register offset with update, issues over 2 cycles
 223   InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
 224                                  InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
 225   InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
 226                                    InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
 227   //
 228   // Store multiple. Pipeline 0 only.
 229   // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
 230   InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
 231                                 InstrStage<2, [A8_LSPipe]>],
 232                 [], [], -1>, // dynamic uops
 233   //
 234   // Store multiple + update
 235   InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
 236                                 InstrStage<2, [A8_LSPipe]>],
 237                 [2], [], -1>, // dynamic uops
 238   //
 239   // Preload
 240   InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
 241
 242   // Branch
 243   //
 244   // no delay slots, so the latency of a branch is unimportant
 245   InstrItinData<IIC_Br      , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
 246
 247   // VFP
 248   // Issue through integer pipeline, and execute in NEON unit. We assume
 249   // RunFast mode so that NFP pipeline is used for single-precision when
 250   // possible.
 251   //
 252   // FP Special Register to Integer Register File Move
 253   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 254                               InstrStage<1, [A8_NLSPipe]>], [20]>,
 255   //
 256   // Single-precision FP Unary
 257   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 258                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
 259   //
 260   // Double-precision FP Unary
 261   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 262                                InstrStage<4, [A8_NPipe], 0>,
 263                                InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
 264   //
 265   // Single-precision FP Compare
 266   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 267                                InstrStage<1, [A8_NPipe]>], [1, 1]>,
 268   //
 269   // Double-precision FP Compare
 270   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 271                                InstrStage<4, [A8_NPipe], 0>,
 272                                InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
 273   //
 274   // Single to Double FP Convert
 275   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 276                                InstrStage<7, [A8_NPipe], 0>,
 277                                InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
 278   //
 279   // Double to Single FP Convert
 280   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 281                                InstrStage<5, [A8_NPipe], 0>,
 282                                InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
 283   //
 284   // Single-Precision FP to Integer Convert
 285   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 286                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
 287   //
 288   // Double-Precision FP to Integer Convert
 289   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 290                                InstrStage<8, [A8_NPipe], 0>,
 291                                InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
 292   //
 293   // Integer to Single-Precision FP Convert
 294   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 295                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
 296   //
 297   // Integer to Double-Precision FP Convert
 298   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 299                                InstrStage<8, [A8_NPipe], 0>,
 300                                InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
 301   //
 302   // Single-precision FP ALU
 303   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 304                                InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
 305   //
 306   // Double-precision FP ALU
 307   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 308                                InstrStage<9, [A8_NPipe], 0>,
 309                                InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
 310   //
 311   // Single-precision FP Multiply
 312   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 313                                InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
 314   //
 315   // Double-precision FP Multiply
 316   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 317                                InstrStage<11, [A8_NPipe], 0>,
 318                                InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
 319   //
 320   // Single-precision FP MAC
 321   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 322                                InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
 323   //
 324   // Double-precision FP MAC
 325   InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 326                                InstrStage<19, [A8_NPipe], 0>,
 327                                InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
 328   //
 329   // Single-precision Fused FP MAC
 330   InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 331                                InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
 332   //
 333   // Double-precision Fused FP MAC
 334   InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 335                                InstrStage<19, [A8_NPipe], 0>,
 336                                InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
 337   //
 338   // Single-precision FP DIV
 339   InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 340                                InstrStage<20, [A8_NPipe], 0>,
 341                                InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
 342   //
 343   // Double-precision FP DIV
 344   InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 345                                InstrStage<29, [A8_NPipe], 0>,
 346                                InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
 347   //
 348   // Single-precision FP SQRT
 349   InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 350                                InstrStage<19, [A8_NPipe], 0>,
 351                                InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
 352   //
 353   // Double-precision FP SQRT
 354   InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 355                                InstrStage<29, [A8_NPipe], 0>,
 356                                InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
 357
 358   //
 359   // Integer to Single-precision Move
 360   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 361                                InstrStage<1, [A8_NPipe]>],
 362                               [2, 1]>,
 363   //
 364   // Integer to Double-precision Move
 365   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 366                                InstrStage<1, [A8_NPipe]>],
 367                               [2, 1, 1]>,
 368   //
 369   // Single-precision to Integer Move
 370   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 371                                InstrStage<1, [A8_NPipe]>],
 372                               [20, 1]>,
 373   //
 374   // Double-precision to Integer Move
 375   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 376                                InstrStage<1, [A8_NPipe]>],
 377                               [20, 20, 1]>,
 378
 379   //
 380   // Single-precision FP Load
 381   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 382                                InstrStage<1, [A8_NLSPipe], 0>,
 383                                InstrStage<1, [A8_LSPipe]>],
 384                               [2, 1]>,
 385   //
 386   // Double-precision FP Load
 387   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 388                                InstrStage<1, [A8_NLSPipe], 0>,
 389                                InstrStage<1, [A8_LSPipe]>],
 390                               [2, 1]>,
 391   //
 392   // FP Load Multiple
 393   // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
 394   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 395                                InstrStage<1, [A8_NLSPipe], 0>,
 396                                InstrStage<1, [A8_LSPipe]>,
 397                                InstrStage<1, [A8_NLSPipe], 0>,
 398                                InstrStage<1, [A8_LSPipe]>],
 399                 [1, 1, 1, 2], [], -1>, // dynamic uops
 400   //
 401   // FP Load Multiple + update
 402   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 403                                InstrStage<1, [A8_NLSPipe], 0>,
 404                                InstrStage<1, [A8_LSPipe]>,
 405                                InstrStage<1, [A8_NLSPipe], 0>,
 406                                InstrStage<1, [A8_LSPipe]>],
 407                 [2, 1, 1, 1, 2], [], -1>, // dynamic uops
 408   //
 409   // Single-precision FP Store
 410   InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 411                                InstrStage<1, [A8_NLSPipe], 0>,
 412                                InstrStage<1, [A8_LSPipe]>],
 413                               [1, 1]>,
 414   //
 415   // Double-precision FP Store
 416   InstrItinData<IIC_fpStore64,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 417                                InstrStage<1, [A8_NLSPipe], 0>,
 418                                InstrStage<1, [A8_LSPipe]>],
 419                               [1, 1]>,
 420   //
 421   // FP Store Multiple
 422   InstrItinData<IIC_fpStore_m,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 423                                InstrStage<1, [A8_NLSPipe], 0>,
 424                                InstrStage<1, [A8_LSPipe]>,
 425                                InstrStage<1, [A8_NLSPipe], 0>,
 426                                InstrStage<1, [A8_LSPipe]>],
 427                 [1, 1, 1, 1], [], -1>, // dynamic uops
 428   //
 429   // FP Store Multiple + update
 430   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 431                                 InstrStage<1, [A8_NLSPipe], 0>,
 432                                 InstrStage<1, [A8_LSPipe]>,
 433                                 InstrStage<1, [A8_NLSPipe], 0>,
 434                                 InstrStage<1, [A8_LSPipe]>],
 435                 [2, 1, 1, 1, 1], [], -1>, // dynamic uops
 436   // NEON
 437   // Issue through integer pipeline, and execute in NEON unit.
 438   //
 439   // VLD1
 440   InstrItinData<IIC_VLD1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 441                                InstrStage<2, [A8_NLSPipe], 0>,
 442                                InstrStage<2, [A8_LSPipe]>],
 443                               [2, 1]>,
 444   // VLD1x2
 445   InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 446                                InstrStage<2, [A8_NLSPipe], 0>,
 447                                InstrStage<2, [A8_LSPipe]>],
 448                               [2, 2, 1]>,
 449   //
 450   // VLD1x3
 451   InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 452                                InstrStage<3, [A8_NLSPipe], 0>,
 453                                InstrStage<3, [A8_LSPipe]>],
 454                               [2, 2, 3, 1]>,
 455   //
 456   // VLD1x4
 457   InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 458                                InstrStage<3, [A8_NLSPipe], 0>,
 459                                InstrStage<3, [A8_LSPipe]>],
 460                               [2, 2, 3, 3, 1]>,
 461   //
 462   // VLD1u
 463   InstrItinData<IIC_VLD1u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 464                                InstrStage<2, [A8_NLSPipe], 0>,
 465                                InstrStage<2, [A8_LSPipe]>],
 466                               [2, 2, 1]>,
 467   //
 468   // VLD1x2u
 469   InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 470                                InstrStage<2, [A8_NLSPipe], 0>,
 471                                InstrStage<2, [A8_LSPipe]>],
 472                               [2, 2, 2, 1]>,
 473   //
 474   // VLD1x3u
 475   InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 476                                InstrStage<3, [A8_NLSPipe], 0>,
 477                                InstrStage<3, [A8_LSPipe]>],
 478                               [2, 2, 3, 2, 1]>,
 479   //
 480   // VLD1x4u
 481   InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 482                                InstrStage<3, [A8_NLSPipe], 0>,
 483                                InstrStage<3, [A8_LSPipe]>],
 484                               [2, 2, 3, 3, 2, 1]>,
 485   //
 486   // VLD1ln
 487   InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
 488                                InstrStage<3, [A8_NLSPipe], 0>,
 489                                InstrStage<3, [A8_LSPipe]>],
 490                               [3, 1, 1, 1]>,
 491   //
 492   // VLD1lnu
 493   InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
 494                                InstrStage<3, [A8_NLSPipe], 0>,
 495                                InstrStage<3, [A8_LSPipe]>],
 496                               [3, 2, 1, 1, 1, 1]>,
 497   //
 498   // VLD1dup
 499   InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
 500                                InstrStage<2, [A8_NLSPipe], 0>,
 501                                InstrStage<2, [A8_LSPipe]>],
 502                               [2, 1]>,
 503   //
 504   // VLD1dupu
 505   InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
 506                                InstrStage<2, [A8_NLSPipe], 0>,
 507                                InstrStage<2, [A8_LSPipe]>],
 508                               [2, 2, 1, 1]>,
 509   //
 510   // VLD2
 511   InstrItinData<IIC_VLD2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 512                                InstrStage<2, [A8_NLSPipe], 0>,
 513                                InstrStage<2, [A8_LSPipe]>],
 514                               [2, 2, 1]>,
 515   //
 516   // VLD2x2
 517   InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 518                                InstrStage<3, [A8_NLSPipe], 0>,
 519                                InstrStage<3, [A8_LSPipe]>],
 520                               [2, 2, 3, 3, 1]>,
 521   //
 522   // VLD2ln
 523   InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 524                                InstrStage<3, [A8_NLSPipe], 0>,
 525                                InstrStage<3, [A8_LSPipe]>],
 526                               [3, 3, 1, 1, 1, 1]>,
 527   //
 528   // VLD2u
 529   InstrItinData<IIC_VLD2u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 530                                InstrStage<2, [A8_NLSPipe], 0>,
 531                                InstrStage<2, [A8_LSPipe]>],
 532                               [2, 2, 2, 1, 1, 1]>,
 533   //
 534   // VLD2x2u
 535   InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 536                                InstrStage<3, [A8_NLSPipe], 0>,
 537                                InstrStage<3, [A8_LSPipe]>],
 538                               [2, 2, 3, 3, 2, 1]>,
 539   //
 540   // VLD2lnu
 541   InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 542                                InstrStage<3, [A8_NLSPipe], 0>,
 543                                InstrStage<3, [A8_LSPipe]>],
 544                               [3, 3, 2, 1, 1, 1, 1, 1]>,
 545   //
 546   // VLD2dup
 547   InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
 548                                InstrStage<2, [A8_NLSPipe], 0>,
 549                                InstrStage<2, [A8_LSPipe]>],
 550                               [2, 2, 1]>,
 551   //
 552   // VLD2dupu
 553   InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
 554                                InstrStage<2, [A8_NLSPipe], 0>,
 555                                InstrStage<2, [A8_LSPipe]>],
 556                               [2, 2, 2, 1, 1]>,
 557   //
 558   // VLD3
 559   InstrItinData<IIC_VLD3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 560                                InstrStage<4, [A8_NLSPipe], 0>,
 561                                InstrStage<4, [A8_LSPipe]>],
 562                               [3, 3, 4, 1]>,
 563   //
 564   // VLD3ln
 565   InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 566                                InstrStage<5, [A8_NLSPipe], 0>,
 567                                InstrStage<5, [A8_LSPipe]>],
 568                               [4, 4, 5, 1, 1, 1, 1, 2]>,
 569   //
 570   // VLD3u
 571   InstrItinData<IIC_VLD3u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 572                                InstrStage<4, [A8_NLSPipe], 0>,
 573                                InstrStage<4, [A8_LSPipe]>],
 574                               [3, 3, 4, 2, 1]>,
 575   //
 576   // VLD3lnu
 577   InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 578                                InstrStage<5, [A8_NLSPipe], 0>,
 579                                InstrStage<5, [A8_LSPipe]>],
 580                               [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>,
 581   //
 582   // VLD3dup
 583   InstrItinData<IIC_VLD3dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
 584                                InstrStage<3, [A8_NLSPipe], 0>,
 585                                InstrStage<3, [A8_LSPipe]>],
 586                               [2, 2, 3, 1]>,
 587   //
 588   // VLD3dupu
 589   InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
 590                                InstrStage<3, [A8_NLSPipe], 0>,
 591                                InstrStage<3, [A8_LSPipe]>],
 592                               [2, 2, 3, 2, 1, 1]>,
 593   //
 594   // VLD4
 595   InstrItinData<IIC_VLD4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 596                                InstrStage<4, [A8_NLSPipe], 0>,
 597                                InstrStage<4, [A8_LSPipe]>],
 598                               [3, 3, 4, 4, 1]>,
 599   //
 600   // VLD4ln
 601   InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 602                                InstrStage<5, [A8_NLSPipe], 0>,
 603                                InstrStage<5, [A8_LSPipe]>],
 604                               [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
 605   //
 606   // VLD4u
 607   InstrItinData<IIC_VLD4u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 608                                InstrStage<4, [A8_NLSPipe], 0>,
 609                                InstrStage<4, [A8_LSPipe]>],
 610                               [3, 3, 4, 4, 2, 1]>,
 611   //
 612   // VLD4lnu
 613   InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 614                                InstrStage<5, [A8_NLSPipe], 0>,
 615                                InstrStage<5, [A8_LSPipe]>],
 616                               [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
 617   //
 618   // VLD4dup
 619   InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
 620                                InstrStage<3, [A8_NLSPipe], 0>,
 621                                InstrStage<3, [A8_LSPipe]>],
 622                               [2, 2, 3, 3, 1]>,
 623   //
 624   // VLD4dupu
 625   InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
 626                                InstrStage<3, [A8_NLSPipe], 0>,
 627                                InstrStage<3, [A8_LSPipe]>],
 628                               [2, 2, 3, 3, 2, 1, 1]>,
 629   //
 630   // VST1
 631   InstrItinData<IIC_VST1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 632                                InstrStage<2, [A8_NLSPipe], 0>,
 633                                InstrStage<2, [A8_LSPipe]>],
 634                               [1, 1, 1]>,
 635   //
 636   // VST1x2
 637   InstrItinData<IIC_VST1x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 638                                InstrStage<2, [A8_NLSPipe], 0>,
 639                                InstrStage<2, [A8_LSPipe]>],
 640                               [1, 1, 1, 1]>,
 641   //
 642   // VST1x3
 643   InstrItinData<IIC_VST1x3,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 644                                InstrStage<3, [A8_NLSPipe], 0>,
 645                                InstrStage<3, [A8_LSPipe]>],
 646                               [1, 1, 1, 1, 2]>,
 647   //
 648   // VST1x4
 649   InstrItinData<IIC_VST1x4,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 650                                InstrStage<3, [A8_NLSPipe], 0>,
 651                                InstrStage<3, [A8_LSPipe]>],
 652                               [1, 1, 1, 1, 2, 2]>,
 653   //
 654   // VST1u
 655   InstrItinData<IIC_VST1u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 656                                InstrStage<2, [A8_NLSPipe], 0>,
 657                                InstrStage<2, [A8_LSPipe]>],
 658                               [2, 1, 1, 1, 1]>,
 659   //
 660   // VST1x2u
 661   InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 662                                InstrStage<2, [A8_NLSPipe], 0>,
 663                                InstrStage<2, [A8_LSPipe]>],
 664                               [2, 1, 1, 1, 1, 1]>,
 665   //
 666   // VST1x3u
 667   InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 668                                InstrStage<3, [A8_NLSPipe], 0>,
 669                                InstrStage<3, [A8_LSPipe]>],
 670                               [2, 1, 1, 1, 1, 1, 2]>,
 671   //
 672   // VST1x4u
 673   InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 674                                InstrStage<3, [A8_NLSPipe], 0>,
 675                                InstrStage<3, [A8_LSPipe]>],
 676                               [2, 1, 1, 1, 1, 1, 2, 2]>,
 677   //
 678   // VST1ln
 679   InstrItinData<IIC_VST1ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
 680                                InstrStage<2, [A8_NLSPipe], 0>,
 681                                InstrStage<2, [A8_LSPipe]>],
 682                               [1, 1, 1]>,
 683   //
 684   // VST1lnu
 685   InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
 686                                InstrStage<2, [A8_NLSPipe], 0>,
 687                                InstrStage<2, [A8_LSPipe]>],
 688                               [2, 1, 1, 1, 1]>,
 689   //
 690   // VST2
 691   InstrItinData<IIC_VST2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 692                                InstrStage<2, [A8_NLSPipe], 0>,
 693                                InstrStage<2, [A8_LSPipe]>],
 694                               [1, 1, 1, 1]>,
 695   //
 696   // VST2x2
 697   InstrItinData<IIC_VST2x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 698                                InstrStage<4, [A8_NLSPipe], 0>,
 699                                InstrStage<4, [A8_LSPipe]>],
 700                               [1, 1, 1, 1, 2, 2]>,
 701   //
 702   // VST2u
 703   InstrItinData<IIC_VST2u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 704                                InstrStage<2, [A8_NLSPipe], 0>,
 705                                InstrStage<2, [A8_LSPipe]>],
 706                               [2, 1, 1, 1, 1, 1]>,
 707   //
 708   // VST2x2u
 709   InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 710                                InstrStage<4, [A8_NLSPipe], 0>,
 711                                InstrStage<4, [A8_LSPipe]>],
 712                               [2, 1, 1, 1, 1, 1, 2, 2]>,
 713   //
 714   // VST2ln
 715   InstrItinData<IIC_VST2ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 716                                InstrStage<2, [A8_NLSPipe], 0>,
 717                                InstrStage<2, [A8_LSPipe]>],
 718                               [1, 1, 1, 1]>,
 719   //
 720   // VST2lnu
 721   InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 722                                InstrStage<2, [A8_NLSPipe], 0>,
 723                                InstrStage<2, [A8_LSPipe]>],
 724                               [2, 1, 1, 1, 1, 1]>,
 725   //
 726   // VST3
 727   InstrItinData<IIC_VST3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 728                                InstrStage<3, [A8_NLSPipe], 0>,
 729                                InstrStage<3, [A8_LSPipe]>],
 730                               [1, 1, 1, 1, 2]>,
 731   //
 732   // VST3u
 733   InstrItinData<IIC_VST3u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 734                                InstrStage<3, [A8_NLSPipe], 0>,
 735                                InstrStage<3, [A8_LSPipe]>],
 736                               [2, 1, 1, 1, 1, 1, 2]>,
 737   //
 738   // VST3ln
 739   InstrItinData<IIC_VST3ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 740                                InstrStage<3, [A8_NLSPipe], 0>,
 741                                InstrStage<3, [A8_LSPipe]>],
 742                               [1, 1, 1, 1, 2]>,
 743   //
 744   // VST3lnu
 745   InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 746                                InstrStage<3, [A8_NLSPipe], 0>,
 747                                InstrStage<3, [A8_LSPipe]>],
 748                               [2, 1, 1, 1, 1, 1, 2]>,
 749   //
 750   // VST4
 751   InstrItinData<IIC_VST4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 752                                InstrStage<4, [A8_NLSPipe], 0>,
 753                                InstrStage<4, [A8_LSPipe]>],
 754                               [1, 1, 1, 1, 2, 2]>,
 755   //
 756   // VST4u
 757   InstrItinData<IIC_VST4u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 758                                InstrStage<4, [A8_NLSPipe], 0>,
 759                                InstrStage<4, [A8_LSPipe]>],
 760                               [2, 1, 1, 1, 1, 1, 2, 2]>,
 761   //
 762   // VST4ln
 763   InstrItinData<IIC_VST4ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 764                                InstrStage<4, [A8_NLSPipe], 0>,
 765                                InstrStage<4, [A8_LSPipe]>],
 766                               [1, 1, 1, 1, 2, 2]>,
 767   //
 768   // VST4lnu
 769   InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 770                                InstrStage<4, [A8_NLSPipe], 0>,
 771                                InstrStage<4, [A8_LSPipe]>],
 772                               [2, 1, 1, 1, 1, 1, 2, 2]>,
 773   //
 774   // Double-register FP Unary
 775   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 776                                InstrStage<1, [A8_NPipe]>], [5, 2]>,
 777   //
 778   // Quad-register FP Unary
 779   // Result written in N5, but that is relative to the last cycle of multicycle,
 780   // so we use 6 for those cases
 781   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 782                                InstrStage<2, [A8_NPipe]>], [6, 2]>,
 783   //
 784   // Double-register FP Binary
 785   InstrItinData<IIC_VBIND,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 786                                InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
 787   //
 788   // VPADD, etc.
 789   InstrItinData<IIC_VPBIND,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 790                                InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
 791   //
 792   // Double-register FP VMUL
 793   InstrItinData<IIC_VFMULD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 794                                InstrStage<1, [A8_NPipe]>], [5, 2, 1]>,
 795
 796   //
 797   // Quad-register FP Binary
 798   // Result written in N5, but that is relative to the last cycle of multicycle,
 799   // so we use 6 for those cases
 800   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 801                                InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
 802   //
 803   // Quad-register FP VMUL
 804   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 805                                InstrStage<1, [A8_NPipe]>], [6, 2, 1]>,
 806   //
 807   // Move
 808   InstrItinData<IIC_VMOV,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 809                                InstrStage<1, [A8_NPipe]>], [1, 1]>,
 810   //
 811   // Move Immediate
 812   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 813                                InstrStage<1, [A8_NPipe]>], [3]>,
 814   //
 815   // Double-register Permute Move
 816   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 817                                InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
 818   //
 819   // Quad-register Permute Move
 820   // Result written in N2, but that is relative to the last cycle of multicycle,
 821   // so we use 3 for those cases
 822   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 823                                InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
 824   //
 825   // Integer to Single-precision Move
 826   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 827                                InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
 828   //
 829   // Integer to Double-precision Move
 830   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 831                                InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
 832   //
 833   // Single-precision to Integer Move
 834   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 835                                InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
 836   //
 837   // Double-precision to Integer Move
 838   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 839                                InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
 840   //
 841   // Integer to Lane Move
 842   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 843                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
 844   //
 845   // Vector narrow move
 846   InstrItinData<IIC_VMOVN   , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 847                                InstrStage<1, [A8_NPipe]>], [2, 1]>,
 848   //
 849   // Double-register Permute
 850   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 851                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
 852   //
 853   // Quad-register Permute
 854   // Result written in N2, but that is relative to the last cycle of multicycle,
 855   // so we use 3 for those cases
 856   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 857                                InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
 858   //
 859   // Quad-register Permute (3 cycle issue)
 860   // Result written in N2, but that is relative to the last cycle of multicycle,
 861   // so we use 4 for those cases
 862   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 863                                InstrStage<1, [A8_NLSPipe]>,
 864                                InstrStage<1, [A8_NPipe], 0>,
 865                                InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
 866   //
 867   // Double-register FP Multiply-Accumulate
 868   InstrItinData<IIC_VMACD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 869                                InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
 870   //
 871   // Quad-register FP Multiply-Accumulate
 872   // Result written in N9, but that is relative to the last cycle of multicycle,
 873   // so we use 10 for those cases
 874   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 875                                InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
 876   //
 877   // Double-register Fused FP Multiply-Accumulate
 878   InstrItinData<IIC_VFMACD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 879                                InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
 880   //
 881   // Quad-register Fused FP Multiply-Accumulate
 882   // Result written in N9, but that is relative to the last cycle of multicycle,
 883   // so we use 10 for those cases
 884   InstrItinData<IIC_VFMACQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 885                                InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
 886   //
 887   // Double-register Reciprocal Step
 888   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 889                                InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
 890   //
 891   // Quad-register Reciprocal Step
 892   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 893                                InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
 894   //
 895   // Double-register Integer Count
 896   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 897                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
 898   //
 899   // Quad-register Integer Count
 900   // Result written in N3, but that is relative to the last cycle of multicycle,
 901   // so we use 4 for those cases
 902   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 903                                InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
 904   //
 905   // Double-register Integer Unary
 906   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 907                                InstrStage<1, [A8_NPipe]>], [4, 2]>,
 908   //
 909   // Quad-register Integer Unary
 910   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 911                                InstrStage<1, [A8_NPipe]>], [4, 2]>,
 912   //
 913   // Double-register Integer Q-Unary
 914   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 915                                InstrStage<1, [A8_NPipe]>], [4, 1]>,
 916   //
 917   // Quad-register Integer Q-Unary
 918   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 919                                InstrStage<1, [A8_NPipe]>], [4, 1]>,
 920   //
 921   // Double-register Integer Binary
 922   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 923                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
 924   //
 925   // Quad-register Integer Binary
 926   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 927                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
 928   //
 929   // Double-register Integer Binary (4 cycle)
 930   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 931                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
 932   //
 933   // Quad-register Integer Binary (4 cycle)
 934   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 935                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
 936
 937   //
 938   // Double-register Integer Subtract
 939   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 940                                InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
 941   //
 942   // Quad-register Integer Subtract
 943   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 944                                InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
 945   //
 946   // Double-register Integer Subtract (4 cycle)
 947   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 948                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
 949   //
 950   // Quad-register Integer Subtract (4 cycle)
 951   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 952                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
 953   //
 954   // Double-register Integer Shift
 955   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 956                                InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
 957   //
 958   // Quad-register Integer Shift
 959   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 960                                InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
 961   //
 962   // Double-register Integer Shift (4 cycle)
 963   InstrItinData<IIC_VSHLi4D,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 964                                InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
 965   //
 966   // Quad-register Integer Shift (4 cycle)
 967   InstrItinData<IIC_VSHLi4Q,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 968                                InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
 969   //
 970   // Double-register Integer Pair Add Long
 971   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 972                                InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
 973   //
 974   // Quad-register Integer Pair Add Long
 975   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 976                                InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
 977   //
 978   // Double-register Absolute Difference and Accumulate
 979   InstrItinData<IIC_VABAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 980                                InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
 981   //
 982   // Quad-register Absolute Difference and Accumulate
 983   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 984                                InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
 985
 986   //
 987   // Double-register Integer Multiply (.8, .16)
 988   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 989                                InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
 990   //
 991   // Double-register Integer Multiply (.32)
 992   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 993                                InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
 994   //
 995   // Quad-register Integer Multiply (.8, .16)
 996   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
 997                                InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
 998   //
 999   // Quad-register Integer Multiply (.32)
1000   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1001                                InstrStage<1, [A8_NPipe]>,
1002                                InstrStage<2, [A8_NLSPipe], 0>,
1003                                InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
1004   //
1005   // Double-register Integer Multiply-Accumulate (.8, .16)
1006   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1007                                InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
1008   //
1009   // Double-register Integer Multiply-Accumulate (.32)
1010   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1011                                InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
1012   //
1013   // Quad-register Integer Multiply-Accumulate (.8, .16)
1014   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1015                                InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
1016   //
1017   // Quad-register Integer Multiply-Accumulate (.32)
1018   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1019                                InstrStage<1, [A8_NPipe]>,
1020                                InstrStage<2, [A8_NLSPipe], 0>,
1021                                InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
1022   //
1023   // Double-register VEXT
1024   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1025                                InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
1026   //
1027   // Quad-register VEXT
1028   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1029                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
1030   //
1031   // VTB
1032   InstrItinData<IIC_VTB1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1033                                InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
1034   InstrItinData<IIC_VTB2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1035                                InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
1036   InstrItinData<IIC_VTB3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1037                                InstrStage<1, [A8_NLSPipe]>,
1038                                InstrStage<1, [A8_NPipe], 0>,
1039                                InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
1040   InstrItinData<IIC_VTB4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1041                                InstrStage<1, [A8_NLSPipe]>,
1042                                InstrStage<1, [A8_NPipe], 0>,
1043                                InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
1044   //
1045   // VTBX
1046   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1047                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
1048   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1049                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
1050   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1051                                InstrStage<1, [A8_NLSPipe]>,
1052                                InstrStage<1, [A8_NPipe], 0>,
1053                                InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
1054   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1055                                InstrStage<1, [A8_NLSPipe]>,
1056                                InstrStage<1, [A8_NPipe], 0>,
1057                             InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
1058 ]>;
1059
1060 // ===---------------------------------------------------------------------===//
1061 // The following definitions describe the simple machine model which
1062 // will replace itineraries.
1063
1064 // Cortex-A8 machine model for scheduling and other instruction cost heuristics.
1065 def CortexA8Model : SchedMachineModel {
1066   let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
1067   let LoadLatency = 2; // Optimistic load latency assuming bypass.
1068                        // This is overridden by OperandCycles if the
1069                        // Itineraries are queried instead.
1070   let MispredictPenalty = 13; // Based on estimate of pipeline depth.
1071   let CompleteModel = 0; // NOTE(review): presumably marks this model as not covering every instruction - confirm against SchedMachineModel.
1072
1073   let Itineraries = CortexA8Itineraries; // Itinerary table defined earlier in this file.
1074 }