llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td

   1 //=- AArch64SchedNeoverseN1.td - NeoverseN1 Scheduling Model -*- tablegen -*-=//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines the scheduling model for the Arm Neoverse N1 processors.
  10 //
  11 // References:
  12 // - "Arm Neoverse N1 Software Optimization Guide"
  13 // - https://en.wikichip.org/wiki/arm_holdings/microarchitectures/neoverse_n1
  14 //
  15 //===----------------------------------------------------------------------===//
  16
  17 def NeoverseN1Model : SchedMachineModel {
  18   let IssueWidth            =   8; // Maximum micro-ops dispatch rate.
  19   let MicroOpBufferSize     = 128; // NOTE: Copied from Cortex-A76.
  20   let LoadLatency           =   4; // Optimistic load latency.
  21   let MispredictPenalty     =  11; // Cycles cost of branch mispredicted.
  22   let LoopMicroOpBufferSize =  16; // NOTE: Copied from Cortex-A57.
  23   let CompleteModel         =   1;
  24
  25   list<Predicate> UnsupportedFeatures = !listconcat(PAUnsupported.F,
  26                                                     SMEUnsupported.F,
  27                                                     SVEUnsupported.F,
  28                                                     [HasMTE, HasCSSC]);
  29 }
  30
  31 //===----------------------------------------------------------------------===//
  32 // Define each kind of processor resource and number available on Neoverse N1.
  33 // Instructions are first fetched and then decoded into internal macro-ops
  34 // (MOPs).  From there, the MOPs proceed through register renaming and dispatch
  35 // stages.  A MOP can be split into one or more micro-ops further down the
  36 // pipeline, after the decode stage.  Once dispatched, micro-ops wait for their
  37 // operands and issue out-of-order to one of the issue pipelines.  Each issue
  38 // pipeline can accept one micro-op per cycle.
  39
  40 let SchedModel = NeoverseN1Model in {
  41
  42 // Define the issue ports.
  43 def N1UnitB  : ProcResource<1>;  // Branch
  44 def N1UnitS  : ProcResource<2>;  // Integer single cycle 0/1
  45 def N1UnitM  : ProcResource<1>;  // Integer multicycle
  46 def N1UnitL  : ProcResource<2>;  // Load/Store 0/1
  47 def N1UnitD  : ProcResource<2>;  // Store data 0/1
  48 def N1UnitV0 : ProcResource<1>;  // FP/ASIMD 0
  49 def N1UnitV1 : ProcResource<1>;  // FP/ASIMD 1
  50
  51 def N1UnitI : ProcResGroup<[N1UnitS, N1UnitM]>;    // Integer units
  52 def N1UnitV : ProcResGroup<[N1UnitV0, N1UnitV1]>;  // FP/ASIMD units
  53
  54 // Define commonly used read types.
  55
  56 // No generic forwarding is provided for these types.
  57 def : ReadAdvance<ReadI,       0>;
  58 def : ReadAdvance<ReadISReg,   0>;
  59 def : ReadAdvance<ReadIEReg,   0>;
  60 def : ReadAdvance<ReadIM,      0>;
  61 def : ReadAdvance<ReadIMA,     0>;
  62 def : ReadAdvance<ReadID,      0>;
  63 def : ReadAdvance<ReadExtrHi,  0>;
  64 def : ReadAdvance<ReadAdrBase, 0>;
  65 def : ReadAdvance<ReadST,      0>;
  66 def : ReadAdvance<ReadVLD,     0>;
  67
  68 def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
  69 def : WriteRes<WriteBarrier, []> { let Latency = 1; }
  70 def : WriteRes<WriteHint,    []> { let Latency = 1; }
  71
  72
  73 //===----------------------------------------------------------------------===//
  74 // Define generic 0 micro-op types
  75
  76 let Latency = 0, NumMicroOps = 0 in
  77 def N1Write_0c_0Z : SchedWriteRes<[]>;
  78
  79 //===----------------------------------------------------------------------===//
  80 // Define generic 1 micro-op types
  81
  82 def N1Write_1c_1B     : SchedWriteRes<[N1UnitB]>  { let Latency = 1; }
  83 def N1Write_1c_1I     : SchedWriteRes<[N1UnitI]>  { let Latency = 1; }
  84 def N1Write_2c_1M     : SchedWriteRes<[N1UnitM]>  { let Latency = 2; }
  85 def N1Write_3c_1M     : SchedWriteRes<[N1UnitM]>  { let Latency = 3; }
  86 def N1Write_4c3_1M    : SchedWriteRes<[N1UnitM]>  { let Latency = 4;
  87                                                     let ReleaseAtCycles = [3]; }
  88 def N1Write_5c3_1M    : SchedWriteRes<[N1UnitM]>  { let Latency = 5;
  89                                                     let ReleaseAtCycles = [3]; }
  90 def N1Write_12c5_1M   : SchedWriteRes<[N1UnitM]>  { let Latency = 12;
  91                                                     let ReleaseAtCycles = [5]; }
  92 def N1Write_20c5_1M   : SchedWriteRes<[N1UnitM]>  { let Latency = 20;
  93                                                     let ReleaseAtCycles = [5]; }
  94 def N1Write_4c_1L     : SchedWriteRes<[N1UnitL]>  { let Latency = 4; }
  95 def N1Write_5c_1L     : SchedWriteRes<[N1UnitL]>  { let Latency = 5; }
  96 def N1Write_7c_1L     : SchedWriteRes<[N1UnitL]>  { let Latency = 7; }
  97 def N1Write_2c_1V     : SchedWriteRes<[N1UnitV]>  { let Latency = 2; }
  98 def N1Write_3c_1V     : SchedWriteRes<[N1UnitV]>  { let Latency = 3; }
  99 def N1Write_4c_1V     : SchedWriteRes<[N1UnitV]>  { let Latency = 4; }
 100 def N1Write_5c_1V     : SchedWriteRes<[N1UnitV]>  { let Latency = 5; }
 101 def N1Write_2c_1V0    : SchedWriteRes<[N1UnitV0]> { let Latency = 2; }
 102 def N1Write_3c_1V0    : SchedWriteRes<[N1UnitV0]> { let Latency = 3; }
 103 def N1Write_4c_1V0    : SchedWriteRes<[N1UnitV0]> { let Latency = 4; }
 104 def N1Write_7c7_1V0   : SchedWriteRes<[N1UnitV0]> { let Latency = 7;
 105                                                     let ReleaseAtCycles = [7]; }
 106 def N1Write_10c7_1V0  : SchedWriteRes<[N1UnitV0]> { let Latency = 10;
 107                                                     let ReleaseAtCycles = [7]; }
 108 def N1Write_13c10_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 13;
 109                                                     let ReleaseAtCycles = [10]; }
 110 def N1Write_15c7_1V0  : SchedWriteRes<[N1UnitV0]> { let Latency = 15;
 111                                                     let ReleaseAtCycles = [7]; }
 112 def N1Write_17c7_1V0  : SchedWriteRes<[N1UnitV0]> { let Latency = 17;
 113                                                     let ReleaseAtCycles = [7]; }
 114 def N1Write_2c_1V1    : SchedWriteRes<[N1UnitV1]> { let Latency = 2; }
 115 def N1Write_3c_1V1    : SchedWriteRes<[N1UnitV1]> { let Latency = 3; }
 116 def N1Write_4c_1V1    : SchedWriteRes<[N1UnitV1]> { let Latency = 4; }
 117
 118 //===----------------------------------------------------------------------===//
 119 // Define generic 2 micro-op types
 120
 121 let Latency = 1, NumMicroOps = 2 in
 122 def N1Write_1c_1B_1I   : SchedWriteRes<[N1UnitB, N1UnitI]>;
 123 let Latency = 3, NumMicroOps = 2 in
 124 def N1Write_3c_1I_1M   : SchedWriteRes<[N1UnitI, N1UnitM]>;
 125 let Latency = 2, NumMicroOps = 2 in
 126 def N1Write_2c_1I_1L   : SchedWriteRes<[N1UnitI, N1UnitL]>;
 127 let Latency = 5, NumMicroOps = 2 in
 128 def N1Write_5c_1I_1L   : SchedWriteRes<[N1UnitI, N1UnitL]>;
 129 let Latency = 6, NumMicroOps = 2 in
 130 def N1Write_6c_1I_1L   : SchedWriteRes<[N1UnitI, N1UnitL]>;
 131 let Latency = 7, NumMicroOps = 2 in
 132 def N1Write_7c_1I_1L   : SchedWriteRes<[N1UnitI, N1UnitL]>;
 133 let Latency = 5, NumMicroOps = 2 in
 134 def N1Write_5c_1M_1V   : SchedWriteRes<[N1UnitM, N1UnitV]>;
 135 let Latency = 6, NumMicroOps = 2 in
 136 def N1Write_6c_1M_1V0  : SchedWriteRes<[N1UnitM, N1UnitV0]>;
 137 let Latency = 5, NumMicroOps = 2 in
 138 def N1Write_5c_2L      : SchedWriteRes<[N1UnitL, N1UnitL]>;
 139 let Latency = 1, NumMicroOps = 2 in
 140 def N1Write_1c_1L_1D   : SchedWriteRes<[N1UnitL, N1UnitD]>;
 141 let Latency = 2, NumMicroOps = 2 in
 142 def N1Write_2c_1L_1V   : SchedWriteRes<[N1UnitL, N1UnitV]>;
 143 let Latency = 4, NumMicroOps = 2 in
 144 def N1Write_4c_1L_1V   : SchedWriteRes<[N1UnitL, N1UnitV]>;
 145 let Latency = 7, NumMicroOps = 2 in
 146 def N1Write_7c_1L_1V   : SchedWriteRes<[N1UnitL, N1UnitV]>;
 147 let Latency = 4, NumMicroOps = 2 in
 148 def N1Write_4c_1V0_1V1 : SchedWriteRes<[N1UnitV0, N1UnitV1]>;
 149 let Latency = 4, NumMicroOps = 2 in
 150 def N1Write_4c_2V0     : SchedWriteRes<[N1UnitV0, N1UnitV0]>;
 151 let Latency = 5, NumMicroOps = 2 in
 152 def N1Write_5c_2V0     : SchedWriteRes<[N1UnitV0, N1UnitV0]>;
 153 let Latency = 6, NumMicroOps = 2 in
 154 def N1Write_6c_2V1     : SchedWriteRes<[N1UnitV1, N1UnitV1]>;
 155 let Latency = 5, NumMicroOps = 2 in
 156 def N1Write_5c_1V1_1V  : SchedWriteRes<[N1UnitV1, N1UnitV]>;
 157
 158 //===----------------------------------------------------------------------===//
 159 // Define generic 3 micro-op types
 160
 161 let Latency = 2, NumMicroOps = 3 in  // 3 micro-ops (I + L + V); latency matches the "2c" in the name.
 162 def N1Write_2c_1I_1L_1V : SchedWriteRes<[N1UnitI, N1UnitL, N1UnitV]>;
 163 let Latency = 1, NumMicroOps = 3 in
 164 def N1Write_1c_2L_1D    : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitD]>;
 165 let Latency = 2, NumMicroOps = 3 in
 166 def N1Write_2c_1L_2V    : SchedWriteRes<[N1UnitL, N1UnitV, N1UnitV]>;
 167 let Latency = 6, NumMicroOps = 3 in
 168 def N1Write_6c_3L       : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL]>;
 169 let Latency = 4, NumMicroOps = 3 in
 170 def N1Write_4c_3V       : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV]>;
 171 let Latency = 6, NumMicroOps = 3 in
 172 def N1Write_6c_3V       : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV]>;
 173 let Latency = 8, NumMicroOps = 3 in
 174 def N1Write_8c_3V       : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV]>;
 175
 176 //===----------------------------------------------------------------------===//
 177 // Define generic 4 micro-op types
 178
 179 let Latency = 2, NumMicroOps = 4 in
 180 def N1Write_2c_2I_2L : SchedWriteRes<[N1UnitI, N1UnitI, N1UnitL, N1UnitL]>;
 181 let Latency = 6, NumMicroOps = 4 in
 182 def N1Write_6c_4L    : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL]>;
 183 let Latency = 2, NumMicroOps = 4 in
 184 def N1Write_2c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]>;
 185 let Latency = 3, NumMicroOps = 4 in  // 4 micro-ops (2 L + 2 V); latency matches the "3c" in the name.
 186 def N1Write_3c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]>;
 187 let Latency = 5, NumMicroOps = 4 in
 188 def N1Write_5c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]>;
 189 let Latency = 7, NumMicroOps = 4 in
 190 def N1Write_7c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]>;
 191 let Latency = 4, NumMicroOps = 4 in
 192 def N1Write_4c_4V    : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
 193 let Latency = 6, NumMicroOps = 4 in
 194 def N1Write_6c_4V0   : SchedWriteRes<[N1UnitV0, N1UnitV0, N1UnitV0, N1UnitV0]>;
 195
 196 //===----------------------------------------------------------------------===//
 197 // Define generic 5 micro-op types
 198
 199 let Latency = 3, NumMicroOps = 5 in
 200 def N1Write_3c_2L_3V : SchedWriteRes<[N1UnitL, N1UnitL,
 201                                       N1UnitV, N1UnitV, N1UnitV]>;
 202 let Latency = 7, NumMicroOps = 5 in
 203 def N1Write_7c_2L_3V : SchedWriteRes<[N1UnitL, N1UnitL,
 204                                       N1UnitV, N1UnitV, N1UnitV]>;
 205 let Latency = 6, NumMicroOps = 5 in
 206 def N1Write_6c_5V    : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
 207
 208 //===----------------------------------------------------------------------===//
 209 // Define generic 6 micro-op types
 210
 211 let Latency = 3, NumMicroOps = 6 in
 212 def N1Write_3c_4L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
 213                                       N1UnitV, N1UnitV]>;
 214 let Latency = 4, NumMicroOps = 6 in
 215 def N1Write_4c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
 216                                       N1UnitV, N1UnitV, N1UnitV]>;
 217 let Latency = 5, NumMicroOps = 6 in
 218 def N1Write_5c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
 219                                       N1UnitV, N1UnitV, N1UnitV]>;
 220 let Latency = 6, NumMicroOps = 6 in
 221 def N1Write_6c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
 222                                       N1UnitV, N1UnitV, N1UnitV]>;
 223 let Latency = 7, NumMicroOps = 6 in
 224 def N1Write_7c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
 225                                       N1UnitV, N1UnitV, N1UnitV]>;
 226 let Latency = 8, NumMicroOps = 6 in
 227 def N1Write_8c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
 228                                       N1UnitV, N1UnitV, N1UnitV]>;
 229
 230 //===----------------------------------------------------------------------===//
 231 // Define generic 7 micro-op types
 232
 233 let Latency = 8, NumMicroOps = 7 in
 234 def N1Write_8c_3L_4V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
 235                                       N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
 236
 237 //===----------------------------------------------------------------------===//
 238 // Define generic 8 micro-op types
 239
 240 let Latency = 5, NumMicroOps = 8 in
 241 def N1Write_5c_4L_4V  : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
 242                                        N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
 243 let Latency = 6, NumMicroOps = 8 in
 244 def N1Write_6c_4L_4V  : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
 245                                        N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
 246 let Latency = 8, NumMicroOps = 8 in
 247 def N1Write_8c_4L_4V  : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
 248                                        N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
 249 let Latency = 10, NumMicroOps = 8 in
 250 def N1Write_10c_4L_4V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
 251                                        N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
 252
 253 //===----------------------------------------------------------------------===//
 254 // Define generic 12 micro-op types
 255
 256 let Latency = 9, NumMicroOps = 12 in
 257 def N1Write_9c_6L_6V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
 258                                       N1UnitL, N1UnitL, N1UnitL,
 259                                       N1UnitV, N1UnitV, N1UnitV,
 260                                       N1UnitV, N1UnitV, N1UnitV]>;
 261
 262
 263 // Miscellaneous Instructions
 264 // -----------------------------------------------------------------------------
 265
 266 def : InstRW<[WriteI], (instrs COPY)>;
 267
 268 // Convert floating-point condition flags
 269 // Flag manipulation instructions
 270 def : WriteRes<WriteSys, []> { let Latency = 1; }
 271
 272
 273 // Branch Instructions
 274 // -----------------------------------------------------------------------------
 275
 276 // Branch, immed
 277 // Compare and branch
 278 def : SchedAlias<WriteBr, N1Write_1c_1B>;
 279
 280 // Branch, register
 281 def : SchedAlias<WriteBrReg, N1Write_1c_1B>;
 282
 283 // Branch and link, immed
 284 // Branch and link, register
 285 def : InstRW<[N1Write_1c_1B_1I], (instrs BL, BLR)>;
 286
 287 // Compare and branch
 288 def : InstRW<[N1Write_1c_1B], (instregex "^[CT]BN?Z[XW]$")>;
 289
 290
 291 // Arithmetic and Logical Instructions
 292 // -----------------------------------------------------------------------------
 293
 294 // ALU, basic
 295 // ALU, basic, flagset
 296 // Conditional compare
 297 // Conditional select
 298 // Logical, basic
 299 // Address generation
 300 // Count leading
 301 // Reverse bits/bytes
 302 // Move immediate
 303 def : SchedAlias<WriteI, N1Write_1c_1I>;
 304
 305 // ALU, extend and shift
 306 def : SchedAlias<WriteIEReg, N1Write_2c_1M>;
 307
 308 // Arithmetic, LSL shift, shift <= 4
 309 // Arithmetic, flagset, LSL shift, shift <= 4
 310 // Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
 311 def N1WriteISReg : SchedWriteVariant<[
 312                      SchedVar<IsCheapLSL,  [N1Write_1c_1I]>,
 313                      SchedVar<NoSchedPred, [N1Write_2c_1M]>]>;
 314 def              : SchedAlias<WriteISReg, N1WriteISReg>;
 315
 316 // Logical, shift, no flagset
 317 def : InstRW<[N1Write_1c_1I],
 318              (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
 319
 320 // Logical, shift, flagset
 321 def : InstRW<[N1Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>;
 322
 323
 324 // Divide and multiply instructions
 325 // -----------------------------------------------------------------------------
 326
 327 // Divide
 328 def : SchedAlias<WriteID32, N1Write_12c5_1M>;
 329 def : SchedAlias<WriteID64, N1Write_20c5_1M>;
 330
 331 // Multiply accumulate
 332 // Multiply accumulate, long
 333 def : SchedAlias<WriteIM32, N1Write_2c_1M>;
 334 def : SchedAlias<WriteIM64, N1Write_4c3_1M>;
 335
 336 // Multiply high
 337 def : InstRW<[N1Write_5c3_1M, ReadIM, ReadIM], (instrs SMULHrr, UMULHrr)>;
 338
 339
 340 // Miscellaneous data-processing instructions
 341 // -----------------------------------------------------------------------------
 342
 343 // Bitfield extract, one reg
 344 // Bitfield extract, two regs
 345 def N1WriteExtr : SchedWriteVariant<[
 346                     SchedVar<IsRORImmIdiomPred, [N1Write_1c_1I]>,
 347                     SchedVar<NoSchedPred,       [N1Write_3c_1I_1M]>]>;
 348 def : SchedAlias<WriteExtr, N1WriteExtr>;
 349
 350 // Bitfield move, basic
 351 // Variable shift
 352 def : SchedAlias<WriteIS, N1Write_1c_1I>;
 353
 354 // Bitfield move, insert
 355 def : InstRW<[N1Write_2c_1M], (instregex "^BFM[WX]ri$")>;
 356
 357 // Move immediate
 358 def : SchedAlias<WriteImm, N1Write_1c_1I>;
 359
 360 // Load instructions
 361 // -----------------------------------------------------------------------------
 362
 363 // Load register, immed offset
 364 def : SchedAlias<WriteLD, N1Write_4c_1L>;
 365
 366 // Load register, immed offset, index
 367 def : SchedAlias<WriteLDIdx, N1Write_4c_1L>;
 368 def : SchedAlias<WriteAdr,   N1Write_1c_1I>;
 369
 370 // Load pair, immed offset
 371 def : SchedAlias<WriteLDHi, N1Write_4c_1L>;
 372
 373 // Load pair, immed offset, W-form
 374 def : InstRW<[N1Write_4c_1L, N1Write_0c_0Z], (instrs LDPWi, LDNPWi)>;
 375
 376 // Load pair, signed immed offset, signed words
 377 def : InstRW<[N1Write_5c_1I_1L, N1Write_0c_0Z], (instrs LDPSWi)>;
 378
 379 // Load pair, immed post or pre-index, signed words
 380 def : InstRW<[WriteAdr, N1Write_5c_1I_1L, N1Write_0c_0Z],
 381              (instrs LDPSWpost, LDPSWpre)>;
 382
 383
 384 // Store instructions
 385 // -----------------------------------------------------------------------------
 386
 387 // Store register, immed offset
 388 def : SchedAlias<WriteST, N1Write_1c_1L_1D>;
 389
 390 // Store register, immed offset, index
 391 def : SchedAlias<WriteSTIdx, N1Write_1c_1L_1D>;
 392
 393 // Store pair, immed offset
 394 def : SchedAlias<WriteSTP, N1Write_1c_2L_1D>;
 395
 396 // Store pair, immed offset, W-form
 397 def : InstRW<[N1Write_1c_1L_1D], (instrs STPWi)>;
 398
 399
 400 // FP data processing instructions
 401 // -----------------------------------------------------------------------------
 402
 403 // FP absolute value
 404 // FP arithmetic
 405 // FP min/max
 406 // FP negate
 407 // FP select
 408 def : SchedAlias<WriteF, N1Write_2c_1V>;
 409
 410 // FP compare
 411 def : SchedAlias<WriteFCmp, N1Write_2c_1V0>;
 412
 413 // FP divide
 414 // FP square root
 415 def : SchedAlias<WriteFDiv, N1Write_10c7_1V0>;
 416
 417 // FP divide, H-form
 418 // FP square root, H-form
 419 def : InstRW<[N1Write_7c7_1V0], (instrs FDIVHrr, FSQRTHr)>;
 420
 421 // FP divide, S-form
 422 // FP square root, S-form
 423 def : InstRW<[N1Write_10c7_1V0], (instrs FDIVSrr, FSQRTSr)>;
 424
 425 // FP divide, D-form
 426 def : InstRW<[N1Write_15c7_1V0], (instrs FDIVDrr)>;
 427
 428 // FP square root, D-form
 429 def : InstRW<[N1Write_17c7_1V0], (instrs FSQRTDr)>;
 430
 431 // FP multiply
 432 def : SchedAlias<WriteFMul, N1Write_3c_1V>;
 433
 434 // FP multiply accumulate
 435 def : InstRW<[N1Write_4c_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
 436
 437 // FP round to integral
 438 def : InstRW<[N1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ][HSD]r$",
 439                                           "^FRINT(32|64)[XZ][SD]r$")>;
 440
 441
 442 // FP miscellaneous instructions
 443 // -----------------------------------------------------------------------------
 444
 445 // FP convert, from vec to vec reg
 446 // FP convert, Javascript from vec to gen reg
 447 def : SchedAlias<WriteFCvt, N1Write_3c_1V>;
 448
 449 // FP convert, from gen to vec reg
 450 def : InstRW<[N1Write_6c_1M_1V0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
 451
 452 // FP convert, from vec to gen reg
 453 def : InstRW<[N1Write_4c_1V0_1V1], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;
 454
 455 // FP move, immed
 456 def : SchedAlias<WriteFImm, N1Write_2c_1V>;
 457
 458 // FP move, register
 459 def : InstRW<[N1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
 460
 461 // FP transfer, from gen to low half of vec reg
 462 // FP transfer, from gen to high half of vec reg
 463 def : InstRW<[N1Write_3c_1M], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
 464                                       FMOVXDHighr)>;
 465
 466 // FP transfer, from vec to gen reg
 467 def : SchedAlias<WriteFCopy, N1Write_2c_1V1>;
 468
 469
 470 // FP load instructions
 471 // -----------------------------------------------------------------------------
 472
 473 // Load vector reg, literal, S/D/Q forms
 474 // Load vector reg, unscaled immed
 475 def : InstRW<[N1Write_5c_1L, ReadAdrBase], (instregex "^LDR[SDQ]l$",
 476                                                       "^LDUR[BHSDQ]i$")>;
 477
 478 // Load vector reg, immed post-index
 479 // Load vector reg, immed pre-index
 480 def : InstRW<[WriteAdr, N1Write_5c_1L],
 481              (instregex "^LDR[BHSDQ](post|pre)$")>;
 482
 483 // Load vector reg, unsigned immed
 484 def : InstRW<[N1Write_5c_1I_1L], (instregex "^LDR[BHSDQ]ui$")>;
 485
 486 // Load vector reg, register offset, basic
 487 // Load vector reg, register offset, scale, S/D-form
 488 // Load vector reg, register offset, extend
 489 // Load vector reg, register offset, extend, scale, S/D-form
 490 def : InstRW<[N1Write_5c_1I_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;
 491
 492 // Load vector reg, register offset, scale, H/Q-form
 493 // Load vector reg, register offset, extend, scale, H/Q-form
 494 def : InstRW<[N1Write_6c_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;
 495
 496 // Load vector pair, immed offset, S/D-form
 497 def : InstRW<[N1Write_5c_1I_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;
 498
 499 // Load vector pair, immed offset, Q-form (both LDPQi and non-temporal LDNPQi)
 500 def : InstRW<[N1Write_7c_1I_1L, WriteLDHi], (instregex "^LDN?PQi$")>;
 501
 502 // Load vector pair, immed post-index, S/D-form
 503 // Load vector pair, immed pre-index, S/D-form
 504 def : InstRW<[WriteAdr, N1Write_5c_1L, WriteLDHi],
 505              (instregex "^LDP[SD](pre|post)$")>;
 506
 507 // Load vector pair, immed post-index, Q-form
 508 // Load vector pair, immed pre-index, Q-form
 509 def : InstRW<[WriteAdr, N1Write_7c_1L, WriteLDHi],
 510              (instrs LDPQpost, LDPQpre)>;
 511
 512
 513 // FP store instructions
 514 // -----------------------------------------------------------------------------
 515
 516 // Store vector reg, unscaled immed, B/H/S/D-form
 517 def : InstRW<[N1Write_2c_1I_1L], (instregex "^STUR[BHSD]i$")>;
 518
 519 // Store vector reg, unscaled immed, Q-form
 520 def : InstRW<[N1Write_2c_2I_2L], (instrs STURQi)>;
 521
 522 // Store vector reg, immed post-index, B/H/S/D-form
 523 // Store vector reg, immed pre-index, B/H/S/D-form
 524 def : InstRW<[WriteAdr, N1Write_2c_1L_1V], (instregex "^STR[BHSD](pre|post)$")>;
 525
 526 // Store vector reg, immed pre-index, Q-form
 527 // Store vector reg, immed post-index, Q-form
 528 def : InstRW<[WriteAdr, N1Write_2c_2L_2V], (instrs STRQpre, STRQpost)>;
 529
 530 // Store vector reg, unsigned immed, B/H/S/D-form
 531 def : InstRW<[N1Write_2c_1L_1V], (instregex "^STR[BHSD]ui$")>;
 532
 533 // Store vector reg, unsigned immed, Q-form
 534 def : InstRW<[N1Write_2c_2L_2V], (instrs STRQui)>;
 535
 536 // Store vector reg, register offset, basic, B/S/D-form
 537 // Store vector reg, register offset, scale, B/S/D-form
 538 // Store vector reg, register offset, extend, B/S/D-form
 539 // Store vector reg, register offset, extend, scale, B/S/D-form
 540 def : InstRW<[N1Write_2c_1L_1V, ReadAdrBase], (instregex "^STR[BSD]ro[WX]$")>;
 541
 542 // Store vector reg, register offset, basic, H-form
 543 // Store vector reg, register offset, scale, H-form
 544 // Store vector reg, register offset, extend, H-form
 545 // Store vector reg, register offset, extend, scale, H-form
 546 def : InstRW<[N1Write_2c_1I_1L_1V, ReadAdrBase], (instregex "^STRHro[WX]$")>;
 547
 548 // Store vector reg, register offset, basic, Q-form
 549 // Store vector reg, register offset, scale, Q-form
 550 // Store vector reg, register offset, extend, Q-form
 551 // Store vector reg, register offset, extend, scale, Q-form
 552 def : InstRW<[N1Write_2c_2L_2V, ReadAdrBase], (instregex "^STRQro[WX]$")>;
 553
 554 // Store vector pair, immed offset, S-form
 555 def : InstRW<[N1Write_2c_1L_1V], (instrs STPSi, STNPSi)>;
 556
 557 // Store vector pair, immed offset, D-form
 558 def : InstRW<[N1Write_2c_2L_2V], (instrs STPDi, STNPDi)>;
 559
 560 // Store vector pair, immed offset, Q-form
 561 def : InstRW<[N1Write_3c_4L_2V], (instrs STPQi, STNPQi)>;
 562
 563 // Store vector pair, immed post-index, S-form
 564 // Store vector pair, immed pre-index, S-form
 565 def : InstRW<[WriteAdr, N1Write_2c_1L_1V], (instrs STPSpre, STPSpost)>;
 566
 567 // Store vector pair, immed post-index, D-form
 568 // Store vector pair, immed pre-index, D-form
 569 def : InstRW<[WriteAdr, N1Write_2c_2L_2V], (instrs STPDpre, STPDpost)>;
 570
 571 // Store vector pair, immed post-index, Q-form
 572 // Store vector pair, immed pre-index, Q-form
 573 def : InstRW<[WriteAdr, N1Write_3c_4L_2V], (instrs STPQpre, STPQpost)>;
 574
 575
 576 // ASIMD integer instructions
 577 // -----------------------------------------------------------------------------
 578
 579 // ASIMD absolute diff
 580 // ASIMD absolute diff long
 581 // ASIMD arith, basic
 582 // ASIMD arith, complex
 583 // ASIMD arith, pair-wise
 584 // ASIMD compare
 585 // ASIMD logical
 586 // ASIMD max/min, basic and pair-wise
 587 def : SchedAlias<WriteVd, N1Write_2c_1V>;
 588 def : SchedAlias<WriteVq, N1Write_2c_1V>;
 589
 590 // ASIMD absolute diff accum
 591 // ASIMD absolute diff accum long
 592 def : InstRW<[N1Write_4c_1V1], (instregex "^[SU]ABAL?v")>;
 593
 594 // ASIMD arith, reduce, 4H/4S
 595 def : InstRW<[N1Write_3c_1V1], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
 596
 597 // ASIMD arith, reduce, 8B/8H
 598 def : InstRW<[N1Write_5c_1V1_1V], (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
 599
 600 // ASIMD arith, reduce, 16B
 601 def : InstRW<[N1Write_6c_2V1], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;
 602
 603 // ASIMD max/min, reduce, 4H/4S
 604 def : InstRW<[N1Write_3c_1V1], (instregex "^[SU](MAX|MIN)Vv4(i16|i32)v$")>;
 605
 606 // ASIMD max/min, reduce, 8B/8H
 607 def : InstRW<[N1Write_5c_1V1_1V], (instregex "^[SU](MAX|MIN)Vv8(i8|i16)v$")>;
 608
 609 // ASIMD max/min, reduce, 16B
 610 def : InstRW<[N1Write_6c_2V1], (instregex "^[SU](MAX|MIN)Vv16i8v$")>;
 611
 612 // ASIMD multiply, D-form
 613 // ASIMD multiply accumulate, D-form
 614 // ASIMD multiply accumulate high, D-form
 615 // ASIMD multiply accumulate saturating long
 616 // ASIMD multiply long
 617 // ASIMD multiply accumulate long
 618 def : InstRW<[N1Write_4c_1V0], (instregex "^MUL(v[14]i16|v[12]i32)$",
 619                                           "^ML[AS](v[14]i16|v[12]i32)$",
 620                                           "^SQ(R)?DMULH(v[14]i16|v[12]i32)$",
 621                                           "^SQRDML[AS]H(v[14]i16|v[12]i32)$",
 622                                           "^SQDML[AS]Lv",
 623                                           "^([SU]|SQD)MULLv",
 624                                           "^[SU]ML[AS]Lv")>;
 625
 626 // ASIMD multiply, Q-form
 627 // ASIMD multiply accumulate, Q-form
 628 // ASIMD multiply accumulate high, Q-form
 629 def : InstRW<[N1Write_5c_2V0], (instregex "^MUL(v8i16|v4i32)$",
 630                                           "^ML[AS](v8i16|v4i32)$",
 631                                           "^SQ(R)?DMULH(v8i16|v4i32)$",
 632                                           "^SQRDML[AS]H(v8i16|v4i32)$")>;
 633
 634 // ASIMD multiply/multiply long (8x8) polynomial, D-form
 635 def : InstRW<[N1Write_3c_1V0], (instrs PMULv8i8, PMULLv8i8)>;
 636
 637 // ASIMD multiply/multiply long (8x8) polynomial, Q-form
 638 def : InstRW<[N1Write_4c_2V0], (instrs PMULv16i8, PMULLv16i8)>;
 639
 640 // ASIMD pairwise add and accumulate long
 641 def : InstRW<[N1Write_4c_1V1], (instregex "^[SU]ADALPv")>;
 642
 643 // ASIMD shift accumulate
 644 def : InstRW<[N1Write_4c_1V1], (instregex "^[SU]R?SRAv")>;
 645
 646 // ASIMD shift by immed, basic
 647 // ASIMD shift by immed and insert, basic
 648 // ASIMD shift by register, basic
 649 def : InstRW<[N1Write_2c_1V1], (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv",
 650                                           "^[SU]SHRv", "^S[LR]Iv", "^[SU]SHLv")>;
 651
 652 // ASIMD shift by immed, complex
 653 // ASIMD shift by register, complex
 654 def : InstRW<[N1Write_4c_1V1],
 655              (instregex "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$",
 656                         "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
 657                         "^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv",
 658                         "^[SU]Q?RSHLv", "^[SU]QSHLv")>;
 659
 660
 661 // ASIMD FP instructions
 662 // -----------------------------------------------------------------------------
 663
 664 // ASIMD FP absolute value/difference
 665 // ASIMD FP arith, normal
 666 // ASIMD FP compare
 667 // ASIMD FP max/min, normal
 668 // ASIMD FP max/min, pairwise
 669 // ASIMD FP negate
 670 // Covered by "SchedAlias (WriteV[dq]...)" above
 671
 672 // ASIMD FP convert, long (F16 to F32)
 673 def : InstRW<[N1Write_4c_2V0], (instregex "^FCVTL(v4|v8)i16$")>;
 674
 675 // ASIMD FP convert, long (F32 to F64)
 676 def : InstRW<[N1Write_3c_1V0], (instregex "^FCVTL(v2|v4)i32$")>;
 677
 678 // ASIMD FP convert, narrow (F32 to F16)
 679 def : InstRW<[N1Write_4c_2V0], (instregex "^FCVTN(v4|v8)i16$")>;
 680
 681 // ASIMD FP convert, narrow (F64 to F32), including FCVTXN
 682 def : InstRW<[N1Write_3c_1V0], (instregex "^FCVTN(v2|v4)i32$",
 683                                           "^FCVTXN(v2|v4)f32$")>;
 684
 685 // ASIMD FP convert, other (FP<->integer), D-form F32 and Q-form F64
 686 def : InstRW<[N1Write_3c_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
 687                                           "^[SU]CVTFv2f(32|64)$")>;
 688
 689 // ASIMD FP convert, other (FP<->integer), D-form F16 and Q-form F32
 690 def : InstRW<[N1Write_4c_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
 691                                           "^[SU]CVTFv4f(16|32)$")>;
 692
 693 // ASIMD FP convert, other (FP<->integer), Q-form F16
 694 def : InstRW<[N1Write_6c_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
 695                                           "^[SU]CVTFv8f16$")>;
 696
 697 // ASIMD FP divide, D-form, F16
 698 // ASIMD FP square root, D-form, F16
 699 def : InstRW<[N1Write_7c7_1V0], (instrs FDIVv4f16, FSQRTv4f16)>;
 700
 701 // ASIMD FP divide, D-form, F32
 702 // ASIMD FP square root, D-form, F32
 703 def : InstRW<[N1Write_10c7_1V0], (instrs FDIVv2f32, FSQRTv2f32)>;
 704
 705 // ASIMD FP divide, Q-form, F16
 706 // ASIMD FP square root, Q-form, F16
 707 def : InstRW<[N1Write_13c10_1V0], (instrs FDIVv8f16, FSQRTv8f16)>;
 708
 709 // ASIMD FP divide, Q-form, F32
 710 // ASIMD FP square root, Q-form, F32
 711 def : InstRW<[N1Write_10c7_1V0], (instrs FDIVv4f32, FSQRTv4f32)>;
 712
 713 // ASIMD FP divide, Q-form, F64
 714 def : InstRW<[N1Write_15c7_1V0], (instrs FDIVv2f64)>;
 715
 716 // ASIMD FP square root, Q-form, F64
 717 def : InstRW<[N1Write_17c7_1V0], (instrs FSQRTv2f64)>;
 718
 719 // ASIMD FP max/min, reduce, Q-form F32 and D-form F16
 720 def : InstRW<[N1Write_5c_1V], (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>;
 721
 722 // ASIMD FP max/min, reduce, Q-form F16
 723 def : InstRW<[N1Write_8c_3V], (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>;
 724
 725 // ASIMD FP multiply (any opcode starting FMULv or FMULXv)
 726 def : InstRW<[N1Write_3c_1V], (instregex "^FMULX?v")>;
 727
 728 // ASIMD FP multiply accumulate (any opcode starting FMLAv or FMLSv)
 729 def : InstRW<[N1Write_4c_1V], (instregex "^FML[AS]v")>;
 730
 731 // ASIMD FP multiply accumulate long (FMLAL, FMLAL2, FMLSL, FMLSL2)
 732 def : InstRW<[N1Write_5c_1V], (instregex "^FML[AS]L2?v")>;
 733
 734 // ASIMD FP round, D-form F32 and Q-form F64
 735 def : InstRW<[N1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>;
 736
 737 // ASIMD FP round, D-form F16 and Q-form F32
 738 def : InstRW<[N1Write_4c_2V0], (instregex "^FRINT[AIMNPXZ]v4f(16|32)$")>;
 739
 740 // ASIMD FP round, Q-form F16
 741 def : InstRW<[N1Write_6c_4V0], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
 742
 743
 744 // ASIMD miscellaneous instructions
 745 // -----------------------------------------------------------------------------
 746
 747 // ASIMD bit reverse
 748 // ASIMD bitwise insert
 749 // ASIMD count
 750 // ASIMD duplicate, element
 751 // ASIMD extract
 752 // ASIMD extract narrow
 753 // ASIMD insert, element to element
 754 // ASIMD move, FP immed
 755 // ASIMD move, integer immed
 756 // ASIMD reverse
 757 // ASIMD table lookup, 1 or 2 table regs
 758 // ASIMD table lookup extension, 1 table reg
 759 // ASIMD transfer, element to gen reg (explicit InstRW also given below)
 760 // ASIMD transpose
 761 // ASIMD unzip/zip
 762 // Covered by "SchedAlias (WriteV[dq]...)" above
 763
 764 // ASIMD duplicate, gen reg (DUP from a general-purpose register)
 765 def : InstRW<[N1Write_3c_1M],
 766              (instregex "^DUP((v16|v8)i8|(v8|v4)i16|(v4|v2)i32|v2i64)gpr$")>;
 767
 768 // ASIMD extract narrow, saturating (SQXTN, UQXTN, SQXTUN)
 769 def : InstRW<[N1Write_4c_1V1], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
 770
 771 // ASIMD reciprocal and square root estimate, D-form F32 and F64
 772 def : InstRW<[N1Write_3c_1V0], (instrs FRECPEv1i32, FRECPEv2f32, FRECPEv1i64,
 773                                        FRECPXv1i32, FRECPXv1i64,
 774                                        URECPEv2i32,
 775                                        FRSQRTEv1i32, FRSQRTEv2f32, FRSQRTEv1i64,
 776                                        URSQRTEv2i32)>;
 777
 778 // ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
 779 def : InstRW<[N1Write_4c_2V0], (instrs FRECPEv1f16, FRECPEv4f16, FRECPEv4f32,
 780                                        FRECPXv1f16,
 781                                        URECPEv4i32,
 782                                        FRSQRTEv1f16, FRSQRTEv4f16, FRSQRTEv4f32,
 783                                        URSQRTEv4i32)>;
 784
 785 // ASIMD reciprocal and square root estimate, Q-form F16
 786 def : InstRW<[N1Write_6c_4V0], (instrs FRECPEv8f16,
 787                                        FRSQRTEv8f16)>;
 788
 789 // ASIMD reciprocal step (FRECPS and FRSQRTS)
 790 def : InstRW<[N1Write_4c_1V], (instregex "^FRECPS(16|32|64)$", "^FRECPSv",
 791                                          "^FRSQRTS(16|32|64)$", "^FRSQRTSv")>;
 792
 793 // ASIMD table lookup, 3 table regs
 794 // ASIMD table lookup extension, 2 table regs
 795 def : InstRW<[N1Write_4c_4V], (instrs TBLv8i8Three, TBLv16i8Three,
 796                                       TBXv8i8Two, TBXv16i8Two)>;
 797
 798 // ASIMD table lookup, 4 table regs (NOTE(review): _3V vs _4V for 3 regs?)
 799 def : InstRW<[N1Write_4c_3V], (instrs TBLv8i8Four, TBLv16i8Four)>;
 800
 801 // ASIMD table lookup extension, 3 table regs
 802 def : InstRW<[N1Write_6c_3V], (instrs TBXv8i8Three, TBXv16i8Three)>;
 803
 804 // ASIMD table lookup extension, 4 table regs
 805 def : InstRW<[N1Write_6c_5V], (instrs TBXv8i8Four, TBXv16i8Four)>;
 806
 807 // ASIMD transfer, element to gen reg (SMOV, UMOV)
 808 def : InstRW<[N1Write_2c_1V1], (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$",
 809                                           "^UMOVvi(8|16|32|64)$")>;
 810
 811 // ASIMD transfer, gen reg to element (INS from a general-purpose register)
 812 def : InstRW<[N1Write_5c_1M_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
 813
 814
 815 // ASIMD load instructions (each _POST form adds WriteAdr as its first write)
 816 // -----------------------------------------------------------------------------
 817
 818 // ASIMD load, 1 element, multiple, 1 reg
 819 def : InstRW<[N1Write_5c_1L],
 820              (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
 821 def : InstRW<[WriteAdr, N1Write_5c_1L],
 822              (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
 823
 824 // ASIMD load, 1 element, multiple, 2 reg
 825 def : InstRW<[N1Write_5c_2L],
 826              (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
 827 def : InstRW<[WriteAdr, N1Write_5c_2L],
 828              (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
 829
 830 // ASIMD load, 1 element, multiple, 3 reg
 831 def : InstRW<[N1Write_6c_3L],
 832              (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
 833 def : InstRW<[WriteAdr, N1Write_6c_3L],
 834              (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
 835
 836 // ASIMD load, 1 element, multiple, 4 reg
 837 def : InstRW<[N1Write_6c_4L],
 838              (instregex "^LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
 839 def : InstRW<[WriteAdr, N1Write_6c_4L],
 840              (instregex "^LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
 841
 842 // ASIMD load, 1 element, one lane (LD1i*)
 843 // ASIMD load, 1 element, all lanes (LD1Rv*)
 844 def : InstRW<[N1Write_7c_1L_1V],
 845              (instregex "LD1(i|Rv)(8|16|32|64)$",
 846                         "LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
 847 def : InstRW<[WriteAdr, N1Write_7c_1L_1V],
 848              (instregex "LD1i(8|16|32|64)_POST$",
 849                         "LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
 850
 851 // ASIMD load, 2 element, multiple
 852 // ASIMD load, 2 element, one lane
 853 // ASIMD load, 2 element, all lanes
 854 def : InstRW<[N1Write_7c_2L_2V],
 855              (instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)$",
 856                         "LD2i(8|16|32|64)$",
 857                         "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
 858 def : InstRW<[WriteAdr, N1Write_7c_2L_2V],
 859              (instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)_POST$",
 860                         "LD2i(8|16|32|64)_POST$",
 861                         "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
 862
 863 // ASIMD load, 3 element, multiple
 864 def : InstRW<[N1Write_8c_3L_3V],
 865              (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)$")>;
 866 def : InstRW<[WriteAdr, N1Write_8c_3L_3V],
 867              (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)_POST$")>;
 868
 869 // ASIMD load, 3 element, one lane
 870 // ASIMD load, 3 element, all lanes
 871 def : InstRW<[N1Write_7c_2L_3V],
 872              (instregex "LD3i(8|16|32|64)$",
 873                         "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
 874 def : InstRW<[WriteAdr, N1Write_7c_2L_3V],
 875              (instregex "LD3i(8|16|32|64)_POST$",
 876                         "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
 877
 878 // ASIMD load, 4 element, multiple, D-form
 879 def : InstRW<[N1Write_8c_3L_4V],
 880              (instregex "LD4Fourv(8b|4h|2s)$")>;
 881 def : InstRW<[WriteAdr, N1Write_8c_3L_4V],
 882              (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
 883
 884 // ASIMD load, 4 element, multiple, Q-form
 885 def : InstRW<[N1Write_10c_4L_4V],
 886              (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
 887 def : InstRW<[WriteAdr, N1Write_10c_4L_4V],
 888              (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
 889
 890 // ASIMD load, 4 element, one lane
 891 // ASIMD load, 4 element, all lanes
 892 def : InstRW<[N1Write_8c_4L_4V],
 893              (instregex "LD4i(8|16|32|64)$",
 894                         "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
 895 def : InstRW<[WriteAdr, N1Write_8c_4L_4V],
 896              (instregex "LD4i(8|16|32|64)_POST$",
 897                         "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
 898
 899
 900 // ASIMD store instructions (each _POST form adds WriteAdr as its first write)
 901 // -----------------------------------------------------------------------------
 902
 903 // ASIMD store, 1 element, multiple, 1 reg, D-form
 904 def : InstRW<[N1Write_2c_1L_1V],
 905              (instregex "ST1Onev(8b|4h|2s|1d)$")>;
 906 def : InstRW<[WriteAdr, N1Write_2c_1L_1V],
 907              (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
 908
 909 // ASIMD store, 1 element, multiple, 1 reg, Q-form (same write as D-form)
 910 def : InstRW<[N1Write_2c_1L_1V],
 911              (instregex "ST1Onev(16b|8h|4s|2d)$")>;
 912 def : InstRW<[WriteAdr, N1Write_2c_1L_1V],
 913              (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
 914
 915 // ASIMD store, 1 element, multiple, 2 reg, D-form
 916 def : InstRW<[N1Write_2c_1L_2V],
 917              (instregex "ST1Twov(8b|4h|2s|1d)$")>;
 918 def : InstRW<[WriteAdr, N1Write_2c_1L_2V],
 919              (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
 920
 921 // ASIMD store, 1 element, multiple, 2 reg, Q-form
 922 def : InstRW<[N1Write_3c_2L_2V],
 923              (instregex "ST1Twov(16b|8h|4s|2d)$")>;
 924 def : InstRW<[WriteAdr, N1Write_3c_2L_2V],
 925              (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
 926
 927 // ASIMD store, 1 element, multiple, 3 reg, D-form
 928 def : InstRW<[N1Write_3c_2L_3V],
 929              (instregex "ST1Threev(8b|4h|2s|1d)$")>;
 930 def : InstRW<[WriteAdr, N1Write_3c_2L_3V],
 931              (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
 932
 933 // ASIMD store, 1 element, multiple, 3 reg, Q-form
 934 def : InstRW<[N1Write_4c_3L_3V],
 935              (instregex "ST1Threev(16b|8h|4s|2d)$")>;
 936 def : InstRW<[WriteAdr, N1Write_4c_3L_3V],
 937              (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
 938
 939 // ASIMD store, 1 element, multiple, 4 reg, D-form
 940 def : InstRW<[N1Write_3c_2L_2V],
 941              (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
 942 def : InstRW<[WriteAdr, N1Write_3c_2L_2V],
 943              (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
 944
 945 // ASIMD store, 1 element, multiple, 4 reg, Q-form
 946 def : InstRW<[N1Write_5c_4L_4V],
 947              (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
 948 def : InstRW<[WriteAdr, N1Write_5c_4L_4V],
 949              (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
 950
 951 // ASIMD store, 1 element, one lane
 952 def : InstRW<[N1Write_4c_1L_1V],
 953              (instregex "ST1i(8|16|32|64)$")>;
 954 def : InstRW<[WriteAdr, N1Write_4c_1L_1V],
 955              (instregex "ST1i(8|16|32|64)_POST$")>;
 956
 957 // ASIMD store, 2 element, multiple, D-form, B/H/S
 958 def : InstRW<[N1Write_4c_1L_1V],
 959              (instregex "ST2Twov(8b|4h|2s)$")>;
 960 def : InstRW<[WriteAdr, N1Write_4c_1L_1V],
 961              (instregex "ST2Twov(8b|4h|2s)_POST$")>;
 962
 963 // ASIMD store, 2 element, multiple, Q-form
 964 def : InstRW<[N1Write_5c_2L_2V],
 965              (instregex "ST2Twov(16b|8h|4s|2d)$")>;
 966 def : InstRW<[WriteAdr, N1Write_5c_2L_2V],
 967              (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
 968
 969 // ASIMD store, 2 element, one lane
 970 def : InstRW<[N1Write_4c_1L_1V],
 971              (instregex "ST2i(8|16|32|64)$")>;
 972 def : InstRW<[WriteAdr, N1Write_4c_1L_1V],
 973              (instregex "ST2i(8|16|32|64)_POST$")>;
 974
 975 // ASIMD store, 3 element, multiple, D-form, B/H/S
 976 def : InstRW<[N1Write_5c_2L_2V],
 977              (instregex "ST3Threev(8b|4h|2s)$")>;
 978 def : InstRW<[WriteAdr, N1Write_5c_2L_2V],
 979              (instregex "ST3Threev(8b|4h|2s)_POST$")>;
 980
 981 // ASIMD store, 3 element, multiple, Q-form
 982 def : InstRW<[N1Write_6c_3L_3V],
 983              (instregex "ST3Threev(16b|8h|4s|2d)$")>;
 984 def : InstRW<[WriteAdr, N1Write_6c_3L_3V],
 985              (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
 986
 987 // ASIMD store, 3 element, one lane, B/H/S
 988 def : InstRW<[N1Write_4c_3L_3V],
 989              (instregex "ST3i(8|16|32)$")>;
 990 def : InstRW<[WriteAdr, N1Write_4c_3L_3V],
 991              (instregex "ST3i(8|16|32)_POST$")>;
 992
 993 // ASIMD store, 3 element, one lane, D
 994 def : InstRW<[N1Write_5c_3L_3V],
 995              (instrs ST3i64)>;
 996 def : InstRW<[WriteAdr, N1Write_5c_3L_3V],
 997              (instrs ST3i64_POST)>;
 998
 999 // ASIMD store, 4 element, multiple, D-form, B/H/S
1000 def : InstRW<[N1Write_7c_3L_3V],
1001              (instregex "ST4Fourv(8b|4h|2s)$")>;
1002 def : InstRW<[WriteAdr, N1Write_7c_3L_3V],
1003              (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
1004
1005 // ASIMD store, 4 element, multiple, Q-form, B/H/S
1006 def : InstRW<[N1Write_9c_6L_6V],
1007              (instregex "ST4Fourv(16b|8h|4s)$")>;
1008 def : InstRW<[WriteAdr, N1Write_9c_6L_6V],
1009              (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
1010
1011 // ASIMD store, 4 element, multiple, Q-form, D
1012 def : InstRW<[N1Write_6c_4L_4V],
1013              (instrs ST4Fourv2d)>;
1014 def : InstRW<[WriteAdr, N1Write_6c_4L_4V],
1015              (instrs ST4Fourv2d_POST)>;
1016
1017 // ASIMD store, 4 element, one lane, B/H/S
1018 def : InstRW<[N1Write_5c_3L_3V],
1019              (instregex "ST4i(8|16|32)$")>;
1020 def : InstRW<[WriteAdr, N1Write_5c_3L_3V],
1021              (instregex "ST4i(8|16|32)_POST$")>;
1022
1023 // ASIMD store, 4 element, one lane, D
1024 def : InstRW<[N1Write_4c_3L_3V],
1025              (instrs ST4i64)>;
1026 def : InstRW<[WriteAdr, N1Write_4c_3L_3V],
1027              (instrs ST4i64_POST)>;
1028
1029
1030 // Cryptography extensions
1031 // -----------------------------------------------------------------------------
1032
1033 // Crypto AES ops (N1ReadVC trims 2 cycles off AESE/AESD->AESMC/AESIMC latency)
1034 def N1WriteVC : WriteSequence<[N1Write_2c_1V0]>;
1035 def N1ReadVC  : SchedReadAdvance<2, [N1WriteVC]>;
1036 def           : InstRW<[N1WriteVC], (instrs AESDrr, AESErr)>;
1037 def           : InstRW<[N1Write_2c_1V0, N1ReadVC], (instrs AESMCrr, AESIMCrr)>;
1038
1039 // Crypto polynomial (64x64) multiply long (PMULL on 64-bit elements)
1040 // Crypto SHA1 hash acceleration op (SHA1H)
1041 // Crypto SHA1 schedule acceleration ops (SHA1SU0, SHA1SU1)
1042 // Crypto SHA256 schedule acceleration ops (SHA256SU0, SHA256SU1)
1043 def : InstRW<[N1Write_2c_1V0], (instregex "^PMULLv[12]i64$",
1044                                           "^SHA1(H|SU0|SU1)rr",
1045                                           "^SHA256SU[01]rr")>;
1046
1047 // Crypto SHA1 hash acceleration ops (SHA1C, SHA1M, SHA1P)
1048 // Crypto SHA256 hash acceleration ops (SHA256H, SHA256H2)
1049 def : InstRW<[N1Write_4c_1V0], (instregex "^SHA1[CMP]rrr$",
1050                                           "^SHA256H2?rrr$")>;
1051
1052
1053 // CRC
1054 // -----------------------------------------------------------------------------
1055
1056 // CRC checksum ops (CRC32 and CRC32C; B, H, W and X variants)
1057 def : InstRW<[N1Write_2c_1M], (instregex "^CRC32C?[BHWX]rr$")>;
1058
1059
1060 }