1 // Copyright 2015, ARM Limited
2 // All rights reserved.
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #ifndef VIXL_A64_ASSEMBLER_A64_H_
28 #define VIXL_A64_ASSEMBLER_A64_H_
31 #include "vixl/globals.h"
32 #include "vixl/invalset.h"
33 #include "vixl/utils.h"
34 #include "vixl/code-buffer.h"
35 #include "vixl/a64/instructions-a64.h"
// A bit mask of CPU registers: bit N is set when the register whose code is N
// is present (see CPURegister::Bit()).
typedef uint64_t RegList;
static const int kRegListSizeInBits = sizeof(RegList) * 8;
45 // Some CPURegister methods can return Register or VRegister types, so we need
46 // to declare them in advance.
53 // The kInvalid value is used to detect uninitialized static instances,
54 // which are always zero-initialized before any constructors are called.
58 kFPRegister
= kVRegister
,
62 CPURegister() : code_(0), size_(0), type_(kNoRegister
) {
63 VIXL_ASSERT(!IsValid());
64 VIXL_ASSERT(IsNone());
67 CPURegister(unsigned code
, unsigned size
, RegisterType type
)
68 : code_(code
), size_(size
), type_(type
) {
69 VIXL_ASSERT(IsValidOrNone());
72 unsigned code() const {
73 VIXL_ASSERT(IsValid());
77 RegisterType
type() const {
78 VIXL_ASSERT(IsValidOrNone());
83 VIXL_ASSERT(code_
< (sizeof(RegList
) * 8));
84 return IsValid() ? (static_cast<RegList
>(1) << code_
) : 0;
87 unsigned size() const {
88 VIXL_ASSERT(IsValid());
92 int SizeInBytes() const {
93 VIXL_ASSERT(IsValid());
94 VIXL_ASSERT(size() % 8 == 0);
98 int SizeInBits() const {
99 VIXL_ASSERT(IsValid());
103 bool Is8Bits() const {
104 VIXL_ASSERT(IsValid());
108 bool Is16Bits() const {
109 VIXL_ASSERT(IsValid());
113 bool Is32Bits() const {
114 VIXL_ASSERT(IsValid());
118 bool Is64Bits() const {
119 VIXL_ASSERT(IsValid());
123 bool Is128Bits() const {
124 VIXL_ASSERT(IsValid());
128 bool IsValid() const {
129 if (IsValidRegister() || IsValidVRegister()) {
130 VIXL_ASSERT(!IsNone());
133 // This assert is hit when the register has not been properly initialized.
134 // One cause for this can be an initialisation order fiasco. See
135 // https://isocpp.org/wiki/faq/ctors#static-init-order for some details.
136 VIXL_ASSERT(IsNone());
141 bool IsValidRegister() const {
142 return IsRegister() &&
143 ((size_
== kWRegSize
) || (size_
== kXRegSize
)) &&
144 ((code_
< kNumberOfRegisters
) || (code_
== kSPRegInternalCode
));
147 bool IsValidVRegister() const {
148 return IsVRegister() &&
149 ((size_
== kBRegSize
) || (size_
== kHRegSize
) ||
150 (size_
== kSRegSize
) || (size_
== kDRegSize
) ||
151 (size_
== kQRegSize
)) &&
152 (code_
< kNumberOfVRegisters
);
155 bool IsValidFPRegister() const {
156 return IsFPRegister() && (code_
< kNumberOfVRegisters
);
159 bool IsNone() const {
160 // kNoRegister types should always have size 0 and code 0.
161 VIXL_ASSERT((type_
!= kNoRegister
) || (code_
== 0));
162 VIXL_ASSERT((type_
!= kNoRegister
) || (size_
== 0));
164 return type_
== kNoRegister
;
167 bool Aliases(const CPURegister
& other
) const {
168 VIXL_ASSERT(IsValidOrNone() && other
.IsValidOrNone());
169 return (code_
== other
.code_
) && (type_
== other
.type_
);
172 bool Is(const CPURegister
& other
) const {
173 VIXL_ASSERT(IsValidOrNone() && other
.IsValidOrNone());
174 return Aliases(other
) && (size_
== other
.size_
);
177 bool IsZero() const {
178 VIXL_ASSERT(IsValid());
179 return IsRegister() && (code_
== kZeroRegCode
);
183 VIXL_ASSERT(IsValid());
184 return IsRegister() && (code_
== kSPRegInternalCode
);
187 bool IsRegister() const {
188 return type_
== kRegister
;
191 bool IsVRegister() const {
192 return type_
== kVRegister
;
195 bool IsFPRegister() const {
196 return IsS() || IsD();
// Size predicates for the general-purpose views.
bool IsW() const { return IsValidRegister() && Is32Bits(); }
bool IsX() const { return IsValidRegister() && Is64Bits(); }

// These assertions ensure that the size and type of the register are as
// described. They do not consider the number of lanes that make up a vector.
// So, for example, Is8B() implies IsD(), and Is1D() implies IsD, but IsD()
// does not imply Is1D() or Is8B().
// Check the number of lanes, ie. the format of the vector, using methods such
// as Is8B(), Is1D(), etc. in the VRegister class.
bool IsV() const { return IsVRegister(); }
bool IsB() const { return IsV() && Is8Bits(); }
bool IsH() const { return IsV() && Is16Bits(); }
bool IsS() const { return IsV() && Is32Bits(); }
bool IsD() const { return IsV() && Is64Bits(); }
bool IsQ() const { return IsV() && Is128Bits(); }
215 const Register
& W() const;
216 const Register
& X() const;
217 const VRegister
& V() const;
218 const VRegister
& B() const;
219 const VRegister
& H() const;
220 const VRegister
& S() const;
221 const VRegister
& D() const;
222 const VRegister
& Q() const;
224 bool IsSameSizeAndType(const CPURegister
& other
) const {
225 return (size_
== other
.size_
) && (type_
== other
.type_
);
234 bool IsValidOrNone() const {
235 return IsValid() || IsNone();
240 class Register
: public CPURegister
{
242 Register() : CPURegister() {}
243 explicit Register(const CPURegister
& other
)
244 : CPURegister(other
.code(), other
.size(), other
.type()) {
245 VIXL_ASSERT(IsValidRegister());
247 Register(unsigned code
, unsigned size
)
248 : CPURegister(code
, size
, kRegister
) {}
250 bool IsValid() const {
251 VIXL_ASSERT(IsRegister() || IsNone());
252 return IsValidRegister();
255 static const Register
& WRegFromCode(unsigned code
);
256 static const Register
& XRegFromCode(unsigned code
);
259 static const Register wregisters
[];
260 static const Register xregisters
[];
264 class VRegister
: public CPURegister
{
266 VRegister() : CPURegister(), lanes_(1) {}
267 explicit VRegister(const CPURegister
& other
)
268 : CPURegister(other
.code(), other
.size(), other
.type()), lanes_(1) {
269 VIXL_ASSERT(IsValidVRegister());
270 VIXL_ASSERT(IsPowerOf2(lanes_
) && (lanes_
<= 16));
272 VRegister(unsigned code
, unsigned size
, unsigned lanes
= 1)
273 : CPURegister(code
, size
, kVRegister
), lanes_(lanes
) {
274 VIXL_ASSERT(IsPowerOf2(lanes_
) && (lanes_
<= 16));
276 VRegister(unsigned code
, VectorFormat format
)
277 : CPURegister(code
, RegisterSizeInBitsFromFormat(format
), kVRegister
),
278 lanes_(IsVectorFormat(format
) ? LaneCountFromFormat(format
) : 1) {
279 VIXL_ASSERT(IsPowerOf2(lanes_
) && (lanes_
<= 16));
282 bool IsValid() const {
283 VIXL_ASSERT(IsVRegister() || IsNone());
284 return IsValidVRegister();
287 static const VRegister
& BRegFromCode(unsigned code
);
288 static const VRegister
& HRegFromCode(unsigned code
);
289 static const VRegister
& SRegFromCode(unsigned code
);
290 static const VRegister
& DRegFromCode(unsigned code
);
291 static const VRegister
& QRegFromCode(unsigned code
);
292 static const VRegister
& VRegFromCode(unsigned code
);
// Return a VRegister with the same code, but with the total size and lane
// count implied by the method name.
VRegister V8B() const { return VRegister(code_, kDRegSize, 8); }
VRegister V16B() const { return VRegister(code_, kQRegSize, 16); }
VRegister V4H() const { return VRegister(code_, kDRegSize, 4); }
VRegister V8H() const { return VRegister(code_, kQRegSize, 8); }
VRegister V2S() const { return VRegister(code_, kDRegSize, 2); }
VRegister V4S() const { return VRegister(code_, kQRegSize, 4); }
VRegister V2D() const { return VRegister(code_, kQRegSize, 2); }
VRegister V1D() const { return VRegister(code_, kDRegSize, 1); }
// Vector-format checks: total register size plus lane count.
bool Is8B() const { return (Is64Bits() && (lanes_ == 8)); }
bool Is16B() const { return (Is128Bits() && (lanes_ == 16)); }
bool Is4H() const { return (Is64Bits() && (lanes_ == 4)); }
bool Is8H() const { return (Is128Bits() && (lanes_ == 8)); }
bool Is2S() const { return (Is64Bits() && (lanes_ == 2)); }
bool Is4S() const { return (Is128Bits() && (lanes_ == 4)); }
bool Is1D() const { return (Is64Bits() && (lanes_ == 1)); }
bool Is2D() const { return (Is128Bits() && (lanes_ == 2)); }
312 // For consistency, we assert the number of lanes of these scalar registers,
313 // even though there are no vectors of equivalent total size with which they
316 VIXL_ASSERT(!(Is8Bits() && IsVector()));
320 VIXL_ASSERT(!(Is16Bits() && IsVector()));
324 VIXL_ASSERT(!(Is32Bits() && IsVector()));
// Lane-size checks; independent of the number of lanes.
bool IsLaneSizeB() const { return LaneSizeInBits() == kBRegSize; }
bool IsLaneSizeH() const { return LaneSizeInBits() == kHRegSize; }
bool IsLaneSizeS() const { return LaneSizeInBits() == kSRegSize; }
bool IsLaneSizeD() const { return LaneSizeInBits() == kDRegSize; }
337 bool IsScalar() const {
341 bool IsVector() const {
345 bool IsSameFormat(const VRegister
& other
) const {
346 return (size_
== other
.size_
) && (lanes_
== other
.lanes_
);
349 unsigned LaneSizeInBytes() const {
350 return SizeInBytes() / lanes_
;
353 unsigned LaneSizeInBits() const {
354 return LaneSizeInBytes() * 8;
358 static const VRegister bregisters
[];
359 static const VRegister hregisters
[];
360 static const VRegister sregisters
[];
361 static const VRegister dregisters
[];
362 static const VRegister qregisters
[];
363 static const VRegister vregisters
[];
// Backward compatibility for FPRegisters.
typedef VRegister FPRegister;

// No*Reg is used to indicate an unused argument, or an error case. Note that
// these all compare equal (using the Is() method). The Register and VRegister
// variants are provided for convenience.
const Register NoReg;
const VRegister NoVReg;
const FPRegister NoFPReg;  // For backward compatibility.
const CPURegister NoCPUReg;
// Instantiate wN and xN for every register code produced by
// REGISTER_CODE_LIST.
#define DEFINE_REGISTERS(N) \
const Register w##N(N, kWRegSize); \
const Register x##N(N, kXRegSize);
REGISTER_CODE_LIST(DEFINE_REGISTERS)
#undef DEFINE_REGISTERS
// The stack pointer views use the internal SP register code.
const Register wsp(kSPRegInternalCode, kWRegSize);
const Register sp(kSPRegInternalCode, kXRegSize);
// Instantiate bN/hN/sN/dN/qN/vN for every register code produced by
// REGISTER_CODE_LIST.
#define DEFINE_VREGISTERS(N) \
const VRegister b##N(N, kBRegSize); \
const VRegister h##N(N, kHRegSize); \
const VRegister s##N(N, kSRegSize); \
const VRegister d##N(N, kDRegSize); \
const VRegister q##N(N, kQRegSize); \
const VRegister v##N(N, kQRegSize);
REGISTER_CODE_LIST(DEFINE_VREGISTERS)
#undef DEFINE_VREGISTERS
// Register aliases.
const Register ip0 = x16;
const Register ip1 = x17;
const Register lr = x30;   // Link register.
const Register xzr = x31;  // Zero registers.
const Register wzr = w31;
408 // AreAliased returns true if any of the named registers overlap. Arguments
409 // set to NoReg are ignored. The system stack pointer may be specified.
410 bool AreAliased(const CPURegister
& reg1
,
411 const CPURegister
& reg2
,
412 const CPURegister
& reg3
= NoReg
,
413 const CPURegister
& reg4
= NoReg
,
414 const CPURegister
& reg5
= NoReg
,
415 const CPURegister
& reg6
= NoReg
,
416 const CPURegister
& reg7
= NoReg
,
417 const CPURegister
& reg8
= NoReg
);
420 // AreSameSizeAndType returns true if all of the specified registers have the
421 // same size, and are of the same type. The system stack pointer may be
422 // specified. Arguments set to NoReg are ignored, as are any subsequent
423 // arguments. At least one argument (reg1) must be valid (not NoCPUReg).
424 bool AreSameSizeAndType(const CPURegister
& reg1
,
425 const CPURegister
& reg2
,
426 const CPURegister
& reg3
= NoCPUReg
,
427 const CPURegister
& reg4
= NoCPUReg
,
428 const CPURegister
& reg5
= NoCPUReg
,
429 const CPURegister
& reg6
= NoCPUReg
,
430 const CPURegister
& reg7
= NoCPUReg
,
431 const CPURegister
& reg8
= NoCPUReg
);
434 // AreSameFormat returns true if all of the specified VRegisters have the same
435 // vector format. Arguments set to NoReg are ignored, as are any subsequent
436 // arguments. At least one argument (reg1) must be valid (not NoVReg).
437 bool AreSameFormat(const VRegister
& reg1
,
438 const VRegister
& reg2
,
439 const VRegister
& reg3
= NoVReg
,
440 const VRegister
& reg4
= NoVReg
);
443 // AreConsecutive returns true if all of the specified VRegisters are
444 // consecutive in the register file. Arguments set to NoReg are ignored, as are
445 // any subsequent arguments. At least one argument (reg1) must be valid
447 bool AreConsecutive(const VRegister
& reg1
,
448 const VRegister
& reg2
,
449 const VRegister
& reg3
= NoVReg
,
450 const VRegister
& reg4
= NoVReg
);
453 // Lists of registers.
456 explicit CPURegList(CPURegister reg1
,
457 CPURegister reg2
= NoCPUReg
,
458 CPURegister reg3
= NoCPUReg
,
459 CPURegister reg4
= NoCPUReg
)
460 : list_(reg1
.Bit() | reg2
.Bit() | reg3
.Bit() | reg4
.Bit()),
461 size_(reg1
.size()), type_(reg1
.type()) {
462 VIXL_ASSERT(AreSameSizeAndType(reg1
, reg2
, reg3
, reg4
));
463 VIXL_ASSERT(IsValid());
466 CPURegList(CPURegister::RegisterType type
, unsigned size
, RegList list
)
467 : list_(list
), size_(size
), type_(type
) {
468 VIXL_ASSERT(IsValid());
471 CPURegList(CPURegister::RegisterType type
, unsigned size
,
472 unsigned first_reg
, unsigned last_reg
)
473 : size_(size
), type_(type
) {
474 VIXL_ASSERT(((type
== CPURegister::kRegister
) &&
475 (last_reg
< kNumberOfRegisters
)) ||
476 ((type
== CPURegister::kVRegister
) &&
477 (last_reg
< kNumberOfVRegisters
)));
478 VIXL_ASSERT(last_reg
>= first_reg
);
479 list_
= (UINT64_C(1) << (last_reg
+ 1)) - 1;
480 list_
&= ~((UINT64_C(1) << first_reg
) - 1);
481 VIXL_ASSERT(IsValid());
484 CPURegister::RegisterType
type() const {
485 VIXL_ASSERT(IsValid());
489 // Combine another CPURegList into this one. Registers that already exist in
490 // this list are left unchanged. The type and size of the registers in the
491 // 'other' list must match those in this list.
492 void Combine(const CPURegList
& other
) {
493 VIXL_ASSERT(IsValid());
494 VIXL_ASSERT(other
.type() == type_
);
495 VIXL_ASSERT(other
.RegisterSizeInBits() == size_
);
496 list_
|= other
.list();
499 // Remove every register in the other CPURegList from this one. Registers that
500 // do not exist in this list are ignored. The type and size of the registers
501 // in the 'other' list must match those in this list.
502 void Remove(const CPURegList
& other
) {
503 VIXL_ASSERT(IsValid());
504 VIXL_ASSERT(other
.type() == type_
);
505 VIXL_ASSERT(other
.RegisterSizeInBits() == size_
);
506 list_
&= ~other
.list();
509 // Variants of Combine and Remove which take a single register.
510 void Combine(const CPURegister
& other
) {
511 VIXL_ASSERT(other
.type() == type_
);
512 VIXL_ASSERT(other
.size() == size_
);
513 Combine(other
.code());
516 void Remove(const CPURegister
& other
) {
517 VIXL_ASSERT(other
.type() == type_
);
518 VIXL_ASSERT(other
.size() == size_
);
519 Remove(other
.code());
522 // Variants of Combine and Remove which take a single register by its code;
523 // the type and size of the register is inferred from this list.
524 void Combine(int code
) {
525 VIXL_ASSERT(IsValid());
526 VIXL_ASSERT(CPURegister(code
, size_
, type_
).IsValid());
527 list_
|= (UINT64_C(1) << code
);
530 void Remove(int code
) {
531 VIXL_ASSERT(IsValid());
532 VIXL_ASSERT(CPURegister(code
, size_
, type_
).IsValid());
533 list_
&= ~(UINT64_C(1) << code
);
536 static CPURegList
Union(const CPURegList
& list_1
, const CPURegList
& list_2
) {
537 VIXL_ASSERT(list_1
.type_
== list_2
.type_
);
538 VIXL_ASSERT(list_1
.size_
== list_2
.size_
);
539 return CPURegList(list_1
.type_
, list_1
.size_
, list_1
.list_
| list_2
.list_
);
541 static CPURegList
Union(const CPURegList
& list_1
,
542 const CPURegList
& list_2
,
543 const CPURegList
& list_3
);
544 static CPURegList
Union(const CPURegList
& list_1
,
545 const CPURegList
& list_2
,
546 const CPURegList
& list_3
,
547 const CPURegList
& list_4
);
549 static CPURegList
Intersection(const CPURegList
& list_1
,
550 const CPURegList
& list_2
) {
551 VIXL_ASSERT(list_1
.type_
== list_2
.type_
);
552 VIXL_ASSERT(list_1
.size_
== list_2
.size_
);
553 return CPURegList(list_1
.type_
, list_1
.size_
, list_1
.list_
& list_2
.list_
);
555 static CPURegList
Intersection(const CPURegList
& list_1
,
556 const CPURegList
& list_2
,
557 const CPURegList
& list_3
);
558 static CPURegList
Intersection(const CPURegList
& list_1
,
559 const CPURegList
& list_2
,
560 const CPURegList
& list_3
,
561 const CPURegList
& list_4
);
563 bool Overlaps(const CPURegList
& other
) const {
564 return (type_
== other
.type_
) && ((list_
& other
.list_
) != 0);
567 RegList
list() const {
568 VIXL_ASSERT(IsValid());
572 void set_list(RegList new_list
) {
573 VIXL_ASSERT(IsValid());
577 // Remove all callee-saved registers from the list. This can be useful when
578 // preparing registers for an AAPCS64 function call, for example.
579 void RemoveCalleeSaved();
581 CPURegister
PopLowestIndex();
582 CPURegister
PopHighestIndex();
584 // AAPCS64 callee-saved registers.
585 static CPURegList
GetCalleeSaved(unsigned size
= kXRegSize
);
586 static CPURegList
GetCalleeSavedV(unsigned size
= kDRegSize
);
588 // AAPCS64 caller-saved registers. Note that this includes lr.
589 // TODO(all): Determine how we handle d8-d15 being callee-saved, but the top
590 // 64-bits being caller-saved.
591 static CPURegList
GetCallerSaved(unsigned size
= kXRegSize
);
592 static CPURegList
GetCallerSavedV(unsigned size
= kDRegSize
);
594 bool IsEmpty() const {
595 VIXL_ASSERT(IsValid());
599 bool IncludesAliasOf(const CPURegister
& other
) const {
600 VIXL_ASSERT(IsValid());
601 return (type_
== other
.type()) && ((other
.Bit() & list_
) != 0);
604 bool IncludesAliasOf(int code
) const {
605 VIXL_ASSERT(IsValid());
606 return ((code
& list_
) != 0);
610 VIXL_ASSERT(IsValid());
611 return CountSetBits(list_
);
614 unsigned RegisterSizeInBits() const {
615 VIXL_ASSERT(IsValid());
619 unsigned RegisterSizeInBytes() const {
620 int size_in_bits
= RegisterSizeInBits();
621 VIXL_ASSERT((size_in_bits
% 8) == 0);
622 return size_in_bits
/ 8;
625 unsigned TotalSizeInBytes() const {
626 VIXL_ASSERT(IsValid());
627 return RegisterSizeInBytes() * Count();
633 CPURegister::RegisterType type_
;
635 bool IsValid() const;
639 // AAPCS64 callee-saved registers.
640 extern const CPURegList kCalleeSaved
;
641 extern const CPURegList kCalleeSavedV
;
644 // AAPCS64 caller-saved registers. Note that this includes lr.
645 extern const CPURegList kCallerSaved
;
646 extern const CPURegList kCallerSavedV
;
653 // where <immediate> is int64_t.
654 // This is allowed to be an implicit constructor because Operand is
655 // a wrapper class that doesn't normally perform any type conversion.
656 Operand(int64_t immediate
= 0); // NOLINT(runtime/explicit)
658 // rm, {<shift> #<shift_amount>}
659 // where <shift> is one of {LSL, LSR, ASR, ROR}.
660 // <shift_amount> is uint6_t.
661 // This is allowed to be an implicit constructor because Operand is
662 // a wrapper class that doesn't normally perform any type conversion.
663 Operand(Register reg
,
665 unsigned shift_amount
= 0); // NOLINT(runtime/explicit)
667 // rm, {<extend> {#<shift_amount>}}
668 // where <extend> is one of {UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX}.
669 // <shift_amount> is uint2_t.
670 explicit Operand(Register reg
, Extend extend
, unsigned shift_amount
= 0);
672 bool IsImmediate() const;
673 bool IsShiftedRegister() const;
674 bool IsExtendedRegister() const;
677 // This returns an LSL shift (<= 4) operand as an equivalent extend operand,
678 // which helps in the encoding of instructions that use the stack pointer.
679 Operand
ToExtendedRegister() const;
681 int64_t immediate() const {
682 VIXL_ASSERT(IsImmediate());
686 Register
reg() const {
687 VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
691 Shift
shift() const {
692 VIXL_ASSERT(IsShiftedRegister());
696 Extend
extend() const {
697 VIXL_ASSERT(IsExtendedRegister());
701 unsigned shift_amount() const {
702 VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
703 return shift_amount_
;
711 unsigned shift_amount_
;
715 // MemOperand represents the addressing mode of a load or store instruction.
718 explicit MemOperand(Register base
,
720 AddrMode addrmode
= Offset
);
721 MemOperand(Register base
,
724 unsigned shift_amount
= 0);
725 MemOperand(Register base
,
728 unsigned shift_amount
= 0);
729 MemOperand(Register base
,
730 const Operand
& offset
,
731 AddrMode addrmode
= Offset
);
733 const Register
& base() const { return base_
; }
734 const Register
& regoffset() const { return regoffset_
; }
735 int64_t offset() const { return offset_
; }
736 AddrMode
addrmode() const { return addrmode_
; }
737 Shift
shift() const { return shift_
; }
738 Extend
extend() const { return extend_
; }
739 unsigned shift_amount() const { return shift_amount_
; }
740 bool IsImmediateOffset() const;
741 bool IsRegisterOffset() const;
742 bool IsPreIndex() const;
743 bool IsPostIndex() const;
745 void AddOffset(int64_t offset
);
754 unsigned shift_amount_
;
758 class LabelTestHelper
; // Forward declaration.
763 Label() : location_(kLocationUnbound
) {}
765 // If the label has been linked to, it needs to be bound to a target.
766 VIXL_ASSERT(!IsLinked() || IsBound());
769 bool IsBound() const { return location_
>= 0; }
770 bool IsLinked() const { return !links_
.empty(); }
772 ptrdiff_t location() const { return location_
; }
774 static const int kNPreallocatedLinks
= 4;
775 static const ptrdiff_t kInvalidLinkKey
= PTRDIFF_MAX
;
776 static const size_t kReclaimFrom
= 512;
777 static const size_t kReclaimFactor
= 2;
779 typedef InvalSet
<ptrdiff_t,
784 kReclaimFactor
> LinksSetBase
;
785 typedef InvalSetIterator
<LinksSetBase
> LabelLinksIteratorBase
;
788 class LinksSet
: public LinksSetBase
{
790 LinksSet() : LinksSetBase() {}
793 // Allows iterating over the links of a label. The behaviour is undefined if
794 // the list of links is modified in any way while iterating.
795 class LabelLinksIterator
: public LabelLinksIteratorBase
{
797 explicit LabelLinksIterator(Label
* label
)
798 : LabelLinksIteratorBase(&label
->links_
) {}
801 void Bind(ptrdiff_t location
) {
802 // Labels can only be bound once.
803 VIXL_ASSERT(!IsBound());
804 location_
= location
;
807 void AddLink(ptrdiff_t instruction
) {
808 // If a label is bound, the assembler already has the information it needs
809 // to write the instruction, so there is no need to add it to links_.
810 VIXL_ASSERT(!IsBound());
811 links_
.insert(instruction
);
814 void DeleteLink(ptrdiff_t instruction
) {
815 links_
.erase(instruction
);
818 void ClearAllLinks() {
822 // TODO: The comment below considers average case complexity for our
823 // usual use-cases. The elements of interest are:
824 // - Branches to a label are emitted in order: branch instructions to a label
825 // are generated at an offset in the code generation buffer greater than any
826 // other branch to that same label already generated. As an example, this can
827 // be broken when an instruction is patched to become a branch. Note that the
828 // code will still work, but the complexity considerations below may locally
829 // not apply any more.
830 // - Veneers are generated in order: for multiple branches of the same type
831 // branching to the same unbound label going out of range, veneers are
832 // generated in growing order of the branch instruction offset from the start
835 // When creating a veneer for a branch going out of range, the link for this
836 // branch needs to be removed from this `links_`. Since all branches are
837 // tracked in one underlying InvalSet, the complexity for this deletion is the
838 // same as for finding the element, ie. O(n), where n is the number of links
840 // This could be reduced to O(1) by using the same trick as used when tracking
841 // branch information for veneers: split the container to use one set per type
842 // of branch. With that setup, when a veneer is created and the link needs to
843 // be deleted, if the two points above hold, it must be the minimum element of
844 // the set for its type of branch, and that minimum element will be accessible
847 // The offsets of the instructions that have linked to this label.
849 // The label location.
852 static const ptrdiff_t kLocationUnbound
= -1;
854 // It is not safe to copy labels, so disable the copy constructor and operator
855 // by declaring them private (without an implementation).
857 void operator=(const Label
&);
859 // The Assembler class is responsible for binding and linking labels, since
860 // the stored offsets need to be consistent with the Assembler's buffer.
861 friend class Assembler
;
862 // The MacroAssembler and VeneerPool handle resolution of branches to distant
864 friend class MacroAssembler
;
865 friend class VeneerPool
;
869 // Required InvalSet template specialisations.
870 #define INVAL_SET_TEMPLATE_PARAMETERS \
872 Label::kNPreallocatedLinks, \
874 Label::kInvalidLinkKey, \
875 Label::kReclaimFrom, \
876 Label::kReclaimFactor
878 inline ptrdiff_t InvalSet
<INVAL_SET_TEMPLATE_PARAMETERS
>::Key(
879 const ptrdiff_t& element
) {
883 inline void InvalSet
<INVAL_SET_TEMPLATE_PARAMETERS
>::SetKey(
884 ptrdiff_t* element
, ptrdiff_t key
) {
887 #undef INVAL_SET_TEMPLATE_PARAMETERS
893 // A literal is a 32-bit or 64-bit piece of data stored in the instruction
894 // stream and loaded through a pc relative load. The same literal can be
895 // referred to by multiple instructions but a literal can only reside at one
896 // place in memory. A literal can be used by a load before or after being
899 // Internally an offset of 0 is associated with a literal which has been
900 // neither used nor placed. Then two possibilities arise:
901 // 1) the label is placed, the offset (stored as offset + 1) is used to
902 // resolve any subsequent load using the label.
903 // 2) the label is not placed and offset is the offset of the last load using
904 // the literal (stored as -offset -1). If multiple loads refer to this
905 // literal then the last load holds the offset of the preceding load and
906 // all loads form a chain. Once the offset is placed all the loads in the
907 // chain are resolved and future loads fall back to possibility 1.
910 enum DeletionPolicy
{
911 kDeletedOnPlacementByPool
,
912 kDeletedOnPoolDestruction
,
916 RawLiteral(size_t size
,
917 LiteralPool
* literal_pool
,
918 DeletionPolicy deletion_policy
= kManuallyDeleted
);
920 // The literal pool only sees and deletes `RawLiteral*` pointers, but they are
921 // actually pointing to `Literal<T>` objects.
922 virtual ~RawLiteral() {}
925 VIXL_STATIC_ASSERT(kDRegSizeInBytes
== kXRegSizeInBytes
);
926 VIXL_STATIC_ASSERT(kSRegSizeInBytes
== kWRegSizeInBytes
);
927 VIXL_ASSERT((size_
== kXRegSizeInBytes
) ||
928 (size_
== kWRegSizeInBytes
) ||
929 (size_
== kQRegSizeInBytes
));
932 uint64_t raw_value128_low64() {
933 VIXL_ASSERT(size_
== kQRegSizeInBytes
);
936 uint64_t raw_value128_high64() {
937 VIXL_ASSERT(size_
== kQRegSizeInBytes
);
940 uint64_t raw_value64() {
941 VIXL_ASSERT(size_
== kXRegSizeInBytes
);
942 VIXL_ASSERT(high64_
== 0);
945 uint32_t raw_value32() {
946 VIXL_ASSERT(size_
== kWRegSizeInBytes
);
947 VIXL_ASSERT(high64_
== 0);
948 VIXL_ASSERT(is_uint32(low64_
) || is_int32(low64_
));
949 return static_cast<uint32_t>(low64_
);
951 bool IsUsed() { return offset_
< 0; }
952 bool IsPlaced() { return offset_
> 0; }
954 LiteralPool
* GetLiteralPool() const {
955 return literal_pool_
;
959 VIXL_ASSERT(IsPlaced());
964 void set_offset(ptrdiff_t offset
) {
965 VIXL_ASSERT(offset
>= 0);
966 VIXL_ASSERT(IsWordAligned(offset
));
967 VIXL_ASSERT(!IsPlaced());
968 offset_
= offset
+ 1;
970 ptrdiff_t last_use() {
971 VIXL_ASSERT(IsUsed());
974 void set_last_use(ptrdiff_t offset
) {
975 VIXL_ASSERT(offset
>= 0);
976 VIXL_ASSERT(IsWordAligned(offset
));
977 VIXL_ASSERT(!IsPlaced());
978 offset_
= -offset
- 1;
987 LiteralPool
* literal_pool_
;
988 DeletionPolicy deletion_policy_
;
990 friend class Assembler
;
991 friend class LiteralPool
;
995 template <typename T
>
996 class Literal
: public RawLiteral
{
998 explicit Literal(T value
,
999 LiteralPool
* literal_pool
= NULL
,
1000 RawLiteral::DeletionPolicy ownership
= kManuallyDeleted
)
1001 : RawLiteral(sizeof(value
), literal_pool
, ownership
) {
1002 VIXL_STATIC_ASSERT(sizeof(value
) <= kXRegSizeInBytes
);
1006 Literal(T high64
, T low64
,
1007 LiteralPool
* literal_pool
= NULL
,
1008 RawLiteral::DeletionPolicy ownership
= kManuallyDeleted
)
1009 : RawLiteral(kQRegSizeInBytes
, literal_pool
, ownership
) {
1010 VIXL_STATIC_ASSERT(sizeof(low64
) == (kQRegSizeInBytes
/ 2));
1011 UpdateValue(high64
, low64
);
1014 virtual ~Literal() {}
1016 // Update the value of this literal, if necessary by rewriting the value in
1018 // If the literal has already been placed in a literal pool, the address of
1019 // the start of the code buffer must be provided, as the literal only knows it
1020 // offset from there. This also allows patching the value after the code has
1021 // been moved in memory.
1022 void UpdateValue(T new_value
, uint8_t* code_buffer
= NULL
) {
1023 VIXL_ASSERT(sizeof(new_value
) == size_
);
1024 memcpy(&low64_
, &new_value
, sizeof(new_value
));
1026 VIXL_ASSERT(code_buffer
!= NULL
);
1027 RewriteValueInCode(code_buffer
);
1031 void UpdateValue(T high64
, T low64
, uint8_t* code_buffer
= NULL
) {
1032 VIXL_ASSERT(sizeof(low64
) == size_
/ 2);
1033 memcpy(&low64_
, &low64
, sizeof(low64
));
1034 memcpy(&high64_
, &high64
, sizeof(high64
));
1036 VIXL_ASSERT(code_buffer
!= NULL
);
1037 RewriteValueInCode(code_buffer
);
1041 void UpdateValue(T new_value
, const Assembler
* assembler
);
1042 void UpdateValue(T high64
, T low64
, const Assembler
* assembler
);
1045 void RewriteValueInCode(uint8_t* code_buffer
) {
1046 VIXL_ASSERT(IsPlaced());
1047 VIXL_STATIC_ASSERT(sizeof(T
) <= kXRegSizeInBytes
);
1049 case kSRegSizeInBytes
:
1050 *reinterpret_cast<uint32_t*>(code_buffer
+ offset()) = raw_value32();
1052 case kDRegSizeInBytes
:
1053 *reinterpret_cast<uint64_t*>(code_buffer
+ offset()) = raw_value64();
1056 VIXL_ASSERT(size() == kQRegSizeInBytes
);
1057 uint64_t* base_address
=
1058 reinterpret_cast<uint64_t*>(code_buffer
+ offset());
1059 *base_address
= raw_value128_low64();
1060 *(base_address
+ 1) = raw_value128_high64();
// Control whether or not position-independent code should be emitted.
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing.
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};
// Control how scaled- and unscaled-offset loads and stores are generated.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};
1107 Assembler(size_t capacity
,
1108 PositionIndependentCodeOption pic
= PositionIndependentCode
);
1109 Assembler(byte
* buffer
, size_t capacity
,
1110 PositionIndependentCodeOption pic
= PositionIndependentCode
);
1112 // The destructor asserts that one of the following is true:
1113 // * The Assembler object has not been used.
1114 // * Nothing has been emitted since the last Reset() call.
1115 // * Nothing has been emitted since the last FinalizeCode() call.
1118 // System functions.
1120 // Start generating code from the beginning of the buffer, discarding any code
1121 // and data that has already been emitted into the buffer.
1124 // Finalize a code buffer of generated instructions. This function must be
1125 // called before executing or copying code from the buffer.
1126 void FinalizeCode();
1129 // Bind a label to the current PC.
1130 void bind(Label
* label
);
1132 // Bind a label to a specified offset from the start of the buffer.
1133 void BindToOffset(Label
* label
, ptrdiff_t offset
);
1135 // Place a literal at the current PC.
1136 void place(RawLiteral
* literal
);
1138 ptrdiff_t CursorOffset() const {
1139 return buffer_
->CursorOffset();
1142 ptrdiff_t BufferEndOffset() const {
1143 return static_cast<ptrdiff_t>(buffer_
->capacity());
1146 // Return the address of an offset in the buffer.
1147 template <typename T
>
1148 T
GetOffsetAddress(ptrdiff_t offset
) const {
1149 VIXL_STATIC_ASSERT(sizeof(T
) >= sizeof(uintptr_t));
1150 return buffer_
->GetOffsetAddress
<T
>(offset
);
1153 // Return the address of a bound label.
1154 template <typename T
>
1155 T
GetLabelAddress(const Label
* label
) const {
1156 VIXL_ASSERT(label
->IsBound());
1157 VIXL_STATIC_ASSERT(sizeof(T
) >= sizeof(uintptr_t));
1158 return GetOffsetAddress
<T
>(label
->location());
1161 // Return the address of the cursor.
1162 template <typename T
>
1163 T
GetCursorAddress() const {
1164 VIXL_STATIC_ASSERT(sizeof(T
) >= sizeof(uintptr_t));
1165 return GetOffsetAddress
<T
>(CursorOffset());
1168 // Return the address of the start of the buffer.
1169 template <typename T
>
1170 T
GetStartAddress() const {
1171 VIXL_STATIC_ASSERT(sizeof(T
) >= sizeof(uintptr_t));
1172 return GetOffsetAddress
<T
>(0);
1175 Instruction
* InstructionAt(ptrdiff_t instruction_offset
) {
1176 return GetOffsetAddress
<Instruction
*>(instruction_offset
);
1179 ptrdiff_t InstructionOffset(Instruction
* instruction
) {
1180 VIXL_STATIC_ASSERT(sizeof(*instruction
) == 1);
1181 ptrdiff_t offset
= instruction
- GetStartAddress
<Instruction
*>();
1182 VIXL_ASSERT((0 <= offset
) &&
1183 (offset
< static_cast<ptrdiff_t>(BufferCapacity())));
1187 // Instruction set functions.
1189 // Branch / Jump instructions.
1190 // Branch to register.
1191 void br(const Register
& xn
);
1193 // Branch with link to register.
1194 void blr(const Register
& xn
);
1196 // Branch to register with return hint.
1197 void ret(const Register
& xn
= lr
);
1199 // Unconditional branch to label.
1200 void b(Label
* label
);
1202 // Conditional branch to label.
1203 void b(Label
* label
, Condition cond
);
1205 // Unconditional branch to PC offset.
1208 // Conditional branch to PC offset.
1209 void b(int imm19
, Condition cond
);
1211 // Branch with link to label.
1212 void bl(Label
* label
);
1214 // Branch with link to PC offset.
1217 // Compare and branch to label if zero.
1218 void cbz(const Register
& rt
, Label
* label
);
1220 // Compare and branch to PC offset if zero.
1221 void cbz(const Register
& rt
, int imm19
);
1223 // Compare and branch to label if not zero.
1224 void cbnz(const Register
& rt
, Label
* label
);
1226 // Compare and branch to PC offset if not zero.
1227 void cbnz(const Register
& rt
, int imm19
);
1229 // Table lookup from one register.
1230 void tbl(const VRegister
& vd
,
1231 const VRegister
& vn
,
1232 const VRegister
& vm
);
1234 // Table lookup from two registers.
1235 void tbl(const VRegister
& vd
,
1236 const VRegister
& vn
,
1237 const VRegister
& vn2
,
1238 const VRegister
& vm
);
1240 // Table lookup from three registers.
1241 void tbl(const VRegister
& vd
,
1242 const VRegister
& vn
,
1243 const VRegister
& vn2
,
1244 const VRegister
& vn3
,
1245 const VRegister
& vm
);
1247 // Table lookup from four registers.
1248 void tbl(const VRegister
& vd
,
1249 const VRegister
& vn
,
1250 const VRegister
& vn2
,
1251 const VRegister
& vn3
,
1252 const VRegister
& vn4
,
1253 const VRegister
& vm
);
1255 // Table lookup extension from one register.
1256 void tbx(const VRegister
& vd
,
1257 const VRegister
& vn
,
1258 const VRegister
& vm
);
1260 // Table lookup extension from two registers.
1261 void tbx(const VRegister
& vd
,
1262 const VRegister
& vn
,
1263 const VRegister
& vn2
,
1264 const VRegister
& vm
);
1266 // Table lookup extension from three registers.
1267 void tbx(const VRegister
& vd
,
1268 const VRegister
& vn
,
1269 const VRegister
& vn2
,
1270 const VRegister
& vn3
,
1271 const VRegister
& vm
);
1273 // Table lookup extension from four registers.
1274 void tbx(const VRegister
& vd
,
1275 const VRegister
& vn
,
1276 const VRegister
& vn2
,
1277 const VRegister
& vn3
,
1278 const VRegister
& vn4
,
1279 const VRegister
& vm
);
1281 // Test bit and branch to label if zero.
1282 void tbz(const Register
& rt
, unsigned bit_pos
, Label
* label
);
1284 // Test bit and branch to PC offset if zero.
1285 void tbz(const Register
& rt
, unsigned bit_pos
, int imm14
);
1287 // Test bit and branch to label if not zero.
1288 void tbnz(const Register
& rt
, unsigned bit_pos
, Label
* label
);
1290 // Test bit and branch to PC offset if not zero.
1291 void tbnz(const Register
& rt
, unsigned bit_pos
, int imm14
);
1293 // Address calculation instructions.
1294 // Calculate a PC-relative address. Unlike for branches the offset in adr is
1295 // unscaled (i.e. the result can be unaligned).
1297 // Calculate the address of a label.
1298 void adr(const Register
& rd
, Label
* label
);
1300 // Calculate the address of a PC offset.
1301 void adr(const Register
& rd
, int imm21
);
1303 // Calculate the page address of a label.
1304 void adrp(const Register
& rd
, Label
* label
);
1306 // Calculate the page address of a PC offset.
1307 void adrp(const Register
& rd
, int imm21
);
1309 // Data Processing instructions.
1311 void add(const Register
& rd
,
1313 const Operand
& operand
);
1315 // Add and update status flags.
1316 void adds(const Register
& rd
,
1318 const Operand
& operand
);
1320 // Compare negative.
1321 void cmn(const Register
& rn
, const Operand
& operand
);
1324 void sub(const Register
& rd
,
1326 const Operand
& operand
);
1328 // Subtract and update status flags.
1329 void subs(const Register
& rd
,
1331 const Operand
& operand
);
1334 void cmp(const Register
& rn
, const Operand
& operand
);
1337 void neg(const Register
& rd
,
1338 const Operand
& operand
);
1340 // Negate and update status flags.
1341 void negs(const Register
& rd
,
1342 const Operand
& operand
);
1344 // Add with carry bit.
1345 void adc(const Register
& rd
,
1347 const Operand
& operand
);
1349 // Add with carry bit and update status flags.
1350 void adcs(const Register
& rd
,
1352 const Operand
& operand
);
1354 // Subtract with carry bit.
1355 void sbc(const Register
& rd
,
1357 const Operand
& operand
);
1359 // Subtract with carry bit and update status flags.
1360 void sbcs(const Register
& rd
,
1362 const Operand
& operand
);
1364 // Negate with carry bit.
1365 void ngc(const Register
& rd
,
1366 const Operand
& operand
);
1368 // Negate with carry bit and update status flags.
1369 void ngcs(const Register
& rd
,
1370 const Operand
& operand
);
1372 // Logical instructions.
1373 // Bitwise and (A & B).
1374 void and_(const Register
& rd
,
1376 const Operand
& operand
);
1378 // Bitwise and (A & B) and update status flags.
1379 void ands(const Register
& rd
,
1381 const Operand
& operand
);
1383 // Bit test and set flags.
1384 void tst(const Register
& rn
, const Operand
& operand
);
1386 // Bit clear (A & ~B).
1387 void bic(const Register
& rd
,
1389 const Operand
& operand
);
1391 // Bit clear (A & ~B) and update status flags.
1392 void bics(const Register
& rd
,
1394 const Operand
& operand
);
1396 // Bitwise or (A | B).
1397 void orr(const Register
& rd
, const Register
& rn
, const Operand
& operand
);
1399 // Bitwise nor (A | ~B).
1400 void orn(const Register
& rd
, const Register
& rn
, const Operand
& operand
);
1402 // Bitwise eor/xor (A ^ B).
1403 void eor(const Register
& rd
, const Register
& rn
, const Operand
& operand
);
1405 // Bitwise enor/xnor (A ^ ~B).
1406 void eon(const Register
& rd
, const Register
& rn
, const Operand
& operand
);
1408 // Logical shift left by variable.
1409 void lslv(const Register
& rd
, const Register
& rn
, const Register
& rm
);
1411 // Logical shift right by variable.
1412 void lsrv(const Register
& rd
, const Register
& rn
, const Register
& rm
);
1414 // Arithmetic shift right by variable.
1415 void asrv(const Register
& rd
, const Register
& rn
, const Register
& rm
);
1417 // Rotate right by variable.
1418 void rorv(const Register
& rd
, const Register
& rn
, const Register
& rm
);
1420 // Bitfield instructions.
1422 void bfm(const Register
& rd
,
1427 // Signed bitfield move.
1428 void sbfm(const Register
& rd
,
1433 // Unsigned bitfield move.
1434 void ubfm(const Register
& rd
,
1441 void bfi(const Register
& rd
,
1445 VIXL_ASSERT(width
>= 1);
1446 VIXL_ASSERT(lsb
+ width
<= rn
.size());
1447 bfm(rd
, rn
, (rd
.size() - lsb
) & (rd
.size() - 1), width
- 1);
1450 // Bitfield extract and insert low.
1451 void bfxil(const Register
& rd
,
1455 VIXL_ASSERT(width
>= 1);
1456 VIXL_ASSERT(lsb
+ width
<= rn
.size());
1457 bfm(rd
, rn
, lsb
, lsb
+ width
- 1);
1461 // Arithmetic shift right.
1462 void asr(const Register
& rd
, const Register
& rn
, unsigned shift
) {
1463 VIXL_ASSERT(shift
< rd
.size());
1464 sbfm(rd
, rn
, shift
, rd
.size() - 1);
1467 // Signed bitfield insert with zero at right.
1468 void sbfiz(const Register
& rd
,
1472 VIXL_ASSERT(width
>= 1);
1473 VIXL_ASSERT(lsb
+ width
<= rn
.size());
1474 sbfm(rd
, rn
, (rd
.size() - lsb
) & (rd
.size() - 1), width
- 1);
1477 // Signed bitfield extract.
1478 void sbfx(const Register
& rd
,
1482 VIXL_ASSERT(width
>= 1);
1483 VIXL_ASSERT(lsb
+ width
<= rn
.size());
1484 sbfm(rd
, rn
, lsb
, lsb
+ width
- 1);
1487 // Signed extend byte.
1488 void sxtb(const Register
& rd
, const Register
& rn
) {
1492 // Signed extend halfword.
1493 void sxth(const Register
& rd
, const Register
& rn
) {
1494 sbfm(rd
, rn
, 0, 15);
1497 // Signed extend word.
1498 void sxtw(const Register
& rd
, const Register
& rn
) {
1499 sbfm(rd
, rn
, 0, 31);
1503 // Logical shift left.
1504 void lsl(const Register
& rd
, const Register
& rn
, unsigned shift
) {
1505 unsigned reg_size
= rd
.size();
1506 VIXL_ASSERT(shift
< reg_size
);
1507 ubfm(rd
, rn
, (reg_size
- shift
) % reg_size
, reg_size
- shift
- 1);
1510 // Logical shift right.
1511 void lsr(const Register
& rd
, const Register
& rn
, unsigned shift
) {
1512 VIXL_ASSERT(shift
< rd
.size());
1513 ubfm(rd
, rn
, shift
, rd
.size() - 1);
1516 // Unsigned bitfield insert with zero at right.
1517 void ubfiz(const Register
& rd
,
1521 VIXL_ASSERT(width
>= 1);
1522 VIXL_ASSERT(lsb
+ width
<= rn
.size());
1523 ubfm(rd
, rn
, (rd
.size() - lsb
) & (rd
.size() - 1), width
- 1);
1526 // Unsigned bitfield extract.
1527 void ubfx(const Register
& rd
,
1531 VIXL_ASSERT(width
>= 1);
1532 VIXL_ASSERT(lsb
+ width
<= rn
.size());
1533 ubfm(rd
, rn
, lsb
, lsb
+ width
- 1);
1536 // Unsigned extend byte.
1537 void uxtb(const Register
& rd
, const Register
& rn
) {
1541 // Unsigned extend halfword.
1542 void uxth(const Register
& rd
, const Register
& rn
) {
1543 ubfm(rd
, rn
, 0, 15);
1546 // Unsigned extend word.
1547 void uxtw(const Register
& rd
, const Register
& rn
) {
1548 ubfm(rd
, rn
, 0, 31);
1552 void extr(const Register
& rd
,
1557 // Conditional select: rd = cond ? rn : rm.
1558 void csel(const Register
& rd
,
1563 // Conditional select increment: rd = cond ? rn : rm + 1.
1564 void csinc(const Register
& rd
,
1569 // Conditional select inversion: rd = cond ? rn : ~rm.
1570 void csinv(const Register
& rd
,
1575 // Conditional select negation: rd = cond ? rn : -rm.
1576 void csneg(const Register
& rd
,
1581 // Conditional set: rd = cond ? 1 : 0.
1582 void cset(const Register
& rd
, Condition cond
);
1584 // Conditional set mask: rd = cond ? -1 : 0.
1585 void csetm(const Register
& rd
, Condition cond
);
1587 // Conditional increment: rd = cond ? rn + 1 : rn.
1588 void cinc(const Register
& rd
, const Register
& rn
, Condition cond
);
1590 // Conditional invert: rd = cond ? ~rn : rn.
1591 void cinv(const Register
& rd
, const Register
& rn
, Condition cond
);
1593 // Conditional negate: rd = cond ? -rn : rn.
1594 void cneg(const Register
& rd
, const Register
& rn
, Condition cond
);
1597 void ror(const Register
& rd
, const Register
& rs
, unsigned shift
) {
1598 extr(rd
, rs
, rs
, shift
);
1601 // Conditional comparison.
1602 // Conditional compare negative.
1603 void ccmn(const Register
& rn
,
1604 const Operand
& operand
,
1608 // Conditional compare.
1609 void ccmp(const Register
& rn
,
1610 const Operand
& operand
,
1614 // CRC-32 checksum from byte.
1615 void crc32b(const Register
& rd
,
1617 const Register
& rm
);
1619 // CRC-32 checksum from half-word.
1620 void crc32h(const Register
& rd
,
1622 const Register
& rm
);
1624 // CRC-32 checksum from word.
1625 void crc32w(const Register
& rd
,
1627 const Register
& rm
);
1629 // CRC-32 checksum from double word.
1630 void crc32x(const Register
& rd
,
1632 const Register
& rm
);
1634 // CRC-32 C checksum from byte.
1635 void crc32cb(const Register
& rd
,
1637 const Register
& rm
);
1639 // CRC-32 C checksum from half-word.
1640 void crc32ch(const Register
& rd
,
1642 const Register
& rm
);
1644 // CRC-32 C checksum from word.
1645 void crc32cw(const Register
& rd
,
1647 const Register
& rm
);
1649 // CRC-32C checksum from double word.
1650 void crc32cx(const Register
& rd
,
1652 const Register
& rm
);
1655 void mul(const Register
& rd
, const Register
& rn
, const Register
& rm
);
1657 // Negated multiply.
1658 void mneg(const Register
& rd
, const Register
& rn
, const Register
& rm
);
1660 // Signed long multiply: 32 x 32 -> 64-bit.
1661 void smull(const Register
& rd
, const Register
& rn
, const Register
& rm
);
1663 // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
1664 void smulh(const Register
& xd
, const Register
& xn
, const Register
& xm
);
1666 // Multiply and accumulate.
1667 void madd(const Register
& rd
,
1670 const Register
& ra
);
1672 // Multiply and subtract.
1673 void msub(const Register
& rd
,
1676 const Register
& ra
);
1678 // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
1679 void smaddl(const Register
& rd
,
1682 const Register
& ra
);
1684 // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
1685 void umaddl(const Register
& rd
,
1688 const Register
& ra
);
1690 // Unsigned long multiply: 32 x 32 -> 64-bit.
1691 void umull(const Register
& rd
,
1693 const Register
& rm
) {
1694 umaddl(rd
, rn
, rm
, xzr
);
1697 // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
1698 void umulh(const Register
& xd
,
1700 const Register
& xm
);
1702 // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1703 void smsubl(const Register
& rd
,
1706 const Register
& ra
);
1708 // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1709 void umsubl(const Register
& rd
,
1712 const Register
& ra
);
1714 // Signed integer divide.
1715 void sdiv(const Register
& rd
, const Register
& rn
, const Register
& rm
);
1717 // Unsigned integer divide.
1718 void udiv(const Register
& rd
, const Register
& rn
, const Register
& rm
);
1721 void rbit(const Register
& rd
, const Register
& rn
);
1723 // Reverse bytes in 16-bit half words.
1724 void rev16(const Register
& rd
, const Register
& rn
);
1726 // Reverse bytes in 32-bit words.
1727 void rev32(const Register
& rd
, const Register
& rn
);
1730 void rev(const Register
& rd
, const Register
& rn
);
1732 // Count leading zeroes.
1733 void clz(const Register
& rd
, const Register
& rn
);
1735 // Count leading sign bits.
1736 void cls(const Register
& rd
, const Register
& rn
);
1738 // Memory instructions.
1739 // Load integer or FP register.
1740 void ldr(const CPURegister
& rt
, const MemOperand
& src
,
1741 LoadStoreScalingOption option
= PreferScaledOffset
);
1743 // Store integer or FP register.
1744 void str(const CPURegister
& rt
, const MemOperand
& dst
,
1745 LoadStoreScalingOption option
= PreferScaledOffset
);
1747 // Load word with sign extension.
1748 void ldrsw(const Register
& rt
, const MemOperand
& src
,
1749 LoadStoreScalingOption option
= PreferScaledOffset
);
1752 void ldrb(const Register
& rt
, const MemOperand
& src
,
1753 LoadStoreScalingOption option
= PreferScaledOffset
);
1756 void strb(const Register
& rt
, const MemOperand
& dst
,
1757 LoadStoreScalingOption option
= PreferScaledOffset
);
1759 // Load byte with sign extension.
1760 void ldrsb(const Register
& rt
, const MemOperand
& src
,
1761 LoadStoreScalingOption option
= PreferScaledOffset
);
1764 void ldrh(const Register
& rt
, const MemOperand
& src
,
1765 LoadStoreScalingOption option
= PreferScaledOffset
);
1768 void strh(const Register
& rt
, const MemOperand
& dst
,
1769 LoadStoreScalingOption option
= PreferScaledOffset
);
1771 // Load half-word with sign extension.
1772 void ldrsh(const Register
& rt
, const MemOperand
& src
,
1773 LoadStoreScalingOption option
= PreferScaledOffset
);
1775 // Load integer or FP register (with unscaled offset).
1776 void ldur(const CPURegister
& rt
, const MemOperand
& src
,
1777 LoadStoreScalingOption option
= PreferUnscaledOffset
);
1779 // Store integer or FP register (with unscaled offset).
1780 void stur(const CPURegister
& rt
, const MemOperand
& src
,
1781 LoadStoreScalingOption option
= PreferUnscaledOffset
);
1783 // Load word with sign extension.
1784 void ldursw(const Register
& rt
, const MemOperand
& src
,
1785 LoadStoreScalingOption option
= PreferUnscaledOffset
);
1787 // Load byte (with unscaled offset).
1788 void ldurb(const Register
& rt
, const MemOperand
& src
,
1789 LoadStoreScalingOption option
= PreferUnscaledOffset
);
1791 // Store byte (with unscaled offset).
1792 void sturb(const Register
& rt
, const MemOperand
& dst
,
1793 LoadStoreScalingOption option
= PreferUnscaledOffset
);
1795 // Load byte with sign extension (and unscaled offset).
1796 void ldursb(const Register
& rt
, const MemOperand
& src
,
1797 LoadStoreScalingOption option
= PreferUnscaledOffset
);
1799 // Load half-word (with unscaled offset).
1800 void ldurh(const Register
& rt
, const MemOperand
& src
,
1801 LoadStoreScalingOption option
= PreferUnscaledOffset
);
1803 // Store half-word (with unscaled offset).
1804 void sturh(const Register
& rt
, const MemOperand
& dst
,
1805 LoadStoreScalingOption option
= PreferUnscaledOffset
);
1807 // Load half-word with sign extension (and unscaled offset).
1808 void ldursh(const Register
& rt
, const MemOperand
& src
,
1809 LoadStoreScalingOption option
= PreferUnscaledOffset
);
1811 // Load integer or FP register pair.
1812 void ldp(const CPURegister
& rt
, const CPURegister
& rt2
,
1813 const MemOperand
& src
);
1815 // Store integer or FP register pair.
1816 void stp(const CPURegister
& rt
, const CPURegister
& rt2
,
1817 const MemOperand
& dst
);
1819 // Load word pair with sign extension.
1820 void ldpsw(const Register
& rt
, const Register
& rt2
, const MemOperand
& src
);
1822 // Load integer or FP register pair, non-temporal.
1823 void ldnp(const CPURegister
& rt
, const CPURegister
& rt2
,
1824 const MemOperand
& src
);
1826 // Store integer or FP register pair, non-temporal.
1827 void stnp(const CPURegister
& rt
, const CPURegister
& rt2
,
1828 const MemOperand
& dst
);
1830 // Load integer or FP register from literal pool.
1831 void ldr(const CPURegister
& rt
, RawLiteral
* literal
);
1833 // Load word with sign extension from literal pool.
1834 void ldrsw(const Register
& rt
, RawLiteral
* literal
);
1836 // Load integer or FP register from pc + imm19 << 2.
1837 void ldr(const CPURegister
& rt
, int imm19
);
1839 // Load word with sign extension from pc + imm19 << 2.
1840 void ldrsw(const Register
& rt
, int imm19
);
1842 // Store exclusive byte.
1843 void stxrb(const Register
& rs
, const Register
& rt
, const MemOperand
& dst
);
1845 // Store exclusive half-word.
1846 void stxrh(const Register
& rs
, const Register
& rt
, const MemOperand
& dst
);
1848 // Store exclusive register.
1849 void stxr(const Register
& rs
, const Register
& rt
, const MemOperand
& dst
);
1851 // Load exclusive byte.
1852 void ldxrb(const Register
& rt
, const MemOperand
& src
);
1854 // Load exclusive half-word.
1855 void ldxrh(const Register
& rt
, const MemOperand
& src
);
1857 // Load exclusive register.
1858 void ldxr(const Register
& rt
, const MemOperand
& src
);
1860 // Store exclusive register pair.
1861 void stxp(const Register
& rs
,
1863 const Register
& rt2
,
1864 const MemOperand
& dst
);
1866 // Load exclusive register pair.
1867 void ldxp(const Register
& rt
, const Register
& rt2
, const MemOperand
& src
);
1869 // Store-release exclusive byte.
1870 void stlxrb(const Register
& rs
, const Register
& rt
, const MemOperand
& dst
);
1872 // Store-release exclusive half-word.
1873 void stlxrh(const Register
& rs
, const Register
& rt
, const MemOperand
& dst
);
1875 // Store-release exclusive register.
1876 void stlxr(const Register
& rs
, const Register
& rt
, const MemOperand
& dst
);
1878 // Load-acquire exclusive byte.
1879 void ldaxrb(const Register
& rt
, const MemOperand
& src
);
1881 // Load-acquire exclusive half-word.
1882 void ldaxrh(const Register
& rt
, const MemOperand
& src
);
1884 // Load-acquire exclusive register.
1885 void ldaxr(const Register
& rt
, const MemOperand
& src
);
1887 // Store-release exclusive register pair.
1888 void stlxp(const Register
& rs
,
1890 const Register
& rt2
,
1891 const MemOperand
& dst
);
1893 // Load-acquire exclusive register pair.
1894 void ldaxp(const Register
& rt
, const Register
& rt2
, const MemOperand
& src
);
1896 // Store-release byte.
1897 void stlrb(const Register
& rt
, const MemOperand
& dst
);
1899 // Store-release half-word.
1900 void stlrh(const Register
& rt
, const MemOperand
& dst
);
1902 // Store-release register.
1903 void stlr(const Register
& rt
, const MemOperand
& dst
);
1905 // Load-acquire byte.
1906 void ldarb(const Register
& rt
, const MemOperand
& src
);
1908 // Load-acquire half-word.
1909 void ldarh(const Register
& rt
, const MemOperand
& src
);
1911 // Load-acquire register.
1912 void ldar(const Register
& rt
, const MemOperand
& src
);
1915 void prfm(PrefetchOperation op
, const MemOperand
& addr
,
1916 LoadStoreScalingOption option
= PreferScaledOffset
);
1918 // Prefetch memory (with unscaled offset).
1919 void prfum(PrefetchOperation op
, const MemOperand
& addr
,
1920 LoadStoreScalingOption option
= PreferUnscaledOffset
);
1922 // Prefetch memory in the literal pool.
1923 void prfm(PrefetchOperation op
, RawLiteral
* literal
);
1925 // Prefetch from pc + imm19 << 2.
1926 void prfm(PrefetchOperation op
, int imm19
);
1928 // Move instructions. The default shift of -1 indicates that the move
1929 // instruction will calculate an appropriate 16-bit immediate and left shift
1930 // that is equal to the 64-bit immediate argument. If an explicit left shift
1931 // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
1933 // For movk, an explicit shift can be used to indicate which half word should
1934 // be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant
1935 // half word with zero, whereas movk(x0, 0, 48) will overwrite the
1936 // most-significant.
1938 // Move immediate and keep.
1939 void movk(const Register
& rd
, uint64_t imm
, int shift
= -1) {
1940 MoveWide(rd
, imm
, shift
, MOVK
);
1943 // Move inverted immediate.
1944 void movn(const Register
& rd
, uint64_t imm
, int shift
= -1) {
1945 MoveWide(rd
, imm
, shift
, MOVN
);
1949 void movz(const Register
& rd
, uint64_t imm
, int shift
= -1) {
1950 MoveWide(rd
, imm
, shift
, MOVZ
);
1953 // Misc instructions.
1954 // Monitor debug-mode breakpoint.
1957 // Halting debug-mode breakpoint.
1960 // Generate exception targeting EL1.
1963 // Move register to register.
1964 void mov(const Register
& rd
, const Register
& rn
);
1966 // Move inverted operand to register.
1967 void mvn(const Register
& rd
, const Operand
& operand
);
1969 // System instructions.
1970 // Move to register from system register.
1971 void mrs(const Register
& rt
, SystemRegister sysreg
);
1973 // Move from register to system register.
1974 void msr(SystemRegister sysreg
, const Register
& rt
);
1976 // System instruction.
1977 void sys(int op1
, int crn
, int crm
, int op2
, const Register
& rt
= xzr
);
1979 // System instruction with pre-encoded op (op1:crn:crm:op2).
1980 void sys(int op
, const Register
& rt
= xzr
);
1982 // System data cache operation.
1983 void dc(DataCacheOp op
, const Register
& rt
);
1985 // System instruction cache operation.
1986 void ic(InstructionCacheOp op
, const Register
& rt
);
1989 void hint(SystemHint code
);
1991 // Clear exclusive monitor.
1992 void clrex(int imm4
= 0xf);
1994 // Data memory barrier.
1995 void dmb(BarrierDomain domain
, BarrierType type
);
1997 // Data synchronization barrier.
1998 void dsb(BarrierDomain domain
, BarrierType type
);
2000 // Instruction synchronization barrier.
2003 // Alias for system instructions.
2009 // FP and NEON instructions.
2010 // Move double precision immediate to FP register.
2011 void fmov(const VRegister
& vd
, double imm
);
2013 // Move single precision immediate to FP register.
2014 void fmov(const VRegister
& vd
, float imm
);
2016 // Move FP register to register.
2017 void fmov(const Register
& rd
, const VRegister
& fn
);
2019 // Move register to FP register.
2020 void fmov(const VRegister
& vd
, const Register
& rn
);
2022 // Move FP register to FP register.
2023 void fmov(const VRegister
& vd
, const VRegister
& fn
);
2025 // Move 64-bit register to top half of 128-bit FP register.
2026 void fmov(const VRegister
& vd
, int index
, const Register
& rn
);
2028 // Move top half of 128-bit FP register to 64-bit register.
2029 void fmov(const Register
& rd
, const VRegister
& vn
, int index
);
2032 void fadd(const VRegister
& vd
, const VRegister
& vn
, const VRegister
& vm
);
2035 void fsub(const VRegister
& vd
, const VRegister
& vn
, const VRegister
& vm
);
2038 void fmul(const VRegister
& vd
, const VRegister
& vn
, const VRegister
& vm
);
2040 // FP fused multiply-add.
2041 void fmadd(const VRegister
& vd
,
2042 const VRegister
& vn
,
2043 const VRegister
& vm
,
2044 const VRegister
& va
);
2046 // FP fused multiply-subtract.
2047 void fmsub(const VRegister
& vd
,
2048 const VRegister
& vn
,
2049 const VRegister
& vm
,
2050 const VRegister
& va
);
2052 // FP fused multiply-add and negate.
2053 void fnmadd(const VRegister
& vd
,
2054 const VRegister
& vn
,
2055 const VRegister
& vm
,
2056 const VRegister
& va
);
2058 // FP fused multiply-subtract and negate.
2059 void fnmsub(const VRegister
& vd
,
2060 const VRegister
& vn
,
2061 const VRegister
& vm
,
2062 const VRegister
& va
);
2064 // FP multiply-negate scalar.
2065 void fnmul(const VRegister
& vd
,
2066 const VRegister
& vn
,
2067 const VRegister
& vm
);
2069 // FP reciprocal exponent scalar.
2070 void frecpx(const VRegister
& vd
,
2071 const VRegister
& vn
);
2074 void fdiv(const VRegister
& vd
, const VRegister
& fn
, const VRegister
& vm
);
2077 void fmax(const VRegister
& vd
, const VRegister
& fn
, const VRegister
& vm
);
2080 void fmin(const VRegister
& vd
, const VRegister
& fn
, const VRegister
& vm
);
2082 // FP maximum number.
2083 void fmaxnm(const VRegister
& vd
, const VRegister
& fn
, const VRegister
& vm
);
2085 // FP minimum number.
2086 void fminnm(const VRegister
& vd
, const VRegister
& fn
, const VRegister
& vm
);
2089 void fabs(const VRegister
& vd
, const VRegister
& vn
);
2092 void fneg(const VRegister
& vd
, const VRegister
& vn
);
2095 void fsqrt(const VRegister
& vd
, const VRegister
& vn
);
2097 // FP round to integer, nearest with ties to away.
2098 void frinta(const VRegister
& vd
, const VRegister
& vn
);
2100 // FP round to integer, implicit rounding.
2101 void frinti(const VRegister
& vd
, const VRegister
& vn
);
2103 // FP round to integer, toward minus infinity.
2104 void frintm(const VRegister
& vd
, const VRegister
& vn
);
2106 // FP round to integer, nearest with ties to even.
2107 void frintn(const VRegister
& vd
, const VRegister
& vn
);
2109 // FP round to integer, toward plus infinity.
2110 void frintp(const VRegister
& vd
, const VRegister
& vn
);
2112 // FP round to integer, exact, implicit rounding.
2113 void frintx(const VRegister
& vd
, const VRegister
& vn
);
2115 // FP round to integer, towards zero.
2116 void frintz(const VRegister
& vd
, const VRegister
& vn
);
2118 void FPCompareMacro(const VRegister
& vn
,
2122 void FPCompareMacro(const VRegister
& vn
,
2123 const VRegister
& vm
,
2126 // FP compare registers.
2127 void fcmp(const VRegister
& vn
, const VRegister
& vm
);
2129 // FP compare immediate.
2130 void fcmp(const VRegister
& vn
, double value
);
2132 void FPCCompareMacro(const VRegister
& vn
,
2133 const VRegister
& vm
,
2138 // FP conditional compare.
2139 void fccmp(const VRegister
& vn
,
2140 const VRegister
& vm
,
2144 // FP signaling compare registers.
2145 void fcmpe(const VRegister
& vn
, const VRegister
& vm
);
2147 // FP signaling compare immediate.
2148 void fcmpe(const VRegister
& vn
, double value
);
2150 // FP conditional signaling compare.
2151 void fccmpe(const VRegister
& vn
,
2152 const VRegister
& vm
,
2156 // FP conditional select.
2157 void fcsel(const VRegister
& vd
,
2158 const VRegister
& vn
,
2159 const VRegister
& vm
,
2162 // Common FP Convert functions.
2163 void NEONFPConvertToInt(const Register
& rd
,
2164 const VRegister
& vn
,
2166 void NEONFPConvertToInt(const VRegister
& vd
,
2167 const VRegister
& vn
,
2170 // FP convert between precisions.
2171 void fcvt(const VRegister
& vd
, const VRegister
& vn
);
2173 // FP convert to higher precision.
2174 void fcvtl(const VRegister
& vd
, const VRegister
& vn
);
2176 // FP convert to higher precision (second part).
2177 void fcvtl2(const VRegister
& vd
, const VRegister
& vn
);
2179 // FP convert to lower precision.
2180 void fcvtn(const VRegister
& vd
, const VRegister
& vn
);
2182 // FP convert to lower precision (second part).
2183 void fcvtn2(const VRegister
& vd
, const VRegister
& vn
);
2185 // FP convert to lower precision, rounding to odd.
2186 void fcvtxn(const VRegister
& vd
, const VRegister
& vn
);
2188 // FP convert to lower precision, rounding to odd (second part).
2189 void fcvtxn2(const VRegister
& vd
, const VRegister
& vn
);
2191 // FP convert to signed integer, nearest with ties to away.
2192 void fcvtas(const Register
& rd
, const VRegister
& vn
);
2194 // FP convert to unsigned integer, nearest with ties to away.
2195 void fcvtau(const Register
& rd
, const VRegister
& vn
);
2197 // FP convert to signed integer, nearest with ties to away.
2198 void fcvtas(const VRegister
& vd
, const VRegister
& vn
);
2200 // FP convert to unsigned integer, nearest with ties to away.
2201 void fcvtau(const VRegister
& vd
, const VRegister
& vn
);
2203 // FP convert to signed integer, round towards -infinity.
2204 void fcvtms(const Register
& rd
, const VRegister
& vn
);
2206 // FP convert to unsigned integer, round towards -infinity.
2207 void fcvtmu(const Register
& rd
, const VRegister
& vn
);
2209 // FP convert to signed integer, round towards -infinity.
2210 void fcvtms(const VRegister
& vd
, const VRegister
& vn
);
2212 // FP convert to unsigned integer, round towards -infinity.
2213 void fcvtmu(const VRegister
& vd
, const VRegister
& vn
);
2215 // FP convert to signed integer, nearest with ties to even.
2216 void fcvtns(const Register
& rd
, const VRegister
& vn
);
2218 // FP convert to unsigned integer, nearest with ties to even.
2219 void fcvtnu(const Register
& rd
, const VRegister
& vn
);
2221 // FP convert to signed integer, nearest with ties to even.
2222 void fcvtns(const VRegister
& rd
, const VRegister
& vn
);
2224 // FP convert to unsigned integer, nearest with ties to even.
2225 void fcvtnu(const VRegister
& rd
, const VRegister
& vn
);
2227 // FP convert to signed integer or fixed-point, round towards zero.
2228 void fcvtzs(const Register
& rd
, const VRegister
& vn
, int fbits
= 0);
2230 // FP convert to unsigned integer or fixed-point, round towards zero.
2231 void fcvtzu(const Register
& rd
, const VRegister
& vn
, int fbits
= 0);
2233 // FP convert to signed integer or fixed-point, round towards zero.
2234 void fcvtzs(const VRegister
& vd
, const VRegister
& vn
, int fbits
= 0);
2236 // FP convert to unsigned integer or fixed-point, round towards zero.
2237 void fcvtzu(const VRegister
& vd
, const VRegister
& vn
, int fbits
= 0);
2239 // FP convert to signed integer, round towards +infinity.
2240 void fcvtps(const Register
& rd
, const VRegister
& vn
);
2242 // FP convert to unsigned integer, round towards +infinity.
2243 void fcvtpu(const Register
& rd
, const VRegister
& vn
);
2245 // FP convert to signed integer, round towards +infinity.
2246 void fcvtps(const VRegister
& vd
, const VRegister
& vn
);
2248 // FP convert to unsigned integer, round towards +infinity.
2249 void fcvtpu(const VRegister
& vd
, const VRegister
& vn
);
2251 // Convert signed integer or fixed point to FP.
2252 void scvtf(const VRegister
& fd
, const Register
& rn
, int fbits
= 0);
2254 // Convert unsigned integer or fixed point to FP.
2255 void ucvtf(const VRegister
& fd
, const Register
& rn
, int fbits
= 0);
2257 // Convert signed integer or fixed-point to FP.
2258 void scvtf(const VRegister
& fd
, const VRegister
& vn
, int fbits
= 0);
2260 // Convert unsigned integer or fixed-point to FP.
2261 void ucvtf(const VRegister
& fd
, const VRegister
& vn
, int fbits
= 0);
2263 // Unsigned absolute difference.
2264 void uabd(const VRegister
& vd
,
2265 const VRegister
& vn
,
2266 const VRegister
& vm
);
2268 // Signed absolute difference.
2269 void sabd(const VRegister
& vd
,
2270 const VRegister
& vn
,
2271 const VRegister
& vm
);
2273 // Unsigned absolute difference and accumulate.
2274 void uaba(const VRegister
& vd
,
2275 const VRegister
& vn
,
2276 const VRegister
& vm
);
2278 // Signed absolute difference and accumulate.
2279 void saba(const VRegister
& vd
,
2280 const VRegister
& vn
,
2281 const VRegister
& vm
);
2284 void add(const VRegister
& vd
,
2285 const VRegister
& vn
,
2286 const VRegister
& vm
);
2289 void sub(const VRegister
& vd
,
2290 const VRegister
& vn
,
2291 const VRegister
& vm
);
2293 // Unsigned halving add.
2294 void uhadd(const VRegister
& vd
,
2295 const VRegister
& vn
,
2296 const VRegister
& vm
);
2298 // Signed halving add.
2299 void shadd(const VRegister
& vd
,
2300 const VRegister
& vn
,
2301 const VRegister
& vm
);
2303 // Unsigned rounding halving add.
2304 void urhadd(const VRegister
& vd
,
2305 const VRegister
& vn
,
2306 const VRegister
& vm
);
2308 // Signed rounding halving add.
2309 void srhadd(const VRegister
& vd
,
2310 const VRegister
& vn
,
2311 const VRegister
& vm
);
2313 // Unsigned halving sub.
2314 void uhsub(const VRegister
& vd
,
2315 const VRegister
& vn
,
2316 const VRegister
& vm
);
2318 // Signed halving sub.
2319 void shsub(const VRegister
& vd
,
2320 const VRegister
& vn
,
2321 const VRegister
& vm
);
2323 // Unsigned saturating add.
2324 void uqadd(const VRegister
& vd
,
2325 const VRegister
& vn
,
2326 const VRegister
& vm
);
2328 // Signed saturating add.
2329 void sqadd(const VRegister
& vd
,
2330 const VRegister
& vn
,
2331 const VRegister
& vm
);
2333 // Unsigned saturating subtract.
2334 void uqsub(const VRegister
& vd
,
2335 const VRegister
& vn
,
2336 const VRegister
& vm
);
2338 // Signed saturating subtract.
2339 void sqsub(const VRegister
& vd
,
2340 const VRegister
& vn
,
2341 const VRegister
& vm
);
2344 void addp(const VRegister
& vd
,
2345 const VRegister
& vn
,
2346 const VRegister
& vm
);
2348 // Add pair of elements scalar.
2349 void addp(const VRegister
& vd
,
2350 const VRegister
& vn
);
2352 // Multiply-add to accumulator.
2353 void mla(const VRegister
& vd
,
2354 const VRegister
& vn
,
2355 const VRegister
& vm
);
2357 // Multiply-subtract to accumulator.
2358 void mls(const VRegister
& vd
,
2359 const VRegister
& vn
,
2360 const VRegister
& vm
);
2363 void mul(const VRegister
& vd
,
2364 const VRegister
& vn
,
2365 const VRegister
& vm
);
2367 // Multiply by scalar element.
2368 void mul(const VRegister
& vd
,
2369 const VRegister
& vn
,
2370 const VRegister
& vm
,
2373 // Multiply-add by scalar element.
2374 void mla(const VRegister
& vd
,
2375 const VRegister
& vn
,
2376 const VRegister
& vm
,
2379 // Multiply-subtract by scalar element.
2380 void mls(const VRegister
& vd
,
2381 const VRegister
& vn
,
2382 const VRegister
& vm
,
2385 // Signed long multiply-add by scalar element.
2386 void smlal(const VRegister
& vd
,
2387 const VRegister
& vn
,
2388 const VRegister
& vm
,
2391 // Signed long multiply-add by scalar element (second part).
2392 void smlal2(const VRegister
& vd
,
2393 const VRegister
& vn
,
2394 const VRegister
& vm
,
2397 // Unsigned long multiply-add by scalar element.
2398 void umlal(const VRegister
& vd
,
2399 const VRegister
& vn
,
2400 const VRegister
& vm
,
2403 // Unsigned long multiply-add by scalar element (second part).
2404 void umlal2(const VRegister
& vd
,
2405 const VRegister
& vn
,
2406 const VRegister
& vm
,
2409 // Signed long multiply-sub by scalar element.
2410 void smlsl(const VRegister
& vd
,
2411 const VRegister
& vn
,
2412 const VRegister
& vm
,
2415 // Signed long multiply-sub by scalar element (second part).
2416 void smlsl2(const VRegister
& vd
,
2417 const VRegister
& vn
,
2418 const VRegister
& vm
,
2421 // Unsigned long multiply-sub by scalar element.
2422 void umlsl(const VRegister
& vd
,
2423 const VRegister
& vn
,
2424 const VRegister
& vm
,
2427 // Unsigned long multiply-sub by scalar element (second part).
2428 void umlsl2(const VRegister
& vd
,
2429 const VRegister
& vn
,
2430 const VRegister
& vm
,
2433 // Signed long multiply by scalar element.
2434 void smull(const VRegister
& vd
,
2435 const VRegister
& vn
,
2436 const VRegister
& vm
,
2439 // Signed long multiply by scalar element (second part).
2440 void smull2(const VRegister
& vd
,
2441 const VRegister
& vn
,
2442 const VRegister
& vm
,
2445 // Unsigned long multiply by scalar element.
2446 void umull(const VRegister
& vd
,
2447 const VRegister
& vn
,
2448 const VRegister
& vm
,
2451 // Unsigned long multiply by scalar element (second part).
2452 void umull2(const VRegister
& vd
,
2453 const VRegister
& vn
,
2454 const VRegister
& vm
,
2457 // Signed saturating double long multiply by element.
2458 void sqdmull(const VRegister
& vd
,
2459 const VRegister
& vn
,
2460 const VRegister
& vm
,
2463 // Signed saturating double long multiply by element (second part).
2464 void sqdmull2(const VRegister
& vd
,
2465 const VRegister
& vn
,
2466 const VRegister
& vm
,
2469 // Signed saturating doubling long multiply-add by element.
2470 void sqdmlal(const VRegister
& vd
,
2471 const VRegister
& vn
,
2472 const VRegister
& vm
,
2475 // Signed saturating doubling long multiply-add by element (second part).
2476 void sqdmlal2(const VRegister
& vd
,
2477 const VRegister
& vn
,
2478 const VRegister
& vm
,
2481 // Signed saturating doubling long multiply-sub by element.
2482 void sqdmlsl(const VRegister
& vd
,
2483 const VRegister
& vn
,
2484 const VRegister
& vm
,
2487 // Signed saturating doubling long multiply-sub by element (second part).
2488 void sqdmlsl2(const VRegister
& vd
,
2489 const VRegister
& vn
,
2490 const VRegister
& vm
,
2494 void cmeq(const VRegister
& vd
,
2495 const VRegister
& vn
,
2496 const VRegister
& vm
);
2498 // Compare signed greater than or equal.
2499 void cmge(const VRegister
& vd
,
2500 const VRegister
& vn
,
2501 const VRegister
& vm
);
2503 // Compare signed greater than.
2504 void cmgt(const VRegister
& vd
,
2505 const VRegister
& vn
,
2506 const VRegister
& vm
);
2508 // Compare unsigned higher.
2509 void cmhi(const VRegister
& vd
,
2510 const VRegister
& vn
,
2511 const VRegister
& vm
);
2513 // Compare unsigned higher or same.
2514 void cmhs(const VRegister
& vd
,
2515 const VRegister
& vn
,
2516 const VRegister
& vm
);
2518 // Compare bitwise test bits nonzero.
2519 void cmtst(const VRegister
& vd
,
2520 const VRegister
& vn
,
2521 const VRegister
& vm
);
2523 // Compare bitwise to zero.
2524 void cmeq(const VRegister
& vd
,
2525 const VRegister
& vn
,
2528 // Compare signed greater than or equal to zero.
2529 void cmge(const VRegister
& vd
,
2530 const VRegister
& vn
,
2533 // Compare signed greater than zero.
2534 void cmgt(const VRegister
& vd
,
2535 const VRegister
& vn
,
2538 // Compare signed less than or equal to zero.
2539 void cmle(const VRegister
& vd
,
2540 const VRegister
& vn
,
2543 // Compare signed less than zero.
2544 void cmlt(const VRegister
& vd
,
2545 const VRegister
& vn
,
2548 // Signed shift left by register.
2549 void sshl(const VRegister
& vd
,
2550 const VRegister
& vn
,
2551 const VRegister
& vm
);
2553 // Unsigned shift left by register.
2554 void ushl(const VRegister
& vd
,
2555 const VRegister
& vn
,
2556 const VRegister
& vm
);
2558 // Signed saturating shift left by register.
2559 void sqshl(const VRegister
& vd
,
2560 const VRegister
& vn
,
2561 const VRegister
& vm
);
2563 // Unsigned saturating shift left by register.
2564 void uqshl(const VRegister
& vd
,
2565 const VRegister
& vn
,
2566 const VRegister
& vm
);
2568 // Signed rounding shift left by register.
2569 void srshl(const VRegister
& vd
,
2570 const VRegister
& vn
,
2571 const VRegister
& vm
);
2573 // Unsigned rounding shift left by register.
2574 void urshl(const VRegister
& vd
,
2575 const VRegister
& vn
,
2576 const VRegister
& vm
);
2578 // Signed saturating rounding shift left by register.
2579 void sqrshl(const VRegister
& vd
,
2580 const VRegister
& vn
,
2581 const VRegister
& vm
);
2583 // Unsigned saturating rounding shift left by register.
2584 void uqrshl(const VRegister
& vd
,
2585 const VRegister
& vn
,
2586 const VRegister
& vm
);
2589 void and_(const VRegister
& vd
,
2590 const VRegister
& vn
,
2591 const VRegister
& vm
);
2594 void orr(const VRegister
& vd
,
2595 const VRegister
& vn
,
2596 const VRegister
& vm
);
2598 // Bitwise or immediate.
2599 void orr(const VRegister
& vd
,
2601 const int left_shift
= 0);
2603 // Move register to register.
2604 void mov(const VRegister
& vd
,
2605 const VRegister
& vn
);
2608 void orn(const VRegister
& vd
,
2609 const VRegister
& vn
,
2610 const VRegister
& vm
);
2613 void eor(const VRegister
& vd
,
2614 const VRegister
& vn
,
2615 const VRegister
& vm
);
2617 // Bit clear immediate.
2618 void bic(const VRegister
& vd
,
2620 const int left_shift
= 0);
2623 void bic(const VRegister
& vd
,
2624 const VRegister
& vn
,
2625 const VRegister
& vm
);
2627 // Bitwise insert if false.
2628 void bif(const VRegister
& vd
,
2629 const VRegister
& vn
,
2630 const VRegister
& vm
);
2632 // Bitwise insert if true.
2633 void bit(const VRegister
& vd
,
2634 const VRegister
& vn
,
2635 const VRegister
& vm
);
2638 void bsl(const VRegister
& vd
,
2639 const VRegister
& vn
,
2640 const VRegister
& vm
);
2642 // Polynomial multiply.
2643 void pmul(const VRegister
& vd
,
2644 const VRegister
& vn
,
2645 const VRegister
& vm
);
2647 // Vector move immediate.
2648 void movi(const VRegister
& vd
,
2651 const int shift_amount
= 0);
2654 void mvn(const VRegister
& vd
,
2655 const VRegister
& vn
);
2657 // Vector move inverted immediate.
2658 void mvni(const VRegister
& vd
,
2661 const int shift_amount
= 0);
2663 // Signed saturating accumulate of unsigned value.
2664 void suqadd(const VRegister
& vd
,
2665 const VRegister
& vn
);
2667 // Unsigned saturating accumulate of signed value.
2668 void usqadd(const VRegister
& vd
,
2669 const VRegister
& vn
);
2672 void abs(const VRegister
& vd
,
2673 const VRegister
& vn
);
2675 // Signed saturating absolute value.
2676 void sqabs(const VRegister
& vd
,
2677 const VRegister
& vn
);
2680 void neg(const VRegister
& vd
,
2681 const VRegister
& vn
);
2683 // Signed saturating negate.
2684 void sqneg(const VRegister
& vd
,
2685 const VRegister
& vn
);
2688 void not_(const VRegister
& vd
,
2689 const VRegister
& vn
);
2692 void xtn(const VRegister
& vd
,
2693 const VRegister
& vn
);
2695 // Extract narrow (second part).
2696 void xtn2(const VRegister
& vd
,
2697 const VRegister
& vn
);
2699 // Signed saturating extract narrow.
2700 void sqxtn(const VRegister
& vd
,
2701 const VRegister
& vn
);
2703 // Signed saturating extract narrow (second part).
2704 void sqxtn2(const VRegister
& vd
,
2705 const VRegister
& vn
);
2707 // Unsigned saturating extract narrow.
2708 void uqxtn(const VRegister
& vd
,
2709 const VRegister
& vn
);
2711 // Unsigned saturating extract narrow (second part).
2712 void uqxtn2(const VRegister
& vd
,
2713 const VRegister
& vn
);
2715 // Signed saturating extract unsigned narrow.
2716 void sqxtun(const VRegister
& vd
,
2717 const VRegister
& vn
);
2719 // Signed saturating extract unsigned narrow (second part).
2720 void sqxtun2(const VRegister
& vd
,
2721 const VRegister
& vn
);
2723 // Extract vector from pair of vectors.
2724 void ext(const VRegister
& vd
,
2725 const VRegister
& vn
,
2726 const VRegister
& vm
,
2729 // Duplicate vector element to vector or scalar.
2730 void dup(const VRegister
& vd
,
2731 const VRegister
& vn
,
2734 // Move vector element to scalar.
2735 void mov(const VRegister
& vd
,
2736 const VRegister
& vn
,
2739 // Duplicate general-purpose register to vector.
2740 void dup(const VRegister
& vd
,
2741 const Register
& rn
);
2743 // Insert vector element from another vector element.
2744 void ins(const VRegister
& vd
,
2746 const VRegister
& vn
,
2749 // Move vector element to another vector element.
2750 void mov(const VRegister
& vd
,
2752 const VRegister
& vn
,
2755 // Insert vector element from general-purpose register.
2756 void ins(const VRegister
& vd
,
2758 const Register
& rn
);
2760 // Move general-purpose register to a vector element.
2761 void mov(const VRegister
& vd
,
2763 const Register
& rn
);
2765 // Unsigned move vector element to general-purpose register.
2766 void umov(const Register
& rd
,
2767 const VRegister
& vn
,
2770 // Move vector element to general-purpose register.
2771 void mov(const Register
& rd
,
2772 const VRegister
& vn
,
2775 // Signed move vector element to general-purpose register.
2776 void smov(const Register
& rd
,
2777 const VRegister
& vn
,
2780 // One-element structure load to one register.
2781 void ld1(const VRegister
& vt
,
2782 const MemOperand
& src
);
2784 // One-element structure load to two registers.
2785 void ld1(const VRegister
& vt
,
2786 const VRegister
& vt2
,
2787 const MemOperand
& src
);
2789 // One-element structure load to three registers.
2790 void ld1(const VRegister
& vt
,
2791 const VRegister
& vt2
,
2792 const VRegister
& vt3
,
2793 const MemOperand
& src
);
2795 // One-element structure load to four registers.
2796 void ld1(const VRegister
& vt
,
2797 const VRegister
& vt2
,
2798 const VRegister
& vt3
,
2799 const VRegister
& vt4
,
2800 const MemOperand
& src
);
2802 // One-element single structure load to one lane.
2803 void ld1(const VRegister
& vt
,
2805 const MemOperand
& src
);
2807 // One-element single structure load to all lanes.
2808 void ld1r(const VRegister
& vt
,
2809 const MemOperand
& src
);
2811 // Two-element structure load.
2812 void ld2(const VRegister
& vt
,
2813 const VRegister
& vt2
,
2814 const MemOperand
& src
);
2816 // Two-element single structure load to one lane.
2817 void ld2(const VRegister
& vt
,
2818 const VRegister
& vt2
,
2820 const MemOperand
& src
);
2822 // Two-element single structure load to all lanes.
2823 void ld2r(const VRegister
& vt
,
2824 const VRegister
& vt2
,
2825 const MemOperand
& src
);
2827 // Three-element structure load.
2828 void ld3(const VRegister
& vt
,
2829 const VRegister
& vt2
,
2830 const VRegister
& vt3
,
2831 const MemOperand
& src
);
2833 // Three-element single structure load to one lane.
2834 void ld3(const VRegister
& vt
,
2835 const VRegister
& vt2
,
2836 const VRegister
& vt3
,
2838 const MemOperand
& src
);
2840 // Three-element single structure load to all lanes.
2841 void ld3r(const VRegister
& vt
,
2842 const VRegister
& vt2
,
2843 const VRegister
& vt3
,
2844 const MemOperand
& src
);
2846 // Four-element structure load.
2847 void ld4(const VRegister
& vt
,
2848 const VRegister
& vt2
,
2849 const VRegister
& vt3
,
2850 const VRegister
& vt4
,
2851 const MemOperand
& src
);
2853 // Four-element single structure load to one lane.
2854 void ld4(const VRegister
& vt
,
2855 const VRegister
& vt2
,
2856 const VRegister
& vt3
,
2857 const VRegister
& vt4
,
2859 const MemOperand
& src
);
2861 // Four-element single structure load to all lanes.
2862 void ld4r(const VRegister
& vt
,
2863 const VRegister
& vt2
,
2864 const VRegister
& vt3
,
2865 const VRegister
& vt4
,
2866 const MemOperand
& src
);
2868 // Count leading sign bits.
2869 void cls(const VRegister
& vd
,
2870 const VRegister
& vn
);
2872 // Count leading zero bits (vector).
2873 void clz(const VRegister
& vd
,
2874 const VRegister
& vn
);
2876 // Population count per byte.
2877 void cnt(const VRegister
& vd
,
2878 const VRegister
& vn
);
2880 // Reverse bit order.
2881 void rbit(const VRegister
& vd
,
2882 const VRegister
& vn
);
2884 // Reverse elements in 16-bit halfwords.
2885 void rev16(const VRegister
& vd
,
2886 const VRegister
& vn
);
2888 // Reverse elements in 32-bit words.
2889 void rev32(const VRegister
& vd
,
2890 const VRegister
& vn
);
2892 // Reverse elements in 64-bit doublewords.
2893 void rev64(const VRegister
& vd
,
2894 const VRegister
& vn
);
2896 // Unsigned reciprocal square root estimate.
2897 void ursqrte(const VRegister
& vd
,
2898 const VRegister
& vn
);
2900 // Unsigned reciprocal estimate.
2901 void urecpe(const VRegister
& vd
,
2902 const VRegister
& vn
);
2904 // Signed pairwise long add.
2905 void saddlp(const VRegister
& vd
,
2906 const VRegister
& vn
);
2908 // Unsigned pairwise long add.
2909 void uaddlp(const VRegister
& vd
,
2910 const VRegister
& vn
);
2912 // Signed pairwise long add and accumulate.
2913 void sadalp(const VRegister
& vd
,
2914 const VRegister
& vn
);
2916 // Unsigned pairwise long add and accumulate.
2917 void uadalp(const VRegister
& vd
,
2918 const VRegister
& vn
);
2920 // Shift left by immediate.
2921 void shl(const VRegister
& vd
,
2922 const VRegister
& vn
,
2925 // Signed saturating shift left by immediate.
2926 void sqshl(const VRegister
& vd
,
2927 const VRegister
& vn
,
2930 // Signed saturating shift left unsigned by immediate.
2931 void sqshlu(const VRegister
& vd
,
2932 const VRegister
& vn
,
2935 // Unsigned saturating shift left by immediate.
2936 void uqshl(const VRegister
& vd
,
2937 const VRegister
& vn
,
2940 // Signed shift left long by immediate.
2941 void sshll(const VRegister
& vd
,
2942 const VRegister
& vn
,
2945 // Signed shift left long by immediate (second part).
2946 void sshll2(const VRegister
& vd
,
2947 const VRegister
& vn
,
2950 // Signed extend long.
2951 void sxtl(const VRegister
& vd
,
2952 const VRegister
& vn
);
2954 // Signed extend long (second part).
2955 void sxtl2(const VRegister
& vd
,
2956 const VRegister
& vn
);
2958 // Unsigned shift left long by immediate.
2959 void ushll(const VRegister
& vd
,
2960 const VRegister
& vn
,
2963 // Unsigned shift left long by immediate (second part).
2964 void ushll2(const VRegister
& vd
,
2965 const VRegister
& vn
,
2968 // Shift left long by element size.
2969 void shll(const VRegister
& vd
,
2970 const VRegister
& vn
,
2973 // Shift left long by element size (second part).
2974 void shll2(const VRegister
& vd
,
2975 const VRegister
& vn
,
2978 // Unsigned extend long.
2979 void uxtl(const VRegister
& vd
,
2980 const VRegister
& vn
);
2982 // Unsigned extend long (second part).
2983 void uxtl2(const VRegister
& vd
,
2984 const VRegister
& vn
);
2986 // Shift left by immediate and insert.
2987 void sli(const VRegister
& vd
,
2988 const VRegister
& vn
,
2991 // Shift right by immediate and insert.
2992 void sri(const VRegister
& vd
,
2993 const VRegister
& vn
,
2997 void smax(const VRegister
& vd
,
2998 const VRegister
& vn
,
2999 const VRegister
& vm
);
3001 // Signed pairwise maximum.
3002 void smaxp(const VRegister
& vd
,
3003 const VRegister
& vn
,
3004 const VRegister
& vm
);
3006 // Add across vector.
3007 void addv(const VRegister
& vd
,
3008 const VRegister
& vn
);
3010 // Signed add long across vector.
3011 void saddlv(const VRegister
& vd
,
3012 const VRegister
& vn
);
3014 // Unsigned add long across vector.
3015 void uaddlv(const VRegister
& vd
,
3016 const VRegister
& vn
);
3018 // FP maximum number across vector.
3019 void fmaxnmv(const VRegister
& vd
,
3020 const VRegister
& vn
);
3022 // FP maximum across vector.
3023 void fmaxv(const VRegister
& vd
,
3024 const VRegister
& vn
);
3026 // FP minimum number across vector.
3027 void fminnmv(const VRegister
& vd
,
3028 const VRegister
& vn
);
3030 // FP minimum across vector.
3031 void fminv(const VRegister
& vd
,
3032 const VRegister
& vn
);
3034 // Signed maximum across vector.
3035 void smaxv(const VRegister
& vd
,
3036 const VRegister
& vn
);
3039 void smin(const VRegister
& vd
,
3040 const VRegister
& vn
,
3041 const VRegister
& vm
);
3043 // Signed minimum pairwise.
3044 void sminp(const VRegister
& vd
,
3045 const VRegister
& vn
,
3046 const VRegister
& vm
);
3048 // Signed minimum across vector.
3049 void sminv(const VRegister
& vd
,
3050 const VRegister
& vn
);
3052 // One-element structure store from one register.
3053 void st1(const VRegister
& vt
,
3054 const MemOperand
& src
);
3056 // One-element structure store from two registers.
3057 void st1(const VRegister
& vt
,
3058 const VRegister
& vt2
,
3059 const MemOperand
& src
);
3061 // One-element structure store from three registers.
3062 void st1(const VRegister
& vt
,
3063 const VRegister
& vt2
,
3064 const VRegister
& vt3
,
3065 const MemOperand
& src
);
3067 // One-element structure store from four registers.
3068 void st1(const VRegister
& vt
,
3069 const VRegister
& vt2
,
3070 const VRegister
& vt3
,
3071 const VRegister
& vt4
,
3072 const MemOperand
& src
);
3074 // One-element single structure store from one lane.
3075 void st1(const VRegister
& vt
,
3077 const MemOperand
& src
);
3079 // Two-element structure store from two registers.
3080 void st2(const VRegister
& vt
,
3081 const VRegister
& vt2
,
3082 const MemOperand
& src
);
3084 // Two-element single structure store from two lanes.
3085 void st2(const VRegister
& vt
,
3086 const VRegister
& vt2
,
3088 const MemOperand
& src
);
3090 // Three-element structure store from three registers.
3091 void st3(const VRegister
& vt
,
3092 const VRegister
& vt2
,
3093 const VRegister
& vt3
,
3094 const MemOperand
& src
);
3096 // Three-element single structure store from three lanes.
3097 void st3(const VRegister
& vt
,
3098 const VRegister
& vt2
,
3099 const VRegister
& vt3
,
3101 const MemOperand
& src
);
3103 // Four-element structure store from four registers.
3104 void st4(const VRegister
& vt
,
3105 const VRegister
& vt2
,
3106 const VRegister
& vt3
,
3107 const VRegister
& vt4
,
3108 const MemOperand
& src
);
3110 // Four-element single structure store from four lanes.
3111 void st4(const VRegister
& vt
,
3112 const VRegister
& vt2
,
3113 const VRegister
& vt3
,
3114 const VRegister
& vt4
,
3116 const MemOperand
& src
);
3118 // Unsigned add long.
3119 void uaddl(const VRegister
& vd
,
3120 const VRegister
& vn
,
3121 const VRegister
& vm
);
3123 // Unsigned add long (second part).
3124 void uaddl2(const VRegister
& vd
,
3125 const VRegister
& vn
,
3126 const VRegister
& vm
);
3128 // Unsigned add wide.
3129 void uaddw(const VRegister
& vd
,
3130 const VRegister
& vn
,
3131 const VRegister
& vm
);
3133 // Unsigned add wide (second part).
3134 void uaddw2(const VRegister
& vd
,
3135 const VRegister
& vn
,
3136 const VRegister
& vm
);
3139 void saddl(const VRegister
& vd
,
3140 const VRegister
& vn
,
3141 const VRegister
& vm
);
3143 // Signed add long (second part).
3144 void saddl2(const VRegister
& vd
,
3145 const VRegister
& vn
,
3146 const VRegister
& vm
);
3149 void saddw(const VRegister
& vd
,
3150 const VRegister
& vn
,
3151 const VRegister
& vm
);
3153 // Signed add wide (second part).
3154 void saddw2(const VRegister
& vd
,
3155 const VRegister
& vn
,
3156 const VRegister
& vm
);
3158 // Unsigned subtract long.
3159 void usubl(const VRegister
& vd
,
3160 const VRegister
& vn
,
3161 const VRegister
& vm
);
3163 // Unsigned subtract long (second part).
3164 void usubl2(const VRegister
& vd
,
3165 const VRegister
& vn
,
3166 const VRegister
& vm
);
3168 // Unsigned subtract wide.
3169 void usubw(const VRegister
& vd
,
3170 const VRegister
& vn
,
3171 const VRegister
& vm
);
3173 // Unsigned subtract wide (second part).
3174 void usubw2(const VRegister
& vd
,
3175 const VRegister
& vn
,
3176 const VRegister
& vm
);
3178 // Signed subtract long.
3179 void ssubl(const VRegister
& vd
,
3180 const VRegister
& vn
,
3181 const VRegister
& vm
);
3183 // Signed subtract long (second part).
3184 void ssubl2(const VRegister
& vd
,
3185 const VRegister
& vn
,
3186 const VRegister
& vm
);
3188 // Signed integer subtract wide.
3189 void ssubw(const VRegister
& vd
,
3190 const VRegister
& vn
,
3191 const VRegister
& vm
);
3193 // Signed integer subtract wide (second part).
3194 void ssubw2(const VRegister
& vd
,
3195 const VRegister
& vn
,
3196 const VRegister
& vm
);
3198 // Unsigned maximum.
3199 void umax(const VRegister
& vd
,
3200 const VRegister
& vn
,
3201 const VRegister
& vm
);
3203 // Unsigned pairwise maximum.
3204 void umaxp(const VRegister
& vd
,
3205 const VRegister
& vn
,
3206 const VRegister
& vm
);
3208 // Unsigned maximum across vector.
3209 void umaxv(const VRegister
& vd
,
3210 const VRegister
& vn
);
3212 // Unsigned minimum.
3213 void umin(const VRegister
& vd
,
3214 const VRegister
& vn
,
3215 const VRegister
& vm
);
3217 // Unsigned pairwise minimum.
3218 void uminp(const VRegister
& vd
,
3219 const VRegister
& vn
,
3220 const VRegister
& vm
);
3222 // Unsigned minimum across vector.
3223 void uminv(const VRegister
& vd
,
3224 const VRegister
& vn
);
3226 // Transpose vectors (primary).
3227 void trn1(const VRegister
& vd
,
3228 const VRegister
& vn
,
3229 const VRegister
& vm
);
3231 // Transpose vectors (secondary).
3232 void trn2(const VRegister
& vd
,
3233 const VRegister
& vn
,
3234 const VRegister
& vm
);
3236 // Unzip vectors (primary).
3237 void uzp1(const VRegister
& vd
,
3238 const VRegister
& vn
,
3239 const VRegister
& vm
);
3241 // Unzip vectors (secondary).
3242 void uzp2(const VRegister
& vd
,
3243 const VRegister
& vn
,
3244 const VRegister
& vm
);
3246 // Zip vectors (primary).
3247 void zip1(const VRegister
& vd
,
3248 const VRegister
& vn
,
3249 const VRegister
& vm
);
3251 // Zip vectors (secondary).
3252 void zip2(const VRegister
& vd
,
3253 const VRegister
& vn
,
3254 const VRegister
& vm
);
3256 // Signed shift right by immediate.
3257 void sshr(const VRegister
& vd
,
3258 const VRegister
& vn
,
3261 // Unsigned shift right by immediate.
3262 void ushr(const VRegister
& vd
,
3263 const VRegister
& vn
,
3266 // Signed rounding shift right by immediate.
3267 void srshr(const VRegister
& vd
,
3268 const VRegister
& vn
,
3271 // Unsigned rounding shift right by immediate.
3272 void urshr(const VRegister
& vd
,
3273 const VRegister
& vn
,
3276 // Signed shift right by immediate and accumulate.
3277 void ssra(const VRegister
& vd
,
3278 const VRegister
& vn
,
3281 // Unsigned shift right by immediate and accumulate.
3282 void usra(const VRegister
& vd
,
3283 const VRegister
& vn
,
3286 // Signed rounding shift right by immediate and accumulate.
3287 void srsra(const VRegister
& vd
,
3288 const VRegister
& vn
,
3291 // Unsigned rounding shift right by immediate and accumulate.
3292 void ursra(const VRegister
& vd
,
3293 const VRegister
& vn
,
3296 // Shift right narrow by immediate.
3297 void shrn(const VRegister
& vd
,
3298 const VRegister
& vn
,
3301 // Shift right narrow by immediate (second part).
3302 void shrn2(const VRegister
& vd
,
3303 const VRegister
& vn
,
3306 // Rounding shift right narrow by immediate.
3307 void rshrn(const VRegister
& vd
,
3308 const VRegister
& vn
,
3311 // Rounding shift right narrow by immediate (second part).
3312 void rshrn2(const VRegister
& vd
,
3313 const VRegister
& vn
,
3316 // Unsigned saturating shift right narrow by immediate.
3317 void uqshrn(const VRegister
& vd
,
3318 const VRegister
& vn
,
3321 // Unsigned saturating shift right narrow by immediate (second part).
3322 void uqshrn2(const VRegister
& vd
,
3323 const VRegister
& vn
,
3326 // Unsigned saturating rounding shift right narrow by immediate.
3327 void uqrshrn(const VRegister
& vd
,
3328 const VRegister
& vn
,
3331 // Unsigned saturating rounding shift right narrow by immediate (second part).
3332 void uqrshrn2(const VRegister
& vd
,
3333 const VRegister
& vn
,
3336 // Signed saturating shift right narrow by immediate.
3337 void sqshrn(const VRegister
& vd
,
3338 const VRegister
& vn
,
3341 // Signed saturating shift right narrow by immediate (second part).
3342 void sqshrn2(const VRegister
& vd
,
3343 const VRegister
& vn
,
3346 // Signed saturating rounded shift right narrow by immediate.
3347 void sqrshrn(const VRegister
& vd
,
3348 const VRegister
& vn
,
3351 // Signed saturating rounded shift right narrow by immediate (second part).
3352 void sqrshrn2(const VRegister
& vd
,
3353 const VRegister
& vn
,
3356 // Signed saturating shift right unsigned narrow by immediate.
3357 void sqshrun(const VRegister
& vd
,
3358 const VRegister
& vn
,
3361 // Signed saturating shift right unsigned narrow by immediate (second part).
3362 void sqshrun2(const VRegister
& vd
,
3363 const VRegister
& vn
,
3366 // Signed saturating rounded shift right unsigned narrow by immediate.
3367 void sqrshrun(const VRegister
& vd
,
3368 const VRegister
& vn
,
3371 // Signed saturating rounded shift right unsigned narrow by immediate (second part).
3372 void sqrshrun2(const VRegister
& vd
,
3373 const VRegister
& vn
,
3376 // FP reciprocal step.
3377 void frecps(const VRegister
& vd
,
3378 const VRegister
& vn
,
3379 const VRegister
& vm
);
3381 // FP reciprocal estimate.
3382 void frecpe(const VRegister
& vd
,
3383 const VRegister
& vn
);
3385 // FP reciprocal square root estimate.
3386 void frsqrte(const VRegister
& vd
,
3387 const VRegister
& vn
);
3389 // FP reciprocal square root step.
3390 void frsqrts(const VRegister
& vd
,
3391 const VRegister
& vn
,
3392 const VRegister
& vm
);
3394 // Signed absolute difference and accumulate long.
3395 void sabal(const VRegister
& vd
,
3396 const VRegister
& vn
,
3397 const VRegister
& vm
);
3399 // Signed absolute difference and accumulate long (second part).
3400 void sabal2(const VRegister
& vd
,
3401 const VRegister
& vn
,
3402 const VRegister
& vm
);
3404 // Unsigned absolute difference and accumulate long.
3405 void uabal(const VRegister
& vd
,
3406 const VRegister
& vn
,
3407 const VRegister
& vm
);
3409 // Unsigned absolute difference and accumulate long (second part).
3410 void uabal2(const VRegister
& vd
,
3411 const VRegister
& vn
,
3412 const VRegister
& vm
);
3414 // Signed absolute difference long.
3415 void sabdl(const VRegister
& vd
,
3416 const VRegister
& vn
,
3417 const VRegister
& vm
);
3419 // Signed absolute difference long (second part).
3420 void sabdl2(const VRegister
& vd
,
3421 const VRegister
& vn
,
3422 const VRegister
& vm
);
3424 // Unsigned absolute difference long.
3425 void uabdl(const VRegister
& vd
,
3426 const VRegister
& vn
,
3427 const VRegister
& vm
);
3429 // Unsigned absolute difference long (second part).
3430 void uabdl2(const VRegister
& vd
,
3431 const VRegister
& vn
,
3432 const VRegister
& vm
);
3434 // Polynomial multiply long.
3435 void pmull(const VRegister
& vd
,
3436 const VRegister
& vn
,
3437 const VRegister
& vm
);
3439 // Polynomial multiply long (second part).
3440 void pmull2(const VRegister
& vd
,
3441 const VRegister
& vn
,
3442 const VRegister
& vm
);
3444 // Signed long multiply-add.
3445 void smlal(const VRegister
& vd
,
3446 const VRegister
& vn
,
3447 const VRegister
& vm
);
3449 // Signed long multiply-add (second part).
3450 void smlal2(const VRegister
& vd
,
3451 const VRegister
& vn
,
3452 const VRegister
& vm
);
3454 // Unsigned long multiply-add.
3455 void umlal(const VRegister
& vd
,
3456 const VRegister
& vn
,
3457 const VRegister
& vm
);
3459 // Unsigned long multiply-add (second part).
3460 void umlal2(const VRegister
& vd
,
3461 const VRegister
& vn
,
3462 const VRegister
& vm
);
3464 // Signed long multiply-sub.
3465 void smlsl(const VRegister
& vd
,
3466 const VRegister
& vn
,
3467 const VRegister
& vm
);
3469 // Signed long multiply-sub (second part).
3470 void smlsl2(const VRegister
& vd
,
3471 const VRegister
& vn
,
3472 const VRegister
& vm
);
3474 // Unsigned long multiply-sub.
3475 void umlsl(const VRegister
& vd
,
3476 const VRegister
& vn
,
3477 const VRegister
& vm
);
3479 // Unsigned long multiply-sub (second part).
3480 void umlsl2(const VRegister
& vd
,
3481 const VRegister
& vn
,
3482 const VRegister
& vm
);
3484 // Signed long multiply.
3485 void smull(const VRegister
& vd
,
3486 const VRegister
& vn
,
3487 const VRegister
& vm
);
3489 // Signed long multiply (second part).
3490 void smull2(const VRegister
& vd
,
3491 const VRegister
& vn
,
3492 const VRegister
& vm
);
3494 // Signed saturating doubling long multiply-add.
3495 void sqdmlal(const VRegister
& vd
,
3496 const VRegister
& vn
,
3497 const VRegister
& vm
);
3499 // Signed saturating doubling long multiply-add (second part).
3500 void sqdmlal2(const VRegister
& vd
,
3501 const VRegister
& vn
,
3502 const VRegister
& vm
);
3504 // Signed saturating doubling long multiply-subtract.
3505 void sqdmlsl(const VRegister
& vd
,
3506 const VRegister
& vn
,
3507 const VRegister
& vm
);
3509 // Signed saturating doubling long multiply-subtract (second part).
3510 void sqdmlsl2(const VRegister
& vd
,
3511 const VRegister
& vn
,
3512 const VRegister
& vm
);
3514 // Signed saturating doubling long multiply.
3515 void sqdmull(const VRegister
& vd
,
3516 const VRegister
& vn
,
3517 const VRegister
& vm
);
3519 // Signed saturating doubling long multiply (second part).
3520 void sqdmull2(const VRegister
& vd
,
3521 const VRegister
& vn
,
3522 const VRegister
& vm
);
3524 // Signed saturating doubling multiply returning high half.
3525 void sqdmulh(const VRegister
& vd
,
3526 const VRegister
& vn
,
3527 const VRegister
& vm
);
3529 // Signed saturating rounding doubling multiply returning high half.
3530 void sqrdmulh(const VRegister
& vd
,
3531 const VRegister
& vn
,
3532 const VRegister
& vm
);
3534 // Signed saturating doubling multiply element returning high half.
3535 void sqdmulh(const VRegister
& vd
,
3536 const VRegister
& vn
,
3537 const VRegister
& vm
,
3540 // Signed saturating rounding doubling multiply element returning high half.
3541 void sqrdmulh(const VRegister
& vd
,
3542 const VRegister
& vn
,
3543 const VRegister
& vm
,
3546 // Unsigned long multiply long.
3547 void umull(const VRegister
& vd
,
3548 const VRegister
& vn
,
3549 const VRegister
& vm
);
3551 // Unsigned long multiply (second part).
3552 void umull2(const VRegister
& vd
,
3553 const VRegister
& vn
,
3554 const VRegister
& vm
);
3556 // Add narrow returning high half.
3557 void addhn(const VRegister
& vd
,
3558 const VRegister
& vn
,
3559 const VRegister
& vm
);
3561 // Add narrow returning high half (second part).
3562 void addhn2(const VRegister
& vd
,
3563 const VRegister
& vn
,
3564 const VRegister
& vm
);
3566 // Rounding add narrow returning high half.
3567 void raddhn(const VRegister
& vd
,
3568 const VRegister
& vn
,
3569 const VRegister
& vm
);
3571 // Rounding add narrow returning high half (second part).
3572 void raddhn2(const VRegister
& vd
,
3573 const VRegister
& vn
,
3574 const VRegister
& vm
);
3576 // Subtract narrow returning high half.
3577 void subhn(const VRegister
& vd
,
3578 const VRegister
& vn
,
3579 const VRegister
& vm
);
3581 // Subtract narrow returning high half (second part).
3582 void subhn2(const VRegister
& vd
,
3583 const VRegister
& vn
,
3584 const VRegister
& vm
);
3586 // Rounding subtract narrow returning high half.
3587 void rsubhn(const VRegister
& vd
,
3588 const VRegister
& vn
,
3589 const VRegister
& vm
);
3591 // Rounding subtract narrow returning high half (second part).
3592 void rsubhn2(const VRegister
& vd
,
3593 const VRegister
& vn
,
3594 const VRegister
& vm
);
3596 // FP vector multiply accumulate.
3597 void fmla(const VRegister
& vd
,
3598 const VRegister
& vn
,
3599 const VRegister
& vm
);
3601 // FP vector multiply subtract.
3602 void fmls(const VRegister
& vd
,
3603 const VRegister
& vn
,
3604 const VRegister
& vm
);
3606 // FP vector multiply extended.
3607 void fmulx(const VRegister
& vd
,
3608 const VRegister
& vn
,
3609 const VRegister
& vm
);
3611 // FP absolute greater than or equal.
3612 void facge(const VRegister
& vd
,
3613 const VRegister
& vn
,
3614 const VRegister
& vm
);
3616 // FP absolute greater than.
3617 void facgt(const VRegister
& vd
,
3618 const VRegister
& vn
,
3619 const VRegister
& vm
);
3621 // FP multiply by element.
3622 void fmul(const VRegister
& vd
,
3623 const VRegister
& vn
,
3624 const VRegister
& vm
,
3627 // FP fused multiply-add to accumulator by element.
3628 void fmla(const VRegister
& vd
,
3629 const VRegister
& vn
,
3630 const VRegister
& vm
,
3633 // FP fused multiply-sub from accumulator by element.
3634 void fmls(const VRegister
& vd
,
3635 const VRegister
& vn
,
3636 const VRegister
& vm
,
3639 // FP multiply extended by element.
3640 void fmulx(const VRegister
& vd
,
3641 const VRegister
& vn
,
3642 const VRegister
& vm
,
3645 // FP compare equal.
3646 void fcmeq(const VRegister
& vd
,
3647 const VRegister
& vn
,
3648 const VRegister
& vm
);
3651 void fcmgt(const VRegister
& vd
,
3652 const VRegister
& vn
,
3653 const VRegister
& vm
);
3655 // FP greater than or equal.
3656 void fcmge(const VRegister
& vd
,
3657 const VRegister
& vn
,
3658 const VRegister
& vm
);
3660 // FP compare equal to zero.
3661 void fcmeq(const VRegister
& vd
,
3662 const VRegister
& vn
,
3665 // FP greater than zero.
3666 void fcmgt(const VRegister
& vd
,
3667 const VRegister
& vn
,
3670 // FP greater than or equal to zero.
3671 void fcmge(const VRegister
& vd
,
3672 const VRegister
& vn
,
3675 // FP less than or equal to zero.
3676 void fcmle(const VRegister
& vd
,
3677 const VRegister
& vn
,
3680 // FP less than to zero.
3681 void fcmlt(const VRegister
& vd
,
3682 const VRegister
& vn
,
3685 // FP absolute difference.
3686 void fabd(const VRegister
& vd
,
3687 const VRegister
& vn
,
3688 const VRegister
& vm
);
3690 // FP pairwise add vector.
3691 void faddp(const VRegister
& vd
,
3692 const VRegister
& vn
,
3693 const VRegister
& vm
);
3695 // FP pairwise add scalar.
3696 void faddp(const VRegister
& vd
,
3697 const VRegister
& vn
);
3699 // FP pairwise maximum vector.
3700 void fmaxp(const VRegister
& vd
,
3701 const VRegister
& vn
,
3702 const VRegister
& vm
);
3704 // FP pairwise maximum scalar.
3705 void fmaxp(const VRegister
& vd
,
3706 const VRegister
& vn
);
3708 // FP pairwise minimum vector.
3709 void fminp(const VRegister
& vd
,
3710 const VRegister
& vn
,
3711 const VRegister
& vm
);
3713 // FP pairwise minimum scalar.
3714 void fminp(const VRegister
& vd
,
3715 const VRegister
& vn
);
3717 // FP pairwise maximum number vector.
3718 void fmaxnmp(const VRegister
& vd
,
3719 const VRegister
& vn
,
3720 const VRegister
& vm
);
3722 // FP pairwise maximum number scalar.
3723 void fmaxnmp(const VRegister
& vd
,
3724 const VRegister
& vn
);
3726 // FP pairwise minimum number vector.
3727 void fminnmp(const VRegister
& vd
,
3728 const VRegister
& vn
,
3729 const VRegister
& vm
);
3731 // FP pairwise minimum number scalar.
3732 void fminnmp(const VRegister
& vd
,
3733 const VRegister
& vn
);
3735 // Emit generic instructions.
3736 // Emit raw instructions into the instruction stream.
3737 void dci(Instr raw_inst
) { Emit(raw_inst
); }
3739 // Emit 32 bits of data into the instruction stream.
3740 void dc32(uint32_t data
) {
3741 VIXL_ASSERT(buffer_monitor_
> 0);
3742 buffer_
->Emit32(data
);
3745 // Emit 64 bits of data into the instruction stream.
3746 void dc64(uint64_t data
) {
3747 VIXL_ASSERT(buffer_monitor_
> 0);
3748 buffer_
->Emit64(data
);
3751 // Copy a string into the instruction stream, including the terminating NULL
3752 // character. The instruction pointer is then aligned correctly for
3753 // subsequent instructions.
3754 void EmitString(const char * string
) {
3755 VIXL_ASSERT(string
!= NULL
);
3756 VIXL_ASSERT(buffer_monitor_
> 0);
3758 buffer_
->EmitString(string
);
3762 // Code generation helpers.
3764 // Register encoding.
3765 static Instr
Rd(CPURegister rd
) {
3766 VIXL_ASSERT(rd
.code() != kSPRegInternalCode
);
3767 return rd
.code() << Rd_offset
;
3770 static Instr
Rn(CPURegister rn
) {
3771 VIXL_ASSERT(rn
.code() != kSPRegInternalCode
);
3772 return rn
.code() << Rn_offset
;
3775 static Instr
Rm(CPURegister rm
) {
3776 VIXL_ASSERT(rm
.code() != kSPRegInternalCode
);
3777 return rm
.code() << Rm_offset
;
3780 static Instr
RmNot31(CPURegister rm
) {
3781 VIXL_ASSERT(rm
.code() != kSPRegInternalCode
);
3782 VIXL_ASSERT(!rm
.IsZero());
3786 static Instr
Ra(CPURegister ra
) {
3787 VIXL_ASSERT(ra
.code() != kSPRegInternalCode
);
3788 return ra
.code() << Ra_offset
;
3791 static Instr
Rt(CPURegister rt
) {
3792 VIXL_ASSERT(rt
.code() != kSPRegInternalCode
);
3793 return rt
.code() << Rt_offset
;
3796 static Instr
Rt2(CPURegister rt2
) {
3797 VIXL_ASSERT(rt2
.code() != kSPRegInternalCode
);
3798 return rt2
.code() << Rt2_offset
;
3801 static Instr
Rs(CPURegister rs
) {
3802 VIXL_ASSERT(rs
.code() != kSPRegInternalCode
);
3803 return rs
.code() << Rs_offset
;
3806 // These encoding functions allow the stack pointer to be encoded, and
3807 // disallow the zero register.
3808 static Instr
RdSP(Register rd
) {
3809 VIXL_ASSERT(!rd
.IsZero());
3810 return (rd
.code() & kRegCodeMask
) << Rd_offset
;
3813 static Instr
RnSP(Register rn
) {
3814 VIXL_ASSERT(!rn
.IsZero());
3815 return (rn
.code() & kRegCodeMask
) << Rn_offset
;
3819 static Instr
Flags(FlagsUpdate S
) {
3820 if (S
== SetFlags
) {
3821 return 1 << FlagsUpdate_offset
;
3822 } else if (S
== LeaveFlags
) {
3823 return 0 << FlagsUpdate_offset
;
3829 static Instr
Cond(Condition cond
) {
3830 return cond
<< Condition_offset
;
3833 // PC-relative address encoding.
3834 static Instr
ImmPCRelAddress(int imm21
) {
3835 VIXL_ASSERT(is_int21(imm21
));
3836 Instr imm
= static_cast<Instr
>(truncate_to_int21(imm21
));
3837 Instr immhi
= (imm
>> ImmPCRelLo_width
) << ImmPCRelHi_offset
;
3838 Instr immlo
= imm
<< ImmPCRelLo_offset
;
3839 return (immhi
& ImmPCRelHi_mask
) | (immlo
& ImmPCRelLo_mask
);
3843 static Instr
ImmUncondBranch(int imm26
) {
3844 VIXL_ASSERT(is_int26(imm26
));
3845 return truncate_to_int26(imm26
) << ImmUncondBranch_offset
;
3848 static Instr
ImmCondBranch(int imm19
) {
3849 VIXL_ASSERT(is_int19(imm19
));
3850 return truncate_to_int19(imm19
) << ImmCondBranch_offset
;
3853 static Instr
ImmCmpBranch(int imm19
) {
3854 VIXL_ASSERT(is_int19(imm19
));
3855 return truncate_to_int19(imm19
) << ImmCmpBranch_offset
;
3858 static Instr
ImmTestBranch(int imm14
) {
3859 VIXL_ASSERT(is_int14(imm14
));
3860 return truncate_to_int14(imm14
) << ImmTestBranch_offset
;
3863 static Instr
ImmTestBranchBit(unsigned bit_pos
) {
3864 VIXL_ASSERT(is_uint6(bit_pos
));
3865 // Subtract five from the shift offset, as we need bit 5 from bit_pos.
3866 unsigned b5
= bit_pos
<< (ImmTestBranchBit5_offset
- 5);
3867 unsigned b40
= bit_pos
<< ImmTestBranchBit40_offset
;
3868 b5
&= ImmTestBranchBit5_mask
;
3869 b40
&= ImmTestBranchBit40_mask
;
3873 // Data Processing encoding.
3874 static Instr
SF(Register rd
) {
3875 return rd
.Is64Bits() ? SixtyFourBits
: ThirtyTwoBits
;
3878 static Instr
ImmAddSub(int imm
) {
3879 VIXL_ASSERT(IsImmAddSub(imm
));
3880 if (is_uint12(imm
)) { // No shift required.
3881 imm
<<= ImmAddSub_offset
;
3883 imm
= ((imm
>> 12) << ImmAddSub_offset
) | (1 << ShiftAddSub_offset
);
3888 static Instr
ImmS(unsigned imms
, unsigned reg_size
) {
3889 VIXL_ASSERT(((reg_size
== kXRegSize
) && is_uint6(imms
)) ||
3890 ((reg_size
== kWRegSize
) && is_uint5(imms
)));
3892 return imms
<< ImmS_offset
;
3895 static Instr
ImmR(unsigned immr
, unsigned reg_size
) {
3896 VIXL_ASSERT(((reg_size
== kXRegSize
) && is_uint6(immr
)) ||
3897 ((reg_size
== kWRegSize
) && is_uint5(immr
)));
3899 VIXL_ASSERT(is_uint6(immr
));
3900 return immr
<< ImmR_offset
;
3903 static Instr
ImmSetBits(unsigned imms
, unsigned reg_size
) {
3904 VIXL_ASSERT((reg_size
== kWRegSize
) || (reg_size
== kXRegSize
));
3905 VIXL_ASSERT(is_uint6(imms
));
3906 VIXL_ASSERT((reg_size
== kXRegSize
) || is_uint6(imms
+ 3));
3908 return imms
<< ImmSetBits_offset
;
3911 static Instr
ImmRotate(unsigned immr
, unsigned reg_size
) {
3912 VIXL_ASSERT((reg_size
== kWRegSize
) || (reg_size
== kXRegSize
));
3913 VIXL_ASSERT(((reg_size
== kXRegSize
) && is_uint6(immr
)) ||
3914 ((reg_size
== kWRegSize
) && is_uint5(immr
)));
3916 return immr
<< ImmRotate_offset
;
3919 static Instr
ImmLLiteral(int imm19
) {
3920 VIXL_ASSERT(is_int19(imm19
));
3921 return truncate_to_int19(imm19
) << ImmLLiteral_offset
;
3924 static Instr
BitN(unsigned bitn
, unsigned reg_size
) {
3925 VIXL_ASSERT((reg_size
== kWRegSize
) || (reg_size
== kXRegSize
));
3926 VIXL_ASSERT((reg_size
== kXRegSize
) || (bitn
== 0));
3928 return bitn
<< BitN_offset
;
3931 static Instr
ShiftDP(Shift shift
) {
3932 VIXL_ASSERT(shift
== LSL
|| shift
== LSR
|| shift
== ASR
|| shift
== ROR
);
3933 return shift
<< ShiftDP_offset
;
3936 static Instr
ImmDPShift(unsigned amount
) {
3937 VIXL_ASSERT(is_uint6(amount
));
3938 return amount
<< ImmDPShift_offset
;
3941 static Instr
ExtendMode(Extend extend
) {
3942 return extend
<< ExtendMode_offset
;
3945 static Instr
ImmExtendShift(unsigned left_shift
) {
3946 VIXL_ASSERT(left_shift
<= 4);
3947 return left_shift
<< ImmExtendShift_offset
;
3950 static Instr
ImmCondCmp(unsigned imm
) {
3951 VIXL_ASSERT(is_uint5(imm
));
3952 return imm
<< ImmCondCmp_offset
;
3955 static Instr
Nzcv(StatusFlags nzcv
) {
3956 return ((nzcv
>> Flags_offset
) & 0xf) << Nzcv_offset
;
3959 // MemOperand offset encoding.
3960 static Instr
ImmLSUnsigned(int imm12
) {
3961 VIXL_ASSERT(is_uint12(imm12
));
3962 return imm12
<< ImmLSUnsigned_offset
;
3965 static Instr
ImmLS(int imm9
) {
3966 VIXL_ASSERT(is_int9(imm9
));
3967 return truncate_to_int9(imm9
) << ImmLS_offset
;
3970 static Instr
ImmLSPair(int imm7
, unsigned access_size
) {
3971 VIXL_ASSERT(((imm7
>> access_size
) << access_size
) == imm7
);
3972 int scaled_imm7
= imm7
>> access_size
;
3973 VIXL_ASSERT(is_int7(scaled_imm7
));
3974 return truncate_to_int7(scaled_imm7
) << ImmLSPair_offset
;
3977 static Instr
ImmShiftLS(unsigned shift_amount
) {
3978 VIXL_ASSERT(is_uint1(shift_amount
));
3979 return shift_amount
<< ImmShiftLS_offset
;
3982 static Instr
ImmPrefetchOperation(int imm5
) {
3983 VIXL_ASSERT(is_uint5(imm5
));
3984 return imm5
<< ImmPrefetchOperation_offset
;
3987 static Instr
ImmException(int imm16
) {
3988 VIXL_ASSERT(is_uint16(imm16
));
3989 return imm16
<< ImmException_offset
;
3992 static Instr
ImmSystemRegister(int imm15
) {
3993 VIXL_ASSERT(is_uint15(imm15
));
3994 return imm15
<< ImmSystemRegister_offset
;
3997 static Instr
ImmHint(int imm7
) {
3998 VIXL_ASSERT(is_uint7(imm7
));
3999 return imm7
<< ImmHint_offset
;
4002 static Instr
CRm(int imm4
) {
4003 VIXL_ASSERT(is_uint4(imm4
));
4004 return imm4
<< CRm_offset
;
4007 static Instr
CRn(int imm4
) {
4008 VIXL_ASSERT(is_uint4(imm4
));
4009 return imm4
<< CRn_offset
;
4012 static Instr
SysOp(int imm14
) {
4013 VIXL_ASSERT(is_uint14(imm14
));
4014 return imm14
<< SysOp_offset
;
4017 static Instr
ImmSysOp1(int imm3
) {
4018 VIXL_ASSERT(is_uint3(imm3
));
4019 return imm3
<< SysOp1_offset
;
4022 static Instr
ImmSysOp2(int imm3
) {
4023 VIXL_ASSERT(is_uint3(imm3
));
4024 return imm3
<< SysOp2_offset
;
4027 static Instr
ImmBarrierDomain(int imm2
) {
4028 VIXL_ASSERT(is_uint2(imm2
));
4029 return imm2
<< ImmBarrierDomain_offset
;
4032 static Instr
ImmBarrierType(int imm2
) {
4033 VIXL_ASSERT(is_uint2(imm2
));
4034 return imm2
<< ImmBarrierType_offset
;
4037 // Move immediates encoding.
4038 static Instr
ImmMoveWide(uint64_t imm
) {
4039 VIXL_ASSERT(is_uint16(imm
));
4040 return static_cast<Instr
>(imm
<< ImmMoveWide_offset
);
4043 static Instr
ShiftMoveWide(int64_t shift
) {
4044 VIXL_ASSERT(is_uint2(shift
));
4045 return static_cast<Instr
>(shift
<< ShiftMoveWide_offset
);
4049 static Instr
ImmFP32(float imm
);
4050 static Instr
ImmFP64(double imm
);
4052 // FP register type.
4053 static Instr
FPType(FPRegister fd
) {
4054 return fd
.Is64Bits() ? FP64
: FP32
;
4057 static Instr
FPScale(unsigned scale
) {
4058 VIXL_ASSERT(is_uint6(scale
));
4059 return scale
<< FPScale_offset
;
4062 // Immediate field checking helpers.
4063 static bool IsImmAddSub(int64_t immediate
);
4064 static bool IsImmConditionalCompare(int64_t immediate
);
4065 static bool IsImmFP32(float imm
);
4066 static bool IsImmFP64(double imm
);
4067 static bool IsImmLogical(uint64_t value
,
4070 unsigned* imm_s
= NULL
,
4071 unsigned* imm_r
= NULL
);
4072 static bool IsImmLSPair(int64_t offset
, unsigned access_size
);
4073 static bool IsImmLSScaled(int64_t offset
, unsigned access_size
);
4074 static bool IsImmLSUnscaled(int64_t offset
);
4075 static bool IsImmMovn(uint64_t imm
, unsigned reg_size
);
4076 static bool IsImmMovz(uint64_t imm
, unsigned reg_size
);
4078 // Instruction bits for vector format in data processing operations.
4079 static Instr
VFormat(VRegister vd
) {
4080 if (vd
.Is64Bits()) {
4081 switch (vd
.lanes()) {
4082 case 2: return NEON_2S
;
4083 case 4: return NEON_4H
;
4084 case 8: return NEON_8B
;
4085 default: return 0xffffffff;
4088 VIXL_ASSERT(vd
.Is128Bits());
4089 switch (vd
.lanes()) {
4090 case 2: return NEON_2D
;
4091 case 4: return NEON_4S
;
4092 case 8: return NEON_8H
;
4093 case 16: return NEON_16B
;
4094 default: return 0xffffffff;
4099 // Instruction bits for vector format in floating point data processing
4101 static Instr
FPFormat(VRegister vd
) {
4102 if (vd
.lanes() == 1) {
4103 // Floating point scalar formats.
4104 VIXL_ASSERT(vd
.Is32Bits() || vd
.Is64Bits());
4105 return vd
.Is64Bits() ? FP64
: FP32
;
4108 // Two lane floating point vector formats.
4109 if (vd
.lanes() == 2) {
4110 VIXL_ASSERT(vd
.Is64Bits() || vd
.Is128Bits());
4111 return vd
.Is128Bits() ? NEON_FP_2D
: NEON_FP_2S
;
4114 // Four lane floating point vector format.
4115 VIXL_ASSERT((vd
.lanes() == 4) && vd
.Is128Bits());
4119 // Instruction bits for vector format in load and store operations.
4120 static Instr
LSVFormat(VRegister vd
) {
4121 if (vd
.Is64Bits()) {
4122 switch (vd
.lanes()) {
4123 case 1: return LS_NEON_1D
;
4124 case 2: return LS_NEON_2S
;
4125 case 4: return LS_NEON_4H
;
4126 case 8: return LS_NEON_8B
;
4127 default: return 0xffffffff;
4130 VIXL_ASSERT(vd
.Is128Bits());
4131 switch (vd
.lanes()) {
4132 case 2: return LS_NEON_2D
;
4133 case 4: return LS_NEON_4S
;
4134 case 8: return LS_NEON_8H
;
4135 case 16: return LS_NEON_16B
;
4136 default: return 0xffffffff;
4141 // Instruction bits for scalar format in data processing operations.
4142 static Instr
SFormat(VRegister vd
) {
4143 VIXL_ASSERT(vd
.lanes() == 1);
4144 switch (vd
.SizeInBytes()) {
4145 case 1: return NEON_B
;
4146 case 2: return NEON_H
;
4147 case 4: return NEON_S
;
4148 case 8: return NEON_D
;
4149 default: return 0xffffffff;
4153 static Instr
ImmNEONHLM(int index
, int num_bits
) {
4155 if (num_bits
== 3) {
4156 VIXL_ASSERT(is_uint3(index
));
4157 h
= (index
>> 2) & 1;
4158 l
= (index
>> 1) & 1;
4159 m
= (index
>> 0) & 1;
4160 } else if (num_bits
== 2) {
4161 VIXL_ASSERT(is_uint2(index
));
4162 h
= (index
>> 1) & 1;
4163 l
= (index
>> 0) & 1;
4166 VIXL_ASSERT(is_uint1(index
) && (num_bits
== 1));
4167 h
= (index
>> 0) & 1;
4171 return (h
<< NEONH_offset
) | (l
<< NEONL_offset
) | (m
<< NEONM_offset
);
4174 static Instr
ImmNEONExt(int imm4
) {
4175 VIXL_ASSERT(is_uint4(imm4
));
4176 return imm4
<< ImmNEONExt_offset
;
4179 static Instr
ImmNEON5(Instr format
, int index
) {
4180 VIXL_ASSERT(is_uint4(index
));
4181 int s
= LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat
>(format
));
4182 int imm5
= (index
<< (s
+ 1)) | (1 << s
);
4183 return imm5
<< ImmNEON5_offset
;
4186 static Instr
ImmNEON4(Instr format
, int index
) {
4187 VIXL_ASSERT(is_uint4(index
));
4188 int s
= LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat
>(format
));
4189 int imm4
= index
<< s
;
4190 return imm4
<< ImmNEON4_offset
;
4193 static Instr
ImmNEONabcdefgh(int imm8
) {
4194 VIXL_ASSERT(is_uint8(imm8
));
4196 instr
= ((imm8
>> 5) & 7) << ImmNEONabc_offset
;
4197 instr
|= (imm8
& 0x1f) << ImmNEONdefgh_offset
;
4201 static Instr
NEONCmode(int cmode
) {
4202 VIXL_ASSERT(is_uint4(cmode
));
4203 return cmode
<< NEONCmode_offset
;
4206 static Instr
NEONModImmOp(int op
) {
4207 VIXL_ASSERT(is_uint1(op
));
4208 return op
<< NEONModImmOp_offset
;
4211 // Size of the code generated since label to the current position.
4212 size_t SizeOfCodeGeneratedSince(Label
* label
) const {
4213 VIXL_ASSERT(label
->IsBound());
4214 return buffer_
->OffsetFrom(label
->location());
4217 size_t SizeOfCodeGenerated() const {
4218 return buffer_
->CursorOffset();
4221 size_t BufferCapacity() const { return buffer_
->capacity(); }
4223 size_t RemainingBufferSpace() const { return buffer_
->RemainingBytes(); }
4225 void EnsureSpaceFor(size_t amount
) {
4226 if (buffer_
->RemainingBytes() < amount
) {
4227 size_t capacity
= buffer_
->capacity();
4228 size_t size
= buffer_
->CursorOffset();
4230 // TODO(all): refine.
4232 } while ((capacity
- size
) < amount
);
4233 buffer_
->Grow(capacity
);
4238 void AcquireBuffer() {
4239 VIXL_ASSERT(buffer_monitor_
>= 0);
4243 void ReleaseBuffer() {
4245 VIXL_ASSERT(buffer_monitor_
>= 0);
4249 PositionIndependentCodeOption
pic() const {
4253 bool AllowPageOffsetDependentCode() const {
4254 return (pic() == PageOffsetDependentCode
) ||
4255 (pic() == PositionDependentCode
);
4258 static const Register
& AppropriateZeroRegFor(const CPURegister
& reg
) {
4259 return reg
.Is64Bits() ? xzr
: wzr
;
4264 void LoadStore(const CPURegister
& rt
,
4265 const MemOperand
& addr
,
4267 LoadStoreScalingOption option
= PreferScaledOffset
);
4269 void LoadStorePair(const CPURegister
& rt
,
4270 const CPURegister
& rt2
,
4271 const MemOperand
& addr
,
4272 LoadStorePairOp op
);
4273 void LoadStoreStruct(const VRegister
& vt
,
4274 const MemOperand
& addr
,
4275 NEONLoadStoreMultiStructOp op
);
4276 void LoadStoreStruct1(const VRegister
& vt
,
4278 const MemOperand
& addr
);
4279 void LoadStoreStructSingle(const VRegister
& vt
,
4281 const MemOperand
& addr
,
4282 NEONLoadStoreSingleStructOp op
);
4283 void LoadStoreStructSingleAllLanes(const VRegister
& vt
,
4284 const MemOperand
& addr
,
4285 NEONLoadStoreSingleStructOp op
);
4286 void LoadStoreStructVerify(const VRegister
& vt
,
4287 const MemOperand
& addr
,
4290 void Prefetch(PrefetchOperation op
,
4291 const MemOperand
& addr
,
4292 LoadStoreScalingOption option
= PreferScaledOffset
);
4294 // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
4295 // reports a bogus uninitialised warning then.
4296 void Logical(const Register
& rd
,
4298 const Operand operand
,
4300 void LogicalImmediate(const Register
& rd
,
4307 void ConditionalCompare(const Register
& rn
,
4308 const Operand
& operand
,
4311 ConditionalCompareOp op
);
4313 void AddSubWithCarry(const Register
& rd
,
4315 const Operand
& operand
,
4317 AddSubWithCarryOp op
);
4320 // Functions for emulating operands not directly supported by the instruction
4322 void EmitShift(const Register
& rd
,
4326 void EmitExtendShift(const Register
& rd
,
4329 unsigned left_shift
);
4331 void AddSub(const Register
& rd
,
4333 const Operand
& operand
,
4337 void NEONTable(const VRegister
& vd
,
4338 const VRegister
& vn
,
4339 const VRegister
& vm
,
4342 // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
4343 // registers. Only simple loads are supported; sign- and zero-extension (such
4344 // as in LDPSW_x or LDRB_w) are not supported.
4345 static LoadStoreOp
LoadOpFor(const CPURegister
& rt
);
4346 static LoadStorePairOp
LoadPairOpFor(const CPURegister
& rt
,
4347 const CPURegister
& rt2
);
4348 static LoadStoreOp
StoreOpFor(const CPURegister
& rt
);
4349 static LoadStorePairOp
StorePairOpFor(const CPURegister
& rt
,
4350 const CPURegister
& rt2
);
4351 static LoadStorePairNonTemporalOp
LoadPairNonTemporalOpFor(
4352 const CPURegister
& rt
, const CPURegister
& rt2
);
4353 static LoadStorePairNonTemporalOp
StorePairNonTemporalOpFor(
4354 const CPURegister
& rt
, const CPURegister
& rt2
);
4355 static LoadLiteralOp
LoadLiteralOpFor(const CPURegister
& rt
);
4359 static uint32_t FP32ToImm8(float imm
);
4360 static uint32_t FP64ToImm8(double imm
);
4362 // Instruction helpers.
4363 void MoveWide(const Register
& rd
,
4366 MoveWideImmediateOp mov_op
);
4367 void DataProcShiftedRegister(const Register
& rd
,
4369 const Operand
& operand
,
4372 void DataProcExtendedRegister(const Register
& rd
,
4374 const Operand
& operand
,
4377 void LoadStorePairNonTemporal(const CPURegister
& rt
,
4378 const CPURegister
& rt2
,
4379 const MemOperand
& addr
,
4380 LoadStorePairNonTemporalOp op
);
4381 void LoadLiteral(const CPURegister
& rt
, uint64_t imm
, LoadLiteralOp op
);
4382 void ConditionalSelect(const Register
& rd
,
4386 ConditionalSelectOp op
);
4387 void DataProcessing1Source(const Register
& rd
,
4389 DataProcessing1SourceOp op
);
4390 void DataProcessing3Source(const Register
& rd
,
4394 DataProcessing3SourceOp op
);
4395 void FPDataProcessing1Source(const VRegister
& fd
,
4396 const VRegister
& fn
,
4397 FPDataProcessing1SourceOp op
);
4398 void FPDataProcessing3Source(const VRegister
& fd
,
4399 const VRegister
& fn
,
4400 const VRegister
& fm
,
4401 const VRegister
& fa
,
4402 FPDataProcessing3SourceOp op
);
4403 void NEONAcrossLanesL(const VRegister
& vd
,
4404 const VRegister
& vn
,
4405 NEONAcrossLanesOp op
);
4406 void NEONAcrossLanes(const VRegister
& vd
,
4407 const VRegister
& vn
,
4408 NEONAcrossLanesOp op
);
4409 void NEONModifiedImmShiftLsl(const VRegister
& vd
,
4411 const int left_shift
,
4412 NEONModifiedImmediateOp op
);
4413 void NEONModifiedImmShiftMsl(const VRegister
& vd
,
4415 const int shift_amount
,
4416 NEONModifiedImmediateOp op
);
4417 void NEONFP2Same(const VRegister
& vd
,
4418 const VRegister
& vn
,
4420 void NEON3Same(const VRegister
& vd
,
4421 const VRegister
& vn
,
4422 const VRegister
& vm
,
4424 void NEONFP3Same(const VRegister
& vd
,
4425 const VRegister
& vn
,
4426 const VRegister
& vm
,
4428 void NEON3DifferentL(const VRegister
& vd
,
4429 const VRegister
& vn
,
4430 const VRegister
& vm
,
4431 NEON3DifferentOp vop
);
4432 void NEON3DifferentW(const VRegister
& vd
,
4433 const VRegister
& vn
,
4434 const VRegister
& vm
,
4435 NEON3DifferentOp vop
);
4436 void NEON3DifferentHN(const VRegister
& vd
,
4437 const VRegister
& vn
,
4438 const VRegister
& vm
,
4439 NEON3DifferentOp vop
);
4440 void NEONFP2RegMisc(const VRegister
& vd
,
4441 const VRegister
& vn
,
4443 double value
= 0.0);
4444 void NEON2RegMisc(const VRegister
& vd
,
4445 const VRegister
& vn
,
4448 void NEONFP2RegMisc(const VRegister
& vd
,
4449 const VRegister
& vn
,
4451 void NEONAddlp(const VRegister
& vd
,
4452 const VRegister
& vn
,
4454 void NEONPerm(const VRegister
& vd
,
4455 const VRegister
& vn
,
4456 const VRegister
& vm
,
4458 void NEONFPByElement(const VRegister
& vd
,
4459 const VRegister
& vn
,
4460 const VRegister
& vm
,
4462 NEONByIndexedElementOp op
);
4463 void NEONByElement(const VRegister
& vd
,
4464 const VRegister
& vn
,
4465 const VRegister
& vm
,
4467 NEONByIndexedElementOp op
);
4468 void NEONByElementL(const VRegister
& vd
,
4469 const VRegister
& vn
,
4470 const VRegister
& vm
,
4472 NEONByIndexedElementOp op
);
4473 void NEONShiftImmediate(const VRegister
& vd
,
4474 const VRegister
& vn
,
4475 NEONShiftImmediateOp op
,
4477 void NEONShiftLeftImmediate(const VRegister
& vd
,
4478 const VRegister
& vn
,
4480 NEONShiftImmediateOp op
);
4481 void NEONShiftRightImmediate(const VRegister
& vd
,
4482 const VRegister
& vn
,
4484 NEONShiftImmediateOp op
);
4485 void NEONShiftImmediateL(const VRegister
& vd
,
4486 const VRegister
& vn
,
4488 NEONShiftImmediateOp op
);
4489 void NEONShiftImmediateN(const VRegister
& vd
,
4490 const VRegister
& vn
,
4492 NEONShiftImmediateOp op
);
4493 void NEONXtn(const VRegister
& vd
,
4494 const VRegister
& vn
,
4495 NEON2RegMiscOp vop
);
4497 Instr
LoadStoreStructAddrModeField(const MemOperand
& addr
);
4499 // Encode the specified MemOperand for the specified access size and scaling
4501 Instr
LoadStoreMemOperand(const MemOperand
& addr
,
4502 unsigned access_size
,
4503 LoadStoreScalingOption option
);
4505 // Link the current (not-yet-emitted) instruction to the specified label, then
4506 // return an offset to be encoded in the instruction. If the label is not yet
4507 // bound, an offset of 0 is returned.
4508 ptrdiff_t LinkAndGetByteOffsetTo(Label
* label
);
4509 ptrdiff_t LinkAndGetInstructionOffsetTo(Label
* label
);
4510 ptrdiff_t LinkAndGetPageOffsetTo(Label
* label
);
4512 // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
4513 template <int element_shift
>
4514 ptrdiff_t LinkAndGetOffsetTo(Label
* label
);
4516 // Literal load offset are in words (32-bit).
4517 ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral
* literal
);
4519 // Emit the instruction in buffer_.
4520 void Emit(Instr instruction
) {
4521 VIXL_STATIC_ASSERT(sizeof(instruction
) == kInstructionSize
);
4522 VIXL_ASSERT(buffer_monitor_
> 0);
4523 buffer_
->Emit32(instruction
);
4526 // Buffer where the code is emitted.
4527 CodeBuffer
* buffer_
;
4528 PositionIndependentCodeOption pic_
;
4531 int64_t buffer_monitor_
;
4536 // All Assembler emits MUST acquire/release the underlying code buffer. The
4537 // helper scope below will do so and optionally ensure the buffer is big enough
4538 // to receive the emit. It is possible to request the scope not to perform any
4539 // checks (kNoCheck) if for example it is known in advance the buffer size is
4540 // adequate or there is some other size checking mechanism in place.
4541 class CodeBufferCheckScope
{
4543 // Tell whether or not the scope needs to ensure the associated CodeBuffer
4544 // has enough space for the requested size.
4550 // Tell whether or not the scope should assert the amount of code emitted
4551 // within the scope is consistent with the requested amount.
4553 kNoAssert
, // No assert required.
4554 kExactSize
, // The code emitted must be exactly size bytes.
4555 kMaximumSize
// The code emitted must be at most size bytes.
4558 CodeBufferCheckScope(Assembler
* assm
,
4560 CheckPolicy check_policy
= kCheck
,
4561 AssertPolicy assert_policy
= kMaximumSize
)
4563 if (check_policy
== kCheck
) assm
->EnsureSpaceFor(size
);
4565 assm
->bind(&start_
);
4567 assert_policy_
= assert_policy
;
4568 assm
->AcquireBuffer();
4574 // This is a shortcut for CodeBufferCheckScope(assm, 0, kNoCheck, kNoAssert).
4575 explicit CodeBufferCheckScope(Assembler
* assm
) : assm_(assm
) {
4578 assert_policy_
= kNoAssert
;
4579 assm
->AcquireBuffer();
4583 ~CodeBufferCheckScope() {
4585 assm_
->ReleaseBuffer();
4586 switch (assert_policy_
) {
4587 case kNoAssert
: break;
4589 VIXL_ASSERT(assm_
->SizeOfCodeGeneratedSince(&start_
) == size_
);
4592 VIXL_ASSERT(assm_
->SizeOfCodeGeneratedSince(&start_
) <= size_
);
4605 AssertPolicy assert_policy_
;
4610 template <typename T
>
4611 void Literal
<T
>::UpdateValue(T new_value
, const Assembler
* assembler
) {
4612 return UpdateValue(new_value
, assembler
->GetStartAddress
<uint8_t*>());
4616 template <typename T
>
4617 void Literal
<T
>::UpdateValue(T high64
, T low64
, const Assembler
* assembler
) {
4618 return UpdateValue(high64
, low64
, assembler
->GetStartAddress
<uint8_t*>());
4624 #endif // VIXL_A64_ASSEMBLER_A64_H_