//===- AArch64ExpandImm.cpp - AArch64 Immediate Expansion ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements AArch64 immediate expansion: rewriting a 32- or 64-bit
// immediate into a short sequence of real move/logical-immediate instructions.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64ExpandImm.h"
#include "MCTargetDesc/AArch64AddressingModes.h"

using namespace llvm;
using namespace llvm::AArch64_IMM;
/// Helper function which extracts the specified 16-bit chunk from a
/// 64-bit value.
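/// For example (illustrative): getChunk(0x1122334455667788, 1) yields 0x5566,
/// since chunk index 1 covers bits 16-31.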
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");

  return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
}
/// Check whether the given 16-bit chunk replicated to full 64-bit width
/// can be materialized with an ORR instruction.
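/// For example (illustrative): 0x00FF replicates to 0x00FF00FF00FF00FF, a
/// repeating run of ones that is encodable as a logical immediate, whereas
/// 0x1234 replicates to 0x1234123412341234, which is not encodable.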
static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
  Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;

  return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
}
/// Check for identical 16-bit chunks within the constant and if so
/// materialize them with a single ORR instruction. The remaining one or two
/// 16-bit chunks will be materialized with MOVK instructions.
///
/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
/// an ORR instruction.
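///
/// For example (illustrative): 0x00FF123400FF00FF is |A|B|A|A| with
/// A = 0x00FF and B = 0x1234, and expands to
///   ORR  x0, xzr, #0x00FF00FF00FF00FF
///   MOVK x0, #0x1234, lsl #32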
static bool tryToreplicateChunks(uint64_t UImm,
                                 SmallVectorImpl<ImmInsnModel> &Insn) {
  using CountMap = DenseMap<uint64_t, unsigned>;

  CountMap Counts;

  // Scan the constant and count how often every chunk occurs.
  for (unsigned Idx = 0; Idx < 4; ++Idx)
    ++Counts[getChunk(UImm, Idx)];

  // Traverse the chunks to find one which occurs more than once.
  for (const auto &Chunk : Counts) {
    const uint64_t ChunkVal = Chunk.first;
    const unsigned Count = Chunk.second;

    uint64_t Encoding = 0;

    // We are looking for chunks which have two or three instances and can be
    // materialized with an ORR instruction.
    if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
      continue;

    const bool CountThree = Count == 3;

    Insn.push_back({ AArch64::ORRXri, 0, Encoding });

    unsigned ShiftAmt = 0;
    uint64_t Imm16 = 0;
    // Find the first chunk not materialized with the ORR instruction.
    for (; ShiftAmt < 64; ShiftAmt += 16) {
      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
      if (Imm16 != ChunkVal)
        break;
    }

    // Create the first MOVK instruction.
    Insn.push_back({ AArch64::MOVKXi, Imm16,
                     AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt) });

    // In case we have three instances the whole constant is now materialized
    // and we can exit.
    if (CountThree)
      return true;

    // Find the remaining chunk which needs to be materialized.
    for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
      if (Imm16 != ChunkVal)
        break;
    }
    Insn.push_back({ AArch64::MOVKXi, Imm16,
                     AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt) });
    return true;
  }

  return false;
}
/// Check whether this chunk matches the pattern '1...0...'. This pattern
/// starts a contiguous sequence of ones if we look at the bits from the LSB
/// towards the MSB.
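/// For example (illustrative): the chunk 0xC000, sign-extended to
/// 0xFFFFFFFFFFFFC000, is a start chunk (its complement 0x3FFF is a mask).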
static bool isStartChunk(uint64_t Chunk) {
  if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
    return false;

  return isMask_64(~Chunk);
}
/// Check whether this chunk matches the pattern '0...1...' This pattern
/// ends a contiguous sequence of ones if we look at the bits from the LSB
/// towards the MSB.
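/// For example (illustrative): the chunk 0x0003 is an end chunk, since it is
/// itself a mask (two trailing ones).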
static bool isEndChunk(uint64_t Chunk) {
  if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
    return false;

  return isMask_64(Chunk);
}
/// Clear or set all bits in the chunk at the given index.
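/// For example (illustrative): updateImm(0x1234FFFF, 0, /*Clear=*/true)
/// yields 0x12340000.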
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
  const uint64_t Mask = 0xFFFF;

  if (Clear)
    // Clear chunk in the immediate.
    Imm &= ~(Mask << (Idx * 16));
  else
    // Set all bits in the immediate for the particular chunk.
    Imm |= Mask << (Idx * 16);

  return Imm;
}
/// Check whether the constant contains a sequence of contiguous ones,
/// which might be interrupted by one or two chunks. If so, materialize the
/// sequence of contiguous ones with an ORR instruction.
/// Materialize the chunks which are either interrupting the sequence or outside
/// of the sequence with a MOVK instruction.
///
/// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk
/// which ends the sequence (0...1...). Then we are looking for constants which
/// contain at least one S and E chunk.
/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
///
/// We are also looking for constants like |S|A|B|E| where the contiguous
/// sequence of ones wraps around the MSB into the LSB.
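///
/// For example (illustrative): 0x0003FFFF1234C000 has start chunk 0xC000,
/// end chunk 0x0003 and the interrupting chunk 0x1234, so it expands to
///   ORR  x0, xzr, #0x0003FFFFFFFFC000
///   MOVK x0, #0x1234, lsl #16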
static bool trySequenceOfOnes(uint64_t UImm,
                              SmallVectorImpl<ImmInsnModel> &Insn) {
  const int NotSet = -1;
  const uint64_t Mask = 0xFFFF;

  int StartIdx = NotSet;
  int EndIdx = NotSet;
  // Try to find the chunks which start/end a contiguous sequence of ones.
  for (int Idx = 0; Idx < 4; ++Idx) {
    int64_t Chunk = getChunk(UImm, Idx);
    // Sign extend the 16-bit chunk to 64-bit.
    Chunk = (Chunk << 48) >> 48;

    if (isStartChunk(Chunk))
      StartIdx = Idx;
    else if (isEndChunk(Chunk))
      EndIdx = Idx;
  }

  // Early exit in case we can't find a start/end chunk.
  if (StartIdx == NotSet || EndIdx == NotSet)
    return false;

  // Outside of the contiguous sequence of ones everything needs to be zero.
  uint64_t Outside = 0;
  // Chunks between the start and end chunk need to have all their bits set.
  uint64_t Inside = Mask;

  // If our contiguous sequence of ones wraps around from the MSB into the LSB,
  // just swap indices and pretend we are materializing a contiguous sequence
  // of zeros surrounded by a contiguous sequence of ones.
  if (StartIdx > EndIdx) {
    std::swap(StartIdx, EndIdx);
    std::swap(Outside, Inside);
  }

  uint64_t OrrImm = UImm;
  int FirstMovkIdx = NotSet;
  int SecondMovkIdx = NotSet;

  // Find out which chunks we need to patch up to obtain a contiguous sequence
  // of ones.
  for (int Idx = 0; Idx < 4; ++Idx) {
    const uint64_t Chunk = getChunk(UImm, Idx);

    // Check whether we are looking at a chunk which is not part of the
    // contiguous sequence of ones.
    if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
      OrrImm = updateImm(OrrImm, Idx, Outside == 0);

      // Remember the index we need to patch.
      if (FirstMovkIdx == NotSet)
        FirstMovkIdx = Idx;
      else
        SecondMovkIdx = Idx;

      // Check whether we are looking at a chunk which is part of the
      // contiguous sequence of ones.
    } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
      OrrImm = updateImm(OrrImm, Idx, Inside != Mask);

      // Remember the index we need to patch.
      if (FirstMovkIdx == NotSet)
        FirstMovkIdx = Idx;
      else
        SecondMovkIdx = Idx;
    }
  }
  assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");

  // Create the ORR-immediate instruction.
  uint64_t Encoding = 0;
  AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
  Insn.push_back({ AArch64::ORRXri, 0, Encoding });

  const bool SingleMovk = SecondMovkIdx == NotSet;
  Insn.push_back({ AArch64::MOVKXi, getChunk(UImm, FirstMovkIdx),
                   AArch64_AM::getShifterImm(AArch64_AM::LSL,
                                             FirstMovkIdx * 16) });

  // Early exit in case we only need to emit a single MOVK instruction.
  if (SingleMovk)
    return true;

  // Create the second MOVK instruction.
  Insn.push_back({ AArch64::MOVKXi, getChunk(UImm, SecondMovkIdx),
                   AArch64_AM::getShifterImm(AArch64_AM::LSL,
                                             SecondMovkIdx * 16) });

  return true;
}
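// Return the mask of the run of ones in V that starts at bit StartPosition,
// shifted back into place; e.g. (illustrative) V = 0b111100 with
// StartPosition = 2 yields 0b111100.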
static uint64_t GetRunOfOnesStartingAt(uint64_t V, uint64_t StartPosition) {
  uint64_t NumOnes = llvm::countr_one(V >> StartPosition);

  uint64_t UnshiftedOnes;
  if (NumOnes == 64) {
    // Avoid undefined behavior: shifting a 64-bit value by 64 is UB.
    UnshiftedOnes = ~0ULL;
  } else {
    UnshiftedOnes = (1ULL << NumOnes) - 1;
  }
  return UnshiftedOnes << StartPosition;
}
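// Grow Subset by OR-ing in rotated copies of itself, halving the rotation
// each time, for as long as every bit of the grown pattern is still set in V.
// The result is the maximal replicated pattern inside V that contains Subset
// (a candidate logical immediate).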
static uint64_t MaximallyReplicateSubImmediate(uint64_t V, uint64_t Subset) {
  uint64_t Result = Subset;

  // Try rotations of 64, 32, 16, 8, 4 and 2 bits.
  for (uint64_t i = 0; i < 6; ++i) {
    uint64_t Rotation = 1ULL << (6 - i);
    uint64_t Closure = Result | llvm::rotl<uint64_t>(Result, Rotation);
    if (Closure != (Closure & V)) {
      break;
    }
    Result = Closure;
  }

  return Result;
}
// Find the logical immediate that covers the most bits in RemainingBits,
// allowing for additional bits to be set that were set in OriginalBits.
static uint64_t maximalLogicalImmWithin(uint64_t RemainingBits,
                                        uint64_t OriginalBits) {
  // Find the first set bit.
  uint32_t Position = llvm::countr_zero(RemainingBits);

  // Get the first run of set bits.
  uint64_t FirstRun = GetRunOfOnesStartingAt(OriginalBits, Position);

  // Replicate the run as many times as possible, as long as the bits are set
  // in OriginalBits.
  uint64_t MaximalImm = MaximallyReplicateSubImmediate(OriginalBits, FirstRun);

  return MaximalImm;
}
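// Try to decompose UImm into the bitwise OR of two values, each of which is
// hopefully an encodable logical immediate (a rotated, replicated run of
// ones): greedily peel off the largest replicated pattern first, then try to
// cover the leftover bits with a second one. Returns std::nullopt if the two
// patterns cannot cover all set bits.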
static std::optional<std::pair<uint64_t, uint64_t>>
decomposeIntoOrrOfLogicalImmediates(uint64_t UImm) {
  if (UImm == 0 || ~UImm == 0)
    return std::nullopt;

  // Make sure we don't have a run of ones split around the rotation boundary.
  uint32_t InitialTrailingOnes = llvm::countr_one(UImm);
  uint64_t RotatedBits = llvm::rotr<uint64_t>(UImm, InitialTrailingOnes);

  // Find the largest logical immediate that fits within the full immediate.
  uint64_t MaximalImm1 = maximalLogicalImmWithin(RotatedBits, RotatedBits);

  // Remove all bits that are set by this mask.
  uint64_t RemainingBits = RotatedBits & ~MaximalImm1;

  // Find the largest logical immediate covering the remaining bits, allowing
  // for additional bits to be set that were also set in the original immediate.
  uint64_t MaximalImm2 = maximalLogicalImmWithin(RemainingBits, RotatedBits);

  // If any bits still haven't been covered, then give up.
  if (RemainingBits & ~MaximalImm2)
    return std::nullopt;

  // Make sure to un-rotate the immediates.
  return std::make_pair(rotl(MaximalImm1, InitialTrailingOnes),
                        rotl(MaximalImm2, InitialTrailingOnes));
}
// Attempt to expand an immediate as the ORR of a pair of logical immediates.
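// For example (illustrative): 0xFF55FF55FF55FF55 is not itself encodable, but
// it is the OR of the logical immediates 0x5555555555555555 and
// 0xFE01FE01FE01FE01, so it expands to two ORR-immediate instructions.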
static bool tryOrrOfLogicalImmediates(uint64_t UImm,
                                      SmallVectorImpl<ImmInsnModel> &Insn) {
  auto MaybeDecomposition = decomposeIntoOrrOfLogicalImmediates(UImm);
  if (MaybeDecomposition == std::nullopt)
    return false;
  uint64_t Imm1 = MaybeDecomposition->first;
  uint64_t Imm2 = MaybeDecomposition->second;

  uint64_t Encoding1, Encoding2;
  bool Imm1Success = AArch64_AM::processLogicalImmediate(Imm1, 64, Encoding1);
  bool Imm2Success = AArch64_AM::processLogicalImmediate(Imm2, 64, Encoding2);

  if (Imm1Success && Imm2Success) {
    // Create the ORR-immediate instructions.
    Insn.push_back({AArch64::ORRXri, 0, Encoding1});
    Insn.push_back({AArch64::ORRXri, 1, Encoding2});
    return true;
  }

  return false;
}
// Attempt to expand an immediate as the AND of a pair of logical immediates.
// This is done by applying DeMorgan's law, using the fact that the set of
// logical immediates is closed under inversion.
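// That is (illustrative): UImm == (Imm1 & Imm2) exactly when
// ~UImm == (~Imm1 | ~Imm2), so we can reuse the ORR decomposition on ~UImm
// and invert the two halves it returns.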
static bool tryAndOfLogicalImmediates(uint64_t UImm,
                                      SmallVectorImpl<ImmInsnModel> &Insn) {
  // Apply DeMorgan's law to turn this into an ORR problem.
  auto MaybeDecomposition = decomposeIntoOrrOfLogicalImmediates(~UImm);
  if (MaybeDecomposition == std::nullopt)
    return false;
  uint64_t Imm1 = MaybeDecomposition->first;
  uint64_t Imm2 = MaybeDecomposition->second;

  uint64_t Encoding1, Encoding2;
  bool Imm1Success = AArch64_AM::processLogicalImmediate(~Imm1, 64, Encoding1);
  bool Imm2Success = AArch64_AM::processLogicalImmediate(~Imm2, 64, Encoding2);

  if (Imm1Success && Imm2Success) {
    // Materialize Imm1, the LHS of the AND.
    Insn.push_back({AArch64::ORRXri, 0, Encoding1});
    // AND Imm1 with Imm2.
    Insn.push_back({AArch64::ANDXri, 1, Encoding2});
    return true;
  }

  return false;
}
// Check whether the constant can be represented by exclusive-or of two 64-bit
// logical immediates. If so, materialize it with an ORR instruction followed
// by an EOR instruction.
//
// This encoding allows all remaining repeated byte patterns, and many repeated
// 16-bit values, to be encoded without needing four instructions. It can also
// represent some irregular bitmasks (although those would mostly only need
// three instructions otherwise).
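//
// For example (illustrative): the repeated-byte value 0x1212121212121212 is
// 0x0202020202020202 ^ 0x1010101010101010, and both operands are encodable
// logical immediates, giving
//   ORR x0, xzr, #0x0202020202020202
//   EOR x0, x0, #0x1010101010101010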
static bool tryEorOfLogicalImmediates(uint64_t Imm,
                                      SmallVectorImpl<ImmInsnModel> &Insn) {
  // Determine the larger repetition size of the two possible logical
  // immediates, by finding the repetition size of Imm.
  unsigned BigSize = 64;

  do {
    BigSize /= 2;
    uint64_t Mask = (1ULL << BigSize) - 1;

    if ((Imm & Mask) != ((Imm >> BigSize) & Mask)) {
      BigSize *= 2;
      break;
    }
  } while (BigSize > 2);

  uint64_t BigMask = ((uint64_t)-1LL) >> (64 - BigSize);

  // Find the last bit of each run of ones, circularly. For runs which wrap
  // around from bit 0 to bit 63, this is the bit before the most-significant
  // zero, otherwise it is the least-significant bit in the run of ones.
  uint64_t RunStarts = Imm & ~rotl<uint64_t>(Imm, 1);

  // Find the smaller repetition size of the two possible logical immediates by
  // counting the number of runs of one-bits within the BigSize-bit value. Both
  // sizes may be the same. The EOR may add one or subtract one from the
  // power-of-two count that can be represented by a logical immediate, or it
  // may be left unchanged.
  int RunsPerBigChunk = popcount(RunStarts & BigMask);
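
  // Lookup table (illustrative summary): the index is the number of runs of
  // ones per big chunk, the value is log2(BigSize / SmallSize) for the
  // candidate smaller immediate, and -1 marks run counts that cannot result
  // from EORing two logical immediates.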
  static const int8_t BigToSmallSizeTable[32] = {
      -1, -1, 0,  1,  2,  2,  -1, 3,  3,  3,  -1, -1, -1, -1, -1, 4,
      4,  4,  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5,
  };

  int BigToSmallShift = BigToSmallSizeTable[RunsPerBigChunk];

  // Early-exit if the big chunk couldn't be a power-of-two number of runs
  // EORed with another single run.
  if (BigToSmallShift == -1)
    return false;

  unsigned SmallSize = BigSize >> BigToSmallShift;

  // 64-bit values with a bit set every (1 << index) bits.
  static const uint64_t RepeatedOnesTable[] = {
      0xffffffffffffffff, 0x5555555555555555, 0x1111111111111111,
      0x0101010101010101, 0x0001000100010001, 0x0000000100000001,
      0x0000000000000001,
  };

  // This RepeatedOnesTable lookup is a faster implementation of the division
  // 0xffffffffffffffff / ((1 << SmallSize) - 1), and can be thought of as
  // dividing the 64-bit value into fields of width SmallSize, and placing a
  // one in the least significant bit of each field.
  uint64_t SmallOnes = RepeatedOnesTable[countr_zero(SmallSize)];

  // Now we try to find the number of ones in each of the smaller repetitions,
  // by looking at runs of ones in Imm. This can take three attempts, as the
  // EOR may have changed the length of the first two runs we find.

  // Rotate a run of ones so we can count the number of trailing set bits.
  int Rotation = countr_zero(RunStarts);
  uint64_t RotatedImm = rotr<uint64_t>(Imm, Rotation);
  for (int Attempt = 0; Attempt < 3; ++Attempt) {
    unsigned RunLength = countr_one(RotatedImm);

    // Construct candidate values BigImm and SmallImm, such that if these two
    // values are encodable, we have a solution. (SmallImm is constructed to be
    // encodable, but this isn't guaranteed when RunLength >= SmallSize)
    uint64_t SmallImm =
        rotl<uint64_t>((SmallOnes << RunLength) - SmallOnes, Rotation);
    uint64_t BigImm = Imm ^ SmallImm;

    uint64_t BigEncoding = 0;
    uint64_t SmallEncoding = 0;
    if (AArch64_AM::processLogicalImmediate(BigImm, 64, BigEncoding) &&
        AArch64_AM::processLogicalImmediate(SmallImm, 64, SmallEncoding)) {
      Insn.push_back({AArch64::ORRXri, 0, SmallEncoding});
      Insn.push_back({AArch64::EORXri, 1, BigEncoding});
      return true;
    }

    // Rotate to the next run of ones.
    Rotation += countr_zero(rotr<uint64_t>(RunStarts, Rotation) & ~1);
    RotatedImm = rotr<uint64_t>(Imm, Rotation);
  }

  return false;
}
/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
/// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
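///
/// For example (illustrative): 0x0000CAFE00000000 expands to the single
/// instruction MOVZ x0, #0xCAFE, lsl #32, and the mostly-ones value
/// 0xFFFFFFFFFFFF1234 expands to MOVN x0, #0xEDCB.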
static inline void expandMOVImmSimple(uint64_t Imm, unsigned BitSize,
                                      unsigned OneChunks, unsigned ZeroChunks,
                                      SmallVectorImpl<ImmInsnModel> &Insn) {
  const unsigned Mask = 0xFFFF;

  // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
  // more MOVK instructions to insert additional 16-bit portions into the
  // lower bits.
  bool isNeg = false;

  // Use MOVN to materialize the high bits if we have more all one chunks
  // than all zero chunks.
  if (OneChunks > ZeroChunks) {
    isNeg = true;
    Imm = ~Imm;
  }

  unsigned FirstOpc;
  if (BitSize == 32) {
    Imm &= (1LL << 32) - 1;
    FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
  } else {
    FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
  }
  unsigned Shift = 0;     // LSL amount for high bits with MOVZ/MOVN
  unsigned LastShift = 0; // LSL amount for last MOVK
  if (Imm != 0) {
    unsigned LZ = llvm::countl_zero(Imm);
    unsigned TZ = llvm::countr_zero(Imm);
    Shift = (TZ / 16) * 16;
    LastShift = ((63 - LZ) / 16) * 16;
  }
  unsigned Imm16 = (Imm >> Shift) & Mask;

  Insn.push_back({ FirstOpc, Imm16,
                   AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift) });

  if (Shift == LastShift)
    return;

  // If a MOVN was used for the high bits of a negative value, flip the rest
  // of the bits back for use with MOVK.
  if (isNeg)
    Imm = ~Imm;

  unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
  while (Shift < LastShift) {
    Shift += 16;
    Imm16 = (Imm >> Shift) & Mask;
    if (Imm16 == (isNeg ? Mask : 0))
      continue; // This 16-bit portion is already set correctly.

    Insn.push_back({ Opc, Imm16,
                     AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift) });
  }
}
/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
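///
/// For example (illustrative): 0x0000FFFF0000FFFF is an encodable logical
/// immediate and yields a single ORRXri entry in Insn, while
/// 0x0000000012345678 yields MOVZ #0x5678 followed by MOVK #0x1234, lsl #16.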
void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
                               SmallVectorImpl<ImmInsnModel> &Insn) {
  const unsigned Mask = 0xFFFF;

  // Scan the immediate and count the number of 16-bit chunks which are either
  // all ones or all zeros.
  unsigned OneChunks = 0;
  unsigned ZeroChunks = 0;
  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
    const unsigned Chunk = (Imm >> Shift) & Mask;
    if (Chunk == Mask)
      OneChunks++;
    else if (Chunk == 0)
      ZeroChunks++;
  }

  // Prefer MOVZ/MOVN over ORR because of the rules for the "mov" alias.
  if ((BitSize / 16) - OneChunks <= 1 || (BitSize / 16) - ZeroChunks <= 1) {
    expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
    return;
  }

  // Try a single ORR.
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
    unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
    Insn.push_back({ Opc, 0, Encoding });
    return;
  }

  // One- to three-instruction sequences.
  //
  // Prefer MOVZ/MOVN followed by MOVK; it's more readable, and possibly the
  // fastest sequence with fast literal generation.
  if (OneChunks >= (BitSize / 16) - 2 || ZeroChunks >= (BitSize / 16) - 2) {
    expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
    return;
  }

  assert(BitSize == 64 && "All 32-bit immediates can be expanded with a "
                          "MOVZ/MOVK pair");

  // Try other two-instruction sequences.

  // 64-bit ORR followed by MOVK.
  // We try to construct the ORR immediate in three different ways: either we
  // zero out the chunk which will be replaced, we fill the chunk which will
  // be replaced with ones, or we take the bit pattern from the other half of
  // the 64-bit immediate. This is comprehensive because of the way ORR
  // immediates are constructed.
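  //
  // For example (illustrative): for UImm = 0x00FF00FF00FFAB00, taking the
  // pattern from the other half turns chunk 0 into 0x00FF, and the resulting
  // 0x00FF00FF00FF00FF is encodable, so we emit
  //   ORR  x0, xzr, #0x00FF00FF00FF00FF
  //   MOVK x0, #0xAB00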
  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
    uint64_t ShiftedMask = (0xFFFFULL << Shift);
    uint64_t ZeroChunk = UImm & ~ShiftedMask;
    uint64_t OneChunk = UImm | ShiftedMask;
    uint64_t RotatedImm = (UImm << 32) | (UImm >> 32);
    uint64_t ReplicateChunk = ZeroChunk | (RotatedImm & ShiftedMask);
    if (AArch64_AM::processLogicalImmediate(ZeroChunk, BitSize, Encoding) ||
        AArch64_AM::processLogicalImmediate(OneChunk, BitSize, Encoding) ||
        AArch64_AM::processLogicalImmediate(ReplicateChunk, BitSize,
                                            Encoding)) {
      // Create the ORR-immediate instruction.
      Insn.push_back({ AArch64::ORRXri, 0, Encoding });

      // Create the MOVK instruction.
      const unsigned Imm16 = getChunk(UImm, Shift / 16);
      Insn.push_back({ AArch64::MOVKXi, Imm16,
                       AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift) });
      return;
    }
  }

  // Attempt to use a sequence of two ORR-immediate instructions.
  if (tryOrrOfLogicalImmediates(Imm, Insn))
    return;

  // Attempt to use a sequence of ORR-immediate followed by AND-immediate.
  if (tryAndOfLogicalImmediates(Imm, Insn))
    return;

  // Attempt to use a sequence of ORR-immediate followed by EOR-immediate.
  if (tryEorOfLogicalImmediates(UImm, Insn))
    return;

  // FIXME: Add more two-instruction sequences.

  // Three instruction sequences.
  //
  // Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly
  // the fastest sequence with fast literal generation. (If neither MOVK is
  // part of a fast literal generation pair, it could be slower than the
  // four-instruction sequence, but we won't worry about that for now.)
  if (OneChunks || ZeroChunks) {
    expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
    return;
  }

  // Check for identical 16-bit chunks within the constant and if so materialize
  // them with a single ORR instruction. The remaining one or two 16-bit chunks
  // will be materialized with MOVK instructions.
  if (BitSize == 64 && tryToreplicateChunks(UImm, Insn))
    return;

  // Check whether the constant contains a sequence of contiguous ones, which
  // might be interrupted by one or two chunks. If so, materialize the sequence
  // of contiguous ones with an ORR instruction. Materialize the chunks which
  // are either interrupting the sequence or outside of the sequence with a
  // MOVK instruction.
  if (BitSize == 64 && trySequenceOfOnes(UImm, Insn))
    return;

  // We found no possible two or three instruction sequence; use the general
  // four-instruction sequence.
  expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
}