1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// This file implements the targeting of the InstructionSelector class for
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
14 #include "AArch64GlobalISelUtils.h"
15 #include "AArch64InstrInfo.h"
16 #include "AArch64MachineFunctionInfo.h"
17 #include "AArch64RegisterBankInfo.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "AArch64TargetMachine.h"
21 #include "MCTargetDesc/AArch64AddressingModes.h"
22 #include "MCTargetDesc/AArch64MCTargetDesc.h"
23 #include "llvm/BinaryFormat/Dwarf.h"
24 #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
25 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
26 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
27 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
28 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
29 #include "llvm/CodeGen/GlobalISel/Utils.h"
30 #include "llvm/CodeGen/MachineBasicBlock.h"
31 #include "llvm/CodeGen/MachineConstantPool.h"
32 #include "llvm/CodeGen/MachineFrameInfo.h"
33 #include "llvm/CodeGen/MachineFunction.h"
34 #include "llvm/CodeGen/MachineInstr.h"
35 #include "llvm/CodeGen/MachineInstrBuilder.h"
36 #include "llvm/CodeGen/MachineMemOperand.h"
37 #include "llvm/CodeGen/MachineOperand.h"
38 #include "llvm/CodeGen/MachineRegisterInfo.h"
39 #include "llvm/CodeGen/TargetOpcodes.h"
40 #include "llvm/CodeGen/TargetRegisterInfo.h"
41 #include "llvm/IR/Constants.h"
42 #include "llvm/IR/DerivedTypes.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/IntrinsicsAArch64.h"
45 #include "llvm/IR/Type.h"
46 #include "llvm/Pass.h"
47 #include "llvm/Support/Debug.h"
48 #include "llvm/Support/raw_ostream.h"
51 #define DEBUG_TYPE "aarch64-isel"
54 using namespace MIPatternMatch
;
55 using namespace AArch64GISelUtils
;
58 class BlockFrequencyInfo
;
59 class ProfileSummaryInfo
;
64 #define GET_GLOBALISEL_PREDICATE_BITSET
65 #include "AArch64GenGlobalISel.inc"
66 #undef GET_GLOBALISEL_PREDICATE_BITSET
69 class AArch64InstructionSelector
: public InstructionSelector
{
71 AArch64InstructionSelector(const AArch64TargetMachine
&TM
,
72 const AArch64Subtarget
&STI
,
73 const AArch64RegisterBankInfo
&RBI
);
75 bool select(MachineInstr
&I
) override
;
76 static const char *getName() { return DEBUG_TYPE
; }
78 void setupMF(MachineFunction
&MF
, GISelKnownBits
*KB
,
79 CodeGenCoverage
*CoverageInfo
, ProfileSummaryInfo
*PSI
,
80 BlockFrequencyInfo
*BFI
) override
{
81 InstructionSelector::setupMF(MF
, KB
, CoverageInfo
, PSI
, BFI
);
84 // hasFnAttribute() is expensive to call on every BRCOND selection, so
85 // cache it here for each run of the selector.
86 ProduceNonFlagSettingCondBr
=
87 !MF
.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening
);
88 MFReturnAddr
= Register();
94 /// tblgen-erated 'select' implementation, used as the initial selector for
95 /// the patterns that don't require complex C++.
96 bool selectImpl(MachineInstr
&I
, CodeGenCoverage
&CoverageInfo
) const;
98 // A lowering phase that runs before any selection attempts.
99 // Returns true if the instruction was modified.
100 bool preISelLower(MachineInstr
&I
);
102 // An early selection function that runs before the selectImpl() call.
103 bool earlySelect(MachineInstr
&I
);
105 /// Save state that is shared between select calls, call select on \p I and
106 /// then restore the saved state. This can be used to recursively call select
107 /// within a select call.
108 bool selectAndRestoreState(MachineInstr
&I
);
110 // Do some preprocessing of G_PHIs before we begin selection.
111 void processPHIs(MachineFunction
&MF
);
113 bool earlySelectSHL(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
115 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
116 bool contractCrossBankCopyIntoStore(MachineInstr
&I
,
117 MachineRegisterInfo
&MRI
);
119 bool convertPtrAddToAdd(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
121 bool selectVaStartAAPCS(MachineInstr
&I
, MachineFunction
&MF
,
122 MachineRegisterInfo
&MRI
) const;
123 bool selectVaStartDarwin(MachineInstr
&I
, MachineFunction
&MF
,
124 MachineRegisterInfo
&MRI
) const;
127 /// Helper functions for selectCompareBranch.
128 bool selectCompareBranchFedByFCmp(MachineInstr
&I
, MachineInstr
&FCmp
,
129 MachineIRBuilder
&MIB
) const;
130 bool selectCompareBranchFedByICmp(MachineInstr
&I
, MachineInstr
&ICmp
,
131 MachineIRBuilder
&MIB
) const;
132 bool tryOptCompareBranchFedByICmp(MachineInstr
&I
, MachineInstr
&ICmp
,
133 MachineIRBuilder
&MIB
) const;
134 bool tryOptAndIntoCompareBranch(MachineInstr
&AndInst
, bool Invert
,
135 MachineBasicBlock
*DstMBB
,
136 MachineIRBuilder
&MIB
) const;
139 bool selectCompareBranch(MachineInstr
&I
, MachineFunction
&MF
,
140 MachineRegisterInfo
&MRI
);
142 bool selectVectorAshrLshr(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
143 bool selectVectorSHL(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
145 // Helper to generate an equivalent of scalar_to_vector into a new register,
146 // returned via 'Dst'.
147 MachineInstr
*emitScalarToVector(unsigned EltSize
,
148 const TargetRegisterClass
*DstRC
,
150 MachineIRBuilder
&MIRBuilder
) const;
151 /// Helper to narrow vector that was widened by emitScalarToVector.
152 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
153 /// vector, correspondingly.
154 MachineInstr
*emitNarrowVector(Register DstReg
, Register SrcReg
,
155 MachineIRBuilder
&MIRBuilder
,
156 MachineRegisterInfo
&MRI
) const;
158 /// Emit a lane insert into \p DstReg, or a new vector register if
159 /// std::nullopt is provided.
161 /// The lane inserted into is defined by \p LaneIdx. The vector source
162 /// register is given by \p SrcReg. The register containing the element is
163 /// given by \p EltReg.
164 MachineInstr
*emitLaneInsert(std::optional
<Register
> DstReg
, Register SrcReg
,
165 Register EltReg
, unsigned LaneIdx
,
166 const RegisterBank
&RB
,
167 MachineIRBuilder
&MIRBuilder
) const;
169 /// Emit a sequence of instructions representing a constant \p CV for a
170 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 /// \returns the last instruction in the sequence on success, and nullptr
174 MachineInstr
*emitConstantVector(Register Dst
, Constant
*CV
,
175 MachineIRBuilder
&MIRBuilder
,
176 MachineRegisterInfo
&MRI
);
178 MachineInstr
*tryAdvSIMDModImm8(Register Dst
, unsigned DstSize
, APInt Bits
,
179 MachineIRBuilder
&MIRBuilder
);
181 MachineInstr
*tryAdvSIMDModImm16(Register Dst
, unsigned DstSize
, APInt Bits
,
182 MachineIRBuilder
&MIRBuilder
, bool Inv
);
184 MachineInstr
*tryAdvSIMDModImm32(Register Dst
, unsigned DstSize
, APInt Bits
,
185 MachineIRBuilder
&MIRBuilder
, bool Inv
);
186 MachineInstr
*tryAdvSIMDModImm64(Register Dst
, unsigned DstSize
, APInt Bits
,
187 MachineIRBuilder
&MIRBuilder
);
188 MachineInstr
*tryAdvSIMDModImm321s(Register Dst
, unsigned DstSize
, APInt Bits
,
189 MachineIRBuilder
&MIRBuilder
, bool Inv
);
190 MachineInstr
*tryAdvSIMDModImmFP(Register Dst
, unsigned DstSize
, APInt Bits
,
191 MachineIRBuilder
&MIRBuilder
);
193 bool tryOptConstantBuildVec(MachineInstr
&MI
, LLT DstTy
,
194 MachineRegisterInfo
&MRI
);
195 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 bool tryOptBuildVecToSubregToReg(MachineInstr
&MI
, MachineRegisterInfo
&MRI
);
198 bool selectBuildVector(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
199 bool selectMergeValues(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
200 bool selectUnmergeValues(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
202 bool selectShuffleVector(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
203 bool selectExtractElt(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
204 bool selectConcatVectors(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
205 bool selectSplitVectorUnmerge(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
207 /// Helper function to select vector load intrinsics like
208 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
209 /// \p Opc is the opcode that the selected instruction should use.
210 /// \p NumVecs is the number of vector destinations for the instruction.
211 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
212 bool selectVectorLoadIntrinsic(unsigned Opc
, unsigned NumVecs
,
214 bool selectVectorLoadLaneIntrinsic(unsigned Opc
, unsigned NumVecs
,
216 void selectVectorStoreIntrinsic(MachineInstr
&I
, unsigned NumVecs
,
218 bool selectVectorStoreLaneIntrinsic(MachineInstr
&I
, unsigned NumVecs
,
220 bool selectIntrinsicWithSideEffects(MachineInstr
&I
,
221 MachineRegisterInfo
&MRI
);
222 bool selectIntrinsic(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
223 bool selectJumpTable(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
224 bool selectBrJT(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
225 bool selectTLSGlobalValue(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
226 bool selectPtrAuthGlobalValue(MachineInstr
&I
,
227 MachineRegisterInfo
&MRI
) const;
228 bool selectReduction(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
229 bool selectMOPS(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
230 bool selectUSMovFromExtend(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
231 void SelectTable(MachineInstr
&I
, MachineRegisterInfo
&MRI
, unsigned NumVecs
,
232 unsigned Opc1
, unsigned Opc2
, bool isExt
);
234 bool selectIndexedExtLoad(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
235 bool selectIndexedLoad(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
236 bool selectIndexedStore(GIndexedStore
&I
, MachineRegisterInfo
&MRI
);
238 unsigned emitConstantPoolEntry(const Constant
*CPVal
,
239 MachineFunction
&MF
) const;
240 MachineInstr
*emitLoadFromConstantPool(const Constant
*CPVal
,
241 MachineIRBuilder
&MIRBuilder
) const;
243 // Emit a vector concat operation.
244 MachineInstr
*emitVectorConcat(std::optional
<Register
> Dst
, Register Op1
,
246 MachineIRBuilder
&MIRBuilder
) const;
248 // Emit an integer compare between LHS and RHS, which checks for Predicate.
249 MachineInstr
*emitIntegerCompare(MachineOperand
&LHS
, MachineOperand
&RHS
,
250 MachineOperand
&Predicate
,
251 MachineIRBuilder
&MIRBuilder
) const;
253 /// Emit a floating point comparison between \p LHS and \p RHS.
254 /// \p Pred if given is the intended predicate to use.
256 emitFPCompare(Register LHS
, Register RHS
, MachineIRBuilder
&MIRBuilder
,
257 std::optional
<CmpInst::Predicate
> = std::nullopt
) const;
260 emitInstr(unsigned Opcode
, std::initializer_list
<llvm::DstOp
> DstOps
,
261 std::initializer_list
<llvm::SrcOp
> SrcOps
,
262 MachineIRBuilder
&MIRBuilder
,
263 const ComplexRendererFns
&RenderFns
= std::nullopt
) const;
264 /// Helper function to emit an add or sub instruction.
266 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
267 /// in a specific order.
269 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
272 /// const std::array<std::array<unsigned, 2>, 4> Table {
273 /// {{AArch64::ADDXri, AArch64::ADDWri},
274 /// {AArch64::ADDXrs, AArch64::ADDWrs},
275 /// {AArch64::ADDXrr, AArch64::ADDWrr},
276 /// {AArch64::SUBXri, AArch64::SUBWri},
277 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
280 /// Each row in the table corresponds to a different addressing mode. Each
281 /// column corresponds to a different register size.
283 /// \attention Rows must be structured as follows:
284 /// - Row 0: The ri opcode variants
285 /// - Row 1: The rs opcode variants
286 /// - Row 2: The rr opcode variants
287 /// - Row 3: The ri opcode variants for negative immediates
288 /// - Row 4: The rx opcode variants
290 /// \attention Columns must be structured as follows:
291 /// - Column 0: The 64-bit opcode variants
292 /// - Column 1: The 32-bit opcode variants
294 /// \p Dst is the destination register of the binop to emit.
295 /// \p LHS is the left-hand operand of the binop to emit.
296 /// \p RHS is the right-hand operand of the binop to emit.
297 MachineInstr
*emitAddSub(
298 const std::array
<std::array
<unsigned, 2>, 5> &AddrModeAndSizeToOpcode
,
299 Register Dst
, MachineOperand
&LHS
, MachineOperand
&RHS
,
300 MachineIRBuilder
&MIRBuilder
) const;
301 MachineInstr
*emitADD(Register DefReg
, MachineOperand
&LHS
,
303 MachineIRBuilder
&MIRBuilder
) const;
304 MachineInstr
*emitADDS(Register Dst
, MachineOperand
&LHS
, MachineOperand
&RHS
,
305 MachineIRBuilder
&MIRBuilder
) const;
306 MachineInstr
*emitSUBS(Register Dst
, MachineOperand
&LHS
, MachineOperand
&RHS
,
307 MachineIRBuilder
&MIRBuilder
) const;
308 MachineInstr
*emitADCS(Register Dst
, MachineOperand
&LHS
, MachineOperand
&RHS
,
309 MachineIRBuilder
&MIRBuilder
) const;
310 MachineInstr
*emitSBCS(Register Dst
, MachineOperand
&LHS
, MachineOperand
&RHS
,
311 MachineIRBuilder
&MIRBuilder
) const;
312 MachineInstr
*emitCMN(MachineOperand
&LHS
, MachineOperand
&RHS
,
313 MachineIRBuilder
&MIRBuilder
) const;
314 MachineInstr
*emitTST(MachineOperand
&LHS
, MachineOperand
&RHS
,
315 MachineIRBuilder
&MIRBuilder
) const;
316 MachineInstr
*emitSelect(Register Dst
, Register LHS
, Register RHS
,
317 AArch64CC::CondCode CC
,
318 MachineIRBuilder
&MIRBuilder
) const;
319 MachineInstr
*emitExtractVectorElt(std::optional
<Register
> DstReg
,
320 const RegisterBank
&DstRB
, LLT ScalarTy
,
321 Register VecReg
, unsigned LaneIdx
,
322 MachineIRBuilder
&MIRBuilder
) const;
323 MachineInstr
*emitCSINC(Register Dst
, Register Src1
, Register Src2
,
324 AArch64CC::CondCode Pred
,
325 MachineIRBuilder
&MIRBuilder
) const;
326 /// Emit a CSet for a FP compare.
328 /// \p Dst is expected to be a 32-bit scalar register.
329 MachineInstr
*emitCSetForFCmp(Register Dst
, CmpInst::Predicate Pred
,
330 MachineIRBuilder
&MIRBuilder
) const;
332 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
333 /// Might elide the instruction if the previous instruction already sets NZCV
335 MachineInstr
*emitCarryIn(MachineInstr
&I
, Register CarryReg
);
337 /// Emit the overflow op for \p Opcode.
339 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
341 std::pair
<MachineInstr
*, AArch64CC::CondCode
>
342 emitOverflowOp(unsigned Opcode
, Register Dst
, MachineOperand
&LHS
,
343 MachineOperand
&RHS
, MachineIRBuilder
&MIRBuilder
) const;
345 bool selectOverflowOp(MachineInstr
&I
, MachineRegisterInfo
&MRI
);
347 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
348 /// In some cases this is even possible with OR operations in the expression.
349 MachineInstr
*emitConjunction(Register Val
, AArch64CC::CondCode
&OutCC
,
350 MachineIRBuilder
&MIB
) const;
351 MachineInstr
*emitConditionalComparison(Register LHS
, Register RHS
,
352 CmpInst::Predicate CC
,
353 AArch64CC::CondCode Predicate
,
354 AArch64CC::CondCode OutCC
,
355 MachineIRBuilder
&MIB
) const;
356 MachineInstr
*emitConjunctionRec(Register Val
, AArch64CC::CondCode
&OutCC
,
357 bool Negate
, Register CCOp
,
358 AArch64CC::CondCode Predicate
,
359 MachineIRBuilder
&MIB
) const;
361 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
362 /// \p IsNegative is true if the test should be "not zero".
363 /// This will also optimize the test bit instruction when possible.
364 MachineInstr
*emitTestBit(Register TestReg
, uint64_t Bit
, bool IsNegative
,
365 MachineBasicBlock
*DstMBB
,
366 MachineIRBuilder
&MIB
) const;
368 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
369 MachineInstr
*emitCBZ(Register CompareReg
, bool IsNegative
,
370 MachineBasicBlock
*DestMBB
,
371 MachineIRBuilder
&MIB
) const;
373 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
374 // We use these manually instead of using the importer since it doesn't
375 // support SDNodeXForm.
376 ComplexRendererFns
selectShiftA_32(const MachineOperand
&Root
) const;
377 ComplexRendererFns
selectShiftB_32(const MachineOperand
&Root
) const;
378 ComplexRendererFns
selectShiftA_64(const MachineOperand
&Root
) const;
379 ComplexRendererFns
selectShiftB_64(const MachineOperand
&Root
) const;
381 ComplexRendererFns
select12BitValueWithLeftShift(uint64_t Immed
) const;
382 ComplexRendererFns
selectArithImmed(MachineOperand
&Root
) const;
383 ComplexRendererFns
selectNegArithImmed(MachineOperand
&Root
) const;
385 ComplexRendererFns
selectAddrModeUnscaled(MachineOperand
&Root
,
386 unsigned Size
) const;
388 ComplexRendererFns
selectAddrModeUnscaled8(MachineOperand
&Root
) const {
389 return selectAddrModeUnscaled(Root
, 1);
391 ComplexRendererFns
selectAddrModeUnscaled16(MachineOperand
&Root
) const {
392 return selectAddrModeUnscaled(Root
, 2);
394 ComplexRendererFns
selectAddrModeUnscaled32(MachineOperand
&Root
) const {
395 return selectAddrModeUnscaled(Root
, 4);
397 ComplexRendererFns
selectAddrModeUnscaled64(MachineOperand
&Root
) const {
398 return selectAddrModeUnscaled(Root
, 8);
400 ComplexRendererFns
selectAddrModeUnscaled128(MachineOperand
&Root
) const {
401 return selectAddrModeUnscaled(Root
, 16);
404 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
405 /// from complex pattern matchers like selectAddrModeIndexed().
406 ComplexRendererFns
tryFoldAddLowIntoImm(MachineInstr
&RootDef
, unsigned Size
,
407 MachineRegisterInfo
&MRI
) const;
409 ComplexRendererFns
selectAddrModeIndexed(MachineOperand
&Root
,
410 unsigned Size
) const;
412 ComplexRendererFns
selectAddrModeIndexed(MachineOperand
&Root
) const {
413 return selectAddrModeIndexed(Root
, Width
/ 8);
417 isWorthFoldingIntoAddrMode(MachineInstr
&MI
,
418 const MachineRegisterInfo
&MRI
) const;
420 bool isWorthFoldingIntoExtendedReg(MachineInstr
&MI
,
421 const MachineRegisterInfo
&MRI
,
422 bool IsAddrOperand
) const;
424 selectAddrModeShiftedExtendXReg(MachineOperand
&Root
,
425 unsigned SizeInBytes
) const;
427 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
428 /// or not a shift + extend should be folded into an addressing mode. Returns
429 /// None when this is not profitable or possible.
431 selectExtendedSHL(MachineOperand
&Root
, MachineOperand
&Base
,
432 MachineOperand
&Offset
, unsigned SizeInBytes
,
433 bool WantsExt
) const;
434 ComplexRendererFns
selectAddrModeRegisterOffset(MachineOperand
&Root
) const;
435 ComplexRendererFns
selectAddrModeXRO(MachineOperand
&Root
,
436 unsigned SizeInBytes
) const;
438 ComplexRendererFns
selectAddrModeXRO(MachineOperand
&Root
) const {
439 return selectAddrModeXRO(Root
, Width
/ 8);
442 ComplexRendererFns
selectAddrModeWRO(MachineOperand
&Root
,
443 unsigned SizeInBytes
) const;
445 ComplexRendererFns
selectAddrModeWRO(MachineOperand
&Root
) const {
446 return selectAddrModeWRO(Root
, Width
/ 8);
449 ComplexRendererFns
selectShiftedRegister(MachineOperand
&Root
,
450 bool AllowROR
= false) const;
452 ComplexRendererFns
selectArithShiftedRegister(MachineOperand
&Root
) const {
453 return selectShiftedRegister(Root
);
456 ComplexRendererFns
selectLogicalShiftedRegister(MachineOperand
&Root
) const {
457 return selectShiftedRegister(Root
, true);
460 /// Given an extend instruction, determine the correct shift-extend type for
461 /// that instruction.
463 /// If the instruction is going to be used in a load or store, pass
464 /// \p IsLoadStore = true.
465 AArch64_AM::ShiftExtendType
466 getExtendTypeForInst(MachineInstr
&MI
, MachineRegisterInfo
&MRI
,
467 bool IsLoadStore
= false) const;
469 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
471 /// \returns Either \p Reg if no change was necessary, or the new register
472 /// created by moving \p Reg.
474 /// Note: This uses emitCopy right now.
475 Register
moveScalarRegClass(Register Reg
, const TargetRegisterClass
&RC
,
476 MachineIRBuilder
&MIB
) const;
478 ComplexRendererFns
selectArithExtendedRegister(MachineOperand
&Root
) const;
480 ComplexRendererFns
selectExtractHigh(MachineOperand
&Root
) const;
482 void renderTruncImm(MachineInstrBuilder
&MIB
, const MachineInstr
&MI
,
483 int OpIdx
= -1) const;
484 void renderLogicalImm32(MachineInstrBuilder
&MIB
, const MachineInstr
&I
,
485 int OpIdx
= -1) const;
486 void renderLogicalImm64(MachineInstrBuilder
&MIB
, const MachineInstr
&I
,
487 int OpIdx
= -1) const;
488 void renderUbsanTrap(MachineInstrBuilder
&MIB
, const MachineInstr
&MI
,
490 void renderFPImm16(MachineInstrBuilder
&MIB
, const MachineInstr
&MI
,
491 int OpIdx
= -1) const;
492 void renderFPImm32(MachineInstrBuilder
&MIB
, const MachineInstr
&MI
,
493 int OpIdx
= -1) const;
494 void renderFPImm64(MachineInstrBuilder
&MIB
, const MachineInstr
&MI
,
495 int OpIdx
= -1) const;
496 void renderFPImm32SIMDModImmType4(MachineInstrBuilder
&MIB
,
497 const MachineInstr
&MI
,
498 int OpIdx
= -1) const;
500 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
501 void materializeLargeCMVal(MachineInstr
&I
, const Value
*V
, unsigned OpFlags
);
503 // Optimization methods.
504 bool tryOptSelect(GSelect
&Sel
);
505 bool tryOptSelectConjunction(GSelect
&Sel
, MachineInstr
&CondMI
);
506 MachineInstr
*tryFoldIntegerCompare(MachineOperand
&LHS
, MachineOperand
&RHS
,
507 MachineOperand
&Predicate
,
508 MachineIRBuilder
&MIRBuilder
) const;
510 /// Return true if \p MI is a load or store of \p NumBytes bytes.
511 bool isLoadStoreOfNumBytes(const MachineInstr
&MI
, unsigned NumBytes
) const;
513 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
514 /// register zeroed out. In other words, the result of MI has been explicitly
516 bool isDef32(const MachineInstr
&MI
) const;
518 const AArch64TargetMachine
&TM
;
519 const AArch64Subtarget
&STI
;
520 const AArch64InstrInfo
&TII
;
521 const AArch64RegisterInfo
&TRI
;
522 const AArch64RegisterBankInfo
&RBI
;
524 bool ProduceNonFlagSettingCondBr
= false;
526 // Some cached values used during selection.
527 // We use LR as a live-in register, and we keep track of it here as it can be
528 // clobbered by calls.
529 Register MFReturnAddr
;
531 MachineIRBuilder MIB
;
533 #define GET_GLOBALISEL_PREDICATES_DECL
534 #include "AArch64GenGlobalISel.inc"
535 #undef GET_GLOBALISEL_PREDICATES_DECL
537 // We declare the temporaries used by selectImpl() in the class to minimize the
538 // cost of constructing placeholder values.
539 #define GET_GLOBALISEL_TEMPORARIES_DECL
540 #include "AArch64GenGlobalISel.inc"
541 #undef GET_GLOBALISEL_TEMPORARIES_DECL
544 } // end anonymous namespace
546 #define GET_GLOBALISEL_IMPL
547 #include "AArch64GenGlobalISel.inc"
548 #undef GET_GLOBALISEL_IMPL
550 AArch64InstructionSelector::AArch64InstructionSelector(
551 const AArch64TargetMachine
&TM
, const AArch64Subtarget
&STI
,
552 const AArch64RegisterBankInfo
&RBI
)
553 : TM(TM
), STI(STI
), TII(*STI
.getInstrInfo()), TRI(*STI
.getRegisterInfo()),
555 #define GET_GLOBALISEL_PREDICATES_INIT
556 #include "AArch64GenGlobalISel.inc"
557 #undef GET_GLOBALISEL_PREDICATES_INIT
558 #define GET_GLOBALISEL_TEMPORARIES_INIT
559 #include "AArch64GenGlobalISel.inc"
560 #undef GET_GLOBALISEL_TEMPORARIES_INIT
564 // FIXME: This should be target-independent, inferred from the types declared
565 // for each class in the bank.
567 /// Given a register bank, and a type, return the smallest register class that
568 /// can represent that combination.
569 static const TargetRegisterClass
*
570 getRegClassForTypeOnBank(LLT Ty
, const RegisterBank
&RB
,
571 bool GetAllRegSet
= false) {
572 if (RB
.getID() == AArch64::GPRRegBankID
) {
573 if (Ty
.getSizeInBits() <= 32)
574 return GetAllRegSet
? &AArch64::GPR32allRegClass
575 : &AArch64::GPR32RegClass
;
576 if (Ty
.getSizeInBits() == 64)
577 return GetAllRegSet
? &AArch64::GPR64allRegClass
578 : &AArch64::GPR64RegClass
;
579 if (Ty
.getSizeInBits() == 128)
580 return &AArch64::XSeqPairsClassRegClass
;
584 if (RB
.getID() == AArch64::FPRRegBankID
) {
585 switch (Ty
.getSizeInBits()) {
587 return &AArch64::FPR8RegClass
;
589 return &AArch64::FPR16RegClass
;
591 return &AArch64::FPR32RegClass
;
593 return &AArch64::FPR64RegClass
;
595 return &AArch64::FPR128RegClass
;
603 /// Given a register bank, and size in bits, return the smallest register class
604 /// that can represent that combination.
605 static const TargetRegisterClass
*
606 getMinClassForRegBank(const RegisterBank
&RB
, TypeSize SizeInBits
,
607 bool GetAllRegSet
= false) {
608 if (SizeInBits
.isScalable()) {
609 assert(RB
.getID() == AArch64::FPRRegBankID
&&
610 "Expected FPR regbank for scalable type size");
611 return &AArch64::ZPRRegClass
;
614 unsigned RegBankID
= RB
.getID();
616 if (RegBankID
== AArch64::GPRRegBankID
) {
617 assert(!SizeInBits
.isScalable() && "Unexpected scalable register size");
618 if (SizeInBits
<= 32)
619 return GetAllRegSet
? &AArch64::GPR32allRegClass
620 : &AArch64::GPR32RegClass
;
621 if (SizeInBits
== 64)
622 return GetAllRegSet
? &AArch64::GPR64allRegClass
623 : &AArch64::GPR64RegClass
;
624 if (SizeInBits
== 128)
625 return &AArch64::XSeqPairsClassRegClass
;
628 if (RegBankID
== AArch64::FPRRegBankID
) {
629 if (SizeInBits
.isScalable()) {
630 assert(SizeInBits
== TypeSize::getScalable(128) &&
631 "Unexpected scalable register size");
632 return &AArch64::ZPRRegClass
;
635 switch (SizeInBits
) {
639 return &AArch64::FPR8RegClass
;
641 return &AArch64::FPR16RegClass
;
643 return &AArch64::FPR32RegClass
;
645 return &AArch64::FPR64RegClass
;
647 return &AArch64::FPR128RegClass
;
654 /// Returns the correct subregister to use for a given register class.
655 static bool getSubRegForClass(const TargetRegisterClass
*RC
,
656 const TargetRegisterInfo
&TRI
, unsigned &SubReg
) {
657 switch (TRI
.getRegSizeInBits(*RC
)) {
659 SubReg
= AArch64::bsub
;
662 SubReg
= AArch64::hsub
;
665 if (RC
!= &AArch64::FPR32RegClass
)
666 SubReg
= AArch64::sub_32
;
668 SubReg
= AArch64::ssub
;
671 SubReg
= AArch64::dsub
;
675 dbgs() << "Couldn't find appropriate subregister for register class.");
682 /// Returns the minimum size the given register bank can hold.
683 static unsigned getMinSizeForRegBank(const RegisterBank
&RB
) {
684 switch (RB
.getID()) {
685 case AArch64::GPRRegBankID
:
687 case AArch64::FPRRegBankID
:
690 llvm_unreachable("Tried to get minimum size for unknown register bank.");
694 /// Create a REG_SEQUENCE instruction using the registers in \p Regs.
695 /// Helper function for functions like createDTuple and createQTuple.
697 /// \p RegClassIDs - The list of register class IDs available for some tuple of
698 /// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
699 /// expected to contain between 2 and 4 tuple classes.
701 /// \p SubRegs - The list of subregister classes associated with each register
702 /// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
703 /// subregister class. The index of each subregister class is expected to
704 /// correspond with the index of each register class.
706 /// \returns Either the destination register of REG_SEQUENCE instruction that
707 /// was created, or the 0th element of \p Regs if \p Regs contains a single
709 static Register
createTuple(ArrayRef
<Register
> Regs
,
710 const unsigned RegClassIDs
[],
711 const unsigned SubRegs
[], MachineIRBuilder
&MIB
) {
712 unsigned NumRegs
= Regs
.size();
715 assert(NumRegs
>= 2 && NumRegs
<= 4 &&
716 "Only support between two and 4 registers in a tuple!");
717 const TargetRegisterInfo
*TRI
= MIB
.getMF().getSubtarget().getRegisterInfo();
718 auto *DesiredClass
= TRI
->getRegClass(RegClassIDs
[NumRegs
- 2]);
720 MIB
.buildInstr(TargetOpcode::REG_SEQUENCE
, {DesiredClass
}, {});
721 for (unsigned I
= 0, E
= Regs
.size(); I
< E
; ++I
) {
722 RegSequence
.addUse(Regs
[I
]);
723 RegSequence
.addImm(SubRegs
[I
]);
725 return RegSequence
.getReg(0);
728 /// Create a tuple of D-registers using the registers in \p Regs.
729 static Register
createDTuple(ArrayRef
<Register
> Regs
, MachineIRBuilder
&MIB
) {
730 static const unsigned RegClassIDs
[] = {
731 AArch64::DDRegClassID
, AArch64::DDDRegClassID
, AArch64::DDDDRegClassID
};
732 static const unsigned SubRegs
[] = {AArch64::dsub0
, AArch64::dsub1
,
733 AArch64::dsub2
, AArch64::dsub3
};
734 return createTuple(Regs
, RegClassIDs
, SubRegs
, MIB
);
737 /// Create a tuple of Q-registers using the registers in \p Regs.
738 static Register
createQTuple(ArrayRef
<Register
> Regs
, MachineIRBuilder
&MIB
) {
739 static const unsigned RegClassIDs
[] = {
740 AArch64::QQRegClassID
, AArch64::QQQRegClassID
, AArch64::QQQQRegClassID
};
741 static const unsigned SubRegs
[] = {AArch64::qsub0
, AArch64::qsub1
,
742 AArch64::qsub2
, AArch64::qsub3
};
743 return createTuple(Regs
, RegClassIDs
, SubRegs
, MIB
);
746 static std::optional
<uint64_t> getImmedFromMO(const MachineOperand
&Root
) {
747 auto &MI
= *Root
.getParent();
748 auto &MBB
= *MI
.getParent();
749 auto &MF
= *MBB
.getParent();
750 auto &MRI
= MF
.getRegInfo();
753 Immed
= Root
.getImm();
754 else if (Root
.isCImm())
755 Immed
= Root
.getCImm()->getZExtValue();
756 else if (Root
.isReg()) {
758 getIConstantVRegValWithLookThrough(Root
.getReg(), MRI
, true);
761 Immed
= ValAndVReg
->Value
.getSExtValue();
767 /// Check whether \p I is a currently unsupported binary operation:
768 /// - it has an unsized type
769 /// - an operand is not a vreg
770 /// - all operands are not in the same bank
771 /// These are checks that should someday live in the verifier, but right now,
772 /// these are mostly limitations of the aarch64 selector.
773 static bool unsupportedBinOp(const MachineInstr
&I
,
774 const AArch64RegisterBankInfo
&RBI
,
775 const MachineRegisterInfo
&MRI
,
776 const AArch64RegisterInfo
&TRI
) {
777 LLT Ty
= MRI
.getType(I
.getOperand(0).getReg());
779 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
783 const RegisterBank
*PrevOpBank
= nullptr;
784 for (auto &MO
: I
.operands()) {
785 // FIXME: Support non-register operands.
787 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
791 // FIXME: Can generic operations have physical registers operands? If
792 // so, this will need to be taught about that, and we'll need to get the
793 // bank out of the minimal class for the register.
794 // Either way, this needs to be documented (and possibly verified).
795 if (!MO
.getReg().isVirtual()) {
796 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
800 const RegisterBank
*OpBank
= RBI
.getRegBank(MO
.getReg(), MRI
, TRI
);
802 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
806 if (PrevOpBank
&& OpBank
!= PrevOpBank
) {
807 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
815 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
816 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
817 /// and of size \p OpSize.
818 /// \returns \p GenericOpc if the combination is unsupported.
819 static unsigned selectBinaryOp(unsigned GenericOpc
, unsigned RegBankID
,
822 case AArch64::GPRRegBankID
:
824 switch (GenericOpc
) {
825 case TargetOpcode::G_SHL
:
826 return AArch64::LSLVWr
;
827 case TargetOpcode::G_LSHR
:
828 return AArch64::LSRVWr
;
829 case TargetOpcode::G_ASHR
:
830 return AArch64::ASRVWr
;
834 } else if (OpSize
== 64) {
835 switch (GenericOpc
) {
836 case TargetOpcode::G_PTR_ADD
:
837 return AArch64::ADDXrr
;
838 case TargetOpcode::G_SHL
:
839 return AArch64::LSLVXr
;
840 case TargetOpcode::G_LSHR
:
841 return AArch64::LSRVXr
;
842 case TargetOpcode::G_ASHR
:
843 return AArch64::ASRVXr
;
849 case AArch64::FPRRegBankID
:
852 switch (GenericOpc
) {
853 case TargetOpcode::G_FADD
:
854 return AArch64::FADDSrr
;
855 case TargetOpcode::G_FSUB
:
856 return AArch64::FSUBSrr
;
857 case TargetOpcode::G_FMUL
:
858 return AArch64::FMULSrr
;
859 case TargetOpcode::G_FDIV
:
860 return AArch64::FDIVSrr
;
865 switch (GenericOpc
) {
866 case TargetOpcode::G_FADD
:
867 return AArch64::FADDDrr
;
868 case TargetOpcode::G_FSUB
:
869 return AArch64::FSUBDrr
;
870 case TargetOpcode::G_FMUL
:
871 return AArch64::FMULDrr
;
872 case TargetOpcode::G_FDIV
:
873 return AArch64::FDIVDrr
;
874 case TargetOpcode::G_OR
:
875 return AArch64::ORRv8i8
;
885 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
886 /// appropriate for the (value) register bank \p RegBankID and of memory access
887 /// size \p OpSize. This returns the variant with the base+unsigned-immediate
888 /// addressing mode (e.g., LDRXui).
889 /// \returns \p GenericOpc if the combination is unsupported.
890 static unsigned selectLoadStoreUIOp(unsigned GenericOpc
, unsigned RegBankID
,
892 const bool isStore
= GenericOpc
== TargetOpcode::G_STORE
;
894 case AArch64::GPRRegBankID
:
897 return isStore
? AArch64::STRBBui
: AArch64::LDRBBui
;
899 return isStore
? AArch64::STRHHui
: AArch64::LDRHHui
;
901 return isStore
? AArch64::STRWui
: AArch64::LDRWui
;
903 return isStore
? AArch64::STRXui
: AArch64::LDRXui
;
906 case AArch64::FPRRegBankID
:
909 return isStore
? AArch64::STRBui
: AArch64::LDRBui
;
911 return isStore
? AArch64::STRHui
: AArch64::LDRHui
;
913 return isStore
? AArch64::STRSui
: AArch64::LDRSui
;
915 return isStore
? AArch64::STRDui
: AArch64::LDRDui
;
917 return isStore
? AArch64::STRQui
: AArch64::LDRQui
;
924 /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
927 /// E.g "To = COPY SrcReg:SubReg"
928 static bool copySubReg(MachineInstr
&I
, MachineRegisterInfo
&MRI
,
929 const RegisterBankInfo
&RBI
, Register SrcReg
,
930 const TargetRegisterClass
*To
, unsigned SubReg
) {
931 assert(SrcReg
.isValid() && "Expected a valid source register?");
932 assert(To
&& "Destination register class cannot be null");
933 assert(SubReg
&& "Expected a valid subregister");
935 MachineIRBuilder
MIB(I
);
937 MIB
.buildInstr(TargetOpcode::COPY
, {To
}, {}).addReg(SrcReg
, 0, SubReg
);
938 MachineOperand
&RegOp
= I
.getOperand(1);
939 RegOp
.setReg(SubRegCopy
.getReg(0));
941 // It's possible that the destination register won't be constrained. Make
942 // sure that happens.
943 if (!I
.getOperand(0).getReg().isPhysical())
944 RBI
.constrainGenericRegister(I
.getOperand(0).getReg(), *To
, MRI
);
949 /// Helper function to get the source and destination register classes for a
950 /// copy. Returns a std::pair containing the source register class for the
951 /// copy, and the destination register class for the copy. If a register class
952 /// cannot be determined, then it will be nullptr.
953 static std::pair
<const TargetRegisterClass
*, const TargetRegisterClass
*>
954 getRegClassesForCopy(MachineInstr
&I
, const TargetInstrInfo
&TII
,
955 MachineRegisterInfo
&MRI
, const TargetRegisterInfo
&TRI
,
956 const RegisterBankInfo
&RBI
) {
957 Register DstReg
= I
.getOperand(0).getReg();
958 Register SrcReg
= I
.getOperand(1).getReg();
959 const RegisterBank
&DstRegBank
= *RBI
.getRegBank(DstReg
, MRI
, TRI
);
960 const RegisterBank
&SrcRegBank
= *RBI
.getRegBank(SrcReg
, MRI
, TRI
);
962 TypeSize DstSize
= RBI
.getSizeInBits(DstReg
, MRI
, TRI
);
963 TypeSize SrcSize
= RBI
.getSizeInBits(SrcReg
, MRI
, TRI
);
965 // Special casing for cross-bank copies of s1s. We can technically represent
966 // a 1-bit value with any size of register. The minimum size for a GPR is 32
967 // bits. So, we need to put the FPR on 32 bits as well.
969 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
970 // then we can pull it into the helpers that get the appropriate class for a
971 // register bank. Or make a new helper that carries along some constraint
973 if (SrcRegBank
!= DstRegBank
&&
974 (DstSize
== TypeSize::getFixed(1) && SrcSize
== TypeSize::getFixed(1)))
975 SrcSize
= DstSize
= TypeSize::getFixed(32);
977 return {getMinClassForRegBank(SrcRegBank
, SrcSize
, true),
978 getMinClassForRegBank(DstRegBank
, DstSize
, true)};
981 // FIXME: We need some sort of API in RBI/TRI to allow generic code to
982 // constrain operands of simple instructions given a TargetRegisterClass
984 static bool selectDebugInstr(MachineInstr
&I
, MachineRegisterInfo
&MRI
,
985 const RegisterBankInfo
&RBI
) {
986 for (MachineOperand
&MO
: I
.operands()) {
989 Register Reg
= MO
.getReg();
992 if (Reg
.isPhysical())
994 LLT Ty
= MRI
.getType(Reg
);
995 const RegClassOrRegBank
&RegClassOrBank
= MRI
.getRegClassOrRegBank(Reg
);
996 const TargetRegisterClass
*RC
=
997 dyn_cast
<const TargetRegisterClass
*>(RegClassOrBank
);
999 const RegisterBank
&RB
= *cast
<const RegisterBank
*>(RegClassOrBank
);
1000 RC
= getRegClassForTypeOnBank(Ty
, RB
);
1003 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
1007 RBI
.constrainGenericRegister(Reg
, *RC
, MRI
);
1013 static bool selectCopy(MachineInstr
&I
, const TargetInstrInfo
&TII
,
1014 MachineRegisterInfo
&MRI
, const TargetRegisterInfo
&TRI
,
1015 const RegisterBankInfo
&RBI
) {
1016 Register DstReg
= I
.getOperand(0).getReg();
1017 Register SrcReg
= I
.getOperand(1).getReg();
1018 const RegisterBank
&DstRegBank
= *RBI
.getRegBank(DstReg
, MRI
, TRI
);
1019 const RegisterBank
&SrcRegBank
= *RBI
.getRegBank(SrcReg
, MRI
, TRI
);
1021 // Find the correct register classes for the source and destination registers.
1022 const TargetRegisterClass
*SrcRC
;
1023 const TargetRegisterClass
*DstRC
;
1024 std::tie(SrcRC
, DstRC
) = getRegClassesForCopy(I
, TII
, MRI
, TRI
, RBI
);
1027 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1028 << RBI
.getSizeInBits(DstReg
, MRI
, TRI
) << '\n');
1032 // Is this a copy? If so, then we may need to insert a subregister copy.
1034 // Yes. Check if there's anything to fix up.
1036 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1040 const TypeSize SrcSize
= TRI
.getRegSizeInBits(*SrcRC
);
1041 const TypeSize DstSize
= TRI
.getRegSizeInBits(*DstRC
);
1044 // If the source bank doesn't support a subregister copy small enough,
1045 // then we first need to copy to the destination bank.
1046 if (getMinSizeForRegBank(SrcRegBank
) > DstSize
) {
1047 const TargetRegisterClass
*DstTempRC
=
1048 getMinClassForRegBank(DstRegBank
, SrcSize
, /* GetAllRegSet */ true);
1049 getSubRegForClass(DstRC
, TRI
, SubReg
);
1051 MachineIRBuilder
MIB(I
);
1052 auto Copy
= MIB
.buildCopy({DstTempRC
}, {SrcReg
});
1053 copySubReg(I
, MRI
, RBI
, Copy
.getReg(0), DstRC
, SubReg
);
1054 } else if (SrcSize
> DstSize
) {
1055 // If the source register is bigger than the destination we need to
1056 // perform a subregister copy.
1057 const TargetRegisterClass
*SubRegRC
=
1058 getMinClassForRegBank(SrcRegBank
, DstSize
, /* GetAllRegSet */ true);
1059 getSubRegForClass(SubRegRC
, TRI
, SubReg
);
1060 copySubReg(I
, MRI
, RBI
, SrcReg
, DstRC
, SubReg
);
1061 } else if (DstSize
> SrcSize
) {
1062 // If the destination register is bigger than the source we need to do
1063 // a promotion using SUBREG_TO_REG.
1064 const TargetRegisterClass
*PromotionRC
=
1065 getMinClassForRegBank(SrcRegBank
, DstSize
, /* GetAllRegSet */ true);
1066 getSubRegForClass(SrcRC
, TRI
, SubReg
);
1068 Register PromoteReg
= MRI
.createVirtualRegister(PromotionRC
);
1069 BuildMI(*I
.getParent(), I
, I
.getDebugLoc(),
1070 TII
.get(AArch64::SUBREG_TO_REG
), PromoteReg
)
1074 MachineOperand
&RegOp
= I
.getOperand(1);
1075 RegOp
.setReg(PromoteReg
);
1078 // If the destination is a physical register, then there's nothing to
1079 // change, so we're done.
1080 if (DstReg
.isPhysical())
1084 // No need to constrain SrcReg. It will get constrained when we hit another
1085 // of its use or its defs. Copies do not have constraints.
1086 if (!RBI
.constrainGenericRegister(DstReg
, *DstRC
, MRI
)) {
1087 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII
.getName(I
.getOpcode())
1092 // If this a GPR ZEXT that we want to just reduce down into a copy.
1093 // The sizes will be mismatched with the source < 32b but that's ok.
1094 if (I
.getOpcode() == TargetOpcode::G_ZEXT
) {
1095 I
.setDesc(TII
.get(AArch64::COPY
));
1096 assert(SrcRegBank
.getID() == AArch64::GPRRegBankID
);
1097 return selectCopy(I
, TII
, MRI
, TRI
, RBI
);
1100 I
.setDesc(TII
.get(AArch64::COPY
));
1104 static unsigned selectFPConvOpc(unsigned GenericOpc
, LLT DstTy
, LLT SrcTy
) {
1105 if (!DstTy
.isScalar() || !SrcTy
.isScalar())
1108 const unsigned DstSize
= DstTy
.getSizeInBits();
1109 const unsigned SrcSize
= SrcTy
.getSizeInBits();
1115 switch (GenericOpc
) {
1116 case TargetOpcode::G_SITOFP
:
1117 return AArch64::SCVTFUWSri
;
1118 case TargetOpcode::G_UITOFP
:
1119 return AArch64::UCVTFUWSri
;
1120 case TargetOpcode::G_FPTOSI
:
1121 return AArch64::FCVTZSUWSr
;
1122 case TargetOpcode::G_FPTOUI
:
1123 return AArch64::FCVTZUUWSr
;
1128 switch (GenericOpc
) {
1129 case TargetOpcode::G_SITOFP
:
1130 return AArch64::SCVTFUXSri
;
1131 case TargetOpcode::G_UITOFP
:
1132 return AArch64::UCVTFUXSri
;
1133 case TargetOpcode::G_FPTOSI
:
1134 return AArch64::FCVTZSUWDr
;
1135 case TargetOpcode::G_FPTOUI
:
1136 return AArch64::FCVTZUUWDr
;
1146 switch (GenericOpc
) {
1147 case TargetOpcode::G_SITOFP
:
1148 return AArch64::SCVTFUWDri
;
1149 case TargetOpcode::G_UITOFP
:
1150 return AArch64::UCVTFUWDri
;
1151 case TargetOpcode::G_FPTOSI
:
1152 return AArch64::FCVTZSUXSr
;
1153 case TargetOpcode::G_FPTOUI
:
1154 return AArch64::FCVTZUUXSr
;
1159 switch (GenericOpc
) {
1160 case TargetOpcode::G_SITOFP
:
1161 return AArch64::SCVTFUXDri
;
1162 case TargetOpcode::G_UITOFP
:
1163 return AArch64::UCVTFUXDri
;
1164 case TargetOpcode::G_FPTOSI
:
1165 return AArch64::FCVTZSUXDr
;
1166 case TargetOpcode::G_FPTOUI
:
1167 return AArch64::FCVTZUUXDr
;
1181 AArch64InstructionSelector::emitSelect(Register Dst
, Register True
,
1182 Register False
, AArch64CC::CondCode CC
,
1183 MachineIRBuilder
&MIB
) const {
1184 MachineRegisterInfo
&MRI
= *MIB
.getMRI();
1185 assert(RBI
.getRegBank(False
, MRI
, TRI
)->getID() ==
1186 RBI
.getRegBank(True
, MRI
, TRI
)->getID() &&
1187 "Expected both select operands to have the same regbank?");
1188 LLT Ty
= MRI
.getType(True
);
1191 const unsigned Size
= Ty
.getSizeInBits();
1192 assert((Size
== 32 || Size
== 64) &&
1193 "Expected 32 bit or 64 bit select only?");
1194 const bool Is32Bit
= Size
== 32;
1195 if (RBI
.getRegBank(True
, MRI
, TRI
)->getID() != AArch64::GPRRegBankID
) {
1196 unsigned Opc
= Is32Bit
? AArch64::FCSELSrrr
: AArch64::FCSELDrrr
;
1197 auto FCSel
= MIB
.buildInstr(Opc
, {Dst
}, {True
, False
}).addImm(CC
);
1198 constrainSelectedInstRegOperands(*FCSel
, TII
, TRI
, RBI
);
1202 // By default, we'll try and emit a CSEL.
1203 unsigned Opc
= Is32Bit
? AArch64::CSELWr
: AArch64::CSELXr
;
1204 bool Optimized
= false;
1205 auto TryFoldBinOpIntoSelect
= [&Opc
, Is32Bit
, &CC
, &MRI
,
1206 &Optimized
](Register
&Reg
, Register
&OtherReg
,
1213 // %sub = G_SUB 0, %x
1214 // %select = G_SELECT cc, %reg, %sub
1217 // %select = CSNEG %reg, %x, cc
1219 if (mi_match(Reg
, MRI
, m_Neg(m_Reg(MatchReg
)))) {
1220 Opc
= Is32Bit
? AArch64::CSNEGWr
: AArch64::CSNEGXr
;
1223 CC
= AArch64CC::getInvertedCondCode(CC
);
1224 std::swap(Reg
, OtherReg
);
1231 // %xor = G_XOR %x, -1
1232 // %select = G_SELECT cc, %reg, %xor
1235 // %select = CSINV %reg, %x, cc
1236 if (mi_match(Reg
, MRI
, m_Not(m_Reg(MatchReg
)))) {
1237 Opc
= Is32Bit
? AArch64::CSINVWr
: AArch64::CSINVXr
;
1240 CC
= AArch64CC::getInvertedCondCode(CC
);
1241 std::swap(Reg
, OtherReg
);
1248 // %add = G_ADD %x, 1
1249 // %select = G_SELECT cc, %reg, %add
1252 // %select = CSINC %reg, %x, cc
1253 if (mi_match(Reg
, MRI
,
1254 m_any_of(m_GAdd(m_Reg(MatchReg
), m_SpecificICst(1)),
1255 m_GPtrAdd(m_Reg(MatchReg
), m_SpecificICst(1))))) {
1256 Opc
= Is32Bit
? AArch64::CSINCWr
: AArch64::CSINCXr
;
1259 CC
= AArch64CC::getInvertedCondCode(CC
);
1260 std::swap(Reg
, OtherReg
);
1268 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1269 // true/false values are constants.
1270 // FIXME: All of these patterns already exist in tablegen. We should be
1271 // able to import these.
1272 auto TryOptSelectCst
= [&Opc
, &True
, &False
, &CC
, Is32Bit
, &MRI
,
1276 auto TrueCst
= getIConstantVRegValWithLookThrough(True
, MRI
);
1277 auto FalseCst
= getIConstantVRegValWithLookThrough(False
, MRI
);
1278 if (!TrueCst
&& !FalseCst
)
1281 Register ZReg
= Is32Bit
? AArch64::WZR
: AArch64::XZR
;
1282 if (TrueCst
&& FalseCst
) {
1283 int64_t T
= TrueCst
->Value
.getSExtValue();
1284 int64_t F
= FalseCst
->Value
.getSExtValue();
1286 if (T
== 0 && F
== 1) {
1287 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1288 Opc
= Is32Bit
? AArch64::CSINCWr
: AArch64::CSINCXr
;
1294 if (T
== 0 && F
== -1) {
1295 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1296 Opc
= Is32Bit
? AArch64::CSINVWr
: AArch64::CSINVXr
;
1304 int64_t T
= TrueCst
->Value
.getSExtValue();
1306 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1307 Opc
= Is32Bit
? AArch64::CSINCWr
: AArch64::CSINCXr
;
1310 CC
= AArch64CC::getInvertedCondCode(CC
);
1315 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1316 Opc
= Is32Bit
? AArch64::CSINVWr
: AArch64::CSINVXr
;
1319 CC
= AArch64CC::getInvertedCondCode(CC
);
1325 int64_t F
= FalseCst
->Value
.getSExtValue();
1327 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1328 Opc
= Is32Bit
? AArch64::CSINCWr
: AArch64::CSINCXr
;
1334 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1335 Opc
= Is32Bit
? AArch64::CSINVWr
: AArch64::CSINVXr
;
1343 Optimized
|= TryFoldBinOpIntoSelect(False
, True
, /*Invert = */ false);
1344 Optimized
|= TryFoldBinOpIntoSelect(True
, False
, /*Invert = */ true);
1345 Optimized
|= TryOptSelectCst();
1346 auto SelectInst
= MIB
.buildInstr(Opc
, {Dst
}, {True
, False
}).addImm(CC
);
1347 constrainSelectedInstRegOperands(*SelectInst
, TII
, TRI
, RBI
);
1348 return &*SelectInst
;
1351 static AArch64CC::CondCode
changeICMPPredToAArch64CC(CmpInst::Predicate P
) {
1354 llvm_unreachable("Unknown condition code!");
1355 case CmpInst::ICMP_NE
:
1356 return AArch64CC::NE
;
1357 case CmpInst::ICMP_EQ
:
1358 return AArch64CC::EQ
;
1359 case CmpInst::ICMP_SGT
:
1360 return AArch64CC::GT
;
1361 case CmpInst::ICMP_SGE
:
1362 return AArch64CC::GE
;
1363 case CmpInst::ICMP_SLT
:
1364 return AArch64CC::LT
;
1365 case CmpInst::ICMP_SLE
:
1366 return AArch64CC::LE
;
1367 case CmpInst::ICMP_UGT
:
1368 return AArch64CC::HI
;
1369 case CmpInst::ICMP_UGE
:
1370 return AArch64CC::HS
;
1371 case CmpInst::ICMP_ULT
:
1372 return AArch64CC::LO
;
1373 case CmpInst::ICMP_ULE
:
1374 return AArch64CC::LS
;
1378 /// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1379 static void changeFPCCToORAArch64CC(CmpInst::Predicate CC
,
1380 AArch64CC::CondCode
&CondCode
,
1381 AArch64CC::CondCode
&CondCode2
) {
1382 CondCode2
= AArch64CC::AL
;
1385 llvm_unreachable("Unknown FP condition!");
1386 case CmpInst::FCMP_OEQ
:
1387 CondCode
= AArch64CC::EQ
;
1389 case CmpInst::FCMP_OGT
:
1390 CondCode
= AArch64CC::GT
;
1392 case CmpInst::FCMP_OGE
:
1393 CondCode
= AArch64CC::GE
;
1395 case CmpInst::FCMP_OLT
:
1396 CondCode
= AArch64CC::MI
;
1398 case CmpInst::FCMP_OLE
:
1399 CondCode
= AArch64CC::LS
;
1401 case CmpInst::FCMP_ONE
:
1402 CondCode
= AArch64CC::MI
;
1403 CondCode2
= AArch64CC::GT
;
1405 case CmpInst::FCMP_ORD
:
1406 CondCode
= AArch64CC::VC
;
1408 case CmpInst::FCMP_UNO
:
1409 CondCode
= AArch64CC::VS
;
1411 case CmpInst::FCMP_UEQ
:
1412 CondCode
= AArch64CC::EQ
;
1413 CondCode2
= AArch64CC::VS
;
1415 case CmpInst::FCMP_UGT
:
1416 CondCode
= AArch64CC::HI
;
1418 case CmpInst::FCMP_UGE
:
1419 CondCode
= AArch64CC::PL
;
1421 case CmpInst::FCMP_ULT
:
1422 CondCode
= AArch64CC::LT
;
1424 case CmpInst::FCMP_ULE
:
1425 CondCode
= AArch64CC::LE
;
1427 case CmpInst::FCMP_UNE
:
1428 CondCode
= AArch64CC::NE
;
1433 /// Convert an IR fp condition code to an AArch64 CC.
1434 /// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1435 /// should be AND'ed instead of OR'ed.
1436 static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC
,
1437 AArch64CC::CondCode
&CondCode
,
1438 AArch64CC::CondCode
&CondCode2
) {
1439 CondCode2
= AArch64CC::AL
;
1442 changeFPCCToORAArch64CC(CC
, CondCode
, CondCode2
);
1443 assert(CondCode2
== AArch64CC::AL
);
1445 case CmpInst::FCMP_ONE
:
1447 // == ((a olt b) || (a ogt b))
1448 // == ((a ord b) && (a une b))
1449 CondCode
= AArch64CC::VC
;
1450 CondCode2
= AArch64CC::NE
;
1452 case CmpInst::FCMP_UEQ
:
1454 // == ((a uno b) || (a oeq b))
1455 // == ((a ule b) && (a uge b))
1456 CondCode
= AArch64CC::PL
;
1457 CondCode2
= AArch64CC::LE
;
1462 /// Return a register which can be used as a bit to test in a TB(N)Z.
1463 static Register
getTestBitReg(Register Reg
, uint64_t &Bit
, bool &Invert
,
1464 MachineRegisterInfo
&MRI
) {
1465 assert(Reg
.isValid() && "Expected valid register!");
1466 bool HasZext
= false;
1467 while (MachineInstr
*MI
= getDefIgnoringCopies(Reg
, MRI
)) {
1468 unsigned Opc
= MI
->getOpcode();
1470 if (!MI
->getOperand(0).isReg() ||
1471 !MRI
.hasOneNonDBGUse(MI
->getOperand(0).getReg()))
1474 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1476 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1477 // on the truncated x is the same as the bit number on x.
1478 if (Opc
== TargetOpcode::G_ANYEXT
|| Opc
== TargetOpcode::G_ZEXT
||
1479 Opc
== TargetOpcode::G_TRUNC
) {
1480 if (Opc
== TargetOpcode::G_ZEXT
)
1483 Register NextReg
= MI
->getOperand(1).getReg();
1484 // Did we find something worth folding?
1485 if (!NextReg
.isValid() || !MRI
.hasOneNonDBGUse(NextReg
))
1488 // NextReg is worth folding. Keep looking.
1493 // Attempt to find a suitable operation with a constant on one side.
1494 std::optional
<uint64_t> C
;
1499 case TargetOpcode::G_AND
:
1500 case TargetOpcode::G_XOR
: {
1501 TestReg
= MI
->getOperand(1).getReg();
1502 Register ConstantReg
= MI
->getOperand(2).getReg();
1503 auto VRegAndVal
= getIConstantVRegValWithLookThrough(ConstantReg
, MRI
);
1505 // AND commutes, check the other side for a constant.
1506 // FIXME: Can we canonicalize the constant so that it's always on the
1507 // same side at some point earlier?
1508 std::swap(ConstantReg
, TestReg
);
1509 VRegAndVal
= getIConstantVRegValWithLookThrough(ConstantReg
, MRI
);
1513 C
= VRegAndVal
->Value
.getZExtValue();
1515 C
= VRegAndVal
->Value
.getSExtValue();
1519 case TargetOpcode::G_ASHR
:
1520 case TargetOpcode::G_LSHR
:
1521 case TargetOpcode::G_SHL
: {
1522 TestReg
= MI
->getOperand(1).getReg();
1524 getIConstantVRegValWithLookThrough(MI
->getOperand(2).getReg(), MRI
);
1526 C
= VRegAndVal
->Value
.getSExtValue();
1531 // Didn't find a constant or viable register. Bail out of the loop.
1532 if (!C
|| !TestReg
.isValid())
1535 // We found a suitable instruction with a constant. Check to see if we can
1536 // walk through the instruction.
1538 unsigned TestRegSize
= MRI
.getType(TestReg
).getSizeInBits();
1542 case TargetOpcode::G_AND
:
1543 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1544 if ((*C
>> Bit
) & 1)
1547 case TargetOpcode::G_SHL
:
1548 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1549 // the type of the register.
1550 if (*C
<= Bit
&& (Bit
- *C
) < TestRegSize
) {
1555 case TargetOpcode::G_ASHR
:
1556 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1560 if (Bit
>= TestRegSize
)
1561 Bit
= TestRegSize
- 1;
1563 case TargetOpcode::G_LSHR
:
1564 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1565 if ((Bit
+ *C
) < TestRegSize
) {
1570 case TargetOpcode::G_XOR
:
1571 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1574 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1576 // tbz x', b -> tbnz x, b
1578 // Because x' only has the b-th bit set if x does not.
1579 if ((*C
>> Bit
) & 1)
1585 // Check if we found anything worth folding.
1586 if (!NextReg
.isValid())
1594 MachineInstr
*AArch64InstructionSelector::emitTestBit(
1595 Register TestReg
, uint64_t Bit
, bool IsNegative
, MachineBasicBlock
*DstMBB
,
1596 MachineIRBuilder
&MIB
) const {
1597 assert(TestReg
.isValid());
1598 assert(ProduceNonFlagSettingCondBr
&&
1599 "Cannot emit TB(N)Z with speculation tracking!");
1600 MachineRegisterInfo
&MRI
= *MIB
.getMRI();
1602 // Attempt to optimize the test bit by walking over instructions.
1603 TestReg
= getTestBitReg(TestReg
, Bit
, IsNegative
, MRI
);
1604 LLT Ty
= MRI
.getType(TestReg
);
1605 unsigned Size
= Ty
.getSizeInBits();
1606 assert(!Ty
.isVector() && "Expected a scalar!");
1607 assert(Bit
< 64 && "Bit is too large!");
1609 // When the test register is a 64-bit register, we have to narrow to make
1611 bool UseWReg
= Bit
< 32;
1612 unsigned NecessarySize
= UseWReg
? 32 : 64;
1613 if (Size
!= NecessarySize
)
1614 TestReg
= moveScalarRegClass(
1615 TestReg
, UseWReg
? AArch64::GPR32RegClass
: AArch64::GPR64RegClass
,
1618 static const unsigned OpcTable
[2][2] = {{AArch64::TBZX
, AArch64::TBNZX
},
1619 {AArch64::TBZW
, AArch64::TBNZW
}};
1620 unsigned Opc
= OpcTable
[UseWReg
][IsNegative
];
1622 MIB
.buildInstr(Opc
).addReg(TestReg
).addImm(Bit
).addMBB(DstMBB
);
1623 constrainSelectedInstRegOperands(*TestBitMI
, TII
, TRI
, RBI
);
1627 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1628 MachineInstr
&AndInst
, bool Invert
, MachineBasicBlock
*DstMBB
,
1629 MachineIRBuilder
&MIB
) const {
1630 assert(AndInst
.getOpcode() == TargetOpcode::G_AND
&& "Expected G_AND only?");
1631 // Given something like this:
1633 // %x = ...Something...
1634 // %one = G_CONSTANT i64 1
1635 // %zero = G_CONSTANT i64 0
1636 // %and = G_AND %x, %one
1637 // %cmp = G_ICMP intpred(ne), %and, %zero
1638 // %cmp_trunc = G_TRUNC %cmp
1639 // G_BRCOND %cmp_trunc, %bb.3
1641 // We want to try and fold the AND into the G_BRCOND and produce either a
1642 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1644 // In this case, we'd get
1649 // Check if the AND has a constant on its RHS which we can use as a mask.
1650 // If it's a power of 2, then it's the same as checking a specific bit.
1651 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1652 auto MaybeBit
= getIConstantVRegValWithLookThrough(
1653 AndInst
.getOperand(2).getReg(), *MIB
.getMRI());
1657 int32_t Bit
= MaybeBit
->Value
.exactLogBase2();
1661 Register TestReg
= AndInst
.getOperand(1).getReg();
1664 emitTestBit(TestReg
, Bit
, Invert
, DstMBB
, MIB
);
1668 MachineInstr
*AArch64InstructionSelector::emitCBZ(Register CompareReg
,
1670 MachineBasicBlock
*DestMBB
,
1671 MachineIRBuilder
&MIB
) const {
1672 assert(ProduceNonFlagSettingCondBr
&& "CBZ does not set flags!");
1673 MachineRegisterInfo
&MRI
= *MIB
.getMRI();
1674 assert(RBI
.getRegBank(CompareReg
, MRI
, TRI
)->getID() ==
1675 AArch64::GPRRegBankID
&&
1676 "Expected GPRs only?");
1677 auto Ty
= MRI
.getType(CompareReg
);
1678 unsigned Width
= Ty
.getSizeInBits();
1679 assert(!Ty
.isVector() && "Expected scalar only?");
1680 assert(Width
<= 64 && "Expected width to be at most 64?");
1681 static const unsigned OpcTable
[2][2] = {{AArch64::CBZW
, AArch64::CBZX
},
1682 {AArch64::CBNZW
, AArch64::CBNZX
}};
1683 unsigned Opc
= OpcTable
[IsNegative
][Width
== 64];
1684 auto BranchMI
= MIB
.buildInstr(Opc
, {}, {CompareReg
}).addMBB(DestMBB
);
1685 constrainSelectedInstRegOperands(*BranchMI
, TII
, TRI
, RBI
);
1689 bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1690 MachineInstr
&I
, MachineInstr
&FCmp
, MachineIRBuilder
&MIB
) const {
1691 assert(FCmp
.getOpcode() == TargetOpcode::G_FCMP
);
1692 assert(I
.getOpcode() == TargetOpcode::G_BRCOND
);
1693 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1694 // totally clean. Some of them require two branches to implement.
1695 auto Pred
= (CmpInst::Predicate
)FCmp
.getOperand(1).getPredicate();
1696 emitFPCompare(FCmp
.getOperand(2).getReg(), FCmp
.getOperand(3).getReg(), MIB
,
1698 AArch64CC::CondCode CC1
, CC2
;
1699 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate
>(Pred
), CC1
, CC2
);
1700 MachineBasicBlock
*DestMBB
= I
.getOperand(1).getMBB();
1701 MIB
.buildInstr(AArch64::Bcc
, {}, {}).addImm(CC1
).addMBB(DestMBB
);
1702 if (CC2
!= AArch64CC::AL
)
1703 MIB
.buildInstr(AArch64::Bcc
, {}, {}).addImm(CC2
).addMBB(DestMBB
);
1704 I
.eraseFromParent();
bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
  //
  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
  // instructions will not be produced, as they are conditional branch
  // instructions that do not set flags.
  if (!ProduceNonFlagSettingCondBr)
    return false;

  MachineRegisterInfo &MRI = *MIB.getMRI();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  auto Pred =
      static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
  Register LHS = ICmp.getOperand(2).getReg();
  Register RHS = ICmp.getOperand(3).getReg();

  // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
  auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
  MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);

  // When we can emit a TB(N)Z, prefer that.
  //
  // Handle non-commutative condition codes first.
  // Note that we don't want to do this when we have a G_AND because it can
  // become a tst. The tst will make the test bit in the TB(N)Z redundant.
  if (VRegAndVal && !AndInst) {
    int64_t C = VRegAndVal->Value.getSExtValue();

    // When we have a greater-than comparison, we can just test if the msb is
    // zero.
    if (C == -1 && Pred == CmpInst::ICMP_SGT) {
      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
      emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }

    // When we have a less than comparison, we can just test if the msb is not
    // zero.
    if (C == 0 && Pred == CmpInst::ICMP_SLT) {
      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
      emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }

    // Inversely, if we have a signed greater-than-or-equal comparison to zero,
    // we can test if the msb is zero.
    if (C == 0 && Pred == CmpInst::ICMP_SGE) {
      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
      emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }
  }

  // Attempt to handle commutative condition codes. Right now, that's only
  // eq/ne.
  if (ICmpInst::isEquality(Pred)) {
    if (!VRegAndVal) {
      std::swap(RHS, LHS);
      VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
      AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
    }

    if (VRegAndVal && VRegAndVal->Value == 0) {
      // If there's a G_AND feeding into this branch, try to fold it away by
      // emitting a TB(N)Z instead.
      //
      // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
      // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
      // would be redundant.
      if (AndInst &&
          tryOptAndIntoCompareBranch(
              *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
        I.eraseFromParent();
        return true;
      }

      // Otherwise, try to emit a CB(N)Z instead.
      auto LHSTy = MRI.getType(LHS);
      if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
        emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
        I.eraseFromParent();
        return true;
      }
    }
  }

  return false;
}
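// Illustrative example (not from the original source): the non-commutative
// cases above reduce signed comparisons against 0/-1 to a test of the sign
// bit. For a 64-bit value this is roughly:
//
//   %cmp:gpr(s32) = G_ICMP intpred(slt), %x(s64), 0
//   G_BRCOND %cmp, %bb.2
//     -->
//   TBNZ %x, 63, %bb.2    ; branch if the msb (sign bit) is set
//
// while intpred(sge) against 0, or intpred(sgt) against -1, becomes
// TBZ %x, 63, %bb.2 instead.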
bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
    return true;

  // Couldn't optimize. Emit a compare + a Bcc.
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  auto PredOp = ICmp.getOperand(1);
  emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
      static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
  Register CondReg = I.getOperand(0).getReg();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  // Try to select the G_BRCOND using whatever is feeding the condition if
  // possible.
  unsigned CCMIOpc = CCMI->getOpcode();
  if (CCMIOpc == TargetOpcode::G_FCMP)
    return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
  if (CCMIOpc == TargetOpcode::G_ICMP)
    return selectCompareBranchFedByICmp(I, *CCMI, MIB);

  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
  // instructions will not be produced, as they are conditional branch
  // instructions that do not set flags.
  if (ProduceNonFlagSettingCondBr) {
    emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
                I.getOperand(1).getMBB(), MIB);
    I.eraseFromParent();
    return true;
  }

  // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
  auto TstMI =
      MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
  auto Bcc = MIB.buildInstr(AArch64::Bcc)
                 .addImm(AArch64CC::NE)
                 .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
}
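// Illustrative example (not from the original source): when the condition is
// not produced by a compare, the fallback above tests bit 0 of the condition
// register. Roughly:
//
//   G_BRCOND %cond(s1), %bb.1   -->   TBNZ %cond, 0, %bb.1
//
// and, when TB(N)Z is not allowed (speculative load hardening), it becomes a
// flag-setting sequence instead: ANDSWri %cond, 1 followed by Bcc NE, %bb.1,
// i.e. "tst w0, #0x1; b.ne %bb.1".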
/// Returns the element immediate value of a vector shift operand if found.
/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
static std::optional<int64_t> getVectorShiftImm(Register Reg,
                                                MachineRegisterInfo &MRI) {
  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
  MachineInstr *OpMI = MRI.getVRegDef(Reg);
  return getAArch64VectorSplatScalar(*OpMI, MRI);
}
/// Matches and returns the shift immediate value for a SHL instruction given
/// a shift operand.
static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
                                              MachineRegisterInfo &MRI) {
  std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
  if (!ShiftImm)
    return std::nullopt;
  // Check the immediate is in range for a SHL.
  int64_t Imm = *ShiftImm;
  if (Imm < 0)
    return std::nullopt;
  switch (SrcTy.getElementType().getSizeInBits()) {
  default:
    LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
    return std::nullopt;
  case 8:
    if (Imm > 7)
      return std::nullopt;
    break;
  case 16:
    if (Imm > 15)
      return std::nullopt;
    break;
  case 32:
    if (Imm > 31)
      return std::nullopt;
    break;
  case 64:
    if (Imm > 63)
      return std::nullopt;
    break;
  }
  return Imm;
}
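// Illustrative example (not from the original source): for a <4 x s32> shift,
// only splat immediates in [0, 31] survive the checks above. A G_BUILD_VECTOR
// splat of 5 yields std::optional<int64_t>(5); a splat of 40 (or a negative
// value) yields std::nullopt, so the register form of the shift is used.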
bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // Check if we have a vector of constants on RHS that we can select as the
  // immediate form.
  std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);

  unsigned Opc = 0;
  if (Ty == LLT::fixed_vector(2, 64)) {
    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
  } else if (Ty == LLT::fixed_vector(4, 32)) {
    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
  } else if (Ty == LLT::fixed_vector(2, 32)) {
    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
  } else if (Ty == LLT::fixed_vector(4, 16)) {
    Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
  } else if (Ty == LLT::fixed_vector(8, 16)) {
    Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
  } else if (Ty == LLT::fixed_vector(16, 8)) {
    Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
  } else if (Ty == LLT::fixed_vector(8, 8)) {
    Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
    return false;
  }

  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
  if (ImmVal)
    Shl.addImm(*ImmVal);
  else
    Shl.addUse(Src2Reg);
  constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
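// Illustrative example (not from the original source): with a splat-constant
// shift amount the immediate form is chosen, otherwise the register form:
//
//   %d:fpr(<4 x s32>) = G_SHL %a, %splat_of_3   -->   SHLv4i32_shift %a, 3
//   %d:fpr(<4 x s32>) = G_SHL %a, %b            -->   USHLv4i32 %a, %b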
bool AArch64InstructionSelector::selectVectorAshrLshr(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
         I.getOpcode() == TargetOpcode::G_LSHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;

  // We expect the immediate case to be lowered in the PostLegalCombiner to
  // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.

  // There is not a shift right register instruction, but the shift left
  // register instruction takes a signed value, where negative numbers specify a
  // right shift.

  unsigned Opc = 0;
  unsigned NegOpc = 0;
  const TargetRegisterClass *RC =
      getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
  if (Ty == LLT::fixed_vector(2, 64)) {
    Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
    NegOpc = AArch64::NEGv2i64;
  } else if (Ty == LLT::fixed_vector(4, 32)) {
    Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
    NegOpc = AArch64::NEGv4i32;
  } else if (Ty == LLT::fixed_vector(2, 32)) {
    Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
    NegOpc = AArch64::NEGv2i32;
  } else if (Ty == LLT::fixed_vector(4, 16)) {
    Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
    NegOpc = AArch64::NEGv4i16;
  } else if (Ty == LLT::fixed_vector(8, 16)) {
    Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
    NegOpc = AArch64::NEGv8i16;
  } else if (Ty == LLT::fixed_vector(16, 8)) {
    Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
    NegOpc = AArch64::NEGv16i8;
  } else if (Ty == LLT::fixed_vector(8, 8)) {
    Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
    NegOpc = AArch64::NEGv8i8;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
    return false;
  }

  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
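// Illustrative example (not from the original source): since there is no
// right-shift-by-register instruction, the selection above negates the shift
// amount and uses the signed/unsigned left shift:
//
//   %d:fpr(<4 x s32>) = G_ASHR %a, %b
//     -->
//   %neg:fpr(<4 x s32>) = NEGv4i32 %b
//   %d = SSHLv4i32 %a, %neg          ; USHLv4i32 for G_LSHR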
bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  if (STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
                             MF.getFunction().isVarArg()))
    return false;

  // The layout of the va_list struct is specified in the AArch64 Procedure Call
  // Standard, section 10.1.5.

  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
  const auto *PtrRegClass =
      STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;

  const MCInstrDesc &MCIDAddAddr =
      TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
  const MCInstrDesc &MCIDStoreAddr =
      TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);

  /*
   * typedef struct va_list {
   *  void * stack; // next stack param
   *  void * gr_top; // end of GP arg reg save area
   *  void * vr_top; // end of FP/SIMD arg reg save area
   *  int gr_offs; // offset from gr_top to next GP register arg
   *  int vr_offs; // offset from vr_top to next FP/SIMD register arg
   * } va_list;
   */

  const auto VAList = I.getOperand(0).getReg();

  // Our current offset in bytes from the va_list struct (VAList).
  unsigned OffsetBytes = 0;

  // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
  // and increment OffsetBytes by PtrSize.
  const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
    const Register Top = MRI.createVirtualRegister(PtrRegClass);
    auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
                   .addDef(Top)
                   .addFrameIndex(FrameIndex)
                   .addImm(Imm)
                   .addImm(0);
    constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

    const auto *MMO = *I.memoperands_begin();
    MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
              .addUse(Top)
              .addUse(VAList)
              .addImm(OffsetBytes / PtrSize)
              .addMemOperand(MF.getMachineMemOperand(
                  MMO->getPointerInfo().getWithOffset(OffsetBytes),
                  MachineMemOperand::MOStore, PtrSize, MMO->getBaseAlign()));
    constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

    OffsetBytes += PtrSize;
  };

  // void* stack at offset 0
  PushAddress(FuncInfo->getVarArgsStackIndex(), 0);

  // void* gr_top at offset 8 (4 on ILP32)
  const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
  PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);

  // void* vr_top at offset 16 (8 on ILP32)
  const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
  PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);

  // Helper function to store a 4-byte integer constant to VAList at offset
  // OffsetBytes, and increment OffsetBytes by 4.
  const auto PushIntConstant = [&](const int32_t Value) {
    constexpr int IntSize = 4;
    const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    auto MIB =
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
            .addDef(Temp)
            .addImm(Value);
    constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

    const auto *MMO = *I.memoperands_begin();
    MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
              .addUse(Temp)
              .addUse(VAList)
              .addImm(OffsetBytes / IntSize)
              .addMemOperand(MF.getMachineMemOperand(
                  MMO->getPointerInfo().getWithOffset(OffsetBytes),
                  MachineMemOperand::MOStore, IntSize, MMO->getBaseAlign()));
    constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
    OffsetBytes += IntSize;
  };

  // int gr_offs at offset 24 (12 on ILP32)
  PushIntConstant(-static_cast<int32_t>(GPRSize));

  // int vr_offs at offset 28 (16 on ILP32)
  PushIntConstant(-static_cast<int32_t>(FPRSize));

  assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");

  I.eraseFromParent();
  return true;
}
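// Illustrative summary (not from the original source): for the standard LP64
// AAPCS64 layout the stores above land at these va_list offsets:
//
//   offset  0: stack
//   offset  8: gr_top
//   offset 16: vr_top
//   offset 24: gr_offs = -(size of the GP register save area)
//   offset 28: vr_offs = -(size of the FP/SIMD register save area)
//
// On ILP32 the pointer fields are 4 bytes, so the offsets become 0/4/8/12/16
// and the final OffsetBytes is 20 rather than 32.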
bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  Register ListReg = I.getOperand(0).getReg();

  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  int FrameIdx = FuncInfo->getVarArgsStackIndex();
  if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
          MF.getFunction().getCallingConv(), MF.getFunction().isVarArg())) {
    FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
                   ? FuncInfo->getVarArgsGPRIndex()
                   : FuncInfo->getVarArgsStackIndex();
  }

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FrameIdx)
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned OpFlags) {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
                       Register ForceDstReg) {
    Register DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  Register DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
  I.eraseFromParent();
}
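// Illustrative example (not from the original source): for a global @g under
// the large code model this emits one MOVZ plus three MOVKs, one per 16-bit
// chunk of the 64-bit address, roughly:
//
//   movz x0, #:abs_g0_nc:g
//   movk x0, #:abs_g1_nc:g, lsl #16
//   movk x0, #:abs_g2_nc:g, lsl #32
//   movk x0, #:abs_g3:g,    lsl #48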
bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case TargetOpcode::G_STORE: {
    bool Changed = contractCrossBankCopyIntoStore(I, MRI);
    MachineOperand &SrcOp = I.getOperand(0);
    if (MRI.getType(SrcOp.getReg()).isPointer()) {
      // Allow matching with imported patterns for stores of pointers. Unlike
      // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
      // and constrain.
      auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
      Register NewSrc = Copy.getReg(0);
      SrcOp.setReg(NewSrc);
      RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
      Changed = true;
    }
    return Changed;
  }
  case TargetOpcode::G_PTR_ADD:
    return convertPtrAddToAdd(I, MRI);
  case TargetOpcode::G_LOAD: {
    // For scalar loads of pointers, we try to convert the dest type from p0
    // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
    // conversion, this should be ok because all users should have been
    // selected already, so the type doesn't matter for them.
    Register DstReg = I.getOperand(0).getReg();
    const LLT DstTy = MRI.getType(DstReg);
    if (!DstTy.isPointer())
      return false;
    MRI.setType(DstReg, LLT::scalar(64));
    return true;
  }
  case AArch64::G_DUP: {
    // Convert the type from p0 to s64 to help selection.
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    if (!DstTy.isPointerVector())
      return false;
    auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
    MRI.setType(I.getOperand(0).getReg(),
                DstTy.changeElementType(LLT::scalar(64)));
    MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
    I.getOperand(1).setReg(NewSrc.getReg(0));
    return true;
  }
  case AArch64::G_INSERT_VECTOR_ELT: {
    // Convert the type from p0 to s64 to help selection.
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());
    if (!SrcVecTy.isPointerVector())
      return false;
    auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(2).getReg());
    MRI.setType(I.getOperand(1).getReg(),
                DstTy.changeElementType(LLT::scalar(64)));
    MRI.setType(I.getOperand(0).getReg(),
                DstTy.changeElementType(LLT::scalar(64)));
    MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
    I.getOperand(2).setReg(NewSrc.getReg(0));
    return true;
  }
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_SITOFP: {
    // If both source and destination regbanks are FPR, then convert the opcode
    // to G_SITOF so that the importer can select it to an fpr variant.
    // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
    // copy.
    Register SrcReg = I.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
      return false;

    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
      if (I.getOpcode() == TargetOpcode::G_SITOFP)
        I.setDesc(TII.get(AArch64::G_SITOF));
      else
        I.setDesc(TII.get(AArch64::G_UITOF));
      return true;
    }
    return false;
  }
  default:
    return false;
  }
}
/// This lowering tries to look for G_PTR_ADD instructions and then converts
/// them to a standard G_ADD with a COPY on the source.
///
/// The motivation behind this is to expose the add semantics to the imported
/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
/// because the selector works bottom up, uses before defs. By the time we
/// end up trying to select a G_PTR_ADD, we should have already attempted to
/// fold this into addressing modes and were therefore unsuccessful.
bool AArch64InstructionSelector::convertPtrAddToAdd(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  Register DstReg = I.getOperand(0).getReg();
  Register AddOp1Reg = I.getOperand(1).getReg();
  const LLT PtrTy = MRI.getType(DstReg);
  if (PtrTy.getAddressSpace() != 0)
    return false;

  const LLT CastPtrTy =
      PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
  auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
  // Set regbanks on the registers.
  if (PtrTy.isVector())
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
  else
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));

  // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
  //   %dst(intty) = G_ADD %intbase, off
  I.setDesc(TII.get(TargetOpcode::G_ADD));
  MRI.setType(DstReg, CastPtrTy);
  I.getOperand(1).setReg(PtrToInt.getReg(0));
  if (!select(*PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
    return false;
  }

  // Also take the opportunity here to try to do some optimization.
  // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
  Register NegatedReg;
  if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
    return true;
  I.getOperand(2).setReg(NegatedReg);
  I.setDesc(TII.get(TargetOpcode::G_SUB));
  return true;
}
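// Illustrative example (not from the original source):
//
//   %dst:gpr(p0) = G_PTR_ADD %base(p0), %off(s64)
//     -->
//   %cast:gpr(s64) = G_PTRTOINT %base(p0)   ; selected immediately above
//   %dst:gpr(s64)  = G_ADD %cast, %off
//
// and if %off is a "0 - %x" negate idiom, the G_ADD is rewritten once more to
//   %dst:gpr(s64) = G_SUB %cast, %x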
bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
                                                MachineRegisterInfo &MRI) {
  // We try to match the immediate variant of LSL, which is actually an alias
  // for a special case of UBFM. Otherwise, we fall back to the imported
  // selector which will match the register variant.
  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
  const auto &MO = I.getOperand(2);
  auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
  if (!VRegAndVal)
    return false;

  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  if (DstTy.isVector())
    return false;
  bool Is64Bit = DstTy.getSizeInBits() == 64;
  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);

  if (!Imm1Fn || !Imm2Fn)
    return false;

  auto NewI =
      MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
                     {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});

  for (auto &RenderFn : *Imm1Fn)
    RenderFn(NewI);
  for (auto &RenderFn : *Imm2Fn)
    RenderFn(NewI);

  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
}
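// Illustrative example (not from the original source): LSL-by-immediate is an
// alias of UBFM with immr = (Size - shift) % Size and imms = Size - 1 - shift,
// which is roughly what the selectShiftA_*/selectShiftB_* renderers compute.
// For example:
//
//   %d:gpr(s64) = G_SHL %x, 3   -->   UBFMXri %d, %x, 61, 60   ; lsl x, x, #3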
bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
  // If we're storing a scalar, it doesn't matter what register bank that
  // scalar is on. All that matters is the size.
  //
  // So, if we see something like this (with a 32-bit scalar as an example):
  //
  // %x:gpr(s32) = ... something ...
  // %y:fpr(s32) = COPY %x:gpr(s32)
  // G_STORE %y:fpr(s32)
  //
  // We can fix this up into something like this:
  //
  // G_STORE %x:gpr(s32)
  //
  // And then continue the selection process normally.
  Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
  if (!DefDstReg.isValid())
    return false;
  LLT DefDstTy = MRI.getType(DefDstReg);
  Register StoreSrcReg = I.getOperand(0).getReg();
  LLT StoreSrcTy = MRI.getType(StoreSrcReg);

  // If we get something strange like a physical register, then we shouldn't
  // go any further.
  if (!DefDstTy.isValid())
    return false;

  // Are the source and dst types the same size?
  if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
    return false;

  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
      RBI.getRegBank(DefDstReg, MRI, TRI))
    return false;

  // We have a cross-bank copy, which is entering a store. Let's fold it.
  I.getOperand(0).setReg(DefDstReg);
  return true;
}
bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case AArch64::G_DUP: {
    // Before selecting a DUP instruction, check if it is better selected as a
    // MOV or load from a constant pool.
    Register Src = I.getOperand(1).getReg();
    auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
    if (!ValAndVReg)
      return false;
    LLVMContext &Ctx = MF.getFunction().getContext();
    Register Dst = I.getOperand(0).getReg();
    auto *CV = ConstantDataVector::getSplat(
        MRI.getType(Dst).getNumElements(),
        ConstantInt::get(
            Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
            ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
    if (!emitConstantVector(Dst, CV, MIB, MRI))
      return false;
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_SEXT:
    // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
    // over a normal extend.
    if (selectUSMovFromExtend(I, MRI))
      return true;
    return false;
  case TargetOpcode::G_BR:
    return false;
  case TargetOpcode::G_SHL:
    return earlySelectSHL(I, MRI);
  case TargetOpcode::G_CONSTANT: {
    bool IsZero = false;
    if (I.getOperand(1).isCImm())
      IsZero = I.getOperand(1).getCImm()->isZero();
    else if (I.getOperand(1).isImm())
      IsZero = I.getOperand(1).getImm() == 0;

    if (!IsZero)
      return false;

    Register DefReg = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(DefReg);
    if (Ty.getSizeInBits() == 64) {
      I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
    } else if (Ty.getSizeInBits() == 32) {
      I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
    } else
      return false;

    I.setDesc(TII.get(TargetOpcode::COPY));
    return true;
  }

  case TargetOpcode::G_ADD: {
    // Check if this is being fed by a G_ICMP on either side.
    //
    // (cmp pred, x, y) + z
    //
    // In the above case, when the cmp is true, we increment z by 1. So, we can
    // fold the add into the cset for the cmp by using cinc.
    //
    // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
    Register AddDst = I.getOperand(0).getReg();
    Register AddLHS = I.getOperand(1).getReg();
    Register AddRHS = I.getOperand(2).getReg();
    // Only handle scalars.
    LLT Ty = MRI.getType(AddLHS);
    if (Ty.isVector())
      return false;
    // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
    // bits.
    unsigned Size = Ty.getSizeInBits();
    if (Size != 32 && Size != 64)
      return false;
    auto MatchCmp = [&](Register Reg) -> MachineInstr * {
      if (!MRI.hasOneNonDBGUse(Reg))
        return nullptr;
      // If the LHS of the add is 32 bits, then we want to fold a 32-bit
      // compare.
      if (Size == 32)
        return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
      // We model scalar compares using 32-bit destinations right now.
      // If it's a 64-bit compare, it'll have 64-bit sources.
      Register ZExt;
      if (!mi_match(Reg, MRI,
                    m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
        return nullptr;
      auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
      if (!Cmp ||
          MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
        return nullptr;
      return Cmp;
    };
    // Try to match
    // z + (cmp pred, x, y)
    MachineInstr *Cmp = MatchCmp(AddRHS);
    if (!Cmp) {
      // (cmp pred, x, y) + z
      std::swap(AddLHS, AddRHS);
      Cmp = MatchCmp(AddRHS);
      if (!Cmp)
        return false;
    }
    auto &PredOp = Cmp->getOperand(1);
    auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
    const AArch64CC::CondCode InvCC =
        changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
    MIB.setInstrAndDebugLoc(I);
    emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
                       /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
    emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
    I.eraseFromParent();
    return true;
  }
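  // Illustrative example (not from the original source): the G_ADD fold above
  // turns an add of a compare result into a conditional increment, roughly:
  //
  //   %c:gpr(s32) = G_ICMP intpred(eq), %x, %y
  //   %a:gpr(s32) = G_ADD %z, %c
  //     -->
  //   <compare %x, %y>                 ; e.g. SUBSWrr, setting NZCV
  //   CSINCWr %a, %z, %z, ne           ; i.e. "cinc w_a, w_z, eq"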
  case TargetOpcode::G_OR: {
    // Look for operations that take the lower `Width=Size-ShiftImm` bits of
    // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
    // shifting and masking that we can replace with a BFI (encoded as a BFM).
    Register Dst = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(Dst);

    if (!Ty.isScalar())
      return false;

    unsigned Size = Ty.getSizeInBits();
    if (Size != 32 && Size != 64)
      return false;

    Register ShiftSrc;
    int64_t ShiftImm;
    Register MaskSrc;
    int64_t MaskImm;
    if (!mi_match(
            Dst, MRI,
            m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
                  m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
      return false;

    if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
      return false;

    int64_t Immr = Size - ShiftImm;
    int64_t Imms = Size - ShiftImm - 1;
    unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
    emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
    I.eraseFromParent();
    return true;
  }
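  // Illustrative example (not from the original source): with Size = 32 and
  // ShiftImm = 8 (so MaskImm == 0xff), the pattern above becomes a bitfield
  // insert with Immr = 24 and Imms = 23:
  //
  //   %or = G_OR (G_SHL %shiftsrc, 8), (G_AND %masksrc, 255)
  //     -->
  //   BFMWri %dst, %shiftsrc, 24, 23   ; "bfi w_dst, w_shiftsrc, #8, #24",
  //                                    ; with %dst tied to %masksrc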
  case TargetOpcode::G_FENCE: {
    if (I.getOperand(1).getImm() == 0)
      BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
    else
      BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
          .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
    I.eraseFromParent();
    return true;
  }
  default:
    return false;
  }
}
bool AArch64InstructionSelector::select(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
  if (Subtarget->requiresStrictAlign()) {
    // We don't support this feature yet.
    LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
    return false;
  }

  MIB.setInstrAndDebugLoc(I);

  unsigned Opcode = I.getOpcode();
  // G_PHI requires same handling as PHI
  if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const Register DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);

      const RegClassOrRegBank &RegClassOrBank =
          MRI.getRegClassOrRegBank(DefReg);

      const TargetRegisterClass *DefRC =
          dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
      if (!DefRC) {
        if (!DefTy.isValid()) {
          LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
          return false;
        }
        const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
        DefRC = getRegClassForTypeOnBank(DefTy, RB);
        if (!DefRC) {
          LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
          return false;
        }
      }

      I.setDesc(TII.get(TargetOpcode::PHI));

      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    }

    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    if (I.isDebugInstr())
      return selectDebugInstr(I, MRI, RBI);

    return true;
  }

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");
    return false;
  }

  // Try to do some lowering before we start instruction selecting. These
  // lowerings are purely transformations on the input G_MIR and so selection
  // must continue after any modification of the instruction.
  if (preISelLower(I)) {
    Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
  }

  // There may be patterns where the importer can't deal with them optimally,
  // but does select it to a suboptimal sequence so our custom C++ selection
  // code later never has a chance to work on it. Therefore, we have an early
  // selection attempt here to give priority to certain selection routines
  // over the imported ones.
  if (earlySelect(I))
    return true;

  if (selectImpl(I, *CoverageInfo))
    return true;

  LLT Ty =
      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};

  switch (Opcode) {
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX: {
    static const unsigned OpcTable[2][2] = {
        {AArch64::UBFMWri, AArch64::UBFMXri},
        {AArch64::SBFMWri, AArch64::SBFMXri}};
    bool IsSigned = Opcode == TargetOpcode::G_SBFX;
    unsigned Size = Ty.getSizeInBits();
    unsigned Opc = OpcTable[IsSigned][Size == 64];
    auto Cst1 =
        getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
    assert(Cst1 && "Should have gotten a constant for src 1?");
    auto Cst2 =
        getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
    assert(Cst2 && "Should have gotten a constant for src 2?");
    auto LSB = Cst1->Value.getZExtValue();
    auto Width = Cst2->Value.getZExtValue();
    auto BitfieldInst =
        MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
            .addImm(LSB)
            .addImm(LSB + Width - 1);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
  }
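  // Illustrative example (not from the original source): G_UBFX/G_SBFX carry
  // an lsb and a width, which map onto the UBFM/SBFM immediates used above as
  // immr = lsb and imms = lsb + width - 1. For instance:
  //
  //   %d:gpr(s32) = G_UBFX %x, 4, 8   -->   UBFMWri %d, %x, 4, 11
  //                                         ; "ubfx w_d, w_x, #4, #8"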
  case TargetOpcode::G_BRCOND:
    return selectCompareBranch(I, MF, MRI);

  case TargetOpcode::G_BRINDIRECT: {
    const Function &Fn = MF.getFunction();
    if (std::optional<uint16_t> BADisc =
            STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
      auto MI = MIB.buildInstr(AArch64::BRA, {}, {I.getOperand(0).getReg()});
      MI.addImm(AArch64PACKey::IA);
      MI.addImm(*BADisc);
      MI.addReg(/*AddrDisc=*/AArch64::XZR);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
    }
    I.setDesc(TII.get(AArch64::BR));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_BRJT:
    return selectBrJT(I, MRI);

  case AArch64::G_ADD_LOW: {
    // This op may have been separated from it's ADRP companion by the localizer
    // or some other code motion pass. Given that many CPUs will try to
    // macro fuse these operations anyway, select this into a MOVaddr pseudo
    // which will later be expanded into an ADRP+ADD pair after scheduling.
    MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
    if (BaseMI->getOpcode() != AArch64::ADRP) {
      I.setDesc(TII.get(AArch64::ADDXri));
      I.addOperand(MachineOperand::CreateImm(0));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }
    assert(TM.getCodeModel() == CodeModel::Small &&
           "Expected small code model");
    auto Op1 = BaseMI->getOperand(1);
    auto Op2 = I.getOperand(2);
    auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
                       .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
                                         Op1.getTargetFlags())
                       .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
                                         Op2.getTargetFlags());
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
  }

  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_CONSTANT: {
    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;

    const LLT s8 = LLT::scalar(8);
    const LLT s16 = LLT::scalar(16);
    const LLT s32 = LLT::scalar(32);
    const LLT s64 = LLT::scalar(64);
    const LLT s128 = LLT::scalar(128);
    const LLT p0 = LLT::pointer(0, 64);

    const Register DefReg = I.getOperand(0).getReg();
    const LLT DefTy = MRI.getType(DefReg);
    const unsigned DefSize = DefTy.getSizeInBits();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    // FIXME: Redundant check, but even less readable when factored out.
    if (isFP) {
      if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant, expected: " << s16 << " or " << s32
                          << " or " << s64 << " or " << s128 << '\n');
        return false;
      }

      if (RB.getID() != AArch64::FPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant on bank: " << RB
                          << ", expected: FPR\n");
        return false;
      }

      // The case when we have 0.0 is covered by tablegen. Reject it here so we
      // can be sure tablegen works correctly and isn't rescued by this code.
      // 0.0 is not covered by tablegen for FP128. So we will handle this
      // scenario in the code here.
      if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
        return false;
    } else {
      // s32 and s64 are covered by tablegen.
      if (Ty != p0 && Ty != s8 && Ty != s16) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant, expected: " << s32 << ", " << s64
                          << ", or " << p0 << '\n');
        return false;
      }

      if (RB.getID() != AArch64::GPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant on bank: " << RB
                          << ", expected: GPR\n");
        return false;
      }
    }

    if (isFP) {
      const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
      // For 16, 64, and 128b values, emit a constant pool load.
      switch (DefSize) {
      default:
        llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
      case 32:
      case 64: {
        bool OptForSize = shouldOptForSize(&MF);
        const auto &TLI = MF.getSubtarget().getTargetLowering();
        // If TLI says that this fpimm is illegal, then we'll expand to a
        // constant pool load.
        if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
                              EVT::getFloatingPointVT(DefSize), OptForSize))
          break;
        [[fallthrough]];
      }
      case 16:
      case 128: {
        auto *FPImm = I.getOperand(1).getFPImm();
        auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
        if (!LoadMI) {
          LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
          return false;
        }
        MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
        I.eraseFromParent();
        return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
      }
      }

      assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
      // Either emit a FMOV, or emit a copy to emit a normal mov.
      const Register DefGPRReg = MRI.createVirtualRegister(
          DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
      MachineOperand &RegOp = I.getOperand(0);
      RegOp.setReg(DefGPRReg);
      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
      MIB.buildCopy({DefReg}, {DefGPRReg});

      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
        return false;
      }

      MachineOperand &ImmOp = I.getOperand(1);
      // FIXME: Is going through int64_t always correct?
      ImmOp.ChangeToImmediate(
          ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
    } else if (I.getOperand(1).isCImm()) {
      uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
      I.getOperand(1).ChangeToImmediate(Val);
    } else if (I.getOperand(1).isImm()) {
      uint64_t Val = I.getOperand(1).getImm();
      I.getOperand(1).ChangeToImmediate(Val);
    }

    const unsigned MovOpc =
        DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    I.setDesc(TII.get(MovOpc));
    constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    return true;
  }
  case TargetOpcode::G_EXTRACT: {
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    LLT DstTy = MRI.getType(DstReg);
    (void)DstTy;
    unsigned SrcSize = SrcTy.getSizeInBits();

    if (SrcTy.getSizeInBits() > 64) {
      // This should be an extract of an s128, which is like a vector extract.
      if (SrcTy.getSizeInBits() != 128)
        return false;
      // Only support extracting 64 bits from an s128 at the moment.
      if (DstTy.getSizeInBits() != 64)
        return false;

      unsigned Offset = I.getOperand(2).getImm();
      if (Offset % 64 != 0)
        return false;

      // Check we have the right regbank always.
      const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
      const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
      assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");

      if (SrcRB.getID() == AArch64::GPRRegBankID) {
        auto NewI =
            MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
                .addUse(SrcReg, 0,
                        Offset == 0 ? AArch64::sube64 : AArch64::subo64);
        constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
                                 AArch64::GPR64RegClass, NewI->getOperand(0));
        I.eraseFromParent();
        return true;
      }

      // Emit the same code as a vector extract.
      // Offset must be a multiple of 64.
      unsigned LaneIdx = Offset / 64;
      MachineInstr *Extract = emitExtractVectorElt(
          DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
      if (!Extract)
        return false;
      I.eraseFromParent();
      return true;
    }

    I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
    MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
                                      Ty.getSizeInBits() - 1);

    if (SrcSize < 64) {
      assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
             "unexpected G_EXTRACT types");
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
    MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
    MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
        .addReg(DstReg, 0, AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(0).setReg(DstReg);

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_INSERT: {
    LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    unsigned DstSize = DstTy.getSizeInBits();
    // Larger inserts are vectors, same-size ones should be something else by
    // now (split up or turned into COPYs).
    if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
      return false;

    I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
    unsigned LSB = I.getOperand(3).getImm();
    unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
    I.getOperand(3).setImm((DstSize - LSB) % DstSize);
    MachineInstrBuilder(MF, I).addImm(Width - 1);

    if (DstSize < 64) {
      assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
             "unexpected G_INSERT types");
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
    BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
            TII.get(AArch64::SUBREG_TO_REG))
        .addDef(SrcReg)
        .addImm(0)
        .addUse(I.getOperand(2).getReg())
        .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(2).setReg(SrcReg);

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
  case TargetOpcode::G_FRAME_INDEX: {
    // allocas and G_FRAME_INDEX are only supported in addrspace(0).
    if (Ty != LLT::pointer(0, 64)) {
      LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
                        << ", expected: " << LLT::pointer(0, 64) << '\n');
      return false;
    }
    I.setDesc(TII.get(AArch64::ADDXri));

    // MOs for a #0 shifted immediate.
    I.addOperand(MachineOperand::CreateImm(0));
    I.addOperand(MachineOperand::CreateImm(0));

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_GLOBAL_VALUE: {
    const GlobalValue *GV = nullptr;
    unsigned OpFlags;
    if (I.getOperand(1).isSymbol()) {
      OpFlags = I.getOperand(1).getTargetFlags();
      // Currently only used by "RtLibUseGOT".
      assert(OpFlags == AArch64II::MO_GOT);
    } else {
      GV = I.getOperand(1).getGlobal();
      if (GV->isThreadLocal())
        return selectTLSGlobalValue(I, MRI);
      OpFlags = STI.ClassifyGlobalReference(GV, TM);
    }

    if (OpFlags & AArch64II::MO_GOT) {
      I.setDesc(TII.get(MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT()
                            ? AArch64::LOADgotAUTH
                            : AArch64::LOADgot));
      I.getOperand(1).setTargetFlags(OpFlags);
    } else if (TM.getCodeModel() == CodeModel::Large &&
               !TM.isPositionIndependent()) {
      // Materialize the global using movz/movk instructions.
      materializeLargeCMVal(I, GV, OpFlags);
      I.eraseFromParent();
      return true;
    } else if (TM.getCodeModel() == CodeModel::Tiny) {
      I.setDesc(TII.get(AArch64::ADR));
      I.getOperand(1).setTargetFlags(OpFlags);
    } else {
      I.setDesc(TII.get(AArch64::MOVaddr));
      I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
      MachineInstrBuilder MIB(MF, I);
      MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
                           OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    }
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
    return selectPtrAuthGlobalValue(I, MRI);
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE: {
    GLoadStore &LdSt = cast<GLoadStore>(I);
    bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    LLT PtrTy = MRI.getType(LdSt.getPointerReg());

    if (PtrTy != LLT::pointer(0, 64)) {
      LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
                        << ", expected: " << LLT::pointer(0, 64) << '\n');
      return false;
    }

    uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
    unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
    AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();

    // Need special instructions for atomics that affect ordering.
    if (Order != AtomicOrdering::NotAtomic &&
        Order != AtomicOrdering::Unordered &&
        Order != AtomicOrdering::Monotonic) {
      assert(!isa<GZExtLoad>(LdSt));
      assert(MemSizeInBytes <= 8 &&
             "128-bit atomics should already be custom-legalized");

      if (isa<GLoad>(LdSt)) {
        static constexpr unsigned LDAPROpcodes[] = {
            AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
        static constexpr unsigned LDAROpcodes[] = {
            AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
        ArrayRef<unsigned> Opcodes =
            STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
                ? LDAPROpcodes
                : LDAROpcodes;
        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
      } else {
        static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
                                               AArch64::STLRW, AArch64::STLRX};
        Register ValReg = LdSt.getReg(0);
        if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
          // Emit a subreg copy of 32 bits.
          Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
          MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
              .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
          I.getOperand(0).setReg(NewVal);
        }
        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
      }
      constrainSelectedInstRegOperands(I, TII, TRI, RBI);
      return true;
    }

#ifndef NDEBUG
    const Register PtrReg = LdSt.getPointerReg();
    const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
    // Check that the pointer register is valid.
    assert(PtrRB.getID() == AArch64::GPRRegBankID &&
           "Load/Store pointer operand isn't a GPR");
    assert(MRI.getType(PtrReg).isPointer() &&
           "Load/Store pointer operand isn't a pointer");
#endif

    const Register ValReg = LdSt.getReg(0);
    const LLT ValTy = MRI.getType(ValReg);
    const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);

    // The code below doesn't support truncating stores, so we need to split it
    // again.
    if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
      unsigned SubReg;
      LLT MemTy = LdSt.getMMO().getMemoryType();
      auto *RC = getRegClassForTypeOnBank(MemTy, RB);
      if (!getSubRegForClass(RC, TRI, SubReg))
        return false;

      // Generate a subreg copy.
      auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
                      .addReg(ValReg, 0, SubReg)
                      .getReg(0);
      RBI.constrainGenericRegister(Copy, *RC, MRI);
      LdSt.getOperand(0).setReg(Copy);
    } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
      // If this is an any-extending load from the FPR bank, split it into a regular
      // load + extend.
      if (RB.getID() == AArch64::FPRRegBankID) {
        unsigned SubReg;
        LLT MemTy = LdSt.getMMO().getMemoryType();
        auto *RC = getRegClassForTypeOnBank(MemTy, RB);
        if (!getSubRegForClass(RC, TRI, SubReg))
          return false;
        Register OldDst = LdSt.getReg(0);
        Register NewDst =
            MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
        LdSt.getOperand(0).setReg(NewDst);
        MRI.setRegBank(NewDst, RB);
        // Generate a SUBREG_TO_REG to extend it.
        MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
        MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
            .addImm(0)
            .addUse(NewDst)
            .addImm(SubReg);
        auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
        RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
      }
    }

    // Helper lambda for partially selecting I. Either returns the original
    // instruction with an updated opcode, or a new instruction.
    auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
      bool IsStore = isa<GStore>(I);
      const unsigned NewOpc =
          selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
      if (NewOpc == I.getOpcode())
        return nullptr;
      // Check if we can fold anything into the addressing mode.
      auto AddrModeFns =
          selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
      if (!AddrModeFns) {
        // Can't fold anything. Use the original instruction.
        I.setDesc(TII.get(NewOpc));
        I.addOperand(MachineOperand::CreateImm(0));
        return &I;
      }

      // Folded something. Create a new instruction and return it.
      auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
      Register CurValReg = I.getOperand(0).getReg();
      IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
      NewInst.cloneMemRefs(I);
      for (auto &Fn : *AddrModeFns)
        Fn(NewInst);
      I.eraseFromParent();
      return &*NewInst;
    };

    MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
    if (!LoadStore)
      return false;

    // If we're storing a 0, use WZR/XZR.
    if (Opcode == TargetOpcode::G_STORE) {
      auto CVal = getIConstantVRegValWithLookThrough(
          LoadStore->getOperand(0).getReg(), MRI);
      if (CVal && CVal->Value == 0) {
        switch (LoadStore->getOpcode()) {
        case AArch64::STRWui:
        case AArch64::STRHHui:
        case AArch64::STRBBui:
          LoadStore->getOperand(0).setReg(AArch64::WZR);
          break;
        case AArch64::STRXui:
          LoadStore->getOperand(0).setReg(AArch64::XZR);
          break;
        }
      }
    }

    if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
                       ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
      // The any/zextload from a smaller type to i32 should be handled by the
      // importer.
      if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
        return false;
      // If we have an extending load then change the load's type to be a
      // narrower reg and zero_extend with SUBREG_TO_REG.
      Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      Register DstReg = LoadStore->getOperand(0).getReg();
      LoadStore->getOperand(0).setReg(LdReg);

      MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
          .addImm(0)
          .addUse(LdReg)
          .addImm(AArch64::sub_32);
      constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
      return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
                                          MRI);
    }
    return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
  }
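  // Illustrative examples (not from the original source) for the two late
  // tweaks above:
  //
  //   G_STORE 0:gpr(s64), %p           -->   STRXui $xzr, %p, 0
  //                                          ; store of zero reuses XZR/WZR
  //
  //   %v:gpr(s64) = G_ZEXTLOAD %p :: (load (s32))
  //     -->
  //   %w:gpr32 = LDRWui %p, 0
  //   %v       = SUBREG_TO_REG 0, %w, %subreg.sub_32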
  case TargetOpcode::G_INDEXED_ZEXTLOAD:
  case TargetOpcode::G_INDEXED_SEXTLOAD:
    return selectIndexedExtLoad(I, MRI);
  case TargetOpcode::G_INDEXED_LOAD:
    return selectIndexedLoad(I, MRI);
  case TargetOpcode::G_INDEXED_STORE:
    return selectIndexedStore(cast<GIndexedStore>(I), MRI);

  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    if (MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorAshrLshr(I, MRI);
    [[fallthrough]];
  case TargetOpcode::G_SHL:
    if (Opcode == TargetOpcode::G_SHL &&
        MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorSHL(I, MRI);

    // These shifts were legalized to have 64 bit shift amounts because we
    // want to take advantage of the selection patterns that assume the
    // immediates are s64s, however, selectBinaryOp will assume both operands
    // will have the same bit size.
    {
      Register SrcReg = I.getOperand(1).getReg();
      Register ShiftReg = I.getOperand(2).getReg();
      const LLT ShiftTy = MRI.getType(ShiftReg);
      const LLT SrcTy = MRI.getType(SrcReg);
      if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
          ShiftTy.getSizeInBits() == 64) {
        assert(!ShiftTy.isVector() && "unexpected vector shift ty");
        // Insert a subregister copy to implement a 64->32 trunc
        auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
                         .addReg(ShiftReg, 0, AArch64::sub_32);
        MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
        I.getOperand(2).setReg(Trunc.getReg(0));
      }
    }
    [[fallthrough]];
  case TargetOpcode::G_OR: {
    // Reject the various things we don't support yet.
    if (unsupportedBinOp(I, RBI, MRI, TRI))
      return false;

    const unsigned OpSize = Ty.getSizeInBits();

    const Register DefReg = I.getOperand(0).getReg();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
    if (NewOpc == I.getOpcode())
      return false;

    I.setDesc(TII.get(NewOpc));
    // FIXME: Should the type be always reset in setDesc?

    // Now that we selected an opcode, we need to constrain the register
    // operands to use appropriate classes.
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_PTR_ADD: {
    emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
    I.eraseFromParent();
    return true;
  }

  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_USUBO:
    return selectOverflowOp(I, MRI);

  case TargetOpcode::G_PTRMASK: {
    Register MaskReg = I.getOperand(2).getReg();
    std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
    // TODO: Implement arbitrary cases
    if (!MaskVal || !isShiftedMask_64(*MaskVal))
      return false;

    uint64_t Mask = *MaskVal;
    I.setDesc(TII.get(AArch64::ANDXri));
    I.getOperand(2).ChangeToImmediate(
        AArch64_AM::encodeLogicalImmediate(Mask, 64));

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
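  // Illustrative example (not from the original source): G_PTRMASK with a
  // shifted-mask constant becomes a single AND with a logical immediate, e.g.
  // aligning a pointer down to 16 bytes:
  //
  //   %d:gpr(p0) = G_PTRMASK %p, -16
  //     -->
  //   ANDXri %d, %p, <encoding of 0xfffffffffffffff0>
  //   ; i.e. "and x_d, x_p, #0xfffffffffffffff0"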
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_TRUNC: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());

    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();

    const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
    const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

    if (DstRB.getID() != SrcRB.getID()) {
      LLVM_DEBUG(
          dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
      return false;
    }

    if (DstRB.getID() == AArch64::GPRRegBankID) {
      const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
      if (!DstRC)
        return false;

      const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
      if (!SrcRC)
        return false;

      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
          !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
        return false;
      }

      if (DstRC == SrcRC) {
        // Nothing to be done
      } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
                 SrcTy == LLT::scalar(64)) {
        llvm_unreachable("TableGen can import this case");
        return false;
      } else if (DstRC == &AArch64::GPR32RegClass &&
                 SrcRC == &AArch64::GPR64RegClass) {
        I.getOperand(1).setSubReg(AArch64::sub_32);
      } else {
        LLVM_DEBUG(
            dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
        return false;
      }

      I.setDesc(TII.get(TargetOpcode::COPY));
      return true;
    } else if (DstRB.getID() == AArch64::FPRRegBankID) {
      if (DstTy == LLT::fixed_vector(4, 16) &&
          SrcTy == LLT::fixed_vector(4, 32)) {
        I.setDesc(TII.get(AArch64::XTNv4i16));
        constrainSelectedInstRegOperands(I, TII, TRI, RBI);
        return true;
      }

      if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
        MachineInstr *Extract = emitExtractVectorElt(
            DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
        if (!Extract)
          return false;
        I.eraseFromParent();
        return true;
      }

      // We might have a vector G_PTRTOINT, in which case just emit a COPY.
      if (Opcode == TargetOpcode::G_PTRTOINT) {
        assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
        I.setDesc(TII.get(TargetOpcode::COPY));
        return selectCopy(I, TII, MRI, TRI, RBI);
      }
    }

    return false;
  }
  case TargetOpcode::G_ANYEXT: {
    if (selectUSMovFromExtend(I, MRI))
      return true;

    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();

    const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
    if (RBDst.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
                        << ", expected: GPR\n");
      return false;
    }

    const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
    if (RBSrc.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
                        << ", expected: GPR\n");
      return false;
    }

    const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();

    if (DstSize == 0) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
      return false;
    }

    if (DstSize != 64 && DstSize > 32) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
                        << ", expected: 32 or 64\n");
      return false;
    }
    // At this point G_ANYEXT is just like a plain COPY, but we need
    // to explicitly form the 64-bit value if any.
    if (DstSize > 32) {
      Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
      BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
          .addDef(ExtSrc)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(AArch64::sub_32);
      I.getOperand(1).setReg(ExtSrc);
    }
    return selectCopy(I, TII, MRI, TRI, RBI);
  }
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT_INREG:
  case TargetOpcode::G_SEXT: {
    if (selectUSMovFromExtend(I, MRI))
      return true;

    unsigned Opcode = I.getOpcode();
    const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
    const Register DefReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(1).getReg();
    const LLT DstTy = MRI.getType(DefReg);
    const LLT SrcTy = MRI.getType(SrcReg);
    unsigned DstSize = DstTy.getSizeInBits();
    unsigned SrcSize = SrcTy.getSizeInBits();

    // SEXT_INREG has the same src reg size as dst, the size of the value to be
    // extended is encoded in the imm.
    if (Opcode == TargetOpcode::G_SEXT_INREG)
      SrcSize = I.getOperand(2).getImm();

    if (DstTy.isVector())
      return false; // Should be handled by imported patterns.

    assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
               AArch64::GPRRegBankID &&
           "Unexpected ext regbank");

    MachineInstr *ExtI;

    // First check if we're extending the result of a load which has a dest
    // type smaller than 32 bits, then this zext is redundant. GPR32 is the
    // smallest GPR register on AArch64 and all loads which are smaller
    // automatically zero-extend the upper bits. E.g.
    // %v(s8) = G_LOAD %p, :: (load 1)
    // %v2(s32) = G_ZEXT %v(s8)
    if (!IsSigned) {
      auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
      bool IsGPR =
          RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
      if (LoadMI && IsGPR) {
        const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
        unsigned BytesLoaded = MemOp->getSize().getValue();
        if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
          return selectCopy(I, TII, MRI, TRI, RBI);
      }

      // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
      // + SUBREG_TO_REG.
      if (IsGPR && SrcSize == 32 && DstSize == 64) {
        Register SubregToRegSrc =
            MRI.createVirtualRegister(&AArch64::GPR32RegClass);
        const Register ZReg = AArch64::WZR;
        MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
            .addImm(0);

        MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
            .addImm(0)
            .addUse(SubregToRegSrc)
            .addImm(AArch64::sub_32);

        if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
                                          MRI)) {
          LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
          return false;
        }

        if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
                                          MRI)) {
          LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
          return false;
        }

        I.eraseFromParent();
        return true;
      }
    }

    if (DstSize == 64) {
      if (Opcode != TargetOpcode::G_SEXT_INREG) {
        // FIXME: Can we avoid manually doing this?
        if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
                                          MRI)) {
          LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
                            << " operand\n");
          return false;
        }
        SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
                                {&AArch64::GPR64RegClass}, {})
                     .addImm(0)
                     .addUse(SrcReg)
                     .addImm(AArch64::sub_32)
                     .getReg(0);
      }

      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
                            {DefReg}, {SrcReg})
                 .addImm(0)
                 .addImm(SrcSize - 1);
    } else if (DstSize <= 32) {
      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
                            {DefReg}, {SrcReg})
                 .addImm(0)
                 .addImm(SrcSize - 1);
    } else {
      return false;
    }

    constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }
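  // Illustrative example for the extension lowering above (exposition only):
  // a G_ZEXT from s8 to s32 becomes "UBFMWri %dst, %src, 0, 7", an unsigned
  // bitfield move that keeps bits [7:0] and clears the rest; the signed forms
  // use SBFMWri/SBFMXri with the same immediates.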
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
              SrcTy = MRI.getType(I.getOperand(1).getReg());
    const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
    if (NewOpc == Opcode)
      return false;

    I.setDesc(TII.get(NewOpc));
    constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    I.setFlags(MachineInstr::NoFPExcept);

    return true;
  }

  case TargetOpcode::G_FREEZE:
    return selectCopy(I, TII, MRI, TRI, RBI);

  case TargetOpcode::G_INTTOPTR:
    // The importer is currently unable to import pointer types since they
    // didn't exist in SelectionDAG.
    return selectCopy(I, TII, MRI, TRI, RBI);

  case TargetOpcode::G_BITCAST:
    // Imported SelectionDAG rules can handle every bitcast except those that
    // bitcast from a type to the same type. Ideally, these shouldn't occur
    // but we might not run an optimizer that deletes them. The other exception
    // is bitcasts involving pointer types, as SelectionDAG has no knowledge
    // of them.
    return selectCopy(I, TII, MRI, TRI, RBI);
  case TargetOpcode::G_SELECT: {
    auto &Sel = cast<GSelect>(I);
    const Register CondReg = Sel.getCondReg();
    const Register TReg = Sel.getTrueReg();
    const Register FReg = Sel.getFalseReg();

    if (tryOptSelect(Sel))
      return true;

    // Make sure to use an unused vreg instead of wzr, so that the peephole
    // optimizations will be able to optimize these.
    Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
                     .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
    constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
    if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
      return false;
    Sel.eraseFromParent();
    return true;
  }
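  // Sketch of the fallback path above (exposition only): the i1 condition is
  // tested with an ANDS against the logical immediate #0x1, so only bit 0 of
  // the condition register matters, and emitSelect then produces a
  // CSEL/FCSEL-style instruction predicated on NE.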
  case TargetOpcode::G_ICMP: {
    if (Ty != LLT::scalar(32)) {
      LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
                        << ", expected: " << LLT::scalar(32) << '\n');
      return false;
    }

    auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
    const AArch64CC::CondCode InvCC =
        changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
    emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
    emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
              /*Src2=*/AArch64::WZR, InvCC, MIB);
    I.eraseFromParent();
    return true;
  }
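  // Note (exposition only): the compare above only sets NZCV; the CSINC with
  // both sources tied to WZR is the CSET idiom, and passing the inverted
  // predicate makes the destination 1 exactly when the original G_ICMP
  // predicate holds.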
  case TargetOpcode::G_FCMP: {
    CmpInst::Predicate Pred =
        static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
    if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
                       Pred) ||
        !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
      return false;
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_VASTART:
    return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
                                : selectVaStartAAPCS(I, MF, MRI);
  case TargetOpcode::G_INTRINSIC:
    return selectIntrinsic(I, MRI);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectIntrinsicWithSideEffects(I, MRI);
  case TargetOpcode::G_IMPLICIT_DEF: {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const Register DstReg = I.getOperand(0).getReg();
    const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
    const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
    RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
    return true;
  }
  case TargetOpcode::G_BLOCK_ADDR: {
    Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();
    if (std::optional<uint16_t> BADisc =
            STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
      MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
      MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
      MIB.buildInstr(AArch64::MOVaddrPAC)
          .addBlockAddress(I.getOperand(1).getBlockAddress())
          .addImm(AArch64PACKey::IA)
          .addReg(/*AddrDisc=*/AArch64::XZR)
          .addImm(*BADisc)
          .constrainAllUses(TII, TRI, RBI);
      MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X16));
      RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                   AArch64::GPR64RegClass, MRI);
      I.eraseFromParent();
      return true;
    }

    if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
      materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
      I.eraseFromParent();
      return true;
    }

    I.setDesc(TII.get(AArch64::MOVaddrBA));
    auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
                         I.getOperand(0).getReg())
                     .addBlockAddress(I.getOperand(1).getBlockAddress(),
                                      /* Offset */ 0, AArch64II::MO_PAGE)
                     .addBlockAddress(
                         I.getOperand(1).getBlockAddress(), /* Offset */ 0,
                         AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
  }
  case AArch64::G_DUP: {
    // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
    // imported patterns. Do it manually here. Avoiding generating s16 gpr is
    // difficult because at RBS we may end up pessimizing the fpr case if we
    // decided to add an anyextend to fix this. Manual selection is the most
    // robust solution for now.
    if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
        AArch64::GPRRegBankID)
      return false; // We expect the fpr regbank case to be imported.
    LLT VecTy = MRI.getType(I.getOperand(0).getReg());
    if (VecTy == LLT::fixed_vector(8, 8))
      I.setDesc(TII.get(AArch64::DUPv8i8gpr));
    else if (VecTy == LLT::fixed_vector(16, 8))
      I.setDesc(TII.get(AArch64::DUPv16i8gpr));
    else if (VecTy == LLT::fixed_vector(4, 16))
      I.setDesc(TII.get(AArch64::DUPv4i16gpr));
    else if (VecTy == LLT::fixed_vector(8, 16))
      I.setDesc(TII.get(AArch64::DUPv8i16gpr));
    else
      return false;
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
  case TargetOpcode::G_BUILD_VECTOR:
    return selectBuildVector(I, MRI);
  case TargetOpcode::G_MERGE_VALUES:
    return selectMergeValues(I, MRI);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectUnmergeValues(I, MRI);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return selectShuffleVector(I, MRI);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectExtractElt(I, MRI);
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectConcatVectors(I, MRI);
  case TargetOpcode::G_JUMP_TABLE:
    return selectJumpTable(I, MRI);
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
    return selectMOPS(I, MRI);
  }

  return false;
}
bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
  MachineIRBuilderState OldMIBState = MIB.getState();
  bool Success = select(I);
  MIB.setState(OldMIBState);
  return Success;
}
bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
                                            MachineRegisterInfo &MRI) {
  unsigned Mopcode;
  switch (GI.getOpcode()) {
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
    Mopcode = AArch64::MOPSMemoryCopyPseudo;
    break;
  case TargetOpcode::G_MEMMOVE:
    Mopcode = AArch64::MOPSMemoryMovePseudo;
    break;
  case TargetOpcode::G_MEMSET:
    // For tagged memset see llvm.aarch64.mops.memset.tag
    Mopcode = AArch64::MOPSMemorySetPseudo;
    break;
  }

  auto &DstPtr = GI.getOperand(0);
  auto &SrcOrVal = GI.getOperand(1);
  auto &Size = GI.getOperand(2);

  // Create copies of the registers that can be clobbered.
  const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
  const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
  const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());

  const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
  const auto &SrcValRegClass =
      IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;

  // Constrain to specific registers
  RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
  RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
  RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);

  MIB.buildCopy(DstPtrCopy, DstPtr);
  MIB.buildCopy(SrcValCopy, SrcOrVal);
  MIB.buildCopy(SizeCopy, Size);

  // New instruction uses the copied registers because it must update them.
  // The defs are not used since they don't exist in G_MEM*. They are still
  // tied.
  // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
  Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
  Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  if (IsSet) {
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
                   {DstPtrCopy, SizeCopy, SrcValCopy});
  } else {
    Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
                   {DstPtrCopy, SrcValCopy, SizeCopy});
  }

  GI.eraseFromParent();
  return true;
}
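// Illustrative note (assumption about later expansion, exposition only): the
// MOPSMemory*Pseudo instructions built above are expected to be expanded after
// selection into the Armv8.8-A MOPS prologue/main/epilogue triples (e.g.
// CPYFP/CPYFM/CPYFE for a forward copy), which is why the pseudo both reads
// and updates the copied pointer and size registers.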
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
                                            MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
  Register JTAddr = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  Register Index = I.getOperand(2).getReg();

  MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);

  // With aarch64-jump-table-hardening, we only expand the jump table dispatch
  // sequence later, to guarantee the integrity of the intermediate values.
  if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
    CodeModel::Model CM = TM.getCodeModel();
    if (STI.isTargetMachO()) {
      if (CM != CodeModel::Small && CM != CodeModel::Large)
        report_fatal_error("Unsupported code-model for hardened jump-table");
    } else {
      // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
      assert(STI.isTargetELF() &&
             "jump table hardening only supported on MachO/ELF");
      if (CM != CodeModel::Small)
        report_fatal_error("Unsupported code-model for hardened jump-table");
    }

    MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
    MIB.buildInstr(AArch64::BR_JumpTable)
        .addJumpTableIndex(I.getOperand(1).getIndex());
    I.eraseFromParent();
    return true;
  }

  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);

  auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
                                      {TargetReg, ScratchReg}, {JTAddr, Index})
                           .addJumpTableIndex(JTI);
  // Save the jump table info.
  MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
                 {static_cast<int64_t>(JTI)});
  // Build the indirect branch.
  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
}
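// Illustrative note (rough sketch of the non-hardened path above): the
// JumpTableDest32 pseudo later expands to code that loads the 4-byte entry at
// JTAddr + Index * 4, adds it to the table base to form the branch target, and
// the emitted BR then jumps there; the scratch register gives the expansion a
// spare GPR to work with.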
bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");

  Register DstReg = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
  auto MovMI =
      MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
          .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
          .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}
bool AArch64InstructionSelector::selectTLSGlobalValue(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  if (!STI.isTargetMachO())
    return false;
  MachineFunction &MF = *I.getParent()->getParent();
  MF.getFrameInfo().setAdjustsStack(true);

  const auto &GlobalOp = I.getOperand(1);
  assert(GlobalOp.getOffset() == 0 &&
         "Shouldn't have an offset on TLS globals!");
  const GlobalValue &GV = *GlobalOp.getGlobal();

  auto LoadGOT =
      MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
          .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);

  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
                             {LoadGOT.getReg(0)})
                  .addImm(0);

  MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
  // TLS calls preserve all registers except those that absolutely must be
  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not
  // be silly).
  unsigned Opcode = getBLRCallOpcode(MF);

  // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
  if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {
    assert(Opcode == AArch64::BLR);
    Opcode = AArch64::BLRAAZ;
  }

  MIB.buildInstr(Opcode, {}, {Load})
      .addUse(AArch64::X0, RegState::Implicit)
      .addDef(AArch64::X0, RegState::Implicit)
      .addRegMask(TRI.getTLSCallPreservedMask());

  MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                               AArch64::GPR64RegClass, MRI);
  I.eraseFromParent();
  return true;
}
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
    unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
    MachineIRBuilder &MIRBuilder) const {
  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});

  auto BuildFn = [&](unsigned SubregIndex) {
    auto Ins =
        MIRBuilder
            .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
            .addImm(SubregIndex);
    constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
    constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
    return &*Ins;
  };

  switch (EltSize) {
  case 8:
    return BuildFn(AArch64::bsub);
  case 16:
    return BuildFn(AArch64::hsub);
  case 32:
    return BuildFn(AArch64::ssub);
  case 64:
    return BuildFn(AArch64::dsub);
  default:
    return nullptr;
  }
}
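// Note on the helper above (exposition only): IMPLICIT_DEF followed by
// INSERT_SUBREG is the usual way to place a scalar FPR value into the low lane
// of a wider vector register without a real data move; the subregister index
// passed to BuildFn (bsub/hsub/ssub/dsub) selects the element width.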
MachineInstr *
AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
                                             MachineIRBuilder &MIB,
                                             MachineRegisterInfo &MRI) const {
  LLT DstTy = MRI.getType(DstReg);
  const TargetRegisterClass *RC =
      getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
    LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
    return nullptr;
  }
  unsigned SubReg = 0;
  if (!getSubRegForClass(RC, TRI, SubReg))
    return nullptr;
  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
    LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
                      << DstTy.getSizeInBits() << "\n");
    return nullptr;
  }
  auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
                  .addReg(SrcReg, 0, SubReg);
  RBI.constrainGenericRegister(DstReg, *RC, MRI);
  return Copy;
}
bool AArch64InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);

  if (I.getNumOperands() != 3)
    return false;

  // Merging 2 s64s into an s128.
  if (DstTy == LLT::scalar(128)) {
    if (SrcTy.getSizeInBits() != 64)
      return false;
    Register DstReg = I.getOperand(0).getReg();
    Register Src1Reg = I.getOperand(1).getReg();
    Register Src2Reg = I.getOperand(2).getReg();
    auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
    MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
                                         /* LaneIdx */ 0, RB, MIB);
    if (!InsMI)
      return false;
    MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
                                          Src2Reg, /* LaneIdx */ 1, RB, MIB);
    if (!Ins2MI)
      return false;
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
    constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }

  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
    return false;

  auto *DstRC = &AArch64::GPR64RegClass;
  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                    TII.get(TargetOpcode::SUBREG_TO_REG))
                                .addDef(SubToRegDef)
                                .addImm(0)
                                .addUse(I.getOperand(1).getReg())
                                .addImm(AArch64::sub_32);
  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
  // Need to anyext the second scalar before we can use bfm
  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                     TII.get(TargetOpcode::SUBREG_TO_REG))
                                 .addDef(SubToRegDef2)
                                 .addImm(0)
                                 .addUse(I.getOperand(2).getReg())
                                 .addImm(AArch64::sub_32);
  MachineInstr &BFM =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
           .addDef(I.getOperand(0).getReg())
           .addUse(SubToRegDef)
           .addUse(SubToRegDef2)
           .addImm(32)
           .addImm(31);
  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
                              const unsigned EltSize) {
  // Choose a lane copy opcode and subregister based off of the size of the
  // vector's elements.
  switch (EltSize) {
  case 8:
    CopyOpc = AArch64::DUPi8;
    ExtractSubReg = AArch64::bsub;
    break;
  case 16:
    CopyOpc = AArch64::DUPi16;
    ExtractSubReg = AArch64::hsub;
    break;
  case 32:
    CopyOpc = AArch64::DUPi32;
    ExtractSubReg = AArch64::ssub;
    break;
  case 64:
    CopyOpc = AArch64::DUPi64;
    ExtractSubReg = AArch64::dsub;
    break;
  default:
    // Unknown size, bail out.
    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
    return false;
  }
  return true;
}
MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
    std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
    Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
    LLVM_DEBUG(
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
    return nullptr;
  }

  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(ScalarTy, DstRB, true);
  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
    return nullptr;
  }

  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
  const LLT &VecTy = MRI.getType(VecReg);
  const TargetRegisterClass *VecRC =
      getRegClassForTypeOnBank(VecTy, VecRB, true);
  if (!VecRC) {
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    return nullptr;
  }

  // The register that we're going to copy into.
  Register InsertReg = VecReg;
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(DstRC);
  // If the lane index is 0, we just use a subregister COPY.
  if (LaneIdx == 0) {
    auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
                    .addReg(VecReg, 0, ExtractSubReg);
    RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
    return &*Copy;
  }

  // Lane copies require 128-bit wide registers. If we're dealing with an
  // unpacked vector, then we need to move up to that width. Insert an implicit
  // def and a subregister insert to get us there.
  if (VecTy.getSizeInBits() != 128) {
    MachineInstr *ScalarToVector = emitScalarToVector(
        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
    if (!ScalarToVector)
      return nullptr;
    InsertReg = ScalarToVector->getOperand(0).getReg();
  }

  MachineInstr *LaneCopyMI =
      MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);

  // Make sure that we actually constrain the initial copy.
  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
  return LaneCopyMI;
}
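// Note on the helper above (exposition only): lane 0 is read with a plain
// subregister COPY, while any other lane uses the DUPi* element-to-scalar
// copy, which only accepts a 128-bit source -- hence the widening through
// emitScalarToVector for unpacked (sub-128-bit) vectors.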
bool AArch64InstructionSelector::selectExtractElt(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
         "unexpected opcode!");
  Register DstReg = I.getOperand(0).getReg();
  const LLT NarrowTy = MRI.getType(DstReg);
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT WideTy = MRI.getType(SrcReg);

  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
         "source register size too small!");
  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");

  // Need the lane index to determine the correct copy opcode.
  MachineOperand &LaneIdxOp = I.getOperand(2);
  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");

  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
    return false;
  }

  // Find the index to extract from.
  auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
  if (!VRegAndVal)
    return false;
  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  MachineInstr *Extract =
      emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg, LaneIdx, MIB);
  if (!Extract)
    return false;

  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectSplitVectorUnmerge(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(SrcReg);

  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
  if (SrcTy.getSizeInBits() > 128) {
    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
    return false;
  }

  // We implement a split vector operation by treating the sub-vectors as
  // scalars and extracting them.
  const RegisterBank &DstRB =
      *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
    Register Dst = I.getOperand(OpIdx).getReg();
    MachineInstr *Extract =
        emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
    if (!Extract)
      return false;
  }
  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
                                                     MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "unexpected opcode");

  // TODO: Handle unmerging into GPRs and from scalars to scalars.
  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID ||
      RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
                         "currently unsupported.\n");
    return false;
  }

  // The last operand is the vector source register, and every other operand is
  // a register to unpack into.
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT WideTy = MRI.getType(SrcReg);

  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
         "can only unmerge from vector or s128 types!");
  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
         "source register size too small!");

  if (!NarrowTy.isScalar())
    return selectSplitVectorUnmerge(I, MRI);

  // Choose a lane copy opcode and subregister based off of the size of the
  // vector's elements.
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
    return false;

  // Set up for the lane copies.
  MachineBasicBlock &MBB = *I.getParent();

  // Stores the registers we'll be copying from.
  SmallVector<Register, 4> InsertRegs;

  // We'll use the first register twice, so we only need NumElts-1 registers.
  unsigned NumInsertRegs = NumElts - 1;

  // If our elements fit into exactly 128 bits, then we can copy from the
  // source directly. Otherwise, we need to do a bit of setup with some
  // subregister inserts.
  if (NarrowTy.getSizeInBits() * NumElts == 128) {
    InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
  } else {
    // No. We have to perform subregister inserts. For each insert, create an
    // implicit def and a subregister insert, and save the register we create.
    const TargetRegisterClass *RC = getRegClassForTypeOnBank(
        LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
        *RBI.getRegBank(SrcReg, MRI, TRI));
    unsigned SubReg = 0;
    bool Found = getSubRegForClass(RC, TRI, SubReg);
    (void)Found;
    assert(Found && "expected to find last operand's subreg idx");
    for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
      Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &ImpDefMI =
          *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
                   ImpDefReg);

      // Now, create the subregister insert from SrcReg.
      Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &InsMI =
          *BuildMI(MBB, I, I.getDebugLoc(),
                   TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
               .addUse(ImpDefReg)
               .addUse(SrcReg)
               .addImm(SubReg);

      constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
      constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);

      // Save the register so that we can copy from it after.
      InsertRegs.push_back(InsertReg);
    }
  }

  // Now that we've created any necessary subregister inserts, we can
  // create the copies.
  //
  // Perform the first copy separately as a subregister copy.
  Register CopyTo = I.getOperand(0).getReg();
  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
                       .addReg(InsertRegs[0], 0, ExtractSubReg);
  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);

  // Now, perform the remaining copies as vector lane copies.
  unsigned LaneIdx = 1;
  for (Register InsReg : InsertRegs) {
    Register CopyTo = I.getOperand(LaneIdx).getReg();
    MachineInstr &CopyInst =
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
             .addUse(InsReg)
             .addImm(LaneIdx);
    constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
    ++LaneIdx;
  }

  // Separately constrain the first copy's destination. Because of the
  // limitation in constrainOperandRegClass, we can't guarantee that this will
  // actually be constrained. So, do it ourselves using the second operand.
  const TargetRegisterClass *RC =
      MRI.getRegClassOrNull(I.getOperand(1).getReg());
  if (!RC) {
    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
    return false;
  }

  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectConcatVectors(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Unexpected opcode");
  Register Dst = I.getOperand(0).getReg();
  Register Op1 = I.getOperand(1).getReg();
  Register Op2 = I.getOperand(2).getReg();
  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
  if (!ConcatMI)
    return false;
  I.eraseFromParent();
  return true;
}

unsigned
AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
                                                  MachineFunction &MF) const {
  Type *CPTy = CPVal->getType();
  Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);

  MachineConstantPool *MCP = MF.getConstantPool();
  return MCP->getConstantPoolIndex(CPVal, Alignment);
}
MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  const TargetRegisterClass *RC;
  unsigned Opc;
  bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
  unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
  switch (Size) {
  case 16:
    RC = &AArch64::FPR128RegClass;
    Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
    break;
  case 8:
    RC = &AArch64::FPR64RegClass;
    Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
    break;
  case 4:
    RC = &AArch64::FPR32RegClass;
    Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
    break;
  case 2:
    RC = &AArch64::FPR16RegClass;
    Opc = AArch64::LDRHui;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType());
    return nullptr;
  }

  MachineInstr *LoadMI = nullptr;
  auto &MF = MIRBuilder.getMF();
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
  if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
    // Use load(literal) for tiny code model.
    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
  } else {
    auto Adrp =
        MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
            .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);

    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
                   .addConstantPoolIndex(
                       CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);

    constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
  }

  MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
  LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
                                                    MachineMemOperand::MOLoad,
                                                    Size, Align(Size)));
  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
  return LoadMI;
}
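// Note on the addressing above (exposition only): outside the tiny code model
// the constant is reached with the usual two-instruction materialization --
// ADRP forms the 4 KiB page address of the pool entry (MO_PAGE) and the load
// supplies the low 12 bits (MO_PAGEOFF | MO_NC) -- while the tiny code model
// can use a single PC-relative load-literal.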
/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
/// size and RB.
static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
  unsigned Opc, SubregIdx;
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8gpr;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64gpr;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  } else {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8lane;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16lane;
      SubregIdx = AArch64::hsub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32lane;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64lane;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  }
  return std::make_pair(Opc, SubregIdx);
}
MachineInstr *AArch64InstructionSelector::emitInstr(
    unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
    std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
    const ComplexRendererFns &RenderFns) const {
  assert(Opcode && "Expected an opcode?");
  assert(!isPreISelGenericOpcode(Opcode) &&
         "Function should only be used to produce selected instructions!");
  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
  if (RenderFns)
    for (auto &Fn : *RenderFns)
      Fn(MI);
  constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
  return &*MI;
}
*AArch64InstructionSelector::emitAddSub(
4390 const std::array
<std::array
<unsigned, 2>, 5> &AddrModeAndSizeToOpcode
,
4391 Register Dst
, MachineOperand
&LHS
, MachineOperand
&RHS
,
4392 MachineIRBuilder
&MIRBuilder
) const {
4393 MachineRegisterInfo
&MRI
= MIRBuilder
.getMF().getRegInfo();
4394 assert(LHS
.isReg() && RHS
.isReg() && "Expected register operands?");
4395 auto Ty
= MRI
.getType(LHS
.getReg());
4396 assert(!Ty
.isVector() && "Expected a scalar or pointer?");
4397 unsigned Size
= Ty
.getSizeInBits();
4398 assert((Size
== 32 || Size
== 64) && "Expected a 32-bit or 64-bit type only");
4399 bool Is32Bit
= Size
== 32;
4401 // INSTRri form with positive arithmetic immediate.
4402 if (auto Fns
= selectArithImmed(RHS
))
4403 return emitInstr(AddrModeAndSizeToOpcode
[0][Is32Bit
], {Dst
}, {LHS
},
4406 // INSTRri form with negative arithmetic immediate.
4407 if (auto Fns
= selectNegArithImmed(RHS
))
4408 return emitInstr(AddrModeAndSizeToOpcode
[3][Is32Bit
], {Dst
}, {LHS
},
4412 if (auto Fns
= selectArithExtendedRegister(RHS
))
4413 return emitInstr(AddrModeAndSizeToOpcode
[4][Is32Bit
], {Dst
}, {LHS
},
4417 if (auto Fns
= selectShiftedRegister(RHS
))
4418 return emitInstr(AddrModeAndSizeToOpcode
[1][Is32Bit
], {Dst
}, {LHS
},
4420 return emitInstr(AddrModeAndSizeToOpcode
[2][Is32Bit
], {Dst
}, {LHS
, RHS
},
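// Note on the opcode table consumed above (exposition only): row 0 holds the
// arithmetic-immediate forms, row 1 the shifted-register forms, row 2 the
// plain register-register forms, row 3 the opposite-operation immediate forms
// used for negated immediates, and row 4 the extended-register forms; within a
// row, index 0 is the 64-bit opcode and index 1 the 32-bit one.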
MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
                                    MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDXri, AArch64::ADDWri},
       {AArch64::ADDXrs, AArch64::ADDWrs},
       {AArch64::ADDXrr, AArch64::ADDWrr},
       {AArch64::SUBXri, AArch64::SUBWri},
       {AArch64::ADDXrx, AArch64::ADDWrx}}};
  return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::ADDSXrs, AArch64::ADDSWrs},
       {AArch64::ADDSXrr, AArch64::ADDSWrr},
       {AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::SUBSXrs, AArch64::SUBSWrs},
       {AArch64::SUBSXrr, AArch64::SUBSWrr},
       {AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}
MachineInstr *
AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo *MRI = MIRBuilder.getMRI();
  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
  static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo *MRI = MIRBuilder.getMRI();
  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
  static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
  auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
  return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
}
MachineInstr *
AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT Ty = MRI.getType(LHS.getReg());
  unsigned RegSize = Ty.getSizeInBits();
  bool Is32Bit = (RegSize == 32);
  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
                                   {AArch64::ANDSXrs, AArch64::ANDSWrs},
                                   {AArch64::ANDSXrr, AArch64::ANDSWrr}};
  // ANDS needs a logical immediate for its immediate form. Check if we can
  // fold one in.
  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
    int64_t Imm = ValAndVReg->Value.getSExtValue();

    if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
      auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
      TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
      constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
      return &*TstMI;
    }
  }

  if (auto Fns = selectLogicalShiftedRegister(RHS))
    return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
  return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
}
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  assert(Predicate.isPredicate() && "Expected predicate?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT CmpTy = MRI.getType(LHS.getReg());
  assert(!CmpTy.isVector() && "Expected scalar or pointer");
  unsigned Size = CmpTy.getSizeInBits();
  (void)Size;
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
  // Fold the compare into a cmn or tst if possible.
  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
    return FoldCmp;
  auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
}
MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
    Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
#ifndef NDEBUG
  LLT Ty = MRI.getType(Dst);
  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
         "Expected a 32-bit scalar register?");
#endif
  const Register ZReg = AArch64::WZR;
  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(Pred, CC1, CC2);
  auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
  if (CC2 == AArch64CC::AL)
    return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
                     MIRBuilder);
  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
  Register Def1Reg = MRI.createVirtualRegister(RC);
  Register Def2Reg = MRI.createVirtualRegister(RC);
  auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
  emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
  emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
  constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
  return &*OrMI;
}
MachineInstr *AArch64InstructionSelector::emitFPCompare(
    Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
    std::optional<CmpInst::Predicate> Pred) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(LHS);
  if (Ty.isVector())
    return nullptr;
  unsigned OpSize = Ty.getSizeInBits();
  assert(OpSize == 16 || OpSize == 32 || OpSize == 64);

  // If this is a compare against +0.0, then we don't have
  // to explicitly materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());

  auto IsEqualityPred = [](CmpInst::Predicate P) {
    return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
           P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
  };
  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
    // Try commutating the operands.
    const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
    if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
      ShouldUseImm = true;
      std::swap(LHS, RHS);
    }
  }
  unsigned CmpOpcTbl[2][3] = {
      {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
      {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
  unsigned CmpOpc =
      CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];

  // Partially build the compare. Decide if we need to add a use for the
  // third operand based off whether or not we're comparing against 0.0.
  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
  CmpMI.setMIFlags(MachineInstr::NoFPExcept);
  if (!ShouldUseImm)
    CmpMI.addUse(RHS);
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    std::optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  // We implement a vector concat by:
  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
  // 2. Insert the upper vector into the destination's upper element
  // TODO: some of this code is common with G_BUILD_VECTOR handling.
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  const LLT Op1Ty = MRI.getType(Op1);
  const LLT Op2Ty = MRI.getType(Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");

  if (Op1Ty.getSizeInBits() >= 128) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
    return nullptr;
  }

  // At the moment we just support 64 bit vector concats.
  if (Op1Ty.getSizeInBits() != 64) {
    LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
    return nullptr;
  }

  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);

  MachineInstr *WidenedOp1 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =
      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());

  if (!Dst)
    Dst = MRI.createVirtualRegister(DstRC);
  auto InsElt =
      MIRBuilder
          .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
          .addImm(1) /* Lane index */
          .addUse(WidenedOp2->getOperand(0).getReg())
          .addImm(0);
  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return &*InsElt;
}
MachineInstr *
AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
                                      Register Src2, AArch64CC::CondCode Pred,
                                      MachineIRBuilder &MIRBuilder) const {
  auto &MRI = *MIRBuilder.getMRI();
  const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
  // If we used a register class, then this won't necessarily have an LLT.
  // Compute the size based off whether or not we have a class or bank.
  unsigned Size;
  if (const auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
    Size = TRI.getRegSizeInBits(*RC);
  else
    Size = MRI.getType(Dst).getSizeInBits();
  // Some opcodes use s1.
  assert(Size <= 64 && "Expected 64 bits or less only!");
  static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
  unsigned Opc = OpcTable[Size == 64];
  auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
  constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
  return &*CSINC;
}
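// Note (exposition only): CSINC writes Src1 when the condition holds and
// Src2 + 1 otherwise; the W/X opcode above is chosen purely from the
// destination's size, so narrow results (including s1 carry/overflow outputs)
// use CSINCWr.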
MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
                                                      Register CarryReg) {
  MachineRegisterInfo *MRI = MIB.getMRI();
  unsigned Opcode = I.getOpcode();

  // If the instruction is a SUB, we need to negate the carry,
  // because borrowing is indicated by carry-flag == 0.
  bool NeedsNegatedCarry =
      (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);

  // If the previous instruction will already produce the correct carry, do not
  // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
  // generated during legalization of wide add/sub. This optimization depends
  // on these sequences not being interrupted by other instructions.
  // We have to select the previous instruction before the carry-using
  // instruction is deleted by the calling function, otherwise the previous
  // instruction might become dead and would get deleted.
  MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
  if (SrcMI == I.getPrevNode()) {
    if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
      bool ProducesNegatedCarry = CarrySrcMI->isSub();
      if (NeedsNegatedCarry == ProducesNegatedCarry &&
          CarrySrcMI->isUnsigned() &&
          CarrySrcMI->getCarryOutReg() == CarryReg &&
          selectAndRestoreState(*SrcMI))
        return nullptr;
    }
  }

  Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);

  if (NeedsNegatedCarry) {
    // (0 - Carry) sets !C in NZCV when Carry == 1
    Register ZReg = AArch64::WZR;
    return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
  }

  // (Carry - 1) sets !C in NZCV when Carry == 0
  auto Fns = select12BitValueWithLeftShift(1);
  return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
}
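// Note (exposition only): both SUBS forms above exist solely to move an i1
// carry value back into the NZCV C flag, discarding the integer result:
// "SUBS dead, WZR, carry" yields C = !carry (the borrow convention), while
// "SUBS dead, carry, #1" yields C = carry (the addition convention).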
bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
                                                  MachineRegisterInfo &MRI) {
  auto &CarryMI = cast<GAddSubCarryOut>(I);

  if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
    // Set NZCV carry according to carry-in VReg
    emitCarryIn(I, CarryInMI->getCarryInReg());
  }

  // Emit the operation and get the correct condition code.
  auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
                                CarryMI.getLHS(), CarryMI.getRHS(), MIB);

  Register CarryOutReg = CarryMI.getCarryOutReg();

  // Don't convert carry-out to VReg if it is never used
  if (!MRI.use_nodbg_empty(CarryOutReg)) {
    // Now, put the overflow result in the register given by the first operand
    // to the overflow op. CSINC increments the result when the predicate is
    // false, so to get the increment when it's true, we need to use the
    // inverse. In this case, we want to increment when carry is set.
    Register ZReg = AArch64::WZR;
    emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
              getInvertedCondCode(OpAndCC.second), MIB);
  }

  I.eraseFromParent();
  return true;
}
std::pair<MachineInstr *, AArch64CC::CondCode>
AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
                                           MachineOperand &LHS,
                                           MachineOperand &RHS,
                                           MachineIRBuilder &MIRBuilder) const {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SADDO:
    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_UADDO:
    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
  case TargetOpcode::G_SSUBO:
    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_USUBO:
    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
  case TargetOpcode::G_SADDE:
    return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_UADDE:
    return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
  case TargetOpcode::G_SSUBE:
    return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_USUBE:
    return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
  }
}
/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
/// expressed as a conjunction.
/// \param CanNegate    Set to true if we can negate the whole sub-tree just by
///                     changing the conditions on the CMP tests.
///                     (this means we can call emitConjunctionRec() with
///                      Negate==true on this sub-tree)
/// \param MustBeFirst  Set to true if this subtree needs to be negated and we
///                     cannot do the negation naturally. We are required to
///                     emit the subtree first in this case.
/// \param WillNegate   Is true if we are called when the result of this
///                     subexpression must be negated. This happens when the
///                     outer expression is an OR. We can use this fact to know
///                     that we have a double negation (or (or ...) ...) that
///                     can be implemented for free.
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
                               bool WillNegate, MachineRegisterInfo &MRI,
                               unsigned Depth = 0) {
  if (!MRI.hasOneNonDBGUse(Val))
    return false;
  MachineInstr *ValDef = MRI.getVRegDef(Val);
  unsigned Opcode = ValDef->getOpcode();
  if (isa<GAnyCmp>(ValDef)) {
    CanNegate = true;
    MustBeFirst = false;
    return true;
  }
  // Protect against exponential runtime and stack overflow.
  if (Depth > 6)
    return false;
  if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
    bool IsOR = Opcode == TargetOpcode::G_OR;
    Register O0 = ValDef->getOperand(1).getReg();
    Register O1 = ValDef->getOperand(2).getReg();
    bool CanNegateL;
    bool MustBeFirstL;
    if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
      return false;
    bool CanNegateR;
    bool MustBeFirstR;
    if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
      return false;

    if (MustBeFirstL && MustBeFirstR)
      return false;

    if (IsOR) {
      // For an OR expression we need to be able to naturally negate at least
      // one side or we cannot do the transformation at all.
      if (!CanNegateL && !CanNegateR)
        return false;
      // If the result of the OR will be negated and we can naturally negate
      // the leaves, then this sub-tree as a whole negates naturally.
      CanNegate = WillNegate && CanNegateL && CanNegateR;
      // If we cannot naturally negate the whole sub-tree, then this must be
      // emitted first.
      MustBeFirst = !CanNegate;
    } else {
      assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
      // We cannot naturally negate an AND operation.
      CanNegate = false;
      MustBeFirst = MustBeFirstL || MustBeFirstR;
    }
    return true;
  }
  return false;
}
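// Illustrative example (exposition only): for
//   %a = G_ICMP intpred(eq), %x, %y
//   %b = G_ICMP intpred(slt), %u, %v
//   %c = G_AND %a, %b
// this predicate reports that %c can be emitted as a conjunction, which
// emitConjunction/emitConjunctionRec below then lower to a compare followed by
// a conditional compare (CCMP) instead of materializing both booleans.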
MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
    Register LHS, Register RHS, CmpInst::Predicate CC,
    AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
    MachineIRBuilder &MIB) const {
  auto &MRI = *MIB.getMRI();
  LLT OpTy = MRI.getType(LHS);
  unsigned CCmpOpc;
  std::optional<ValueAndVReg> C;
  if (CmpInst::isIntPredicate(CC)) {
    assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
    C = getIConstantVRegValWithLookThrough(RHS, MRI);
    if (!C || C->Value.sgt(31) || C->Value.slt(-31))
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
    else if (C->Value.ule(31))
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
    else
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
  } else {
    assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
           OpTy.getSizeInBits() == 64);
    switch (OpTy.getSizeInBits()) {
    case 16:
      assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
      CCmpOpc = AArch64::FCCMPHrr;
      break;
    case 32:
      CCmpOpc = AArch64::FCCMPSrr;
      break;
    case 64:
      CCmpOpc = AArch64::FCCMPDrr;
      break;
    default:
      return nullptr;
    }
  }
  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
  auto CCmp =
      MIB.buildInstr(CCmpOpc, {}, {LHS});
  if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
    CCmp.addImm(C->Value.getZExtValue());
  else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
    CCmp.addImm(C->Value.abs().getZExtValue());
  else
    CCmp.addUse(RHS);
  CCmp.addImm(NZCV).addImm(Predicate);
  constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
  return &*CCmp;
}
MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
    Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
    AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
  // We're at a tree leaf, produce a conditional comparison operation.
  auto &MRI = *MIB.getMRI();
  MachineInstr *ValDef = MRI.getVRegDef(Val);
  unsigned Opcode = ValDef->getOpcode();
  if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
    Register LHS = Cmp->getLHSReg();
    Register RHS = Cmp->getRHSReg();
    CmpInst::Predicate CC = Cmp->getCond();
    if (Negate)
      CC = CmpInst::getInversePredicate(CC);
    if (isa<GICmp>(Cmp)) {
      OutCC = changeICMPPredToAArch64CC(CC);
    } else {
      // Handle special FP cases.
      AArch64CC::CondCode ExtraCC;
      changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
      // Some floating point conditions can't be tested with a single condition
      // code. Construct an additional comparison in this case.
      if (ExtraCC != AArch64CC::AL) {
        MachineInstr *ExtraCmp;
        if (!CCOp)
          ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
        else
          ExtraCmp =
              emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
        CCOp = ExtraCmp->getOperand(0).getReg();
        Predicate = ExtraCC;
      }
    }

    // Produce a normal comparison if we are first in the chain
    if (!CCOp) {
      auto Dst = MRI.cloneVirtualRegister(LHS);
      if (isa<GICmp>(Cmp))
        return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
      return emitFPCompare(Cmp->getOperand(2).getReg(),
                           Cmp->getOperand(3).getReg(), MIB);
    }
    // Otherwise produce a ccmp.
    return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
  }
  assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");

  bool IsOR = Opcode == TargetOpcode::G_OR;

  Register LHS = ValDef->getOperand(1).getReg();
  bool CanNegateL;
  bool MustBeFirstL;
  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
  assert(ValidL && "Valid conjunction/disjunction tree");
  (void)ValidL;

  Register RHS = ValDef->getOperand(2).getReg();
  bool CanNegateR;
  bool MustBeFirstR;
  bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
  assert(ValidR && "Valid conjunction/disjunction tree");
  (void)ValidR;

  // Swap sub-tree that must come first to the right side.
  if (MustBeFirstL) {
    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
    std::swap(LHS, RHS);
    std::swap(CanNegateL, CanNegateR);
    std::swap(MustBeFirstL, MustBeFirstR);
  }

  bool NegateR;
  bool NegateAfterR;
  bool NegateL;
  bool NegateAfterAll;
  if (Opcode == TargetOpcode::G_OR) {
    // Swap the sub-tree that we can negate naturally to the left.
    if (!CanNegateL) {
      assert(CanNegateR && "at least one side must be negatable");
      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
      std::swap(LHS, RHS);
      NegateR = false;
      NegateAfterR = true;
    } else {
      // Negate the left sub-tree if possible, otherwise negate the result.
      NegateR = CanNegateR;
      NegateAfterR = !CanNegateR;
    }
    NegateL = true;
    NegateAfterAll = !Negate;
  } else {
    assert(Opcode == TargetOpcode::G_AND &&
           "Valid conjunction/disjunction tree");
    assert(!Negate && "Valid conjunction/disjunction tree");

    NegateL = false;
    NegateR = false;
    NegateAfterR = false;
    NegateAfterAll = false;
  }

  // Emit sub-trees.
  AArch64CC::CondCode RHSCC;
  MachineInstr *CmpR =
      emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
  if (NegateAfterR)
    RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
  MachineInstr *CmpL = emitConjunctionRec(
      LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
  if (NegateAfterAll)
    OutCC = AArch64CC::getInvertedCondCode(OutCC);
  return CmpL;
}

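// Entry point for the conjunction lowering above: check feasibility with
// canEmitConjunction before emitting anything.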
MachineInstr *AArch64InstructionSelector::emitConjunction(
    Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
  bool DummyCanNegate;
  bool DummyMustBeFirst;
  if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
                          *MIB.getMRI()))
    return nullptr;
  return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
}

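// Fold a G_SELECT whose condition is an AND/OR tree of compares into a
// compare/ccmp chain followed by a single conditional select.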
bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
                                                         MachineInstr &CondMI) {
  AArch64CC::CondCode AArch64CC;
  MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
  if (!ConjMI)
    return false;

  emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
  SelI.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  // We want to recognize this pattern:
  //
  // $z = G_FCMP pred, $x, $y
  // ...
  // $w = G_SELECT $z, $a, $b
  //
  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
  // some copies/truncs in between.)
  //
  // If we see this, then we can emit something like this:
  //
  // fcmp $x, $y
  // fcsel $w, $a, $b, pred
  //
  // Rather than emitting both of the rather long sequences in the standard
  // G_FCMP/G_SELECT select methods.

  // First, check if the condition is defined by a compare.
  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());

  // We can only fold if all of the defs have one use.
  Register CondDefReg = CondDef->getOperand(0).getReg();
  if (!MRI.hasOneNonDBGUse(CondDefReg)) {
    // Unless it's another select.
    for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
      if (CondDef == &UI)
        continue;
      if (UI.getOpcode() != TargetOpcode::G_SELECT)
        return false;
    }
  }

  // Is the condition defined by a compare?
  unsigned CondOpc = CondDef->getOpcode();
  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
    if (tryOptSelectConjunction(I, *CondDef))
      return true;
    return false;
  }

  AArch64CC::CondCode CondCode;
  if (CondOpc == TargetOpcode::G_ICMP) {
    auto Pred =
        static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
    CondCode = changeICMPPredToAArch64CC(Pred);
    emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
                       CondDef->getOperand(1), MIB);
  } else {
    // Get the condition code for the select.
    auto Pred =
        static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
    AArch64CC::CondCode CondCode2;
    changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);

    // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
    // instructions to emit the comparison.
    // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
    // unnecessary.
    if (CondCode2 != AArch64CC::AL)
      return false;

    if (!emitFPCompare(CondDef->getOperand(2).getReg(),
                       CondDef->getOperand(3).getReg(), MIB)) {
      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
      return false;
    }
  }

  // Emit the select.
  emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
             I.getOperand(3).getReg(), CondCode, MIB);
  I.eraseFromParent();
  return true;
}

MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
         "Unexpected MachineOperand");
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  // We want to find this sort of thing:
  //
  //   x = G_SUB 0, y
  //   G_ICMP x, z
  //
  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.

  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
  auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());

  // If the LHS of the compare is a negation, fold it into a CMN on the
  // negated value.
  if (isCMN(LHSDef, P, MRI))
    return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);

  // Same idea here, but with the RHS of the compare instead:
  if (isCMN(RHSDef, P, MRI))
    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);

  // Given a G_AND compared against zero,
  // Produce this if the compare is signed:
  //
  //   tst x, y
  if (!CmpInst::isUnsigned(P) && LHSDef &&
      LHSDef->getOpcode() == TargetOpcode::G_AND) {
    // Make sure that the RHS is 0.
    auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return nullptr;

    return emitTST(LHSDef->getOperand(1),
                   LHSDef->getOperand(2), MIRBuilder);
  }

  return nullptr;
}

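// G_SHUFFLE_VECTOR is selected by materializing the shuffle mask as a byte
// index vector in the constant pool and feeding it to a TBL1/TBL2 lookup.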
bool AArch64InstructionSelector::selectShuffleVector(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  Register Src1Reg = I.getOperand(1).getReg();
  const LLT Src1Ty = MRI.getType(Src1Reg);
  Register Src2Reg = I.getOperand(2).getReg();
  const LLT Src2Ty = MRI.getType(Src2Reg);
  ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  LLVMContext &Ctx = MF.getFunction().getContext();

  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
  // it's originated from a <1 x T> type. Those should have been lowered into
  // G_BUILD_VECTOR earlier.
  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
    return false;
  }

  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;

  SmallVector<Constant *, 64> CstIdxs;
  for (int Val : Mask) {
    // For now, any undef indexes we'll just assume to be 0. This should be
    // optimized in future, e.g. to select DUP etc.
    Val = Val < 0 ? 0 : Val;
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      unsigned Offset = Byte + Val * BytesPerElt;
      CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
    }
  }

  // Use a constant pool to load the index vector for TBL.
  Constant *CPVal = ConstantVector::get(CstIdxs);
  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
  if (!IndexLoad) {
    LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
    return false;
  }

  if (DstTy.getSizeInBits() != 128) {
    assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
    // This case can be done with TBL1.
    MachineInstr *Concat =
        emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
    if (!Concat) {
      LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
      return false;
    }

    // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
    IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
                                   IndexLoad->getOperand(0).getReg(), MIB);

    auto TBL1 = MIB.buildInstr(
        AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
        {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
    constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);

    auto Copy =
        MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
            .addReg(TBL1.getReg(0), 0, AArch64::dsub);
    RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }

  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
  // Q registers for regalloc.
  SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
  auto RegSeq = createQTuple(Regs, MIB);

  auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
                             {RegSeq, IndexLoad->getOperand(0)});
  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

MachineInstr *AArch64InstructionSelector::emitLaneInsert(
    std::optional<Register> DstReg, Register SrcReg, Register EltReg,
    unsigned LaneIdx, const RegisterBank &RB,
    MachineIRBuilder &MIRBuilder) const {
  MachineInstr *InsElt = nullptr;
  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // Create a register to define with the insert if one wasn't passed in.
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(DstRC);

  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;

  if (RB.getID() == AArch64::FPRRegBankID) {
    auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(InsSub->getOperand(0).getReg())
                 .addImm(0);
  } else {
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(EltReg);
  }

  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return InsElt;
}

bool AArch64InstructionSelector::selectUSMovFromExtend(
    MachineInstr &MI, MachineRegisterInfo &MRI) {
  if (MI.getOpcode() != TargetOpcode::G_SEXT &&
      MI.getOpcode() != TargetOpcode::G_ZEXT &&
      MI.getOpcode() != TargetOpcode::G_ANYEXT)
    return false;
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
  const Register DefReg = MI.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DefReg);
  unsigned DstSize = DstTy.getSizeInBits();

  if (DstSize != 32 && DstSize != 64)
    return false;

  MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
                                       MI.getOperand(1).getReg(), MRI);
  int64_t Lane;
  if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
    return false;
  Register Src0 = Extract->getOperand(1).getReg();

  const LLT VecTy = MRI.getType(Src0);
  if (VecTy.isScalableVector())
    return false;

  if (VecTy.getSizeInBits() != 128) {
    const MachineInstr *ScalarToVector = emitScalarToVector(
        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
    assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
    Src0 = ScalarToVector->getOperand(0).getReg();
  }

  unsigned Opcode;
  if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
    Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
    Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
    Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
  else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
    Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
  else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
    Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
  else
    llvm_unreachable("Unexpected type combo for S/UMov!");

  // We may need to generate one of these, depending on the type and sign of
  // the input:
  //  DstReg = SMOV Src0, Lane;
  //  NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
  MachineInstr *ExtI = nullptr;
  if (DstSize == 64 && !IsSigned) {
    Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
    ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
               .addImm(0)
               .addUse(NewReg)
               .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
  } else
    ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);

  constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  MI.eraseFromParent();
  return true;
}

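// The tryAdvSIMDModImm* helpers below attempt to materialize a splatted
// constant with a single MOVI/MVNI/FMOV vector-immediate instruction,
// returning nullptr when the bit pattern does not fit the encoding.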
MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
  unsigned Op;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = AArch64::MOVIv16b_ns;
  } else {
    Op = AArch64::MOVIv8b_ns;
  }

  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();

  if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
    Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
    auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
    constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
    return &*Mov;
  }
  return nullptr;
}

MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
    bool Inv) {
  unsigned Op;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
  } else {
    Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
  }

  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
  uint64_t Shift;

  if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
    Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
    Shift = 0;
  } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
    Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
    Shift = 8;
  } else
    return nullptr;

  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
  return &*Mov;
}

MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
    bool Inv) {
  unsigned Op;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
  } else {
    Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
  }

  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
  uint64_t Shift;

  if ((AArch64_AM::isAdvSIMDModImmType1(Val))) {
    Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
    Shift = 0;
  } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
    Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
    Shift = 8;
  } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
    Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
    Shift = 16;
  } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
    Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
    Shift = 24;
  } else
    return nullptr;

  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
  return &*Mov;
}

MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
  unsigned Op;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = AArch64::MOVIv2d_ns;
  } else {
    Op = AArch64::MOVID;
  }

  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
  if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
    Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
    auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
    constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
    return &*Mov;
  }
  return nullptr;
}

MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
    bool Inv) {
  unsigned Op;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
  } else {
    Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
  }

  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
  uint64_t Shift;

  if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
    Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
    Shift = 264;
  } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
    Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
    Shift = 272;
  } else
    return nullptr;

  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
  return &*Mov;
}

MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
  unsigned Op;
  bool IsWide = false;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = AArch64::FMOVv4f32_ns;
    IsWide = true;
  } else {
    Op = AArch64::FMOVv2f32_ns;
  }

  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();

  if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
    Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
  } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
    Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
    Op = AArch64::FMOVv2f64_ns;
  } else
    return nullptr;

  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
  return &*Mov;
}

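// Pre/post-indexed extending loads are handled below by choosing the
// LDRS*/LDR* pre/post opcode from the memory size, then widening the result
// into an X register with SUBREG_TO_REG when the destination is 64 bits.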
bool AArch64InstructionSelector::selectIndexedExtLoad(
    MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
  Register Dst = ExtLd.getDstReg();
  Register WriteBack = ExtLd.getWritebackReg();
  Register Base = ExtLd.getBaseReg();
  Register Offset = ExtLd.getOffsetReg();
  LLT Ty = MRI.getType(Dst);
  assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
  unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
  bool IsPre = ExtLd.isPre();
  bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
  bool InsertIntoXReg = false;
  bool IsDst64 = Ty.getSizeInBits() == 64;

  unsigned Opc = 0;
  LLT NewLdDstTy;
  LLT s32 = LLT::scalar(32);
  LLT s64 = LLT::scalar(64);

  if (MemSizeBits == 8) {
    if (IsSExt) {
      if (IsDst64)
        Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
      NewLdDstTy = IsDst64 ? s64 : s32;
    } else {
      Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertIntoXReg = IsDst64;
      NewLdDstTy = s32;
    }
  } else if (MemSizeBits == 16) {
    if (IsSExt) {
      if (IsDst64)
        Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
      NewLdDstTy = IsDst64 ? s64 : s32;
    } else {
      Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertIntoXReg = IsDst64;
      NewLdDstTy = s32;
    }
  } else if (MemSizeBits == 32) {
    if (IsSExt) {
      Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
      NewLdDstTy = s64;
    } else {
      Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertIntoXReg = IsDst64;
      NewLdDstTy = s32;
    }
  } else {
    llvm_unreachable("Unexpected size for indexed load");
  }

  if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
    return false; // We should be on gpr.

  auto Cst = getIConstantVRegVal(Offset, MRI);
  if (!Cst)
    return false; // Shouldn't happen, but just in case.

  auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
                  .addImm(Cst->getSExtValue());
  LdMI.cloneMemRefs(ExtLd);
  constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
  // Make sure to select the load with the MemTy as the dest type, and then
  // insert into X reg if needed.
  if (InsertIntoXReg) {
    // Generate a SUBREG_TO_REG.
    auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
                        .addImm(0)
                        .addUse(LdMI.getReg(1))
                        .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
                                 MRI);
  } else {
    auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
    selectCopy(*Copy, TII, MRI, TRI, RBI);
  }
  MI.eraseFromParent();

  return true;
}

bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
                                                   MachineRegisterInfo &MRI) {
  auto &Ld = cast<GIndexedLoad>(MI);
  Register Dst = Ld.getDstReg();
  Register WriteBack = Ld.getWritebackReg();
  Register Base = Ld.getBaseReg();
  Register Offset = Ld.getOffsetReg();
  assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
         "Unexpected type for indexed load");
  unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();

  if (MemSize < MRI.getType(Dst).getSizeInBytes())
    return selectIndexedExtLoad(MI, MRI);

  unsigned Opc = 0;
  if (Ld.isPre()) {
    static constexpr unsigned GPROpcodes[] = {
        AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
        AArch64::LDRXpre};
    static constexpr unsigned FPROpcodes[] = {
        AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
        AArch64::LDRQpre};
    if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
      Opc = FPROpcodes[Log2_32(MemSize)];
    else
      Opc = GPROpcodes[Log2_32(MemSize)];
  } else {
    static constexpr unsigned GPROpcodes[] = {
        AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
        AArch64::LDRXpost};
    static constexpr unsigned FPROpcodes[] = {
        AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
        AArch64::LDRDpost, AArch64::LDRQpost};
    if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
      Opc = FPROpcodes[Log2_32(MemSize)];
    else
      Opc = GPROpcodes[Log2_32(MemSize)];
  }
  auto Cst = getIConstantVRegVal(Offset, MRI);
  if (!Cst)
    return false; // Shouldn't happen, but just in case.

  auto LdMI =
      MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
  LdMI.cloneMemRefs(Ld);
  constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
  MI.eraseFromParent();
  return true;
}

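// Pre/post-indexed stores follow the same pattern as the indexed loads above:
// pick the STR*pre/STR*post opcode from the value size and register bank, then
// add the constant writeback offset as an immediate.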
bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
                                                    MachineRegisterInfo &MRI) {
  Register Dst = I.getWritebackReg();
  Register Val = I.getValueReg();
  Register Base = I.getBaseReg();
  Register Offset = I.getOffsetReg();
  LLT ValTy = MRI.getType(Val);
  assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");

  unsigned Opc = 0;
  if (I.isPre()) {
    static constexpr unsigned GPROpcodes[] = {
        AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
        AArch64::STRXpre};
    static constexpr unsigned FPROpcodes[] = {
        AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
        AArch64::STRQpre};

    if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
      Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
    else
      Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
  } else {
    static constexpr unsigned GPROpcodes[] = {
        AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
        AArch64::STRXpost};
    static constexpr unsigned FPROpcodes[] = {
        AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
        AArch64::STRDpost, AArch64::STRQpost};

    if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
      Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
    else
      Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
  }

  auto Cst = getIConstantVRegVal(Offset, MRI);
  if (!Cst)
    return false; // Shouldn't happen, but just in case.

  auto Str =
      MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
  Str.cloneMemRefs(I);
  constrainSelectedInstRegOperands(*Str, TII, TRI, RBI);
  I.eraseFromParent();

  return true;
}

5717 AArch64InstructionSelector::emitConstantVector(Register Dst
, Constant
*CV
,
5718 MachineIRBuilder
&MIRBuilder
,
5719 MachineRegisterInfo
&MRI
) {
5720 LLT DstTy
= MRI
.getType(Dst
);
5721 unsigned DstSize
= DstTy
.getSizeInBits();
5722 if (CV
->isNullValue()) {
5723 if (DstSize
== 128) {
5725 MIRBuilder
.buildInstr(AArch64::MOVIv2d_ns
, {Dst
}, {}).addImm(0);
5726 constrainSelectedInstRegOperands(*Mov
, TII
, TRI
, RBI
);
5730 if (DstSize
== 64) {
5733 .buildInstr(AArch64::MOVIv2d_ns
, {&AArch64::FPR128RegClass
}, {})
5735 auto Copy
= MIRBuilder
.buildInstr(TargetOpcode::COPY
, {Dst
}, {})
5736 .addReg(Mov
.getReg(0), 0, AArch64::dsub
);
5737 RBI
.constrainGenericRegister(Dst
, AArch64::FPR64RegClass
, MRI
);
5742 if (CV
->getSplatValue()) {
5743 APInt DefBits
= APInt::getSplat(
5744 DstSize
, CV
->getUniqueInteger().trunc(DstTy
.getScalarSizeInBits()));
5745 auto TryMOVIWithBits
= [&](APInt DefBits
) -> MachineInstr
* {
5746 MachineInstr
*NewOp
;
5748 if ((NewOp
= tryAdvSIMDModImm64(Dst
, DstSize
, DefBits
, MIRBuilder
)) ||
5750 tryAdvSIMDModImm32(Dst
, DstSize
, DefBits
, MIRBuilder
, Inv
)) ||
5752 tryAdvSIMDModImm321s(Dst
, DstSize
, DefBits
, MIRBuilder
, Inv
)) ||
5754 tryAdvSIMDModImm16(Dst
, DstSize
, DefBits
, MIRBuilder
, Inv
)) ||
5755 (NewOp
= tryAdvSIMDModImm8(Dst
, DstSize
, DefBits
, MIRBuilder
)) ||
5756 (NewOp
= tryAdvSIMDModImmFP(Dst
, DstSize
, DefBits
, MIRBuilder
)))
5762 tryAdvSIMDModImm32(Dst
, DstSize
, DefBits
, MIRBuilder
, Inv
)) ||
5764 tryAdvSIMDModImm321s(Dst
, DstSize
, DefBits
, MIRBuilder
, Inv
)) ||
5765 (NewOp
= tryAdvSIMDModImm16(Dst
, DstSize
, DefBits
, MIRBuilder
, Inv
)))
5770 if (auto *NewOp
= TryMOVIWithBits(DefBits
))
5773 // See if a fneg of the constant can be materialized with a MOVI, etc
5774 auto TryWithFNeg
= [&](APInt DefBits
, int NumBits
,
5775 unsigned NegOpc
) -> MachineInstr
* {
5776 // FNegate each sub-element of the constant
5777 APInt Neg
= APInt::getHighBitsSet(NumBits
, 1).zext(DstSize
);
5778 APInt
NegBits(DstSize
, 0);
5779 unsigned NumElts
= DstSize
/ NumBits
;
5780 for (unsigned i
= 0; i
< NumElts
; i
++)
5781 NegBits
|= Neg
<< (NumBits
* i
);
5782 NegBits
= DefBits
^ NegBits
;
5784 // Try to create the new constants with MOVI, and if so generate a fneg
5786 if (auto *NewOp
= TryMOVIWithBits(NegBits
)) {
5787 Register NewDst
= MRI
.createVirtualRegister(&AArch64::FPR128RegClass
);
5788 NewOp
->getOperand(0).setReg(NewDst
);
5789 return MIRBuilder
.buildInstr(NegOpc
, {Dst
}, {NewDst
});
5794 if ((R
= TryWithFNeg(DefBits
, 32, AArch64::FNEGv4f32
)) ||
5795 (R
= TryWithFNeg(DefBits
, 64, AArch64::FNEGv2f64
)) ||
5796 (STI
.hasFullFP16() &&
5797 (R
= TryWithFNeg(DefBits
, 16, AArch64::FNEGv8f16
))))
5801 auto *CPLoad
= emitLoadFromConstantPool(CV
, MIRBuilder
);
5803 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5807 auto Copy
= MIRBuilder
.buildCopy(Dst
, CPLoad
->getOperand(0));
5808 RBI
.constrainGenericRegister(
5809 Dst
, *MRI
.getRegClass(CPLoad
->getOperand(0).getReg()), MRI
);
5813 bool AArch64InstructionSelector::tryOptConstantBuildVec(
5814 MachineInstr
&I
, LLT DstTy
, MachineRegisterInfo
&MRI
) {
5815 assert(I
.getOpcode() == TargetOpcode::G_BUILD_VECTOR
);
5816 unsigned DstSize
= DstTy
.getSizeInBits();
5817 assert(DstSize
<= 128 && "Unexpected build_vec type!");
5820 // Check if we're building a constant vector, in which case we want to
5821 // generate a constant pool load instead of a vector insert sequence.
5822 SmallVector
<Constant
*, 16> Csts
;
5823 for (unsigned Idx
= 1; Idx
< I
.getNumOperands(); ++Idx
) {
5824 // Try to find G_CONSTANT or G_FCONSTANT
5826 getOpcodeDef(TargetOpcode::G_CONSTANT
, I
.getOperand(Idx
).getReg(), MRI
);
5829 const_cast<ConstantInt
*>(OpMI
->getOperand(1).getCImm()));
5830 else if ((OpMI
= getOpcodeDef(TargetOpcode::G_FCONSTANT
,
5831 I
.getOperand(Idx
).getReg(), MRI
)))
5833 const_cast<ConstantFP
*>(OpMI
->getOperand(1).getFPImm()));
5837 Constant
*CV
= ConstantVector::get(Csts
);
5838 if (!emitConstantVector(I
.getOperand(0).getReg(), CV
, MIB
, MRI
))
5840 I
.eraseFromParent();
5844 bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5845 MachineInstr
&I
, MachineRegisterInfo
&MRI
) {
5847 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5849 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5850 Register Dst
= I
.getOperand(0).getReg();
5851 Register EltReg
= I
.getOperand(1).getReg();
5852 LLT EltTy
= MRI
.getType(EltReg
);
5853 // If the index isn't on the same bank as its elements, then this can't be a
5855 const RegisterBank
&EltRB
= *RBI
.getRegBank(EltReg
, MRI
, TRI
);
5856 const RegisterBank
&DstRB
= *RBI
.getRegBank(Dst
, MRI
, TRI
);
5859 if (any_of(drop_begin(I
.operands(), 2), [&MRI
](const MachineOperand
&Op
) {
5860 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF
, Op
.getReg(), MRI
);
5864 const TargetRegisterClass
*EltRC
= getRegClassForTypeOnBank(EltTy
, EltRB
);
5867 const TargetRegisterClass
*DstRC
=
5868 getRegClassForTypeOnBank(MRI
.getType(Dst
), DstRB
);
5871 if (!getSubRegForClass(EltRC
, TRI
, SubReg
))
5873 auto SubregToReg
= MIB
.buildInstr(AArch64::SUBREG_TO_REG
, {Dst
}, {})
5877 I
.eraseFromParent();
5878 constrainSelectedInstRegOperands(*SubregToReg
, TII
, TRI
, RBI
);
5879 return RBI
.constrainGenericRegister(Dst
, *DstRC
, MRI
);
5882 bool AArch64InstructionSelector::selectBuildVector(MachineInstr
&I
,
5883 MachineRegisterInfo
&MRI
) {
5884 assert(I
.getOpcode() == TargetOpcode::G_BUILD_VECTOR
);
5885 // Until we port more of the optimized selections, for now just use a vector
5887 const LLT DstTy
= MRI
.getType(I
.getOperand(0).getReg());
5888 const LLT EltTy
= MRI
.getType(I
.getOperand(1).getReg());
5889 unsigned EltSize
= EltTy
.getSizeInBits();
5891 if (tryOptConstantBuildVec(I
, DstTy
, MRI
))
5893 if (tryOptBuildVecToSubregToReg(I
, MRI
))
5896 if (EltSize
!= 8 && EltSize
!= 16 && EltSize
!= 32 && EltSize
!= 64)
5897 return false; // Don't support all element types yet.
5898 const RegisterBank
&RB
= *RBI
.getRegBank(I
.getOperand(1).getReg(), MRI
, TRI
);
5900 const TargetRegisterClass
*DstRC
= &AArch64::FPR128RegClass
;
5901 MachineInstr
*ScalarToVec
=
5902 emitScalarToVector(DstTy
.getElementType().getSizeInBits(), DstRC
,
5903 I
.getOperand(1).getReg(), MIB
);
5907 Register DstVec
= ScalarToVec
->getOperand(0).getReg();
5908 unsigned DstSize
= DstTy
.getSizeInBits();
5910 // Keep track of the last MI we inserted. Later on, we might be able to save
5912 MachineInstr
*PrevMI
= ScalarToVec
;
5913 for (unsigned i
= 2, e
= DstSize
/ EltSize
+ 1; i
< e
; ++i
) {
5914 // Note that if we don't do a subregister copy, we can end up making an
5916 Register OpReg
= I
.getOperand(i
).getReg();
5917 // Do not emit inserts for undefs
5918 if (!getOpcodeDef
<GImplicitDef
>(OpReg
, MRI
)) {
5919 PrevMI
= &*emitLaneInsert(std::nullopt
, DstVec
, OpReg
, i
- 1, RB
, MIB
);
5920 DstVec
= PrevMI
->getOperand(0).getReg();
5924 // If DstTy's size in bits is less than 128, then emit a subregister copy
5925 // from DstVec to the last register we've defined.
5926 if (DstSize
< 128) {
5927 // Force this to be FPR using the destination vector.
5928 const TargetRegisterClass
*RC
=
5929 getRegClassForTypeOnBank(DstTy
, *RBI
.getRegBank(DstVec
, MRI
, TRI
));
5932 if (RC
!= &AArch64::FPR32RegClass
&& RC
!= &AArch64::FPR64RegClass
) {
5933 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5937 unsigned SubReg
= 0;
5938 if (!getSubRegForClass(RC
, TRI
, SubReg
))
5940 if (SubReg
!= AArch64::ssub
&& SubReg
!= AArch64::dsub
) {
5941 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5946 Register Reg
= MRI
.createVirtualRegister(RC
);
5947 Register DstReg
= I
.getOperand(0).getReg();
5949 MIB
.buildInstr(TargetOpcode::COPY
, {DstReg
}, {}).addReg(DstVec
, 0, SubReg
);
5950 MachineOperand
&RegOp
= I
.getOperand(1);
5952 RBI
.constrainGenericRegister(DstReg
, *RC
, MRI
);
5954 // We either have a vector with all elements (except the first one) undef or
5955 // at least one non-undef non-first element. In the first case, we need to
5956 // constrain the output register ourselves as we may have generated an
5957 // INSERT_SUBREG operation which is a generic operation for which the
5958 // output regclass cannot be automatically chosen.
5960 // In the second case, there is no need to do this as it may generate an
5961 // instruction like INSvi32gpr where the regclass can be automatically
5964 // Also, we save a copy by re-using the destination register on the final
5966 PrevMI
->getOperand(0).setReg(I
.getOperand(0).getReg());
5967 constrainSelectedInstRegOperands(*PrevMI
, TII
, TRI
, RBI
);
5969 Register DstReg
= PrevMI
->getOperand(0).getReg();
5970 if (PrevMI
== ScalarToVec
&& DstReg
.isVirtual()) {
5971 const TargetRegisterClass
*RC
=
5972 getRegClassForTypeOnBank(DstTy
, *RBI
.getRegBank(DstVec
, MRI
, TRI
));
5973 RBI
.constrainGenericRegister(DstReg
, *RC
, MRI
);
5977 I
.eraseFromParent();
5981 bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc
,
5984 assert(I
.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
);
5985 assert(Opc
&& "Expected an opcode?");
5986 assert(NumVecs
> 1 && NumVecs
< 5 && "Only support 2, 3, or 4 vectors");
5987 auto &MRI
= *MIB
.getMRI();
5988 LLT Ty
= MRI
.getType(I
.getOperand(0).getReg());
5989 unsigned Size
= Ty
.getSizeInBits();
5990 assert((Size
== 64 || Size
== 128) &&
5991 "Destination must be 64 bits or 128 bits?");
5992 unsigned SubReg
= Size
== 64 ? AArch64::dsub0
: AArch64::qsub0
;
5993 auto Ptr
= I
.getOperand(I
.getNumOperands() - 1).getReg();
5994 assert(MRI
.getType(Ptr
).isPointer() && "Expected a pointer type?");
5995 auto Load
= MIB
.buildInstr(Opc
, {Ty
}, {Ptr
});
5996 Load
.cloneMemRefs(I
);
5997 constrainSelectedInstRegOperands(*Load
, TII
, TRI
, RBI
);
5998 Register SelectedLoadDst
= Load
->getOperand(0).getReg();
5999 for (unsigned Idx
= 0; Idx
< NumVecs
; ++Idx
) {
6000 auto Vec
= MIB
.buildInstr(TargetOpcode::COPY
, {I
.getOperand(Idx
)}, {})
6001 .addReg(SelectedLoadDst
, 0, SubReg
+ Idx
);
6002 // Emit the subreg copies and immediately select them.
6003 // FIXME: We should refactor our copy code into an emitCopy helper and
6004 // clean up uses of this pattern elsewhere in the selector.
6005 selectCopy(*Vec
, TII
, MRI
, TRI
, RBI
);
6010 bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
6011 unsigned Opc
, unsigned NumVecs
, MachineInstr
&I
) {
6012 assert(I
.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
);
6013 assert(Opc
&& "Expected an opcode?");
6014 assert(NumVecs
> 1 && NumVecs
< 5 && "Only support 2, 3, or 4 vectors");
6015 auto &MRI
= *MIB
.getMRI();
6016 LLT Ty
= MRI
.getType(I
.getOperand(0).getReg());
6017 bool Narrow
= Ty
.getSizeInBits() == 64;
6019 auto FirstSrcRegIt
= I
.operands_begin() + NumVecs
+ 1;
6020 SmallVector
<Register
, 4> Regs(NumVecs
);
6021 std::transform(FirstSrcRegIt
, FirstSrcRegIt
+ NumVecs
, Regs
.begin(),
6022 [](auto MO
) { return MO
.getReg(); });
6025 transform(Regs
, Regs
.begin(), [this](Register Reg
) {
6026 return emitScalarToVector(64, &AArch64::FPR128RegClass
, Reg
, MIB
)
6030 Ty
= Ty
.multiplyElements(2);
6033 Register Tuple
= createQTuple(Regs
, MIB
);
6034 auto LaneNo
= getIConstantVRegVal((FirstSrcRegIt
+ NumVecs
)->getReg(), MRI
);
6038 Register Ptr
= (FirstSrcRegIt
+ NumVecs
+ 1)->getReg();
6039 auto Load
= MIB
.buildInstr(Opc
, {Ty
}, {})
6041 .addImm(LaneNo
->getZExtValue())
6043 Load
.cloneMemRefs(I
);
6044 constrainSelectedInstRegOperands(*Load
, TII
, TRI
, RBI
);
6045 Register SelectedLoadDst
= Load
->getOperand(0).getReg();
6046 unsigned SubReg
= AArch64::qsub0
;
6047 for (unsigned Idx
= 0; Idx
< NumVecs
; ++Idx
) {
6048 auto Vec
= MIB
.buildInstr(TargetOpcode::COPY
,
6049 {Narrow
? DstOp(&AArch64::FPR128RegClass
)
6050 : DstOp(I
.getOperand(Idx
).getReg())},
6052 .addReg(SelectedLoadDst
, 0, SubReg
+ Idx
);
6053 Register WideReg
= Vec
.getReg(0);
6054 // Emit the subreg copies and immediately select them.
6055 selectCopy(*Vec
, TII
, MRI
, TRI
, RBI
);
6057 !emitNarrowVector(I
.getOperand(Idx
).getReg(), WideReg
, MIB
, MRI
))
6063 void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr
&I
,
6066 MachineRegisterInfo
&MRI
= I
.getParent()->getParent()->getRegInfo();
6067 LLT Ty
= MRI
.getType(I
.getOperand(1).getReg());
6068 Register Ptr
= I
.getOperand(1 + NumVecs
).getReg();
6070 SmallVector
<Register
, 2> Regs(NumVecs
);
6071 std::transform(I
.operands_begin() + 1, I
.operands_begin() + 1 + NumVecs
,
6072 Regs
.begin(), [](auto MO
) { return MO
.getReg(); });
6074 Register Tuple
= Ty
.getSizeInBits() == 128 ? createQTuple(Regs
, MIB
)
6075 : createDTuple(Regs
, MIB
);
6076 auto Store
= MIB
.buildInstr(Opc
, {}, {Tuple
, Ptr
});
6077 Store
.cloneMemRefs(I
);
6078 constrainSelectedInstRegOperands(*Store
, TII
, TRI
, RBI
);
6081 bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6082 MachineInstr
&I
, unsigned NumVecs
, unsigned Opc
) {
6083 MachineRegisterInfo
&MRI
= I
.getParent()->getParent()->getRegInfo();
6084 LLT Ty
= MRI
.getType(I
.getOperand(1).getReg());
6085 bool Narrow
= Ty
.getSizeInBits() == 64;
6087 SmallVector
<Register
, 2> Regs(NumVecs
);
6088 std::transform(I
.operands_begin() + 1, I
.operands_begin() + 1 + NumVecs
,
6089 Regs
.begin(), [](auto MO
) { return MO
.getReg(); });
6092 transform(Regs
, Regs
.begin(), [this](Register Reg
) {
6093 return emitScalarToVector(64, &AArch64::FPR128RegClass
, Reg
, MIB
)
6098 Register Tuple
= createQTuple(Regs
, MIB
);
6100 auto LaneNo
= getIConstantVRegVal(I
.getOperand(1 + NumVecs
).getReg(), MRI
);
6103 Register Ptr
= I
.getOperand(1 + NumVecs
+ 1).getReg();
6104 auto Store
= MIB
.buildInstr(Opc
, {}, {})
6106 .addImm(LaneNo
->getZExtValue())
6108 Store
.cloneMemRefs(I
);
6109 constrainSelectedInstRegOperands(*Store
, TII
, TRI
, RBI
);
6113 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6114 MachineInstr
&I
, MachineRegisterInfo
&MRI
) {
6115 // Find the intrinsic ID.
6116 unsigned IntrinID
= cast
<GIntrinsic
>(I
).getIntrinsicID();
6118 const LLT S8
= LLT::scalar(8);
6119 const LLT S16
= LLT::scalar(16);
6120 const LLT S32
= LLT::scalar(32);
6121 const LLT S64
= LLT::scalar(64);
6122 const LLT P0
= LLT::pointer(0, 64);
6123 // Select the instruction.
6127 case Intrinsic::aarch64_ldxp
:
6128 case Intrinsic::aarch64_ldaxp
: {
6129 auto NewI
= MIB
.buildInstr(
6130 IntrinID
== Intrinsic::aarch64_ldxp
? AArch64::LDXPX
: AArch64::LDAXPX
,
6131 {I
.getOperand(0).getReg(), I
.getOperand(1).getReg()},
6133 NewI
.cloneMemRefs(I
);
6134 constrainSelectedInstRegOperands(*NewI
, TII
, TRI
, RBI
);
6137 case Intrinsic::aarch64_neon_ld1x2
: {
6138 LLT Ty
= MRI
.getType(I
.getOperand(0).getReg());
6140 if (Ty
== LLT::fixed_vector(8, S8
))
6141 Opc
= AArch64::LD1Twov8b
;
6142 else if (Ty
== LLT::fixed_vector(16, S8
))
6143 Opc
= AArch64::LD1Twov16b
;
6144 else if (Ty
== LLT::fixed_vector(4, S16
))
6145 Opc
= AArch64::LD1Twov4h
;
6146 else if (Ty
== LLT::fixed_vector(8, S16
))
6147 Opc
= AArch64::LD1Twov8h
;
6148 else if (Ty
== LLT::fixed_vector(2, S32
))
6149 Opc
= AArch64::LD1Twov2s
;
6150 else if (Ty
== LLT::fixed_vector(4, S32
))
6151 Opc
= AArch64::LD1Twov4s
;
6152 else if (Ty
== LLT::fixed_vector(2, S64
) || Ty
== LLT::fixed_vector(2, P0
))
6153 Opc
= AArch64::LD1Twov2d
;
6154 else if (Ty
== S64
|| Ty
== P0
)
6155 Opc
= AArch64::LD1Twov1d
;
6157 llvm_unreachable("Unexpected type for ld1x2!");
6158 selectVectorLoadIntrinsic(Opc
, 2, I
);
6161 case Intrinsic::aarch64_neon_ld1x3
: {
6162 LLT Ty
= MRI
.getType(I
.getOperand(0).getReg());
6164 if (Ty
== LLT::fixed_vector(8, S8
))
6165 Opc
= AArch64::LD1Threev8b
;
6166 else if (Ty
== LLT::fixed_vector(16, S8
))
6167 Opc
= AArch64::LD1Threev16b
;
6168 else if (Ty
== LLT::fixed_vector(4, S16
))
6169 Opc
= AArch64::LD1Threev4h
;
6170 else if (Ty
== LLT::fixed_vector(8, S16
))
6171 Opc
= AArch64::LD1Threev8h
;
6172 else if (Ty
== LLT::fixed_vector(2, S32
))
6173 Opc
= AArch64::LD1Threev2s
;
6174 else if (Ty
== LLT::fixed_vector(4, S32
))
6175 Opc
= AArch64::LD1Threev4s
;
6176 else if (Ty
== LLT::fixed_vector(2, S64
) || Ty
== LLT::fixed_vector(2, P0
))
6177 Opc
= AArch64::LD1Threev2d
;
6178 else if (Ty
== S64
|| Ty
== P0
)
6179 Opc
= AArch64::LD1Threev1d
;
6181 llvm_unreachable("Unexpected type for ld1x3!");
6182 selectVectorLoadIntrinsic(Opc
, 3, I
);
6185 case Intrinsic::aarch64_neon_ld1x4
: {
6186 LLT Ty
= MRI
.getType(I
.getOperand(0).getReg());
6188 if (Ty
== LLT::fixed_vector(8, S8
))
6189 Opc
= AArch64::LD1Fourv8b
;
6190 else if (Ty
== LLT::fixed_vector(16, S8
))
6191 Opc
= AArch64::LD1Fourv16b
;
6192 else if (Ty
== LLT::fixed_vector(4, S16
))
6193 Opc
= AArch64::LD1Fourv4h
;
6194 else if (Ty
== LLT::fixed_vector(8, S16
))
6195 Opc
= AArch64::LD1Fourv8h
;
6196 else if (Ty
== LLT::fixed_vector(2, S32
))
6197 Opc
= AArch64::LD1Fourv2s
;
6198 else if (Ty
== LLT::fixed_vector(4, S32
))
6199 Opc
= AArch64::LD1Fourv4s
;
6200 else if (Ty
== LLT::fixed_vector(2, S64
) || Ty
== LLT::fixed_vector(2, P0
))
6201 Opc
= AArch64::LD1Fourv2d
;
6202 else if (Ty
== S64
|| Ty
== P0
)
6203 Opc
= AArch64::LD1Fourv1d
;
6205 llvm_unreachable("Unexpected type for ld1x4!");
6206 selectVectorLoadIntrinsic(Opc
, 4, I
);
6209 case Intrinsic::aarch64_neon_ld2
: {
6210 LLT Ty
= MRI
.getType(I
.getOperand(0).getReg());
6212 if (Ty
== LLT::fixed_vector(8, S8
))
6213 Opc
= AArch64::LD2Twov8b
;
6214 else if (Ty
== LLT::fixed_vector(16, S8
))
6215 Opc
= AArch64::LD2Twov16b
;
6216 else if (Ty
== LLT::fixed_vector(4, S16
))
6217 Opc
= AArch64::LD2Twov4h
;
6218 else if (Ty
== LLT::fixed_vector(8, S16
))
6219 Opc
= AArch64::LD2Twov8h
;
6220 else if (Ty
== LLT::fixed_vector(2, S32
))
6221 Opc
= AArch64::LD2Twov2s
;
6222 else if (Ty
== LLT::fixed_vector(4, S32
))
6223 Opc
= AArch64::LD2Twov4s
;
6224 else if (Ty
== LLT::fixed_vector(2, S64
) || Ty
== LLT::fixed_vector(2, P0
))
6225 Opc
= AArch64::LD2Twov2d
;
6226 else if (Ty
== S64
|| Ty
== P0
)
6227 Opc
= AArch64::LD1Twov1d
;
6229 llvm_unreachable("Unexpected type for ld2!");
6230 selectVectorLoadIntrinsic(Opc
, 2, I
);
6233 case Intrinsic::aarch64_neon_ld2lane
: {
6234 LLT Ty
= MRI
.getType(I
.getOperand(0).getReg());
6236 if (Ty
== LLT::fixed_vector(8, S8
) || Ty
== LLT::fixed_vector(16, S8
))
6237 Opc
= AArch64::LD2i8
;
6238 else if (Ty
== LLT::fixed_vector(4, S16
) || Ty
== LLT::fixed_vector(8, S16
))
6239 Opc
= AArch64::LD2i16
;
6240 else if (Ty
== LLT::fixed_vector(2, S32
) || Ty
== LLT::fixed_vector(4, S32
))
6241 Opc
= AArch64::LD2i32
;
6242 else if (Ty
== LLT::fixed_vector(2, S64
) ||
6243 Ty
== LLT::fixed_vector(2, P0
) || Ty
== S64
|| Ty
== P0
)
6244 Opc
= AArch64::LD2i64
;
6246 llvm_unreachable("Unexpected type for st2lane!");
6247 if (!selectVectorLoadLaneIntrinsic(Opc
, 2, I
))
6251 case Intrinsic::aarch64_neon_ld2r
: {
6252 LLT Ty
= MRI
.getType(I
.getOperand(0).getReg());
6254 if (Ty
== LLT::fixed_vector(8, S8
))
6255 Opc
= AArch64::LD2Rv8b
;
6256 else if (Ty
== LLT::fixed_vector(16, S8
))
6257 Opc
= AArch64::LD2Rv16b
;
6258 else if (Ty
== LLT::fixed_vector(4, S16
))
6259 Opc
= AArch64::LD2Rv4h
;
6260 else if (Ty
== LLT::fixed_vector(8, S16
))
6261 Opc
= AArch64::LD2Rv8h
;
6262 else if (Ty
== LLT::fixed_vector(2, S32
))
6263 Opc
= AArch64::LD2Rv2s
;
6264 else if (Ty
== LLT::fixed_vector(4, S32
))
6265 Opc
= AArch64::LD2Rv4s
;
6266 else if (Ty
== LLT::fixed_vector(2, S64
) || Ty
== LLT::fixed_vector(2, P0
))
6267 Opc
= AArch64::LD2Rv2d
;
6268 else if (Ty
== S64
|| Ty
== P0
)
6269 Opc
= AArch64::LD2Rv1d
;
6271 llvm_unreachable("Unexpected type for ld2r!");
6272 selectVectorLoadIntrinsic(Opc
, 2, I
);
6275 case Intrinsic::aarch64_neon_ld3
: {
6276 LLT Ty
= MRI
.getType(I
.getOperand(0).getReg());
6278 if (Ty
== LLT::fixed_vector(8, S8
))
6279 Opc
= AArch64::LD3Threev8b
;
6280 else if (Ty
== LLT::fixed_vector(16, S8
))
6281 Opc
= AArch64::LD3Threev16b
;
6282 else if (Ty
== LLT::fixed_vector(4, S16
))
6283 Opc
= AArch64::LD3Threev4h
;
6284 else if (Ty
== LLT::fixed_vector(8, S16
))
6285 Opc
= AArch64::LD3Threev8h
;
6286 else if (Ty
== LLT::fixed_vector(2, S32
))
6287 Opc
= AArch64::LD3Threev2s
;
6288 else if (Ty
== LLT::fixed_vector(4, S32
))
6289 Opc
= AArch64::LD3Threev4s
;
6290 else if (Ty
== LLT::fixed_vector(2, S64
) || Ty
== LLT::fixed_vector(2, P0
))
6291 Opc
= AArch64::LD3Threev2d
;
6292 else if (Ty
== S64
|| Ty
== P0
)
6293 Opc
= AArch64::LD1Threev1d
;
6295 llvm_unreachable("Unexpected type for ld3!");
6296 selectVectorLoadIntrinsic(Opc
, 3, I
);
6299 case Intrinsic::aarch64_neon_ld3lane
: {
6300 LLT Ty
= MRI
.getType(I
.getOperand(0).getReg());
6302 if (Ty
== LLT::fixed_vector(8, S8
) || Ty
== LLT::fixed_vector(16, S8
))
6303 Opc
= AArch64::LD3i8
;
6304 else if (Ty
== LLT::fixed_vector(4, S16
) || Ty
== LLT::fixed_vector(8, S16
))
6305 Opc
= AArch64::LD3i16
;
6306 else if (Ty
== LLT::fixed_vector(2, S32
) || Ty
== LLT::fixed_vector(4, S32
))
6307 Opc
= AArch64::LD3i32
;
6308 else if (Ty
== LLT::fixed_vector(2, S64
) ||
6309 Ty
== LLT::fixed_vector(2, P0
) || Ty
== S64
|| Ty
== P0
)
6310 Opc
= AArch64::LD3i64
;
6312 llvm_unreachable("Unexpected type for st3lane!");
6313 if (!selectVectorLoadLaneIntrinsic(Opc
, 3, I
))
6317 case Intrinsic::aarch64_neon_ld3r
: {
6318 LLT Ty
= MRI
.getType(I
.getOperand(0).getReg());
6320 if (Ty
== LLT::fixed_vector(8, S8
))
6321 Opc
= AArch64::LD3Rv8b
;
6322 else if (Ty
== LLT::fixed_vector(16, S8
))
6323 Opc
= AArch64::LD3Rv16b
;
6324 else if (Ty
== LLT::fixed_vector(4, S16
))
6325 Opc
= AArch64::LD3Rv4h
;
6326 else if (Ty
== LLT::fixed_vector(8, S16
))
6327 Opc
= AArch64::LD3Rv8h
;
6328 else if (Ty
== LLT::fixed_vector(2, S32
))
6329 Opc
= AArch64::LD3Rv2s
;
6330 else if (Ty
== LLT::fixed_vector(4, S32
))
6331 Opc
= AArch64::LD3Rv4s
;
6332 else if (Ty
== LLT::fixed_vector(2, S64
) || Ty
== LLT::fixed_vector(2, P0
))
6333 Opc
= AArch64::LD3Rv2d
;
6334 else if (Ty
== S64
|| Ty
== P0
)
6335 Opc
= AArch64::LD3Rv1d
;
6337 llvm_unreachable("Unexpected type for ld3r!");
6338 selectVectorLoadIntrinsic(Opc
, 3, I
);
6341 case Intrinsic::aarch64_neon_ld4
: {
6342 LLT Ty
= MRI
.getType(I
.getOperand(0).getReg());
6344 if (Ty
== LLT::fixed_vector(8, S8
))
6345 Opc
= AArch64::LD4Fourv8b
;
6346 else if (Ty
== LLT::fixed_vector(16, S8
))
6347 Opc
= AArch64::LD4Fourv16b
;
6348 else if (Ty
== LLT::fixed_vector(4, S16
))
6349 Opc
= AArch64::LD4Fourv4h
;
6350 else if (Ty
== LLT::fixed_vector(8, S16
))
6351 Opc
= AArch64::LD4Fourv8h
;
6352 else if (Ty
== LLT::fixed_vector(2, S32
))
6353 Opc
= AArch64::LD4Fourv2s
;
6354 else if (Ty
== LLT::fixed_vector(4, S32
))
6355 Opc
= AArch64::LD4Fourv4s
;
6356 else if (Ty
== LLT::fixed_vector(2, S64
) || Ty
== LLT::fixed_vector(2, P0
))
6357 Opc
= AArch64::LD4Fourv2d
;
6358 else if (Ty
== S64
|| Ty
== P0
)
6359 Opc
= AArch64::LD1Fourv1d
;
6361 llvm_unreachable("Unexpected type for ld4!");
6362 selectVectorLoadIntrinsic(Opc
, 4, I
);
6365 case Intrinsic::aarch64_neon_ld4lane
: {
6366 LLT Ty
= MRI
.getType(I
.getOperand(0).getReg());
6368 if (Ty
== LLT::fixed_vector(8, S8
) || Ty
== LLT::fixed_vector(16, S8
))
6369 Opc
= AArch64::LD4i8
;
6370 else if (Ty
== LLT::fixed_vector(4, S16
) || Ty
== LLT::fixed_vector(8, S16
))
6371 Opc
= AArch64::LD4i16
;
6372 else if (Ty
== LLT::fixed_vector(2, S32
) || Ty
== LLT::fixed_vector(4, S32
))
6373 Opc
= AArch64::LD4i32
;
6374 else if (Ty
== LLT::fixed_vector(2, S64
) ||
6375 Ty
== LLT::fixed_vector(2, P0
) || Ty
== S64
|| Ty
== P0
)
6376 Opc
= AArch64::LD4i64
;
6378 llvm_unreachable("Unexpected type for st4lane!");
6379 if (!selectVectorLoadLaneIntrinsic(Opc
, 4, I
))
6383 case Intrinsic::aarch64_neon_ld4r
: {
6384 LLT Ty
= MRI
.getType(I
.getOperand(0).getReg());
6386 if (Ty
== LLT::fixed_vector(8, S8
))
6387 Opc
= AArch64::LD4Rv8b
;
6388 else if (Ty
== LLT::fixed_vector(16, S8
))
6389 Opc
= AArch64::LD4Rv16b
;
6390 else if (Ty
== LLT::fixed_vector(4, S16
))
6391 Opc
= AArch64::LD4Rv4h
;
6392 else if (Ty
== LLT::fixed_vector(8, S16
))
6393 Opc
= AArch64::LD4Rv8h
;
6394 else if (Ty
== LLT::fixed_vector(2, S32
))
6395 Opc
= AArch64::LD4Rv2s
;
6396 else if (Ty
== LLT::fixed_vector(4, S32
))
6397 Opc
= AArch64::LD4Rv4s
;
6398 else if (Ty
== LLT::fixed_vector(2, S64
) || Ty
== LLT::fixed_vector(2, P0
))
6399 Opc
= AArch64::LD4Rv2d
;
6400 else if (Ty
== S64
|| Ty
== P0
)
6401 Opc
= AArch64::LD4Rv1d
;
6403 llvm_unreachable("Unexpected type for ld4r!");
6404 selectVectorLoadIntrinsic(Opc
, 4, I
);
6407 case Intrinsic::aarch64_neon_st1x2
: {
6408 LLT Ty
= MRI
.getType(I
.getOperand(1).getReg());
6410 if (Ty
== LLT::fixed_vector(8, S8
))
6411 Opc
= AArch64::ST1Twov8b
;
6412 else if (Ty
== LLT::fixed_vector(16, S8
))
6413 Opc
= AArch64::ST1Twov16b
;
6414 else if (Ty
== LLT::fixed_vector(4, S16
))
6415 Opc
= AArch64::ST1Twov4h
;
6416 else if (Ty
== LLT::fixed_vector(8, S16
))
6417 Opc
= AArch64::ST1Twov8h
;
6418 else if (Ty
== LLT::fixed_vector(2, S32
))
6419 Opc
= AArch64::ST1Twov2s
;
6420 else if (Ty
== LLT::fixed_vector(4, S32
))
6421 Opc
= AArch64::ST1Twov4s
;
6422 else if (Ty
== LLT::fixed_vector(2, S64
) || Ty
== LLT::fixed_vector(2, P0
))
6423 Opc
= AArch64::ST1Twov2d
;
6424 else if (Ty
== S64
|| Ty
== P0
)
6425 Opc
= AArch64::ST1Twov1d
;
6427 llvm_unreachable("Unexpected type for st1x2!");
6428 selectVectorStoreIntrinsic(I
, 2, Opc
);
6431 case Intrinsic::aarch64_neon_st1x3
: {
6432 LLT Ty
= MRI
.getType(I
.getOperand(1).getReg());
6434 if (Ty
== LLT::fixed_vector(8, S8
))
6435 Opc
= AArch64::ST1Threev8b
;
6436 else if (Ty
== LLT::fixed_vector(16, S8
))
6437 Opc
= AArch64::ST1Threev16b
;
6438 else if (Ty
== LLT::fixed_vector(4, S16
))
6439 Opc
= AArch64::ST1Threev4h
;
6440 else if (Ty
== LLT::fixed_vector(8, S16
))
6441 Opc
= AArch64::ST1Threev8h
;
6442 else if (Ty
== LLT::fixed_vector(2, S32
))
6443 Opc
= AArch64::ST1Threev2s
;
6444 else if (Ty
== LLT::fixed_vector(4, S32
))
6445 Opc
= AArch64::ST1Threev4s
;
6446 else if (Ty
== LLT::fixed_vector(2, S64
) || Ty
== LLT::fixed_vector(2, P0
))
6447 Opc
= AArch64::ST1Threev2d
;
6448 else if (Ty
== S64
|| Ty
== P0
)
6449 Opc
= AArch64::ST1Threev1d
;
6451 llvm_unreachable("Unexpected type for st1x3!");
6452 selectVectorStoreIntrinsic(I
, 3, Opc
);
6455 case Intrinsic::aarch64_neon_st1x4
: {
6456 LLT Ty
= MRI
.getType(I
.getOperand(1).getReg());
6458 if (Ty
== LLT::fixed_vector(8, S8
))
6459 Opc
= AArch64::ST1Fourv8b
;
6460 else if (Ty
== LLT::fixed_vector(16, S8
))
6461 Opc
= AArch64::ST1Fourv16b
;
6462 else if (Ty
== LLT::fixed_vector(4, S16
))
6463 Opc
= AArch64::ST1Fourv4h
;
6464 else if (Ty
== LLT::fixed_vector(8, S16
))
6465 Opc
= AArch64::ST1Fourv8h
;
6466 else if (Ty
== LLT::fixed_vector(2, S32
))
6467 Opc
= AArch64::ST1Fourv2s
;
6468 else if (Ty
== LLT::fixed_vector(4, S32
))
6469 Opc
= AArch64::ST1Fourv4s
;
6470 else if (Ty
== LLT::fixed_vector(2, S64
) || Ty
== LLT::fixed_vector(2, P0
))
6471 Opc
= AArch64::ST1Fourv2d
;
6472 else if (Ty
== S64
|| Ty
== P0
)
6473 Opc
= AArch64::ST1Fourv1d
;
6475 llvm_unreachable("Unexpected type for st1x4!");
6476 selectVectorStoreIntrinsic(I
, 4, Opc
);
6479 case Intrinsic::aarch64_neon_st2
: {
6480 LLT Ty
= MRI
.getType(I
.getOperand(1).getReg());
6482 if (Ty
== LLT::fixed_vector(8, S8
))
6483 Opc
= AArch64::ST2Twov8b
;
6484 else if (Ty
== LLT::fixed_vector(16, S8
))
6485 Opc
= AArch64::ST2Twov16b
;
6486 else if (Ty
== LLT::fixed_vector(4, S16
))
6487 Opc
= AArch64::ST2Twov4h
;
6488 else if (Ty
== LLT::fixed_vector(8, S16
))
6489 Opc
= AArch64::ST2Twov8h
;
6490 else if (Ty
== LLT::fixed_vector(2, S32
))
6491 Opc
= AArch64::ST2Twov2s
;
6492 else if (Ty
== LLT::fixed_vector(4, S32
))
6493 Opc
= AArch64::ST2Twov4s
;
6494 else if (Ty
== LLT::fixed_vector(2, S64
) || Ty
== LLT::fixed_vector(2, P0
))
6495 Opc
= AArch64::ST2Twov2d
;
6496 else if (Ty
== S64
|| Ty
== P0
)
6497 Opc
= AArch64::ST1Twov1d
;
6499 llvm_unreachable("Unexpected type for st2!");
6500 selectVectorStoreIntrinsic(I
, 2, Opc
);
6503 case Intrinsic::aarch64_neon_st3
: {
6504 LLT Ty
= MRI
.getType(I
.getOperand(1).getReg());
6506 if (Ty
== LLT::fixed_vector(8, S8
))
6507 Opc
= AArch64::ST3Threev8b
;
6508 else if (Ty
== LLT::fixed_vector(16, S8
))
6509 Opc
= AArch64::ST3Threev16b
;
6510 else if (Ty
== LLT::fixed_vector(4, S16
))
6511 Opc
= AArch64::ST3Threev4h
;
6512 else if (Ty
== LLT::fixed_vector(8, S16
))
6513 Opc
= AArch64::ST3Threev8h
;
6514 else if (Ty
== LLT::fixed_vector(2, S32
))
6515 Opc
= AArch64::ST3Threev2s
;
6516 else if (Ty
== LLT::fixed_vector(4, S32
))
6517 Opc
= AArch64::ST3Threev4s
;
6518 else if (Ty
== LLT::fixed_vector(2, S64
) || Ty
== LLT::fixed_vector(2, P0
))
6519 Opc
= AArch64::ST3Threev2d
;
6520 else if (Ty
== S64
|| Ty
== P0
)
6521 Opc
= AArch64::ST1Threev1d
;
6523 llvm_unreachable("Unexpected type for st3!");
6524 selectVectorStoreIntrinsic(I
, 3, Opc
);
6527 case Intrinsic::aarch64_neon_st4
: {
6528 LLT Ty
= MRI
.getType(I
.getOperand(1).getReg());
6530 if (Ty
== LLT::fixed_vector(8, S8
))
6531 Opc
= AArch64::ST4Fourv8b
;
6532 else if (Ty
== LLT::fixed_vector(16, S8
))
6533 Opc
= AArch64::ST4Fourv16b
;
6534 else if (Ty
== LLT::fixed_vector(4, S16
))
6535 Opc
= AArch64::ST4Fourv4h
;
6536 else if (Ty
== LLT::fixed_vector(8, S16
))
6537 Opc
= AArch64::ST4Fourv8h
;
6538 else if (Ty
== LLT::fixed_vector(2, S32
))
6539 Opc
= AArch64::ST4Fourv2s
;
6540 else if (Ty
== LLT::fixed_vector(4, S32
))
6541 Opc
= AArch64::ST4Fourv4s
;
6542 else if (Ty
== LLT::fixed_vector(2, S64
) || Ty
== LLT::fixed_vector(2, P0
))
6543 Opc
= AArch64::ST4Fourv2d
;
6544 else if (Ty
== S64
|| Ty
== P0
)
6545 Opc
= AArch64::ST1Fourv1d
;
6547 llvm_unreachable("Unexpected type for st4!");
6548 selectVectorStoreIntrinsic(I
, 4, Opc
);
6551 case Intrinsic::aarch64_neon_st2lane
: {
6552 LLT Ty
= MRI
.getType(I
.getOperand(1).getReg());
6554 if (Ty
== LLT::fixed_vector(8, S8
) || Ty
== LLT::fixed_vector(16, S8
))
6555 Opc
= AArch64::ST2i8
;
6556 else if (Ty
== LLT::fixed_vector(4, S16
) || Ty
== LLT::fixed_vector(8, S16
))
6557 Opc
= AArch64::ST2i16
;
6558 else if (Ty
== LLT::fixed_vector(2, S32
) || Ty
== LLT::fixed_vector(4, S32
))
6559 Opc
= AArch64::ST2i32
;
6560 else if (Ty
== LLT::fixed_vector(2, S64
) ||
6561 Ty
== LLT::fixed_vector(2, P0
) || Ty
== S64
|| Ty
== P0
)
6562 Opc
= AArch64::ST2i64
;
6564 llvm_unreachable("Unexpected type for st2lane!");
6565 if (!selectVectorStoreLaneIntrinsic(I
, 2, Opc
))
6569 case Intrinsic::aarch64_neon_st3lane
: {
6570 LLT Ty
= MRI
.getType(I
.getOperand(1).getReg());
6572 if (Ty
== LLT::fixed_vector(8, S8
) || Ty
== LLT::fixed_vector(16, S8
))
6573 Opc
= AArch64::ST3i8
;
6574 else if (Ty
== LLT::fixed_vector(4, S16
) || Ty
== LLT::fixed_vector(8, S16
))
6575 Opc
= AArch64::ST3i16
;
6576 else if (Ty
== LLT::fixed_vector(2, S32
) || Ty
== LLT::fixed_vector(4, S32
))
6577 Opc
= AArch64::ST3i32
;
6578 else if (Ty
== LLT::fixed_vector(2, S64
) ||
6579 Ty
== LLT::fixed_vector(2, P0
) || Ty
== S64
|| Ty
== P0
)
6580 Opc
= AArch64::ST3i64
;
6582 llvm_unreachable("Unexpected type for st3lane!");
6583 if (!selectVectorStoreLaneIntrinsic(I
, 3, Opc
))
6587 case Intrinsic::aarch64_neon_st4lane
: {
6588 LLT Ty
= MRI
.getType(I
.getOperand(1).getReg());
6590 if (Ty
== LLT::fixed_vector(8, S8
) || Ty
== LLT::fixed_vector(16, S8
))
6591 Opc
= AArch64::ST4i8
;
6592 else if (Ty
== LLT::fixed_vector(4, S16
) || Ty
== LLT::fixed_vector(8, S16
))
6593 Opc
= AArch64::ST4i16
;
6594 else if (Ty
== LLT::fixed_vector(2, S32
) || Ty
== LLT::fixed_vector(4, S32
))
6595 Opc
= AArch64::ST4i32
;
6596 else if (Ty
== LLT::fixed_vector(2, S64
) ||
6597 Ty
== LLT::fixed_vector(2, P0
) || Ty
== S64
|| Ty
== P0
)
6598 Opc
= AArch64::ST4i64
;
6600 llvm_unreachable("Unexpected type for st4lane!");
6601 if (!selectVectorStoreLaneIntrinsic(I
, 4, Opc
))
  case Intrinsic::aarch64_mops_memset_tag: {
    // Transform
    //   %dst:gpr(p0) = \
    //     G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
    //     \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
    // where %dst is updated, into
    //   (%Rd:GPR64common, %Rn:GPR64) = \
    //     MOPSMemorySetTaggingPseudo \
    //     %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
    // where Rd and Rn are tied.
    // It is expected that %val has been extended to s64 in legalization.
    // Note that the order of the size/value operands is swapped.

    Register DstDef = I.getOperand(0).getReg();
    // I.getOperand(1) is the intrinsic function
    Register DstUse = I.getOperand(2).getReg();
    Register ValUse = I.getOperand(3).getReg();
    Register SizeUse = I.getOperand(4).getReg();

    // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
    // Therefore an additional virtual register is required for the updated size
    // operand. This value is not accessible via the semantics of the intrinsic.
    Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));

    auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
                                 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
    Memset.cloneMemRefs(I);
    constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
    break;
  }
  }

  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();

  switch (IntrinID) {
  default:
    break;
  case Intrinsic::aarch64_crypto_sha1h: {
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(2).getReg();

    // FIXME: Should this be an assert?
    if (MRI.getType(DstReg).getSizeInBits() != 32 ||
        MRI.getType(SrcReg).getSizeInBits() != 32)
      return false;

    // The operation has to happen on FPRs. Set up some new FPR registers for
    // the source and destination if they are on GPRs.
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
      SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
      MIB.buildCopy({SrcReg}, {I.getOperand(2)});

      // Make sure the copy ends up getting constrained properly.
      RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
      DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);

    // Actually insert the instruction.
    auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
    constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);

    // Did we create a new register for the destination?
    if (DstReg != I.getOperand(0).getReg()) {
      // Yep. Copy the result of the instruction back into the original
      // destination.
      MIB.buildCopy({I.getOperand(0)}, {DstReg});
      RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    I.eraseFromParent();
    return true;
  }
  case Intrinsic::ptrauth_resign: {
    Register DstReg = I.getOperand(0).getReg();
    Register ValReg = I.getOperand(2).getReg();
    uint64_t AUTKey = I.getOperand(3).getImm();
    Register AUTDisc = I.getOperand(4).getReg();
    uint64_t PACKey = I.getOperand(5).getImm();
    Register PACDisc = I.getOperand(6).getReg();

    Register AUTAddrDisc = AUTDisc;
    uint16_t AUTConstDiscC = 0;
    std::tie(AUTConstDiscC, AUTAddrDisc) =
        extractPtrauthBlendDiscriminators(AUTDisc, MRI);

    Register PACAddrDisc = PACDisc;
    uint16_t PACConstDiscC = 0;
    std::tie(PACConstDiscC, PACAddrDisc) =
        extractPtrauthBlendDiscriminators(PACDisc, MRI);

    MIB.buildCopy({AArch64::X16}, {ValReg});
    MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
    MIB.buildInstr(AArch64::AUTPAC)
        .addImm(AUTKey)
        .addImm(AUTConstDiscC)
        .addUse(AUTAddrDisc)
        .addImm(PACKey)
        .addImm(PACConstDiscC)
        .addUse(PACAddrDisc)
        .constrainAllUses(TII, TRI, RBI);
    MIB.buildCopy({DstReg}, Register(AArch64::X16));

    RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }
  case Intrinsic::ptrauth_auth: {
    Register DstReg = I.getOperand(0).getReg();
    Register ValReg = I.getOperand(2).getReg();
    uint64_t AUTKey = I.getOperand(3).getImm();
    Register AUTDisc = I.getOperand(4).getReg();

    Register AUTAddrDisc = AUTDisc;
    uint16_t AUTConstDiscC = 0;
    std::tie(AUTConstDiscC, AUTAddrDisc) =
        extractPtrauthBlendDiscriminators(AUTDisc, MRI);

    MIB.buildCopy({AArch64::X16}, {ValReg});
    MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
    MIB.buildInstr(AArch64::AUT)
        .addImm(AUTKey)
        .addImm(AUTConstDiscC)
        .addUse(AUTAddrDisc)
        .constrainAllUses(TII, TRI, RBI);
    MIB.buildCopy({DstReg}, Register(AArch64::X16));

    RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }
  case Intrinsic::frameaddress:
  case Intrinsic::returnaddress: {
    MachineFunction &MF = *I.getParent()->getParent();
    MachineFrameInfo &MFI = MF.getFrameInfo();

    unsigned Depth = I.getOperand(2).getImm();
    Register DstReg = I.getOperand(0).getReg();
    RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);

    if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
      if (!MFReturnAddr) {
        // Insert the copy from LR/X30 into the entry block, before it can be
        // clobbered by anything.
        MFI.setReturnAddressIsTaken(true);
        MFReturnAddr = getFunctionLiveInPhysReg(
            MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
      }

      if (STI.hasPAuth()) {
        MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
      } else {
        MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
        MIB.buildInstr(AArch64::XPACLRI);
        MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
      }

      I.eraseFromParent();
      return true;
    }

    MFI.setFrameAddressIsTaken(true);
    Register FrameAddr(AArch64::FP);
    while (Depth--) {
      Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
      auto Ldr =
          MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
      constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
      FrameAddr = NextFrame;
    }

    if (IntrinID == Intrinsic::frameaddress)
      MIB.buildCopy({DstReg}, {FrameAddr});
    else {
      MFI.setReturnAddressIsTaken(true);

      if (STI.hasPAuth()) {
        Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
        MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
        MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
      } else {
        MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
            .addImm(1);
        MIB.buildInstr(AArch64::XPACLRI);
        MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
      }
    }

    I.eraseFromParent();
    return true;
  }
  case Intrinsic::aarch64_neon_tbl2:
    SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
    return true;
  case Intrinsic::aarch64_neon_tbl3:
    SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
                false);
    return true;
  case Intrinsic::aarch64_neon_tbl4:
    SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
    return true;
  case Intrinsic::aarch64_neon_tbx2:
    SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
    return true;
  case Intrinsic::aarch64_neon_tbx3:
    SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
    return true;
  case Intrinsic::aarch64_neon_tbx4:
    SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
    return true;
  case Intrinsic::swift_async_context_addr:
    auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
                              {Register(AArch64::FP)})
                   .addImm(8)
                   .addImm(0);
    constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);

    MF->getFrameInfo().setFrameAddressIsTaken(true);
    MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
    I.eraseFromParent();
    return true;
  }

  return false;
}

// G_PTRAUTH_GLOBAL_VALUE lowering
//
// We have 3 lowering alternatives to choose from:
// - MOVaddrPAC: similar to MOVaddr, with added PAC.
//   If the GV doesn't need a GOT load (i.e., is locally defined),
//   materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
//
// - LOADgotPAC: similar to LOADgot, with added PAC.
//   If the GV needs a GOT load, materialize the pointer using the usual
//   GOT adrp+ldr, +pac. Pointers in GOT are assumed to be not signed, the GOT
//   section is assumed to be read-only (for example, via relro mechanism). See
//   LowerLOADgotPAC.
//
// - LOADauthptrstatic: similar to LOADgot, but use a
//   special stub slot instead of a GOT slot.
//   Load a signed pointer for symbol 'sym' from a stub slot named
//   'sym$auth_ptr$key$disc' filled by dynamic linker during relocation
//   resolving. This usually lowers to adrp+ldr, but also emits an entry into
//   .data with an @AUTH relocation. See LowerLOADauthptrstatic.
//
// All 3 are pseudos that are expanded late to longer sequences: this lets us
// provide integrity guarantees on the to-be-signed intermediate values.
//
// LOADauthptrstatic is undesirable because it requires a large section filled
// with often similarly-signed pointers, making it a good harvesting target.
// Thus, it's only used for ptrauth references to extern_weak to avoid null
// checks.
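//
// As a rough illustration (a sketch for exposition only; the register names
// and operands below are invented, not taken from an actual MIR dump), the
// generic input handled here looks like:
//
//   %g:gpr(p0) = G_GLOBAL_VALUE @var
//   %signed:gpr(p0) = G_PTRAUTH_GLOBAL_VALUE %g(p0), <key>, %addrdisc(s64),
//                       <constant discriminator>
//
// and it is rewritten into one of the three pseudos above, with the result
// copied out of X16 for the PAC-producing variants.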
bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  Register DefReg = I.getOperand(0).getReg();
  Register Addr = I.getOperand(1).getReg();
  uint64_t Key = I.getOperand(2).getImm();
  Register AddrDisc = I.getOperand(3).getReg();
  uint64_t Disc = I.getOperand(4).getImm();
  int64_t Offset = 0;

  if (Key > AArch64PACKey::LAST)
    report_fatal_error("key in ptrauth global out of range [0, " +
                       Twine((int)AArch64PACKey::LAST) + "]");

  // Blend only works if the integer discriminator is 16-bit wide.
  if (!isUInt<16>(Disc))
    report_fatal_error(
        "constant discriminator in ptrauth global out of range [0, 0xffff]");

  // Choosing between 3 lowering alternatives is target-specific.
  if (!STI.isTargetELF() && !STI.isTargetMachO())
    report_fatal_error("ptrauth global lowering only supported on MachO/ELF");

  if (!MRI.hasOneDef(Addr))
    return false;

  // First match any offset we take from the real global.
  const MachineInstr *DefMI = &*MRI.def_instr_begin(Addr);
  if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
    Register OffsetReg = DefMI->getOperand(2).getReg();
    if (!MRI.hasOneDef(OffsetReg))
      return false;
    const MachineInstr &OffsetMI = *MRI.def_instr_begin(OffsetReg);
    if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
      return false;

    Addr = DefMI->getOperand(1).getReg();
    if (!MRI.hasOneDef(Addr))
      return false;

    DefMI = &*MRI.def_instr_begin(Addr);
    Offset = OffsetMI.getOperand(1).getCImm()->getSExtValue();
  }

  // We should be left with a genuine unauthenticated GlobalValue.
  const GlobalValue *GV;
  if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
    GV = DefMI->getOperand(1).getGlobal();
    Offset += DefMI->getOperand(1).getOffset();
  } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
    GV = DefMI->getOperand(2).getGlobal();
    Offset += DefMI->getOperand(2).getOffset();
  } else {
    return false;
  }

  MachineIRBuilder MIB(I);

  // Classify the reference to determine whether it needs a GOT load.
  unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
  const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
  assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
         "unsupported non-GOT op flags on ptrauth global reference");
  assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
         "unsupported non-GOT reference to weak ptrauth global");

  std::optional<APInt> AddrDiscVal = getIConstantVRegVal(AddrDisc, MRI);
  bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;

  // Non-extern_weak:
  // - No GOT load needed -> MOVaddrPAC
  // - GOT load for non-extern_weak -> LOADgotPAC
  // Note that we disallow extern_weak refs to avoid null checks later.
  if (!GV->hasExternalWeakLinkage()) {
    MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
    MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
    MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
        .addGlobalAddress(GV, Offset)
        .addImm(Key)
        .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
        .addImm(Disc)
        .constrainAllUses(TII, TRI, RBI);
    MIB.buildCopy(DefReg, Register(AArch64::X16));
    RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }

  // extern_weak -> LOADauthptrstatic

  // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
  // offset alone as a pointer if the symbol wasn't available, which would
  // probably break null checks in users. Ptrauth complicates things further:
  // rather than try to cope with that, reject such references outright.
  if (Offset != 0)
    report_fatal_error(
        "unsupported non-zero offset in weak ptrauth global reference");

  if (HasAddrDisc)
    report_fatal_error("unsupported weak addr-div ptrauth global");

  MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
      .addGlobalAddress(GV, Offset)
      .addImm(Key)
      .addImm(Disc);
  RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);

  I.eraseFromParent();
  return true;
}

void AArch64InstructionSelector::SelectTable(MachineInstr &I,
                                             MachineRegisterInfo &MRI,
                                             unsigned NumVec, unsigned Opc1,
                                             unsigned Opc2, bool isExt) {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? Opc1 : Opc2;

  // Create the REG_SEQUENCE
  SmallVector<Register, 4> Regs;
  for (unsigned i = 0; i < NumVec; i++)
    Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
  Register RegSeq = createQTuple(Regs, MIB);

  Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
  MachineInstrBuilder Instr;
  if (isExt) {
    Register Reg = I.getOperand(2).getReg();
    Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
  } else
    Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
  constrainSelectedInstRegOperands(*Instr, TII, TRI, RBI);
  I.eraseFromParent();
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
    return std::nullopt;
  uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
    return std::nullopt;
  uint64_t Enc = 31 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
    return std::nullopt;
  uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
    return std::nullopt;
  uint64_t Enc = 63 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

/// Helper to select an immediate value that can be represented as a 12-bit
/// value shifted left by either 0 or 12. If it is possible to do so, return
/// the immediate and shift value. If not, return std::nullopt.
///
/// Used by selectArithImmed and selectNegArithImmed.
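///
/// For illustration (the values here are chosen for exposition, not taken
/// from a test): 0x123 selects as {imm = 0x123, LSL #0}, 0x123000 selects as
/// {imm = 0x123, LSL #12}, and 0x123456 is rejected because its low 12 bits
/// are non-zero while bits above bit 11 are also set.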
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::select12BitValueWithLeftShift(
    uint64_t Immed) const {
  unsigned ShiftAmt;
  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return std::nullopt;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
  }};
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in. However,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt)
    return std::nullopt;
  return select12BitValueWithLeftShift(*MaybeImmed);
}

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
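///
/// For example (an illustrative case, not taken from a test): a 32-bit
/// compare against 0xfffffffe (-2) negates to 2, which fits in 12 bits, so
/// the comparison can use "cmn wN, #2" instead of materializing the wide
/// constant.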
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
  // We need a register here, because we need to know if we have a 64 or 32
  // bit immediate.
  if (!Root.isReg())
    return std::nullopt;
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt)
    return std::nullopt;
  uint64_t Immed = *MaybeImmed;

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return std::nullopt;

  // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
  // the root.
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;

  if (Immed & 0xFFFFFFFFFF000000ULL)
    return std::nullopt;

  Immed &= 0xFFFFFFULL;
  return select12BitValueWithLeftShift(Immed);
}

/// Checks if we are sure that folding MI into load/store addressing mode is
/// beneficial or not.
///
/// Returns:
/// - true if folding MI would be beneficial.
/// - false if folding MI would be bad.
/// - std::nullopt if it is not sure whether folding MI is beneficial.
///
/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
///
/// %13:gpr(s64) = G_CONSTANT i64 1
/// %8:gpr(s64) = G_SHL %6, %13(s64)
/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  if (MI.getOpcode() == AArch64::G_SHL) {
    // Address operands with shifts are free, except for running on subtargets
    // with AddrLSLSlow14.
    if (const auto ValAndVReg = getIConstantVRegValWithLookThrough(
            MI.getOperand(2).getReg(), MRI)) {
      const APInt ShiftVal = ValAndVReg->Value;

      // Don't fold if we know this will be slow.
      return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
    }
  }
  return std::nullopt;
}

/// Return true if it is worth folding MI into an extended register. That is,
/// if it's safe to pull it into the addressing mode of a load or store as a
/// shift.
/// \p IsAddrOperand whether the def of MI is used as an address operand
/// (e.g. feeding into an LDR/STR).
bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
    MachineInstr &MI, const MachineRegisterInfo &MRI,
    bool IsAddrOperand) const {

  // Always fold if there is one use, or if we're optimizing for size.
  Register DefReg = MI.getOperand(0).getReg();
  if (MRI.hasOneNonDBGUse(DefReg) ||
      MI.getParent()->getParent()->getFunction().hasOptSize())
    return true;

  if (IsAddrOperand) {
    // If we are already sure that folding MI is good or bad, return the result.
    if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
      return *Worth;

    // Fold G_PTR_ADD if its offset operand can be folded
    if (MI.getOpcode() == AArch64::G_PTR_ADD) {
      MachineInstr *OffsetInst =
          getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);

      // Note, we already know G_PTR_ADD is used by at least two instructions.
      // If we are also sure about whether folding is beneficial or not,
      // return the result.
      if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
        return *Worth;
    }
  }

  // FIXME: Consider checking HasALULSLFast as appropriate.

  // We have a fastpath, so folding a shift in and potentially computing it
  // many times may be beneficial. Check if this is only used in memory ops.
  // If it is, then we should fold.
  return all_of(MRI.use_nodbg_instructions(DefReg),
                [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
}

static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
  switch (Type) {
  case AArch64_AM::SXTB:
  case AArch64_AM::SXTH:
  case AArch64_AM::SXTW:
    return true;
  default:
    return false;
  }
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtendedSHL(
    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
    unsigned SizeInBytes, bool WantsExt) const {
  assert(Base.isReg() && "Expected base to be a register operand");
  assert(Offset.isReg() && "Expected offset to be a register operand");

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());

  unsigned OffsetOpc = OffsetInst->getOpcode();
  bool LookedThroughZExt = false;
  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
    // Try to look through a ZEXT.
    if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
      return std::nullopt;

    OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
    OffsetOpc = OffsetInst->getOpcode();
    LookedThroughZExt = true;

    if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
      return std::nullopt;
  }
  // Make sure that the memory op is a valid size.
  int64_t LegalShiftVal = Log2_32(SizeInBytes);
  if (LegalShiftVal == 0)
    return std::nullopt;
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
    return std::nullopt;

  // Now, try to find the specific G_CONSTANT. Start by assuming that the
  // register we will offset is the LHS, and the register containing the
  // constant is the RHS.
  Register OffsetReg = OffsetInst->getOperand(1).getReg();
  Register ConstantReg = OffsetInst->getOperand(2).getReg();
  auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
  if (!ValAndVReg) {
    // We didn't get a constant on the RHS. If the opcode is a shift, then
    // we're done.
    if (OffsetOpc == TargetOpcode::G_SHL)
      return std::nullopt;

    // If we have a G_MUL, we can use either register. Try looking at the RHS.
    std::swap(OffsetReg, ConstantReg);
    ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
    if (!ValAndVReg)
      return std::nullopt;
  }

  // The value must fit into 3 bits, and must be positive. Make sure that is
  // true.
  int64_t ImmVal = ValAndVReg->Value.getSExtValue();

  // Since we're going to pull this into a shift, the constant value must be
  // a power of 2. If we got a multiply, then we need to check this.
  if (OffsetOpc == TargetOpcode::G_MUL) {
    if (!llvm::has_single_bit<uint32_t>(ImmVal))
      return std::nullopt;

    // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
    ImmVal = Log2_32(ImmVal);
  }

  if ((ImmVal & 0x7) != ImmVal)
    return std::nullopt;

  // We are only allowed to shift by LegalShiftVal. This shift value is built
  // into the instruction, so we can't just use whatever we want.
  if (ImmVal != LegalShiftVal)
    return std::nullopt;

  unsigned SignExtend = 0;
  if (WantsExt) {
    // Check if the offset is defined by an extend, unless we looked through a
    // G_ZEXT earlier.
    if (!LookedThroughZExt) {
      MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
      auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
      if (Ext == AArch64_AM::InvalidShiftExtend)
        return std::nullopt;

      SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
      // We only support SXTW for signed extension here.
      if (SignExtend && Ext != AArch64_AM::SXTW)
        return std::nullopt;
      OffsetReg = ExtInst->getOperand(1).getReg();
    }

    // Need a 32-bit wide register here.
    MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
    OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
  }

  // We can use the LHS of the GEP as the base, and the LHS of the shift as an
  // offset. Signify that we are shifting by setting the shift flag to 1.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are both
             // added to the instruction.
             MIB.addImm(SignExtend);
             MIB.addImm(/*Shifted=*/1);
           }}};
}

/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3, lsl #3]
///
/// Where x2 is the base register, and x3 is an offset register. The shift-left
/// is a constant value specific to this load instruction. That is, we'll never
/// see anything other than a 3 here (which corresponds to the size of the
/// element being loaded.)
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
    MachineOperand &Root, unsigned SizeInBytes) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We want to find something like this:
  //
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // And fold it into this addressing mode:
  //
  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]

  // Check if we can find the G_PTR_ADD.
  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
    return std::nullopt;

  // Now, try to match an opcode which will match our specific offset.
  // We want a G_SHL or a G_MUL.
  MachineInstr *OffsetInst =
      getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
  return selectExtendedSHL(Root, PtrAdd->getOperand(1),
                           OffsetInst->getOperand(0), SizeInBytes,
                           /*WantsExt=*/false);
}

/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3]
///
/// Where x2 is the base register, and x3 is an offset register.
///
/// When possible (or profitable) to fold a G_PTR_ADD into the address
/// calculation, this will do so. Otherwise, it will return std::nullopt.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeRegisterOffset(
    MachineOperand &Root) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We need a GEP.
  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
  if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
    return std::nullopt;

  // If this is used more than once, let's not bother folding.
  // TODO: Check if they are memory ops. If they are, then we can still fold
  // without having to recompute anything.
  if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
    return std::nullopt;

  // Base is the GEP's LHS, offset is its RHS.
  return {{[=](MachineInstrBuilder &MIB) {
             MIB.addUse(Gep->getOperand(1).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             MIB.addUse(Gep->getOperand(2).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are both
             // added to the instruction.
             MIB.addImm(0);
             MIB.addImm(0);
           }}};
}

/// This is intended to be equivalent to selectAddrModeXRO in
/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  if (!Root.isReg())
    return std::nullopt;
  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd)
    return std::nullopt;

  // Check for immediates which cannot be encoded in the [base + imm]
  // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
  // end up with code like:
  //
  // mov x0, wide
  // add x1, base, x0
  // ldr x2, [x1, x0]
  //
  // In this situation, we can use the [base, xreg] addressing mode to save an
  // add/sub:
  //
  // mov x0, wide
  // ldr x2, [base, x0]
  auto ValAndVReg =
      getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
  if (ValAndVReg) {
    unsigned Scale = Log2_32(SizeInBytes);
    int64_t ImmOff = ValAndVReg->Value.getSExtValue();

    // Skip immediates that can be selected in the load/store addressing
    // mode.
    if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
        ImmOff < (0x1000 << Scale))
      return std::nullopt;

    // Helper lambda to decide whether or not it is preferable to emit an add.
    auto isPreferredADD = [](int64_t ImmOff) {
      // Constants in [0x0, 0xfff] can be encoded in an add.
      if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
        return true;

      // Can it be encoded in an add lsl #12?
      if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
        return false;

      // It can be encoded in an add lsl #12, but we may not want to. If it is
      // possible to select this as a single movz, then prefer that. A single
      // movz is faster than an add with a shift.
      return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
             (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
    };

    // If the immediate can be encoded in a single add/sub, then bail out.
    if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return std::nullopt;
  }

  // Try to fold shifts into the addressing mode.
  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
  if (AddrModeFns)
    return AddrModeFns;

  // If that doesn't work, see if it's possible to fold in registers from
  // addressing modes.
  return selectAddrModeRegisterOffset(Root);
}

/// This is used for computing addresses like this:
///
/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
///
/// Where we have a 64-bit base register, a 32-bit offset register, and an
/// extend (which may or may not be signed).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
    return std::nullopt;

  MachineOperand &LHS = PtrAdd->getOperand(1);
  MachineOperand &RHS = PtrAdd->getOperand(2);
  MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);

  // The first case is the same as selectAddrModeXRO, except we need an extend.
  // In this case, we try to find a shift and extend, and fold them into the
  // addressing mode.
  //
  // E.g.
  //
  // off_reg = G_Z/S/ANYEXT ext_reg
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // In this case we can get a load like this:
  //
  // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
  auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
                                       SizeInBytes, /*WantsExt=*/true);
  if (ExtendedShl)
    return ExtendedShl;

  // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
  //
  // e.g.
  // ldr something, [base_reg, ext_reg, sxtw]
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
    return std::nullopt;

  // Check if this is an extend. We'll get an extend type if it is.
  AArch64_AM::ShiftExtendType Ext =
      getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
  if (Ext == AArch64_AM::InvalidShiftExtend)
    return std::nullopt;

  // Need a 32-bit wide register.
  MachineIRBuilder MIB(*PtrAdd);
  Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
                                       AArch64::GPR32RegClass, MIB);
  unsigned SignExtend = Ext == AArch64_AM::SXTW;

  // Base is LHS, offset is ExtReg.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(SignExtend);
             MIB.addImm(0);
           }}};
}

/// Select a "register plus unscaled signed 9-bit immediate" address. This
/// should only match when there is an offset that is not valid for a scaled
/// immediate addressing mode. The "Size" argument is the size in bytes of the
/// memory reference, which is needed here to know what is valid for a scaled
/// addressing mode.
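///
/// For example (illustrative): with Size == 4, an offset of -3 cannot use the
/// scaled form (it is negative and not a multiple of 4), but it does fit the
/// signed 9-bit range [-256, 255] matched here, as in "ldur w0, [x1, #-3]".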
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
                                                   unsigned Size) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  if (!Root.isReg())
    return std::nullopt;

  if (!isBaseWithConstantOffset(Root, MRI))
    return std::nullopt;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());

  MachineOperand &OffImm = RootDef->getOperand(2);
  if (!OffImm.isReg())
    return std::nullopt;
  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
  if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
    return std::nullopt;

  int64_t RHSC;
  MachineOperand &RHSOp1 = RHS->getOperand(1);
  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
    return std::nullopt;
  RHSC = RHSOp1.getCImm()->getSExtValue();

  if (RHSC >= -256 && RHSC < 256) {
    MachineOperand &Base = RootDef->getOperand(1);
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
    }};
  }
  return std::nullopt;
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
                                                 unsigned Size,
                                                 MachineRegisterInfo &MRI) const {
  if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
    return std::nullopt;
  MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
  if (Adrp.getOpcode() != AArch64::ADRP)
    return std::nullopt;

  // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
  auto Offset = Adrp.getOperand(1).getOffset();
  if (Offset % Size != 0)
    return std::nullopt;

  auto GV = Adrp.getOperand(1).getGlobal();
  if (GV->isThreadLocal())
    return std::nullopt;

  auto &MF = *RootDef.getParent()->getParent();
  if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
    return std::nullopt;

  unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
  MachineIRBuilder MIRBuilder(RootDef);
  Register AdrpReg = Adrp.getOperand(0).getReg();
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addGlobalAddress(GV, Offset,
                                  OpFlags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
           }}};
}

/// Select a "register plus scaled unsigned 12-bit immediate" address. The
/// "Size" argument is the size in bytes of the memory reference, which
/// determines the scale.
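///
/// For example (illustrative): with Size == 8, an offset of 40 is rendered as
/// the scaled immediate 40 >> 3 == 5, matching "ldr x0, [x1, #40]".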
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                  unsigned Size) const {
  MachineFunction &MF = *Root.getParent()->getParent()->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (!Root.isReg())
    return std::nullopt;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
    }};
  }

  CodeModel::Model CM = MF.getTarget().getCodeModel();
  // Check if we can fold in the ADD of small code model ADRP + ADD address.
  if (CM == CodeModel::Small) {
    auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
    if (OpFns)
      return OpFns;
  }

  if (isBaseWithConstantOffset(Root, MRI)) {
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineOperand &RHS = RootDef->getOperand(2);
    MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());

    int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
    unsigned Scale = Log2_32(Size);
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
      if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
        }};

      return {{
          [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
          [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
      }};
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (selectAddrModeUnscaled(Root, Size))
    return std::nullopt;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  }};
}

/// Given a shift instruction, return the correct shift type for that
/// instruction.
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case TargetOpcode::G_SHL:
    return AArch64_AM::LSL;
  case TargetOpcode::G_LSHR:
    return AArch64_AM::LSR;
  case TargetOpcode::G_ASHR:
    return AArch64_AM::ASR;
  case TargetOpcode::G_ROTR:
    return AArch64_AM::ROR;
  }
}

/// Select a "shifted register" operand. If the value is not shifted, set the
/// shift operand to a default value of "lsl 0".
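///
/// For example (an illustrative case): an operand defined by
/// "%s:gpr(s64) = G_SHL %x, 3" is rendered as %x plus the shifter immediate
/// for "lsl #3", as used by instructions like "add x0, x1, x2, lsl #3".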
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
                                                  bool AllowROR) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  // Check if the operand is defined by an instruction which corresponds to
  // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
  MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return std::nullopt;
  if (ShType == AArch64_AM::ROR && !AllowROR)
    return std::nullopt;
  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
    return std::nullopt;

  // Need an immediate on the RHS.
  MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
  auto Immed = getImmedFromMO(ShiftRHS);
  if (!Immed)
    return std::nullopt;

  // We have something that we can fold. Fold in the shift's LHS and RHS into
  // the instruction.
  MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
  Register ShiftReg = ShiftLHS.getReg();

  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
  unsigned Val = *Immed & (NumBits - 1);
  unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
}

AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
  unsigned Opc = MI.getOpcode();

  // Handle explicit extend instructions first.
  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
    unsigned Size;
    if (Opc == TargetOpcode::G_SEXT)
      Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    else
      Size = MI.getOperand(2).getImm();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
    case 16:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
    case 32:
      return AArch64_AM::SXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
    case 16:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
    case 32:
      return AArch64_AM::UXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  // Don't have an explicit extend. Try to handle a G_AND with a constant mask
  // which implies an extend.
  if (Opc != TargetOpcode::G_AND)
    return AArch64_AM::InvalidShiftExtend;

  std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
  if (!MaybeAndMask)
    return AArch64_AM::InvalidShiftExtend;
  uint64_t AndMask = *MaybeAndMask;
  switch (AndMask) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case 0xFF:
    return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
  case 0xFFFF:
    return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
  case 0xFFFFFFFF:
    return AArch64_AM::UXTW;
  }
}

Register AArch64InstructionSelector::moveScalarRegClass(
    Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Ty = MRI.getType(Reg);
  assert(!Ty.isVector() && "Expected scalars only!");
  if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
    return Reg;

  // Create a copy and immediately select it.
  // FIXME: We should have an emitCopy function?
  auto Copy = MIB.buildCopy({&RC}, {Reg});
  selectCopy(*Copy, TII, MRI, TRI, RBI);
  return Copy.getReg(0);
}

/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
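///
/// For example (illustrative): an operand defined by
/// "G_SHL (G_SEXT %w:gpr(s32)), 2" folds into the extended-register form
/// "add x0, x1, w2, sxtw #2".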
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithExtendedRegister(
    MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  uint64_t ShiftVal = 0;
  Register ExtReg;
  AArch64_AM::ShiftExtendType Ext;
  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
  if (!RootDef)
    return std::nullopt;

  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
    return std::nullopt;

  // Check if we can fold a shift and an extend.
  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
    // Look for a constant on the RHS of the shift.
    MachineOperand &RHS = RootDef->getOperand(2);
    std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
    if (!MaybeShiftVal)
      return std::nullopt;
    ShiftVal = *MaybeShiftVal;
    if (ShiftVal > 4)
      return std::nullopt;
    // Look for a valid extend instruction on the LHS of the shift.
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
    if (!ExtDef)
      return std::nullopt;
    Ext = getExtendTypeForInst(*ExtDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return std::nullopt;
    ExtReg = ExtDef->getOperand(1).getReg();
  } else {
    // Didn't get a shift. Try just folding an extend.
    Ext = getExtendTypeForInst(*RootDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return std::nullopt;
    ExtReg = RootDef->getOperand(1).getReg();

    // If we have a 32 bit instruction which zeroes out the high half of a
    // register, we get an implicit zero extend for free. Check if we have one.
    // FIXME: We actually emit the extend right now even though we don't have
    // to.
    if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
      MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
      if (isDef32(*ExtInst))
        return std::nullopt;
    }
  }

  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
  // copy.
  MachineIRBuilder MIB(*RootDef);
  ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(getArithExtendImm(Ext, ShiftVal));
           }}};
}

7856 AArch64InstructionSelector::selectExtractHigh(MachineOperand
&Root
) const {
7858 return std::nullopt
;
7859 MachineRegisterInfo
&MRI
=
7860 Root
.getParent()->getParent()->getParent()->getRegInfo();
7862 auto Extract
= getDefSrcRegIgnoringCopies(Root
.getReg(), MRI
);
7863 while (Extract
&& Extract
->MI
->getOpcode() == TargetOpcode::G_BITCAST
&&
7864 STI
.isLittleEndian())
7866 getDefSrcRegIgnoringCopies(Extract
->MI
->getOperand(1).getReg(), MRI
);
7868 return std::nullopt
;
7870 if (Extract
->MI
->getOpcode() == TargetOpcode::G_UNMERGE_VALUES
) {
7871 if (Extract
->Reg
== Extract
->MI
->getOperand(1).getReg()) {
7872 Register ExtReg
= Extract
->MI
->getOperand(2).getReg();
7873 return {{[=](MachineInstrBuilder
&MIB
) { MIB
.addUse(ExtReg
); }}};
7876 if (Extract
->MI
->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT
) {
7877 LLT SrcTy
= MRI
.getType(Extract
->MI
->getOperand(1).getReg());
7878 auto LaneIdx
= getIConstantVRegValWithLookThrough(
7879 Extract
->MI
->getOperand(2).getReg(), MRI
);
7880 if (LaneIdx
&& SrcTy
== LLT::fixed_vector(2, 64) &&
7881 LaneIdx
->Value
.getSExtValue() == 1) {
7882 Register ExtReg
= Extract
->MI
->getOperand(1).getReg();
7883 return {{[=](MachineInstrBuilder
&MIB
) { MIB
.addUse(ExtReg
); }}};
7887 return std::nullopt
;
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  std::optional<int64_t> CstVal =
      getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
  assert(CstVal && "Expected constant value");
  MIB.addImm(*CstVal);
}

void AArch64InstructionSelector::renderLogicalImm32(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
  MIB.addImm(Enc);
}

void AArch64InstructionSelector::renderLogicalImm64(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
  MIB.addImm(Enc);
}

void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
         "Expected G_UBSANTRAP");
  MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
}

void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
                                                      .getFPImm()
                                                      ->getValueAPF()
                                                      .bitcastToAPInt()
                                                      .getZExtValue()));
}

bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
    const MachineInstr &MI, unsigned NumBytes) const {
  if (!MI.mayLoadOrStore())
    return false;
  assert(MI.hasOneMemOperand() &&
         "Expected load/store to have only one mem op!");
  return (*MI.memoperands_begin())->getSize() == NumBytes;
}

bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
    return false;

  // Only return true if we know the operation will zero-out the high half of
  // the 64-bit register. Truncates can be subregister copies, which don't
  // zero out the high bits. Copies and other copy-like instructions can be
  // fed by truncates, or could be lowered as subregister copies.
  switch (MI.getOpcode()) {
  default:
    return true;
  case TargetOpcode::COPY:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_PHI:
    return false;
  }
}

// Perform fixups on the given PHI instruction's operands to force them all
// to be the same as the destination regbank.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
                            const AArch64RegisterBankInfo &RBI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
  Register DstReg = MI.getOperand(0).getReg();
  const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
  assert(DstRB && "Expected PHI dst to have regbank assigned");
  MachineIRBuilder MIB(MI);

  // Go through each operand and ensure it has the same regbank.
  for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
    if (!MO.isReg())
      continue;
    Register OpReg = MO.getReg();
    const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
    if (RB != DstRB) {
      // Insert a cross-bank copy.
      auto *OpDef = MRI.getVRegDef(OpReg);
      const LLT &Ty = MRI.getType(OpReg);
      MachineBasicBlock &OpDefBB = *OpDef->getParent();

      // Any instruction we insert must appear after all PHIs in the block
      // for the block to be valid MIR.
      MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
      if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
        InsertPt = OpDefBB.getFirstNonPHI();
      MIB.setInsertPt(*OpDef->getParent(), InsertPt);
      auto Copy = MIB.buildCopy(Ty, OpReg);
      MRI.setRegBank(Copy.getReg(0), *DstRB);
      MO.setReg(Copy.getReg(0));
    }
  }
}

void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  // We're looking for PHIs, build a list so we don't invalidate iterators.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<MachineInstr *, 32> Phis;
  for (auto &BB : MF) {
    for (auto &MI : BB) {
      if (MI.getOpcode() == TargetOpcode::G_PHI)
        Phis.emplace_back(&MI);
    }
  }

  for (auto *MI : Phis) {
    // We need to do some work here if the operand types are < 16 bit and they
    // are split across fpr/gpr banks. Since all types <32b on gpr
    // end up being assigned gpr32 regclasses, we can end up with PHIs here
    // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
    // be selecting heterogeneous regbanks for operands if possible, but we
    // still need to be able to deal with it here.
    //
    // To fix this, if we have a gpr-bank operand < 32b in size and at least
    // one other operand is on the fpr bank, then we add cross-bank copies
    // to homogenize the operand banks. For simplicity the bank that we choose
    // to settle on is whatever bank the def operand has. For example:
    //
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
    //  =>
    // %bb2:
    //   ...
    //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
    //   ...
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
    bool HasGPROp = false, HasFPROp = false;
    for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
      if (!MO.isReg())
        continue;
      const LLT &Ty = MRI.getType(MO.getReg());
      if (!Ty.isValid() || !Ty.isScalar())
        break;
      if (Ty.getSizeInBits() >= 32)
        break;
      const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
      // If for some reason we don't have a regbank yet, don't try anything.
      if (!RB)
        break;

      if (RB->getID() == AArch64::GPRRegBankID)
        HasGPROp = true;
      else
        HasFPROp = true;
    }
    // We have heterogeneous regbanks, need to fixup.
    if (HasGPROp && HasFPROp)
      fixupPHIOpBanks(*MI, MRI, RBI);
  }
}

namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 const AArch64Subtarget &Subtarget,
                                 const AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
} // namespace llvm