//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetRegisterInfo class.
// This file is responsible for the frame pointer elimination optimization
// on X86.
//
//===----------------------------------------------------------------------===//
15 #include "X86RegisterInfo.h"
16 #include "X86FrameLowering.h"
17 #include "X86MachineFunctionInfo.h"
18 #include "X86Subtarget.h"
19 #include "llvm/ADT/BitVector.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/CodeGen/LiveRegMatrix.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/TargetFrameLowering.h"
27 #include "llvm/CodeGen/TargetInstrInfo.h"
28 #include "llvm/CodeGen/TileShapeInfo.h"
29 #include "llvm/CodeGen/VirtRegMap.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/Type.h"
32 #include "llvm/MC/MCContext.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include "llvm/Target/TargetOptions.h"
using namespace llvm;

#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"
44 EnableBasePointer("x86-use-base-pointer", cl::Hidden
, cl::init(true),
45 cl::desc("Enable use of a base pointer for complex stack frames"));
48 DisableRegAllocNDDHints("x86-disable-regalloc-hints-for-ndd", cl::Hidden
,
50 cl::desc("Disable two address hints for register "
X86RegisterInfo::X86RegisterInfo(const Triple &TT)
    : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
                         X86_MC::getDwarfRegFlavour(TT, false),
                         X86_MC::getDwarfRegFlavour(TT, true),
                         (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
  X86_MC::initLLVMToSEHAndCVRegMapping(this);

  // Cache some information.
  Is64Bit = TT.isArch64Bit();
  IsWin64 = Is64Bit && TT.isOSWindows();
  // Use a callee-saved register as the base pointer. These registers must
  // not conflict with any ABI requirements. For example, in 32-bit mode PIC
  // requires GOT in the EBX register before function calls via PLT GOT pointer.
  //
  // This matches the simplified 32-bit pointer code in the data layout
  // computation.
  // FIXME: Should use the data layout?
  bool Use64BitReg = !TT.isX32();
  StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
  FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
  BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
}
const TargetRegisterClass *
X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
                                       unsigned Idx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  // It behaves just like the sub_8bit_hi index.
  if (!Is64Bit && Idx == X86::sub_8bit)
    Idx = X86::sub_8bit_hi;

  // Forward to TableGen's default version.
  return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
}
const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                          const TargetRegisterClass *B,
                                          unsigned SubIdx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  if (!Is64Bit && SubIdx == X86::sub_8bit) {
    A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
    if (!A)
      return nullptr;
  }
  return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}
const TargetRegisterClass *
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                           const MachineFunction &MF) const {
  // Don't allow super-classes of GR8_NOREX. This class is only used after
  // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
  // to the full GR8 register class in 64-bit mode, so we cannot allow the
  // register class inflation.
  //
  // The GR8_NOREX class is always used in a way that won't be constrained to a
  // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
  // full GR8 class.
  if (RC == &X86::GR8_NOREXRegClass)
    return RC;

  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();

  const TargetRegisterClass *Super = RC;
  auto I = RC->superclasses().begin();
  auto E = RC->superclasses().end();
  do {
    switch (Super->getID()) {
    case X86::FR32RegClassID:
    case X86::FR64RegClassID:
      // If AVX-512 isn't supported we should only inflate to these classes.
      if (!Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128RegClassID:
    case X86::VR256RegClassID:
      // If VLX isn't supported we should only inflate to these classes.
      if (!Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128XRegClassID:
    case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::FR32XRegClassID:
    case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::GR8RegClassID:
    case X86::GR16RegClassID:
    case X86::GR32RegClassID:
    case X86::GR64RegClassID:
    case X86::GR8_NOREX2RegClassID:
    case X86::GR16_NOREX2RegClassID:
    case X86::GR32_NOREX2RegClassID:
    case X86::GR64_NOREX2RegClassID:
    case X86::RFP32RegClassID:
    case X86::RFP64RegClassID:
    case X86::RFP80RegClassID:
    case X86::VR512_0_15RegClassID:
    case X86::VR512RegClassID:
      // Don't return a super-class that would shrink the spill size.
      // That can happen with the vector and float classes.
      if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    }
    if (I == E)
      break;
    Super = getRegClass(*I);
    ++I;
  } while (Super);
  return RC;
}
const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
                                    unsigned Kind) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  switch (Kind) {
  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
  case 0: // Normal GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64RegClass;
    // If the target is 64bit but we have been told to use 32bit addresses,
    // we can still use 64-bit register as long as we know the high bits
    // are zeros.
    // Reflect that in the returned register class.
    if (Is64Bit) {
      // When the target also allows 64-bit frame pointer and we do have a
      // frame, it is fine to use it for the address accesses as well.
      const X86FrameLowering *TFI = getFrameLowering(MF);
      return TFI->hasFP(MF) && TFI->Uses64BitFramePtr
                 ? &X86::LOW32_ADDR_ACCESS_RBPRegClass
                 : &X86::LOW32_ADDR_ACCESSRegClass;
    }
    return &X86::GR32RegClass;
  case 1: // Normal GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOSPRegClass;
  case 2: // NOREX GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREXRegClass;
    return &X86::GR32_NOREXRegClass;
  case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREX_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOREX_NOSPRegClass;
  case 4: // Available for tailcall (not callee-saved GPRs).
    return getGPRsForTailCall(MF);
  }
}
bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
                                           unsigned DefSubReg,
                                           const TargetRegisterClass *SrcRC,
                                           unsigned SrcSubReg) const {
  // Prevent rewriting a copy where the destination size is larger than the
  // input size. See PR41619.
  // FIXME: Should this be factored into the base implementation somehow?
  if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 &&
      SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit)
    return false;

  return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
                                                  SrcRC, SrcSubReg);
}
const TargetRegisterClass *
X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  if (IsWin64 || (F.getCallingConv() == CallingConv::Win64))
    return &X86::GR64_TCW64RegClass;
  else if (Is64Bit)
    return &X86::GR64_TCRegClass;

  bool hasHipeCC = (F.getCallingConv() == CallingConv::HiPE);
  if (hasHipeCC)
    return &X86::GR32RegClass;
  return &X86::GR32_TCRegClass;
}
const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &X86::CCRRegClass) {
    if (Is64Bit)
      return &X86::GR64RegClass;
    else
      return &X86::GR32RegClass;
  }
  return RC;
}
unsigned
X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                     MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);

  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
  switch (RC->getID()) {
  default:
    return 0;
  case X86::GR32RegClassID:
    return 4 - FPDiff;
  case X86::GR64RegClassID:
    return 12 - FPDiff;
  case X86::VR128RegClassID:
    return Is64Bit ? 10 : 4;
  case X86::VR64RegClassID:
    return 4;
  }
}
const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(MF && "MachineFunction required");

  const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
  const Function &F = MF->getFunction();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool CallsEHReturn = MF->callsEHReturn();

  CallingConv::ID CC = F.getCallingConv();

  // If the attribute NoCallerSavedRegisters exists, then we set the X86_INTR
  // calling convention because it has the CSR list.
  if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;

  // If the attribute is specified, override the CSRs normally specified by
  // the calling convention and use the empty set instead.
  if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
    return CSR_NoRegs_SaveList;
  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_SaveList;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_SaveList;
    return CSR_64_AllRegs_SaveList;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList
                   : CSR_64_RT_MostRegs_SaveList;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_SaveList;
    return CSR_64_RT_AllRegs_SaveList;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_SaveList;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
             CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_SaveList;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_SaveList;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_SaveList :
                         CSR_Win64_RegCall_NoSSE_SaveList);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_SaveList :
                         CSR_SysV64_RegCall_NoSSE_SaveList);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_SaveList :
                       CSR_32_RegCall_NoSSE_SaveList);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
                   : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_SaveList;
    break;
  case CallingConv::Win64:
    if (!HasSSE)
      return CSR_Win64_NoSSE_SaveList;
    return CSR_Win64_SaveList;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_SaveList;
    return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
  case CallingConv::X86_64_SysV:
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_64_AllRegs_SaveList;
      return CSR_64_AllRegs_NoSSE_SaveList;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_SaveList;
      return CSR_32_AllRegs_SaveList;
    }
  default:
    break;
  }

  if (Is64Bit) {
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_SaveList
                     : CSR_64_SwiftError_SaveList;

    if (IsWin64)
      return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  }

  return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
}
const MCPhysReg *
X86RegisterInfo::getIPRACSRegs(const MachineFunction *MF) const {
  return Is64Bit ? CSR_IPRA_64_SaveList : CSR_IPRA_32_SaveList;
}
const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
    const MachineFunction *MF) const {
  assert(MF && "Invalid MachineFunction pointer.");
  if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
      MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
    return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
  return nullptr;
}
const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                      CallingConv::ID CC) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_RegMask;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_RegMask;
    return CSR_64_AllRegs_RegMask;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask : CSR_64_RT_MostRegs_RegMask;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_RegMask;
    return CSR_64_RT_AllRegs_RegMask;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_RegMask;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return CSR_64_TLS_Darwin_RegMask;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_RegMask;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_RegMask;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_RegMask :
                         CSR_Win64_RegCall_NoSSE_RegMask);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_RegMask :
                         CSR_SysV64_RegCall_NoSSE_RegMask);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_RegMask :
                       CSR_32_RegCall_NoSSE_RegMask);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
                   : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_RegMask;
    break;
  case CallingConv::Win64:
    return CSR_Win64_RegMask;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_RegMask;
    return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
  case CallingConv::X86_64_SysV:
    return CSR_64_RegMask;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_64_AllRegs_RegMask;
      return CSR_64_AllRegs_NoSSE_RegMask;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_RegMask;
      return CSR_32_AllRegs_RegMask;
    }
  default:
    break;
  }
  // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
  // callsEHReturn().
  if (Is64Bit) {
    const Function &F = MF.getFunction();
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;

    return IsWin64 ? CSR_Win64_RegMask : CSR_64_RegMask;
  }

  return CSR_32_RegMask;
}
const uint32_t *X86RegisterInfo::getNoPreservedMask() const {
  return CSR_NoRegs_RegMask;
}

const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
  return CSR_64_TLS_Darwin_RegMask;
}
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const X86FrameLowering *TFI = getFrameLowering(MF);

  // Set the floating point control register as reserved.
  Reserved.set(X86::FPCW);

  // Set the floating point status register as reserved.
  Reserved.set(X86::FPSW);

  // Set the SIMD floating point control register as reserved.
  Reserved.set(X86::MXCSR);

  // Set the stack-pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP))
    Reserved.set(SubReg);

  // Set the Shadow Stack Pointer as reserved.
  Reserved.set(X86::SSP);

  // Set the instruction pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP))
    Reserved.set(SubReg);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getFPClobberedByInvoke())
      MF.getContext().reportError(
          SMLoc(),
          "Frame pointer clobbered by function invoke is not supported.");

    for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP))
      Reserved.set(SubReg);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getBPClobberedByInvoke())
      MF.getContext().reportError(SMLoc(),
                                  "Stack realignment in presence of dynamic "
                                  "allocas is not supported with "
                                  "this calling convention.");

    Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
    for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr))
      Reserved.set(SubReg);
  }
  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though their
    // super-registers are old 32-bits.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);
    Reserved.set(X86::SIH);
    Reserved.set(X86::DIH);
    Reserved.set(X86::BPH);
    Reserved.set(X86::SPH);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 0; n != 16; ++n) {
      for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid();
           ++AI)
        Reserved.set(*AI);
    }
  }

  // Reserve the extended general purpose registers.
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR())
    Reserved.set(X86::R16, X86::R31WH + 1);

  if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
    for (MCRegAliasIterator AI(X86::R14, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
    for (MCRegAliasIterator AI(X86::R15, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
  }

  // Reserve low half pair registers in case they are used by RA aggressively.
  Reserved.set(X86::TMM0_TMM1);
  Reserved.set(X86::TMM2_TMM3);

  assert(checkAllSuperRegsMarked(Reserved,
                                 {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
                                  X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
  return Reserved;
}
unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
  // All existing Intel CPUs that support AMX support AVX512 and all existing
  // Intel CPUs that support APX support AMX. AVX512 implies AVX.
  //
  // We enumerate the registers in X86GenRegisterInfo.inc in this order:
  //
  // Registers before AVX512,
  // AVX512 registers (X/YMM16-31, ZMM0-31, K registers)
  // AMX registers (TMM)
  // APX registers (R16-R31)
  //
  // and try to return the minimum number of registers supported by the target.
  static_assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
                    (X86::K6_K7 + 1 == X86::TMMCFG) &&
                    (X86::TMM6_TMM7 + 1 == X86::R16) &&
                    (X86::R31WH + 1 == X86::NUM_TARGET_REGS),
                "Register number may be incorrect");

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (ST.hasEGPR())
    return X86::NUM_TARGET_REGS;
  if (ST.hasAMXTILE())
    return X86::TMM7 + 1;
  if (ST.hasAVX512())
    return X86::K6_K7 + 1;
  if (ST.hasAVX())
    return X86::YMM15 + 1;
  return X86::R15WH + 1;
}
bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
                                         MCRegister Reg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
  auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
    return TRI.isSuperOrSubRegisterEq(RegA, RegB);
  };

  if (!ST.is64Bit())
    return llvm::any_of(
               SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
               [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
           (ST.hasMMX() && X86::VR64RegClass.contains(Reg));

  CallingConv::ID CC = MF.getFunction().getCallingConv();

  if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
    return true;

  if (llvm::any_of(
          SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
          [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (CC != CallingConv::Win64 &&
      llvm::any_of(SmallVector<MCRegister>{X86::RDI, X86::RSI},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (ST.hasSSE1() &&
      llvm::any_of(SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
                                           X86::XMM3, X86::XMM4, X86::XMM5,
                                           X86::XMM6, X86::XMM7},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
}
bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
                                      MCRegister PhysReg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  // Stack pointer.
  if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg))
    return true;

  // Don't use the frame pointer if it's being used.
  const X86FrameLowering &TFI = *getFrameLowering(MF);
  if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg))
    return true;

  return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
}
bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
  return RC->getID() == X86::TILERegClassID ||
         RC->getID() == X86::TILEPAIRRegClassID;
}
void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
  // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
  // because the calling convention defines the EFLAGS register as NOT
  // preserved.
  //
  // Unfortunately the EFLAGS show up as live-out after branch folding. Adding
  // an assert to track this and clear the register afterwards to avoid
  // unnecessary crashes during release builds.
  assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
         "EFLAGS are not live-out from a patchpoint.");

  // Also clean other registers that don't need preserving (IP).
  for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
    Mask[Reg / 32] &= ~(1U << (Reg % 32));
}
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

static bool CantUseSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
}
bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // We have a virtual register to reference the argument, and don't need a
  // base pointer.
  if (X86FI->getStackPtrSaveMI() != nullptr)
    return false;

  if (X86FI->hasPreallocatedCall())
    return true;

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!EnableBasePointer)
    return false;

  // When we need stack realignment, we can't address the stack from the frame
  // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
  // can't address variables from the stack pointer. MS inline asm can
  // reference locals while also adjusting the stack pointer. When we can't
  // use both the SP and the FP, we need a separate base pointer register.
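  //
  // For example, a function that over-aligns a local variable (forcing stack
  // realignment, so fixed objects must be addressed off the frame pointer)
  // and that also calls alloca (so stack-pointer offsets are unknown) cannot
  // use either SP or FP alone, and a base pointer (RBX/EBX, chosen in the
  // constructor above) is required.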
  bool CantUseFP = hasStackRealignment(MF);
  return CantUseFP && CantUseSP(MFI);
}
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
  if (!TargetRegisterInfo::canRealignStack(MF))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const MachineRegisterInfo *MRI = &MF.getRegInfo();

  // Stack realignment requires a frame pointer. If we already started
  // register allocation with frame pointer elimination, it is too late now.
  if (!MRI->canReserveReg(FramePtr))
    return false;

  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
  if (CantUseSP(MFI))
    return MRI->canReserveReg(BasePtr);
  return true;
}
bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
  if (TargetRegisterInfo::shouldRealignStack(MF))
    return true;

  return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
}
// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should be really trying first to entirely eliminate
// this instruction which is a plain copy.
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
  MachineInstr &MI = *II;
  unsigned Opc = II->getOpcode();
  // Check if this is a LEA of the form 'lea (%esp), %ebx'.
  if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
      MI.getOperand(2).getImm() != 1 ||
      MI.getOperand(3).getReg() != X86::NoRegister ||
      MI.getOperand(4).getImm() != 0 ||
      MI.getOperand(5).getReg() != X86::NoRegister)
    return false;
  Register BasePtr = MI.getOperand(1).getReg();
  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV which will zero extend the upper
  // 32-bits of the super register.
  if (Opc == X86::LEA64_32r)
    BasePtr = getX86SubSuperRegister(BasePtr, 32);
  Register NewDestReg = MI.getOperand(0).getReg();
  const X86InstrInfo *TII =
      MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
  TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
                   MI.getOperand(1).isKill());
  MI.eraseFromParent();
  return true;
}
&MI
) {
854 switch (MI
.getOpcode()) {
856 case X86::CLEANUPRET
:
861 llvm_unreachable("impossible");
void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          unsigned FIOperandNum,
                                          Register BaseReg,
                                          int FIOffset) const {
  MachineInstr &MI = *II;
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return;
  }

  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
}
bool
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                     int SPAdj, unsigned FIOperandNum,
                                     RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
                                               : isFuncletReturnInstr(*MBBI);
  const X86FrameLowering *TFI = getFrameLowering(MF);
  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();

  // Determine base register and offset.
  int FIOffset;
  Register BasePtr;
  if (MI.isReturn()) {
    assert((!hasStackRealignment(MF) ||
            MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
           "Return instruction can only reference SP relative frame objects");
    FIOffset =
        TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed();
  } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
    FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
  } else {
    FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed();
  }

  // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
  // simple FP case, and doesn't work with stack realignment. On 32-bit, the
  // offset is from the traditional base pointer location. On 64-bit, the
  // offset is from the SP at the end of the prologue, not the FP location. This
  // matches the behavior of llvm.frameaddress.
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return false;
  }

  // For LEA64_32r when BasePtr is 32-bits (X32) we can use full-size 64-bit
  // register as source operand, semantic is the same and destination is
  // 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided.
  // Don't change BasePtr since it is used later for stack adjustment.
  Register MachineBasePtr = BasePtr;
  if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
    MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);

  // This must be part of a four operand memory reference. Replace the
  // FrameIndex with base register. Add an offset to the offset.
  MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);

  if (BasePtr == StackPtr)
    FIOffset += SPAdj;

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return false;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
  return false;
}
unsigned X86RegisterInfo::findDeadCallerSavedReg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
  const MachineFunction *MF = MBB.getParent();
  if (MF->callsEHReturn())
    return 0;

  const TargetRegisterClass &AvailableRegs = *getGPRsForTailCall(*MF);

  if (MBBI == MBB.end())
    return 0;

  switch (MBBI->getOpcode()) {
  default:
    return 0;
  case TargetOpcode::PATCHABLE_RET:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (MachineOperand &MO : MBBI->operands()) {
      if (!MO.isReg() || MO.isDef())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    for (auto CS : AvailableRegs)
      if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP && CS != X86::ESP)
        return CS;
  }
  }

  return 0;
}
Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);
  return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
Register
X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register FrameReg = getFrameRegister(MF);
  if (Subtarget.isTarget64BitILP32())
    FrameReg = getX86SubSuperRegister(FrameReg, 32);
  return FrameReg;
}
Register
X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register StackReg = getStackRegister();
  if (Subtarget.isTarget64BitILP32())
    StackReg = getX86SubSuperRegister(StackReg, 32);
  return StackReg;
}
static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
                           const MachineRegisterInfo *MRI) {
  if (VRM->hasShape(VirtReg))
    return VRM->getShape(VirtReg);

  const MachineOperand &Def = *MRI->def_begin(VirtReg);
  MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
  unsigned OpCode = MI->getOpcode();
  switch (OpCode) {
  default:
    llvm_unreachable("Unexpected machine instruction on tile register!");
    break;
  case X86::COPY: {
    Register SrcReg = MI->getOperand(1).getReg();
    ShapeT Shape = getTileShape(SrcReg, VRM, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  // We only collect the tile shape that is defined.
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTILEZEROV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV:
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
  case X86::PTTRANSPOSEDV:
  case X86::PTTDPBF16PSV:
  case X86::PTTDPFP16PSV:
  case X86::PTTCMMIMFP16PSV:
  case X86::PTTCMMRLFP16PSV:
  case X86::PTCONJTCMMIMFP16PSV:
  case X86::PTCONJTFP16V:
  case X86::PTILELOADDRSV:
  case X86::PTILELOADDRST1V:
  case X86::PTMMULTF32PSV:
  case X86::PTTMMULTF32PSV:
  case X86::PTDPBF8PSV:
  case X86::PTDPBHF8PSV:
  case X86::PTDPHBF8PSV:
  case X86::PTDPHF8PSV: {
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    ShapeT Shape(&MO1, &MO2, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  case X86::PT2RPNTLVWZ0V:
  case X86::PT2RPNTLVWZ0T1V:
  case X86::PT2RPNTLVWZ1V:
  case X86::PT2RPNTLVWZ1T1V:
  case X86::PT2RPNTLVWZ0RSV:
  case X86::PT2RPNTLVWZ0RST1V:
  case X86::PT2RPNTLVWZ1RSV:
  case X86::PT2RPNTLVWZ1RST1V: {
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    MachineOperand &MO3 = MI->getOperand(3);
    ShapeT Shape({&MO1, &MO2, &MO1, &MO3}, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  }
}
static bool canHintShape(ShapeT &PhysShape, ShapeT &VirtShape) {
  unsigned PhysShapeNum = PhysShape.getShapeNum();
  unsigned VirtShapeNum = VirtShape.getShapeNum();

  if (PhysShapeNum < VirtShapeNum)
    return false;

  if (PhysShapeNum == VirtShapeNum) {
    if (PhysShapeNum == 1)
      return PhysShape == VirtShape;

    for (unsigned I = 0; I < PhysShapeNum; I++) {
      ShapeT PShape(PhysShape.getRow(I), PhysShape.getCol(I));
      ShapeT VShape(VirtShape.getRow(I), VirtShape.getCol(I));
      if (VShape != PShape)
        return false;
    }
    return true;
  }

  // Hint subreg of multi-tile reg to single tile reg.
  if (VirtShapeNum == 1) {
    for (unsigned I = 0; I < PhysShapeNum; I++) {
      ShapeT PShape(PhysShape.getRow(I), PhysShape.getCol(I));
      if (VirtShape == PShape)
        return true;
    }
    return false;
  }

  // Note: Currently we have no requirement for case of
  // (VirtShapeNum > 1 and PhysShapeNum > VirtShapeNum).
  return false;
}
bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
                                            ArrayRef<MCPhysReg> Order,
                                            SmallVectorImpl<MCPhysReg> &Hints,
                                            const MachineFunction &MF,
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
  const MachineRegisterInfo *MRI = &MF.getRegInfo();
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
      VirtReg, Order, Hints, MF, VRM, Matrix);
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  unsigned ID = RC.getID();

  if (!VRM)
    return BaseImplRetVal;

  if (ID != X86::TILERegClassID && ID != X86::TILEPAIRRegClassID) {
    if (DisableRegAllocNDDHints || !ST.hasNDD() ||
        !TRI.isGeneralPurposeRegisterClass(&RC))
      return BaseImplRetVal;

    // Add any two address hints after any copy hints.
    SmallSet<unsigned, 4> TwoAddrHints;

    auto TryAddNDDHint = [&](const MachineOperand &MO) {
      Register Reg = MO.getReg();
      Register PhysReg = Reg.isPhysical() ? Reg : Register(VRM->getPhys(Reg));
      if (PhysReg && !MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
        TwoAddrHints.insert(PhysReg);
    };

    // NDD instructions are compressible when Op0 is allocated to the same
    // physical register as Op1 (or Op2 if the instruction is commutable).
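    // For example (illustrative; assuming the ADD64rr_ND opcode name), in
    // "%d = ADD64rr_ND %a, %b" the NDD form can be relaxed to the shorter
    // legacy "ADD64rr" when %d and %a (or %b, after commuting) are assigned
    // the same physical register, so we hint the allocator toward that.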
    for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
      const MachineInstr &MI = *MO.getParent();
      if (!X86::getNonNDVariant(MI.getOpcode()))
        continue;
      unsigned OpIdx = MI.getOperandNo(&MO);
      if (OpIdx == 0) {
        assert(MI.getOperand(1).isReg());
        TryAddNDDHint(MI.getOperand(1));
        if (MI.isCommutable()) {
          assert(MI.getOperand(2).isReg());
          TryAddNDDHint(MI.getOperand(2));
        }
      } else if (OpIdx == 1) {
        TryAddNDDHint(MI.getOperand(0));
      } else if (MI.isCommutable() && OpIdx == 2) {
        TryAddNDDHint(MI.getOperand(0));
      }
    }

    for (MCPhysReg OrderReg : Order)
      if (TwoAddrHints.count(OrderReg))
        Hints.push_back(OrderReg);

    return BaseImplRetVal;
  }

  ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
  auto AddHint = [&](MCPhysReg PhysReg) {
    Register VReg = Matrix->getOneVReg(PhysReg);
    if (VReg == MCRegister::NoRegister) { // Not allocated yet
      Hints.push_back(PhysReg);
      return;
    }
    ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
    if (canHintShape(PhysShape, VirtShape))
      Hints.push_back(PhysReg);
  };

  SmallSet<MCPhysReg, 4> CopyHints;
  CopyHints.insert(Hints.begin(), Hints.end());
  Hints.clear();
  for (auto Hint : CopyHints) {
    if (RC.contains(Hint) && !MRI->isReserved(Hint))
      AddHint(Hint);
  }
  for (MCPhysReg PhysReg : Order) {
    if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) &&
        !MRI->isReserved(PhysReg))
      AddHint(PhysReg);
  }

#define DEBUG_TYPE "tile-hint"
  LLVM_DEBUG({
    dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
    for (auto Hint : Hints) {
      dbgs() << "tmm" << Hint << ",";