//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetRegisterInfo class.
// This file is responsible for the frame pointer elimination optimization
// on X86.
//
//===----------------------------------------------------------------------===//

15 #include "X86RegisterInfo.h"
16 #include "X86FrameLowering.h"
17 #include "X86MachineFunctionInfo.h"
18 #include "X86Subtarget.h"
19 #include "llvm/ADT/BitVector.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/CodeGen/LiveRegMatrix.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/TargetFrameLowering.h"
27 #include "llvm/CodeGen/TargetInstrInfo.h"
28 #include "llvm/CodeGen/TileShapeInfo.h"
29 #include "llvm/CodeGen/VirtRegMap.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/Type.h"
32 #include "llvm/MC/MCContext.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include "llvm/Target/TargetOptions.h"
38 using namespace llvm;
40 #define GET_REGINFO_TARGET_DESC
41 #include "X86GenRegisterInfo.inc"
static cl::opt<bool>
    EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
                      cl::desc("Enable use of a base pointer for complex stack frames"));

static cl::opt<bool>
    DisableRegAllocNDDHints("x86-disable-regalloc-hints-for-ndd", cl::Hidden,
                            cl::init(false),
                            cl::desc("Disable two address hints for register "
                                     "allocation"));

X86RegisterInfo::X86RegisterInfo(const Triple &TT)
    : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
                         X86_MC::getDwarfRegFlavour(TT, false),
                         X86_MC::getDwarfRegFlavour(TT, true),
                         (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
  X86_MC::initLLVMToSEHAndCVRegMapping(this);

  // Cache some information.
  Is64Bit = TT.isArch64Bit();
  IsWin64 = Is64Bit && TT.isOSWindows();

  // Use a callee-saved register as the base pointer. These registers must
  // not conflict with any ABI requirements. For example, in 32-bit mode PIC
  // requires GOT in the EBX register before function calls via PLT GOT pointer.
  if (Is64Bit) {
    SlotSize = 8;
    // This matches the simplified 32-bit pointer code in the data layout
    // computation.
    // FIXME: Should use the data layout?
    bool Use64BitReg = !TT.isX32();
    StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
    FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
    BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
  } else {
    SlotSize = 4;
    StackPtr = X86::ESP;
    FramePtr = X86::EBP;
    BasePtr = X86::ESI;
  }
}

const TargetRegisterClass *
X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
                                       unsigned Idx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  // It behaves just like the sub_8bit_hi index.
  if (!Is64Bit && Idx == X86::sub_8bit)
    Idx = X86::sub_8bit_hi;

  // Forward to TableGen's default version.
  return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
}

const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                          const TargetRegisterClass *B,
                                          unsigned SubIdx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  if (!Is64Bit && SubIdx == X86::sub_8bit) {
    A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
    if (!A)
      return nullptr;
  }
  return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}

const TargetRegisterClass *
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                           const MachineFunction &MF) const {
  // Don't allow super-classes of GR8_NOREX. This class is only used after
  // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
  // to the full GR8 register class in 64-bit mode, so we cannot allow the
  // register class inflation.
  //
  // The GR8_NOREX class is always used in a way that won't be constrained to a
  // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
  // full GR8 class.
  if (RC == &X86::GR8_NOREXRegClass)
    return RC;

  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();

  const TargetRegisterClass *Super = RC;
  auto I = RC->superclasses().begin();
  auto E = RC->superclasses().end();
  do {
    switch (Super->getID()) {
    case X86::FR32RegClassID:
    case X86::FR64RegClassID:
      // If AVX-512 isn't supported we should only inflate to these classes.
      if (!Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128RegClassID:
    case X86::VR256RegClassID:
      // If VLX isn't supported we should only inflate to these classes.
      if (!Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128XRegClassID:
    case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::FR32XRegClassID:
    case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::GR8RegClassID:
    case X86::GR16RegClassID:
    case X86::GR32RegClassID:
    case X86::GR64RegClassID:
    case X86::GR8_NOREX2RegClassID:
    case X86::GR16_NOREX2RegClassID:
    case X86::GR32_NOREX2RegClassID:
    case X86::GR64_NOREX2RegClassID:
    case X86::RFP32RegClassID:
    case X86::RFP64RegClassID:
    case X86::RFP80RegClassID:
    case X86::VR512_0_15RegClassID:
    case X86::VR512RegClassID:
      // Don't return a super-class that would shrink the spill size.
      // That can happen with the vector and float classes.
      if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
    }

    if (I != E) {
      Super = getRegClass(*I);
      ++I;
    } else {
      Super = nullptr;
    }
  } while (Super);
  return RC;
}

const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
                                    unsigned Kind) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  switch (Kind) {
  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
  case 0: // Normal GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64RegClass;
    // If the target is 64-bit but we have been told to use 32-bit addresses,
    // we can still use a 64-bit register as long as we know the high bits
    // are zeros.
    // Reflect that in the returned register class.
    if (Is64Bit) {
      // When the target also allows a 64-bit frame pointer and we do have a
      // frame, it is fine to use it for the address accesses as well.
      const X86FrameLowering *TFI = getFrameLowering(MF);
      return TFI->hasFP(MF) && TFI->Uses64BitFramePtr
                 ? &X86::LOW32_ADDR_ACCESS_RBPRegClass
                 : &X86::LOW32_ADDR_ACCESSRegClass;
    }
    return &X86::GR32RegClass;
  case 1: // Normal GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOSPRegClass;
  case 2: // NOREX GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREXRegClass;
    return &X86::GR32_NOREXRegClass;
  case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREX_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOREX_NOSPRegClass;
  case 4: // Available for tailcall (not callee-saved GPRs).
    return getGPRsForTailCall(MF);
  }
}

bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
                                           unsigned DefSubReg,
                                           const TargetRegisterClass *SrcRC,
                                           unsigned SrcSubReg) const {
  // Prevent rewriting a copy where the destination size is larger than the
  // input size. See PR41619.
  // FIXME: Should this be factored into the base implementation somehow.
  if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 &&
      SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit)
    return false;

  return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
                                                  SrcRC, SrcSubReg);
}

const TargetRegisterClass *
X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  if (IsWin64 || (F.getCallingConv() == CallingConv::Win64))
    return &X86::GR64_TCW64RegClass;
  else if (Is64Bit)
    return &X86::GR64_TCRegClass;

  bool hasHipeCC = (F.getCallingConv() == CallingConv::HiPE);
  if (hasHipeCC)
    return &X86::GR32RegClass;
  return &X86::GR32_TCRegClass;
}

const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &X86::CCRRegClass) {
    if (Is64Bit)
      return &X86::GR64RegClass;
    else
      return &X86::GR32RegClass;
  }
  return RC;
}

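// Rough per-class limits on the number of registers available for allocation;
// one GPR is subtracted when a frame pointer has to be reserved.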
unsigned
X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                     MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);

  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
  switch (RC->getID()) {
  default:
    return 0;
  case X86::GR32RegClassID:
    return 4 - FPDiff;
  case X86::GR64RegClassID:
    return 12 - FPDiff;
  case X86::VR128RegClassID:
    return Is64Bit ? 10 : 4;
  case X86::VR64RegClassID:
    return 4;
  }
}

const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(MF && "MachineFunction required");

  const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
  const Function &F = MF->getFunction();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool CallsEHReturn = MF->callsEHReturn();

  CallingConv::ID CC = F.getCallingConv();

  // If the NoCallerSavedRegisters attribute is present, switch to the X86_INTR
  // calling convention because it has the CSR list we need.
  if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;

  // If the attribute is specified, override the CSRs normally specified by the
  // calling convention and use the empty set instead.
  if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
    return CSR_NoRegs_SaveList;

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_SaveList;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_SaveList;
    return CSR_64_AllRegs_SaveList;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList
                   : CSR_64_RT_MostRegs_SaveList;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_SaveList;
    return CSR_64_RT_AllRegs_SaveList;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_SaveList;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
             CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_SaveList;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_SaveList;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_SaveList :
                CSR_Win64_RegCall_NoSSE_SaveList);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_SaveList :
                CSR_SysV64_RegCall_NoSSE_SaveList);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_SaveList :
              CSR_32_RegCall_NoSSE_SaveList);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
                   : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_SaveList;
    break;
  case CallingConv::Win64:
    if (!HasSSE)
      return CSR_Win64_NoSSE_SaveList;
    return CSR_Win64_SaveList;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_SaveList;
    return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
  case CallingConv::X86_64_SysV:
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_64_AllRegs_SaveList;
      return CSR_64_AllRegs_NoSSE_SaveList;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_SaveList;
      return CSR_32_AllRegs_SaveList;
    }
  default:
    break;
  }

  if (Is64Bit) {
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_SaveList
                     : CSR_64_SwiftError_SaveList;

    if (IsWin64)
      return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  }

  return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
}

const MCPhysReg *
X86RegisterInfo::getIPRACSRegs(const MachineFunction *MF) const {
  return Is64Bit ? CSR_IPRA_64_SaveList : CSR_IPRA_32_SaveList;
}

const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
    const MachineFunction *MF) const {
  assert(MF && "Invalid MachineFunction pointer.");
  if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
      MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
    return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
  return nullptr;
}

const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                      CallingConv::ID CC) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_RegMask;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_RegMask;
    return CSR_64_AllRegs_RegMask;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask : CSR_64_RT_MostRegs_RegMask;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_RegMask;
    return CSR_64_RT_AllRegs_RegMask;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_RegMask;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return CSR_64_TLS_Darwin_RegMask;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_RegMask;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_RegMask;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_RegMask :
                CSR_Win64_RegCall_NoSSE_RegMask);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_RegMask :
                CSR_SysV64_RegCall_NoSSE_RegMask);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_RegMask :
              CSR_32_RegCall_NoSSE_RegMask);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
                   : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_RegMask;
    break;
  case CallingConv::Win64:
    return CSR_Win64_RegMask;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_RegMask;
    return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
  case CallingConv::X86_64_SysV:
    return CSR_64_RegMask;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_64_AllRegs_RegMask;
      return CSR_64_AllRegs_NoSSE_RegMask;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_RegMask;
      return CSR_32_AllRegs_RegMask;
    }
  default:
    break;
  }

  // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
  // callsEHReturn().
  if (Is64Bit) {
    const Function &F = MF.getFunction();
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;

    return IsWin64 ? CSR_Win64_RegMask : CSR_64_RegMask;
  }

  return CSR_32_RegMask;
}

const uint32_t*
X86RegisterInfo::getNoPreservedMask() const {
  return CSR_NoRegs_RegMask;
}

const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
  return CSR_64_TLS_Darwin_RegMask;
}

BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const X86FrameLowering *TFI = getFrameLowering(MF);

  // Set the floating point control register as reserved.
  Reserved.set(X86::FPCW);

  // Set the floating point status register as reserved.
  Reserved.set(X86::FPSW);

  // Set the SIMD floating point control register as reserved.
  Reserved.set(X86::MXCSR);

  // Set the stack-pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP))
    Reserved.set(SubReg);

  // Set the Shadow Stack Pointer as reserved.
  Reserved.set(X86::SSP);

  // Set the instruction pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP))
    Reserved.set(SubReg);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getFPClobberedByInvoke())
      MF.getContext().reportError(
          SMLoc(),
          "Frame pointer clobbered by function invoke is not supported.");

    for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP))
      Reserved.set(SubReg);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getBPClobberedByInvoke())
      MF.getContext().reportError(SMLoc(),
                                  "Stack realignment in presence of dynamic "
                                  "allocas is not supported with "
                                  "this calling convention.");

    Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
    for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr))
      Reserved.set(SubReg);
  }

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though their
    // super-registers are old 32-bits.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);
    Reserved.set(X86::SIH);
    Reserved.set(X86::DIH);
    Reserved.set(X86::BPH);
    Reserved.set(X86::SPH);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 0; n != 16; ++n) {
      for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid();
           ++AI)
        Reserved.set(*AI);
    }
  }

  // Reserve the extended general purpose registers.
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR())
    Reserved.set(X86::R16, X86::R31WH + 1);

  if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
    for (MCRegAliasIterator AI(X86::R14, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
    for (MCRegAliasIterator AI(X86::R15, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
  }

  // Reserve low half pair registers in case they are used by RA aggressively.
  Reserved.set(X86::TMM0_TMM1);
  Reserved.set(X86::TMM2_TMM3);

  assert(checkAllSuperRegsMarked(Reserved,
                                 {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
                                  X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
  return Reserved;
}

unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
  // All existing Intel CPUs that support AMX support AVX512 and all existing
  // Intel CPUs that support APX support AMX. AVX512 implies AVX.
  //
  // We enumerate the registers in X86GenRegisterInfo.inc in this order:
  //
  // Registers before AVX512,
  // AVX512 registers (X/YMM16-31, ZMM0-31, K registers)
  // AMX registers (TMM)
  // APX registers (R16-R31)
  //
  // and try to return the minimum number of registers supported by the target.
  static_assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
                    (X86::K6_K7 + 1 == X86::TMMCFG) &&
                    (X86::TMM6_TMM7 + 1 == X86::R16) &&
                    (X86::R31WH + 1 == X86::NUM_TARGET_REGS),
                "Register number may be incorrect");

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (ST.hasEGPR())
    return X86::NUM_TARGET_REGS;
  if (ST.hasAMXTILE())
    return X86::TMM7 + 1;
  if (ST.hasAVX512())
    return X86::K6_K7 + 1;
  if (ST.hasAVX())
    return X86::YMM15 + 1;
  return X86::R15WH + 1;
}

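// Returns true if Reg (or a register aliasing it) may be used to pass a
// function argument under the current target and calling convention.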
bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
                                         MCRegister Reg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
  auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
    return TRI.isSuperOrSubRegisterEq(RegA, RegB);
  };

  if (!ST.is64Bit())
    return llvm::any_of(
               SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
               [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
           (ST.hasMMX() && X86::VR64RegClass.contains(Reg));

  CallingConv::ID CC = MF.getFunction().getCallingConv();

  if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
    return true;

  if (llvm::any_of(
          SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
          [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (CC != CallingConv::Win64 &&
      llvm::any_of(SmallVector<MCRegister>{X86::RDI, X86::RSI},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (ST.hasSSE1() &&
      llvm::any_of(SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
                                           X86::XMM3, X86::XMM4, X86::XMM5,
                                           X86::XMM6, X86::XMM7},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
}

bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
                                      MCRegister PhysReg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  // Stack pointer.
  if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg))
    return true;

  // Don't use the frame pointer if it's being used.
  const X86FrameLowering &TFI = *getFrameLowering(MF);
  if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg))
    return true;

  return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
}

bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
  return RC->getID() == X86::TILERegClassID ||
         RC->getID() == X86::TILEPAIRRegClassID;
}

void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
  // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
  // because the calling convention defines the EFLAGS register as NOT
  // preserved.
  //
  // Unfortunately the EFLAGS show up as live-out after branch folding. Adding
  // an assert to track this and clear the register afterwards to avoid
  // unnecessary crashes during release builds.
  assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
         "EFLAGS are not live-out from a patchpoint.");

  // Also clean other registers that don't need preserving (IP).
  for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
    Mask[Reg / 32] &= ~(1U << (Reg % 32));
}

//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

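// Locals cannot be addressed relative to the stack pointer when the frame
// contains variable-sized objects or opaque SP adjustments (e.g.
// stack-adjusting inline asm).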
static bool CantUseSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
}

bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // We have a virtual register to reference arguments and do not need a base
  // pointer.
  if (X86FI->getStackPtrSaveMI() != nullptr)
    return false;

  if (X86FI->hasPreallocatedCall())
    return true;

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!EnableBasePointer)
    return false;

  // When we need stack realignment, we can't address the stack from the frame
  // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
  // can't address variables from the stack pointer. MS inline asm can
  // reference locals while also adjusting the stack pointer. When we can't
  // use both the SP and the FP, we need a separate base pointer register.
  bool CantUseFP = hasStackRealignment(MF);
  return CantUseFP && CantUseSP(MFI);
}

bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
  if (!TargetRegisterInfo::canRealignStack(MF))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const MachineRegisterInfo *MRI = &MF.getRegInfo();

  // Stack realignment requires a frame pointer. If we already started
  // register allocation with frame pointer elimination, it is too late now.
  if (!MRI->canReserveReg(FramePtr))
    return false;

  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
  if (CantUseSP(MFI))
    return MRI->canReserveReg(BasePtr);
  return true;
}

bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
  if (TargetRegisterInfo::shouldRealignStack(MF))
    return true;

  return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
}

// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should be really trying first to entirely eliminate
// this instruction which is a plain copy.
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
  MachineInstr &MI = *II;
  unsigned Opc = II->getOpcode();
  // Check if this is a LEA of the form 'lea (%esp), %ebx'
  if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
      MI.getOperand(2).getImm() != 1 ||
      MI.getOperand(3).getReg() != X86::NoRegister ||
      MI.getOperand(4).getImm() != 0 ||
      MI.getOperand(5).getReg() != X86::NoRegister)
    return false;
  Register BasePtr = MI.getOperand(1).getReg();
  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV which will zero extend the upper
  // 32-bits of the super register.
  if (Opc == X86::LEA64_32r)
    BasePtr = getX86SubSuperRegister(BasePtr, 32);
  Register NewDestReg = MI.getOperand(0).getReg();
  const X86InstrInfo *TII =
      MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
  TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
                   MI.getOperand(1).isKill());
  MI.eraseFromParent();
  return true;
}

static bool isFuncletReturnInstr(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case X86::CATCHRET:
  case X86::CLEANUPRET:
    return true;
  default:
    return false;
  }
  llvm_unreachable("impossible");
}

void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          unsigned FIOperandNum,
                                          Register BaseReg,
                                          int FIOffset) const {
  MachineInstr &MI = *II;
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return;
  }

  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0)
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
}

bool
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                     int SPAdj, unsigned FIOperandNum,
                                     RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
                                               : isFuncletReturnInstr(*MBBI);
  const X86FrameLowering *TFI = getFrameLowering(MF);
  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();

  // Determine base register and offset.
  int FIOffset;
  Register BasePtr;
  if (MI.isReturn()) {
    assert((!hasStackRealignment(MF) ||
            MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
           "Return instruction can only reference SP relative frame objects");
    FIOffset =
        TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed();
  } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
    FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
  } else {
    FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed();
  }

  // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
  // simple FP case, and doesn't work with stack realignment. On 32-bit, the
  // offset is from the traditional base pointer location. On 64-bit, the
  // offset is from the SP at the end of the prologue, not the FP location. This
  // matches the behavior of llvm.frameaddress.
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return false;
  }

  // For LEA64_32r when BasePtr is 32-bits (X32) we can use full-size 64-bit
  // register as source operand, semantic is the same and destination is
  // 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided.
  // Don't change BasePtr since it is used later for stack adjustment.
  Register MachineBasePtr = BasePtr;
  if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
    MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);

  // This must be part of a four operand memory reference. Replace the
  // FrameIndex with base register. Add an offset to the offset.
  MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);

  if (BasePtr == StackPtr)
    FIOffset += SPAdj;

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return false;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset = FIOffset +
                      (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
  return false;
}

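// Return a caller-saved GPR from the tail-call register class that is not
// read by the return or tail-call instruction at MBBI, or 0 if none exists.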
unsigned X86RegisterInfo::findDeadCallerSavedReg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
  const MachineFunction *MF = MBB.getParent();
  if (MF->callsEHReturn())
    return 0;

  const TargetRegisterClass &AvailableRegs = *getGPRsForTailCall(*MF);

  if (MBBI == MBB.end())
    return 0;

  switch (MBBI->getOpcode()) {
  default:
    return 0;
  case TargetOpcode::PATCHABLE_RET:
  case X86::RET:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI32:
  case X86::RETI64:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (MachineOperand &MO : MBBI->operands()) {
      if (!MO.isReg() || MO.isDef())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    for (auto CS : AvailableRegs)
      if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP && CS != X86::ESP)
        return CS;
  }
  }

  return 0;
}

Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);
  return TFI->hasFP(MF) ? FramePtr : StackPtr;
}

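// On 64-bit targets with 32-bit pointers (x32/ILP32), addresses use the 32-bit
// views of the frame and stack registers; the next two helpers return the
// pointer-sized sub-register in that case.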
unsigned
X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register FrameReg = getFrameRegister(MF);
  if (Subtarget.isTarget64BitILP32())
    FrameReg = getX86SubSuperRegister(FrameReg, 32);
  return FrameReg;
}

unsigned
X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register StackReg = getStackRegister();
  if (Subtarget.isTarget64BitILP32())
    StackReg = getX86SubSuperRegister(StackReg, 32);
  return StackReg;
}

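// Recover the (row, col) shape of an AMX tile virtual register from its
// defining instruction, caching the result in the VirtRegMap.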
static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
                           const MachineRegisterInfo *MRI) {
  if (VRM->hasShape(VirtReg))
    return VRM->getShape(VirtReg);

  const MachineOperand &Def = *MRI->def_begin(VirtReg);
  MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
  unsigned OpCode = MI->getOpcode();
  switch (OpCode) {
  default:
    llvm_unreachable("Unexpected machine instruction on tile register!");
    break;
  case X86::COPY: {
    Register SrcReg = MI->getOperand(1).getReg();
    ShapeT Shape = getTileShape(SrcReg, VRM, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  // We only collect the tile shape that is defined.
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTILEZEROV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV:
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
  case X86::PTTRANSPOSEDV:
  case X86::PTTDPBF16PSV:
  case X86::PTTDPFP16PSV:
  case X86::PTTCMMIMFP16PSV:
  case X86::PTTCMMRLFP16PSV:
  case X86::PTCONJTCMMIMFP16PSV:
  case X86::PTCONJTFP16V:
  case X86::PTILELOADDRSV:
  case X86::PTILELOADDRST1V:
  case X86::PTMMULTF32PSV:
  case X86::PTTMMULTF32PSV:
  case X86::PTDPBF8PSV:
  case X86::PTDPBHF8PSV:
  case X86::PTDPHBF8PSV:
  case X86::PTDPHF8PSV: {
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    ShapeT Shape(&MO1, &MO2, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  case X86::PT2RPNTLVWZ0V:
  case X86::PT2RPNTLVWZ0T1V:
  case X86::PT2RPNTLVWZ1V:
  case X86::PT2RPNTLVWZ1T1V:
  case X86::PT2RPNTLVWZ0RSV:
  case X86::PT2RPNTLVWZ0RST1V:
  case X86::PT2RPNTLVWZ1RSV:
  case X86::PT2RPNTLVWZ1RST1V: {
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    MachineOperand &MO3 = MI->getOperand(3);
    ShapeT Shape({&MO1, &MO2, &MO1, &MO3}, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  }
}

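// Return true if a physical tile register with shape(s) PhysShape can serve
// as an allocation hint for a virtual register with shape(s) VirtShape.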
static bool canHintShape(ShapeT &PhysShape, ShapeT &VirtShape) {
  unsigned PhysShapeNum = PhysShape.getShapeNum();
  unsigned VirtShapeNum = VirtShape.getShapeNum();

  if (PhysShapeNum < VirtShapeNum)
    return false;

  if (PhysShapeNum == VirtShapeNum) {
    if (PhysShapeNum == 1)
      return PhysShape == VirtShape;

    for (unsigned I = 0; I < PhysShapeNum; I++) {
      ShapeT PShape(PhysShape.getRow(I), PhysShape.getCol(I));
      ShapeT VShape(VirtShape.getRow(I), VirtShape.getCol(I));
      if (VShape != PShape)
        return false;
    }
    return true;
  }

  // Hint a sub-register of a multi-tile register for a single tile register.
  if (VirtShapeNum == 1) {
    for (unsigned I = 0; I < PhysShapeNum; I++) {
      ShapeT PShape(PhysShape.getRow(I), PhysShape.getCol(I));
      if (VirtShape == PShape)
        return true;
    }
  }

  // Note: currently there is no requirement for the case of
  // (VirtShapeNum > 1 and PhysShapeNum > VirtShapeNum).
  return false;
}

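// Provide allocation hints: NDD instructions prefer reusing a source register
// so they can be compressed to a legacy encoding, and AMX tile registers are
// hinted only when their shapes are compatible.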
bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
                                            ArrayRef<MCPhysReg> Order,
                                            SmallVectorImpl<MCPhysReg> &Hints,
                                            const MachineFunction &MF,
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
  const MachineRegisterInfo *MRI = &MF.getRegInfo();
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
      VirtReg, Order, Hints, MF, VRM, Matrix);
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  unsigned ID = RC.getID();

  if (!VRM)
    return BaseImplRetVal;

  if (ID != X86::TILERegClassID && ID != X86::TILEPAIRRegClassID) {
    if (DisableRegAllocNDDHints || !ST.hasNDD() ||
        !TRI.isGeneralPurposeRegisterClass(&RC))
      return BaseImplRetVal;

    // Add any two address hints after any copy hints.
    SmallSet<unsigned, 4> TwoAddrHints;

    auto TryAddNDDHint = [&](const MachineOperand &MO) {
      Register Reg = MO.getReg();
      Register PhysReg = Reg.isPhysical() ? Reg : Register(VRM->getPhys(Reg));
      if (PhysReg && !MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
        TwoAddrHints.insert(PhysReg);
    };

    // An NDD instruction is compressible when Op0 is allocated to the same
    // physical register as Op1 (or Op2 if it's commutable).
    for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
      const MachineInstr &MI = *MO.getParent();
      if (!X86::getNonNDVariant(MI.getOpcode()))
        continue;
      unsigned OpIdx = MI.getOperandNo(&MO);
      if (OpIdx == 0) {
        assert(MI.getOperand(1).isReg());
        TryAddNDDHint(MI.getOperand(1));
        if (MI.isCommutable()) {
          assert(MI.getOperand(2).isReg());
          TryAddNDDHint(MI.getOperand(2));
        }
      } else if (OpIdx == 1) {
        TryAddNDDHint(MI.getOperand(0));
      } else if (MI.isCommutable() && OpIdx == 2) {
        TryAddNDDHint(MI.getOperand(0));
      }
    }

    for (MCPhysReg OrderReg : Order)
      if (TwoAddrHints.count(OrderReg))
        Hints.push_back(OrderReg);

    return BaseImplRetVal;
  }

  ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
  auto AddHint = [&](MCPhysReg PhysReg) {
    Register VReg = Matrix->getOneVReg(PhysReg);
    if (VReg == MCRegister::NoRegister) { // Not allocated yet
      Hints.push_back(PhysReg);
      return;
    }
    ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
    if (canHintShape(PhysShape, VirtShape))
      Hints.push_back(PhysReg);
  };

  SmallSet<MCPhysReg, 4> CopyHints;
  CopyHints.insert(Hints.begin(), Hints.end());
  Hints.clear();
  for (auto Hint : CopyHints) {
    if (RC.contains(Hint) && !MRI->isReserved(Hint))
      AddHint(Hint);
  }
  for (MCPhysReg PhysReg : Order) {
    if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) &&
        !MRI->isReserved(PhysReg))
      AddHint(PhysReg);
  }

#define DEBUG_TYPE "tile-hint"
  LLVM_DEBUG({
    dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
    for (auto Hint : Hints) {
      dbgs() << "tmm" << Hint << ",";
    }
    dbgs() << "\n";
  });
#undef DEBUG_TYPE

  return true;
}