1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// SI implementation of the TargetRegisterInfo class.
13 //===----------------------------------------------------------------------===//
15 #include "SIRegisterInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "AMDGPUSubtarget.h"
18 #include "SIInstrInfo.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/RegisterScavenging.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/LLVMContext.h"
27 using namespace llvm;
29 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
30 for (unsigned i = 0; PSets[i] != -1; ++i) {
31 if (PSets[i] == (int)PSetID)
32 return true;
34 return false;
37 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
38 BitVector &PressureSets) const {
39 for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
40 const int *PSets = getRegUnitPressureSets(*U);
41 if (hasPressureSet(PSets, PSetID)) {
42 PressureSets.set(PSetID);
43 break;
48 static cl::opt<bool> EnableSpillSGPRToSMEM(
49 "amdgpu-spill-sgpr-to-smem",
50 cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
51 cl::init(false));
53 static cl::opt<bool> EnableSpillSGPRToVGPR(
54 "amdgpu-spill-sgpr-to-vgpr",
55 cl::desc("Enable spilling SGPRs to VGPRs"),
56 cl::ReallyHidden,
57 cl::init(true));
59 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
60 AMDGPURegisterInfo(),
61 SGPRPressureSets(getNumRegPressureSets()),
62 VGPRPressureSets(getNumRegPressureSets()),
63 SpillSGPRToVGPR(false),
64 SpillSGPRToSMEM(false) {
65 if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
66 SpillSGPRToSMEM = true;
67 else if (EnableSpillSGPRToVGPR)
68 SpillSGPRToVGPR = true;
70 unsigned NumRegPressureSets = getNumRegPressureSets();
72 SGPRSetID = NumRegPressureSets;
73 VGPRSetID = NumRegPressureSets;
75 for (unsigned i = 0; i < NumRegPressureSets; ++i) {
76 classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
77 classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
80 // Determine the number of reg units for each pressure set.
81 std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
82 for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
83 const int *PSets = getRegUnitPressureSets(i);
84 for (unsigned j = 0; PSets[j] != -1; ++j) {
85 ++PressureSetRegUnits[PSets[j]];
89 unsigned VGPRMax = 0, SGPRMax = 0;
90 for (unsigned i = 0; i < NumRegPressureSets; ++i) {
91 if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
92 VGPRSetID = i;
93 VGPRMax = PressureSetRegUnits[i];
94 continue;
96 if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
97 SGPRSetID = i;
98 SGPRMax = PressureSetRegUnits[i];
102 assert(SGPRSetID < NumRegPressureSets &&
103 VGPRSetID < NumRegPressureSets);
106 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
107 const MachineFunction &MF) const {
109 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
110 unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
111 unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
112 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
115 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
116 unsigned Reg;
118 // Try to place it in a hole after PrivateSegmentBufferReg.
119 if (RegCount & 3) {
120 // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
121 // alignment constraints, so we have a hole where we can put the wave offset.
122 Reg = RegCount - 1;
123 } else {
124 // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
125 // wave offset before it.
126 Reg = RegCount - 5;
129 return Reg;
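// NOTE (illustrative, not from the original source): with RegCount = 102 the
// buffer reserved by reservedPrivateSegmentBufferReg occupies SGPRs 96..99
// (alignDown(102, 4) - 4 = 96), leaving 100..101 unused, so the wave byte
// offset takes index 101 (RegCount - 1). With RegCount = 104 the buffer
// occupies 100..103 and the wave byte offset is placed just below it at
// index 99 (RegCount - 5).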
132 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
133 const MachineFunction &MF) const {
134 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
135 unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
136 return AMDGPU::SGPR_32RegClass.getRegister(Reg);
139 unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
140 const MachineFunction &MF) const {
141 return AMDGPU::SGPR32;
144 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
145 BitVector Reserved(getNumRegs());
147 // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
148 // this seems likely to result in bugs, so I'm marking them as reserved.
149 reserveRegisterTuples(Reserved, AMDGPU::EXEC);
150 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
152 // M0 has to be reserved so that llvm accepts it as a live-in into a block.
153 reserveRegisterTuples(Reserved, AMDGPU::M0);
155 // Reserve the memory aperture registers.
156 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
157 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
158 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
159 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
161 // Reserve xnack_mask registers - support is not implemented in Codegen.
162 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
164 // Reserve Trap Handler registers - support is not implemented in Codegen.
165 reserveRegisterTuples(Reserved, AMDGPU::TBA);
166 reserveRegisterTuples(Reserved, AMDGPU::TMA);
167 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
168 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
169 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
170 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
171 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
172 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
173 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
174 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
176 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
178 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
179 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
180 for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
181 unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
182 reserveRegisterTuples(Reserved, Reg);
185 unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
186 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
187 for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
188 unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
189 reserveRegisterTuples(Reserved, Reg);
192 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
194 unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
195 if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
196 // Reserve 1 SGPR for scratch wave offset in case we need to spill.
197 reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
200 unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
201 if (ScratchRSrcReg != AMDGPU::NoRegister) {
202 // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
203 // to spill.
204 // TODO: May need to reserve a VGPR if doing LDS spilling.
205 reserveRegisterTuples(Reserved, ScratchRSrcReg);
206 assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
209 // We have to assume the SP is needed in case there are calls in the function,
210 // which is detected after the function is lowered. If we aren't really going
211 // to need SP, don't bother reserving it.
212 unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
214 if (StackPtrReg != AMDGPU::NoRegister) {
215 reserveRegisterTuples(Reserved, StackPtrReg);
216 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
219 unsigned FrameReg = MFI->getFrameOffsetReg();
220 if (FrameReg != AMDGPU::NoRegister) {
221 reserveRegisterTuples(Reserved, FrameReg);
222 assert(!isSubRegister(ScratchRSrcReg, FrameReg));
225 return Reserved;
228 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
229 const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
230 if (Info->isEntryFunction()) {
231 const MachineFrameInfo &MFI = Fn.getFrameInfo();
232 return MFI.hasStackObjects() || MFI.hasCalls();
235 // May need scavenger for dealing with callee saved registers.
236 return true;
239 bool SIRegisterInfo::requiresFrameIndexScavenging(
240 const MachineFunction &MF) const {
241 const MachineFrameInfo &MFI = MF.getFrameInfo();
242 if (MFI.hasStackObjects())
243 return true;
245 // May need to deal with callee saved registers.
246 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
247 return !Info->isEntryFunction();
250 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
251 const MachineFunction &MF) const {
252 // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
253 // create a virtual register for it during frame index elimination, and so the
254 // scavenger is needed directly.
255 return MF.getFrameInfo().hasStackObjects() &&
256 MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
257 MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
260 bool SIRegisterInfo::requiresVirtualBaseRegisters(
261 const MachineFunction &) const {
262 // There are no special dedicated stack or frame pointers.
263 return true;
266 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
267 // This helps catch bugs as verifier errors.
268 return true;
271 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
272 assert(SIInstrInfo::isMUBUF(*MI));
274 int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
275 AMDGPU::OpName::offset);
276 return MI->getOperand(OffIdx).getImm();
279 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
280 int Idx) const {
281 if (!SIInstrInfo::isMUBUF(*MI))
282 return 0;
284 assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
285 AMDGPU::OpName::vaddr) &&
286 "Should never see frame index on non-address operand");
288 return getMUBUFInstrOffset(MI);
291 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
292 if (!MI->mayLoadOrStore())
293 return false;
295 int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
297 return !isUInt<12>(FullOffset);
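// NOTE (illustrative, not from the original source): the MUBUF immediate
// offset field is a 12-bit unsigned value, so it can encode byte offsets
// 0..4095. For example, an instruction offset of 8 combined with a frame
// offset of 4092 gives FullOffset = 4100, which does not fit and therefore
// requires materializing a frame base register.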
300 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
301 unsigned BaseReg,
302 int FrameIdx,
303 int64_t Offset) const {
304 MachineBasicBlock::iterator Ins = MBB->begin();
305 DebugLoc DL; // Defaults to "unknown"
307 if (Ins != MBB->end())
308 DL = Ins->getDebugLoc();
310 MachineFunction *MF = MBB->getParent();
311 const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
312 const SIInstrInfo *TII = Subtarget.getInstrInfo();
314 if (Offset == 0) {
315 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
316 .addFrameIndex(FrameIdx);
317 return;
320 MachineRegisterInfo &MRI = MF->getRegInfo();
321 unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
323 unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
325 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
326 .addImm(Offset);
327 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
328 .addFrameIndex(FrameIdx);
330 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
331 .addReg(OffsetReg, RegState::Kill)
332 .addReg(FIReg);
335 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
336 int64_t Offset) const {
338 MachineBasicBlock *MBB = MI.getParent();
339 MachineFunction *MF = MBB->getParent();
340 const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
341 const SIInstrInfo *TII = Subtarget.getInstrInfo();
343 #ifndef NDEBUG
344 // FIXME: Is it possible to be storing a frame index to itself?
345 bool SeenFI = false;
346 for (const MachineOperand &MO: MI.operands()) {
347 if (MO.isFI()) {
348 if (SeenFI)
349 llvm_unreachable("should not see multiple frame indices");
351 SeenFI = true;
354 #endif
356 MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
357 assert(FIOp && FIOp->isFI() && "frame index must be address operand");
358 assert(TII->isMUBUF(MI));
359 assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
360 MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
361 "should only be seeing frame offset relative FrameIndex");
364 MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
365 int64_t NewOffset = OffsetOp->getImm() + Offset;
366 assert(isUInt<12>(NewOffset) && "offset should be legal");
368 FIOp->ChangeToRegister(BaseReg, false);
369 OffsetOp->setImm(NewOffset);
372 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
373 unsigned BaseReg,
374 int64_t Offset) const {
375 if (!SIInstrInfo::isMUBUF(*MI))
376 return false;
378 int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
380 return isUInt<12>(NewOffset);
383 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
384 const MachineFunction &MF, unsigned Kind) const {
385 // This is inaccurate. It depends on the instruction and address space. The
386 // only place where we should hit this is for dealing with frame indexes /
387 // private accesses, so this is correct in that case.
388 return &AMDGPU::VGPR_32RegClass;
391 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
393 switch (Op) {
394 case AMDGPU::SI_SPILL_S512_SAVE:
395 case AMDGPU::SI_SPILL_S512_RESTORE:
396 case AMDGPU::SI_SPILL_V512_SAVE:
397 case AMDGPU::SI_SPILL_V512_RESTORE:
398 return 16;
399 case AMDGPU::SI_SPILL_S256_SAVE:
400 case AMDGPU::SI_SPILL_S256_RESTORE:
401 case AMDGPU::SI_SPILL_V256_SAVE:
402 case AMDGPU::SI_SPILL_V256_RESTORE:
403 return 8;
404 case AMDGPU::SI_SPILL_S128_SAVE:
405 case AMDGPU::SI_SPILL_S128_RESTORE:
406 case AMDGPU::SI_SPILL_V128_SAVE:
407 case AMDGPU::SI_SPILL_V128_RESTORE:
408 return 4;
409 case AMDGPU::SI_SPILL_V96_SAVE:
410 case AMDGPU::SI_SPILL_V96_RESTORE:
411 return 3;
412 case AMDGPU::SI_SPILL_S64_SAVE:
413 case AMDGPU::SI_SPILL_S64_RESTORE:
414 case AMDGPU::SI_SPILL_V64_SAVE:
415 case AMDGPU::SI_SPILL_V64_RESTORE:
416 return 2;
417 case AMDGPU::SI_SPILL_S32_SAVE:
418 case AMDGPU::SI_SPILL_S32_RESTORE:
419 case AMDGPU::SI_SPILL_V32_SAVE:
420 case AMDGPU::SI_SPILL_V32_RESTORE:
421 return 1;
422 default: llvm_unreachable("Invalid spill opcode");
426 static int getOffsetMUBUFStore(unsigned Opc) {
427 switch (Opc) {
428 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
429 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
430 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
431 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
432 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
433 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
434 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
435 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
436 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
437 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
438 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
439 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
440 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
441 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
442 default:
443 return -1;
447 static int getOffsetMUBUFLoad(unsigned Opc) {
448 switch (Opc) {
449 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
450 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
451 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
452 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
453 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
454 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
455 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
456 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
457 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
458 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
459 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
460 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
461 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
462 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
463 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
464 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
465 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
466 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
467 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
468 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
469 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
470 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
471 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
472 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
473 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
474 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
475 default:
476 return -1;
480 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
481 // need to handle the case where an SGPR may need to be spilled while spilling.
482 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
483 MachineFrameInfo &MFI,
484 MachineBasicBlock::iterator MI,
485 int Index,
486 int64_t Offset) {
487 MachineBasicBlock *MBB = MI->getParent();
488 const DebugLoc &DL = MI->getDebugLoc();
489 bool IsStore = MI->mayStore();
491 unsigned Opc = MI->getOpcode();
492 int LoadStoreOp = IsStore ?
493 getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
494 if (LoadStoreOp == -1)
495 return false;
497 const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
498 MachineInstrBuilder NewMI =
499 BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
500 .add(*Reg)
501 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
502 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
503 .addImm(Offset)
504 .addImm(0) // glc
505 .addImm(0) // slc
506 .addImm(0) // tfe
507 .cloneMemRefs(*MI);
509 const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
510 AMDGPU::OpName::vdata_in);
511 if (VDataIn)
512 NewMI.add(*VDataIn);
513 return true;
516 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
517 unsigned LoadStoreOp,
518 int Index,
519 unsigned ValueReg,
520 bool IsKill,
521 unsigned ScratchRsrcReg,
522 unsigned ScratchOffsetReg,
523 int64_t InstOffset,
524 MachineMemOperand *MMO,
525 RegScavenger *RS) const {
526 MachineBasicBlock *MBB = MI->getParent();
527 MachineFunction *MF = MI->getParent()->getParent();
528 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
529 const SIInstrInfo *TII = ST.getInstrInfo();
530 const MachineFrameInfo &MFI = MF->getFrameInfo();
532 const MCInstrDesc &Desc = TII->get(LoadStoreOp);
533 const DebugLoc &DL = MI->getDebugLoc();
534 bool IsStore = Desc.mayStore();
536 bool Scavenged = false;
537 unsigned SOffset = ScratchOffsetReg;
539 const unsigned EltSize = 4;
540 const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
541 unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
542 unsigned Size = NumSubRegs * EltSize;
543 int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
544 int64_t ScratchOffsetRegDelta = 0;
546 unsigned Align = MFI.getObjectAlignment(Index);
547 const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
549 assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
551 if (!isUInt<12>(Offset + Size - EltSize)) {
552 SOffset = AMDGPU::NoRegister;
554 // We currently only support spilling VGPRs to EltSize boundaries, meaning
555 // we can simplify the adjustment of Offset here to just scale with
556 // WavefrontSize.
557 Offset *= ST.getWavefrontSize();
559 // We don't have access to the register scavenger if this function is called
560 // during PEI::scavengeFrameVirtualRegs().
561 if (RS)
562 SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
564 if (SOffset == AMDGPU::NoRegister) {
565 // There are no free SGPRs, and we are in the process of spilling
566 // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
567 // on SI/CI, and on VI it remains true until we implement spilling using
568 // scalar stores), we have no way to free up an SGPR. Our solution here is
569 // to add the offset directly to the ScratchOffset register, and then
570 // subtract the offset after the spill to return ScratchOffset to its
571 // original value.
572 SOffset = ScratchOffsetReg;
573 ScratchOffsetRegDelta = Offset;
574 } else {
575 Scavenged = true;
578 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
579 .addReg(ScratchOffsetReg)
580 .addImm(Offset);
582 Offset = 0;
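// NOTE (illustrative, not from the original source): assuming a 64-lane
// wavefront, a frame-index byte offset of 4096 (too large for the 12-bit
// MUBUF field) is scaled to 4096 * 64 = 262144 and folded into SOffset with
// S_ADD_U32; the per-subregister immediates emitted in the loop below then
// restart at 0. If no SGPR could be scavenged, the same amount is subtracted
// back from the scratch offset register after the loop.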
585 for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
586 unsigned SubReg = NumSubRegs == 1 ?
587 ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
589 unsigned SOffsetRegState = 0;
590 unsigned SrcDstRegState = getDefRegState(!IsStore);
591 if (i + 1 == e) {
592 SOffsetRegState |= getKillRegState(Scavenged);
593 // The last implicit use carries the "Kill" flag.
594 SrcDstRegState |= getKillRegState(IsKill);
597 MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
598 MachineMemOperand *NewMMO
599 = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
600 EltSize, MinAlign(Align, EltSize * i));
602 auto MIB = BuildMI(*MBB, MI, DL, Desc)
603 .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
604 .addReg(ScratchRsrcReg)
605 .addReg(SOffset, SOffsetRegState)
606 .addImm(Offset)
607 .addImm(0) // glc
608 .addImm(0) // slc
609 .addImm(0) // tfe
610 .addMemOperand(NewMMO);
612 if (NumSubRegs > 1)
613 MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
616 if (ScratchOffsetRegDelta != 0) {
617 // Subtract the offset we added to the ScratchOffset register.
618 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
619 .addReg(ScratchOffsetReg)
620 .addImm(ScratchOffsetRegDelta);
624 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
625 bool Store) {
626 if (SuperRegSize % 16 == 0) {
627 return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
628 AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
631 if (SuperRegSize % 8 == 0) {
632 return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
633 AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
636 return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
637 AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
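// NOTE (illustrative, not from the original source): for a 256-bit SGPR
// tuple (SuperRegSize = 32 bytes) this returns
// {16, S_BUFFER_STORE_DWORDX4_SGPR} when storing, i.e. the spill is split
// into two 16-byte scalar buffer stores, while an 8-byte pair falls into the
// {8, S_BUFFER_STORE_DWORDX2_SGPR} case.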
640 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
641 int Index,
642 RegScavenger *RS,
643 bool OnlyToVGPR) const {
644 MachineBasicBlock *MBB = MI->getParent();
645 MachineFunction *MF = MBB->getParent();
646 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
647 DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
649 ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
650 = MFI->getSGPRToVGPRSpills(Index);
651 bool SpillToVGPR = !VGPRSpills.empty();
652 if (OnlyToVGPR && !SpillToVGPR)
653 return false;
655 MachineRegisterInfo &MRI = MF->getRegInfo();
656 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
657 const SIInstrInfo *TII = ST.getInstrInfo();
659 unsigned SuperReg = MI->getOperand(0).getReg();
660 bool IsKill = MI->getOperand(0).isKill();
661 const DebugLoc &DL = MI->getDebugLoc();
663 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
665 bool SpillToSMEM = spillSGPRToSMEM();
666 if (SpillToSMEM && OnlyToVGPR)
667 return false;
669 assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
670 SuperReg != MFI->getFrameOffsetReg() &&
671 SuperReg != MFI->getScratchWaveOffsetReg()));
673 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
675 unsigned OffsetReg = AMDGPU::M0;
676 unsigned M0CopyReg = AMDGPU::NoRegister;
678 if (SpillToSMEM) {
679 if (RS->isRegUsed(AMDGPU::M0)) {
680 M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
681 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
682 .addReg(AMDGPU::M0);
686 unsigned ScalarStoreOp;
687 unsigned EltSize = 4;
688 const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
689 if (SpillToSMEM && isSGPRClass(RC)) {
690 // XXX - if private_element_size is larger than 4 it might be useful to be
691 // able to emit wider vmem spills.
692 std::tie(EltSize, ScalarStoreOp) =
693 getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
696 ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
697 unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
699 // SubReg carries the "Kill" flag when SubReg == SuperReg.
700 unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
701 for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
702 unsigned SubReg = NumSubRegs == 1 ?
703 SuperReg : getSubReg(SuperReg, SplitParts[i]);
705 if (SpillToSMEM) {
706 int64_t FrOffset = FrameInfo.getObjectOffset(Index);
708 // The allocated memory size is really the wavefront size * the frame
709 // index size. The widest register class is 64 bytes, so a 4-byte scratch
710 // allocation is enough to spill this in a single stack object.
712 // FIXME: Frame size/offsets are computed earlier than this, so the extra
713 // space is still unnecessarily allocated.
715 unsigned Align = FrameInfo.getObjectAlignment(Index);
716 MachinePointerInfo PtrInfo
717 = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
718 MachineMemOperand *MMO
719 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
720 EltSize, MinAlign(Align, EltSize * i));
722 // SMEM instructions only support a single offset, so increment the wave
723 // offset.
725 int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
726 if (Offset != 0) {
727 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
728 .addReg(MFI->getFrameOffsetReg())
729 .addImm(Offset);
730 } else {
731 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
732 .addReg(MFI->getFrameOffsetReg());
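// NOTE (illustrative, not from the original source): assuming a 64-lane
// wavefront, a frame object at byte offset 16, EltSize = 4, and i = 1, the
// computation above sets m0 = FrameOffsetReg + (64 * 16 + 4) = FrameOffsetReg
// + 1028 before the scalar store below is emitted.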
735 BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
736 .addReg(SubReg, getKillRegState(IsKill)) // sdata
737 .addReg(MFI->getScratchRSrcReg()) // sbase
738 .addReg(OffsetReg, RegState::Kill) // soff
739 .addImm(0) // glc
740 .addMemOperand(MMO);
742 continue;
745 if (SpillToVGPR) {
746 SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
748 // During SGPR spilling to VGPR, determine if the VGPR is defined. The
749 // only circumstance in which we say it is undefined is when it is the
750 // first spill to this VGPR in the first basic block.
751 bool VGPRDefined = true;
752 if (MBB == &MF->front())
753 VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
755 // Mark the "old value of vgpr" input undef only if this is the first sgpr
756 // spill to this specific vgpr in the first basic block.
757 BuildMI(*MBB, MI, DL,
758 TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
759 Spill.VGPR)
760 .addReg(SubReg, getKillRegState(IsKill))
761 .addImm(Spill.Lane)
762 .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
764 // FIXME: Since this spills to another register instead of an actual
765 // frame index, we should delete the frame index when all references to
766 // it are fixed.
767 } else {
768 // XXX - Can the spill to VGPR fail for some subregisters but not others?
769 if (OnlyToVGPR)
770 return false;
772 // Spill SGPR to a frame index.
773 // TODO: Should VI try to spill to VGPR and then spill to SMEM?
774 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
777 MachineInstrBuilder Mov
778 = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
779 .addReg(SubReg, SubKillState);
782 // There could be undef components of a spilled super register.
783 // TODO: Can we detect this and skip the spill?
784 if (NumSubRegs > 1) {
785 // The last implicit use of the SuperReg carries the "Kill" flag.
786 unsigned SuperKillState = 0;
787 if (i + 1 == e)
788 SuperKillState |= getKillRegState(IsKill);
789 Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
792 unsigned Align = FrameInfo.getObjectAlignment(Index);
793 MachinePointerInfo PtrInfo
794 = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
795 MachineMemOperand *MMO
796 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
797 EltSize, MinAlign(Align, EltSize * i));
798 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
799 .addReg(TmpReg, RegState::Kill) // src
800 .addFrameIndex(Index) // vaddr
801 .addReg(MFI->getScratchRSrcReg()) // srsrc
802 .addReg(MFI->getFrameOffsetReg()) // soffset
803 .addImm(i * 4) // offset
804 .addMemOperand(MMO);
808 if (M0CopyReg != AMDGPU::NoRegister) {
809 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
810 .addReg(M0CopyReg, RegState::Kill);
813 MI->eraseFromParent();
814 MFI->addToSpilledSGPRs(NumSubRegs);
815 return true;
818 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
819 int Index,
820 RegScavenger *RS,
821 bool OnlyToVGPR) const {
822 MachineFunction *MF = MI->getParent()->getParent();
823 MachineRegisterInfo &MRI = MF->getRegInfo();
824 MachineBasicBlock *MBB = MI->getParent();
825 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
827 ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
828 = MFI->getSGPRToVGPRSpills(Index);
829 bool SpillToVGPR = !VGPRSpills.empty();
830 if (OnlyToVGPR && !SpillToVGPR)
831 return false;
833 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
834 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
835 const SIInstrInfo *TII = ST.getInstrInfo();
836 const DebugLoc &DL = MI->getDebugLoc();
838 unsigned SuperReg = MI->getOperand(0).getReg();
839 bool SpillToSMEM = spillSGPRToSMEM();
840 if (SpillToSMEM && OnlyToVGPR)
841 return false;
843 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
845 unsigned OffsetReg = AMDGPU::M0;
846 unsigned M0CopyReg = AMDGPU::NoRegister;
848 if (SpillToSMEM) {
849 if (RS->isRegUsed(AMDGPU::M0)) {
850 M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
851 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
852 .addReg(AMDGPU::M0);
856 unsigned EltSize = 4;
857 unsigned ScalarLoadOp;
859 const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
860 if (SpillToSMEM && isSGPRClass(RC)) {
861 // XXX - if private_element_size is larger than 4 it might be useful to be
862 // able to emit wider vmem spills.
863 std::tie(EltSize, ScalarLoadOp) =
864 getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
867 ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
868 unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
870 // SubReg carries the "Kill" flag when SubReg == SuperReg.
871 int64_t FrOffset = FrameInfo.getObjectOffset(Index);
873 for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
874 unsigned SubReg = NumSubRegs == 1 ?
875 SuperReg : getSubReg(SuperReg, SplitParts[i]);
877 if (SpillToSMEM) {
878 // FIXME: Size may be > 4, but the extra bytes are wasted.
879 unsigned Align = FrameInfo.getObjectAlignment(Index);
880 MachinePointerInfo PtrInfo
881 = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
882 MachineMemOperand *MMO
883 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
884 EltSize, MinAlign(Align, EltSize * i));
886 // Add the scaled frame offset plus the per-subregister offset (EltSize * i).
887 int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
888 if (Offset != 0) {
889 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
890 .addReg(MFI->getFrameOffsetReg())
891 .addImm(Offset);
892 } else {
893 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
894 .addReg(MFI->getFrameOffsetReg());
897 auto MIB =
898 BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
899 .addReg(MFI->getScratchRSrcReg()) // sbase
900 .addReg(OffsetReg, RegState::Kill) // soff
901 .addImm(0) // glc
902 .addMemOperand(MMO);
904 if (NumSubRegs > 1)
905 MIB.addReg(SuperReg, RegState::ImplicitDefine);
907 continue;
910 if (SpillToVGPR) {
911 SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
912 auto MIB =
913 BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
914 SubReg)
915 .addReg(Spill.VGPR)
916 .addImm(Spill.Lane);
918 if (NumSubRegs > 1)
919 MIB.addReg(SuperReg, RegState::ImplicitDefine);
920 } else {
921 if (OnlyToVGPR)
922 return false;
924 // Restore SGPR from a stack slot.
925 // FIXME: We should use S_LOAD_DWORD here for VI.
926 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
927 unsigned Align = FrameInfo.getObjectAlignment(Index);
929 MachinePointerInfo PtrInfo
930 = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
932 MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
933 MachineMemOperand::MOLoad, EltSize,
934 MinAlign(Align, EltSize * i));
936 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
937 .addFrameIndex(Index) // vaddr
938 .addReg(MFI->getScratchRSrcReg()) // srsrc
939 .addReg(MFI->getFrameOffsetReg()) // soffset
940 .addImm(i * 4) // offset
941 .addMemOperand(MMO);
943 auto MIB =
944 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
945 .addReg(TmpReg, RegState::Kill);
947 if (NumSubRegs > 1)
948 MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
952 if (M0CopyReg != AMDGPU::NoRegister) {
953 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
954 .addReg(M0CopyReg, RegState::Kill);
957 MI->eraseFromParent();
958 return true;
961 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
962 /// a VGPR and the stack slot can be safely eliminated when all other users are
963 /// handled.
964 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
965 MachineBasicBlock::iterator MI,
966 int FI,
967 RegScavenger *RS) const {
968 switch (MI->getOpcode()) {
969 case AMDGPU::SI_SPILL_S512_SAVE:
970 case AMDGPU::SI_SPILL_S256_SAVE:
971 case AMDGPU::SI_SPILL_S128_SAVE:
972 case AMDGPU::SI_SPILL_S64_SAVE:
973 case AMDGPU::SI_SPILL_S32_SAVE:
974 return spillSGPR(MI, FI, RS, true);
975 case AMDGPU::SI_SPILL_S512_RESTORE:
976 case AMDGPU::SI_SPILL_S256_RESTORE:
977 case AMDGPU::SI_SPILL_S128_RESTORE:
978 case AMDGPU::SI_SPILL_S64_RESTORE:
979 case AMDGPU::SI_SPILL_S32_RESTORE:
980 return restoreSGPR(MI, FI, RS, true);
981 default:
982 llvm_unreachable("not an SGPR spill instruction");
986 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
987 int SPAdj, unsigned FIOperandNum,
988 RegScavenger *RS) const {
989 MachineFunction *MF = MI->getParent()->getParent();
990 MachineRegisterInfo &MRI = MF->getRegInfo();
991 MachineBasicBlock *MBB = MI->getParent();
992 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
993 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
994 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
995 const SIInstrInfo *TII = ST.getInstrInfo();
996 DebugLoc DL = MI->getDebugLoc();
998 MachineOperand &FIOp = MI->getOperand(FIOperandNum);
999 int Index = MI->getOperand(FIOperandNum).getIndex();
1001 switch (MI->getOpcode()) {
1002 // SGPR register spill
1003 case AMDGPU::SI_SPILL_S512_SAVE:
1004 case AMDGPU::SI_SPILL_S256_SAVE:
1005 case AMDGPU::SI_SPILL_S128_SAVE:
1006 case AMDGPU::SI_SPILL_S64_SAVE:
1007 case AMDGPU::SI_SPILL_S32_SAVE: {
1008 spillSGPR(MI, Index, RS);
1009 break;
1012 // SGPR register restore
1013 case AMDGPU::SI_SPILL_S512_RESTORE:
1014 case AMDGPU::SI_SPILL_S256_RESTORE:
1015 case AMDGPU::SI_SPILL_S128_RESTORE:
1016 case AMDGPU::SI_SPILL_S64_RESTORE:
1017 case AMDGPU::SI_SPILL_S32_RESTORE: {
1018 restoreSGPR(MI, Index, RS);
1019 break;
1022 // VGPR register spill
1023 case AMDGPU::SI_SPILL_V512_SAVE:
1024 case AMDGPU::SI_SPILL_V256_SAVE:
1025 case AMDGPU::SI_SPILL_V128_SAVE:
1026 case AMDGPU::SI_SPILL_V96_SAVE:
1027 case AMDGPU::SI_SPILL_V64_SAVE:
1028 case AMDGPU::SI_SPILL_V32_SAVE: {
1029 const MachineOperand *VData = TII->getNamedOperand(*MI,
1030 AMDGPU::OpName::vdata);
1031 buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1032 Index,
1033 VData->getReg(), VData->isKill(),
1034 TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1035 TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1036 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1037 *MI->memoperands_begin(),
1038 RS);
1039 MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1040 MI->eraseFromParent();
1041 break;
1043 case AMDGPU::SI_SPILL_V32_RESTORE:
1044 case AMDGPU::SI_SPILL_V64_RESTORE:
1045 case AMDGPU::SI_SPILL_V96_RESTORE:
1046 case AMDGPU::SI_SPILL_V128_RESTORE:
1047 case AMDGPU::SI_SPILL_V256_RESTORE:
1048 case AMDGPU::SI_SPILL_V512_RESTORE: {
1049 const MachineOperand *VData = TII->getNamedOperand(*MI,
1050 AMDGPU::OpName::vdata);
1052 buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1053 Index,
1054 VData->getReg(), VData->isKill(),
1055 TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1056 TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1057 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1058 *MI->memoperands_begin(),
1059 RS);
1060 MI->eraseFromParent();
1061 break;
1064 default: {
1065 const DebugLoc &DL = MI->getDebugLoc();
1066 bool IsMUBUF = TII->isMUBUF(*MI);
1068 if (!IsMUBUF &&
1069 MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
1070 // Convert to an absolute stack address by finding the offset from the
1071 // scratch wave base and scaling by the wave size.
1073 // In an entry function/kernel the stack address is already the
1074 // absolute address relative to the scratch wave offset.
1076 unsigned DiffReg
1077 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1079 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1080 unsigned ResultReg = IsCopy ?
1081 MI->getOperand(0).getReg() :
1082 MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1084 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1085 .addReg(MFI->getFrameOffsetReg())
1086 .addReg(MFI->getScratchWaveOffsetReg());
1088 int64_t Offset = FrameInfo.getObjectOffset(Index);
1089 if (Offset == 0) {
1090 // XXX - This never happens because of the emergency scavenging slot at offset 0?
1091 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1092 .addImm(Log2_32(ST.getWavefrontSize()))
1093 .addReg(DiffReg);
1094 } else {
1095 unsigned ScaledReg
1096 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1098 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
1099 .addImm(Log2_32(ST.getWavefrontSize()))
1100 .addReg(DiffReg, RegState::Kill);
1102 // TODO: Fold if the use instruction is another add of a constant.
1103 if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1104 TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1105 .addImm(Offset)
1106 .addReg(ScaledReg, RegState::Kill);
1107 } else {
1108 unsigned ConstOffsetReg
1109 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1111 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1112 .addImm(Offset);
1113 TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1114 .addReg(ConstOffsetReg, RegState::Kill)
1115 .addReg(ScaledReg, RegState::Kill);
1119 // Don't introduce an extra copy if we're just materializing in a mov.
1120 if (IsCopy)
1121 MI->eraseFromParent();
1122 else
1123 FIOp.ChangeToRegister(ResultReg, false, false, true);
1124 return;
1127 if (IsMUBUF) {
1128 // Disable offen so we don't need a 0 vgpr base.
1129 assert(static_cast<int>(FIOperandNum) ==
1130 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1131 AMDGPU::OpName::vaddr));
1133 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
1134 == MFI->getFrameOffsetReg());
1136 int64_t Offset = FrameInfo.getObjectOffset(Index);
1137 int64_t OldImm
1138 = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1139 int64_t NewOffset = OldImm + Offset;
1141 if (isUInt<12>(NewOffset) &&
1142 buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1143 MI->eraseFromParent();
1144 return;
1148 // If the offset is simply too big, don't convert to a scratch wave offset
1149 // relative index.
1151 int64_t Offset = FrameInfo.getObjectOffset(Index);
1152 FIOp.ChangeToImmediate(Offset);
1153 if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1154 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1155 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1156 .addImm(Offset);
1157 FIOp.ChangeToRegister(TmpReg, false, false, true);
1163 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1164 #define AMDGPU_REG_ASM_NAMES
1165 #include "AMDGPURegAsmNames.inc.cpp"
1167 #define REG_RANGE(BeginReg, EndReg, RegTable) \
1168 if (Reg >= BeginReg && Reg <= EndReg) { \
1169 unsigned Index = Reg - BeginReg; \
1170 assert(Index < array_lengthof(RegTable)); \
1171 return RegTable[Index]; \
1174 REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
1175 REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
1176 REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
1177 REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
1178 REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
1179 VGPR96RegNames);
1181 REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
1182 AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
1183 VGPR128RegNames);
1184 REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
1185 AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
1186 SGPR128RegNames);
1188 REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
1189 AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1190 VGPR256RegNames);
1192 REG_RANGE(
1193 AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
1194 AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1195 VGPR512RegNames);
1197 REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
1198 AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1199 SGPR256RegNames);
1201 REG_RANGE(
1202 AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
1203 AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1204 SGPR512RegNames
1207 #undef REG_RANGE
1209 // FIXME: Rename flat_scr so we don't need to special case this.
1210 switch (Reg) {
1211 case AMDGPU::FLAT_SCR:
1212 return "flat_scratch";
1213 case AMDGPU::FLAT_SCR_LO:
1214 return "flat_scratch_lo";
1215 case AMDGPU::FLAT_SCR_HI:
1216 return "flat_scratch_hi";
1217 default:
1218 // For the special named registers the default is fine.
1219 return TargetRegisterInfo::getRegAsmName(Reg);
1223 // FIXME: This is very slow. It might be worth creating a map from physreg to
1224 // register class.
1225 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1226 assert(!TargetRegisterInfo::isVirtualRegister(Reg));
1228 static const TargetRegisterClass *const BaseClasses[] = {
1229 &AMDGPU::VGPR_32RegClass,
1230 &AMDGPU::SReg_32RegClass,
1231 &AMDGPU::VReg_64RegClass,
1232 &AMDGPU::SReg_64RegClass,
1233 &AMDGPU::VReg_96RegClass,
1234 &AMDGPU::VReg_128RegClass,
1235 &AMDGPU::SReg_128RegClass,
1236 &AMDGPU::VReg_256RegClass,
1237 &AMDGPU::SReg_256RegClass,
1238 &AMDGPU::VReg_512RegClass,
1239 &AMDGPU::SReg_512RegClass,
1240 &AMDGPU::SCC_CLASSRegClass,
1241 &AMDGPU::Pseudo_SReg_32RegClass,
1242 &AMDGPU::Pseudo_SReg_128RegClass,
1245 for (const TargetRegisterClass *BaseClass : BaseClasses) {
1246 if (BaseClass->contains(Reg)) {
1247 return BaseClass;
1250 return nullptr;
1253 // TODO: It might be helpful to have some target specific flags in
1254 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1255 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1256 unsigned Size = getRegSizeInBits(*RC);
1257 if (Size < 32)
1258 return false;
1259 switch (Size) {
1260 case 32:
1261 return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1262 case 64:
1263 return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1264 case 96:
1265 return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1266 case 128:
1267 return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1268 case 256:
1269 return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1270 case 512:
1271 return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1272 default:
1273 llvm_unreachable("Invalid register class size");
1277 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1278 const TargetRegisterClass *SRC) const {
1279 switch (getRegSizeInBits(*SRC)) {
1280 case 32:
1281 return &AMDGPU::VGPR_32RegClass;
1282 case 64:
1283 return &AMDGPU::VReg_64RegClass;
1284 case 96:
1285 return &AMDGPU::VReg_96RegClass;
1286 case 128:
1287 return &AMDGPU::VReg_128RegClass;
1288 case 256:
1289 return &AMDGPU::VReg_256RegClass;
1290 case 512:
1291 return &AMDGPU::VReg_512RegClass;
1292 default:
1293 llvm_unreachable("Invalid register class size");
1297 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1298 const TargetRegisterClass *VRC) const {
1299 switch (getRegSizeInBits(*VRC)) {
1300 case 32:
1301 return &AMDGPU::SGPR_32RegClass;
1302 case 64:
1303 return &AMDGPU::SReg_64RegClass;
1304 case 128:
1305 return &AMDGPU::SReg_128RegClass;
1306 case 256:
1307 return &AMDGPU::SReg_256RegClass;
1308 case 512:
1309 return &AMDGPU::SReg_512RegClass;
1310 default:
1311 llvm_unreachable("Invalid register class size");
1315 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1316 const TargetRegisterClass *RC, unsigned SubIdx) const {
1317 if (SubIdx == AMDGPU::NoSubRegister)
1318 return RC;
1320 // We can assume that each lane corresponds to one 32-bit register.
1321 unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1322 if (isSGPRClass(RC)) {
1323 switch (Count) {
1324 case 1:
1325 return &AMDGPU::SGPR_32RegClass;
1326 case 2:
1327 return &AMDGPU::SReg_64RegClass;
1328 case 4:
1329 return &AMDGPU::SReg_128RegClass;
1330 case 8:
1331 return &AMDGPU::SReg_256RegClass;
1332 case 16: /* fall-through */
1333 default:
1334 llvm_unreachable("Invalid sub-register class size");
1336 } else {
1337 switch (Count) {
1338 case 1:
1339 return &AMDGPU::VGPR_32RegClass;
1340 case 2:
1341 return &AMDGPU::VReg_64RegClass;
1342 case 3:
1343 return &AMDGPU::VReg_96RegClass;
1344 case 4:
1345 return &AMDGPU::VReg_128RegClass;
1346 case 8:
1347 return &AMDGPU::VReg_256RegClass;
1348 case 16: /* fall-through */
1349 default:
1350 llvm_unreachable("Invalid sub-register class size");
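// NOTE (illustrative, not from the original source): since each lane of a
// sub-register index covers one 32-bit register, a query such as
// getSubRegClass(&AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1) sees a
// two-lane mask and returns &AMDGPU::VReg_64RegClass, while the same index
// on an SGPR class yields &AMDGPU::SReg_64RegClass.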
1355 bool SIRegisterInfo::shouldRewriteCopySrc(
1356 const TargetRegisterClass *DefRC,
1357 unsigned DefSubReg,
1358 const TargetRegisterClass *SrcRC,
1359 unsigned SrcSubReg) const {
1360 // We want to prefer the smallest register class possible, so we don't want to
1361 // stop and rewrite on anything that looks like a subregister
1362 // extract. Operations mostly don't care about the super register class, so we
1363 // only want to stop on the most basic of copies between the same register
1364 // class.
1366 // e.g. if we have something like
1367 // %0 = ...
1368 // %1 = ...
1369 // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1370 // %3 = COPY %2, sub0
1372 // We want to look through the COPY to find:
1373 // => %3 = COPY %0
1375 // Plain copy.
1376 return getCommonSubClass(DefRC, SrcRC) != nullptr;
1379 /// Returns a register that is not used at any point in the function.
1380 /// If all registers are used, then this function will return
1381 /// AMDGPU::NoRegister.
1382 unsigned
1383 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1384 const TargetRegisterClass *RC,
1385 const MachineFunction &MF) const {
1387 for (unsigned Reg : *RC)
1388 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1389 return Reg;
1390 return AMDGPU::NoRegister;
1393 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1394 unsigned EltSize) const {
1395 if (EltSize == 4) {
1396 static const int16_t Sub0_15[] = {
1397 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1398 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1399 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1400 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1403 static const int16_t Sub0_7[] = {
1404 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1405 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1408 static const int16_t Sub0_3[] = {
1409 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1412 static const int16_t Sub0_2[] = {
1413 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1416 static const int16_t Sub0_1[] = {
1417 AMDGPU::sub0, AMDGPU::sub1,
1420 switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1421 case 32:
1422 return {};
1423 case 64:
1424 return makeArrayRef(Sub0_1);
1425 case 96:
1426 return makeArrayRef(Sub0_2);
1427 case 128:
1428 return makeArrayRef(Sub0_3);
1429 case 256:
1430 return makeArrayRef(Sub0_7);
1431 case 512:
1432 return makeArrayRef(Sub0_15);
1433 default:
1434 llvm_unreachable("unhandled register size");
1438 if (EltSize == 8) {
1439 static const int16_t Sub0_15_64[] = {
1440 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1441 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1442 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1443 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1446 static const int16_t Sub0_7_64[] = {
1447 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1448 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1452 static const int16_t Sub0_3_64[] = {
1453 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1456 switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1457 case 64:
1458 return {};
1459 case 128:
1460 return makeArrayRef(Sub0_3_64);
1461 case 256:
1462 return makeArrayRef(Sub0_7_64);
1463 case 512:
1464 return makeArrayRef(Sub0_15_64);
1465 default:
1466 llvm_unreachable("unhandled register size");
1470 assert(EltSize == 16 && "unhandled register spill split size");
1472 static const int16_t Sub0_15_128[] = {
1473 AMDGPU::sub0_sub1_sub2_sub3,
1474 AMDGPU::sub4_sub5_sub6_sub7,
1475 AMDGPU::sub8_sub9_sub10_sub11,
1476 AMDGPU::sub12_sub13_sub14_sub15
1479 static const int16_t Sub0_7_128[] = {
1480 AMDGPU::sub0_sub1_sub2_sub3,
1481 AMDGPU::sub4_sub5_sub6_sub7
1484 switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1485 case 128:
1486 return {};
1487 case 256:
1488 return makeArrayRef(Sub0_7_128);
1489 case 512:
1490 return makeArrayRef(Sub0_15_128);
1491 default:
1492 llvm_unreachable("unhandled register size");
1496 const TargetRegisterClass*
1497 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1498 unsigned Reg) const {
1499 if (TargetRegisterInfo::isVirtualRegister(Reg))
1500 return MRI.getRegClass(Reg);
1502 return getPhysRegClass(Reg);
1505 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1506 unsigned Reg) const {
1507 const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1508 assert(RC && "Register class for the reg not found");
1509 return hasVGPRs(RC);
1512 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1513 const TargetRegisterClass *SrcRC,
1514 unsigned SubReg,
1515 const TargetRegisterClass *DstRC,
1516 unsigned DstSubReg,
1517 const TargetRegisterClass *NewRC,
1518 LiveIntervals &LIS) const {
1519 unsigned SrcSize = getRegSizeInBits(*SrcRC);
1520 unsigned DstSize = getRegSizeInBits(*DstRC);
1521 unsigned NewSize = getRegSizeInBits(*NewRC);
1523 // Do not increase the size of registers beyond a dword; we would need to
1524 // allocate adjacent registers and constrain regalloc more than needed.
1526 // Always allow dword coalescing.
1527 if (SrcSize <= 32 || DstSize <= 32)
1528 return true;
1530 return NewSize <= DstSize || NewSize <= SrcSize;
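// NOTE (illustrative, not from the original source): coalescing that touches
// a 32-bit register is always allowed (e.g. folding a VGPR_32 copy into a
// VReg_64 tuple), whereas a copy between two 64-bit registers whose combined
// result would need a VReg_128 is rejected, because NewSize (128) exceeds
// both SrcSize and DstSize (64).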
1533 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1534 MachineFunction &MF) const {
1536 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1537 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1539 unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1540 MF.getFunction());
1541 switch (RC->getID()) {
1542 default:
1543 return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1544 case AMDGPU::VGPR_32RegClassID:
1545 return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1546 case AMDGPU::SGPR_32RegClassID:
1547 return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1551 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1552 unsigned Idx) const {
1553 if (Idx == getVGPRPressureSet())
1554 return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1555 const_cast<MachineFunction &>(MF));
1557 if (Idx == getSGPRPressureSet())
1558 return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1559 const_cast<MachineFunction &>(MF));
1561 return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1564 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1565 static const int Empty[] = { -1 };
1567 if (hasRegUnit(AMDGPU::M0, RegUnit))
1568 return Empty;
1569 return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1572 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
1573 // Not a callee saved register.
1574 return AMDGPU::SGPR30_SGPR31;
1577 const TargetRegisterClass *
1578 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
1579 const MachineRegisterInfo &MRI) const {
1580 unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
1581 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
1582 if (!RB)
1583 return nullptr;
1585 switch (Size) {
1586 case 32:
1587 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1588 &AMDGPU::SReg_32_XM0RegClass;
1589 case 64:
1590 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1591 &AMDGPU::SReg_64_XEXECRegClass;
1592 case 96:
1593 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1594 nullptr;
1595 case 128:
1596 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1597 &AMDGPU::SReg_128RegClass;
1598 default:
1599 llvm_unreachable("not implemented");