//===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to preconfig the shape of physical tile registers
/// It inserts ldtilecfg ahead of each group of tile registers. The algorithm
/// walks each instruction of a basic block in reverse order. All the tile
/// registers that live out of the basic block are spilled and reloaded
/// before their users. It also checks the dependency of the shape to ensure
/// the shape is defined before ldtilecfg.
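///
/// For example (illustrative, simplified MIR; not taken from a real test):
///   %row:gr16 = MOV16ri 16
///   %col:gr16 = MOV16ri 64
///   PLDTILECFGV ...               ; inserted by this pass
///   %t:tile = PTILEZEROV %row:gr16, %col:gr16
//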
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "fastpretileconfig"

STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads, "Number of loads added");

namespace {

class X86FastPreTileConfig : public MachineFunctionPass {
  MachineFunction *MF = nullptr;
  const X86Subtarget *ST = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  X86MachineFunctionInfo *X86FI = nullptr;
  MachineFrameInfo *MFI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineBasicBlock *MBB = nullptr;
  // Stack slot of the tile config data, shared by every ldtilecfg inserted
  // in the function.
  int CfgSS = -1;
  // Row, column and spill-slot address registers created when a tile PHI is
  // converted; used to resolve circular PHI references.
  struct PHIInfo {
    Register Row;
    Register Col;
    Register StackAddr;
  };
  DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;

  /// Maps virtual regs to the frame index where these values are spilled.
  IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;

  /// Has a bit set for tile virtual register for which it was determined
  /// that it is alive across blocks.
  BitVector MayLiveAcrossBlocks;

  int getStackSpaceFor(Register VirtReg);
  void InitializeTileConfigStackSpace();
  bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
  void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
  void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
              MachineOperand *RowMO, MachineOperand *ColMO);
  void canonicalizePHIs(MachineBasicBlock &MBB);
  void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
  void convertPHIs(MachineBasicBlock &MBB);
  bool configBasicBlock(MachineBasicBlock &MBB);

public:
  X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}

  /// Return the pass name.
  StringRef getPassName() const override {
    return "Fast Tile Register Preconfigure";
  }

  /// Perform tile register configure.
  bool runOnMachineFunction(MachineFunction &MFunc) override;

  static char ID;
};

} // end anonymous namespace

char X86FastPreTileConfig::ID = 0;

INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
                      "Fast Tile Register Preconfigure", false, false)
INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
                    "Fast Tile Register Preconfigure", false, false)
static bool dominates(MachineBasicBlock &MBB,
                      MachineBasicBlock::const_iterator A,
                      MachineBasicBlock::const_iterator B) {
  auto MBBEnd = MBB.end();
  if (B == MBBEnd)
    return true;

  MachineBasicBlock::const_iterator I = MBB.begin();
  for (; &*I != A && &*I != B; ++I)
    ;

  return &*I == A;
}

/// This allocates space for the specified virtual register to be held on the
/// stack.
int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
  // Find the location Reg would belong...
  int SS = StackSlotForVirtReg[VirtReg];
  // Already has space allocated?
  if (SS != -1)
    return SS;

  // Allocate a new stack object for this spill location...
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);
  int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);

  // Assign the slot.
  StackSlotForVirtReg[VirtReg] = FrameIdx;
  return FrameIdx;
}

/// Returns false if \p VirtReg is known to not live out of the current config.
/// If \p VirtReg lives out of the current MBB, it must live out of the current
/// config.
bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
  if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
    return true;

  for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
    if (UseInst.getParent() != MBB) {
      MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
      return true;
    }

    // The use and def are in the same MBB. If the tile register is
    // reconfigured, it is clobbered and we need to spill and reload
    // the tile register.
    if (CfgMI) {
      if (dominates(*MBB, *CfgMI, UseInst)) {
        MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
        return true;
      }
    }
  }

  return false;
}
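
// Background note: ldtilecfg reads a 64-byte configuration area. Byte 0
// holds the palette (set to 1 below); in the standard palette-1 layout the
// per-tile column widths (in bytes) start at byte 16 and the per-tile row
// counts at byte 48. This function only zero-initializes the whole area and
// stores the palette; the shape fields are written later, once physical
// tile registers have been assigned.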
void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
  MachineBasicBlock &MBB = MF->front();
  MachineInstr *MI = &*MBB.getFirstNonPHI();
  DebugLoc DL;
  if (ST->hasAVX512()) {
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
        .addReg(Zmm);
  } else if (ST->hasAVX2()) {
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
        .addReg(Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
                      32)
        .addReg(Ymm);
  } else {
    assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
    unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
        .addReg(Xmm);
  }
  // Fill in the palette first.
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
      .addImm(1);
}

/// Insert spill instruction for \p VirtReg before \p Before.
/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
                                 Register VirtReg, bool Kill) {
  LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
  int FI = getStackSpaceFor(VirtReg);
  LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');

  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  // Don't need shape information for tile store, because it is adjacent to
  // the tile def instruction.
  TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI,
                           Register());
  ++NumStores;

  // TODO: update DBG_VALUEs
}

/// Insert reload instruction for \p OrigReg before \p UseMI.
void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
                                  Register OrigReg, MachineOperand *RowMO,
                                  MachineOperand *ColMO) {
  int FI = getStackSpaceFor(OrigReg);
  const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
  Register TileReg;
  // Fold copy to tileload
  // BB1:
  // spill src to s
  //
  // BB2:
  // t = copy src
  // -->
  // t = tileload (s)
  if (UseMI->isCopy())
    TileReg = UseMI->getOperand(0).getReg();
  else
    TileReg = MRI->createVirtualRegister(&RC);
  // Can't use TII->loadRegFromStackSlot(), because we need the shape
  // information for reload.
  // tileloadd (%sp, %idx), %tmm
  unsigned Opc = X86::PTILELOADDV;
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  // FIXME: MBB is not the parent of UseMI.
  MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
                                TII->get(X86::MOV64ri), StrideReg)
                            .addImm(64);
  NewMI = addFrameReference(
      BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
          .addReg(RowMO->getReg())
          .addReg(ColMO->getReg()),
      FI);
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  RowMO->setIsKill(false);
  ColMO->setIsKill(false);
  // Erase copy instruction after it is folded.
  if (UseMI->isCopy()) {
    UseMI->eraseFromParent();
  } else {
    // Replace the register in the user MI.
    for (auto &MO : UseMI->operands()) {
      if (MO.isReg() && MO.getReg() == OrigReg)
        MO.setReg(TileReg);
    }
  }

  ++NumLoads;
  LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
                    << printReg(TileReg, TRI) << '\n');
}
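
/// Return the number of tile registers that \p Reg covers: 1 for a single
/// tile (virtual TILE class or physical TMM0-TMM7), 2 for a tile pair, and
/// 0 if \p Reg is not a tile register at all.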
static unsigned getTileDefNum(MachineRegisterInfo *MRI, Register Reg) {
  if (Reg.isVirtual()) {
    unsigned RegClassID = MRI->getRegClass(Reg)->getID();
    if (RegClassID == X86::TILERegClassID)
      return 1;
    if (RegClassID == X86::TILEPAIRRegClassID)
      return 2;
  } else {
    if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
      return 1;
    if (Reg >= X86::TMM0_TMM1 && Reg <= X86::TMM6_TMM7)
      return 2;
  }
  return 0;
}

static bool isTileRegister(MachineRegisterInfo *MRI, Register VirtReg) {
  return getTileDefNum(MRI, VirtReg) > 0;
}
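
/// A tile def here is a pseudo whose operand 0 defines a tile register and
/// whose operands 1 and 2 carry the row/column shape, e.g. (illustrative
/// MIR):
///   %t:tile = PTILEZEROV %row:gr16, %col:gr16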
static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  // The instruction must have 3 operands: tile def, row, col.
  if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
    return false;
  MachineOperand &MO = MI.getOperand(0);

  if (!MO.isReg())
    return false;

  return getTileDefNum(MRI, MO.getReg()) > 0;
}

static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
  MachineInstr *MI = MRI->getVRegDef(TileReg);
  if (isTileDef(MRI, *MI)) {
    MachineOperand *RowMO = &MI->getOperand(1);
    MachineOperand *ColMO = &MI->getOperand(2);
    return ShapeT(RowMO, ColMO, MRI);
  } else if (MI->isCopy()) {
    TileReg = MI->getOperand(1).getReg();
    return getShape(MRI, TileReg);
  }

  // The def should not be a PHI node, because we walk the MBB in reverse
  // post order.
  assert(MI->isPHI() && "Unexpected PHI when get shape.");
  llvm_unreachable("Unexpected MI when get shape.");
}

// BB0:
// spill t0 to s0
// BB1:
// spill t1 to s1
//
// BB2:
// t = phi [t0, bb0] [t1, bb1]
// -->
// row = phi [r0, bb0] [r1, bb1]
// col = phi [c0, bb0] [c1, bb1]
//   s = phi [s0, bb0] [s1, bb1]
//   t = tileload row, col, s
// The new instructions are inserted after the last phi node. The order
// of the original phi nodes is not preserved.
void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
                                      MachineInstr &PHI) {
  // 1. Create instruction to get stack slot address of each incoming block.
  // 2. Create PHI node for the stack address.
  // 3. Create PHI node for shape. If one of the incoming shapes is an
  //    immediate, use the immediate and delete the PHI node.
  // 4. Create tileload instruction from the stack address.
  Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                        TII->get(X86::PHI), StackAddrReg);
  Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), RowReg);
  Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), ColReg);
  // Record the mapping of phi node and its row/column information.
  VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};

  for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
    // Get the 2 incoming values of tile register and MBB.
    Register InTileReg = PHI.getOperand(I).getReg();
    // Mark it as liveout, so that it will be spilled when visiting
    // the incoming MBB. Otherwise, since the phi will be deleted, the
    // spill would be missed when visiting the incoming MBB.
    MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
    MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();

    MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
    MachineBasicBlock::iterator InsertPos;
    if (TileDefMI->isPHI()) {
      InsertPos = TileDefMI->getParent()->getFirstNonPHI();
      if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
        //       def t1
        //      /      |
        //  def t2     t3 = phi(t1, t4) <---+
        //      |      /                    |
        //      t4 = phi(t2, t3) -----------+
        //
        // For each (row, column and stack address) append phi incoming value.
        // Create r3 = phi(r1, r4)
        // Create r4 = phi(r2, r3)
        Register InRowReg = VisitedPHIs[TileDefMI].Row;
        Register InColReg = VisitedPHIs[TileDefMI].Col;
        Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
        continue;
      } else {
        // Recursively convert PHI to tileload
        convertPHI(TileDefMI->getParent(), *TileDefMI);
        // The PHI node is converted to a tileload instruction. Get the stack
        // address from the tileload operands.
        MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
        assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
        Register InRowReg = TileLoad->getOperand(1).getReg();
        Register InColReg = TileLoad->getOperand(2).getReg();
        Register InStackAddrReg = TileLoad->getOperand(3).getReg();
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
      }
    } else {
      InsertPos = TileDefMI->getIterator();

      // Fill the incoming operand of row/column phi instruction.
      ShapeT Shape = getShape(MRI, InTileReg);
      Shape.getRow()->setIsKill(false);
      Shape.getCol()->setIsKill(false);
      RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
      ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);

      // The incoming tile register lives out of its def BB, so it will be
      // spilled. Create an MI to get the spill stack slot address for the
      // tile register.
      int FI = getStackSpaceFor(InTileReg);
      Register InStackAddrReg =
          MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
      addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
                        TII->get(X86::LEA64r), InStackAddrReg)
                    .addFrameIndex(FI),
                0);
      AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
    }
  }

  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
      .addImm(64);
  Register TileReg = PHI.getOperand(0).getReg();
  MachineInstr *NewMI = addDirectMem(
      BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
          .addReg(RowReg)
          .addReg(ColReg),
      StackAddrReg);
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  PHI.eraseFromParent();
  VisitedPHIs.erase(&PHI);
}

static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  MachineOperand &MO = MI.getOperand(0);
  if (MO.isReg() && MO.getReg().isVirtual() &&
      isTileRegister(MRI, MO.getReg()))
    return true;
  return false;
}

void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;

  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  // Canonicalize the phi nodes first. One tile phi may depend on another
  // phi node. For the case below, we need to convert %t4.
  //
  // BB0:
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t3 BB0)
  // -->
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t2 BB0)
  //
  while (!PHIs.empty()) {
    MachineInstr *PHI = PHIs.pop_back_val();

    // Find the operand that is incoming from the same MBB and whose def
    // is also a phi node.
    MachineOperand *InMO = nullptr;
    MachineInstr *DefMI = nullptr;
    for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
      Register InTileReg = PHI->getOperand(I).getReg();
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      DefMI = MRI->getVRegDef(InTileReg);
      if (InMBB != &MBB || !DefMI->isPHI())
        continue;

      InMO = &PHI->getOperand(I);
      break;
    }
    // If we can't find such an operand, do nothing.
    if (!InMO)
      continue;

    // The current phi node depends on a previous phi node. Break the
    // dependency.
    Register DefTileReg;
    for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      if (InMBB != &MBB)
        continue;
      DefTileReg = DefMI->getOperand(I).getReg();
      InMO->setReg(DefTileReg);
      break;
    }
  }
}

void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;
  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  while (!PHIs.empty()) {
    MachineInstr *MI = PHIs.pop_back_val();
    VisitedPHIs.clear();
    convertPHI(&MBB, *MI);
  }
}
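
// An illustrative example for configBasicBlock below: two tile defs with
// different shapes in one block,
//   %t0 = PTILEZEROV %r0, %c0
//   ...
//   %t1 = PTILEZEROV %r1, %c1
// get separate PLDTILECFGV instructions, and %t0 is spilled and reloaded
// around the second config if it is still used after it.
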
// PreTileConfig should configure the tile registers based on basic
// block.
bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
  this->MBB = &MBB;
  bool Change = false;
  MachineInstr *LastShapeMI = nullptr;
  MachineInstr *LastTileCfg = nullptr;
  bool HasUnconfigTile = false;

  auto Config = [&](MachineInstr &Before) {
    if (CfgSS == -1)
      CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
                                     ST->getTileConfigAlignment(), false);
    LastTileCfg = addFrameReference(
        BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
    LastShapeMI = nullptr;
    Change = true;
  };
  auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg.isVirtual() && isTileRegister(MRI, Reg))
        return true;
    }
    return false;
  };
  for (MachineInstr &MI : reverse(MBB)) {
    // We have transformed the phi nodes before configuring the BB.
    if (MI.isPHI())
      break;
    // Don't collect the shape of a used tile; the tile should be defined
    // before the tile use. Spill and reload would happen if there is only a
    // tile use after ldtilecfg, so the shape can be collected from the
    // reload. Take the code below for example: %t would be reloaded before
    // tilestore.
    // call
    // ....
    // tilestore %r, %c, %t
    // -->
    // call
    // ldtilecfg
    // %t = tileload %r, %c
    // tilestore %r, %c, %t
    if (HasTileOperand(MRI, MI))
      HasUnconfigTile = true;
    // According to the AMX ABI, all the tile registers including the config
    // register are volatile. Callers need to save/restore the config
    // register.
    if (MI.isCall() && HasUnconfigTile) {
      MachineBasicBlock::iterator I;
      if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
        I = ++LastShapeMI->getIterator();
      else
        I = ++MI.getIterator();
      Config(*I);
      HasUnconfigTile = false;
      continue;
    }
    if (!isTileDef(MRI, MI))
      continue;

    //---------------------------------------------------------------------
    // Don't handle COPY instructions. If the src and dst of the COPY can be
    // in the same config, as in the case below, we just check the shape of
    // t0.
    //
    // def row0
    // def col0
    // ldtilecfg
    // t0 = tilezero(row0, col0)
    // t1 = copy t0
    // ...
    //
    // If the src and dst of the COPY can NOT be in the same config, as in
    // the case below, a reload is generated before the copy instruction.
    //
    // def row0
    // def col0
    // t0 = tilezero(row0, col0)
    // spill t0
    // ...
    // def row1
    // def col1
    // ldtilecfg
    // t1 = tilezero(row1, col1)
    // reload t0
    // t1 = copy t0
    //---------------------------------------------------------------------
    //
    // If MI dominates the last shape def instruction, we need to insert
    // ldtilecfg after LastShapeMI now. The config doesn't include
    // the current MI.
    //
    // def row0
    // def col0
    // tilezero(row0, col0)  <- MI
    // def row1
    // def col1
    // ldtilecfg             <- insert
    // tilezero(row1, col1)
    if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
      Config(*(++LastShapeMI->getIterator()));
    MachineOperand *RowMO = &MI.getOperand(1);
    MachineOperand *ColMO = &MI.getOperand(2);
    MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
    MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
    // If the shape is defined in the current MBB, check the domination.
    // FIXME how about loop?
    if (RowMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = RowMI;
      else if (dominates(MBB, LastShapeMI, RowMI))
        LastShapeMI = RowMI;
    }
    if (ColMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = ColMI;
      else if (dominates(MBB, LastShapeMI, ColMI))
        LastShapeMI = ColMI;
    }
    unsigned TileDefNum = getTileDefNum(MRI, MI.getOperand(0).getReg());
    if (TileDefNum > 1) {
      for (unsigned I = 1; I < TileDefNum; I++) {
        MachineOperand *ColxMO = &MI.getOperand(2 + I);
        MachineInstr *ColxMI = MRI->getVRegDef(ColxMO->getReg());
        if (ColxMI->getParent() == &MBB) {
          if (!LastShapeMI)
            LastShapeMI = ColxMI;
          else if (dominates(MBB, LastShapeMI, ColxMI))
            LastShapeMI = ColxMI;
        }
      }
    }
    // If there is a user live out of the tilecfg, spill it and reload it
    // before the user.
    Register TileReg = MI.getOperand(0).getReg();
    if (mayLiveOut(TileReg, LastTileCfg))
      spill(++MI.getIterator(), TileReg, false);
    for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
      if (UseMI.getParent() == &MBB) {
        // Check that the user is not across the ldtilecfg.
        if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
          continue;
        // Reload before UseMI.
        reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      } else {
        // Don't reload for phi instructions; we handle phi reloads
        // separately.
        // TODO: merge the reloads for the same user MBB.
        if (!UseMI.isPHI())
          reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      }
    }
  }

  // Configure tile registers at the head of the MBB.
  if (HasUnconfigTile) {
    MachineInstr *Before;
    if (LastShapeMI == nullptr || LastShapeMI->isPHI())
      Before = &*MBB.getFirstNonPHI();
    else
      Before = &*(++LastShapeMI->getIterator());

    Config(*Before);
  }

  return Change;
}
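
// Driver: canonicalize tile PHIs first, then walk the blocks in reverse
// post order, converting tile PHIs to tile loads and inserting ldtilecfg in
// each block; finally, if anything changed, emit the zero initialization of
// the config area in the entry block. A rough sketch:
//   for (MBB : MFunc) canonicalizePHIs(MBB);
//   for (MBB : RPOT)  { convertPHIs(MBB); configBasicBlock(MBB); }
//   if (Change)       InitializeTileConfigStackSpace();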
bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
  X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
  // Early exit in the common case of non-AMX code.
  if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
    return false;

  MF = &MFunc;
  MRI = &MFunc.getRegInfo();
  ST = &MFunc.getSubtarget<X86Subtarget>();
  TII = ST->getInstrInfo();
  MFI = &MFunc.getFrameInfo();
  TRI = ST->getRegisterInfo();
  CfgSS = -1;

  unsigned NumVirtRegs = MRI->getNumVirtRegs();

  StackSlotForVirtReg.resize(NumVirtRegs);
  MayLiveAcrossBlocks.clear();
  // We will create registers during config. *3 is to make sure
  // the virtual register number doesn't exceed the size of
  // the bit vector.
  MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
  bool Change = false;
  assert(MRI->isSSA());

  // Canonicalize the phi nodes first.
  for (MachineBasicBlock &MBB : MFunc)
    canonicalizePHIs(MBB);

  // Loop over all of the basic blocks in reverse post order and insert
  // ldtilecfg for tile registers. The reverse post order is to facilitate
  // the shape defs dominating the uses of the tile registers.
  ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
  for (MachineBasicBlock *MBB : RPOT) {
    convertPHIs(*MBB);
    Change |= configBasicBlock(*MBB);
  }

  if (Change)
    InitializeTileConfigStackSpace();

  StackSlotForVirtReg.clear();
  return Change;
}

FunctionPass *llvm::createX86FastPreTileConfigPass() {
  return new X86FastPreTileConfig();
}