1 //===-- ModuloScheduling.cpp - ModuloScheduling ----------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by the LLVM research group and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This ModuloScheduling pass is based on the Swing Modulo Scheduling
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "ModuloSched"
17 #include "ModuloScheduling.h"
18 #include "llvm/Constants.h"
19 #include "llvm/Instructions.h"
20 #include "llvm/Function.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/Passes.h"
23 #include "llvm/Support/CFG.h"
24 #include "llvm/Target/TargetSchedInfo.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/GraphWriter.h"
27 #include "llvm/ADT/SCCIterator.h"
28 #include "llvm/ADT/StringExtras.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/Support/Timer.h"
37 #include "../MachineCodeForInstruction.h"
38 #include "../SparcV9TmpInstr.h"
39 #include "../SparcV9Internals.h"
40 #include "../SparcV9RegisterInfo.h"
43 /// Create ModuloSchedulingPass
45 FunctionPass
*llvm::createModuloSchedulingPass(TargetMachine
& targ
) {
46 DEBUG(std::cerr
<< "Created ModuloSchedulingPass\n");
47 return new ModuloSchedulingPass(targ
);
51 //Graph Traits for printing out the dependence graph
52 template<typename GraphType
>
53 static void WriteGraphToFile(std::ostream
&O
, const std::string
&GraphName
,
54 const GraphType
>
) {
55 std::string Filename
= GraphName
+ ".dot";
56 O
<< "Writing '" << Filename
<< "'...";
57 std::ofstream
F(Filename
.c_str());
62 O
<< " error opening file for writing!";
68 #define TIME_REGION(VARNAME, DESC) \
69 NamedRegionTimer VARNAME(DESC)
71 #define TIME_REGION(VARNAME, DESC)
75 //Graph Traits for printing out the dependence graph
79 Statistic
<> ValidLoops("modulosched-validLoops", "Number of candidate loops modulo-scheduled");
80 Statistic
<> JumboBB("modulosched-jumboBB", "Basic Blocks with more then 100 instructions");
81 Statistic
<> LoopsWithCalls("modulosched-loopCalls", "Loops with calls");
82 Statistic
<> LoopsWithCondMov("modulosched-loopCondMov", "Loops with conditional moves");
83 Statistic
<> InvalidLoops("modulosched-invalidLoops", "Loops with unknown trip counts or loop invariant trip counts");
84 Statistic
<> SingleBBLoops("modulosched-singeBBLoops", "Number of single basic block loops");
86 //Scheduling Statistics
87 Statistic
<> MSLoops("modulosched-schedLoops", "Number of loops successfully modulo-scheduled");
88 Statistic
<> NoSched("modulosched-noSched", "No schedule");
89 Statistic
<> SameStage("modulosched-sameStage", "Max stage is 0");
90 Statistic
<> ResourceConstraint("modulosched-resourceConstraint", "Loops constrained by resources");
91 Statistic
<> RecurrenceConstraint("modulosched-recurrenceConstraint", "Loops constrained by recurrences");
92 Statistic
<> FinalIISum("modulosched-finalIISum", "Sum of all final II");
93 Statistic
<> IISum("modulosched-IISum", "Sum of all theoretical II");
96 struct DOTGraphTraits
<MSchedGraph
*> : public DefaultDOTGraphTraits
{
97 static std::string
getGraphName(MSchedGraph
*F
) {
98 return "Dependence Graph";
101 static std::string
getNodeLabel(MSchedGraphNode
*Node
, MSchedGraph
*Graph
) {
102 if (Node
->getInst()) {
103 std::stringstream ss
;
104 ss
<< *(Node
->getInst());
105 return ss
.str(); //((MachineInstr*)Node->getInst());
110 static std::string
getEdgeSourceLabel(MSchedGraphNode
*Node
,
111 MSchedGraphNode::succ_iterator I
) {
112 //Label each edge with the type of dependence
113 std::string edgelabel
= "";
114 switch (I
.getEdge().getDepOrderType()) {
116 case MSchedGraphEdge::TrueDep
:
120 case MSchedGraphEdge::AntiDep
:
124 case MSchedGraphEdge::OutputDep
:
125 edgelabel
= "Output";
129 edgelabel
= "Unknown";
134 int iteDiff
= I
.getEdge().getIteDiff();
135 std::string intStr
= "(IteDiff: ";
136 intStr
+= itostr(iteDiff
);
149 /// ModuloScheduling::runOnFunction - main transformation entry point
150 /// The Swing Modulo Schedule algorithm has three basic steps:
151 /// 1) Computation and Analysis of the dependence graph
152 /// 2) Ordering of the nodes
155 bool ModuloSchedulingPass::runOnFunction(Function
&F
) {
158 bool Changed
= false;
161 DEBUG(std::cerr
<< "Creating ModuloSchedGraph for each valid BasicBlock in " + F
.getName() + "\n");
163 //Get MachineFunction
164 MachineFunction
&MF
= MachineFunction::get(&F
);
166 DependenceAnalyzer
&DA
= getAnalysis
<DependenceAnalyzer
>();
170 std::vector
<MachineBasicBlock
*> Worklist
;
172 //Iterate over BasicBlocks and put them into our worklist if they are valid
173 for (MachineFunction::iterator BI
= MF
.begin(); BI
!= MF
.end(); ++BI
)
174 if(MachineBBisValid(BI
)) {
175 if(BI
->size() < 100) {
176 Worklist
.push_back(&*BI
);
181 std::cerr
<< "BB Size: " << BI
->size() << "\n";
186 DEBUG(if(Worklist
.size() == 0) std::cerr
<< "No single basic block loops in function to ModuloSchedule\n");
188 //Iterate over the worklist and perform scheduling
189 for(std::vector
<MachineBasicBlock
*>::iterator BI
= Worklist
.begin(),
190 BE
= Worklist
.end(); BI
!= BE
; ++BI
) {
192 //Print out BB for debugging
193 DEBUG(std::cerr
<< "BB Size: " << (*BI
)->size() << "\n");
194 DEBUG(std::cerr
<< "ModuloScheduling BB: \n"; (*BI
)->print(std::cerr
));
197 DEBUG(std::cerr
<< "ModuloScheduling LLVMBB: \n"; (*BI
)->getBasicBlock()->print(std::cerr
));
199 //Catch the odd case where we only have TmpInstructions and no real Value*s
200 if(!CreateDefMap(*BI
)) {
201 //Clear out our maps for the next basic block that is processed
202 nodeToAttributesMap
.clear();
203 partialOrder
.clear();
204 recurrenceList
.clear();
205 FinalNodeOrder
.clear();
211 MSchedGraph
*MSG
= new MSchedGraph(*BI
, target
, indVarInstrs
[*BI
], DA
, machineTollvm
[*BI
]);
213 //Write Graph out to file
214 DEBUG(WriteGraphToFile(std::cerr
, F
.getName(), MSG
));
215 DEBUG(MSG
->print(std::cerr
));
217 //Calculate Resource II
218 int ResMII
= calculateResMII(*BI
);
220 //Calculate Recurrence II
221 int RecMII
= calculateRecMII(MSG
, ResMII
);
223 DEBUG(std::cerr
<< "Number of reccurrences found: " << recurrenceList
.size() << "\n");
225 //Our starting initiation interval is the maximum of RecMII and ResMII
227 ++RecurrenceConstraint
;
229 ++ResourceConstraint
;
231 II
= std::max(RecMII
, ResMII
);
235 //Print out II, RecMII, and ResMII
236 DEBUG(std::cerr
<< "II starts out as " << II
<< " ( RecMII=" << RecMII
<< " and ResMII=" << ResMII
<< ")\n");
238 //Dump node properties if in debug mode
239 DEBUG(for(std::map
<MSchedGraphNode
*, MSNodeAttributes
>::iterator I
= nodeToAttributesMap
.begin(),
240 E
= nodeToAttributesMap
.end(); I
!=E
; ++I
) {
241 std::cerr
<< "Node: " << *(I
->first
) << " ASAP: " << I
->second
.ASAP
<< " ALAP: "
242 << I
->second
.ALAP
<< " MOB: " << I
->second
.MOB
<< " Depth: " << I
->second
.depth
243 << " Height: " << I
->second
.height
<< "\n";
246 //Calculate Node Properties
247 calculateNodeAttributes(MSG
, ResMII
);
249 //Dump node properties if in debug mode
250 DEBUG(for(std::map
<MSchedGraphNode
*, MSNodeAttributes
>::iterator I
= nodeToAttributesMap
.begin(),
251 E
= nodeToAttributesMap
.end(); I
!=E
; ++I
) {
252 std::cerr
<< "Node: " << *(I
->first
) << " ASAP: " << I
->second
.ASAP
<< " ALAP: "
253 << I
->second
.ALAP
<< " MOB: " << I
->second
.MOB
<< " Depth: " << I
->second
.depth
254 << " Height: " << I
->second
.height
<< "\n";
257 //Put nodes in order to schedule them
258 computePartialOrder();
260 //Dump out partial order
261 DEBUG(for(std::vector
<std::set
<MSchedGraphNode
*> >::iterator I
= partialOrder
.begin(),
262 E
= partialOrder
.end(); I
!=E
; ++I
) {
263 std::cerr
<< "Start set in PO\n";
264 for(std::set
<MSchedGraphNode
*>::iterator J
= I
->begin(), JE
= I
->end(); J
!= JE
; ++J
)
265 std::cerr
<< "PO:" << **J
<< "\n";
268 //Place nodes in final order
271 //Dump out order of nodes
272 DEBUG(for(std::vector
<MSchedGraphNode
*>::iterator I
= FinalNodeOrder
.begin(), E
= FinalNodeOrder
.end(); I
!= E
; ++I
) {
273 std::cerr
<< "FO:" << **I
<< "\n";
276 //Finally schedule nodes
277 bool haveSched
= computeSchedule(*BI
, MSG
);
279 //Print out final schedule
280 DEBUG(schedule
.print(std::cerr
));
282 //Final scheduling step is to reconstruct the loop only if we actual have
285 reconstructLoop(*BI
);
289 if(schedule
.getMaxStage() == 0)
295 //Clear out our maps for the next basic block that is processed
296 nodeToAttributesMap
.clear();
297 partialOrder
.clear();
298 recurrenceList
.clear();
299 FinalNodeOrder
.clear();
302 //Clean up. Nuke old MachineBB and llvmBB
303 //BasicBlock *llvmBB = (BasicBlock*) (*BI)->getBasicBlock();
304 //Function *parent = (Function*) llvmBB->getParent();
305 //Should't std::find work??
306 //parent->getBasicBlockList().erase(std::find(parent->getBasicBlockList().begin(), parent->getBasicBlockList().end(), *llvmBB));
307 //parent->getBasicBlockList().erase(llvmBB);
317 bool ModuloSchedulingPass::CreateDefMap(MachineBasicBlock
*BI
) {
320 for(MachineBasicBlock::iterator I
= BI
->begin(), E
= BI
->end(); I
!= E
; ++I
) {
321 for(unsigned opNum
= 0; opNum
< I
->getNumOperands(); ++opNum
) {
322 const MachineOperand
&mOp
= I
->getOperand(opNum
);
323 if(mOp
.getType() == MachineOperand::MO_VirtualRegister
&& mOp
.isDef()) {
324 //assert if this is the second def we have seen
325 //DEBUG(std::cerr << "Putting " << *(mOp.getVRegValue()) << " into map\n");
326 assert(!defMap
.count(mOp
.getVRegValue()) && "Def already in the map");
328 defMap
[mOp
.getVRegValue()] = &*I
;
331 //See if we can use this Value* as our defaultInst
332 if(!defaultInst
&& mOp
.getType() == MachineOperand::MO_VirtualRegister
) {
333 Value
*V
= mOp
.getVRegValue();
334 if(!isa
<TmpInstruction
>(V
) && !isa
<Argument
>(V
) && !isa
<Constant
>(V
) && !isa
<PHINode
>(V
))
335 defaultInst
= (Instruction
*) V
;
346 /// This function checks if a Machine Basic Block is valid for modulo
347 /// scheduling. This means that it has no control flow (if/else or
348 /// calls) in the block. Currently ModuloScheduling only works on
349 /// single basic block loops.
350 bool ModuloSchedulingPass::MachineBBisValid(const MachineBasicBlock
*BI
) {
354 //Check first if its a valid loop
355 for(succ_const_iterator I
= succ_begin(BI
->getBasicBlock()),
356 E
= succ_end(BI
->getBasicBlock()); I
!= E
; ++I
) {
357 if (*I
== BI
->getBasicBlock()) // has single block loop
364 //Check that we have a conditional branch (avoiding MS infinite loops)
365 if(BranchInst
*b
= dyn_cast
<BranchInst
>(((BasicBlock
*) BI
->getBasicBlock())->getTerminator()))
366 if(b
->isUnconditional())
369 //Check size of our basic block.. make sure we have more then just the terminator in it
370 if(BI
->getBasicBlock()->size() == 1)
373 //Increase number of single basic block loops for stats
376 //Get Target machine instruction info
377 const TargetInstrInfo
*TMI
= target
.getInstrInfo();
379 //Check each instruction and look for calls, keep map to get index later
380 std::map
<const MachineInstr
*, unsigned> indexMap
;
383 for(MachineBasicBlock::const_iterator I
= BI
->begin(), E
= BI
->end(); I
!= E
; ++I
) {
384 //Get opcode to check instruction type
385 MachineOpCode OC
= I
->getOpcode();
388 if(TMI
->isCall(OC
)) {
393 //Look for conditional move
394 if(OC
== V9::MOVRZr
|| OC
== V9::MOVRZi
|| OC
== V9::MOVRLEZr
|| OC
== V9::MOVRLEZi
395 || OC
== V9::MOVRLZr
|| OC
== V9::MOVRLZi
|| OC
== V9::MOVRNZr
|| OC
== V9::MOVRNZi
396 || OC
== V9::MOVRGZr
|| OC
== V9::MOVRGZi
|| OC
== V9::MOVRGEZr
397 || OC
== V9::MOVRGEZi
|| OC
== V9::MOVLEr
|| OC
== V9::MOVLEi
|| OC
== V9::MOVLEUr
398 || OC
== V9::MOVLEUi
|| OC
== V9::MOVFLEr
|| OC
== V9::MOVFLEi
399 || OC
== V9::MOVNEr
|| OC
== V9::MOVNEi
|| OC
== V9::MOVNEGr
|| OC
== V9::MOVNEGi
400 || OC
== V9::MOVFNEr
|| OC
== V9::MOVFNEi
) {
413 //Apply a simple pattern match to make sure this loop can be modulo scheduled
414 //This means only loops with a branch associated to the iteration count
417 BranchInst
*b
= dyn_cast
<BranchInst
>(((BasicBlock
*) BI
->getBasicBlock())->getTerminator());
419 //Get the condition for the branch (we already checked if it was conditional)
420 Value
*cond
= b
->getCondition();
422 DEBUG(std::cerr
<< "Condition: " << *cond
<< "\n");
424 //List of instructions associated with induction variable
425 std::set
<Instruction
*> indVar
;
426 std::vector
<Instruction
*> stack
;
428 BasicBlock
*BB
= (BasicBlock
*) BI
->getBasicBlock();
433 if(Instruction
*I
= dyn_cast
<Instruction
>(cond
))
434 if(I
->getParent() == BB
) {
435 if (!assocIndVar(I
, indVar
, stack
, BB
)) {
448 //The indVar set must be >= 3 instructions for this loop to match (FIX ME!)
449 if(indVar
.size() < 3 )
452 //Dump out instructions associate with indvar for debug reasons
453 DEBUG(for(std::set
<Instruction
*>::iterator N
= indVar
.begin(), NE
= indVar
.end(); N
!= NE
; ++N
) {
454 std::cerr
<< **N
<< "\n";
457 //Create map of machine instr to llvm instr
458 std::map
<MachineInstr
*, Instruction
*> mllvm
;
459 for(BasicBlock::iterator I
= BB
->begin(), E
= BB
->end(); I
!= E
; ++I
) {
460 MachineCodeForInstruction
& tempMvec
= MachineCodeForInstruction::get(I
);
461 for (unsigned j
= 0; j
< tempMvec
.size(); j
++) {
462 mllvm
[tempMvec
[j
]] = I
;
466 //Convert list of LLVM Instructions to list of Machine instructions
467 std::map
<const MachineInstr
*, unsigned> mIndVar
;
468 for(std::set
<Instruction
*>::iterator N
= indVar
.begin(), NE
= indVar
.end(); N
!= NE
; ++N
) {
470 //If we have a load, we can't handle this loop because there is no way to preserve dependences
471 //between loads and stores
472 if(isa
<LoadInst
>(*N
))
475 MachineCodeForInstruction
& tempMvec
= MachineCodeForInstruction::get(*N
);
476 for (unsigned j
= 0; j
< tempMvec
.size(); j
++) {
477 MachineOpCode OC
= (tempMvec
[j
])->getOpcode();
480 if(!indexMap
.count(tempMvec
[j
]))
482 mIndVar
[(MachineInstr
*) tempMvec
[j
]] = indexMap
[(MachineInstr
*) tempMvec
[j
]];
483 DEBUG(std::cerr
<< *(tempMvec
[j
]) << " at index " << indexMap
[(MachineInstr
*) tempMvec
[j
]] << "\n");
487 //Must have some guts to the loop body (more then 1 instr, dont count nops in size)
488 if(mIndVar
.size() >= (BI
->size()-3))
491 //Put into a map for future access
492 indVarInstrs
[BI
] = mIndVar
;
493 machineTollvm
[BI
] = mllvm
;
497 bool ModuloSchedulingPass::assocIndVar(Instruction
*I
, std::set
<Instruction
*> &indVar
,
498 std::vector
<Instruction
*> &stack
, BasicBlock
*BB
) {
502 //If this is a phi node, check if its the canonical indvar
503 if(PHINode
*PN
= dyn_cast
<PHINode
>(I
)) {
504 if (Instruction
*Inc
=
505 dyn_cast
<Instruction
>(PN
->getIncomingValueForBlock(BB
)))
506 if (Inc
->getOpcode() == Instruction::Add
&& Inc
->getOperand(0) == PN
)
507 if (ConstantInt
*CI
= dyn_cast
<ConstantInt
>(Inc
->getOperand(1)))
508 if (CI
->equalsInt(1)) {
509 //We have found the indvar, so add the stack, and inc instruction to the set
510 indVar
.insert(stack
.begin(), stack
.end());
518 //Loop over each of the instructions operands, check if they are an instruction and in this BB
519 for(unsigned i
= 0; i
< I
->getNumOperands(); ++i
) {
520 if(Instruction
*N
= dyn_cast
<Instruction
>(I
->getOperand(i
))) {
521 if(N
->getParent() == BB
)
522 if(!assocIndVar(N
, indVar
, stack
, BB
))
532 //ResMII is calculated by determining the usage count for each resource
533 //and using the maximum.
534 //FIXME: In future there should be a way to get alternative resources
535 //for each instruction
536 int ModuloSchedulingPass::calculateResMII(const MachineBasicBlock
*BI
) {
538 TIME_REGION(X
, "calculateResMII");
540 const TargetInstrInfo
*mii
= target
.getInstrInfo();
541 const TargetSchedInfo
*msi
= target
.getSchedInfo();
545 //Map to keep track of usage count of each resource
546 std::map
<unsigned, unsigned> resourceUsageCount
;
548 for(MachineBasicBlock::const_iterator I
= BI
->begin(), E
= BI
->end(); I
!= E
; ++I
) {
550 //Get resource usage for this instruction
551 InstrRUsage rUsage
= msi
->getInstrRUsage(I
->getOpcode());
552 std::vector
<std::vector
<resourceId_t
> > resources
= rUsage
.resourcesByCycle
;
554 //Loop over resources in each cycle and increments their usage count
555 for(unsigned i
=0; i
< resources
.size(); ++i
)
556 for(unsigned j
=0; j
< resources
[i
].size(); ++j
) {
557 if(!resourceUsageCount
.count(resources
[i
][j
])) {
558 resourceUsageCount
[resources
[i
][j
]] = 1;
561 resourceUsageCount
[resources
[i
][j
]] = resourceUsageCount
[resources
[i
][j
]] + 1;
566 //Find maximum usage count
568 //Get max number of instructions that can be issued at once. (FIXME)
569 int issueSlots
= msi
->maxNumIssueTotal
;
571 for(std::map
<unsigned,unsigned>::iterator RB
= resourceUsageCount
.begin(), RE
= resourceUsageCount
.end(); RB
!= RE
; ++RB
) {
573 //Get the total number of the resources in our cpu
574 int resourceNum
= CPUResource::getCPUResource(RB
->first
)->maxNumUsers
;
576 //Get total usage count for this resources
577 unsigned usageCount
= RB
->second
;
579 //Divide the usage count by either the max number we can issue or the number of
580 //resources (whichever is its upper bound)
581 double finalUsageCount
;
582 if( resourceNum
<= issueSlots
)
583 finalUsageCount
= ceil(1.0 * usageCount
/ resourceNum
);
585 finalUsageCount
= ceil(1.0 * usageCount
/ issueSlots
);
588 //Only keep track of the max
589 ResMII
= std::max( (int) finalUsageCount
, ResMII
);
597 /// calculateRecMII - Calculates the value of the highest recurrence
598 /// By value we mean the total latency
599 int ModuloSchedulingPass::calculateRecMII(MSchedGraph
*graph
, int MII
) {
600 /*std::vector<MSchedGraphNode*> vNodes;
601 //Loop over all nodes in the graph
602 for(MSchedGraph::iterator I = graph->begin(), E = graph->end(); I != E; ++I) {
603 findAllReccurrences(I->second, vNodes, MII);
607 TIME_REGION(X
, "calculateRecMII");
609 findAllCircuits(graph
, MII
);
612 for(std::set
<std::pair
<int, std::vector
<MSchedGraphNode
*> > >::iterator I
= recurrenceList
.begin(), E
=recurrenceList
.end(); I
!=E
; ++I
) {
613 RecMII
= std::max(RecMII
, I
->first
);
619 /// calculateNodeAttributes - The following properties are calculated for
620 /// each node in the dependence graph: ASAP, ALAP, Depth, Height, and
622 void ModuloSchedulingPass::calculateNodeAttributes(MSchedGraph
*graph
, int MII
) {
624 TIME_REGION(X
, "calculateNodeAttributes");
626 assert(nodeToAttributesMap
.empty() && "Node attribute map was not cleared");
628 //Loop over the nodes and add them to the map
629 for(MSchedGraph::iterator I
= graph
->begin(), E
= graph
->end(); I
!= E
; ++I
) {
631 DEBUG(std::cerr
<< "Inserting node into attribute map: " << *I
->second
<< "\n");
633 //Assert if its already in the map
634 assert(nodeToAttributesMap
.count(I
->second
) == 0 &&
635 "Node attributes are already in the map");
637 //Put into the map with default attribute values
638 nodeToAttributesMap
[I
->second
] = MSNodeAttributes();
641 //Create set to deal with reccurrences
642 std::set
<MSchedGraphNode
*> visitedNodes
;
644 //Now Loop over map and calculate the node attributes
645 for(std::map
<MSchedGraphNode
*, MSNodeAttributes
>::iterator I
= nodeToAttributesMap
.begin(), E
= nodeToAttributesMap
.end(); I
!= E
; ++I
) {
646 calculateASAP(I
->first
, MII
, (MSchedGraphNode
*) 0);
647 visitedNodes
.clear();
650 int maxASAP
= findMaxASAP();
651 //Calculate ALAP which depends on ASAP being totally calculated
652 for(std::map
<MSchedGraphNode
*, MSNodeAttributes
>::iterator I
= nodeToAttributesMap
.begin(), E
= nodeToAttributesMap
.end(); I
!= E
; ++I
) {
653 calculateALAP(I
->first
, MII
, maxASAP
, (MSchedGraphNode
*) 0);
654 visitedNodes
.clear();
657 //Calculate MOB which depends on ASAP being totally calculated, also do depth and height
658 for(std::map
<MSchedGraphNode
*, MSNodeAttributes
>::iterator I
= nodeToAttributesMap
.begin(), E
= nodeToAttributesMap
.end(); I
!= E
; ++I
) {
659 (I
->second
).MOB
= std::max(0,(I
->second
).ALAP
- (I
->second
).ASAP
);
661 DEBUG(std::cerr
<< "MOB: " << (I
->second
).MOB
<< " (" << *(I
->first
) << ")\n");
662 calculateDepth(I
->first
, (MSchedGraphNode
*) 0);
663 calculateHeight(I
->first
, (MSchedGraphNode
*) 0);
669 /// ignoreEdge - Checks to see if this edge of a recurrence should be ignored or not
670 bool ModuloSchedulingPass::ignoreEdge(MSchedGraphNode
*srcNode
, MSchedGraphNode
*destNode
) {
671 if(destNode
== 0 || srcNode
==0)
674 bool findEdge
= edgesToIgnore
.count(std::make_pair(srcNode
, destNode
->getInEdgeNum(srcNode
)));
676 DEBUG(std::cerr
<< "Ignoring edge? from: " << *srcNode
<< " to " << *destNode
<< "\n");
682 /// calculateASAP - Calculates the
683 int ModuloSchedulingPass::calculateASAP(MSchedGraphNode
*node
, int MII
, MSchedGraphNode
*destNode
) {
685 DEBUG(std::cerr
<< "Calculating ASAP for " << *node
<< "\n");
687 //Get current node attributes
688 MSNodeAttributes
&attributes
= nodeToAttributesMap
.find(node
)->second
;
690 if(attributes
.ASAP
!= -1)
691 return attributes
.ASAP
;
693 int maxPredValue
= 0;
695 //Iterate over all of the predecessors and find max
696 for(MSchedGraphNode::pred_iterator P
= node
->pred_begin(), E
= node
->pred_end(); P
!= E
; ++P
) {
698 //Only process if we are not ignoring the edge
699 if(!ignoreEdge(*P
, node
)) {
701 predASAP
= calculateASAP(*P
, MII
, node
);
703 assert(predASAP
!= -1 && "ASAP has not been calculated");
704 int iteDiff
= node
->getInEdge(*P
).getIteDiff();
706 int currentPredValue
= predASAP
+ (*P
)->getLatency() - (iteDiff
* MII
);
707 DEBUG(std::cerr
<< "pred ASAP: " << predASAP
<< ", iteDiff: " << iteDiff
<< ", PredLatency: " << (*P
)->getLatency() << ", Current ASAP pred: " << currentPredValue
<< "\n");
708 maxPredValue
= std::max(maxPredValue
, currentPredValue
);
712 attributes
.ASAP
= maxPredValue
;
714 DEBUG(std::cerr
<< "ASAP: " << attributes
.ASAP
<< " (" << *node
<< ")\n");
720 int ModuloSchedulingPass::calculateALAP(MSchedGraphNode
*node
, int MII
,
721 int maxASAP
, MSchedGraphNode
*srcNode
) {
723 DEBUG(std::cerr
<< "Calculating ALAP for " << *node
<< "\n");
725 MSNodeAttributes
&attributes
= nodeToAttributesMap
.find(node
)->second
;
727 if(attributes
.ALAP
!= -1)
728 return attributes
.ALAP
;
730 if(node
->hasSuccessors()) {
732 //Trying to deal with the issue where the node has successors, but
733 //we are ignoring all of the edges to them. So this is my hack for
734 //now.. there is probably a more elegant way of doing this (FIXME)
735 bool processedOneEdge
= false;
737 //FIXME, set to something high to start
738 int minSuccValue
= 9999999;
740 //Iterate over all of the predecessors and fine max
741 for(MSchedGraphNode::succ_iterator P
= node
->succ_begin(),
742 E
= node
->succ_end(); P
!= E
; ++P
) {
744 //Only process if we are not ignoring the edge
745 if(!ignoreEdge(node
, *P
)) {
746 processedOneEdge
= true;
748 succALAP
= calculateALAP(*P
, MII
, maxASAP
, node
);
750 assert(succALAP
!= -1 && "Successors ALAP should have been caclulated");
752 int iteDiff
= P
.getEdge().getIteDiff();
754 int currentSuccValue
= succALAP
- node
->getLatency() + iteDiff
* MII
;
756 DEBUG(std::cerr
<< "succ ALAP: " << succALAP
<< ", iteDiff: " << iteDiff
<< ", SuccLatency: " << (*P
)->getLatency() << ", Current ALAP succ: " << currentSuccValue
<< "\n");
758 minSuccValue
= std::min(minSuccValue
, currentSuccValue
);
763 attributes
.ALAP
= minSuccValue
;
766 attributes
.ALAP
= maxASAP
;
769 attributes
.ALAP
= maxASAP
;
771 DEBUG(std::cerr
<< "ALAP: " << attributes
.ALAP
<< " (" << *node
<< ")\n");
773 if(attributes
.ALAP
< 0)
776 return attributes
.ALAP
;
779 int ModuloSchedulingPass::findMaxASAP() {
782 for(std::map
<MSchedGraphNode
*, MSNodeAttributes
>::iterator I
= nodeToAttributesMap
.begin(),
783 E
= nodeToAttributesMap
.end(); I
!= E
; ++I
)
784 maxASAP
= std::max(maxASAP
, I
->second
.ASAP
);
789 int ModuloSchedulingPass::calculateHeight(MSchedGraphNode
*node
,MSchedGraphNode
*srcNode
) {
791 MSNodeAttributes
&attributes
= nodeToAttributesMap
.find(node
)->second
;
793 if(attributes
.height
!= -1)
794 return attributes
.height
;
798 //Iterate over all of the predecessors and find max
799 for(MSchedGraphNode::succ_iterator P
= node
->succ_begin(),
800 E
= node
->succ_end(); P
!= E
; ++P
) {
803 if(!ignoreEdge(node
, *P
)) {
804 int succHeight
= calculateHeight(*P
, node
);
806 assert(succHeight
!= -1 && "Successors Height should have been caclulated");
808 int currentHeight
= succHeight
+ node
->getLatency();
809 maxHeight
= std::max(maxHeight
, currentHeight
);
812 attributes
.height
= maxHeight
;
813 DEBUG(std::cerr
<< "Height: " << attributes
.height
<< " (" << *node
<< ")\n");
818 int ModuloSchedulingPass::calculateDepth(MSchedGraphNode
*node
,
819 MSchedGraphNode
*destNode
) {
821 MSNodeAttributes
&attributes
= nodeToAttributesMap
.find(node
)->second
;
823 if(attributes
.depth
!= -1)
824 return attributes
.depth
;
828 //Iterate over all of the predecessors and fine max
829 for(MSchedGraphNode::pred_iterator P
= node
->pred_begin(), E
= node
->pred_end(); P
!= E
; ++P
) {
831 if(!ignoreEdge(*P
, node
)) {
833 predDepth
= calculateDepth(*P
, node
);
835 assert(predDepth
!= -1 && "Predecessors ASAP should have been caclulated");
837 int currentDepth
= predDepth
+ (*P
)->getLatency();
838 maxDepth
= std::max(maxDepth
, currentDepth
);
841 attributes
.depth
= maxDepth
;
843 DEBUG(std::cerr
<< "Depth: " << attributes
.depth
<< " (" << *node
<< "*)\n");
849 void ModuloSchedulingPass::addReccurrence(std::vector
<MSchedGraphNode
*> &recurrence
, int II
, MSchedGraphNode
*srcBENode
, MSchedGraphNode
*destBENode
) {
850 //Check to make sure that this recurrence is unique
854 //Loop over all recurrences already in our list
855 for(std::set
<std::pair
<int, std::vector
<MSchedGraphNode
*> > >::iterator R
= recurrenceList
.begin(), RE
= recurrenceList
.end(); R
!= RE
; ++R
) {
857 bool all_same
= true;
859 if(R
->second
.size() == recurrence
.size()) {
861 for(std::vector
<MSchedGraphNode
*>::const_iterator node
= R
->second
.begin(), end
= R
->second
.end(); node
!= end
; ++node
) {
862 if(std::find(recurrence
.begin(), recurrence
.end(), *node
) == recurrence
.end()) {
863 all_same
= all_same
&& false;
867 all_same
= all_same
&& true;
877 srcBENode
= recurrence
.back();
878 destBENode
= recurrence
.front();
881 if(destBENode
->getInEdge(srcBENode
).getIteDiff() == 0) {
882 //DEBUG(std::cerr << "NOT A BACKEDGE\n");
883 //find actual backedge HACK HACK
884 for(unsigned i
=0; i
< recurrence
.size()-1; ++i
) {
885 if(recurrence
[i
+1]->getInEdge(recurrence
[i
]).getIteDiff() == 1) {
886 srcBENode
= recurrence
[i
];
887 destBENode
= recurrence
[i
+1];
894 DEBUG(std::cerr
<< "Back Edge to Remove: " << *srcBENode
<< " to " << *destBENode
<< "\n");
895 edgesToIgnore
.insert(std::make_pair(srcBENode
, destBENode
->getInEdgeNum(srcBENode
)));
896 recurrenceList
.insert(std::make_pair(II
, recurrence
));
903 void ModuloSchedulingPass::unblock(MSchedGraphNode
*u
, std::set
<MSchedGraphNode
*> &blocked
,
904 std::map
<MSchedGraphNode
*, std::set
<MSchedGraphNode
*> > &B
) {
907 DEBUG(std::cerr
<< "Unblocking: " << *u
<< "\n");
910 //std::set<MSchedGraphNode*> toErase;
911 while (!B
[u
].empty()) {
912 MSchedGraphNode
*W
= *B
[u
].begin();
914 //toErase.insert(*W);
915 DEBUG(std::cerr
<< "Removed: " << *W
<< "from B-List\n");
917 unblock(W
, blocked
, B
);
922 bool ModuloSchedulingPass::circuit(MSchedGraphNode
*v
, std::vector
<MSchedGraphNode
*> &stack
,
923 std::set
<MSchedGraphNode
*> &blocked
, std::vector
<MSchedGraphNode
*> &SCC
,
924 MSchedGraphNode
*s
, std::map
<MSchedGraphNode
*, std::set
<MSchedGraphNode
*> > &B
,
925 int II
, std::map
<MSchedGraphNode
*, MSchedGraphNode
*> &newNodes
) {
928 DEBUG(std::cerr
<< "Finding Circuits Starting with: ( " << v
<< ")"<< *v
<< "\n");
930 //Push node onto the stack
936 //Loop over all successors of node v that are in the scc, create Adjaceny list
937 std::set
<MSchedGraphNode
*> AkV
;
938 for(MSchedGraphNode::succ_iterator I
= v
->succ_begin(), E
= v
->succ_end(); I
!= E
; ++I
) {
939 if((std::find(SCC
.begin(), SCC
.end(), *I
) != SCC
.end())) {
944 for(std::set
<MSchedGraphNode
*>::iterator I
= AkV
.begin(), E
= AkV
.end(); I
!= E
; ++I
) {
946 //We have a circuit, so add it to our list
947 addRecc(stack
, newNodes
);
950 else if(!blocked
.count(*I
)) {
951 if(circuit(*I
, stack
, blocked
, SCC
, s
, B
, II
, newNodes
))
955 DEBUG(std::cerr
<< "Blocked: " << **I
<< "\n");
960 unblock(v
, blocked
, B
);
963 for(std::set
<MSchedGraphNode
*>::iterator I
= AkV
.begin(), E
= AkV
.end(); I
!= E
; ++I
)
975 void ModuloSchedulingPass::addRecc(std::vector
<MSchedGraphNode
*> &stack
, std::map
<MSchedGraphNode
*, MSchedGraphNode
*> &newNodes
) {
976 std::vector
<MSchedGraphNode
*> recc
;
977 //Dump recurrence for now
978 DEBUG(std::cerr
<< "Starting Recc\n");
981 int totalDistance
= 0;
982 MSchedGraphNode
*lastN
= 0;
983 MSchedGraphNode
*start
= 0;
984 MSchedGraphNode
*end
= 0;
986 //Loop over recurrence, get delay and distance
987 for(std::vector
<MSchedGraphNode
*>::iterator N
= stack
.begin(), NE
= stack
.end(); N
!= NE
; ++N
) {
988 DEBUG(std::cerr
<< **N
<< "\n");
989 totalDelay
+= (*N
)->getLatency();
991 int iteDiff
= (*N
)->getInEdge(lastN
).getIteDiff();
992 totalDistance
+= iteDiff
;
999 //Get the original node
1001 recc
.push_back(newNodes
[*N
]);
1007 totalDistance
+= lastN
->getIteDiff(*stack
.begin());
1009 DEBUG(std::cerr
<< "End Recc\n");
1013 //Insert reccurrence into the list
1014 DEBUG(std::cerr
<< "Ignore Edge from!!: " << *start
<< " to " << *end
<< "\n");
1015 edgesToIgnore
.insert(std::make_pair(newNodes
[start
], (newNodes
[end
])->getInEdgeNum(newNodes
[start
])));
1018 //Insert reccurrence into the list
1019 DEBUG(std::cerr
<< "Ignore Edge from: " << *lastN
<< " to " << **stack
.begin() << "\n");
1020 edgesToIgnore
.insert(std::make_pair(newNodes
[lastN
], newNodes
[(*stack
.begin())]->getInEdgeNum(newNodes
[lastN
])));
1023 //Adjust II until we get close to the inequality delay - II*distance <= 0
1024 int RecMII
= II
; //Starting value
1025 int value
= totalDelay
-(RecMII
* totalDistance
);
1031 value
= totalDelay
-(RecMII
* totalDistance
);
1034 recurrenceList
.insert(std::make_pair(lastII
, recc
));
1039 void ModuloSchedulingPass::findAllCircuits(MSchedGraph
*g
, int II
) {
1043 //Keep old to new node mapping information
1044 std::map
<MSchedGraphNode
*, MSchedGraphNode
*> newNodes
;
1047 MSchedGraph
*MSG
= new MSchedGraph(*g
, newNodes
);
1049 DEBUG(std::cerr
<< "Finding All Circuits\n");
1051 //Set of blocked nodes
1052 std::set
<MSchedGraphNode
*> blocked
;
1054 //Stack holding current circuit
1055 std::vector
<MSchedGraphNode
*> stack
;
1058 std::map
<MSchedGraphNode
*, std::set
<MSchedGraphNode
*> > B
;
1064 //Iterate over the graph until its down to one node or empty
1065 while(MSG
->size() > 1) {
1067 //Write Graph out to file
1068 //WriteGraphToFile(std::cerr, "Graph" + utostr(MSG->size()), MSG);
1070 DEBUG(std::cerr
<< "Graph Size: " << MSG
->size() << "\n");
1071 DEBUG(std::cerr
<< "Finding strong component Vk with least vertex\n");
1073 //Iterate over all the SCCs in the graph
1074 std::set
<MSchedGraphNode
*> Visited
;
1075 std::vector
<MSchedGraphNode
*> Vk
;
1076 MSchedGraphNode
* s
= 0;
1078 //Find scc with the least vertex
1079 for (MSchedGraph::iterator GI
= MSG
->begin(), E
= MSG
->end(); GI
!= E
; ++GI
)
1080 if (Visited
.insert(GI
->second
).second
) {
1081 for (scc_iterator
<MSchedGraphNode
*> SCCI
= scc_begin(GI
->second
),
1082 E
= scc_end(GI
->second
); SCCI
!= E
; ++SCCI
) {
1083 std::vector
<MSchedGraphNode
*> &nextSCC
= *SCCI
;
1085 if (Visited
.insert(nextSCC
[0]).second
) {
1086 Visited
.insert(nextSCC
.begin()+1, nextSCC
.end());
1088 DEBUG(std::cerr
<< "SCC size: " << nextSCC
.size() << "\n");
1091 if(nextSCC
.size() > 1) {
1093 //Get least vertex in Vk
1099 for(unsigned i
= 0; i
< nextSCC
.size(); ++i
) {
1100 if(nextSCC
[i
] < s
) {
1113 DEBUG(for(std::vector
<MSchedGraphNode
*>::iterator N
= Vk
.begin(), NE
= Vk
.end();
1114 N
!= NE
; ++N
) { std::cerr
<< *((*N
)->getInst()); });
1116 //Iterate over all nodes in this scc
1117 for(std::vector
<MSchedGraphNode
*>::iterator N
= Vk
.begin(), NE
= Vk
.end();
1123 circuit(s
, stack
, blocked
, Vk
, s
, B
, II
, newNodes
);
1125 //Delete nodes from the graph
1126 //Find all nodes up to s and delete them
1127 std::vector
<MSchedGraphNode
*> nodesToRemove
;
1128 nodesToRemove
.push_back(s
);
1129 for(MSchedGraph::iterator N
= MSG
->begin(), NE
= MSG
->end(); N
!= NE
; ++N
) {
1131 nodesToRemove
.push_back(N
->second
);
1133 for(std::vector
<MSchedGraphNode
*>::iterator N
= nodesToRemove
.begin(), NE
= nodesToRemove
.end(); N
!= NE
; ++N
) {
1134 DEBUG(std::cerr
<< "Deleting Node: " << **N
<< "\n");
1135 MSG
->deleteNode(*N
);
1141 DEBUG(std::cerr
<< "Num Circuits found: " << CircCount
<< "\n");
1145 void ModuloSchedulingPass::findAllReccurrences(MSchedGraphNode
*node
,
1146 std::vector
<MSchedGraphNode
*> &visitedNodes
,
1150 if(std::find(visitedNodes
.begin(), visitedNodes
.end(), node
) != visitedNodes
.end()) {
1151 std::vector
<MSchedGraphNode
*> recurrence
;
1155 int RecMII
= II
; //Starting value
1156 MSchedGraphNode
*last
= node
;
1157 MSchedGraphNode
*srcBackEdge
= 0;
1158 MSchedGraphNode
*destBackEdge
= 0;
1162 for(std::vector
<MSchedGraphNode
*>::iterator I
= visitedNodes
.begin(), E
= visitedNodes
.end();
1170 delay
= delay
+ (*I
)->getLatency();
1173 int diff
= (*I
)->getInEdge(last
).getIteDiff();
1181 recurrence
.push_back(*I
);
1187 //Get final distance calc
1188 distance
+= node
->getInEdge(last
).getIteDiff();
1189 DEBUG(std::cerr
<< "Reccurrence Distance: " << distance
<< "\n");
1191 //Adjust II until we get close to the inequality delay - II*distance <= 0
1193 int value
= delay
-(RecMII
* distance
);
1199 value
= delay
-(RecMII
* distance
);
1203 DEBUG(std::cerr
<< "Final II for this recurrence: " << lastII
<< "\n");
1204 addReccurrence(recurrence
, lastII
, srcBackEdge
, destBackEdge
);
1205 assert(distance
!= 0 && "Recurrence distance should not be zero");
1210 for(MSchedGraphNode::succ_iterator I
= node
->succ_begin(), E
= node
->succ_end(); I
!= E
; ++I
) {
1211 visitedNodes
.push_back(node
);
1212 //if(!edgesToIgnore.count(std::make_pair(node, count)))
1213 findAllReccurrences(*I
, visitedNodes
, II
);
1214 visitedNodes
.pop_back();
1219 void ModuloSchedulingPass::searchPath(MSchedGraphNode
*node
,
1220 std::vector
<MSchedGraphNode
*> &path
,
1221 std::set
<MSchedGraphNode
*> &nodesToAdd
,
1222 std::set
<MSchedGraphNode
*> &new_reccurrence
) {
1223 //Push node onto the path
1224 path
.push_back(node
);
1226 //Loop over all successors and see if there is a path from this node to
1227 //a recurrence in the partial order, if so.. add all nodes to be added to recc
1228 for(MSchedGraphNode::succ_iterator S
= node
->succ_begin(), SE
= node
->succ_end(); S
!= SE
;
1231 //Check if we should ignore this edge first
1232 if(ignoreEdge(node
,*S
))
1235 //check if successor is in this recurrence, we will get to it eventually
1236 if(new_reccurrence
.count(*S
))
1239 //If this node exists in a recurrence already in the partial
1240 //order, then add all nodes in the path to the set of nodes to add
1241 //Check if its already in our partial order, if not add it to the
1244 for(std::vector
<std::set
<MSchedGraphNode
*> >::iterator PO
= partialOrder
.begin(),
1245 PE
= partialOrder
.end(); PO
!= PE
; ++PO
) {
1254 nodesToAdd
.insert(*S
);
1255 searchPath(*S
, path
, nodesToAdd
, new_reccurrence
);
1259 //Pop Node off the path
1263 void ModuloSchedulingPass::pathToRecc(MSchedGraphNode
*node
,
1264 std::vector
<MSchedGraphNode
*> &path
,
1265 std::set
<MSchedGraphNode
*> &poSet
,
1266 std::set
<MSchedGraphNode
*> &lastNodes
) {
1267 //Push node onto the path
1268 path
.push_back(node
);
1270 DEBUG(std::cerr
<< "Current node: " << *node
<< "\n");
1272 //Loop over all successors and see if there is a path from this node to
1273 //a recurrence in the partial order, if so.. add all nodes to be added to recc
1274 for(MSchedGraphNode::succ_iterator S
= node
->succ_begin(), SE
= node
->succ_end(); S
!= SE
;
1276 DEBUG(std::cerr
<< "Succ:" << **S
<< "\n");
1277 //Check if we should ignore this edge first
1278 if(ignoreEdge(node
,*S
))
1281 if(poSet
.count(*S
)) {
1282 DEBUG(std::cerr
<< "Found path to recc from no pred\n");
1283 //Loop over path, if it exists in lastNodes, then add to poset, and remove from lastNodes
1284 for(std::vector
<MSchedGraphNode
*>::iterator I
= path
.begin(), IE
= path
.end(); I
!= IE
; ++I
) {
1285 if(lastNodes
.count(*I
)) {
1286 DEBUG(std::cerr
<< "Inserting node into recc: " << **I
<< "\n");
1288 lastNodes
.erase(*I
);
1293 pathToRecc(*S
, path
, poSet
, lastNodes
);
1296 //Pop Node off the path
1300 void ModuloSchedulingPass::computePartialOrder() {
1302 TIME_REGION(X
, "calculatePartialOrder");
1304 DEBUG(std::cerr
<< "Computing Partial Order\n");
1306 //Only push BA branches onto the final node order, we put other
1307 //branches after it FIXME: Should we really be pushing branches on
1308 //it a specific order instead of relying on BA being there?
1310 std::vector
<MSchedGraphNode
*> branches
;
1312 //Steps to add a recurrence to the partial order 1) Find reccurrence
1313 //with the highest RecMII. Add it to the partial order. 2) For each
1314 //recurrence with decreasing RecMII, add it to the partial order
1315 //along with any nodes that connect this recurrence to recurrences
1316 //already in the partial order
1317 for(std::set
<std::pair
<int, std::vector
<MSchedGraphNode
*> > >::reverse_iterator
1318 I
= recurrenceList
.rbegin(), E
=recurrenceList
.rend(); I
!=E
; ++I
) {
1320 std::set
<MSchedGraphNode
*> new_recurrence
;
1322 //Loop through recurrence and remove any nodes already in the partial order
1323 for(std::vector
<MSchedGraphNode
*>::const_iterator N
= I
->second
.begin(),
1324 NE
= I
->second
.end(); N
!= NE
; ++N
) {
1327 for(std::vector
<std::set
<MSchedGraphNode
*> >::iterator PO
= partialOrder
.begin(),
1328 PE
= partialOrder
.end(); PO
!= PE
; ++PO
) {
1333 //Check if its a branch, and remove to handle special
1335 if((*N
)->isBranch() && !(*N
)->hasPredecessors()) {
1336 branches
.push_back(*N
);
1339 new_recurrence
.insert(*N
);
1345 if(new_recurrence
.size() > 0) {
1347 std::vector
<MSchedGraphNode
*> path
;
1348 std::set
<MSchedGraphNode
*> nodesToAdd
;
1350 //Dump recc we are dealing with (minus nodes already in PO)
1351 DEBUG(std::cerr
<< "Recc: ");
1352 DEBUG(for(std::set
<MSchedGraphNode
*>::iterator R
= new_recurrence
.begin(), RE
= new_recurrence
.end(); R
!= RE
; ++R
) { std::cerr
<< **R
; });
1354 //Add nodes that connect this recurrence to recurrences in the partial path
1355 for(std::set
<MSchedGraphNode
*>::iterator N
= new_recurrence
.begin(),
1356 NE
= new_recurrence
.end(); N
!= NE
; ++N
)
1357 searchPath(*N
, path
, nodesToAdd
, new_recurrence
);
1359 //Add nodes to this recurrence if they are not already in the partial order
1360 for(std::set
<MSchedGraphNode
*>::iterator N
= nodesToAdd
.begin(), NE
= nodesToAdd
.end();
1363 for(std::vector
<std::set
<MSchedGraphNode
*> >::iterator PO
= partialOrder
.begin(),
1364 PE
= partialOrder
.end(); PO
!= PE
; ++PO
) {
1369 assert("FOUND CONNECTOR");
1370 new_recurrence
.insert(*N
);
1374 partialOrder
.push_back(new_recurrence
);
1377 //Dump out partial order
1378 DEBUG(for(std::vector
<std::set
<MSchedGraphNode
*> >::iterator I
= partialOrder
.begin(),
1379 E
= partialOrder
.end(); I
!=E
; ++I
) {
1380 std::cerr
<< "Start set in PO\n";
1381 for(std::set
<MSchedGraphNode
*>::iterator J
= I
->begin(), JE
= I
->end(); J
!= JE
; ++J
)
1382 std::cerr
<< "PO:" << **J
<< "\n";
1388 //Add any nodes that are not already in the partial order
1389 //Add them in a set, one set per connected component
1390 std::set
<MSchedGraphNode
*> lastNodes
;
1391 std::set
<MSchedGraphNode
*> noPredNodes
;
1392 for(std::map
<MSchedGraphNode
*, MSNodeAttributes
>::iterator I
= nodeToAttributesMap
.begin(),
1393 E
= nodeToAttributesMap
.end(); I
!= E
; ++I
) {
1397 //Check if its already in our partial order, if not add it to the final vector
1398 for(std::vector
<std::set
<MSchedGraphNode
*> >::iterator PO
= partialOrder
.begin(),
1399 PE
= partialOrder
.end(); PO
!= PE
; ++PO
) {
1400 if(PO
->count(I
->first
))
1404 lastNodes
.insert(I
->first
);
1407 //For each node w/out preds, see if there is a path to one of the
1408 //recurrences, and if so add them to that current recc
1409 /*for(std::set<MSchedGraphNode*>::iterator N = noPredNodes.begin(), NE = noPredNodes.end();
1411 DEBUG(std::cerr << "No Pred Path from: " << **N << "\n");
1412 for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(),
1413 PE = partialOrder.end(); PO != PE; ++PO) {
1414 std::vector<MSchedGraphNode*> path;
1415 pathToRecc(*N, path, *PO, lastNodes);
1420 //Break up remaining nodes that are not in the partial order
1421 ///into their connected compoenents
1422 while(lastNodes
.size() > 0) {
1423 std::set
<MSchedGraphNode
*> ccSet
;
1424 connectedComponentSet(*(lastNodes
.begin()),ccSet
, lastNodes
);
1425 if(ccSet
.size() > 0)
1426 partialOrder
.push_back(ccSet
);
1430 //Clean up branches by putting them in final order
1431 assert(branches
.size() == 0 && "We should not have any branches in our graph");
1435 void ModuloSchedulingPass::connectedComponentSet(MSchedGraphNode
*node
, std::set
<MSchedGraphNode
*> &ccSet
, std::set
<MSchedGraphNode
*> &lastNodes
) {
1438 if( !ccSet
.count(node
) && lastNodes
.count(node
)) {
1439 lastNodes
.erase(node
);
1445 //Loop over successors and recurse if we have not seen this node before
1446 for(MSchedGraphNode::succ_iterator node_succ
= node
->succ_begin(), end
=node
->succ_end(); node_succ
!= end
; ++node_succ
) {
1447 connectedComponentSet(*node_succ
, ccSet
, lastNodes
);
1452 void ModuloSchedulingPass::predIntersect(std::set
<MSchedGraphNode
*> &CurrentSet
, std::set
<MSchedGraphNode
*> &IntersectResult
) {
1454 for(unsigned j
=0; j
< FinalNodeOrder
.size(); ++j
) {
1455 for(MSchedGraphNode::pred_iterator P
= FinalNodeOrder
[j
]->pred_begin(),
1456 E
= FinalNodeOrder
[j
]->pred_end(); P
!= E
; ++P
) {
1458 //Check if we are supposed to ignore this edge or not
1459 if(ignoreEdge(*P
,FinalNodeOrder
[j
]))
1462 if(CurrentSet
.count(*P
))
1463 if(std::find(FinalNodeOrder
.begin(), FinalNodeOrder
.end(), *P
) == FinalNodeOrder
.end())
1464 IntersectResult
.insert(*P
);
1473 void ModuloSchedulingPass::succIntersect(std::set
<MSchedGraphNode
*> &CurrentSet
, std::set
<MSchedGraphNode
*> &IntersectResult
) {
1475 for(unsigned j
=0; j
< FinalNodeOrder
.size(); ++j
) {
1476 for(MSchedGraphNode::succ_iterator P
= FinalNodeOrder
[j
]->succ_begin(),
1477 E
= FinalNodeOrder
[j
]->succ_end(); P
!= E
; ++P
) {
1479 //Check if we are supposed to ignore this edge or not
1480 if(ignoreEdge(FinalNodeOrder
[j
],*P
))
1483 if(CurrentSet
.count(*P
))
1484 if(std::find(FinalNodeOrder
.begin(), FinalNodeOrder
.end(), *P
) == FinalNodeOrder
.end())
1485 IntersectResult
.insert(*P
);
1490 void dumpIntersection(std::set
<MSchedGraphNode
*> &IntersectCurrent
) {
1491 std::cerr
<< "Intersection (";
1492 for(std::set
<MSchedGraphNode
*>::iterator I
= IntersectCurrent
.begin(), E
= IntersectCurrent
.end(); I
!= E
; ++I
)
1493 std::cerr
<< **I
<< ", ";
1499 void ModuloSchedulingPass::orderNodes() {
1501 TIME_REGION(X
, "orderNodes");
1507 int order
= BOTTOM_UP
;
1510 //Loop over all the sets and place them in the final node order
1511 for(std::vector
<std::set
<MSchedGraphNode
*> >::iterator CurrentSet
= partialOrder
.begin(), E
= partialOrder
.end(); CurrentSet
!= E
; ++CurrentSet
) {
1513 DEBUG(std::cerr
<< "Processing set in S\n");
1514 DEBUG(dumpIntersection(*CurrentSet
));
1516 //Result of intersection
1517 std::set
<MSchedGraphNode
*> IntersectCurrent
;
1519 predIntersect(*CurrentSet
, IntersectCurrent
);
1521 //If the intersection of predecessor and current set is not empty
1522 //sort nodes bottom up
1523 if(IntersectCurrent
.size() != 0) {
1524 DEBUG(std::cerr
<< "Final Node Order Predecessors and Current Set interesection is NOT empty\n");
1527 //If empty, use successors
1529 DEBUG(std::cerr
<< "Final Node Order Predecessors and Current Set interesection is empty\n");
1531 succIntersect(*CurrentSet
, IntersectCurrent
);
1534 if(IntersectCurrent
.size() != 0) {
1535 DEBUG(std::cerr
<< "Final Node Order Successors and Current Set interesection is NOT empty\n");
1539 DEBUG(std::cerr
<< "Final Node Order Successors and Current Set interesection is empty\n");
1540 //Find node with max ASAP in current Set
1541 MSchedGraphNode
*node
;
1543 DEBUG(std::cerr
<< "Using current set of size " << CurrentSet
->size() << "to find max ASAP\n");
1544 for(std::set
<MSchedGraphNode
*>::iterator J
= CurrentSet
->begin(), JE
= CurrentSet
->end(); J
!= JE
; ++J
) {
1545 //Get node attributes
1546 MSNodeAttributes nodeAttr
= nodeToAttributesMap
.find(*J
)->second
;
1547 //assert(nodeAttr != nodeToAttributesMap.end() && "Node not in attributes map!");
1549 if(maxASAP
<= nodeAttr
.ASAP
) {
1550 maxASAP
= nodeAttr
.ASAP
;
1554 assert(node
!= 0 && "In node ordering node should not be null");
1555 IntersectCurrent
.insert(node
);
1560 //Repeat until all nodes are put into the final order from current set
1561 while(IntersectCurrent
.size() > 0) {
1563 if(order
== TOP_DOWN
) {
1564 DEBUG(std::cerr
<< "Order is TOP DOWN\n");
1566 while(IntersectCurrent
.size() > 0) {
1567 DEBUG(std::cerr
<< "Intersection is not empty, so find heighest height\n");
1571 MSchedGraphNode
*highestHeightNode
= *(IntersectCurrent
.begin());
1573 //Find node in intersection with highest heigh and lowest MOB
1574 for(std::set
<MSchedGraphNode
*>::iterator I
= IntersectCurrent
.begin(),
1575 E
= IntersectCurrent
.end(); I
!= E
; ++I
) {
1577 //Get current nodes properties
1578 MSNodeAttributes nodeAttr
= nodeToAttributesMap
.find(*I
)->second
;
1580 if(height
< nodeAttr
.height
) {
1581 highestHeightNode
= *I
;
1582 height
= nodeAttr
.height
;
1585 else if(height
== nodeAttr
.height
) {
1586 if(MOB
> nodeAttr
.height
) {
1587 highestHeightNode
= *I
;
1588 height
= nodeAttr
.height
;
1594 //Append our node with greatest height to the NodeOrder
1595 if(std::find(FinalNodeOrder
.begin(), FinalNodeOrder
.end(), highestHeightNode
) == FinalNodeOrder
.end()) {
1596 DEBUG(std::cerr
<< "Adding node to Final Order: " << *highestHeightNode
<< "\n");
1597 FinalNodeOrder
.push_back(highestHeightNode
);
1600 //Remove V from IntersectOrder
1601 IntersectCurrent
.erase(std::find(IntersectCurrent
.begin(),
1602 IntersectCurrent
.end(), highestHeightNode
));
1605 //Intersect V's successors with CurrentSet
1606 for(MSchedGraphNode::succ_iterator P
= highestHeightNode
->succ_begin(),
1607 E
= highestHeightNode
->succ_end(); P
!= E
; ++P
) {
1608 //if(lower_bound(CurrentSet->begin(),
1609 // CurrentSet->end(), *P) != CurrentSet->end()) {
1610 if(std::find(CurrentSet
->begin(), CurrentSet
->end(), *P
) != CurrentSet
->end()) {
1611 if(ignoreEdge(highestHeightNode
, *P
))
1613 //If not already in Intersect, add
1614 if(!IntersectCurrent
.count(*P
))
1615 IntersectCurrent
.insert(*P
);
1618 } //End while loop over Intersect Size
1623 //Reset Intersect to reflect changes in OrderNodes
1624 IntersectCurrent
.clear();
1625 predIntersect(*CurrentSet
, IntersectCurrent
);
1629 //Begin if BOTTOM_UP
1631 DEBUG(std::cerr
<< "Order is BOTTOM UP\n");
1632 while(IntersectCurrent
.size() > 0) {
1633 DEBUG(std::cerr
<< "Intersection of size " << IntersectCurrent
.size() << ", finding highest depth\n");
1636 DEBUG(dumpIntersection(IntersectCurrent
));
1637 //Get node with highest depth, if a tie, use one with lowest
1641 MSchedGraphNode
*highestDepthNode
= *(IntersectCurrent
.begin());
1643 for(std::set
<MSchedGraphNode
*>::iterator I
= IntersectCurrent
.begin(),
1644 E
= IntersectCurrent
.end(); I
!= E
; ++I
) {
1645 //Find node attribute in graph
1646 MSNodeAttributes nodeAttr
= nodeToAttributesMap
.find(*I
)->second
;
1648 if(depth
< nodeAttr
.depth
) {
1649 highestDepthNode
= *I
;
1650 depth
= nodeAttr
.depth
;
1653 else if(depth
== nodeAttr
.depth
) {
1654 if(MOB
> nodeAttr
.MOB
) {
1655 highestDepthNode
= *I
;
1656 depth
= nodeAttr
.depth
;
1664 //Append highest depth node to the NodeOrder
1665 if(std::find(FinalNodeOrder
.begin(), FinalNodeOrder
.end(), highestDepthNode
) == FinalNodeOrder
.end()) {
1666 DEBUG(std::cerr
<< "Adding node to Final Order: " << *highestDepthNode
<< "\n");
1667 FinalNodeOrder
.push_back(highestDepthNode
);
1669 //Remove heightestDepthNode from IntersectOrder
1670 IntersectCurrent
.erase(highestDepthNode
);
1673 //Intersect heightDepthNode's pred with CurrentSet
1674 for(MSchedGraphNode::pred_iterator P
= highestDepthNode
->pred_begin(),
1675 E
= highestDepthNode
->pred_end(); P
!= E
; ++P
) {
1676 if(CurrentSet
->count(*P
)) {
1677 if(ignoreEdge(*P
, highestDepthNode
))
1680 //If not already in Intersect, add
1681 if(!IntersectCurrent
.count(*P
))
1682 IntersectCurrent
.insert(*P
);
1686 } //End while loop over Intersect Size
1691 //Reset IntersectCurrent to reflect changes in OrderNodes
1692 IntersectCurrent
.clear();
1693 succIntersect(*CurrentSet
, IntersectCurrent
);
1694 } //End if BOTTOM_DOWN
1696 DEBUG(std::cerr
<< "Current Intersection Size: " << IntersectCurrent
.size() << "\n");
1698 //End Wrapping while loop
1699 DEBUG(std::cerr
<< "Ending Size of Current Set: " << CurrentSet
->size() << "\n");
1700 }//End for over all sets of nodes
1702 //FIXME: As the algorithm stands it will NEVER add an instruction such as ba (with no
1703 //data dependencies) to the final order. We add this manually. It will always be
1704 //in the last set of S since its not part of a recurrence
1705 //Loop over all the sets and place them in the final node order
1706 std::vector
<std::set
<MSchedGraphNode
*> > ::reverse_iterator LastSet
= partialOrder
.rbegin();
1707 for(std::set
<MSchedGraphNode
*>::iterator CurrentNode
= LastSet
->begin(), LastNode
= LastSet
->end();
1708 CurrentNode
!= LastNode
; ++CurrentNode
) {
1709 if((*CurrentNode
)->getInst()->getOpcode() == V9::BA
)
1710 FinalNodeOrder
.push_back(*CurrentNode
);
1712 //Return final Order
1713 //return FinalNodeOrder;
1716 bool ModuloSchedulingPass::computeSchedule(const MachineBasicBlock
*BB
, MSchedGraph
*MSG
) {
1718 TIME_REGION(X
, "computeSchedule");
1720 bool success
= false;
1722 //FIXME: Should be set to max II of the original loop
1723 //Cap II in order to prevent infinite loop
1724 int capII
= MSG
->totalDelay();
1728 //Keep track of branches, but do not insert into the schedule
1729 std::vector
<MSchedGraphNode
*> branches
;
1731 //Loop over the final node order and process each node
1732 for(std::vector
<MSchedGraphNode
*>::iterator I
= FinalNodeOrder
.begin(),
1733 E
= FinalNodeOrder
.end(); I
!= E
; ++I
) {
1735 //CalculateEarly and Late start
1736 bool initialLSVal
= false;
1737 bool initialESVal
= false;
1740 bool hasSucc
= false;
1741 bool hasPred
= false;
1744 if((*I
)->isBranch())
1745 if((*I
)->hasPredecessors())
1753 //Loop over nodes in the schedule and determine if they are predecessors
1754 //or successors of the node we are trying to schedule
1755 for(MSSchedule::schedule_iterator nodesByCycle
= schedule
.begin(), nodesByCycleEnd
= schedule
.end();
1756 nodesByCycle
!= nodesByCycleEnd
; ++nodesByCycle
) {
1758 //For this cycle, get the vector of nodes schedule and loop over it
1759 for(std::vector
<MSchedGraphNode
*>::iterator schedNode
= nodesByCycle
->second
.begin(), SNE
= nodesByCycle
->second
.end(); schedNode
!= SNE
; ++schedNode
) {
1761 if((*I
)->isPredecessor(*schedNode
)) {
1762 int diff
= (*I
)->getInEdge(*schedNode
).getIteDiff();
1763 int ES_Temp
= nodesByCycle
->first
+ (*schedNode
)->getLatency() - diff
* II
;
1764 DEBUG(std::cerr
<< "Diff: " << diff
<< " Cycle: " << nodesByCycle
->first
<< "\n");
1765 DEBUG(std::cerr
<< "Temp EarlyStart: " << ES_Temp
<< " Prev EarlyStart: " << EarlyStart
<< "\n");
1767 EarlyStart
= std::max(EarlyStart
, ES_Temp
);
1769 EarlyStart
= ES_Temp
;
1770 initialESVal
= true;
1774 if((*I
)->isSuccessor(*schedNode
)) {
1775 int diff
= (*schedNode
)->getInEdge(*I
).getIteDiff();
1776 int LS_Temp
= nodesByCycle
->first
- (*I
)->getLatency() + diff
* II
;
1777 DEBUG(std::cerr
<< "Diff: " << diff
<< " Cycle: " << nodesByCycle
->first
<< "\n");
1778 DEBUG(std::cerr
<< "Temp LateStart: " << LS_Temp
<< " Prev LateStart: " << LateStart
<< "\n");
1780 LateStart
= std::min(LateStart
, LS_Temp
);
1782 LateStart
= LS_Temp
;
1783 initialLSVal
= true;
1791 branches
.push_back(*I
);
1795 //Check if this node is a pred or succ to a branch, and restrict its placement
1796 //even though the branch is not in the schedule
1797 /*int count = branches.size();
1798 for(std::vector<MSchedGraphNode*>::iterator B = branches.begin(), BE = branches.end();
1800 if((*I)->isPredecessor(*B)) {
1801 int diff = (*I)->getInEdge(*B).getIteDiff();
1802 int ES_Temp = (II+count-1) + (*B)->getLatency() - diff * II;
1803 DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << (II+count)-1 << "\n");
1804 DEBUG(std::cerr << "Temp EarlyStart: " << ES_Temp << " Prev EarlyStart: " << EarlyStart << "\n");
1805 EarlyStart = std::max(EarlyStart, ES_Temp);
1809 if((*I)->isSuccessor(*B)) {
1810 int diff = (*B)->getInEdge(*I).getIteDiff();
1811 int LS_Temp = (II+count-1) - (*I)->getLatency() + diff * II;
1812 DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << (II+count-1) << "\n");
1813 DEBUG(std::cerr << "Temp LateStart: " << LS_Temp << " Prev LateStart: " << LateStart << "\n");
1814 LateStart = std::min(LateStart, LS_Temp);
1821 //Check if the node has no pred or successors and set Early Start to its ASAP
1822 if(!hasSucc
&& !hasPred
)
1823 EarlyStart
= nodeToAttributesMap
.find(*I
)->second
.ASAP
;
1825 DEBUG(std::cerr
<< "Has Successors: " << hasSucc
<< ", Has Pred: " << hasPred
<< "\n");
1826 DEBUG(std::cerr
<< "EarlyStart: " << EarlyStart
<< ", LateStart: " << LateStart
<< "\n");
1828 //Now, try to schedule this node depending upon its pred and successor in the schedule
1830 if(!hasSucc
&& hasPred
)
1831 success
= scheduleNode(*I
, EarlyStart
, (EarlyStart
+ II
-1));
1832 else if(!hasPred
&& hasSucc
)
1833 success
= scheduleNode(*I
, LateStart
, (LateStart
- II
+1));
1834 else if(hasPred
&& hasSucc
) {
1835 if(EarlyStart
> LateStart
) {
1837 //LateStart = EarlyStart;
1838 DEBUG(std::cerr
<< "Early Start can not be later then the late start cycle, schedule fails\n");
1841 success
= scheduleNode(*I
, EarlyStart
, std::min(LateStart
, (EarlyStart
+ II
-1)));
1844 success
= scheduleNode(*I
, EarlyStart
, EarlyStart
+ II
- 1);
1855 DEBUG(std::cerr
<< "Constructing Schedule Kernel\n");
1856 success
= schedule
.constructKernel(II
, branches
, indVarInstrs
[BB
]);
1857 DEBUG(std::cerr
<< "Done Constructing Schedule Kernel\n");
1862 DEBUG(std::cerr
<< "Final II: " << II
<< "\n");
1867 DEBUG(std::cerr
<< "Maximum II reached, giving up\n");
1871 assert(II
< capII
&& "The II should not exceed the original loop number of cycles");
1877 bool ModuloSchedulingPass::scheduleNode(MSchedGraphNode
*node
,
1878 int start
, int end
) {
1879 bool success
= false;
1881 DEBUG(std::cerr
<< *node
<< " (Start Cycle: " << start
<< ", End Cycle: " << end
<< ")\n");
1883 //Make sure start and end are not negative
1891 bool forward
= true;
1895 bool increaseSC
= true;
1903 increaseSC
= schedule
.insert(node
, cycle
);
1908 //Increment cycle to try again
1911 DEBUG(std::cerr
<< "Increase cycle: " << cycle
<< "\n");
1917 DEBUG(std::cerr
<< "Decrease cycle: " << cycle
<< "\n");
1926 void ModuloSchedulingPass::writePrologues(std::vector
<MachineBasicBlock
*> &prologues
, MachineBasicBlock
*origBB
, std::vector
<BasicBlock
*> &llvm_prologues
, std::map
<const Value
*, std::pair
<const MachineInstr
*, int> > &valuesToSave
, std::map
<Value
*, std::map
<int, Value
*> > &newValues
, std::map
<Value
*, MachineBasicBlock
*> &newValLocation
) {
1928 //Keep a map to easily know whats in the kernel
1929 std::map
<int, std::set
<const MachineInstr
*> > inKernel
;
1930 int maxStageCount
= 0;
1932 //Keep a map of new values we consumed in case they need to be added back
1933 std::map
<Value
*, std::map
<int, Value
*> > consumedValues
;
1935 MSchedGraphNode
*branch
= 0;
1936 MSchedGraphNode
*BAbranch
= 0;
1938 DEBUG(schedule
.print(std::cerr
));
1940 std::vector
<MSchedGraphNode
*> branches
;
1942 for(MSSchedule::kernel_iterator I
= schedule
.kernel_begin(), E
= schedule
.kernel_end(); I
!= E
; ++I
) {
1943 maxStageCount
= std::max(maxStageCount
, I
->second
);
1945 //Put int the map so we know what instructions in each stage are in the kernel
1946 DEBUG(std::cerr
<< "Inserting instruction " << *(I
->first
) << " into map at stage " << I
->second
<< "\n");
1947 inKernel
[I
->second
].insert(I
->first
);
1950 //Get target information to look at machine operands
1951 const TargetInstrInfo
*mii
= target
.getInstrInfo();
1953 //Now write the prologues
1954 for(int i
= 0; i
< maxStageCount
; ++i
) {
1955 BasicBlock
*llvmBB
= new BasicBlock("PROLOGUE", (Function
*) (origBB
->getBasicBlock()->getParent()));
1956 MachineBasicBlock
*machineBB
= new MachineBasicBlock(llvmBB
);
1958 DEBUG(std::cerr
<< "i=" << i
<< "\n");
1959 for(int j
= i
; j
>= 0; --j
) {
1960 for(MachineBasicBlock::const_iterator MI
= origBB
->begin(), ME
= origBB
->end(); ME
!= MI
; ++MI
) {
1961 if(inKernel
[j
].count(&*MI
)) {
1962 MachineInstr
*instClone
= MI
->clone();
1963 machineBB
->push_back(instClone
);
1965 //If its a branch, insert a nop
1966 if(mii
->isBranch(instClone
->getOpcode()))
1967 BuildMI(machineBB
, V9::NOP
, 0);
1970 DEBUG(std::cerr
<< "Cloning: " << *MI
<< "\n");
1972 //After cloning, we may need to save the value that this instruction defines
1973 for(unsigned opNum
=0; opNum
< MI
->getNumOperands(); ++opNum
) {
1976 //get machine operand
1977 MachineOperand
&mOp
= instClone
->getOperand(opNum
);
1978 if(mOp
.getType() == MachineOperand::MO_VirtualRegister
&& mOp
.isDef()) {
1980 //Check if this is a value we should save
1981 if(valuesToSave
.count(mOp
.getVRegValue())) {
1982 //Save copy in tmpInstruction
1983 tmp
= new TmpInstruction(mOp
.getVRegValue());
1985 //Add TmpInstruction to safe LLVM Instruction MCFI
1986 MachineCodeForInstruction
& tempMvec
= MachineCodeForInstruction::get(defaultInst
);
1987 tempMvec
.addTemp((Value
*) tmp
);
1989 DEBUG(std::cerr
<< "Value: " << *(mOp
.getVRegValue()) << " New Value: " << *tmp
<< " Stage: " << i
<< "\n");
1991 newValues
[mOp
.getVRegValue()][i
]= tmp
;
1992 newValLocation
[tmp
] = machineBB
;
1994 DEBUG(std::cerr
<< "Machine Instr Operands: " << *(mOp
.getVRegValue()) << ", 0, " << *tmp
<< "\n");
1996 //Create machine instruction and put int machineBB
1997 MachineInstr
*saveValue
;
1998 if(mOp
.getVRegValue()->getType() == Type::FloatTy
)
1999 saveValue
= BuildMI(machineBB
, V9::FMOVS
, 3).addReg(mOp
.getVRegValue()).addRegDef(tmp
);
2000 else if(mOp
.getVRegValue()->getType() == Type::DoubleTy
)
2001 saveValue
= BuildMI(machineBB
, V9::FMOVD
, 3).addReg(mOp
.getVRegValue()).addRegDef(tmp
);
2003 saveValue
= BuildMI(machineBB
, V9::ORr
, 3).addReg(mOp
.getVRegValue()).addImm(0).addRegDef(tmp
);
2006 DEBUG(std::cerr
<< "Created new machine instr: " << *saveValue
<< "\n");
2010 //We may also need to update the value that we use if its from an earlier prologue
2012 if(mOp
.getType() == MachineOperand::MO_VirtualRegister
&& mOp
.isUse()) {
2013 if(newValues
.count(mOp
.getVRegValue())) {
2014 if(newValues
[mOp
.getVRegValue()].count(i
-1)) {
2015 Value
*oldV
= mOp
.getVRegValue();
2016 DEBUG(std::cerr
<< "Replaced this value: " << mOp
.getVRegValue() << " With:" << (newValues
[mOp
.getVRegValue()][i
-1]) << "\n");
2017 //Update the operand with the right value
2018 mOp
.setValueReg(newValues
[mOp
.getVRegValue()][i
-1]);
2020 //Remove this value since we have consumed it
2021 //NOTE: Should this only be done if j != maxStage?
2022 consumedValues
[oldV
][i
-1] = (newValues
[oldV
][i
-1]);
2023 DEBUG(std::cerr
<< "Deleted value: " << consumedValues
[oldV
][i
-1] << "\n");
2024 newValues
[oldV
].erase(i
-1);
2028 if(consumedValues
.count(mOp
.getVRegValue()))
2029 assert(!consumedValues
[mOp
.getVRegValue()].count(i
-1) && "Found a case where we need the value");
2038 /*for(std::vector<MSchedGraphNode*>::iterator BR = branches.begin(), BE = branches.end(); BR != BE; ++BR) {
2040 //Stick in branch at the end
2041 machineBB->push_back((*BR)->getInst()->clone());
2044 BuildMI(machineBB, V9::NOP, 0);
2048 (((MachineBasicBlock
*)origBB
)->getParent())->getBasicBlockList().push_back(machineBB
);
2049 prologues
.push_back(machineBB
);
2050 llvm_prologues
.push_back(llvmBB
);
2054 void ModuloSchedulingPass::writeEpilogues(std::vector
<MachineBasicBlock
*> &epilogues
, const MachineBasicBlock
*origBB
, std::vector
<BasicBlock
*> &llvm_epilogues
, std::map
<const Value
*, std::pair
<const MachineInstr
*, int> > &valuesToSave
, std::map
<Value
*, std::map
<int, Value
*> > &newValues
,std::map
<Value
*, MachineBasicBlock
*> &newValLocation
, std::map
<Value
*, std::map
<int, Value
*> > &kernelPHIs
) {
2056 std::map
<int, std::set
<const MachineInstr
*> > inKernel
;
2058 for(MSSchedule::kernel_iterator I
= schedule
.kernel_begin(), E
= schedule
.kernel_end(); I
!= E
; ++I
) {
2060 //Ignore the branch, we will handle this separately
2061 //if(I->first->isBranch())
2064 //Put int the map so we know what instructions in each stage are in the kernel
2065 inKernel
[I
->second
].insert(I
->first
);
2068 std::map
<Value
*, Value
*> valPHIs
;
2070 //some debug stuff, will remove later
2071 DEBUG(for(std::map
<Value
*, std::map
<int, Value
*> >::iterator V
= newValues
.begin(), E
= newValues
.end(); V
!=E
; ++V
) {
2072 std::cerr
<< "Old Value: " << *(V
->first
) << "\n";
2073 for(std::map
<int, Value
*>::iterator I
= V
->second
.begin(), IE
= V
->second
.end(); I
!= IE
; ++I
)
2074 std::cerr
<< "Stage: " << I
->first
<< " Value: " << *(I
->second
) << "\n";
2077 //some debug stuff, will remove later
2078 DEBUG(for(std::map
<Value
*, std::map
<int, Value
*> >::iterator V
= kernelPHIs
.begin(), E
= kernelPHIs
.end(); V
!=E
; ++V
) {
2079 std::cerr
<< "Old Value: " << *(V
->first
) << "\n";
2080 for(std::map
<int, Value
*>::iterator I
= V
->second
.begin(), IE
= V
->second
.end(); I
!= IE
; ++I
)
2081 std::cerr
<< "Stage: " << I
->first
<< " Value: " << *(I
->second
) << "\n";
2084 //Now write the epilogues
2085 for(int i
= schedule
.getMaxStage()-1; i
>= 0; --i
) {
2086 BasicBlock
*llvmBB
= new BasicBlock("EPILOGUE", (Function
*) (origBB
->getBasicBlock()->getParent()));
2087 MachineBasicBlock
*machineBB
= new MachineBasicBlock(llvmBB
);
2089 DEBUG(std::cerr
<< " Epilogue #: " << i
<< "\n");
2092 std::map
<Value
*, int> inEpilogue
;
2094 for(MachineBasicBlock::const_iterator MI
= origBB
->begin(), ME
= origBB
->end(); ME
!= MI
; ++MI
) {
2095 for(int j
=schedule
.getMaxStage(); j
> i
; --j
) {
2096 if(inKernel
[j
].count(&*MI
)) {
2097 DEBUG(std::cerr
<< "Cloning instruction " << *MI
<< "\n");
2098 MachineInstr
*clone
= MI
->clone();
2100 //Update operands that need to use the result from the phi
2101 for(unsigned opNum
=0; opNum
< clone
->getNumOperands(); ++opNum
) {
2102 //get machine operand
2103 const MachineOperand
&mOp
= clone
->getOperand(opNum
);
2105 if((mOp
.getType() == MachineOperand::MO_VirtualRegister
&& mOp
.isUse())) {
2107 DEBUG(std::cerr
<< "Writing PHI for " << (mOp
.getVRegValue()) << "\n");
2109 //If this is the last instructions for the max iterations ago, don't update operands
2110 if(inEpilogue
.count(mOp
.getVRegValue()))
2111 if(inEpilogue
[mOp
.getVRegValue()] == i
)
2114 //Quickly write appropriate phis for this operand
2115 if(newValues
.count(mOp
.getVRegValue())) {
2116 if(newValues
[mOp
.getVRegValue()].count(i
)) {
2117 Instruction
*tmp
= new TmpInstruction(newValues
[mOp
.getVRegValue()][i
]);
2119 //Get machine code for this instruction
2120 MachineCodeForInstruction
& tempMvec
= MachineCodeForInstruction::get(defaultInst
);
2121 tempMvec
.addTemp((Value
*) tmp
);
2123 //assert of no kernelPHI for this value
2124 assert(kernelPHIs
[mOp
.getVRegValue()][i
] !=0 && "Must have final kernel phi to construct epilogue phi");
2126 MachineInstr
*saveValue
= BuildMI(machineBB
, V9::PHI
, 3).addReg(newValues
[mOp
.getVRegValue()][i
]).addReg(kernelPHIs
[mOp
.getVRegValue()][i
]).addRegDef(tmp
);
2127 DEBUG(std::cerr
<< "Resulting PHI: " << *saveValue
<< "\n");
2128 valPHIs
[mOp
.getVRegValue()] = tmp
;
2132 if(valPHIs
.count(mOp
.getVRegValue())) {
2133 //Update the operand in the cloned instruction
2134 clone
->getOperand(opNum
).setValueReg(valPHIs
[mOp
.getVRegValue()]);
2137 else if((mOp
.getType() == MachineOperand::MO_VirtualRegister
&& mOp
.isDef())) {
2138 inEpilogue
[mOp
.getVRegValue()] = i
;
2141 machineBB
->push_back(clone
);
2146 (((MachineBasicBlock
*)origBB
)->getParent())->getBasicBlockList().push_back(machineBB
);
2147 epilogues
.push_back(machineBB
);
2148 llvm_epilogues
.push_back(llvmBB
);
2150 DEBUG(std::cerr
<< "EPILOGUE #" << i
<< "\n");
2151 DEBUG(machineBB
->print(std::cerr
));
2155 void ModuloSchedulingPass::writeKernel(BasicBlock
*llvmBB
, MachineBasicBlock
*machineBB
, std::map
<const Value
*, std::pair
<const MachineInstr
*, int> > &valuesToSave
, std::map
<Value
*, std::map
<int, Value
*> > &newValues
, std::map
<Value
*, MachineBasicBlock
*> &newValLocation
, std::map
<Value
*, std::map
<int, Value
*> > &kernelPHIs
) {
2157 //Keep track of operands that are read and saved from a previous iteration. The new clone
2158 //instruction will use the result of the phi instead.
2159 std::map
<Value
*, Value
*> finalPHIValue
;
2160 std::map
<Value
*, Value
*> kernelValue
;
2162 //Branches are a special case
2163 std::vector
<MachineInstr
*> branches
;
2165 //Get target information to look at machine operands
2166 const TargetInstrInfo
*mii
= target
.getInstrInfo();
2168 //Create TmpInstructions for the final phis
2169 for(MSSchedule::kernel_iterator I
= schedule
.kernel_begin(), E
= schedule
.kernel_end(); I
!= E
; ++I
) {
2171 DEBUG(std::cerr
<< "Stage: " << I
->second
<< " Inst: " << *(I
->first
) << "\n";);
2173 /*if(I->first->isBranch()) {
2175 const MachineInstr *inst = I->first->getInst();
2176 MachineInstr *instClone = inst->clone();
2177 branches.push_back(instClone);
2182 const MachineInstr
*inst
= I
->first
;
2183 MachineInstr
*instClone
= inst
->clone();
2185 //Insert into machine basic block
2186 machineBB
->push_back(instClone
);
2188 if(mii
->isBranch(instClone
->getOpcode()))
2189 BuildMI(machineBB
, V9::NOP
, 0);
2191 DEBUG(std::cerr
<< "Cloned Inst: " << *instClone
<< "\n");
2193 //Loop over Machine Operands
2194 for(unsigned i
=0; i
< inst
->getNumOperands(); ++i
) {
2195 //get machine operand
2196 const MachineOperand
&mOp
= inst
->getOperand(i
);
2198 if(I
->second
!= 0) {
2199 if(mOp
.getType() == MachineOperand::MO_VirtualRegister
&& mOp
.isUse()) {
2201 //Check to see where this operand is defined if this instruction is from max stage
2202 if(I
->second
== schedule
.getMaxStage()) {
2203 DEBUG(std::cerr
<< "VREG: " << *(mOp
.getVRegValue()) << "\n");
2206 //If its in the value saved, we need to create a temp instruction and use that instead
2207 if(valuesToSave
.count(mOp
.getVRegValue())) {
2209 //Check if we already have a final PHI value for this
2210 if(!finalPHIValue
.count(mOp
.getVRegValue())) {
2211 TmpInstruction
*tmp
= new TmpInstruction(mOp
.getVRegValue());
2213 //Get machine code for this instruction
2214 MachineCodeForInstruction
& tempMvec
= MachineCodeForInstruction::get(defaultInst
);
2215 tempMvec
.addTemp((Value
*) tmp
);
2217 //Update the operand in the cloned instruction
2218 instClone
->getOperand(i
).setValueReg(tmp
);
2220 //save this as our final phi
2221 finalPHIValue
[mOp
.getVRegValue()] = tmp
;
2222 newValLocation
[tmp
] = machineBB
;
2225 //Use the previous final phi value
2226 instClone
->getOperand(i
).setValueReg(finalPHIValue
[mOp
.getVRegValue()]);
2231 if(I
->second
!= schedule
.getMaxStage()) {
2232 if(mOp
.getType() == MachineOperand::MO_VirtualRegister
&& mOp
.isDef()) {
2233 if(valuesToSave
.count(mOp
.getVRegValue())) {
2235 TmpInstruction
*tmp
= new TmpInstruction(mOp
.getVRegValue());
2237 //Get machine code for this instruction
2238 MachineCodeForInstruction
& tempVec
= MachineCodeForInstruction::get(defaultInst
);
2239 tempVec
.addTemp((Value
*) tmp
);
2241 //Create new machine instr and put in MBB
2242 MachineInstr
*saveValue
;
2243 if(mOp
.getVRegValue()->getType() == Type::FloatTy
)
2244 saveValue
= BuildMI(machineBB
, V9::FMOVS
, 3).addReg(mOp
.getVRegValue()).addRegDef(tmp
);
2245 else if(mOp
.getVRegValue()->getType() == Type::DoubleTy
)
2246 saveValue
= BuildMI(machineBB
, V9::FMOVD
, 3).addReg(mOp
.getVRegValue()).addRegDef(tmp
);
2248 saveValue
= BuildMI(machineBB
, V9::ORr
, 3).addReg(mOp
.getVRegValue()).addImm(0).addRegDef(tmp
);
2251 //Save for future cleanup
2252 kernelValue
[mOp
.getVRegValue()] = tmp
;
2253 newValLocation
[tmp
] = machineBB
;
2254 kernelPHIs
[mOp
.getVRegValue()][schedule
.getMaxStage()-1] = tmp
;
2263 for(std::vector
<MachineInstr
*>::iterator I
= branches
.begin(), E
= branches
.end(); I
!= E
; ++I
) {
2264 machineBB
->push_back(*I
);
2265 BuildMI(machineBB
, V9::NOP
, 0);
2269 DEBUG(std::cerr
<< "KERNEL before PHIs\n");
2270 DEBUG(machineBB
->print(std::cerr
));
2273 //Loop over each value we need to generate phis for
2274 for(std::map
<Value
*, std::map
<int, Value
*> >::iterator V
= newValues
.begin(),
2275 E
= newValues
.end(); V
!= E
; ++V
) {
2278 DEBUG(std::cerr
<< "Writing phi for" << *(V
->first
));
2279 DEBUG(std::cerr
<< "\nMap of Value* for this phi\n");
2280 DEBUG(for(std::map
<int, Value
*>::iterator I
= V
->second
.begin(),
2281 IE
= V
->second
.end(); I
!= IE
; ++I
) {
2282 std::cerr
<< "Stage: " << I
->first
;
2283 std::cerr
<< " Value: " << *(I
->second
) << "\n";
2286 //If we only have one current iteration live, its safe to set lastPhi = to kernel value
2287 if(V
->second
.size() == 1) {
2288 assert(kernelValue
[V
->first
] != 0 && "Kernel value* must exist to create phi");
2289 MachineInstr
*saveValue
= BuildMI(*machineBB
, machineBB
->begin(),V9::PHI
, 3).addReg(V
->second
.begin()->second
).addReg(kernelValue
[V
->first
]).addRegDef(finalPHIValue
[V
->first
]);
2290 DEBUG(std::cerr
<< "Resulting PHI (one live): " << *saveValue
<< "\n");
2291 kernelPHIs
[V
->first
][V
->second
.begin()->first
] = kernelValue
[V
->first
];
2292 DEBUG(std::cerr
<< "Put kernel phi in at stage: " << schedule
.getMaxStage()-1 << " (map stage = " << V
->second
.begin()->first
<< ")\n");
2296 //Keep track of last phi created.
2297 Instruction
*lastPhi
= 0;
2300 //Loop over the the map backwards to generate phis
2301 for(std::map
<int, Value
*>::reverse_iterator I
= V
->second
.rbegin(), IE
= V
->second
.rend();
2304 if(count
< (V
->second
).size()) {
2306 lastPhi
= new TmpInstruction(I
->second
);
2308 //Get machine code for this instruction
2309 MachineCodeForInstruction
& tempMvec
= MachineCodeForInstruction::get(defaultInst
);
2310 tempMvec
.addTemp((Value
*) lastPhi
);
2312 MachineInstr
*saveValue
= BuildMI(*machineBB
, machineBB
->begin(), V9::PHI
, 3).addReg(kernelValue
[V
->first
]).addReg(I
->second
).addRegDef(lastPhi
);
2313 DEBUG(std::cerr
<< "Resulting PHI: " << *saveValue
<< "\n");
2314 newValLocation
[lastPhi
] = machineBB
;
2317 Instruction
*tmp
= new TmpInstruction(I
->second
);
2319 //Get machine code for this instruction
2320 MachineCodeForInstruction
& tempMvec
= MachineCodeForInstruction::get(defaultInst
);
2321 tempMvec
.addTemp((Value
*) tmp
);
2324 MachineInstr
*saveValue
= BuildMI(*machineBB
, machineBB
->begin(), V9::PHI
, 3).addReg(lastPhi
).addReg(I
->second
).addRegDef(tmp
);
2325 DEBUG(std::cerr
<< "Resulting PHI: " << *saveValue
<< "\n");
2327 kernelPHIs
[V
->first
][I
->first
] = lastPhi
;
2328 newValLocation
[lastPhi
] = machineBB
;
2333 //The resulting value must be the Value* we created earlier
2334 assert(lastPhi
!= 0 && "Last phi is NULL!\n");
2335 MachineInstr
*saveValue
= BuildMI(*machineBB
, machineBB
->begin(), V9::PHI
, 3).addReg(lastPhi
).addReg(I
->second
).addRegDef(finalPHIValue
[V
->first
]);
2336 DEBUG(std::cerr
<< "Resulting PHI: " << *saveValue
<< "\n");
2337 kernelPHIs
[V
->first
][I
->first
] = finalPHIValue
[V
->first
];
2346 DEBUG(std::cerr
<< "KERNEL after PHIs\n");
2347 DEBUG(machineBB
->print(std::cerr
));
2351 void ModuloSchedulingPass::removePHIs(const MachineBasicBlock
*origBB
, std::vector
<MachineBasicBlock
*> &prologues
, std::vector
<MachineBasicBlock
*> &epilogues
, MachineBasicBlock
*kernelBB
, std::map
<Value
*, MachineBasicBlock
*> &newValLocation
) {
2353 //Worklist to delete things
2354 std::vector
<std::pair
<MachineBasicBlock
*, MachineBasicBlock::iterator
> > worklist
;
2356 //Worklist of TmpInstructions that need to be added to a MCFI
2357 std::vector
<Instruction
*> addToMCFI
;
2359 //Worklist to add OR instructions to end of kernel so not to invalidate the iterator
2360 //std::vector<std::pair<Instruction*, Value*> > newORs;
2362 const TargetInstrInfo
*TMI
= target
.getInstrInfo();
2364 //Start with the kernel and for each phi insert a copy for the phi def and for each arg
2365 for(MachineBasicBlock::iterator I
= kernelBB
->begin(), E
= kernelBB
->end(); I
!= E
; ++I
) {
2367 DEBUG(std::cerr
<< "Looking at Instr: " << *I
<< "\n");
2368 //Get op code and check if its a phi
2369 if(I
->getOpcode() == V9::PHI
) {
2371 DEBUG(std::cerr
<< "Replacing PHI: " << *I
<< "\n");
2372 Instruction
*tmp
= 0;
2374 for(unsigned i
= 0; i
< I
->getNumOperands(); ++i
) {
2376 const MachineOperand
&mOp
= I
->getOperand(i
);
2377 assert(mOp
.getType() == MachineOperand::MO_VirtualRegister
&& "Should be a Value*\n");
2380 tmp
= new TmpInstruction(mOp
.getVRegValue());
2381 addToMCFI
.push_back(tmp
);
2384 //Now for all our arguments we read, OR to the new TmpInstruction that we created
2386 DEBUG(std::cerr
<< "Use: " << mOp
<< "\n");
2387 //Place a copy at the end of its BB but before the branches
2388 assert(newValLocation
.count(mOp
.getVRegValue()) && "We must know where this value is located\n");
2389 //Reverse iterate to find the branches, we can safely assume no instructions have been
2390 //put in the nop positions
2391 for(MachineBasicBlock::iterator inst
= --(newValLocation
[mOp
.getVRegValue()])->end(), endBB
= (newValLocation
[mOp
.getVRegValue()])->begin(); inst
!= endBB
; --inst
) {
2392 MachineOpCode opc
= inst
->getOpcode();
2393 if(TMI
->isBranch(opc
) || TMI
->isNop(opc
))
2396 if(mOp
.getVRegValue()->getType() == Type::FloatTy
)
2397 BuildMI(*(newValLocation
[mOp
.getVRegValue()]), ++inst
, V9::FMOVS
, 3).addReg(mOp
.getVRegValue()).addRegDef(tmp
);
2398 else if(mOp
.getVRegValue()->getType() == Type::DoubleTy
)
2399 BuildMI(*(newValLocation
[mOp
.getVRegValue()]), ++inst
, V9::FMOVD
, 3).addReg(mOp
.getVRegValue()).addRegDef(tmp
);
2401 BuildMI(*(newValLocation
[mOp
.getVRegValue()]), ++inst
, V9::ORr
, 3).addReg(mOp
.getVRegValue()).addImm(0).addRegDef(tmp
);
2410 //Remove the phi and replace it with an OR
2411 DEBUG(std::cerr
<< "Def: " << mOp
<< "\n");
2412 //newORs.push_back(std::make_pair(tmp, mOp.getVRegValue()));
2413 if(tmp
->getType() == Type::FloatTy
)
2414 BuildMI(*kernelBB
, I
, V9::FMOVS
, 3).addReg(tmp
).addRegDef(mOp
.getVRegValue());
2415 else if(tmp
->getType() == Type::DoubleTy
)
2416 BuildMI(*kernelBB
, I
, V9::FMOVD
, 3).addReg(tmp
).addRegDef(mOp
.getVRegValue());
2418 BuildMI(*kernelBB
, I
, V9::ORr
, 3).addReg(tmp
).addImm(0).addRegDef(mOp
.getVRegValue());
2421 worklist
.push_back(std::make_pair(kernelBB
, I
));
2431 //Add TmpInstructions to some MCFI
2432 if(addToMCFI
.size() > 0) {
2433 MachineCodeForInstruction
& tempMvec
= MachineCodeForInstruction::get(defaultInst
);
2434 for(unsigned x
= 0; x
< addToMCFI
.size(); ++x
) {
2435 tempMvec
.addTemp(addToMCFI
[x
]);
2441 //Remove phis from epilogue
2442 for(std::vector
<MachineBasicBlock
*>::iterator MB
= epilogues
.begin(), ME
= epilogues
.end(); MB
!= ME
; ++MB
) {
2443 for(MachineBasicBlock::iterator I
= (*MB
)->begin(), E
= (*MB
)->end(); I
!= E
; ++I
) {
2445 DEBUG(std::cerr
<< "Looking at Instr: " << *I
<< "\n");
2446 //Get op code and check if its a phi
2447 if(I
->getOpcode() == V9::PHI
) {
2448 Instruction
*tmp
= 0;
2450 for(unsigned i
= 0; i
< I
->getNumOperands(); ++i
) {
2452 const MachineOperand
&mOp
= I
->getOperand(i
);
2453 assert(mOp
.getType() == MachineOperand::MO_VirtualRegister
&& "Should be a Value*\n");
2456 tmp
= new TmpInstruction(mOp
.getVRegValue());
2457 addToMCFI
.push_back(tmp
);
2460 //Now for all our arguments we read, OR to the new TmpInstruction that we created
2462 DEBUG(std::cerr
<< "Use: " << mOp
<< "\n");
2463 //Place a copy at the end of its BB but before the branches
2464 assert(newValLocation
.count(mOp
.getVRegValue()) && "We must know where this value is located\n");
2465 //Reverse iterate to find the branches, we can safely assume no instructions have been
2466 //put in the nop positions
2467 for(MachineBasicBlock::iterator inst
= --(newValLocation
[mOp
.getVRegValue()])->end(), endBB
= (newValLocation
[mOp
.getVRegValue()])->begin(); inst
!= endBB
; --inst
) {
2468 MachineOpCode opc
= inst
->getOpcode();
2469 if(TMI
->isBranch(opc
) || TMI
->isNop(opc
))
2472 if(mOp
.getVRegValue()->getType() == Type::FloatTy
)
2473 BuildMI(*(newValLocation
[mOp
.getVRegValue()]), ++inst
, V9::FMOVS
, 3).addReg(mOp
.getVRegValue()).addRegDef(tmp
);
2474 else if(mOp
.getVRegValue()->getType() == Type::DoubleTy
)
2475 BuildMI(*(newValLocation
[mOp
.getVRegValue()]), ++inst
, V9::FMOVD
, 3).addReg(mOp
.getVRegValue()).addRegDef(tmp
);
2477 BuildMI(*(newValLocation
[mOp
.getVRegValue()]), ++inst
, V9::ORr
, 3).addReg(mOp
.getVRegValue()).addImm(0).addRegDef(tmp
);
2487 //Remove the phi and replace it with an OR
2488 DEBUG(std::cerr
<< "Def: " << mOp
<< "\n");
2489 if(tmp
->getType() == Type::FloatTy
)
2490 BuildMI(**MB
, I
, V9::FMOVS
, 3).addReg(tmp
).addRegDef(mOp
.getVRegValue());
2491 else if(tmp
->getType() == Type::DoubleTy
)
2492 BuildMI(**MB
, I
, V9::FMOVD
, 3).addReg(tmp
).addRegDef(mOp
.getVRegValue());
2494 BuildMI(**MB
, I
, V9::ORr
, 3).addReg(tmp
).addImm(0).addRegDef(mOp
.getVRegValue());
2496 worklist
.push_back(std::make_pair(*MB
,I
));
2507 if(addToMCFI
.size() > 0) {
2508 MachineCodeForInstruction
& tempMvec
= MachineCodeForInstruction::get(defaultInst
);
2509 for(unsigned x
= 0; x
< addToMCFI
.size(); ++x
) {
2510 tempMvec
.addTemp(addToMCFI
[x
]);
2516 for(std::vector
<std::pair
<MachineBasicBlock
*, MachineBasicBlock::iterator
> >::iterator I
= worklist
.begin(), E
= worklist
.end(); I
!= E
; ++I
) {
2518 DEBUG(std::cerr
<< "Deleting PHI " << *I
->second
<< "\n");
2519 I
->first
->erase(I
->second
);
2524 assert((addToMCFI
.size() == 0) && "We should have added all TmpInstructions to some MachineCodeForInstruction");
2528 void ModuloSchedulingPass::reconstructLoop(MachineBasicBlock
*BB
) {
2530 TIME_REGION(X
, "reconstructLoop");
2533 DEBUG(std::cerr
<< "Reconstructing Loop\n");
2535 //First find the value *'s that we need to "save"
2536 std::map
<const Value
*, std::pair
<const MachineInstr
*, int> > valuesToSave
;
2538 //Keep track of instructions we have already seen and their stage because
2539 //we don't want to "save" values if they are used in the kernel immediately
2540 std::map
<const MachineInstr
*, int> lastInstrs
;
2542 //Loop over kernel and only look at instructions from a stage > 0
2543 //Look at its operands and save values *'s that are read
2544 for(MSSchedule::kernel_iterator I
= schedule
.kernel_begin(), E
= schedule
.kernel_end(); I
!= E
; ++I
) {
2547 //For this instruction, get the Value*'s that it reads and put them into the set.
2548 //Assert if there is an operand of another type that we need to save
2549 const MachineInstr
*inst
= I
->first
;
2550 lastInstrs
[inst
] = I
->second
;
2552 for(unsigned i
=0; i
< inst
->getNumOperands(); ++i
) {
2553 //get machine operand
2554 const MachineOperand
&mOp
= inst
->getOperand(i
);
2556 if(mOp
.getType() == MachineOperand::MO_VirtualRegister
&& mOp
.isUse()) {
2557 //find the value in the map
2558 if (const Value
* srcI
= mOp
.getVRegValue()) {
2560 if(isa
<Constant
>(srcI
) || isa
<Argument
>(srcI
) || isa
<PHINode
>(srcI
))
2563 //Before we declare this Value* one that we should save
2564 //make sure its def is not of the same stage as this instruction
2565 //because it will be consumed before its used
2566 Instruction
*defInst
= (Instruction
*) srcI
;
2568 //Should we save this value?
2571 //Continue if not in the def map, loop invariant code does not need to be saved
2572 if(!defMap
.count(srcI
))
2575 MachineInstr
*defInstr
= defMap
[srcI
];
2578 if(lastInstrs
.count(defInstr
)) {
2579 if(lastInstrs
[defInstr
] == I
->second
) {
2586 valuesToSave
[srcI
] = std::make_pair(I
->first
, i
);
2590 if(mOp
.getType() != MachineOperand::MO_VirtualRegister
&& mOp
.isUse()) {
2591 assert("Our assumption is wrong. We have another type of register that needs to be saved\n");
2597 //The new loop will consist of one or more prologues, the kernel, and one or more epilogues.
2599 //Map to keep track of old to new values
2600 std::map
<Value
*, std::map
<int, Value
*> > newValues
;
2602 //Map to keep track of old to new values in kernel
2603 std::map
<Value
*, std::map
<int, Value
*> > kernelPHIs
;
2605 //Another map to keep track of what machine basic blocks these new value*s are in since
2606 //they have no llvm instruction equivalent
2607 std::map
<Value
*, MachineBasicBlock
*> newValLocation
;
2609 std::vector
<MachineBasicBlock
*> prologues
;
2610 std::vector
<BasicBlock
*> llvm_prologues
;
2614 if(schedule
.getMaxStage() != 0)
2615 writePrologues(prologues
, BB
, llvm_prologues
, valuesToSave
, newValues
, newValLocation
);
2617 //Print out epilogues and prologue
2618 DEBUG(for(std::vector
<MachineBasicBlock
*>::iterator I
= prologues
.begin(), E
= prologues
.end();
2620 std::cerr
<< "PROLOGUE\n";
2621 (*I
)->print(std::cerr
);
2624 BasicBlock
*llvmKernelBB
= new BasicBlock("Kernel", (Function
*) (BB
->getBasicBlock()->getParent()));
2625 MachineBasicBlock
*machineKernelBB
= new MachineBasicBlock(llvmKernelBB
);
2626 (((MachineBasicBlock
*)BB
)->getParent())->getBasicBlockList().push_back(machineKernelBB
);
2627 writeKernel(llvmKernelBB
, machineKernelBB
, valuesToSave
, newValues
, newValLocation
, kernelPHIs
);
2630 std::vector
<MachineBasicBlock
*> epilogues
;
2631 std::vector
<BasicBlock
*> llvm_epilogues
;
2634 if(schedule
.getMaxStage() != 0)
2635 writeEpilogues(epilogues
, BB
, llvm_epilogues
, valuesToSave
, newValues
, newValLocation
, kernelPHIs
);
2639 fixBranches(prologues
, llvm_prologues
, machineKernelBB
, llvmKernelBB
, epilogues
, llvm_epilogues
, BB
);
2642 removePHIs(BB
, prologues
, epilogues
, machineKernelBB
, newValLocation
);
2644 //Print out epilogues and prologue
2645 DEBUG(for(std::vector
<MachineBasicBlock
*>::iterator I
= prologues
.begin(), E
= prologues
.end();
2647 std::cerr
<< "PROLOGUE\n";
2648 (*I
)->print(std::cerr
);
2651 DEBUG(std::cerr
<< "KERNEL\n");
2652 DEBUG(machineKernelBB
->print(std::cerr
));
2654 DEBUG(for(std::vector
<MachineBasicBlock
*>::iterator I
= epilogues
.begin(), E
= epilogues
.end();
2656 std::cerr
<< "EPILOGUE\n";
2657 (*I
)->print(std::cerr
);
2661 DEBUG(std::cerr
<< "New Machine Function" << "\n");
2662 DEBUG(std::cerr
<< BB
->getParent() << "\n");
2667 void ModuloSchedulingPass::fixBranches(std::vector
<MachineBasicBlock
*> &prologues
, std::vector
<BasicBlock
*> &llvm_prologues
, MachineBasicBlock
*machineKernelBB
, BasicBlock
*llvmKernelBB
, std::vector
<MachineBasicBlock
*> &epilogues
, std::vector
<BasicBlock
*> &llvm_epilogues
, MachineBasicBlock
*BB
) {
2669 const TargetInstrInfo
*TMI
= target
.getInstrInfo();
2671 if(schedule
.getMaxStage() != 0) {
2672 //Fix prologue branches
2673 for(unsigned I
= 0; I
< prologues
.size(); ++I
) {
2675 //Find terminator since getFirstTerminator does not work!
2676 for(MachineBasicBlock::reverse_iterator mInst
= prologues
[I
]->rbegin(), mInstEnd
= prologues
[I
]->rend(); mInst
!= mInstEnd
; ++mInst
) {
2677 MachineOpCode OC
= mInst
->getOpcode();
2678 //If its a branch update its branchto
2679 if(TMI
->isBranch(OC
)) {
2680 for(unsigned opNum
= 0; opNum
< mInst
->getNumOperands(); ++opNum
) {
2681 MachineOperand
&mOp
= mInst
->getOperand(opNum
);
2682 if (mOp
.getType() == MachineOperand::MO_PCRelativeDisp
) {
2683 //Check if we are branching to the kernel, if not branch to epilogue
2684 if(mOp
.getVRegValue() == BB
->getBasicBlock()) {
2685 if(I
== prologues
.size()-1)
2686 mOp
.setValueReg(llvmKernelBB
);
2688 mOp
.setValueReg(llvm_prologues
[I
+1]);
2691 mOp
.setValueReg(llvm_epilogues
[(llvm_epilogues
.size()-1-I
)]);
2696 DEBUG(std::cerr
<< "New Prologue Branch: " << *mInst
<< "\n");
2701 //Update llvm basic block with our new branch instr
2702 DEBUG(std::cerr
<< BB
->getBasicBlock()->getTerminator() << "\n");
2703 const BranchInst
*branchVal
= dyn_cast
<BranchInst
>(BB
->getBasicBlock()->getTerminator());
2705 if(I
== prologues
.size()-1) {
2706 TerminatorInst
*newBranch
= new BranchInst(llvmKernelBB
,
2707 llvm_epilogues
[(llvm_epilogues
.size()-1-I
)],
2708 branchVal
->getCondition(),
2712 TerminatorInst
*newBranch
= new BranchInst(llvm_prologues
[I
+1],
2713 llvm_epilogues
[(llvm_epilogues
.size()-1-I
)],
2714 branchVal
->getCondition(),
2720 Value
*origBranchExit
= 0;
2722 //Fix up kernel machine branches
2723 for(MachineBasicBlock::reverse_iterator mInst
= machineKernelBB
->rbegin(), mInstEnd
= machineKernelBB
->rend(); mInst
!= mInstEnd
; ++mInst
) {
2724 MachineOpCode OC
= mInst
->getOpcode();
2725 if(TMI
->isBranch(OC
)) {
2726 for(unsigned opNum
= 0; opNum
< mInst
->getNumOperands(); ++opNum
) {
2727 MachineOperand
&mOp
= mInst
->getOperand(opNum
);
2729 if(mOp
.getType() == MachineOperand::MO_PCRelativeDisp
) {
2730 if(mOp
.getVRegValue() == BB
->getBasicBlock())
2731 mOp
.setValueReg(llvmKernelBB
);
2733 if(llvm_epilogues
.size() > 0) {
2734 assert(origBranchExit
== 0 && "There should only be one branch out of the loop");
2736 origBranchExit
= mOp
.getVRegValue();
2737 mOp
.setValueReg(llvm_epilogues
[0]);
2740 origBranchExit
= mOp
.getVRegValue();
2746 //Update kernelLLVM branches
2747 const BranchInst
*branchVal
= dyn_cast
<BranchInst
>(BB
->getBasicBlock()->getTerminator());
2749 assert(origBranchExit
!= 0 && "We must have the original bb the kernel exits to!");
2751 if(epilogues
.size() > 0) {
2752 TerminatorInst
*newBranch
= new BranchInst(llvmKernelBB
,
2754 branchVal
->getCondition(),
2758 BasicBlock
*origBBExit
= dyn_cast
<BasicBlock
>(origBranchExit
);
2759 assert(origBBExit
!=0 && "Original exit basic block must be set");
2760 TerminatorInst
*newBranch
= new BranchInst(llvmKernelBB
,
2762 branchVal
->getCondition(),
2766 if(schedule
.getMaxStage() != 0) {
2767 //Lastly add unconditional branches for the epilogues
2768 for(unsigned I
= 0; I
< epilogues
.size(); ++I
) {
2770 //Now since we don't have fall throughs, add a unconditional branch to the next prologue
2771 if(I
!= epilogues
.size()-1) {
2772 BuildMI(epilogues
[I
], V9::BA
, 1).addPCDisp(llvm_epilogues
[I
+1]);
2773 //Add unconditional branch to end of epilogue
2774 TerminatorInst
*newBranch
= new BranchInst(llvm_epilogues
[I
+1],
2779 BuildMI(epilogues
[I
], V9::BA
, 1).addPCDisp(origBranchExit
);
2782 //Update last epilogue exit branch
2783 BranchInst
*branchVal
= (BranchInst
*) dyn_cast
<BranchInst
>(BB
->getBasicBlock()->getTerminator());
2784 //Find where we are supposed to branch to
2785 BasicBlock
*nextBlock
= 0;
2786 for(unsigned j
=0; j
<branchVal
->getNumSuccessors(); ++j
) {
2787 if(branchVal
->getSuccessor(j
) != BB
->getBasicBlock())
2788 nextBlock
= branchVal
->getSuccessor(j
);
2791 assert((nextBlock
!= 0) && "Next block should not be null!");
2792 TerminatorInst
*newBranch
= new BranchInst(nextBlock
, llvm_epilogues
[I
]);
2795 BuildMI(epilogues
[I
], V9::NOP
, 0);
2800 //FIX UP Machine BB entry!!
2801 //We are looking at the predecesor of our loop basic block and we want to change its ba instruction
2804 //Find all llvm basic blocks that branch to the loop entry and change to our first prologue.
2805 const BasicBlock
*llvmBB
= BB
->getBasicBlock();
2807 std::vector
<const BasicBlock
*>Preds (pred_begin(llvmBB
), pred_end(llvmBB
));
2809 //for(pred_const_iterator P = pred_begin(llvmBB), PE = pred_end(llvmBB); P != PE; ++PE) {
2810 for(std::vector
<const BasicBlock
*>::iterator P
= Preds
.begin(), PE
= Preds
.end(); P
!= PE
; ++P
) {
2814 DEBUG(std::cerr
<< "Found our entry BB\n");
2815 //Get the Terminator instruction for this basic block and print it out
2816 DEBUG(std::cerr
<< *((*P
)->getTerminator()) << "\n");
2817 //Update the terminator
2818 TerminatorInst
*term
= ((BasicBlock
*)*P
)->getTerminator();
2819 for(unsigned i
=0; i
< term
->getNumSuccessors(); ++i
) {
2820 if(term
->getSuccessor(i
) == llvmBB
) {
2821 DEBUG(std::cerr
<< "Replacing successor bb\n");
2822 if(llvm_prologues
.size() > 0) {
2823 term
->setSuccessor(i
, llvm_prologues
[0]);
2824 //Also update its corresponding machine instruction
2825 MachineCodeForInstruction
& tempMvec
=
2826 MachineCodeForInstruction::get(term
);
2827 for (unsigned j
= 0; j
< tempMvec
.size(); j
++) {
2828 MachineInstr
*temp
= tempMvec
[j
];
2829 MachineOpCode opc
= temp
->getOpcode();
2830 if(TMI
->isBranch(opc
)) {
2831 DEBUG(std::cerr
<< *temp
<< "\n");
2833 for(unsigned opNum
= 0; opNum
< temp
->getNumOperands(); ++opNum
) {
2834 MachineOperand
&mOp
= temp
->getOperand(opNum
);
2835 if (mOp
.getType() == MachineOperand::MO_PCRelativeDisp
) {
2836 mOp
.setValueReg(llvm_prologues
[0]);
2843 term
->setSuccessor(i
, llvmKernelBB
);
2844 //Also update its corresponding machine instruction
2845 MachineCodeForInstruction
& tempMvec
=
2846 MachineCodeForInstruction::get(term
);
2847 for (unsigned j
= 0; j
< tempMvec
.size(); j
++) {
2848 MachineInstr
*temp
= tempMvec
[j
];
2849 MachineOpCode opc
= temp
->getOpcode();
2850 if(TMI
->isBranch(opc
)) {
2851 DEBUG(std::cerr
<< *temp
<< "\n");
2853 for(unsigned opNum
= 0; opNum
< temp
->getNumOperands(); ++opNum
) {
2854 MachineOperand
&mOp
= temp
->getOperand(opNum
);
2855 if (mOp
.getType() == MachineOperand::MO_PCRelativeDisp
) {
2856 mOp
.setValueReg(llvmKernelBB
);
2869 //BB->getParent()->getBasicBlockList().erase(BB);