Fixed bug in searchPath function for finding nodes between two recurrences.
[llvm-complete.git] / lib / Target / SparcV9 / ModuloScheduling / ModuloScheduling.cpp
blob4e57c65aae1098ace8342d95029017362317af34
1 //===-- ModuloScheduling.cpp - ModuloScheduling ----------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file was developed by the LLVM research group and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This ModuloScheduling pass is based on the Swing Modulo Scheduling
11 // algorithm.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "ModuloSched"
17 #include "ModuloScheduling.h"
18 #include "llvm/Constants.h"
19 #include "llvm/Instructions.h"
20 #include "llvm/Function.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/Passes.h"
23 #include "llvm/Support/CFG.h"
24 #include "llvm/Target/TargetSchedInfo.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/GraphWriter.h"
27 #include "llvm/ADT/SCCIterator.h"
28 #include "llvm/ADT/StringExtras.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/Support/Timer.h"
31 #include <cmath>
32 #include <algorithm>
33 #include <fstream>
34 #include <sstream>
35 #include <utility>
36 #include <vector>
37 #include "../MachineCodeForInstruction.h"
38 #include "../SparcV9TmpInstr.h"
39 #include "../SparcV9Internals.h"
40 #include "../SparcV9RegisterInfo.h"
41 using namespace llvm;
43 /// Create ModuloSchedulingPass
44 ///
45 FunctionPass *llvm::createModuloSchedulingPass(TargetMachine & targ) {
46 DEBUG(std::cerr << "Created ModuloSchedulingPass\n");
47 return new ModuloSchedulingPass(targ);
51 //Graph Traits for printing out the dependence graph
52 template<typename GraphType>
53 static void WriteGraphToFile(std::ostream &O, const std::string &GraphName,
54 const GraphType &GT) {
55 std::string Filename = GraphName + ".dot";
56 O << "Writing '" << Filename << "'...";
57 std::ofstream F(Filename.c_str());
59 if (F.good())
60 WriteGraph(F, GT);
61 else
62 O << " error opening file for writing!";
63 O << "\n";
67 #if 1
68 #define TIME_REGION(VARNAME, DESC) \
69 NamedRegionTimer VARNAME(DESC)
70 #else
71 #define TIME_REGION(VARNAME, DESC)
72 #endif
75 //Graph Traits for printing out the dependence graph
76 namespace llvm {
78 //Loop statistics
79 Statistic<> ValidLoops("modulosched-validLoops", "Number of candidate loops modulo-scheduled");
80 Statistic<> JumboBB("modulosched-jumboBB", "Basic Blocks with more then 100 instructions");
81 Statistic<> LoopsWithCalls("modulosched-loopCalls", "Loops with calls");
82 Statistic<> LoopsWithCondMov("modulosched-loopCondMov", "Loops with conditional moves");
83 Statistic<> InvalidLoops("modulosched-invalidLoops", "Loops with unknown trip counts or loop invariant trip counts");
84 Statistic<> SingleBBLoops("modulosched-singeBBLoops", "Number of single basic block loops");
86 //Scheduling Statistics
87 Statistic<> MSLoops("modulosched-schedLoops", "Number of loops successfully modulo-scheduled");
88 Statistic<> NoSched("modulosched-noSched", "No schedule");
89 Statistic<> SameStage("modulosched-sameStage", "Max stage is 0");
90 Statistic<> ResourceConstraint("modulosched-resourceConstraint", "Loops constrained by resources");
91 Statistic<> RecurrenceConstraint("modulosched-recurrenceConstraint", "Loops constrained by recurrences");
92 Statistic<> FinalIISum("modulosched-finalIISum", "Sum of all final II");
93 Statistic<> IISum("modulosched-IISum", "Sum of all theoretical II");
95 template<>
96 struct DOTGraphTraits<MSchedGraph*> : public DefaultDOTGraphTraits {
97 static std::string getGraphName(MSchedGraph *F) {
98 return "Dependence Graph";
101 static std::string getNodeLabel(MSchedGraphNode *Node, MSchedGraph *Graph) {
102 if (Node->getInst()) {
103 std::stringstream ss;
104 ss << *(Node->getInst());
105 return ss.str(); //((MachineInstr*)Node->getInst());
107 else
108 return "No Inst";
110 static std::string getEdgeSourceLabel(MSchedGraphNode *Node,
111 MSchedGraphNode::succ_iterator I) {
112 //Label each edge with the type of dependence
113 std::string edgelabel = "";
114 switch (I.getEdge().getDepOrderType()) {
116 case MSchedGraphEdge::TrueDep:
117 edgelabel = "True";
118 break;
120 case MSchedGraphEdge::AntiDep:
121 edgelabel = "Anti";
122 break;
124 case MSchedGraphEdge::OutputDep:
125 edgelabel = "Output";
126 break;
128 default:
129 edgelabel = "Unknown";
130 break;
133 //FIXME
134 int iteDiff = I.getEdge().getIteDiff();
135 std::string intStr = "(IteDiff: ";
136 intStr += itostr(iteDiff);
138 intStr += ")";
139 edgelabel += intStr;
141 return edgelabel;
147 #include <unistd.h>
149 /// ModuloScheduling::runOnFunction - main transformation entry point
150 /// The Swing Modulo Schedule algorithm has three basic steps:
151 /// 1) Computation and Analysis of the dependence graph
152 /// 2) Ordering of the nodes
153 /// 3) Scheduling
155 bool ModuloSchedulingPass::runOnFunction(Function &F) {
156 alarm(100);
158 bool Changed = false;
159 int numMS = 0;
161 DEBUG(std::cerr << "Creating ModuloSchedGraph for each valid BasicBlock in " + F.getName() + "\n");
163 //Get MachineFunction
164 MachineFunction &MF = MachineFunction::get(&F);
166 DependenceAnalyzer &DA = getAnalysis<DependenceAnalyzer>();
169 //Worklist
170 std::vector<MachineBasicBlock*> Worklist;
172 //Iterate over BasicBlocks and put them into our worklist if they are valid
173 for (MachineFunction::iterator BI = MF.begin(); BI != MF.end(); ++BI)
174 if(MachineBBisValid(BI)) {
175 if(BI->size() < 100) {
176 Worklist.push_back(&*BI);
177 ++ValidLoops;
179 else
180 ++JumboBB;
181 std::cerr << "BB Size: " << BI->size() << "\n";
184 defaultInst = 0;
186 DEBUG(if(Worklist.size() == 0) std::cerr << "No single basic block loops in function to ModuloSchedule\n");
188 //Iterate over the worklist and perform scheduling
189 for(std::vector<MachineBasicBlock*>::iterator BI = Worklist.begin(),
190 BE = Worklist.end(); BI != BE; ++BI) {
192 //Print out BB for debugging
193 DEBUG(std::cerr << "BB Size: " << (*BI)->size() << "\n");
194 DEBUG(std::cerr << "ModuloScheduling BB: \n"; (*BI)->print(std::cerr));
196 //Print out LLVM BB
197 DEBUG(std::cerr << "ModuloScheduling LLVMBB: \n"; (*BI)->getBasicBlock()->print(std::cerr));
199 //Catch the odd case where we only have TmpInstructions and no real Value*s
200 if(!CreateDefMap(*BI)) {
201 //Clear out our maps for the next basic block that is processed
202 nodeToAttributesMap.clear();
203 partialOrder.clear();
204 recurrenceList.clear();
205 FinalNodeOrder.clear();
206 schedule.clear();
207 defMap.clear();
208 continue;
211 MSchedGraph *MSG = new MSchedGraph(*BI, target, indVarInstrs[*BI], DA, machineTollvm[*BI]);
213 //Write Graph out to file
214 DEBUG(WriteGraphToFile(std::cerr, F.getName(), MSG));
215 DEBUG(MSG->print(std::cerr));
217 //Calculate Resource II
218 int ResMII = calculateResMII(*BI);
220 //Calculate Recurrence II
221 int RecMII = calculateRecMII(MSG, ResMII);
223 DEBUG(std::cerr << "Number of reccurrences found: " << recurrenceList.size() << "\n");
225 //Our starting initiation interval is the maximum of RecMII and ResMII
226 if(RecMII < ResMII)
227 ++RecurrenceConstraint;
228 else
229 ++ResourceConstraint;
231 II = std::max(RecMII, ResMII);
232 int mII = II;
233 IISum += mII;
235 //Print out II, RecMII, and ResMII
236 DEBUG(std::cerr << "II starts out as " << II << " ( RecMII=" << RecMII << " and ResMII=" << ResMII << ")\n");
238 //Dump node properties if in debug mode
239 DEBUG(for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(),
240 E = nodeToAttributesMap.end(); I !=E; ++I) {
241 std::cerr << "Node: " << *(I->first) << " ASAP: " << I->second.ASAP << " ALAP: "
242 << I->second.ALAP << " MOB: " << I->second.MOB << " Depth: " << I->second.depth
243 << " Height: " << I->second.height << "\n";
246 //Calculate Node Properties
247 calculateNodeAttributes(MSG, ResMII);
249 //Dump node properties if in debug mode
250 DEBUG(for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(),
251 E = nodeToAttributesMap.end(); I !=E; ++I) {
252 std::cerr << "Node: " << *(I->first) << " ASAP: " << I->second.ASAP << " ALAP: "
253 << I->second.ALAP << " MOB: " << I->second.MOB << " Depth: " << I->second.depth
254 << " Height: " << I->second.height << "\n";
257 //Put nodes in order to schedule them
258 computePartialOrder();
260 //Dump out partial order
261 DEBUG(for(std::vector<std::set<MSchedGraphNode*> >::iterator I = partialOrder.begin(),
262 E = partialOrder.end(); I !=E; ++I) {
263 std::cerr << "Start set in PO\n";
264 for(std::set<MSchedGraphNode*>::iterator J = I->begin(), JE = I->end(); J != JE; ++J)
265 std::cerr << "PO:" << **J << "\n";
268 //Place nodes in final order
269 orderNodes();
271 //Dump out order of nodes
272 DEBUG(for(std::vector<MSchedGraphNode*>::iterator I = FinalNodeOrder.begin(), E = FinalNodeOrder.end(); I != E; ++I) {
273 std::cerr << "FO:" << **I << "\n";
276 //Finally schedule nodes
277 bool haveSched = computeSchedule(*BI, MSG);
279 //Print out final schedule
280 DEBUG(schedule.print(std::cerr));
282 //Final scheduling step is to reconstruct the loop only if we actual have
283 //stage > 0
284 if(haveSched) {
285 reconstructLoop(*BI);
286 ++MSLoops;
287 Changed = true;
289 if(schedule.getMaxStage() == 0)
290 ++SameStage;
292 else
293 ++NoSched;
295 //Clear out our maps for the next basic block that is processed
296 nodeToAttributesMap.clear();
297 partialOrder.clear();
298 recurrenceList.clear();
299 FinalNodeOrder.clear();
300 schedule.clear();
301 defMap.clear();
302 //Clean up. Nuke old MachineBB and llvmBB
303 //BasicBlock *llvmBB = (BasicBlock*) (*BI)->getBasicBlock();
304 //Function *parent = (Function*) llvmBB->getParent();
305 //Should't std::find work??
306 //parent->getBasicBlockList().erase(std::find(parent->getBasicBlockList().begin(), parent->getBasicBlockList().end(), *llvmBB));
307 //parent->getBasicBlockList().erase(llvmBB);
309 //delete(llvmBB);
310 //delete(*BI);
313 alarm(0);
314 return Changed;
317 bool ModuloSchedulingPass::CreateDefMap(MachineBasicBlock *BI) {
318 defaultInst = 0;
320 for(MachineBasicBlock::iterator I = BI->begin(), E = BI->end(); I != E; ++I) {
321 for(unsigned opNum = 0; opNum < I->getNumOperands(); ++opNum) {
322 const MachineOperand &mOp = I->getOperand(opNum);
323 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) {
324 //assert if this is the second def we have seen
325 //DEBUG(std::cerr << "Putting " << *(mOp.getVRegValue()) << " into map\n");
326 assert(!defMap.count(mOp.getVRegValue()) && "Def already in the map");
328 defMap[mOp.getVRegValue()] = &*I;
331 //See if we can use this Value* as our defaultInst
332 if(!defaultInst && mOp.getType() == MachineOperand::MO_VirtualRegister) {
333 Value *V = mOp.getVRegValue();
334 if(!isa<TmpInstruction>(V) && !isa<Argument>(V) && !isa<Constant>(V) && !isa<PHINode>(V))
335 defaultInst = (Instruction*) V;
340 if(!defaultInst)
341 return false;
343 return true;
346 /// This function checks if a Machine Basic Block is valid for modulo
347 /// scheduling. This means that it has no control flow (if/else or
348 /// calls) in the block. Currently ModuloScheduling only works on
349 /// single basic block loops.
350 bool ModuloSchedulingPass::MachineBBisValid(const MachineBasicBlock *BI) {
352 bool isLoop = false;
354 //Check first if its a valid loop
355 for(succ_const_iterator I = succ_begin(BI->getBasicBlock()),
356 E = succ_end(BI->getBasicBlock()); I != E; ++I) {
357 if (*I == BI->getBasicBlock()) // has single block loop
358 isLoop = true;
361 if(!isLoop)
362 return false;
364 //Check that we have a conditional branch (avoiding MS infinite loops)
365 if(BranchInst *b = dyn_cast<BranchInst>(((BasicBlock*) BI->getBasicBlock())->getTerminator()))
366 if(b->isUnconditional())
367 return false;
369 //Check size of our basic block.. make sure we have more then just the terminator in it
370 if(BI->getBasicBlock()->size() == 1)
371 return false;
373 //Increase number of single basic block loops for stats
374 ++SingleBBLoops;
376 //Get Target machine instruction info
377 const TargetInstrInfo *TMI = target.getInstrInfo();
379 //Check each instruction and look for calls, keep map to get index later
380 std::map<const MachineInstr*, unsigned> indexMap;
382 unsigned count = 0;
383 for(MachineBasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) {
384 //Get opcode to check instruction type
385 MachineOpCode OC = I->getOpcode();
387 //Look for calls
388 if(TMI->isCall(OC)) {
389 ++LoopsWithCalls;
390 return false;
393 //Look for conditional move
394 if(OC == V9::MOVRZr || OC == V9::MOVRZi || OC == V9::MOVRLEZr || OC == V9::MOVRLEZi
395 || OC == V9::MOVRLZr || OC == V9::MOVRLZi || OC == V9::MOVRNZr || OC == V9::MOVRNZi
396 || OC == V9::MOVRGZr || OC == V9::MOVRGZi || OC == V9::MOVRGEZr
397 || OC == V9::MOVRGEZi || OC == V9::MOVLEr || OC == V9::MOVLEi || OC == V9::MOVLEUr
398 || OC == V9::MOVLEUi || OC == V9::MOVFLEr || OC == V9::MOVFLEi
399 || OC == V9::MOVNEr || OC == V9::MOVNEi || OC == V9::MOVNEGr || OC == V9::MOVNEGi
400 || OC == V9::MOVFNEr || OC == V9::MOVFNEi) {
401 ++LoopsWithCondMov;
402 return false;
405 indexMap[I] = count;
407 if(TMI->isNop(OC))
408 continue;
410 ++count;
413 //Apply a simple pattern match to make sure this loop can be modulo scheduled
414 //This means only loops with a branch associated to the iteration count
416 //Get the branch
417 BranchInst *b = dyn_cast<BranchInst>(((BasicBlock*) BI->getBasicBlock())->getTerminator());
419 //Get the condition for the branch (we already checked if it was conditional)
420 Value *cond = b->getCondition();
422 DEBUG(std::cerr << "Condition: " << *cond << "\n");
424 //List of instructions associated with induction variable
425 std::set<Instruction*> indVar;
426 std::vector<Instruction*> stack;
428 BasicBlock *BB = (BasicBlock*) BI->getBasicBlock();
430 //Add branch
431 indVar.insert(b);
433 if(Instruction *I = dyn_cast<Instruction>(cond))
434 if(I->getParent() == BB) {
435 if (!assocIndVar(I, indVar, stack, BB)) {
436 ++InvalidLoops;
437 return false;
440 else {
441 ++InvalidLoops;
442 return false;
444 else {
445 ++InvalidLoops;
446 return false;
448 //The indVar set must be >= 3 instructions for this loop to match (FIX ME!)
449 if(indVar.size() < 3 )
450 return false;
452 //Dump out instructions associate with indvar for debug reasons
453 DEBUG(for(std::set<Instruction*>::iterator N = indVar.begin(), NE = indVar.end(); N != NE; ++N) {
454 std::cerr << **N << "\n";
457 //Create map of machine instr to llvm instr
458 std::map<MachineInstr*, Instruction*> mllvm;
459 for(BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
460 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(I);
461 for (unsigned j = 0; j < tempMvec.size(); j++) {
462 mllvm[tempMvec[j]] = I;
466 //Convert list of LLVM Instructions to list of Machine instructions
467 std::map<const MachineInstr*, unsigned> mIndVar;
468 for(std::set<Instruction*>::iterator N = indVar.begin(), NE = indVar.end(); N != NE; ++N) {
470 //If we have a load, we can't handle this loop because there is no way to preserve dependences
471 //between loads and stores
472 if(isa<LoadInst>(*N))
473 return false;
475 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(*N);
476 for (unsigned j = 0; j < tempMvec.size(); j++) {
477 MachineOpCode OC = (tempMvec[j])->getOpcode();
478 if(TMI->isNop(OC))
479 continue;
480 if(!indexMap.count(tempMvec[j]))
481 continue;
482 mIndVar[(MachineInstr*) tempMvec[j]] = indexMap[(MachineInstr*) tempMvec[j]];
483 DEBUG(std::cerr << *(tempMvec[j]) << " at index " << indexMap[(MachineInstr*) tempMvec[j]] << "\n");
487 //Must have some guts to the loop body (more then 1 instr, dont count nops in size)
488 if(mIndVar.size() >= (BI->size()-3))
489 return false;
491 //Put into a map for future access
492 indVarInstrs[BI] = mIndVar;
493 machineTollvm[BI] = mllvm;
494 return true;
497 bool ModuloSchedulingPass::assocIndVar(Instruction *I, std::set<Instruction*> &indVar,
498 std::vector<Instruction*> &stack, BasicBlock *BB) {
500 stack.push_back(I);
502 //If this is a phi node, check if its the canonical indvar
503 if(PHINode *PN = dyn_cast<PHINode>(I)) {
504 if (Instruction *Inc =
505 dyn_cast<Instruction>(PN->getIncomingValueForBlock(BB)))
506 if (Inc->getOpcode() == Instruction::Add && Inc->getOperand(0) == PN)
507 if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1)))
508 if (CI->equalsInt(1)) {
509 //We have found the indvar, so add the stack, and inc instruction to the set
510 indVar.insert(stack.begin(), stack.end());
511 indVar.insert(Inc);
512 stack.pop_back();
513 return true;
515 return false;
517 else {
518 //Loop over each of the instructions operands, check if they are an instruction and in this BB
519 for(unsigned i = 0; i < I->getNumOperands(); ++i) {
520 if(Instruction *N = dyn_cast<Instruction>(I->getOperand(i))) {
521 if(N->getParent() == BB)
522 if(!assocIndVar(N, indVar, stack, BB))
523 return false;
528 stack.pop_back();
529 return true;
532 //ResMII is calculated by determining the usage count for each resource
533 //and using the maximum.
534 //FIXME: In future there should be a way to get alternative resources
535 //for each instruction
536 int ModuloSchedulingPass::calculateResMII(const MachineBasicBlock *BI) {
538 TIME_REGION(X, "calculateResMII");
540 const TargetInstrInfo *mii = target.getInstrInfo();
541 const TargetSchedInfo *msi = target.getSchedInfo();
543 int ResMII = 0;
545 //Map to keep track of usage count of each resource
546 std::map<unsigned, unsigned> resourceUsageCount;
548 for(MachineBasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) {
550 //Get resource usage for this instruction
551 InstrRUsage rUsage = msi->getInstrRUsage(I->getOpcode());
552 std::vector<std::vector<resourceId_t> > resources = rUsage.resourcesByCycle;
554 //Loop over resources in each cycle and increments their usage count
555 for(unsigned i=0; i < resources.size(); ++i)
556 for(unsigned j=0; j < resources[i].size(); ++j) {
557 if(!resourceUsageCount.count(resources[i][j])) {
558 resourceUsageCount[resources[i][j]] = 1;
560 else {
561 resourceUsageCount[resources[i][j]] = resourceUsageCount[resources[i][j]] + 1;
566 //Find maximum usage count
568 //Get max number of instructions that can be issued at once. (FIXME)
569 int issueSlots = msi->maxNumIssueTotal;
571 for(std::map<unsigned,unsigned>::iterator RB = resourceUsageCount.begin(), RE = resourceUsageCount.end(); RB != RE; ++RB) {
573 //Get the total number of the resources in our cpu
574 int resourceNum = CPUResource::getCPUResource(RB->first)->maxNumUsers;
576 //Get total usage count for this resources
577 unsigned usageCount = RB->second;
579 //Divide the usage count by either the max number we can issue or the number of
580 //resources (whichever is its upper bound)
581 double finalUsageCount;
582 if( resourceNum <= issueSlots)
583 finalUsageCount = ceil(1.0 * usageCount / resourceNum);
584 else
585 finalUsageCount = ceil(1.0 * usageCount / issueSlots);
588 //Only keep track of the max
589 ResMII = std::max( (int) finalUsageCount, ResMII);
593 return ResMII;
597 /// calculateRecMII - Calculates the value of the highest recurrence
598 /// By value we mean the total latency
599 int ModuloSchedulingPass::calculateRecMII(MSchedGraph *graph, int MII) {
600 /*std::vector<MSchedGraphNode*> vNodes;
601 //Loop over all nodes in the graph
602 for(MSchedGraph::iterator I = graph->begin(), E = graph->end(); I != E; ++I) {
603 findAllReccurrences(I->second, vNodes, MII);
604 vNodes.clear();
607 TIME_REGION(X, "calculateRecMII");
609 findAllCircuits(graph, MII);
610 int RecMII = 0;
612 for(std::set<std::pair<int, std::vector<MSchedGraphNode*> > >::iterator I = recurrenceList.begin(), E=recurrenceList.end(); I !=E; ++I) {
613 RecMII = std::max(RecMII, I->first);
616 return MII;
619 /// calculateNodeAttributes - The following properties are calculated for
620 /// each node in the dependence graph: ASAP, ALAP, Depth, Height, and
621 /// MOB.
622 void ModuloSchedulingPass::calculateNodeAttributes(MSchedGraph *graph, int MII) {
624 TIME_REGION(X, "calculateNodeAttributes");
626 assert(nodeToAttributesMap.empty() && "Node attribute map was not cleared");
628 //Loop over the nodes and add them to the map
629 for(MSchedGraph::iterator I = graph->begin(), E = graph->end(); I != E; ++I) {
631 DEBUG(std::cerr << "Inserting node into attribute map: " << *I->second << "\n");
633 //Assert if its already in the map
634 assert(nodeToAttributesMap.count(I->second) == 0 &&
635 "Node attributes are already in the map");
637 //Put into the map with default attribute values
638 nodeToAttributesMap[I->second] = MSNodeAttributes();
641 //Create set to deal with reccurrences
642 std::set<MSchedGraphNode*> visitedNodes;
644 //Now Loop over map and calculate the node attributes
645 for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) {
646 calculateASAP(I->first, MII, (MSchedGraphNode*) 0);
647 visitedNodes.clear();
650 int maxASAP = findMaxASAP();
651 //Calculate ALAP which depends on ASAP being totally calculated
652 for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) {
653 calculateALAP(I->first, MII, maxASAP, (MSchedGraphNode*) 0);
654 visitedNodes.clear();
657 //Calculate MOB which depends on ASAP being totally calculated, also do depth and height
658 for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) {
659 (I->second).MOB = std::max(0,(I->second).ALAP - (I->second).ASAP);
661 DEBUG(std::cerr << "MOB: " << (I->second).MOB << " (" << *(I->first) << ")\n");
662 calculateDepth(I->first, (MSchedGraphNode*) 0);
663 calculateHeight(I->first, (MSchedGraphNode*) 0);
669 /// ignoreEdge - Checks to see if this edge of a recurrence should be ignored or not
670 bool ModuloSchedulingPass::ignoreEdge(MSchedGraphNode *srcNode, MSchedGraphNode *destNode) {
671 if(destNode == 0 || srcNode ==0)
672 return false;
674 bool findEdge = edgesToIgnore.count(std::make_pair(srcNode, destNode->getInEdgeNum(srcNode)));
676 DEBUG(std::cerr << "Ignoring edge? from: " << *srcNode << " to " << *destNode << "\n");
678 return findEdge;
682 /// calculateASAP - Calculates the
683 int ModuloSchedulingPass::calculateASAP(MSchedGraphNode *node, int MII, MSchedGraphNode *destNode) {
685 DEBUG(std::cerr << "Calculating ASAP for " << *node << "\n");
687 //Get current node attributes
688 MSNodeAttributes &attributes = nodeToAttributesMap.find(node)->second;
690 if(attributes.ASAP != -1)
691 return attributes.ASAP;
693 int maxPredValue = 0;
695 //Iterate over all of the predecessors and find max
696 for(MSchedGraphNode::pred_iterator P = node->pred_begin(), E = node->pred_end(); P != E; ++P) {
698 //Only process if we are not ignoring the edge
699 if(!ignoreEdge(*P, node)) {
700 int predASAP = -1;
701 predASAP = calculateASAP(*P, MII, node);
703 assert(predASAP != -1 && "ASAP has not been calculated");
704 int iteDiff = node->getInEdge(*P).getIteDiff();
706 int currentPredValue = predASAP + (*P)->getLatency() - (iteDiff * MII);
707 DEBUG(std::cerr << "pred ASAP: " << predASAP << ", iteDiff: " << iteDiff << ", PredLatency: " << (*P)->getLatency() << ", Current ASAP pred: " << currentPredValue << "\n");
708 maxPredValue = std::max(maxPredValue, currentPredValue);
712 attributes.ASAP = maxPredValue;
714 DEBUG(std::cerr << "ASAP: " << attributes.ASAP << " (" << *node << ")\n");
716 return maxPredValue;
720 int ModuloSchedulingPass::calculateALAP(MSchedGraphNode *node, int MII,
721 int maxASAP, MSchedGraphNode *srcNode) {
723 DEBUG(std::cerr << "Calculating ALAP for " << *node << "\n");
725 MSNodeAttributes &attributes = nodeToAttributesMap.find(node)->second;
727 if(attributes.ALAP != -1)
728 return attributes.ALAP;
730 if(node->hasSuccessors()) {
732 //Trying to deal with the issue where the node has successors, but
733 //we are ignoring all of the edges to them. So this is my hack for
734 //now.. there is probably a more elegant way of doing this (FIXME)
735 bool processedOneEdge = false;
737 //FIXME, set to something high to start
738 int minSuccValue = 9999999;
740 //Iterate over all of the predecessors and fine max
741 for(MSchedGraphNode::succ_iterator P = node->succ_begin(),
742 E = node->succ_end(); P != E; ++P) {
744 //Only process if we are not ignoring the edge
745 if(!ignoreEdge(node, *P)) {
746 processedOneEdge = true;
747 int succALAP = -1;
748 succALAP = calculateALAP(*P, MII, maxASAP, node);
750 assert(succALAP != -1 && "Successors ALAP should have been caclulated");
752 int iteDiff = P.getEdge().getIteDiff();
754 int currentSuccValue = succALAP - node->getLatency() + iteDiff * MII;
756 DEBUG(std::cerr << "succ ALAP: " << succALAP << ", iteDiff: " << iteDiff << ", SuccLatency: " << (*P)->getLatency() << ", Current ALAP succ: " << currentSuccValue << "\n");
758 minSuccValue = std::min(minSuccValue, currentSuccValue);
762 if(processedOneEdge)
763 attributes.ALAP = minSuccValue;
765 else
766 attributes.ALAP = maxASAP;
768 else
769 attributes.ALAP = maxASAP;
771 DEBUG(std::cerr << "ALAP: " << attributes.ALAP << " (" << *node << ")\n");
773 if(attributes.ALAP < 0)
774 attributes.ALAP = 0;
776 return attributes.ALAP;
779 int ModuloSchedulingPass::findMaxASAP() {
780 int maxASAP = 0;
782 for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(),
783 E = nodeToAttributesMap.end(); I != E; ++I)
784 maxASAP = std::max(maxASAP, I->second.ASAP);
785 return maxASAP;
789 int ModuloSchedulingPass::calculateHeight(MSchedGraphNode *node,MSchedGraphNode *srcNode) {
791 MSNodeAttributes &attributes = nodeToAttributesMap.find(node)->second;
793 if(attributes.height != -1)
794 return attributes.height;
796 int maxHeight = 0;
798 //Iterate over all of the predecessors and find max
799 for(MSchedGraphNode::succ_iterator P = node->succ_begin(),
800 E = node->succ_end(); P != E; ++P) {
803 if(!ignoreEdge(node, *P)) {
804 int succHeight = calculateHeight(*P, node);
806 assert(succHeight != -1 && "Successors Height should have been caclulated");
808 int currentHeight = succHeight + node->getLatency();
809 maxHeight = std::max(maxHeight, currentHeight);
812 attributes.height = maxHeight;
813 DEBUG(std::cerr << "Height: " << attributes.height << " (" << *node << ")\n");
814 return maxHeight;
818 int ModuloSchedulingPass::calculateDepth(MSchedGraphNode *node,
819 MSchedGraphNode *destNode) {
821 MSNodeAttributes &attributes = nodeToAttributesMap.find(node)->second;
823 if(attributes.depth != -1)
824 return attributes.depth;
826 int maxDepth = 0;
828 //Iterate over all of the predecessors and fine max
829 for(MSchedGraphNode::pred_iterator P = node->pred_begin(), E = node->pred_end(); P != E; ++P) {
831 if(!ignoreEdge(*P, node)) {
832 int predDepth = -1;
833 predDepth = calculateDepth(*P, node);
835 assert(predDepth != -1 && "Predecessors ASAP should have been caclulated");
837 int currentDepth = predDepth + (*P)->getLatency();
838 maxDepth = std::max(maxDepth, currentDepth);
841 attributes.depth = maxDepth;
843 DEBUG(std::cerr << "Depth: " << attributes.depth << " (" << *node << "*)\n");
844 return maxDepth;
849 void ModuloSchedulingPass::addReccurrence(std::vector<MSchedGraphNode*> &recurrence, int II, MSchedGraphNode *srcBENode, MSchedGraphNode *destBENode) {
850 //Check to make sure that this recurrence is unique
851 bool same = false;
854 //Loop over all recurrences already in our list
855 for(std::set<std::pair<int, std::vector<MSchedGraphNode*> > >::iterator R = recurrenceList.begin(), RE = recurrenceList.end(); R != RE; ++R) {
857 bool all_same = true;
858 //First compare size
859 if(R->second.size() == recurrence.size()) {
861 for(std::vector<MSchedGraphNode*>::const_iterator node = R->second.begin(), end = R->second.end(); node != end; ++node) {
862 if(std::find(recurrence.begin(), recurrence.end(), *node) == recurrence.end()) {
863 all_same = all_same && false;
864 break;
866 else
867 all_same = all_same && true;
869 if(all_same) {
870 same = true;
871 break;
876 if(!same) {
877 srcBENode = recurrence.back();
878 destBENode = recurrence.front();
880 //FIXME
881 if(destBENode->getInEdge(srcBENode).getIteDiff() == 0) {
882 //DEBUG(std::cerr << "NOT A BACKEDGE\n");
883 //find actual backedge HACK HACK
884 for(unsigned i=0; i< recurrence.size()-1; ++i) {
885 if(recurrence[i+1]->getInEdge(recurrence[i]).getIteDiff() == 1) {
886 srcBENode = recurrence[i];
887 destBENode = recurrence[i+1];
888 break;
894 DEBUG(std::cerr << "Back Edge to Remove: " << *srcBENode << " to " << *destBENode << "\n");
895 edgesToIgnore.insert(std::make_pair(srcBENode, destBENode->getInEdgeNum(srcBENode)));
896 recurrenceList.insert(std::make_pair(II, recurrence));
901 int CircCount;
903 void ModuloSchedulingPass::unblock(MSchedGraphNode *u, std::set<MSchedGraphNode*> &blocked,
904 std::map<MSchedGraphNode*, std::set<MSchedGraphNode*> > &B) {
906 //Unblock u
907 DEBUG(std::cerr << "Unblocking: " << *u << "\n");
908 blocked.erase(u);
910 //std::set<MSchedGraphNode*> toErase;
911 while (!B[u].empty()) {
912 MSchedGraphNode *W = *B[u].begin();
913 B[u].erase(W);
914 //toErase.insert(*W);
915 DEBUG(std::cerr << "Removed: " << *W << "from B-List\n");
916 if(blocked.count(W))
917 unblock(W, blocked, B);
922 bool ModuloSchedulingPass::circuit(MSchedGraphNode *v, std::vector<MSchedGraphNode*> &stack,
923 std::set<MSchedGraphNode*> &blocked, std::vector<MSchedGraphNode*> &SCC,
924 MSchedGraphNode *s, std::map<MSchedGraphNode*, std::set<MSchedGraphNode*> > &B,
925 int II, std::map<MSchedGraphNode*, MSchedGraphNode*> &newNodes) {
926 bool f = false;
928 DEBUG(std::cerr << "Finding Circuits Starting with: ( " << v << ")"<< *v << "\n");
930 //Push node onto the stack
931 stack.push_back(v);
933 //block this node
934 blocked.insert(v);
936 //Loop over all successors of node v that are in the scc, create Adjaceny list
937 std::set<MSchedGraphNode*> AkV;
938 for(MSchedGraphNode::succ_iterator I = v->succ_begin(), E = v->succ_end(); I != E; ++I) {
939 if((std::find(SCC.begin(), SCC.end(), *I) != SCC.end())) {
940 AkV.insert(*I);
944 for(std::set<MSchedGraphNode*>::iterator I = AkV.begin(), E = AkV.end(); I != E; ++I) {
945 if(*I == s) {
946 //We have a circuit, so add it to our list
947 addRecc(stack, newNodes);
948 f = true;
950 else if(!blocked.count(*I)) {
951 if(circuit(*I, stack, blocked, SCC, s, B, II, newNodes))
952 f = true;
954 else
955 DEBUG(std::cerr << "Blocked: " << **I << "\n");
959 if(f) {
960 unblock(v, blocked, B);
962 else {
963 for(std::set<MSchedGraphNode*>::iterator I = AkV.begin(), E = AkV.end(); I != E; ++I)
964 B[*I].insert(v);
968 //Pop v
969 stack.pop_back();
971 return f;
975 void ModuloSchedulingPass::addRecc(std::vector<MSchedGraphNode*> &stack, std::map<MSchedGraphNode*, MSchedGraphNode*> &newNodes) {
976 std::vector<MSchedGraphNode*> recc;
977 //Dump recurrence for now
978 DEBUG(std::cerr << "Starting Recc\n");
980 int totalDelay = 0;
981 int totalDistance = 0;
982 MSchedGraphNode *lastN = 0;
983 MSchedGraphNode *start = 0;
984 MSchedGraphNode *end = 0;
986 //Loop over recurrence, get delay and distance
987 for(std::vector<MSchedGraphNode*>::iterator N = stack.begin(), NE = stack.end(); N != NE; ++N) {
988 DEBUG(std::cerr << **N << "\n");
989 totalDelay += (*N)->getLatency();
990 if(lastN) {
991 int iteDiff = (*N)->getInEdge(lastN).getIteDiff();
992 totalDistance += iteDiff;
994 if(iteDiff > 0) {
995 start = lastN;
996 end = *N;
999 //Get the original node
1000 lastN = *N;
1001 recc.push_back(newNodes[*N]);
1006 //Get the loop edge
1007 totalDistance += lastN->getIteDiff(*stack.begin());
1009 DEBUG(std::cerr << "End Recc\n");
1010 CircCount++;
1012 if(start && end) {
1013 //Insert reccurrence into the list
1014 DEBUG(std::cerr << "Ignore Edge from!!: " << *start << " to " << *end << "\n");
1015 edgesToIgnore.insert(std::make_pair(newNodes[start], (newNodes[end])->getInEdgeNum(newNodes[start])));
1017 else {
1018 //Insert reccurrence into the list
1019 DEBUG(std::cerr << "Ignore Edge from: " << *lastN << " to " << **stack.begin() << "\n");
1020 edgesToIgnore.insert(std::make_pair(newNodes[lastN], newNodes[(*stack.begin())]->getInEdgeNum(newNodes[lastN])));
1023 //Adjust II until we get close to the inequality delay - II*distance <= 0
1024 int RecMII = II; //Starting value
1025 int value = totalDelay-(RecMII * totalDistance);
1026 int lastII = II;
1027 while(value < 0) {
1029 lastII = RecMII;
1030 RecMII--;
1031 value = totalDelay-(RecMII * totalDistance);
1034 recurrenceList.insert(std::make_pair(lastII, recc));
1039 void ModuloSchedulingPass::findAllCircuits(MSchedGraph *g, int II) {
1041 CircCount = 0;
1043 //Keep old to new node mapping information
1044 std::map<MSchedGraphNode*, MSchedGraphNode*> newNodes;
1046 //copy the graph
1047 MSchedGraph *MSG = new MSchedGraph(*g, newNodes);
1049 DEBUG(std::cerr << "Finding All Circuits\n");
1051 //Set of blocked nodes
1052 std::set<MSchedGraphNode*> blocked;
1054 //Stack holding current circuit
1055 std::vector<MSchedGraphNode*> stack;
1057 //Map for B Lists
1058 std::map<MSchedGraphNode*, std::set<MSchedGraphNode*> > B;
1060 //current node
1061 MSchedGraphNode *s;
1064 //Iterate over the graph until its down to one node or empty
1065 while(MSG->size() > 1) {
1067 //Write Graph out to file
1068 //WriteGraphToFile(std::cerr, "Graph" + utostr(MSG->size()), MSG);
1070 DEBUG(std::cerr << "Graph Size: " << MSG->size() << "\n");
1071 DEBUG(std::cerr << "Finding strong component Vk with least vertex\n");
1073 //Iterate over all the SCCs in the graph
1074 std::set<MSchedGraphNode*> Visited;
1075 std::vector<MSchedGraphNode*> Vk;
1076 MSchedGraphNode* s = 0;
1078 //Find scc with the least vertex
1079 for (MSchedGraph::iterator GI = MSG->begin(), E = MSG->end(); GI != E; ++GI)
1080 if (Visited.insert(GI->second).second) {
1081 for (scc_iterator<MSchedGraphNode*> SCCI = scc_begin(GI->second),
1082 E = scc_end(GI->second); SCCI != E; ++SCCI) {
1083 std::vector<MSchedGraphNode*> &nextSCC = *SCCI;
1085 if (Visited.insert(nextSCC[0]).second) {
1086 Visited.insert(nextSCC.begin()+1, nextSCC.end());
1088 DEBUG(std::cerr << "SCC size: " << nextSCC.size() << "\n");
1090 //Ignore self loops
1091 if(nextSCC.size() > 1) {
1093 //Get least vertex in Vk
1094 if(!s) {
1095 s = nextSCC[0];
1096 Vk = nextSCC;
1099 for(unsigned i = 0; i < nextSCC.size(); ++i) {
1100 if(nextSCC[i] < s) {
1101 s = nextSCC[i];
1102 Vk = nextSCC;
1112 //Process SCC
1113 DEBUG(for(std::vector<MSchedGraphNode*>::iterator N = Vk.begin(), NE = Vk.end();
1114 N != NE; ++N) { std::cerr << *((*N)->getInst()); });
1116 //Iterate over all nodes in this scc
1117 for(std::vector<MSchedGraphNode*>::iterator N = Vk.begin(), NE = Vk.end();
1118 N != NE; ++N) {
1119 blocked.erase(*N);
1120 B[*N].clear();
1122 if(Vk.size() > 1) {
1123 circuit(s, stack, blocked, Vk, s, B, II, newNodes);
1125 //Delete nodes from the graph
1126 //Find all nodes up to s and delete them
1127 std::vector<MSchedGraphNode*> nodesToRemove;
1128 nodesToRemove.push_back(s);
1129 for(MSchedGraph::iterator N = MSG->begin(), NE = MSG->end(); N != NE; ++N) {
1130 if(N->second < s )
1131 nodesToRemove.push_back(N->second);
1133 for(std::vector<MSchedGraphNode*>::iterator N = nodesToRemove.begin(), NE = nodesToRemove.end(); N != NE; ++N) {
1134 DEBUG(std::cerr << "Deleting Node: " << **N << "\n");
1135 MSG->deleteNode(*N);
1138 else
1139 break;
1141 DEBUG(std::cerr << "Num Circuits found: " << CircCount << "\n");
1145 void ModuloSchedulingPass::findAllReccurrences(MSchedGraphNode *node,
1146 std::vector<MSchedGraphNode*> &visitedNodes,
1147 int II) {
1150 if(std::find(visitedNodes.begin(), visitedNodes.end(), node) != visitedNodes.end()) {
1151 std::vector<MSchedGraphNode*> recurrence;
1152 bool first = true;
1153 int delay = 0;
1154 int distance = 0;
1155 int RecMII = II; //Starting value
1156 MSchedGraphNode *last = node;
1157 MSchedGraphNode *srcBackEdge = 0;
1158 MSchedGraphNode *destBackEdge = 0;
1162 for(std::vector<MSchedGraphNode*>::iterator I = visitedNodes.begin(), E = visitedNodes.end();
1163 I !=E; ++I) {
1165 if(*I == node)
1166 first = false;
1167 if(first)
1168 continue;
1170 delay = delay + (*I)->getLatency();
1172 if(*I != node) {
1173 int diff = (*I)->getInEdge(last).getIteDiff();
1174 distance += diff;
1175 if(diff > 0) {
1176 srcBackEdge = last;
1177 destBackEdge = *I;
1181 recurrence.push_back(*I);
1182 last = *I;
1187 //Get final distance calc
1188 distance += node->getInEdge(last).getIteDiff();
1189 DEBUG(std::cerr << "Reccurrence Distance: " << distance << "\n");
1191 //Adjust II until we get close to the inequality delay - II*distance <= 0
1193 int value = delay-(RecMII * distance);
1194 int lastII = II;
1195 while(value <= 0) {
1197 lastII = RecMII;
1198 RecMII--;
1199 value = delay-(RecMII * distance);
1203 DEBUG(std::cerr << "Final II for this recurrence: " << lastII << "\n");
1204 addReccurrence(recurrence, lastII, srcBackEdge, destBackEdge);
1205 assert(distance != 0 && "Recurrence distance should not be zero");
1206 return;
1209 unsigned count = 0;
1210 for(MSchedGraphNode::succ_iterator I = node->succ_begin(), E = node->succ_end(); I != E; ++I) {
1211 visitedNodes.push_back(node);
1212 //if(!edgesToIgnore.count(std::make_pair(node, count)))
1213 findAllReccurrences(*I, visitedNodes, II);
1214 visitedNodes.pop_back();
1215 count++;
1219 void ModuloSchedulingPass::searchPath(MSchedGraphNode *node,
1220 std::vector<MSchedGraphNode*> &path,
1221 std::set<MSchedGraphNode*> &nodesToAdd,
1222 std::set<MSchedGraphNode*> &new_reccurrence) {
1223 //Push node onto the path
1224 path.push_back(node);
1226 //Loop over all successors and see if there is a path from this node to
1227 //a recurrence in the partial order, if so.. add all nodes to be added to recc
1228 for(MSchedGraphNode::succ_iterator S = node->succ_begin(), SE = node->succ_end(); S != SE;
1229 ++S) {
1231 //Check if we should ignore this edge first
1232 if(ignoreEdge(node,*S))
1233 continue;
1235 //check if successor is in this recurrence, we will get to it eventually
1236 if(new_reccurrence.count(*S))
1237 continue;
1239 //If this node exists in a recurrence already in the partial
1240 //order, then add all nodes in the path to the set of nodes to add
1241 //Check if its already in our partial order, if not add it to the
1242 //final vector
1243 bool found = false;
1244 for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(),
1245 PE = partialOrder.end(); PO != PE; ++PO) {
1247 if(PO->count(*S)) {
1248 found = true;
1249 break;
1253 if(!found) {
1254 nodesToAdd.insert(*S);
1255 searchPath(*S, path, nodesToAdd, new_reccurrence);
1259 //Pop Node off the path
1260 path.pop_back();
1263 void ModuloSchedulingPass::pathToRecc(MSchedGraphNode *node,
1264 std::vector<MSchedGraphNode*> &path,
1265 std::set<MSchedGraphNode*> &poSet,
1266 std::set<MSchedGraphNode*> &lastNodes) {
1267 //Push node onto the path
1268 path.push_back(node);
1270 DEBUG(std::cerr << "Current node: " << *node << "\n");
1272 //Loop over all successors and see if there is a path from this node to
1273 //a recurrence in the partial order, if so.. add all nodes to be added to recc
1274 for(MSchedGraphNode::succ_iterator S = node->succ_begin(), SE = node->succ_end(); S != SE;
1275 ++S) {
1276 DEBUG(std::cerr << "Succ:" << **S << "\n");
1277 //Check if we should ignore this edge first
1278 if(ignoreEdge(node,*S))
1279 continue;
1281 if(poSet.count(*S)) {
1282 DEBUG(std::cerr << "Found path to recc from no pred\n");
1283 //Loop over path, if it exists in lastNodes, then add to poset, and remove from lastNodes
1284 for(std::vector<MSchedGraphNode*>::iterator I = path.begin(), IE = path.end(); I != IE; ++I) {
1285 if(lastNodes.count(*I)) {
1286 DEBUG(std::cerr << "Inserting node into recc: " << **I << "\n");
1287 poSet.insert(*I);
1288 lastNodes.erase(*I);
1292 else
1293 pathToRecc(*S, path, poSet, lastNodes);
1296 //Pop Node off the path
1297 path.pop_back();
1300 void ModuloSchedulingPass::computePartialOrder() {
1302 TIME_REGION(X, "calculatePartialOrder");
1304 DEBUG(std::cerr << "Computing Partial Order\n");
1306 //Only push BA branches onto the final node order, we put other
1307 //branches after it FIXME: Should we really be pushing branches on
1308 //it a specific order instead of relying on BA being there?
1310 std::vector<MSchedGraphNode*> branches;
1312 //Steps to add a recurrence to the partial order 1) Find reccurrence
1313 //with the highest RecMII. Add it to the partial order. 2) For each
1314 //recurrence with decreasing RecMII, add it to the partial order
1315 //along with any nodes that connect this recurrence to recurrences
1316 //already in the partial order
1317 for(std::set<std::pair<int, std::vector<MSchedGraphNode*> > >::reverse_iterator
1318 I = recurrenceList.rbegin(), E=recurrenceList.rend(); I !=E; ++I) {
1320 std::set<MSchedGraphNode*> new_recurrence;
1322 //Loop through recurrence and remove any nodes already in the partial order
1323 for(std::vector<MSchedGraphNode*>::const_iterator N = I->second.begin(),
1324 NE = I->second.end(); N != NE; ++N) {
1326 bool found = false;
1327 for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(),
1328 PE = partialOrder.end(); PO != PE; ++PO) {
1329 if(PO->count(*N))
1330 found = true;
1333 //Check if its a branch, and remove to handle special
1334 if(!found) {
1335 if((*N)->isBranch() && !(*N)->hasPredecessors()) {
1336 branches.push_back(*N);
1338 else
1339 new_recurrence.insert(*N);
1345 if(new_recurrence.size() > 0) {
1347 std::vector<MSchedGraphNode*> path;
1348 std::set<MSchedGraphNode*> nodesToAdd;
1350 //Dump recc we are dealing with (minus nodes already in PO)
1351 DEBUG(std::cerr << "Recc: ");
1352 DEBUG(for(std::set<MSchedGraphNode*>::iterator R = new_recurrence.begin(), RE = new_recurrence.end(); R != RE; ++R) { std::cerr << **R ; });
1354 //Add nodes that connect this recurrence to recurrences in the partial path
1355 for(std::set<MSchedGraphNode*>::iterator N = new_recurrence.begin(),
1356 NE = new_recurrence.end(); N != NE; ++N)
1357 searchPath(*N, path, nodesToAdd, new_recurrence);
1359 //Add nodes to this recurrence if they are not already in the partial order
1360 for(std::set<MSchedGraphNode*>::iterator N = nodesToAdd.begin(), NE = nodesToAdd.end();
1361 N != NE; ++N) {
1362 bool found = false;
1363 for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(),
1364 PE = partialOrder.end(); PO != PE; ++PO) {
1365 if(PO->count(*N))
1366 found = true;
1368 if(!found) {
1369 assert("FOUND CONNECTOR");
1370 new_recurrence.insert(*N);
1374 partialOrder.push_back(new_recurrence);
1377 //Dump out partial order
1378 DEBUG(for(std::vector<std::set<MSchedGraphNode*> >::iterator I = partialOrder.begin(),
1379 E = partialOrder.end(); I !=E; ++I) {
1380 std::cerr << "Start set in PO\n";
1381 for(std::set<MSchedGraphNode*>::iterator J = I->begin(), JE = I->end(); J != JE; ++J)
1382 std::cerr << "PO:" << **J << "\n";
1388 //Add any nodes that are not already in the partial order
1389 //Add them in a set, one set per connected component
1390 std::set<MSchedGraphNode*> lastNodes;
1391 std::set<MSchedGraphNode*> noPredNodes;
1392 for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(),
1393 E = nodeToAttributesMap.end(); I != E; ++I) {
1395 bool found = false;
1397 //Check if its already in our partial order, if not add it to the final vector
1398 for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(),
1399 PE = partialOrder.end(); PO != PE; ++PO) {
1400 if(PO->count(I->first))
1401 found = true;
1403 if(!found)
1404 lastNodes.insert(I->first);
1407 //For each node w/out preds, see if there is a path to one of the
1408 //recurrences, and if so add them to that current recc
1409 /*for(std::set<MSchedGraphNode*>::iterator N = noPredNodes.begin(), NE = noPredNodes.end();
1410 N != NE; ++N) {
1411 DEBUG(std::cerr << "No Pred Path from: " << **N << "\n");
1412 for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(),
1413 PE = partialOrder.end(); PO != PE; ++PO) {
1414 std::vector<MSchedGraphNode*> path;
1415 pathToRecc(*N, path, *PO, lastNodes);
1420 //Break up remaining nodes that are not in the partial order
1421 ///into their connected compoenents
1422 while(lastNodes.size() > 0) {
1423 std::set<MSchedGraphNode*> ccSet;
1424 connectedComponentSet(*(lastNodes.begin()),ccSet, lastNodes);
1425 if(ccSet.size() > 0)
1426 partialOrder.push_back(ccSet);
1430 //Clean up branches by putting them in final order
1431 assert(branches.size() == 0 && "We should not have any branches in our graph");
1435 void ModuloSchedulingPass::connectedComponentSet(MSchedGraphNode *node, std::set<MSchedGraphNode*> &ccSet, std::set<MSchedGraphNode*> &lastNodes) {
1437 //Add to final set
1438 if( !ccSet.count(node) && lastNodes.count(node)) {
1439 lastNodes.erase(node);
1440 ccSet.insert(node);
1442 else
1443 return;
1445 //Loop over successors and recurse if we have not seen this node before
1446 for(MSchedGraphNode::succ_iterator node_succ = node->succ_begin(), end=node->succ_end(); node_succ != end; ++node_succ) {
1447 connectedComponentSet(*node_succ, ccSet, lastNodes);
1452 void ModuloSchedulingPass::predIntersect(std::set<MSchedGraphNode*> &CurrentSet, std::set<MSchedGraphNode*> &IntersectResult) {
1454 for(unsigned j=0; j < FinalNodeOrder.size(); ++j) {
1455 for(MSchedGraphNode::pred_iterator P = FinalNodeOrder[j]->pred_begin(),
1456 E = FinalNodeOrder[j]->pred_end(); P != E; ++P) {
1458 //Check if we are supposed to ignore this edge or not
1459 if(ignoreEdge(*P,FinalNodeOrder[j]))
1460 continue;
1462 if(CurrentSet.count(*P))
1463 if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), *P) == FinalNodeOrder.end())
1464 IntersectResult.insert(*P);
1473 void ModuloSchedulingPass::succIntersect(std::set<MSchedGraphNode*> &CurrentSet, std::set<MSchedGraphNode*> &IntersectResult) {
1475 for(unsigned j=0; j < FinalNodeOrder.size(); ++j) {
1476 for(MSchedGraphNode::succ_iterator P = FinalNodeOrder[j]->succ_begin(),
1477 E = FinalNodeOrder[j]->succ_end(); P != E; ++P) {
1479 //Check if we are supposed to ignore this edge or not
1480 if(ignoreEdge(FinalNodeOrder[j],*P))
1481 continue;
1483 if(CurrentSet.count(*P))
1484 if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), *P) == FinalNodeOrder.end())
1485 IntersectResult.insert(*P);
1490 void dumpIntersection(std::set<MSchedGraphNode*> &IntersectCurrent) {
1491 std::cerr << "Intersection (";
1492 for(std::set<MSchedGraphNode*>::iterator I = IntersectCurrent.begin(), E = IntersectCurrent.end(); I != E; ++I)
1493 std::cerr << **I << ", ";
1494 std::cerr << ")\n";
1499 void ModuloSchedulingPass::orderNodes() {
1501 TIME_REGION(X, "orderNodes");
1503 int BOTTOM_UP = 0;
1504 int TOP_DOWN = 1;
1506 //Set default order
1507 int order = BOTTOM_UP;
1510 //Loop over all the sets and place them in the final node order
1511 for(std::vector<std::set<MSchedGraphNode*> >::iterator CurrentSet = partialOrder.begin(), E= partialOrder.end(); CurrentSet != E; ++CurrentSet) {
1513 DEBUG(std::cerr << "Processing set in S\n");
1514 DEBUG(dumpIntersection(*CurrentSet));
1516 //Result of intersection
1517 std::set<MSchedGraphNode*> IntersectCurrent;
1519 predIntersect(*CurrentSet, IntersectCurrent);
1521 //If the intersection of predecessor and current set is not empty
1522 //sort nodes bottom up
1523 if(IntersectCurrent.size() != 0) {
1524 DEBUG(std::cerr << "Final Node Order Predecessors and Current Set interesection is NOT empty\n");
1525 order = BOTTOM_UP;
1527 //If empty, use successors
1528 else {
1529 DEBUG(std::cerr << "Final Node Order Predecessors and Current Set interesection is empty\n");
1531 succIntersect(*CurrentSet, IntersectCurrent);
1533 //sort top-down
1534 if(IntersectCurrent.size() != 0) {
1535 DEBUG(std::cerr << "Final Node Order Successors and Current Set interesection is NOT empty\n");
1536 order = TOP_DOWN;
1538 else {
1539 DEBUG(std::cerr << "Final Node Order Successors and Current Set interesection is empty\n");
1540 //Find node with max ASAP in current Set
1541 MSchedGraphNode *node;
1542 int maxASAP = 0;
1543 DEBUG(std::cerr << "Using current set of size " << CurrentSet->size() << "to find max ASAP\n");
1544 for(std::set<MSchedGraphNode*>::iterator J = CurrentSet->begin(), JE = CurrentSet->end(); J != JE; ++J) {
1545 //Get node attributes
1546 MSNodeAttributes nodeAttr= nodeToAttributesMap.find(*J)->second;
1547 //assert(nodeAttr != nodeToAttributesMap.end() && "Node not in attributes map!");
1549 if(maxASAP <= nodeAttr.ASAP) {
1550 maxASAP = nodeAttr.ASAP;
1551 node = *J;
1554 assert(node != 0 && "In node ordering node should not be null");
1555 IntersectCurrent.insert(node);
1556 order = BOTTOM_UP;
1560 //Repeat until all nodes are put into the final order from current set
1561 while(IntersectCurrent.size() > 0) {
1563 if(order == TOP_DOWN) {
1564 DEBUG(std::cerr << "Order is TOP DOWN\n");
1566 while(IntersectCurrent.size() > 0) {
1567 DEBUG(std::cerr << "Intersection is not empty, so find heighest height\n");
1569 int MOB = 0;
1570 int height = 0;
1571 MSchedGraphNode *highestHeightNode = *(IntersectCurrent.begin());
1573 //Find node in intersection with highest heigh and lowest MOB
1574 for(std::set<MSchedGraphNode*>::iterator I = IntersectCurrent.begin(),
1575 E = IntersectCurrent.end(); I != E; ++I) {
1577 //Get current nodes properties
1578 MSNodeAttributes nodeAttr= nodeToAttributesMap.find(*I)->second;
1580 if(height < nodeAttr.height) {
1581 highestHeightNode = *I;
1582 height = nodeAttr.height;
1583 MOB = nodeAttr.MOB;
1585 else if(height == nodeAttr.height) {
1586 if(MOB > nodeAttr.height) {
1587 highestHeightNode = *I;
1588 height = nodeAttr.height;
1589 MOB = nodeAttr.MOB;
1594 //Append our node with greatest height to the NodeOrder
1595 if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), highestHeightNode) == FinalNodeOrder.end()) {
1596 DEBUG(std::cerr << "Adding node to Final Order: " << *highestHeightNode << "\n");
1597 FinalNodeOrder.push_back(highestHeightNode);
1600 //Remove V from IntersectOrder
1601 IntersectCurrent.erase(std::find(IntersectCurrent.begin(),
1602 IntersectCurrent.end(), highestHeightNode));
1605 //Intersect V's successors with CurrentSet
1606 for(MSchedGraphNode::succ_iterator P = highestHeightNode->succ_begin(),
1607 E = highestHeightNode->succ_end(); P != E; ++P) {
1608 //if(lower_bound(CurrentSet->begin(),
1609 // CurrentSet->end(), *P) != CurrentSet->end()) {
1610 if(std::find(CurrentSet->begin(), CurrentSet->end(), *P) != CurrentSet->end()) {
1611 if(ignoreEdge(highestHeightNode, *P))
1612 continue;
1613 //If not already in Intersect, add
1614 if(!IntersectCurrent.count(*P))
1615 IntersectCurrent.insert(*P);
1618 } //End while loop over Intersect Size
1620 //Change direction
1621 order = BOTTOM_UP;
1623 //Reset Intersect to reflect changes in OrderNodes
1624 IntersectCurrent.clear();
1625 predIntersect(*CurrentSet, IntersectCurrent);
1627 } //End If TOP_DOWN
1629 //Begin if BOTTOM_UP
1630 else {
1631 DEBUG(std::cerr << "Order is BOTTOM UP\n");
1632 while(IntersectCurrent.size() > 0) {
1633 DEBUG(std::cerr << "Intersection of size " << IntersectCurrent.size() << ", finding highest depth\n");
1635 //dump intersection
1636 DEBUG(dumpIntersection(IntersectCurrent));
1637 //Get node with highest depth, if a tie, use one with lowest
1638 //MOB
1639 int MOB = 0;
1640 int depth = 0;
1641 MSchedGraphNode *highestDepthNode = *(IntersectCurrent.begin());
1643 for(std::set<MSchedGraphNode*>::iterator I = IntersectCurrent.begin(),
1644 E = IntersectCurrent.end(); I != E; ++I) {
1645 //Find node attribute in graph
1646 MSNodeAttributes nodeAttr= nodeToAttributesMap.find(*I)->second;
1648 if(depth < nodeAttr.depth) {
1649 highestDepthNode = *I;
1650 depth = nodeAttr.depth;
1651 MOB = nodeAttr.MOB;
1653 else if(depth == nodeAttr.depth) {
1654 if(MOB > nodeAttr.MOB) {
1655 highestDepthNode = *I;
1656 depth = nodeAttr.depth;
1657 MOB = nodeAttr.MOB;
1664 //Append highest depth node to the NodeOrder
1665 if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), highestDepthNode) == FinalNodeOrder.end()) {
1666 DEBUG(std::cerr << "Adding node to Final Order: " << *highestDepthNode << "\n");
1667 FinalNodeOrder.push_back(highestDepthNode);
1669 //Remove heightestDepthNode from IntersectOrder
1670 IntersectCurrent.erase(highestDepthNode);
1673 //Intersect heightDepthNode's pred with CurrentSet
1674 for(MSchedGraphNode::pred_iterator P = highestDepthNode->pred_begin(),
1675 E = highestDepthNode->pred_end(); P != E; ++P) {
1676 if(CurrentSet->count(*P)) {
1677 if(ignoreEdge(*P, highestDepthNode))
1678 continue;
1680 //If not already in Intersect, add
1681 if(!IntersectCurrent.count(*P))
1682 IntersectCurrent.insert(*P);
1686 } //End while loop over Intersect Size
1688 //Change order
1689 order = TOP_DOWN;
1691 //Reset IntersectCurrent to reflect changes in OrderNodes
1692 IntersectCurrent.clear();
1693 succIntersect(*CurrentSet, IntersectCurrent);
1694 } //End if BOTTOM_DOWN
1696 DEBUG(std::cerr << "Current Intersection Size: " << IntersectCurrent.size() << "\n");
1698 //End Wrapping while loop
1699 DEBUG(std::cerr << "Ending Size of Current Set: " << CurrentSet->size() << "\n");
1700 }//End for over all sets of nodes
1702 //FIXME: As the algorithm stands it will NEVER add an instruction such as ba (with no
1703 //data dependencies) to the final order. We add this manually. It will always be
1704 //in the last set of S since its not part of a recurrence
1705 //Loop over all the sets and place them in the final node order
1706 std::vector<std::set<MSchedGraphNode*> > ::reverse_iterator LastSet = partialOrder.rbegin();
1707 for(std::set<MSchedGraphNode*>::iterator CurrentNode = LastSet->begin(), LastNode = LastSet->end();
1708 CurrentNode != LastNode; ++CurrentNode) {
1709 if((*CurrentNode)->getInst()->getOpcode() == V9::BA)
1710 FinalNodeOrder.push_back(*CurrentNode);
1712 //Return final Order
1713 //return FinalNodeOrder;
1716 bool ModuloSchedulingPass::computeSchedule(const MachineBasicBlock *BB, MSchedGraph *MSG) {
1718 TIME_REGION(X, "computeSchedule");
1720 bool success = false;
1722 //FIXME: Should be set to max II of the original loop
1723 //Cap II in order to prevent infinite loop
1724 int capII = MSG->totalDelay();
1726 while(!success) {
1728 //Keep track of branches, but do not insert into the schedule
1729 std::vector<MSchedGraphNode*> branches;
1731 //Loop over the final node order and process each node
1732 for(std::vector<MSchedGraphNode*>::iterator I = FinalNodeOrder.begin(),
1733 E = FinalNodeOrder.end(); I != E; ++I) {
1735 //CalculateEarly and Late start
1736 bool initialLSVal = false;
1737 bool initialESVal = false;
1738 int EarlyStart = 0;
1739 int LateStart = 0;
1740 bool hasSucc = false;
1741 bool hasPred = false;
1742 bool sched;
1744 if((*I)->isBranch())
1745 if((*I)->hasPredecessors())
1746 sched = true;
1747 else
1748 sched = false;
1749 else
1750 sched = true;
1752 if(sched) {
1753 //Loop over nodes in the schedule and determine if they are predecessors
1754 //or successors of the node we are trying to schedule
1755 for(MSSchedule::schedule_iterator nodesByCycle = schedule.begin(), nodesByCycleEnd = schedule.end();
1756 nodesByCycle != nodesByCycleEnd; ++nodesByCycle) {
1758 //For this cycle, get the vector of nodes schedule and loop over it
1759 for(std::vector<MSchedGraphNode*>::iterator schedNode = nodesByCycle->second.begin(), SNE = nodesByCycle->second.end(); schedNode != SNE; ++schedNode) {
1761 if((*I)->isPredecessor(*schedNode)) {
1762 int diff = (*I)->getInEdge(*schedNode).getIteDiff();
1763 int ES_Temp = nodesByCycle->first + (*schedNode)->getLatency() - diff * II;
1764 DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << nodesByCycle->first << "\n");
1765 DEBUG(std::cerr << "Temp EarlyStart: " << ES_Temp << " Prev EarlyStart: " << EarlyStart << "\n");
1766 if(initialESVal)
1767 EarlyStart = std::max(EarlyStart, ES_Temp);
1768 else {
1769 EarlyStart = ES_Temp;
1770 initialESVal = true;
1772 hasPred = true;
1774 if((*I)->isSuccessor(*schedNode)) {
1775 int diff = (*schedNode)->getInEdge(*I).getIteDiff();
1776 int LS_Temp = nodesByCycle->first - (*I)->getLatency() + diff * II;
1777 DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << nodesByCycle->first << "\n");
1778 DEBUG(std::cerr << "Temp LateStart: " << LS_Temp << " Prev LateStart: " << LateStart << "\n");
1779 if(initialLSVal)
1780 LateStart = std::min(LateStart, LS_Temp);
1781 else {
1782 LateStart = LS_Temp;
1783 initialLSVal = true;
1785 hasSucc = true;
1790 else {
1791 branches.push_back(*I);
1792 continue;
1795 //Check if this node is a pred or succ to a branch, and restrict its placement
1796 //even though the branch is not in the schedule
1797 /*int count = branches.size();
1798 for(std::vector<MSchedGraphNode*>::iterator B = branches.begin(), BE = branches.end();
1799 B != BE; ++B) {
1800 if((*I)->isPredecessor(*B)) {
1801 int diff = (*I)->getInEdge(*B).getIteDiff();
1802 int ES_Temp = (II+count-1) + (*B)->getLatency() - diff * II;
1803 DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << (II+count)-1 << "\n");
1804 DEBUG(std::cerr << "Temp EarlyStart: " << ES_Temp << " Prev EarlyStart: " << EarlyStart << "\n");
1805 EarlyStart = std::max(EarlyStart, ES_Temp);
1806 hasPred = true;
1809 if((*I)->isSuccessor(*B)) {
1810 int diff = (*B)->getInEdge(*I).getIteDiff();
1811 int LS_Temp = (II+count-1) - (*I)->getLatency() + diff * II;
1812 DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << (II+count-1) << "\n");
1813 DEBUG(std::cerr << "Temp LateStart: " << LS_Temp << " Prev LateStart: " << LateStart << "\n");
1814 LateStart = std::min(LateStart, LS_Temp);
1815 hasSucc = true;
1818 count--;
1821 //Check if the node has no pred or successors and set Early Start to its ASAP
1822 if(!hasSucc && !hasPred)
1823 EarlyStart = nodeToAttributesMap.find(*I)->second.ASAP;
1825 DEBUG(std::cerr << "Has Successors: " << hasSucc << ", Has Pred: " << hasPred << "\n");
1826 DEBUG(std::cerr << "EarlyStart: " << EarlyStart << ", LateStart: " << LateStart << "\n");
1828 //Now, try to schedule this node depending upon its pred and successor in the schedule
1829 //already
1830 if(!hasSucc && hasPred)
1831 success = scheduleNode(*I, EarlyStart, (EarlyStart + II -1));
1832 else if(!hasPred && hasSucc)
1833 success = scheduleNode(*I, LateStart, (LateStart - II +1));
1834 else if(hasPred && hasSucc) {
1835 if(EarlyStart > LateStart) {
1836 success = false;
1837 //LateStart = EarlyStart;
1838 DEBUG(std::cerr << "Early Start can not be later then the late start cycle, schedule fails\n");
1840 else
1841 success = scheduleNode(*I, EarlyStart, std::min(LateStart, (EarlyStart + II -1)));
1843 else
1844 success = scheduleNode(*I, EarlyStart, EarlyStart + II - 1);
1846 if(!success) {
1847 ++II;
1848 schedule.clear();
1849 break;
1854 if(success) {
1855 DEBUG(std::cerr << "Constructing Schedule Kernel\n");
1856 success = schedule.constructKernel(II, branches, indVarInstrs[BB]);
1857 DEBUG(std::cerr << "Done Constructing Schedule Kernel\n");
1858 if(!success) {
1859 ++II;
1860 schedule.clear();
1862 DEBUG(std::cerr << "Final II: " << II << "\n");
1863 FinalIISum += II;
1866 if(II >= capII) {
1867 DEBUG(std::cerr << "Maximum II reached, giving up\n");
1868 return false;
1871 assert(II < capII && "The II should not exceed the original loop number of cycles");
1873 return true;
1877 bool ModuloSchedulingPass::scheduleNode(MSchedGraphNode *node,
1878 int start, int end) {
1879 bool success = false;
1881 DEBUG(std::cerr << *node << " (Start Cycle: " << start << ", End Cycle: " << end << ")\n");
1883 //Make sure start and end are not negative
1884 //if(start < 0) {
1885 //start = 0;
1888 //if(end < 0)
1889 //end = 0;
1891 bool forward = true;
1892 if(start > end)
1893 forward = false;
1895 bool increaseSC = true;
1896 int cycle = start ;
1899 while(increaseSC) {
1901 increaseSC = false;
1903 increaseSC = schedule.insert(node, cycle);
1905 if(!increaseSC)
1906 return true;
1908 //Increment cycle to try again
1909 if(forward) {
1910 ++cycle;
1911 DEBUG(std::cerr << "Increase cycle: " << cycle << "\n");
1912 if(cycle > end)
1913 return false;
1915 else {
1916 --cycle;
1917 DEBUG(std::cerr << "Decrease cycle: " << cycle << "\n");
1918 if(cycle < end)
1919 return false;
1923 return success;
1926 void ModuloSchedulingPass::writePrologues(std::vector<MachineBasicBlock *> &prologues, MachineBasicBlock *origBB, std::vector<BasicBlock*> &llvm_prologues, std::map<const Value*, std::pair<const MachineInstr*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues, std::map<Value*, MachineBasicBlock*> &newValLocation) {
1928 //Keep a map to easily know whats in the kernel
1929 std::map<int, std::set<const MachineInstr*> > inKernel;
1930 int maxStageCount = 0;
1932 //Keep a map of new values we consumed in case they need to be added back
1933 std::map<Value*, std::map<int, Value*> > consumedValues;
1935 MSchedGraphNode *branch = 0;
1936 MSchedGraphNode *BAbranch = 0;
1938 DEBUG(schedule.print(std::cerr));
1940 std::vector<MSchedGraphNode*> branches;
1942 for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) {
1943 maxStageCount = std::max(maxStageCount, I->second);
1945 //Put int the map so we know what instructions in each stage are in the kernel
1946 DEBUG(std::cerr << "Inserting instruction " << *(I->first) << " into map at stage " << I->second << "\n");
1947 inKernel[I->second].insert(I->first);
1950 //Get target information to look at machine operands
1951 const TargetInstrInfo *mii = target.getInstrInfo();
1953 //Now write the prologues
1954 for(int i = 0; i < maxStageCount; ++i) {
1955 BasicBlock *llvmBB = new BasicBlock("PROLOGUE", (Function*) (origBB->getBasicBlock()->getParent()));
1956 MachineBasicBlock *machineBB = new MachineBasicBlock(llvmBB);
1958 DEBUG(std::cerr << "i=" << i << "\n");
1959 for(int j = i; j >= 0; --j) {
1960 for(MachineBasicBlock::const_iterator MI = origBB->begin(), ME = origBB->end(); ME != MI; ++MI) {
1961 if(inKernel[j].count(&*MI)) {
1962 MachineInstr *instClone = MI->clone();
1963 machineBB->push_back(instClone);
1965 //If its a branch, insert a nop
1966 if(mii->isBranch(instClone->getOpcode()))
1967 BuildMI(machineBB, V9::NOP, 0);
1970 DEBUG(std::cerr << "Cloning: " << *MI << "\n");
1972 //After cloning, we may need to save the value that this instruction defines
1973 for(unsigned opNum=0; opNum < MI->getNumOperands(); ++opNum) {
1974 Instruction *tmp;
1976 //get machine operand
1977 MachineOperand &mOp = instClone->getOperand(opNum);
1978 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) {
1980 //Check if this is a value we should save
1981 if(valuesToSave.count(mOp.getVRegValue())) {
1982 //Save copy in tmpInstruction
1983 tmp = new TmpInstruction(mOp.getVRegValue());
1985 //Add TmpInstruction to safe LLVM Instruction MCFI
1986 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
1987 tempMvec.addTemp((Value*) tmp);
1989 DEBUG(std::cerr << "Value: " << *(mOp.getVRegValue()) << " New Value: " << *tmp << " Stage: " << i << "\n");
1991 newValues[mOp.getVRegValue()][i]= tmp;
1992 newValLocation[tmp] = machineBB;
1994 DEBUG(std::cerr << "Machine Instr Operands: " << *(mOp.getVRegValue()) << ", 0, " << *tmp << "\n");
1996 //Create machine instruction and put int machineBB
1997 MachineInstr *saveValue;
1998 if(mOp.getVRegValue()->getType() == Type::FloatTy)
1999 saveValue = BuildMI(machineBB, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
2000 else if(mOp.getVRegValue()->getType() == Type::DoubleTy)
2001 saveValue = BuildMI(machineBB, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
2002 else
2003 saveValue = BuildMI(machineBB, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp);
2006 DEBUG(std::cerr << "Created new machine instr: " << *saveValue << "\n");
2010 //We may also need to update the value that we use if its from an earlier prologue
2011 if(j != 0) {
2012 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) {
2013 if(newValues.count(mOp.getVRegValue())) {
2014 if(newValues[mOp.getVRegValue()].count(i-1)) {
2015 Value *oldV = mOp.getVRegValue();
2016 DEBUG(std::cerr << "Replaced this value: " << mOp.getVRegValue() << " With:" << (newValues[mOp.getVRegValue()][i-1]) << "\n");
2017 //Update the operand with the right value
2018 mOp.setValueReg(newValues[mOp.getVRegValue()][i-1]);
2020 //Remove this value since we have consumed it
2021 //NOTE: Should this only be done if j != maxStage?
2022 consumedValues[oldV][i-1] = (newValues[oldV][i-1]);
2023 DEBUG(std::cerr << "Deleted value: " << consumedValues[oldV][i-1] << "\n");
2024 newValues[oldV].erase(i-1);
2027 else
2028 if(consumedValues.count(mOp.getVRegValue()))
2029 assert(!consumedValues[mOp.getVRegValue()].count(i-1) && "Found a case where we need the value");
2038 /*for(std::vector<MSchedGraphNode*>::iterator BR = branches.begin(), BE = branches.end(); BR != BE; ++BR) {
2040 //Stick in branch at the end
2041 machineBB->push_back((*BR)->getInst()->clone());
2043 //Add nop
2044 BuildMI(machineBB, V9::NOP, 0);
2048 (((MachineBasicBlock*)origBB)->getParent())->getBasicBlockList().push_back(machineBB);
2049 prologues.push_back(machineBB);
2050 llvm_prologues.push_back(llvmBB);
2054 void ModuloSchedulingPass::writeEpilogues(std::vector<MachineBasicBlock *> &epilogues, const MachineBasicBlock *origBB, std::vector<BasicBlock*> &llvm_epilogues, std::map<const Value*, std::pair<const MachineInstr*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues,std::map<Value*, MachineBasicBlock*> &newValLocation, std::map<Value*, std::map<int, Value*> > &kernelPHIs ) {
2056 std::map<int, std::set<const MachineInstr*> > inKernel;
2058 for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) {
2060 //Ignore the branch, we will handle this separately
2061 //if(I->first->isBranch())
2062 //continue;
2064 //Put int the map so we know what instructions in each stage are in the kernel
2065 inKernel[I->second].insert(I->first);
2068 std::map<Value*, Value*> valPHIs;
2070 //some debug stuff, will remove later
2071 DEBUG(for(std::map<Value*, std::map<int, Value*> >::iterator V = newValues.begin(), E = newValues.end(); V !=E; ++V) {
2072 std::cerr << "Old Value: " << *(V->first) << "\n";
2073 for(std::map<int, Value*>::iterator I = V->second.begin(), IE = V->second.end(); I != IE; ++I)
2074 std::cerr << "Stage: " << I->first << " Value: " << *(I->second) << "\n";
2077 //some debug stuff, will remove later
2078 DEBUG(for(std::map<Value*, std::map<int, Value*> >::iterator V = kernelPHIs.begin(), E = kernelPHIs.end(); V !=E; ++V) {
2079 std::cerr << "Old Value: " << *(V->first) << "\n";
2080 for(std::map<int, Value*>::iterator I = V->second.begin(), IE = V->second.end(); I != IE; ++I)
2081 std::cerr << "Stage: " << I->first << " Value: " << *(I->second) << "\n";
2084 //Now write the epilogues
2085 for(int i = schedule.getMaxStage()-1; i >= 0; --i) {
2086 BasicBlock *llvmBB = new BasicBlock("EPILOGUE", (Function*) (origBB->getBasicBlock()->getParent()));
2087 MachineBasicBlock *machineBB = new MachineBasicBlock(llvmBB);
2089 DEBUG(std::cerr << " Epilogue #: " << i << "\n");
2092 std::map<Value*, int> inEpilogue;
2094 for(MachineBasicBlock::const_iterator MI = origBB->begin(), ME = origBB->end(); ME != MI; ++MI) {
2095 for(int j=schedule.getMaxStage(); j > i; --j) {
2096 if(inKernel[j].count(&*MI)) {
2097 DEBUG(std::cerr << "Cloning instruction " << *MI << "\n");
2098 MachineInstr *clone = MI->clone();
2100 //Update operands that need to use the result from the phi
2101 for(unsigned opNum=0; opNum < clone->getNumOperands(); ++opNum) {
2102 //get machine operand
2103 const MachineOperand &mOp = clone->getOperand(opNum);
2105 if((mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse())) {
2107 DEBUG(std::cerr << "Writing PHI for " << (mOp.getVRegValue()) << "\n");
2109 //If this is the last instructions for the max iterations ago, don't update operands
2110 if(inEpilogue.count(mOp.getVRegValue()))
2111 if(inEpilogue[mOp.getVRegValue()] == i)
2112 continue;
2114 //Quickly write appropriate phis for this operand
2115 if(newValues.count(mOp.getVRegValue())) {
2116 if(newValues[mOp.getVRegValue()].count(i)) {
2117 Instruction *tmp = new TmpInstruction(newValues[mOp.getVRegValue()][i]);
2119 //Get machine code for this instruction
2120 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
2121 tempMvec.addTemp((Value*) tmp);
2123 //assert of no kernelPHI for this value
2124 assert(kernelPHIs[mOp.getVRegValue()][i] !=0 && "Must have final kernel phi to construct epilogue phi");
2126 MachineInstr *saveValue = BuildMI(machineBB, V9::PHI, 3).addReg(newValues[mOp.getVRegValue()][i]).addReg(kernelPHIs[mOp.getVRegValue()][i]).addRegDef(tmp);
2127 DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n");
2128 valPHIs[mOp.getVRegValue()] = tmp;
2132 if(valPHIs.count(mOp.getVRegValue())) {
2133 //Update the operand in the cloned instruction
2134 clone->getOperand(opNum).setValueReg(valPHIs[mOp.getVRegValue()]);
2137 else if((mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef())) {
2138 inEpilogue[mOp.getVRegValue()] = i;
2141 machineBB->push_back(clone);
2146 (((MachineBasicBlock*)origBB)->getParent())->getBasicBlockList().push_back(machineBB);
2147 epilogues.push_back(machineBB);
2148 llvm_epilogues.push_back(llvmBB);
2150 DEBUG(std::cerr << "EPILOGUE #" << i << "\n");
2151 DEBUG(machineBB->print(std::cerr));
2155 void ModuloSchedulingPass::writeKernel(BasicBlock *llvmBB, MachineBasicBlock *machineBB, std::map<const Value*, std::pair<const MachineInstr*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues, std::map<Value*, MachineBasicBlock*> &newValLocation, std::map<Value*, std::map<int, Value*> > &kernelPHIs) {
2157 //Keep track of operands that are read and saved from a previous iteration. The new clone
2158 //instruction will use the result of the phi instead.
2159 std::map<Value*, Value*> finalPHIValue;
2160 std::map<Value*, Value*> kernelValue;
2162 //Branches are a special case
2163 std::vector<MachineInstr*> branches;
2165 //Get target information to look at machine operands
2166 const TargetInstrInfo *mii = target.getInstrInfo();
2168 //Create TmpInstructions for the final phis
2169 for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) {
2171 DEBUG(std::cerr << "Stage: " << I->second << " Inst: " << *(I->first) << "\n";);
2173 /*if(I->first->isBranch()) {
2174 //Clone instruction
2175 const MachineInstr *inst = I->first->getInst();
2176 MachineInstr *instClone = inst->clone();
2177 branches.push_back(instClone);
2178 continue;
2181 //Clone instruction
2182 const MachineInstr *inst = I->first;
2183 MachineInstr *instClone = inst->clone();
2185 //Insert into machine basic block
2186 machineBB->push_back(instClone);
2188 if(mii->isBranch(instClone->getOpcode()))
2189 BuildMI(machineBB, V9::NOP, 0);
2191 DEBUG(std::cerr << "Cloned Inst: " << *instClone << "\n");
2193 //Loop over Machine Operands
2194 for(unsigned i=0; i < inst->getNumOperands(); ++i) {
2195 //get machine operand
2196 const MachineOperand &mOp = inst->getOperand(i);
2198 if(I->second != 0) {
2199 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) {
2201 //Check to see where this operand is defined if this instruction is from max stage
2202 if(I->second == schedule.getMaxStage()) {
2203 DEBUG(std::cerr << "VREG: " << *(mOp.getVRegValue()) << "\n");
2206 //If its in the value saved, we need to create a temp instruction and use that instead
2207 if(valuesToSave.count(mOp.getVRegValue())) {
2209 //Check if we already have a final PHI value for this
2210 if(!finalPHIValue.count(mOp.getVRegValue())) {
2211 TmpInstruction *tmp = new TmpInstruction(mOp.getVRegValue());
2213 //Get machine code for this instruction
2214 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
2215 tempMvec.addTemp((Value*) tmp);
2217 //Update the operand in the cloned instruction
2218 instClone->getOperand(i).setValueReg(tmp);
2220 //save this as our final phi
2221 finalPHIValue[mOp.getVRegValue()] = tmp;
2222 newValLocation[tmp] = machineBB;
2224 else {
2225 //Use the previous final phi value
2226 instClone->getOperand(i).setValueReg(finalPHIValue[mOp.getVRegValue()]);
2231 if(I->second != schedule.getMaxStage()) {
2232 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) {
2233 if(valuesToSave.count(mOp.getVRegValue())) {
2235 TmpInstruction *tmp = new TmpInstruction(mOp.getVRegValue());
2237 //Get machine code for this instruction
2238 MachineCodeForInstruction & tempVec = MachineCodeForInstruction::get(defaultInst);
2239 tempVec.addTemp((Value*) tmp);
2241 //Create new machine instr and put in MBB
2242 MachineInstr *saveValue;
2243 if(mOp.getVRegValue()->getType() == Type::FloatTy)
2244 saveValue = BuildMI(machineBB, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
2245 else if(mOp.getVRegValue()->getType() == Type::DoubleTy)
2246 saveValue = BuildMI(machineBB, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
2247 else
2248 saveValue = BuildMI(machineBB, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp);
2251 //Save for future cleanup
2252 kernelValue[mOp.getVRegValue()] = tmp;
2253 newValLocation[tmp] = machineBB;
2254 kernelPHIs[mOp.getVRegValue()][schedule.getMaxStage()-1] = tmp;
2262 //Add branches
2263 for(std::vector<MachineInstr*>::iterator I = branches.begin(), E = branches.end(); I != E; ++I) {
2264 machineBB->push_back(*I);
2265 BuildMI(machineBB, V9::NOP, 0);
2269 DEBUG(std::cerr << "KERNEL before PHIs\n");
2270 DEBUG(machineBB->print(std::cerr));
2273 //Loop over each value we need to generate phis for
2274 for(std::map<Value*, std::map<int, Value*> >::iterator V = newValues.begin(),
2275 E = newValues.end(); V != E; ++V) {
2278 DEBUG(std::cerr << "Writing phi for" << *(V->first));
2279 DEBUG(std::cerr << "\nMap of Value* for this phi\n");
2280 DEBUG(for(std::map<int, Value*>::iterator I = V->second.begin(),
2281 IE = V->second.end(); I != IE; ++I) {
2282 std::cerr << "Stage: " << I->first;
2283 std::cerr << " Value: " << *(I->second) << "\n";
2286 //If we only have one current iteration live, its safe to set lastPhi = to kernel value
2287 if(V->second.size() == 1) {
2288 assert(kernelValue[V->first] != 0 && "Kernel value* must exist to create phi");
2289 MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(),V9::PHI, 3).addReg(V->second.begin()->second).addReg(kernelValue[V->first]).addRegDef(finalPHIValue[V->first]);
2290 DEBUG(std::cerr << "Resulting PHI (one live): " << *saveValue << "\n");
2291 kernelPHIs[V->first][V->second.begin()->first] = kernelValue[V->first];
2292 DEBUG(std::cerr << "Put kernel phi in at stage: " << schedule.getMaxStage()-1 << " (map stage = " << V->second.begin()->first << ")\n");
2294 else {
2296 //Keep track of last phi created.
2297 Instruction *lastPhi = 0;
2299 unsigned count = 1;
2300 //Loop over the the map backwards to generate phis
2301 for(std::map<int, Value*>::reverse_iterator I = V->second.rbegin(), IE = V->second.rend();
2302 I != IE; ++I) {
2304 if(count < (V->second).size()) {
2305 if(lastPhi == 0) {
2306 lastPhi = new TmpInstruction(I->second);
2308 //Get machine code for this instruction
2309 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
2310 tempMvec.addTemp((Value*) lastPhi);
2312 MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(), V9::PHI, 3).addReg(kernelValue[V->first]).addReg(I->second).addRegDef(lastPhi);
2313 DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n");
2314 newValLocation[lastPhi] = machineBB;
2316 else {
2317 Instruction *tmp = new TmpInstruction(I->second);
2319 //Get machine code for this instruction
2320 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
2321 tempMvec.addTemp((Value*) tmp);
2324 MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(), V9::PHI, 3).addReg(lastPhi).addReg(I->second).addRegDef(tmp);
2325 DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n");
2326 lastPhi = tmp;
2327 kernelPHIs[V->first][I->first] = lastPhi;
2328 newValLocation[lastPhi] = machineBB;
2331 //Final phi value
2332 else {
2333 //The resulting value must be the Value* we created earlier
2334 assert(lastPhi != 0 && "Last phi is NULL!\n");
2335 MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(), V9::PHI, 3).addReg(lastPhi).addReg(I->second).addRegDef(finalPHIValue[V->first]);
2336 DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n");
2337 kernelPHIs[V->first][I->first] = finalPHIValue[V->first];
2340 ++count;
2346 DEBUG(std::cerr << "KERNEL after PHIs\n");
2347 DEBUG(machineBB->print(std::cerr));
2351 void ModuloSchedulingPass::removePHIs(const MachineBasicBlock *origBB, std::vector<MachineBasicBlock *> &prologues, std::vector<MachineBasicBlock *> &epilogues, MachineBasicBlock *kernelBB, std::map<Value*, MachineBasicBlock*> &newValLocation) {
2353 //Worklist to delete things
2354 std::vector<std::pair<MachineBasicBlock*, MachineBasicBlock::iterator> > worklist;
2356 //Worklist of TmpInstructions that need to be added to a MCFI
2357 std::vector<Instruction*> addToMCFI;
2359 //Worklist to add OR instructions to end of kernel so not to invalidate the iterator
2360 //std::vector<std::pair<Instruction*, Value*> > newORs;
2362 const TargetInstrInfo *TMI = target.getInstrInfo();
2364 //Start with the kernel and for each phi insert a copy for the phi def and for each arg
2365 for(MachineBasicBlock::iterator I = kernelBB->begin(), E = kernelBB->end(); I != E; ++I) {
2367 DEBUG(std::cerr << "Looking at Instr: " << *I << "\n");
2368 //Get op code and check if its a phi
2369 if(I->getOpcode() == V9::PHI) {
2371 DEBUG(std::cerr << "Replacing PHI: " << *I << "\n");
2372 Instruction *tmp = 0;
2374 for(unsigned i = 0; i < I->getNumOperands(); ++i) {
2375 //Get Operand
2376 const MachineOperand &mOp = I->getOperand(i);
2377 assert(mOp.getType() == MachineOperand::MO_VirtualRegister && "Should be a Value*\n");
2379 if(!tmp) {
2380 tmp = new TmpInstruction(mOp.getVRegValue());
2381 addToMCFI.push_back(tmp);
2384 //Now for all our arguments we read, OR to the new TmpInstruction that we created
2385 if(mOp.isUse()) {
2386 DEBUG(std::cerr << "Use: " << mOp << "\n");
2387 //Place a copy at the end of its BB but before the branches
2388 assert(newValLocation.count(mOp.getVRegValue()) && "We must know where this value is located\n");
2389 //Reverse iterate to find the branches, we can safely assume no instructions have been
2390 //put in the nop positions
2391 for(MachineBasicBlock::iterator inst = --(newValLocation[mOp.getVRegValue()])->end(), endBB = (newValLocation[mOp.getVRegValue()])->begin(); inst != endBB; --inst) {
2392 MachineOpCode opc = inst->getOpcode();
2393 if(TMI->isBranch(opc) || TMI->isNop(opc))
2394 continue;
2395 else {
2396 if(mOp.getVRegValue()->getType() == Type::FloatTy)
2397 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
2398 else if(mOp.getVRegValue()->getType() == Type::DoubleTy)
2399 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
2400 else
2401 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp);
2403 break;
2409 else {
2410 //Remove the phi and replace it with an OR
2411 DEBUG(std::cerr << "Def: " << mOp << "\n");
2412 //newORs.push_back(std::make_pair(tmp, mOp.getVRegValue()));
2413 if(tmp->getType() == Type::FloatTy)
2414 BuildMI(*kernelBB, I, V9::FMOVS, 3).addReg(tmp).addRegDef(mOp.getVRegValue());
2415 else if(tmp->getType() == Type::DoubleTy)
2416 BuildMI(*kernelBB, I, V9::FMOVD, 3).addReg(tmp).addRegDef(mOp.getVRegValue());
2417 else
2418 BuildMI(*kernelBB, I, V9::ORr, 3).addReg(tmp).addImm(0).addRegDef(mOp.getVRegValue());
2421 worklist.push_back(std::make_pair(kernelBB, I));
2431 //Add TmpInstructions to some MCFI
2432 if(addToMCFI.size() > 0) {
2433 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
2434 for(unsigned x = 0; x < addToMCFI.size(); ++x) {
2435 tempMvec.addTemp(addToMCFI[x]);
2437 addToMCFI.clear();
2441 //Remove phis from epilogue
2442 for(std::vector<MachineBasicBlock*>::iterator MB = epilogues.begin(), ME = epilogues.end(); MB != ME; ++MB) {
2443 for(MachineBasicBlock::iterator I = (*MB)->begin(), E = (*MB)->end(); I != E; ++I) {
2445 DEBUG(std::cerr << "Looking at Instr: " << *I << "\n");
2446 //Get op code and check if its a phi
2447 if(I->getOpcode() == V9::PHI) {
2448 Instruction *tmp = 0;
2450 for(unsigned i = 0; i < I->getNumOperands(); ++i) {
2451 //Get Operand
2452 const MachineOperand &mOp = I->getOperand(i);
2453 assert(mOp.getType() == MachineOperand::MO_VirtualRegister && "Should be a Value*\n");
2455 if(!tmp) {
2456 tmp = new TmpInstruction(mOp.getVRegValue());
2457 addToMCFI.push_back(tmp);
2460 //Now for all our arguments we read, OR to the new TmpInstruction that we created
2461 if(mOp.isUse()) {
2462 DEBUG(std::cerr << "Use: " << mOp << "\n");
2463 //Place a copy at the end of its BB but before the branches
2464 assert(newValLocation.count(mOp.getVRegValue()) && "We must know where this value is located\n");
2465 //Reverse iterate to find the branches, we can safely assume no instructions have been
2466 //put in the nop positions
2467 for(MachineBasicBlock::iterator inst = --(newValLocation[mOp.getVRegValue()])->end(), endBB = (newValLocation[mOp.getVRegValue()])->begin(); inst != endBB; --inst) {
2468 MachineOpCode opc = inst->getOpcode();
2469 if(TMI->isBranch(opc) || TMI->isNop(opc))
2470 continue;
2471 else {
2472 if(mOp.getVRegValue()->getType() == Type::FloatTy)
2473 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
2474 else if(mOp.getVRegValue()->getType() == Type::DoubleTy)
2475 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
2476 else
2477 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp);
2480 break;
2486 else {
2487 //Remove the phi and replace it with an OR
2488 DEBUG(std::cerr << "Def: " << mOp << "\n");
2489 if(tmp->getType() == Type::FloatTy)
2490 BuildMI(**MB, I, V9::FMOVS, 3).addReg(tmp).addRegDef(mOp.getVRegValue());
2491 else if(tmp->getType() == Type::DoubleTy)
2492 BuildMI(**MB, I, V9::FMOVD, 3).addReg(tmp).addRegDef(mOp.getVRegValue());
2493 else
2494 BuildMI(**MB, I, V9::ORr, 3).addReg(tmp).addImm(0).addRegDef(mOp.getVRegValue());
2496 worklist.push_back(std::make_pair(*MB,I));
2507 if(addToMCFI.size() > 0) {
2508 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
2509 for(unsigned x = 0; x < addToMCFI.size(); ++x) {
2510 tempMvec.addTemp(addToMCFI[x]);
2512 addToMCFI.clear();
2515 //Delete the phis
2516 for(std::vector<std::pair<MachineBasicBlock*, MachineBasicBlock::iterator> >::iterator I = worklist.begin(), E = worklist.end(); I != E; ++I) {
2518 DEBUG(std::cerr << "Deleting PHI " << *I->second << "\n");
2519 I->first->erase(I->second);
2524 assert((addToMCFI.size() == 0) && "We should have added all TmpInstructions to some MachineCodeForInstruction");
2528 void ModuloSchedulingPass::reconstructLoop(MachineBasicBlock *BB) {
2530 TIME_REGION(X, "reconstructLoop");
2533 DEBUG(std::cerr << "Reconstructing Loop\n");
2535 //First find the value *'s that we need to "save"
2536 std::map<const Value*, std::pair<const MachineInstr*, int> > valuesToSave;
2538 //Keep track of instructions we have already seen and their stage because
2539 //we don't want to "save" values if they are used in the kernel immediately
2540 std::map<const MachineInstr*, int> lastInstrs;
2542 //Loop over kernel and only look at instructions from a stage > 0
2543 //Look at its operands and save values *'s that are read
2544 for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) {
2546 if(I->second !=0) {
2547 //For this instruction, get the Value*'s that it reads and put them into the set.
2548 //Assert if there is an operand of another type that we need to save
2549 const MachineInstr *inst = I->first;
2550 lastInstrs[inst] = I->second;
2552 for(unsigned i=0; i < inst->getNumOperands(); ++i) {
2553 //get machine operand
2554 const MachineOperand &mOp = inst->getOperand(i);
2556 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) {
2557 //find the value in the map
2558 if (const Value* srcI = mOp.getVRegValue()) {
2560 if(isa<Constant>(srcI) || isa<Argument>(srcI) || isa<PHINode>(srcI))
2561 continue;
2563 //Before we declare this Value* one that we should save
2564 //make sure its def is not of the same stage as this instruction
2565 //because it will be consumed before its used
2566 Instruction *defInst = (Instruction*) srcI;
2568 //Should we save this value?
2569 bool save = true;
2571 //Continue if not in the def map, loop invariant code does not need to be saved
2572 if(!defMap.count(srcI))
2573 continue;
2575 MachineInstr *defInstr = defMap[srcI];
2578 if(lastInstrs.count(defInstr)) {
2579 if(lastInstrs[defInstr] == I->second) {
2580 save = false;
2585 if(save)
2586 valuesToSave[srcI] = std::make_pair(I->first, i);
2590 if(mOp.getType() != MachineOperand::MO_VirtualRegister && mOp.isUse()) {
2591 assert("Our assumption is wrong. We have another type of register that needs to be saved\n");
2597 //The new loop will consist of one or more prologues, the kernel, and one or more epilogues.
2599 //Map to keep track of old to new values
2600 std::map<Value*, std::map<int, Value*> > newValues;
2602 //Map to keep track of old to new values in kernel
2603 std::map<Value*, std::map<int, Value*> > kernelPHIs;
2605 //Another map to keep track of what machine basic blocks these new value*s are in since
2606 //they have no llvm instruction equivalent
2607 std::map<Value*, MachineBasicBlock*> newValLocation;
2609 std::vector<MachineBasicBlock*> prologues;
2610 std::vector<BasicBlock*> llvm_prologues;
2613 //Write prologue
2614 if(schedule.getMaxStage() != 0)
2615 writePrologues(prologues, BB, llvm_prologues, valuesToSave, newValues, newValLocation);
2617 //Print out epilogues and prologue
2618 DEBUG(for(std::vector<MachineBasicBlock*>::iterator I = prologues.begin(), E = prologues.end();
2619 I != E; ++I) {
2620 std::cerr << "PROLOGUE\n";
2621 (*I)->print(std::cerr);
2624 BasicBlock *llvmKernelBB = new BasicBlock("Kernel", (Function*) (BB->getBasicBlock()->getParent()));
2625 MachineBasicBlock *machineKernelBB = new MachineBasicBlock(llvmKernelBB);
2626 (((MachineBasicBlock*)BB)->getParent())->getBasicBlockList().push_back(machineKernelBB);
2627 writeKernel(llvmKernelBB, machineKernelBB, valuesToSave, newValues, newValLocation, kernelPHIs);
2630 std::vector<MachineBasicBlock*> epilogues;
2631 std::vector<BasicBlock*> llvm_epilogues;
2633 //Write epilogues
2634 if(schedule.getMaxStage() != 0)
2635 writeEpilogues(epilogues, BB, llvm_epilogues, valuesToSave, newValues, newValLocation, kernelPHIs);
2638 //Fix our branches
2639 fixBranches(prologues, llvm_prologues, machineKernelBB, llvmKernelBB, epilogues, llvm_epilogues, BB);
2641 //Remove phis
2642 removePHIs(BB, prologues, epilogues, machineKernelBB, newValLocation);
2644 //Print out epilogues and prologue
2645 DEBUG(for(std::vector<MachineBasicBlock*>::iterator I = prologues.begin(), E = prologues.end();
2646 I != E; ++I) {
2647 std::cerr << "PROLOGUE\n";
2648 (*I)->print(std::cerr);
2651 DEBUG(std::cerr << "KERNEL\n");
2652 DEBUG(machineKernelBB->print(std::cerr));
2654 DEBUG(for(std::vector<MachineBasicBlock*>::iterator I = epilogues.begin(), E = epilogues.end();
2655 I != E; ++I) {
2656 std::cerr << "EPILOGUE\n";
2657 (*I)->print(std::cerr);
2661 DEBUG(std::cerr << "New Machine Function" << "\n");
2662 DEBUG(std::cerr << BB->getParent() << "\n");
2667 void ModuloSchedulingPass::fixBranches(std::vector<MachineBasicBlock *> &prologues, std::vector<BasicBlock*> &llvm_prologues, MachineBasicBlock *machineKernelBB, BasicBlock *llvmKernelBB, std::vector<MachineBasicBlock *> &epilogues, std::vector<BasicBlock*> &llvm_epilogues, MachineBasicBlock *BB) {
2669 const TargetInstrInfo *TMI = target.getInstrInfo();
2671 if(schedule.getMaxStage() != 0) {
2672 //Fix prologue branches
2673 for(unsigned I = 0; I < prologues.size(); ++I) {
2675 //Find terminator since getFirstTerminator does not work!
2676 for(MachineBasicBlock::reverse_iterator mInst = prologues[I]->rbegin(), mInstEnd = prologues[I]->rend(); mInst != mInstEnd; ++mInst) {
2677 MachineOpCode OC = mInst->getOpcode();
2678 //If its a branch update its branchto
2679 if(TMI->isBranch(OC)) {
2680 for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) {
2681 MachineOperand &mOp = mInst->getOperand(opNum);
2682 if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) {
2683 //Check if we are branching to the kernel, if not branch to epilogue
2684 if(mOp.getVRegValue() == BB->getBasicBlock()) {
2685 if(I == prologues.size()-1)
2686 mOp.setValueReg(llvmKernelBB);
2687 else
2688 mOp.setValueReg(llvm_prologues[I+1]);
2690 else {
2691 mOp.setValueReg(llvm_epilogues[(llvm_epilogues.size()-1-I)]);
2696 DEBUG(std::cerr << "New Prologue Branch: " << *mInst << "\n");
2701 //Update llvm basic block with our new branch instr
2702 DEBUG(std::cerr << BB->getBasicBlock()->getTerminator() << "\n");
2703 const BranchInst *branchVal = dyn_cast<BranchInst>(BB->getBasicBlock()->getTerminator());
2705 if(I == prologues.size()-1) {
2706 TerminatorInst *newBranch = new BranchInst(llvmKernelBB,
2707 llvm_epilogues[(llvm_epilogues.size()-1-I)],
2708 branchVal->getCondition(),
2709 llvm_prologues[I]);
2711 else
2712 TerminatorInst *newBranch = new BranchInst(llvm_prologues[I+1],
2713 llvm_epilogues[(llvm_epilogues.size()-1-I)],
2714 branchVal->getCondition(),
2715 llvm_prologues[I]);
2720 Value *origBranchExit = 0;
2722 //Fix up kernel machine branches
2723 for(MachineBasicBlock::reverse_iterator mInst = machineKernelBB->rbegin(), mInstEnd = machineKernelBB->rend(); mInst != mInstEnd; ++mInst) {
2724 MachineOpCode OC = mInst->getOpcode();
2725 if(TMI->isBranch(OC)) {
2726 for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) {
2727 MachineOperand &mOp = mInst->getOperand(opNum);
2729 if(mOp.getType() == MachineOperand::MO_PCRelativeDisp) {
2730 if(mOp.getVRegValue() == BB->getBasicBlock())
2731 mOp.setValueReg(llvmKernelBB);
2732 else
2733 if(llvm_epilogues.size() > 0) {
2734 assert(origBranchExit == 0 && "There should only be one branch out of the loop");
2736 origBranchExit = mOp.getVRegValue();
2737 mOp.setValueReg(llvm_epilogues[0]);
2739 else
2740 origBranchExit = mOp.getVRegValue();
2746 //Update kernelLLVM branches
2747 const BranchInst *branchVal = dyn_cast<BranchInst>(BB->getBasicBlock()->getTerminator());
2749 assert(origBranchExit != 0 && "We must have the original bb the kernel exits to!");
2751 if(epilogues.size() > 0) {
2752 TerminatorInst *newBranch = new BranchInst(llvmKernelBB,
2753 llvm_epilogues[0],
2754 branchVal->getCondition(),
2755 llvmKernelBB);
2757 else {
2758 BasicBlock *origBBExit = dyn_cast<BasicBlock>(origBranchExit);
2759 assert(origBBExit !=0 && "Original exit basic block must be set");
2760 TerminatorInst *newBranch = new BranchInst(llvmKernelBB,
2761 origBBExit,
2762 branchVal->getCondition(),
2763 llvmKernelBB);
2766 if(schedule.getMaxStage() != 0) {
2767 //Lastly add unconditional branches for the epilogues
2768 for(unsigned I = 0; I < epilogues.size(); ++I) {
2770 //Now since we don't have fall throughs, add a unconditional branch to the next prologue
2771 if(I != epilogues.size()-1) {
2772 BuildMI(epilogues[I], V9::BA, 1).addPCDisp(llvm_epilogues[I+1]);
2773 //Add unconditional branch to end of epilogue
2774 TerminatorInst *newBranch = new BranchInst(llvm_epilogues[I+1],
2775 llvm_epilogues[I]);
2778 else {
2779 BuildMI(epilogues[I], V9::BA, 1).addPCDisp(origBranchExit);
2782 //Update last epilogue exit branch
2783 BranchInst *branchVal = (BranchInst*) dyn_cast<BranchInst>(BB->getBasicBlock()->getTerminator());
2784 //Find where we are supposed to branch to
2785 BasicBlock *nextBlock = 0;
2786 for(unsigned j=0; j <branchVal->getNumSuccessors(); ++j) {
2787 if(branchVal->getSuccessor(j) != BB->getBasicBlock())
2788 nextBlock = branchVal->getSuccessor(j);
2791 assert((nextBlock != 0) && "Next block should not be null!");
2792 TerminatorInst *newBranch = new BranchInst(nextBlock, llvm_epilogues[I]);
2794 //Add one more nop!
2795 BuildMI(epilogues[I], V9::NOP, 0);
2800 //FIX UP Machine BB entry!!
2801 //We are looking at the predecesor of our loop basic block and we want to change its ba instruction
2804 //Find all llvm basic blocks that branch to the loop entry and change to our first prologue.
2805 const BasicBlock *llvmBB = BB->getBasicBlock();
2807 std::vector<const BasicBlock*>Preds (pred_begin(llvmBB), pred_end(llvmBB));
2809 //for(pred_const_iterator P = pred_begin(llvmBB), PE = pred_end(llvmBB); P != PE; ++PE) {
2810 for(std::vector<const BasicBlock*>::iterator P = Preds.begin(), PE = Preds.end(); P != PE; ++P) {
2811 if(*P == llvmBB)
2812 continue;
2813 else {
2814 DEBUG(std::cerr << "Found our entry BB\n");
2815 //Get the Terminator instruction for this basic block and print it out
2816 DEBUG(std::cerr << *((*P)->getTerminator()) << "\n");
2817 //Update the terminator
2818 TerminatorInst *term = ((BasicBlock*)*P)->getTerminator();
2819 for(unsigned i=0; i < term->getNumSuccessors(); ++i) {
2820 if(term->getSuccessor(i) == llvmBB) {
2821 DEBUG(std::cerr << "Replacing successor bb\n");
2822 if(llvm_prologues.size() > 0) {
2823 term->setSuccessor(i, llvm_prologues[0]);
2824 //Also update its corresponding machine instruction
2825 MachineCodeForInstruction & tempMvec =
2826 MachineCodeForInstruction::get(term);
2827 for (unsigned j = 0; j < tempMvec.size(); j++) {
2828 MachineInstr *temp = tempMvec[j];
2829 MachineOpCode opc = temp->getOpcode();
2830 if(TMI->isBranch(opc)) {
2831 DEBUG(std::cerr << *temp << "\n");
2832 //Update branch
2833 for(unsigned opNum = 0; opNum < temp->getNumOperands(); ++opNum) {
2834 MachineOperand &mOp = temp->getOperand(opNum);
2835 if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) {
2836 mOp.setValueReg(llvm_prologues[0]);
2842 else {
2843 term->setSuccessor(i, llvmKernelBB);
2844 //Also update its corresponding machine instruction
2845 MachineCodeForInstruction & tempMvec =
2846 MachineCodeForInstruction::get(term);
2847 for (unsigned j = 0; j < tempMvec.size(); j++) {
2848 MachineInstr *temp = tempMvec[j];
2849 MachineOpCode opc = temp->getOpcode();
2850 if(TMI->isBranch(opc)) {
2851 DEBUG(std::cerr << *temp << "\n");
2852 //Update branch
2853 for(unsigned opNum = 0; opNum < temp->getNumOperands(); ++opNum) {
2854 MachineOperand &mOp = temp->getOperand(opNum);
2855 if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) {
2856 mOp.setValueReg(llvmKernelBB);
2864 break;
2869 //BB->getParent()->getBasicBlockList().erase(BB);