Update svn merge history.
[sdcc.git] / sdcc / src / SDCCralloc.hpp
blob4e19a02317642f44f749d667fea07dae5c0380ec
1 // Philipp Klaus Krause, philipp@informatik.uni-frankfurt.de, pkk@spth.de, 2010 - 2013
2 //
3 // (c) 2010-2012 Goethe-Universität Frankfurt
4 //
5 // This program is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License as published by the
7 // Free Software Foundation; either version 2, or (at your option) any
8 // later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 // An optimal, polynomial-time register allocator.
21 // For details, see:
23 // Philipp Klaus Krause,
24 // "Optimal Register Allocation in Polynomial Time",
25 // Compiler Construction - 22nd International Conference, CC 2013, Held as Part of the European Joint Conferences on Theory and Practice of Software, ETAPS 2013. Proceedings,
26 // Lecture Notes in Computer Science, volume 7791, pp. 1-20.
27 // Springer,
28 // 2013.
30 // To use this from a port do the following:
32 // 1) Supply a cost function
33 // template <class G_t, class I_t>
34 // float instruction_cost(const assignment &a, unsigned short int i, const G_t &G, const I_t &I);
35 // Which can range from
36 // simple, e.g. cost 1 for each byte accessed in a register, cost 4 for each byte accessed in memory
37 // to
38 // quite involved, e.g. the number of bytes of code the code generator would generate.
40 // 2) Call
41 // create_cfg(), thorup_tree_decomposition(), nicify(), alive_tree_dec(), tree_dec_ralloc_nodes().
43 // The Z80 port can serve as an example, see z80_ralloc2_cc() in z80/ralloc2.cc.
45 #ifndef SDCCRALLOC_HH
46 #define SDCCRALLOC_HH 1
48 #include <iostream>
49 #include <limits>
50 #include <utility>
51 #include <sstream>
52 #include <fstream>
54 #include <boost/version.hpp>
56 #include <boost/graph/graphviz.hpp>
57 #include <boost/graph/adjacency_matrix.hpp>
58 #include <boost/graph/connected_components.hpp>
59 #include <boost/container/flat_set.hpp>
60 #include <boost/container/flat_map.hpp>
62 #include "common.h"
64 extern "C"
66 #include "SDCCbtree.h"
69 typedef short int var_t;
70 typedef signed char reg_t;
72 // Integer constant upper bound on port->num_regs
73 #define MAX_NUM_REGS 9
75 // Assignment at an instruction
76 struct i_assignment_t
78 var_t registers[MAX_NUM_REGS][2];
80 i_assignment_t(void)
82 for (reg_t r = 0; r < MAX_NUM_REGS; r++)
83 for (unsigned int i = 0; i < 2; i++)
84 registers[r][i] = -1;
87 #if 0
88 bool operator<(const i_assignment_t &i_a) const
90 for (reg_t r = 0; r < port->num_regs; r++)
91 for (unsigned int i = 0; i < 2; i++)
93 if (registers[r][i] < i_a.registers[r][i])
94 return(true);
95 else if (registers[r][i] > i_a.registers[r][i])
96 return(false);
98 return(false);
100 #endif
102 void add_var(var_t v, reg_t r)
104 if (registers[r][1] < v)
106 registers[r][0] = registers[r][1];
107 registers[r][1] = v;
109 else
110 registers[r][0] = v;
113 void remove_var(var_t v)
115 for (reg_t r = 0; r < port->num_regs; r++)
117 if (registers[r][1] == v)
119 registers[r][1] = registers[r][0];
120 registers[r][0] = -1;
122 else if (registers[r][0] == v)
123 registers[r][0] = -1;
128 typedef std::vector<var_t> varset_t; // Faster than std::set, std::tr1::unordered_set and stx::btree_set here.
130 typedef boost::container::flat_map<int, float> icosts_t; // Faster than std::map and stx::btree_map here.
132 typedef std::vector<var_t> cfg_alive_t; // Faster than stx::btree_set here.
133 typedef boost::container::flat_set<var_t> cfg_dying_t; // Faster than stx::btree_set and std::set here.
135 struct assignment
137 float s;
139 varset_t local; // Entries: var
140 std::vector<reg_t> global; // Entries: global[var] = reg (-1 if no reg assigned)
141 icosts_t i_costs; // Costs for all instructions in bag (needed to avoid double counting costs at join nodes)
142 i_assignment_t i_assignment; // Assignment at the instruction currently being added in an introduce node;
144 bool marked;
146 bool operator<(const assignment& a) const
148 varset_t::const_iterator i, ai, i_end, ai_end;
150 i_end = local.end();
151 ai_end = a.local.end();
153 for (i = local.begin(), ai = a.local.begin();; ++i, ++ai)
155 if (i == i_end && ai == ai_end)
156 return(false);
157 if (i == i_end)
158 return(true);
159 if (ai == ai_end)
160 return(false);
162 if (*i < *ai)
163 return(true);
164 if (*i > *ai)
165 return(false);
167 if (global[*i] < a.global[*ai])
168 return(true);
169 if (global[*i] > a.global[*ai])
170 return(false);
175 typedef std::list<assignment> assignment_list_t;
176 //typedef std::vector<assignment> assignment_list_t; // Probably faster, but would require some code reorganization.
178 struct tree_dec_node
180 std::set<unsigned int> bag;
181 std::set<var_t> alive;
182 assignment_list_t assignments;
183 unsigned weight; // The weight is the number of nodes at which intermediate results need to be remembered. In general, to minimize memory consumption, at join nodes the child with maximum weight should be processed first.
186 // The operand map has few entries, and is accessed often. boost::container::flat_multimap is substantially faster than std::multimap here.
187 // While we found boost::container::flat_multimap bugs affecting 1.83.0 and 1.85.0, and know there is a fix in 1.86.0, we don't know which is the first broken version.
188 // To actually trigger the bug also requires newer GCC / clang optimizations, so not having hit the bug in the past doesn't mean older boost isn't affected.
189 // https://sourceforge.net/p/sdcc/bugs/3739/ apparently only affected boost 1.85.0. https://sourceforge.net/p/sdcc/bugs/3697/ affects boost 1.83.0, but might affect more.
190 #if BOOST_VERSION / 100000 == 1 && BOOST_VERSION / 100 % 1000 >= 83 && BOOST_VERSION / 100 % 1000 <= 85
191 #warning boost::container::flat_multimap is broken in boost 1.85.0 (https://sourceforge.net/p/sdcc/bugs/3739/, https://github.com/boostorg/container/issues/281) and also 1.83.0 (https://sourceforge.net/p/sdcc/bugs/3697/)
192 #warning Using std::multimap fallback instead of boost::container::flat_multimap
193 typedef std::multimap<int, var_t> operand_map_t; // Faster than std::multimap<int, var_t> and stx::btree_multimap<int, var_t> here.
194 #else
195 typedef boost::container::flat_multimap<int, var_t> operand_map_t; // Faster than std::multimap<int, var_t> and stx::btree_multimap<int, var_t> here.
196 #endif
198 struct cfg_node
200 iCode *ic;
201 operand_map_t operands;
202 cfg_alive_t alive;
203 cfg_dying_t dying;
205 std::set<var_t> stack_alive;
207 #ifdef DEBUG_SEGV
208 cfg_node(void);
209 #endif
212 #ifdef DEBUG_SEGV
213 // This only exists to track down #3506333 and #3475617.
214 bool default_constructor_of_cfg_node_called;
215 cfg_node::cfg_node(void)
217 default_constructor_of_cfg_node_called = true;
219 #endif
221 struct con_node
223 int v;
224 int byte;
225 int size;
226 char *name;
229 typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS, tree_dec_node> tree_dec_t;
230 typedef boost::adjacency_list<boost::setS, boost::vecS, boost::undirectedS, con_node> con_t;
231 typedef boost::adjacency_matrix<boost::undirectedS, con_node> con2_t;
232 typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS, cfg_node> cfg_t;
233 typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::undirectedS> cfg_sym_t;
235 #ifdef HAVE_TREEDEC_COMBINATIONS_HPP
236 #include <treedec/treedec_traits.hpp>
237 TREEDEC_TREEDEC_BAG_TRAITS(tree_dec_t, bag);
238 #endif
240 #include "SDCCtree_dec.hpp"
242 // Cost function. Port-specific.
243 template <class G_t, class I_t>
244 static float instruction_cost(const assignment &a, unsigned short int i, const G_t &G, const I_t &I);
246 // For early removel of assignments that cannot be extended to valid assignments. Port-specific.
247 template <class G_t, class I_t>
248 static bool assignment_hopeless(const assignment &a, unsigned short int i, const G_t &G, const I_t &I, const var_t lastvar);
250 // Rough cost estimate. Port-specific.
251 template <class G_t, class I_t>
252 static float rough_cost_estimate(const assignment &a, unsigned short int i, const G_t &G, const I_t &I);
254 // Avoid overwriting operands that are still needed by the result. Port-specific.
255 template <class I_t>
256 static void add_operand_conflicts_in_node(const cfg_node &n, I_t &I);
258 // Port-specific
259 template <class T_t>
260 static void get_best_local_assignment_biased(assignment &a, typename boost::graph_traits<T_t>::vertex_descriptor t, const T_t &T);
262 // Code for another ic is generated when generating this one. Mark the other as generated. Port-specific.
263 static void extra_ic_generated(iCode *ic);
265 inline void
266 add_operand_to_cfg_node(cfg_node &n, operand *o, std::map<std::pair<int, reg_t>, var_t> &sym_to_index)
268 reg_t k;
269 if (o && IS_SYMOP(o) && sym_to_index.find(std::pair<int, reg_t>(OP_SYMBOL_CONST(o)->key, 0)) != sym_to_index.end())
271 if (n.operands.find(OP_SYMBOL_CONST(o)->key) == n.operands.end())
272 for (k = 0; k < OP_SYMBOL_CONST(o)->nRegs; k++){
273 n.operands.insert(std::pair<int, var_t>(OP_SYMBOL_CONST(o)->key, sym_to_index[std::pair<int, reg_t>(OP_SYMBOL_CONST(o)->key, k)]));}
277 // Check if the live-range of variable i is connected
278 #if 0
279 // This check was too expensive - Profiling shows that compiling the Dhrystone benchmark for stm8 with default options, we spent about a quarter of compiler runtime in here!
280 // Profiling shows that we spent a significant amount of time on the first call to copy_graph()
281 // Todo: Improve efficiency, e.g. using subgraph or filtered_graph to avoid the costly first call to copy_graph()
282 // Issues to solve: cfg2 is undirected, cfg is bidirectional; this makes use of subgraph or filtered_graph harder.
283 static bool liverange_connected(cfg_t &cfg, var_t i)
285 cfg_sym_t cfg2;
286 boost::copy_graph(cfg, cfg2, boost::vertex_copy(forget_properties()).edge_copy(forget_properties())); // This call to copy_graph is expensive!
287 for (int j = boost::num_vertices(cfg) - 1; j >= 0; j--)
289 if (std::find(cfg[j].alive.begin(), cfg[j].alive.end(), i) == cfg[j].alive.end())
291 boost::clear_vertex(j, cfg2);
292 boost::remove_vertex(j, cfg2);
296 std::vector<boost::graph_traits<cfg_t>::vertices_size_type> component(num_vertices(cfg2));
298 return(boost::connected_components(cfg2, &component[0]) <= 1);
300 #else
301 // A not very elegant, but faster check
302 static inline int component_size_impl(const cfg_t &cfg, const std::vector<bool> &life, var_t v, int i, std::vector<bool>& visited)
304 typename boost::graph_traits<cfg_t>::in_edge_iterator in, in_end;
305 typename boost::graph_traits<cfg_t>::out_edge_iterator out, out_end;
307 int size = 1;
308 visited[i] = true;
310 for(boost::tie(in, in_end) = boost::in_edges(i, cfg); in != in_end; ++in)
311 if(life[boost::source(*in, cfg)] && !visited[boost::source(*in, cfg)])
312 size += component_size_impl(cfg, life, v, boost::source(*in, cfg), visited);
314 for(boost::tie(out, out_end) = boost::out_edges(i, cfg); out != out_end; ++out)
315 if(life[boost::target(*out, cfg)] && !visited[boost::target(*out, cfg)])
316 size += component_size_impl(cfg, life, v, boost::target(*out, cfg), visited);
318 return(size);
321 static inline int component_size(const cfg_t &cfg, const std::vector<bool> &life, var_t v, int i)
323 std::vector<bool> visited(boost::num_vertices(cfg));
325 return(component_size_impl(cfg, life, v, i, visited));
328 static bool liverange_connected(const cfg_t &cfg, var_t v)
330 std::vector<bool> life(boost::num_vertices(cfg));
331 int num_life = 0;
332 int last_life;
334 for(int i = 0; i < boost::num_vertices (cfg); i++)
335 if(std::find(cfg[i].alive.begin(), cfg[i].alive.end(), v) != cfg[i].alive.end())
337 life[i] = true;
338 num_life++;
339 last_life = i;
342 if(!num_life)
343 return(true);
345 return(component_size(cfg, life, v, last_life) >= num_life);
347 #endif
349 // A quick-and-dirty function to get the CFG from sdcc.
350 static iCode *
351 create_cfg(cfg_t &cfg, con_t &con, ebbIndex *ebbi)
353 eBBlock **ebbs = ebbi->bbOrder;
354 iCode *start_ic;
355 iCode *ic;
357 std::map<int, unsigned int> key_to_index;
358 std::map<std::pair<int, reg_t>, var_t> sym_to_index;
360 if(currFunc)
361 currFunc->funcDivFlagSafe = 1;
363 start_ic = iCodeLabelOptimize(iCodeFromeBBlock (ebbs, ebbi->count));
365 int i;
366 var_t j;
367 wassertl (!boost::num_vertices(cfg), "CFG non-empty before creation.");
368 for (ic = start_ic, i = 0, j = 0; ic; ic = ic->next, i++)
370 if (currFunc)
371 currFunc->funcDivFlagSafe &= !(ic->op == INLINEASM || ic->op == '/' || ic->op == '%' || ic->op == PCALL ||
372 ic->op == CALL && (IS_OP_LITERAL (IC_LEFT (ic)) || !OP_SYMBOL(IC_LEFT (ic))->funcDivFlagSafe) ||
373 ic->op == RIGHT_OP && IS_OP_LITERAL (IC_RIGHT (ic)) && ulFromVal (OP_VALUE_CONST (IC_RIGHT (ic))) > 2); // Right shift might be implemented using division when shifting by more than 2.
375 #ifdef DEBUG_SEGV
376 default_constructor_of_cfg_node_called = false;
377 #endif
378 boost::add_vertex(cfg);
380 #ifdef DEBUG_SEGV
381 wassertl (default_constructor_of_cfg_node_called, "add_vertex failed to call default constructor of cfg_node!");
382 #endif
383 wassertl (cfg[i].alive.empty(), "Alive set non-empty upon creation.");
384 key_to_index[ic->key] = i;
386 if(ic->op == SEND && ic->builtinSEND) // Ensure that only the very first send iCode is active.
388 operand *bi_parms[MAX_BUILTIN_ARGS];
389 int nbi_parms;
390 getBuiltinParms(ic, &nbi_parms, bi_parms);
393 extra_ic_generated(ic);
395 cfg[i].ic = ic;
396 ic->rSurv = newBitVect(port->num_regs); // Never freed. Memory leak?
397 ic->rMask = newBitVect(port->num_regs); // Never freed. Memory leak?
399 if (ic->generated)
400 continue;
402 for (int j2 = 0; j2 <= operandKey; j2++)
404 if (bitVectBitValue(ic->rlive, j2))
406 symbol *sym = (symbol *)(hTabItemWithKey(liveRanges, j2));
408 if (!sym->for_newralloc)
409 continue;
411 // Add node to conflict graph:
412 if (sym_to_index.find(std::pair<int, reg_t>(j2, 0)) != sym_to_index.end())
413 continue;
415 // Other parts of the allocator may rely on the variables corresponding to bytes from the same sdcc variable to have subsequent numbers.
416 for (reg_t k = 0; k < sym->nRegs; k++)
418 boost::add_vertex(con);
419 con[j].v = j2;
420 con[j].byte = k;
421 con[j].size = sym->nRegs;
422 con[j].name = sym->name;
423 sym_to_index[std::pair<int, reg_t>(j2, k)] = j;
424 for (reg_t l = 0; l < k; l++)
425 boost::add_edge(j - l - 1, j, con);
426 j++;
433 // Get control flow graph from sdcc.
434 for (ic = start_ic; ic; ic = ic->next)
436 wassertl (key_to_index[ic->key] < boost::num_vertices(cfg), "Node not in CFG.");
438 if (ic->op != GOTO && ic->op != RETURN && ic->op != JUMPTABLE && ic->next)
440 wassertl (key_to_index[ic->next->key] < boost::num_vertices(cfg), "Next node not in CFG.");
441 boost::add_edge(key_to_index[ic->key], key_to_index[ic->next->key], cfg);
444 if (ic->op == GOTO)
446 wassertl (key_to_index[eBBWithEntryLabel(ebbi, ic->label)->sch->key] < boost::num_vertices(cfg), "GOTO target not in CFG.");
447 boost::add_edge(key_to_index[ic->key], key_to_index[eBBWithEntryLabel(ebbi, ic->label)->sch->key], cfg);
449 else if (ic->op == RETURN)
451 wassertl (key_to_index[eBBWithEntryLabel(ebbi, returnLabel)->sch->key] < boost::num_vertices(cfg), "RETURN target not in CFG.");
452 boost::add_edge(key_to_index[ic->key], key_to_index[eBBWithEntryLabel(ebbi, returnLabel)->sch->key], cfg);
454 else if (ic->op == IFX)
456 wassertl (key_to_index[eBBWithEntryLabel(ebbi, IC_TRUE(ic) ? IC_TRUE(ic) : IC_FALSE(ic))->sch->key] < boost::num_vertices(cfg), "IFX target not in CFG.");
457 boost::add_edge(key_to_index[ic->key], key_to_index[eBBWithEntryLabel(ebbi, IC_TRUE(ic) ? IC_TRUE(ic) : IC_FALSE(ic))->sch->key], cfg);
459 else if (ic->op == JUMPTABLE)
460 for (symbol *lbl = (symbol *)(setFirstItem (IC_JTLABELS (ic))); lbl; lbl = (symbol *)(setNextItem (IC_JTLABELS (ic))))
462 wassertl (key_to_index[eBBWithEntryLabel(ebbi, lbl)->sch->key] < boost::num_vertices(cfg), "GOTO target not in CFG.");
463 boost::add_edge(key_to_index[ic->key], key_to_index[eBBWithEntryLabel(ebbi, lbl)->sch->key], cfg);
466 for (int i = 0; i <= operandKey; i++)
468 if (sym_to_index.find(std::pair<int, reg_t>(i, 0)) == sym_to_index.end())
469 continue;
471 if (bitVectBitValue(ic->rlive, i))
473 symbol *isym = (symbol *)(hTabItemWithKey(liveRanges, i));
474 for (reg_t k = 0; k < isym->nRegs; k++)
476 wassert (key_to_index.find(ic->key) != key_to_index.end());
477 wassert (sym_to_index.find(std::pair<int, int>(i, k)) != sym_to_index.end());
478 wassertl (key_to_index[ic->key] < boost::num_vertices(cfg), "Node not in CFG.");
479 cfg[key_to_index[ic->key]].alive.push_back(sym_to_index[std::pair<int, int>(i, k)]);
482 // TODO: Move this to a place where it also works when using the old allocator!
483 isym->block = btree_lowest_common_ancestor(isym->block, ic->block);
484 // If this symbol has a spill location, ensure the spill location is also allocated in a compatible block
485 if (SYM_SPIL_LOC(isym))
486 SYM_SPIL_LOC(isym)->block = btree_lowest_common_ancestor(SYM_SPIL_LOC(isym)->block, isym->block);
490 add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_RESULT(ic), sym_to_index);
491 add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_LEFT(ic), sym_to_index);
492 add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_RIGHT(ic), sym_to_index);
494 // TODO: Extend live-ranges of returns of built-in function calls back to first SEND.
496 add_operand_conflicts_in_node(cfg[key_to_index[ic->key]], con);
499 #if 0
500 // Get conflict graph from sdcc
501 for (var_t i = 0; static_cast<boost::graph_traits<cfg_t>::vertices_size_type>(i) < num_vertices(con); i++)
503 symbol *isym = (symbol *)(hTabItemWithKey(liveRanges, con[i].v));
504 for (int j = 0; j <= operandKey; j++)
505 if (bitVectBitValue(isym->clashes, j))
507 symbol *jsym = (symbol *)(hTabItemWithKey(liveRanges, j));
508 if (sym_to_index.find(std::pair<int, reg_t>(j, 0)) == sym_to_index.end())
509 continue;
510 for (reg_t k = 0; k < jsym->nRegs; k++)
511 boost::add_edge(i, sym_to_index[std::pair<int, reg_t>(j, k)], con);
514 #endif
516 // Check for unconnected live ranges, some might have survived earlier stages.
517 for (var_t i = (var_t)boost::num_vertices(con) - 1; i >= 0; i--)
518 if (!liverange_connected(cfg, i))
520 // Non-connected CFGs are created by at least GCSE and lospre. We now have a live-range splitter that fixes them, so this should no longer be necessary, but we leave this code here for now, so in case one gets through, we can still generate correct code.
521 std::cerr << "Warning: Non-connected liverange found and extended to connected component of the CFG:" << con[i].name << ". Please contact sdcc authors with source code to reproduce.\n";
523 cfg_sym_t cfg2;
524 boost::copy_graph(cfg, cfg2, boost::vertex_copy(forget_properties()).edge_copy(forget_properties()));
525 std::vector<boost::graph_traits<cfg_t>::vertices_size_type> component(num_vertices(cfg2));
526 boost::connected_components(cfg2, &component[0]);
528 for (boost::graph_traits<cfg_t>::vertices_size_type j = 0; j < boost::num_vertices(cfg) - 1; j++)
530 if (std::find(cfg[j].alive.begin(), cfg[j].alive.end(), i) == cfg[j].alive.end())
531 continue;
533 for (boost::graph_traits<cfg_t>::vertices_size_type k = 0; k < boost::num_vertices(cfg) - 1; k++)
534 if (component[j] == component[k] && std::find(cfg[k].alive.begin(), cfg[k].alive.end(), i) == cfg[k].alive.end())
535 cfg[k].alive.push_back(i);
539 // Sort alive and setup dying.
540 for (boost::graph_traits<cfg_t>::vertices_size_type i = 0; i < num_vertices(cfg); i++)
542 std::sort(cfg[i].alive.begin(), cfg[i].alive.end());
543 cfg[i].dying = cfg_dying_t(cfg[i].alive.begin(), cfg[i].alive.end());;
544 typedef boost::graph_traits<cfg_t>::adjacency_iterator adjacency_iter_t;
545 adjacency_iter_t j, j_end;
546 for (boost::tie(j, j_end) = adjacent_vertices(i, cfg); j != j_end; ++j)
548 cfg_alive_t::const_iterator v, v_end;
549 for (v = cfg[*j].alive.begin(), v_end = cfg[*j].alive.end(); v != v_end; ++v)
551 const symbol *const vsym = (symbol *)(hTabItemWithKey(liveRanges, con[*v].v));
553 const operand *const left = IC_LEFT(cfg[*j].ic);
554 const operand *const right = IC_RIGHT(cfg[*j].ic);
555 const operand *const result = IC_RESULT(cfg[*j].ic);
557 if (!(POINTER_SET(cfg[*j].ic) || cfg[*j].ic->op == SET_VALUE_AT_ADDRESS) &&
558 (!left || !IS_SYMOP(left) || OP_SYMBOL_CONST(left)->key != vsym->key) &&
559 (!right || !IS_SYMOP(right) || OP_SYMBOL_CONST(right)->key != vsym->key) &&
560 result && IS_SYMOP(result) && OP_SYMBOL_CONST(result)->key == vsym->key)
561 continue;
563 cfg[i].dying.erase(*v);
568 // Construct conflict graph
569 for (boost::graph_traits<cfg_t>::vertices_size_type i = 0; i < num_vertices(cfg); i++)
571 cfg_alive_t::const_iterator v, v_end;
572 const iCode *ic = cfg[i].ic;
574 for (v = cfg[i].alive.begin(), v_end = cfg[i].alive.end(); v != v_end; ++v)
576 cfg_alive_t::const_iterator v2, v2_end;
578 // Conflict between operands are handled by add_operand_conflicts_in_node().
579 if (cfg[i].dying.find (*v) != cfg[i].dying.end())
580 continue;
581 if (ic->op != IFX && ic->op != JUMPTABLE && IC_RESULT(ic) && IS_SYMOP(IC_RESULT(ic)))
583 operand_map_t::const_iterator oi, oi_end;
584 for(boost::tie(oi, oi_end) = cfg[i].operands.equal_range(OP_SYMBOL_CONST(IC_RESULT(ic))->key); oi != oi_end; ++oi)
585 if(oi->second == *v)
586 goto next_var;
589 // Here, v is a variable that survives cfg[i].
590 // TODO: Check if we can use v, ++v2 instead of cfg[i].alive.begin() to speed things up.
591 for (v2 = cfg[i].alive.begin(), v2_end = cfg[i].alive.end(); v2 != v2_end; ++v2)
593 if(*v == *v2)
594 continue;
595 if (cfg[i].dying.find (*v2) != cfg[i].dying.end())
596 continue;
598 boost::add_edge(*v, *v2, con);
601 next_var:
606 return(start_ic);
609 // Computes live ranges for tree decomposition from live ranges from cfg.
610 inline void alive_tree_dec(tree_dec_t &tree_dec, const cfg_t &cfg)
612 for (unsigned int i = 0; i < num_vertices(tree_dec); i++)
614 std::set<unsigned int>::const_iterator v;
615 for (v = tree_dec[i].bag.begin(); v != tree_dec[i].bag.end(); ++v)
616 tree_dec[i].alive.insert(cfg[*v].alive.begin(), cfg[*v].alive.end());
620 #if defined(DEBUG_RALLOC_DEC) || defined (DEBUG_RALLOC_DEC_ASS)
621 static void print_assignment(const assignment &a)
623 varset_t::const_iterator i;
624 std::cout << "[";
625 for (i = a.local.begin(); i != a.local.end(); ++i)
626 std::cout << "(" << int(*i) << ", " << int(a.global[*i]) << "), ";
627 std::cout << "c: " << a.s << "]";
629 #endif
631 template <class I_t>
632 bool assignment_conflict(const assignment &a, const I_t &I, var_t v, reg_t r)
634 varset_t::const_iterator i, i_end;
636 for (i = a.local.begin(), i_end = a.local.end(); i != i_end; ++i)
638 if (a.global[*i] != r)
639 continue;
640 if (boost::edge(*i, v, I).second)
641 return(true);
644 return(false);
647 template<class G_t>
648 void assignments_introduce_instruction(assignment_list_t &alist, unsigned short int i, const G_t &G)
650 assignment_list_t::iterator ai, ai_end;
652 #if !defined(_MSC_VER) // Efficient code - reduces total SDCC runtime by about 5.5% vs. code below, but doesn't work with MSVC++ (at least up to MSVC++ 2015)
653 struct inserter_t
655 explicit inserter_t(const std::vector<reg_t>& g, i_assignment_t& a) : global(g), ia(a)
658 inserter_t& operator=(var_t v)
660 if (global[v] >= 0)
661 ia.add_var(v, global[v]);
662 return(*this);
664 inserter_t& operator*()
666 return(*this);
668 inserter_t& operator++()
670 return(*this);
672 inserter_t& operator++(int i)
674 return(*this);
676 private:
677 const std::vector<reg_t>& global;
678 i_assignment_t& ia;
681 for (ai = alist.begin(), ai_end = alist.end(); ai != ai_end; ++ai)
683 i_assignment_t ia;
685 std::set_intersection(ai->local.begin(), ai->local.end(), G[i].alive.begin(), G[i].alive.end(), inserter_t(ai->global, ia));
687 ai->i_assignment = ia;
689 #else // Human-readable code
690 for (ai = alist.begin(), ai_end = alist.end(); ai != ai_end; ++ai)
692 varset_t i_variables;
694 std::set_intersection(ai->local.begin(), ai->local.end(), G[i].alive.begin(), G[i].alive.end(), std::inserter(i_variables, i_variables.end()));
696 i_assignment_t ia;
698 varset_t::const_iterator v, v_end;
700 for (v = i_variables.begin(), v_end = i_variables.end(); v != v_end; ++v)
701 if (ai->global[*v] >= 0)
702 ia.add_var(*v, ai->global[*v]);
704 ai->i_assignment = ia;
706 #endif
709 template <class G_t, class I_t>
710 static void assignments_introduce_variable(assignment_list_t &alist, unsigned short int i, short int v, const G_t &G, const I_t &I)
712 assignment_list_t::iterator ai;
713 bool a_initialized;
714 assignment a;
715 size_t c, c_end;
717 for (ai = alist.begin(), c = 0, c_end = alist.size(); c < c_end; c++, ai++)
719 a_initialized = false;
721 for (reg_t r = 0; r < port->num_regs; r++)
723 if (!assignment_conflict(*ai, I, v, r))
725 if(!a_initialized)
727 a = *ai;
728 ai->marked = true;
729 a.marked = false;
730 varset_t::iterator i = std::lower_bound(a.local.begin(), a.local.end(), v);
731 if (i == a.local.end() || *i != v)
732 a.local.insert(i, v);
734 a.global[v] = r;
735 a.i_assignment.add_var(v, r);
736 if(!assignment_hopeless(a, i, G, I, v))
737 alist.push_back(a);
738 a.i_assignment.remove_var(v);
744 struct assignment_rep
746 assignment_list_t::iterator i;
747 float s;
749 bool operator<(const assignment_rep& a) const
751 return(s < a.s);
755 template <class I_t>
756 float compatibility_cost(const assignment& a, const assignment& ac, const I_t &I)
758 typedef typename boost::graph_traits<I_t>::adjacency_iterator adjacency_iter_t;
760 float c = 0.0f;
762 varset_t::const_iterator vi, vi_end;
764 for(vi = ac.local.begin(), vi_end = ac.local.end(); vi != vi_end; ++vi)
766 const var_t v = *vi;
767 if(a.global[v] != ac.global[v])
769 c += 1000.0f;
770 continue;
772 #if 0 // This improves the quality of assignments, but it has a big runtime overhead for some cases.
773 adjacency_iter_t j, j_end;
774 for (boost::tie(j, j_end) = adjacent_vertices(v, I); j != j_end; ++j)
775 if(ac.global[v] != -1 && a.global[*j] == ac.global[v])
777 c += 1000.0f;
778 break;
780 #endif
783 return(c);
786 // Ensure that we never get more than options.max_allocs_per_node assignments at a single node of the tree decomposition.
787 // Tries to drop the worst ones first (but never drop the empty assignment, as it's the only one guaranteed to be always valid).
788 template <class G_t, class I_t>
789 static void drop_worst_assignments(assignment_list_t &alist, unsigned short int i, const G_t &G, const I_t &I, const assignment& ac, bool *const assignment_optimal)
791 unsigned int n;
792 size_t alist_size;
793 assignment_list_t::iterator ai, an;
795 if ((alist_size = alist.size()) * port->num_regs <= static_cast<size_t>(options.max_allocs_per_node) || alist_size <= 1)
796 return;
798 *assignment_optimal = false;
800 #ifdef DEBUG_RALLOC_DEC
801 std::cout << "Too many assignments here (" << i << "):" << alist_size << " > " << options.max_allocs_per_node / port->num_regs << ". Dropping some.\n"; std::cout.flush();
802 #endif
804 #if 0
805 assignment_rep *arep = new assignment_rep[alist_size];
807 for (n = 0, ai = alist.begin(); n < alist_size; ++ai, n++)
809 arep[n].i = ai;
810 arep[n].s = ai->s + rough_cost_estimate(*ai, i, G, I) + compatibility_cost(*ai, ac, I);
813 std::nth_element(arep + 1, arep + options.max_allocs_per_node / port->num_regs, arep + alist_size);
815 //std::cout << "nth elem. est. cost: " << arep[options.max_allocs_per_node / port->num_regs].s << "\n"; std::cout.flush();
817 for (n = options.max_allocs_per_node / port->num_regs + 1; n < alist_size; n++)
818 alist.erase(arep[n].i);
819 #else // More efficient, reduces total SDCC runtime by about 1%.
821 size_t endsize = options.max_allocs_per_node / port->num_regs + 1;
822 size_t arep_maxsize = std::min(alist_size, endsize * 2) + 1;
823 size_t m, k;
824 float bound = std::numeric_limits<float>::infinity();
826 assignment_rep *arep = new assignment_rep[arep_maxsize];
828 for(m = 0, n = 1, ai = alist.begin(), ++ai; n < alist_size; n++)
830 float s = ai->s;
832 if(s > bound)
834 alist.erase(ai++);
835 continue;
837 s += compatibility_cost(*ai, ac, I);
838 if(s > bound)
840 alist.erase(ai++);
841 continue;
843 s += rough_cost_estimate(*ai, i, G, I);
844 if(s > bound)
846 alist.erase(ai++);
847 continue;
850 if(m >= arep_maxsize - 1)
852 std::nth_element(arep, arep + (endsize - 1), arep + m);
853 for(k = endsize; k < m; k++)
854 alist.erase(arep[k].i);
855 bound = arep[endsize - 1].s;
857 m = endsize;
860 arep[m].i = ai;
861 arep[m].s = s;
863 m++;
865 ++ai;
868 std::nth_element(arep, arep + (endsize - 1), arep + m);
870 for (n = endsize; n < m; n++)
871 alist.erase(arep[n].i);
872 #endif
874 delete[] arep;
877 // Handle Leaf nodes in the nice tree decomposition
878 template <class T_t, class G_t, class I_t>
879 static void tree_dec_ralloc_leaf(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, const G_t &G, const I_t &I)
881 #ifdef DEBUG_RALLOC_DEC
882 std::cout << "Leaf (" << t << "):\n"; std::cout.flush();
883 #endif
885 assignment a;
886 assignment_list_t &alist = T[t].assignments;
888 a.s = 0;
889 a.marked = false;
890 a.global.resize(boost::num_vertices(I), -1);
891 alist.push_back(a);
893 #ifdef DEBUG_RALLOC_DEC_ASS
894 assignment_list_t::iterator ai;
895 for(ai = alist.begin(); ai != alist.end(); ++ai)
897 print_assignment(*ai);
898 std::cout << "\n";
900 assignment best;
901 get_best_local_assignment(best, t, T);
902 std::cout << "Best: "; print_assignment(best); std::cout << "\n";
903 #endif
906 // Handle introduce nodes in the nice tree decomposition
907 template <class T_t, class G_t, class I_t>
908 static void tree_dec_ralloc_introduce(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, const G_t &G, const I_t &I, const assignment& ac, bool *const assignment_optimal)
910 typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
911 adjacency_iter_t c, c_end;
912 assignment_list_t::iterator ai;
913 boost::tie(c, c_end) = adjacent_vertices(t, T);
915 #ifdef DEBUG_RALLOC_DEC
916 std::cout << "Introduce (" << t << "):\n"; std::cout.flush();
917 std::cout << "ac: "; print_assignment(ac); std::cout << "\n";
918 #endif
920 assignment_list_t &alist = T[t].assignments;
922 std::swap(alist, T[*c].assignments);
924 std::set<var_t> new_vars;
925 std::set_difference(T[t].alive.begin(), T[t].alive.end(), T[*c].alive.begin(), T[*c].alive.end(), std::inserter(new_vars, new_vars.end()));
927 std::set<unsigned short> new_inst;
928 std::set_difference(T[t].bag.begin(), T[t].bag.end(), T[*c].bag.begin(), T[*c].bag.end(), std::inserter(new_inst, new_inst.end()));
929 unsigned short int i = *(new_inst.begin());
931 // Extend to new instruction.
932 assignments_introduce_instruction(alist, i, G);
934 std::set<var_t>::const_iterator v;
935 for (v = new_vars.begin(); v != new_vars.end(); ++v)
937 drop_worst_assignments(alist, i, G, I, ac, assignment_optimal);
938 assignments_introduce_variable(alist, i, *v, G, I);
941 // Summation of costs and early removal of assignments.
942 for (ai = alist.begin(); ai != alist.end();)
944 if ((ai->s += (ai->i_costs[i] = instruction_cost(*ai, i, G, I))) == std::numeric_limits<float>::infinity())
945 ai = alist.erase(ai);
946 else
947 ++ai;
950 // Free memory in the std::set<var_t, boost::pool_allocator<var_t> > that live in the assignments in the list.
951 //boost::singleton_pool<boost::fast_pool_allocator_tag, sizeof(var_t)>::release_memory();
953 #ifdef DEBUG_RALLOC_DEC_ASS
954 for(ai = alist.begin(); ai != alist.end(); ++ai)
956 print_assignment(*ai);
957 std::cout << "\n";
960 assignment best;
961 get_best_local_assignment(best, t, T);
962 std::cout << "Best: "; print_assignment(best); std::cout << "\n";
963 #endif
966 static bool assignments_locally_same(const assignment &a1, const assignment &a2)
968 if (a1.local != a2.local)
969 return(false);
971 varset_t::const_iterator i, i_end;
972 for (i = a1.local.begin(), i_end = a1.local.end(); i != i_end; ++i)
973 if (a1.global[*i] != a2.global[*i])
974 return(false);
976 return(true);
979 // Handle forget nodes in the nice tree decomposition
980 template <class T_t, class G_t, class I_t>
981 static void tree_dec_ralloc_forget(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, const G_t &G, const I_t &I)
983 typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
984 adjacency_iter_t c, c_end;
985 boost::tie(c, c_end) = adjacent_vertices(t, T);
987 #ifdef DEBUG_RALLOC_DEC
988 std::cout << "Forget (" << t << "):\n"; std::cout.flush();
989 #endif
991 assignment_list_t &alist = T[t].assignments;
993 std::swap(alist, T[*c].assignments);
995 std::set<unsigned short int> old_inst;
996 std::set_difference(T[*c].bag.begin(), T[*c].bag.end(), T[t].bag.begin(), T[t].bag.end(), std::inserter(old_inst, old_inst.end()));
997 wassert(old_inst.size() == 1);
998 unsigned short int i = *(old_inst.begin());
1000 varset_t old_vars;
1001 std::set_difference(T[*c].alive.begin(), T[*c].alive.end(), T[t].alive.begin(), T[t].alive.end(), std::inserter(old_vars, old_vars.end()));
1003 assignment_list_t::iterator ai, aif;
1005 // Restrict assignments (locally) to current variables.
1006 varset_t newlocal;
1007 for (ai = alist.begin(); ai != alist.end(); ++ai)
1009 newlocal.clear();
1010 std::set_difference(ai->local.begin(), ai->local.end(), old_vars.begin(), old_vars.end(), std::inserter(newlocal, newlocal.end()));
1011 std::swap(ai->local, newlocal);
1013 ai->i_costs.erase(i);
1016 alist.sort();
1018 // Collapse (locally) identical assignments.
1019 for (ai = alist.begin(); ai != alist.end();)
1021 aif = ai;
1023 for (++ai; ai != alist.end() && assignments_locally_same(*aif, *ai);)
1025 if (aif->s > ai->s)
1027 alist.erase(aif);
1028 aif = ai;
1029 ++ai;
1031 else
1032 ai = alist.erase(ai);
1036 // Free memory in the std::set<var_t, boost::pool_allocator<var_t> > that live in the assignments in the list.
1037 //boost::singleton_pool<boost::fast_pool_allocator_tag, sizeof(var_t)>::release_memory();
1039 #ifdef DEBUG_RALLOC_DEC
1040 std::cout << "Remaining assignments: " << alist.size() << "\n"; std::cout.flush();
1041 #endif
1043 #ifdef DEBUG_RALLOC_DEC_ASS
1044 for(ai = alist.begin(); ai != alist.end(); ++ai)
1046 print_assignment(*ai);
1047 std::cout << "\n";
1050 assignment best;
1051 get_best_local_assignment(best, t, T);
1052 std::cout << "Best: "; print_assignment(best); std::cout << "\n";
1053 #endif
1056 // Handle join nodes in the nice tree decomposition
1057 template <class T_t, class G_t, class I_t>
1058 static void tree_dec_ralloc_join(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, const G_t &G, const I_t &I)
1060 typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
1061 adjacency_iter_t c, c_end, c2, c3;
1062 boost::tie(c, c_end) = adjacent_vertices(t, T);
1064 #ifdef DEBUG_RALLOC_DEC
1065 std::cout << "Join (" << t << "):\n"; std::cout.flush();
1066 #endif
1068 c2 = c;
1069 ++c;
1070 c3 = c;
1072 assignment_list_t &alist = T[t].assignments;
1073 assignment_list_t &alist2 = T[*c2].assignments;
1074 std::swap(alist, T[*c3].assignments);
1076 alist.sort();
1077 alist2.sort();
1079 assignment_list_t::iterator ai, ai2;
1080 for (ai = alist.begin(), ai2 = alist2.begin(); ai != alist.end() && ai2 != alist2.end();)
1082 if (assignments_locally_same(*ai, *ai2))
1084 ai->s += ai2->s;
1085 // Avoid double-counting instruction costs.
1086 std::set<unsigned int>::iterator bi;
1087 for (bi = T[t].bag.begin(); bi != T[t].bag.end(); ++bi)
1088 ai->s -= ai->i_costs[*bi];
1089 for (size_t i = 0; i < ai->global.size(); i++)
1090 ai->global[i] = ((ai->global[i] != -1) ? ai->global[i] : ai2->global[i]);
1091 ++ai;
1092 ++ai2;
1094 else if (*ai < *ai2)
1095 ai = alist.erase(ai);
1096 else if (*ai2 < *ai)
1097 ++ai2;
1099 while(ai != alist.end())
1100 ai = alist.erase(ai);
1102 alist2.clear();
1104 #ifdef DEBUG_RALLOC_DEC
1105 std::cout << "Remaining assignments: " << alist.size() << "\n"; std::cout.flush();
1106 #endif
1108 #ifdef DEBUG_RALLOC_DEC_ASS
1109 for(std::list<assignment>::iterator ai = alist.begin(); ai != alist.end(); ++ai)
1111 print_assignment(*ai);
1112 std::cout << "\n";
1114 #endif
1117 template <class T_t>
1118 void get_best_local_assignment(assignment &a, typename boost::graph_traits<T_t>::vertex_descriptor t, const T_t &T)
1120 const assignment_list_t &alist = T[t].assignments;
1122 assignment_list_t::const_iterator ai, ai_end, ai_best;
1123 for(ai = ai_best = alist.begin(), ai_end = alist.end(); ai != ai_end; ++ai)
1124 if(ai->s < ai_best->s)
1125 ai_best = ai;
1127 a = *ai_best;
1130 // Handle nodes in the tree decomposition, by detecting their type and calling the appropriate function. Recurses.
1131 template <class T_t, class G_t, class I_t>
1132 static void tree_dec_ralloc_nodes(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, const G_t &G, const I_t &I, const assignment& ac, bool *const assignment_optimal)
1134 typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
1136 adjacency_iter_t c, c_end;
1137 typename boost::graph_traits<T_t>::vertex_descriptor c0, c1;
1139 boost::tie(c, c_end) = adjacent_vertices(t, T);
1141 switch (out_degree(t, T))
1143 case 0:
1144 tree_dec_ralloc_leaf(T, t, G, I);
1145 break;
1146 case 1:
1147 c0 = *c;
1148 tree_dec_ralloc_nodes(T, c0, G, I, ac, assignment_optimal);
1149 T[c0].bag.size() < T[t].bag.size() ? tree_dec_ralloc_introduce(T, t, G, I, ac, assignment_optimal) : tree_dec_ralloc_forget(T, t, G, I);
1150 break;
1151 case 2:
1152 c0 = *c++;
1153 c1 = *c;
1155 if (T[c0].weight < T[c1].weight) // Minimize memory consumption needed for keeping intermediate results. As a side effect, this also helps the ac mechanism in the heuristic.
1156 std::swap (c0, c1);
1158 tree_dec_ralloc_nodes(T, c0, G, I, ac, assignment_optimal);
1160 assignment *ac2 = new assignment;
1161 get_best_local_assignment_biased(*ac2, c0, T);
1162 tree_dec_ralloc_nodes(T, c1, G, I, *ac2, assignment_optimal);
1163 delete ac2;
1165 tree_dec_ralloc_join(T, t, G, I);
1166 break;
1167 default:
1168 std::cerr << "Not nice.\n";
1169 break;
1173 // Find the best root selecting from t_old and the leafs under t.
1174 template <class T_t>
1175 static std::pair<typename boost::graph_traits<T_t>::vertex_descriptor, size_t> find_best_root(const T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, size_t t_s, typename boost::graph_traits<T_t>::vertex_descriptor t_old, size_t t_old_s)
1177 typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
1178 adjacency_iter_t c, c_end;
1179 typename boost::graph_traits<T_t>::vertex_descriptor c0, c1, t0;
1180 size_t t0_s;
1182 boost::tie(c, c_end) = adjacent_vertices(t, T);
1184 switch (out_degree(t, T))
1186 case 0:
1187 return(t_s > t_old_s ? std::pair<typename boost::graph_traits<T_t>::vertex_descriptor, size_t>(t, t_s) : std::pair<typename boost::graph_traits<T_t>::vertex_descriptor, size_t>(t_old, t_old_s));
1188 case 1:
1189 return(find_best_root(T, *c, T[*c].alive.size() ? T[*c].alive.size() : t_s, t_old, t_old_s));
1190 case 2:
1191 c0 = *c++;
1192 c1 = *c;
1193 boost::tie(t0, t0_s) = find_best_root(T, c0, T[c0].alive.size() ? T[c0].alive.size() : t_s, t_old, t_old_s);
1194 return(find_best_root(T, c1, T[c1].alive.size() ? T[c1].alive.size() : t_s, t0_s > t_old_s ? t0 : t_old, t0_s > t_old_s ? t0_s : t_old_s));
1195 break;
1196 default:
1197 std::cerr << "Not nice.\n";
1198 break;
1201 return(std::pair<typename boost::graph_traits<T_t>::vertex_descriptor, size_t>(t_old, t_old_s));
1204 // Change the root to t.
1205 template <class T_t>
1206 static void re_root(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t)
1208 typename boost::graph_traits<T_t>::vertex_descriptor s0, s1, s2;
1209 typename boost::graph_traits<T_t>::in_edge_iterator e, e_end;
1211 boost::tie(e, e_end) = boost::in_edges(t, T);
1212 if (e == e_end)
1213 return;
1215 s0 = t;
1216 s1 = boost::source(*e, T);
1218 for (boost::tie(e, e_end) = boost::in_edges(s1, T); e != e_end; boost::tie(e, e_end) = boost::in_edges(s1, T))
1220 s2 = boost::source(*e, T);
1221 boost::remove_edge(s1, s0, T);
1222 boost::add_edge(s0, s1, T);
1223 s0 = s1;
1224 s1 = s2;
1226 boost::remove_edge(s1, s0, T);
1227 boost::add_edge(s0, s1, T);
1230 // Change the root to improve the assignment removal heuristic.
1231 template <class T_t>
1232 static void good_re_root(T_t &T)
1234 typename boost::graph_traits<T_t>::vertex_descriptor t;
1236 typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
1237 adjacency_iter_t c, c_end;
1239 t = find_root(T);
1241 for (boost::tie(c, c_end) = boost::adjacent_vertices(t, T); c != c_end && !T[*c].alive.size();)
1242 boost::tie(c, c_end) = boost::adjacent_vertices(*c, T);
1244 size_t t_s = (c != c_end ? T[*c].alive.size() : 0);
1245 t = find_best_root(T, t, t_s, t, t_s).first;
1247 if (T[t].alive.size())
1249 std::cerr << "Error: Invalid root.\n";
1250 return;
1253 re_root(T, t);
1256 // Dump conflict graph, with numbered nodes, show live variables at each node.
1257 static void dump_con(const con_t &con)
1259 if (!currFunc)
1260 return;
1262 std::ofstream dump_file((std::string(dstFileName) + ".dumpralloccon" + currFunc->rname + ".dot").c_str());
1264 std::string *name = new std::string[num_vertices(con)];
1265 for (var_t i = 0; static_cast<boost::graph_traits<cfg_t>::vertices_size_type>(i) < boost::num_vertices(con); i++)
1267 std::ostringstream os;
1268 os << i;
1269 if (con[i].name)
1270 os << " : " << con[i].name << ":" << con[i].byte;
1271 name[i] = os.str();
1273 boost::write_graphviz(dump_file, con, boost::make_label_writer(name));
1274 delete[] name;
1277 // Dump cfg, with numbered nodes, show live variables at each node.
1278 static void dump_cfg(const cfg_t &cfg)
1280 if (!currFunc)
1281 return;
1283 std::ofstream dump_file((std::string(dstFileName) + ".dumpralloccfg" + currFunc->rname + ".dot").c_str());
1285 std::string *name = new std::string[num_vertices(cfg)];
1286 for (unsigned int i = 0; i < boost::num_vertices(cfg); i++)
1288 std::ostringstream os;
1289 os << i << ", " << cfg[i].ic->key << ": ";
1290 cfg_alive_t::const_iterator v;
1291 for (v = cfg[i].alive.begin(); v != cfg[i].alive.end(); ++v)
1292 os << *v << " ";
1293 name[i] = os.str();
1296 boost::write_graphviz(dump_file, cfg, boost::make_label_writer(name), boost::default_writer(), cfg_titlewriter(currFunc->rname, "register allocator"));
1297 delete[] name;
1300 // Dump tree decomposition, show bag and live variables at each node.
1301 static void dump_tree_decomposition(const tree_dec_t &tree_dec)
1303 if (!currFunc)
1304 return;
1306 std::ofstream dump_file((std::string(dstFileName) + ".dumprallocdec" + currFunc->rname + ".dot").c_str());
1308 unsigned int w = 0;
1310 std::string *name = new std::string[num_vertices(tree_dec)];
1311 for (unsigned int i = 0; i < boost::num_vertices(tree_dec); i++)
1313 if (tree_dec[i].bag.size() > w)
1314 w = tree_dec[i].bag.size();
1315 std::ostringstream os;
1316 std::set<unsigned int>::const_iterator v1;
1317 os << i << " | ";
1318 for (v1 = tree_dec[i].bag.begin(); v1 != tree_dec[i].bag.end(); ++v1)
1319 os << *v1 << " ";
1320 os << ": ";
1321 std::set<var_t>::const_iterator v2;
1322 for (v2 = tree_dec[i].alive.begin(); v2 != tree_dec[i].alive.end(); ++v2)
1323 os << *v2 << " ";
1324 name[i] = os.str();
1327 boost::write_graphviz(dump_file, tree_dec, boost::make_label_writer(name), boost::default_writer(), dec_titlewriter(w - 1, currFunc->rname, "register allocator"));
1328 delete[] name;
1330 #ifdef D_RALLOC_DEC
1331 std::cout << "Width: " << (w - 1) << "(" << currFunc->name << ")\n";
1332 #endif
1335 #endif