1 // Philipp Klaus Krause, philipp@informatik.uni-frankfurt.de, pkk@spth.de, 2012
3 // (c) 2012 Goethe-Universität Frankfurt
5 // This program is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License as published by the
7 // Free Software Foundation; either version 2, or (at your option) any
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 // Lifetime-optimal speculative partial redundancy elimination.
22 // #define DEBUG_LOSPRE // Uncomment to get debug messages while doing lospre.
23 // #define DEBUG_LOSPRE_ASS // Uncomment to get debug messages on considered assignmentd while doing lospre.
25 #include "SDCClospre.hpp"
27 // A quick-and-dirty function to get the CFG from sdcc (a simplified version of the function from SDCCralloc.hpp).
29 create_cfg_lospre (cfg_lospre_t
&cfg
, iCode
*start_ic
, ebbIndex
*ebbi
)
33 std::map
<int, unsigned int> key_to_index
;
37 for (ic
= start_ic
, i
= 0; ic
; ic
= ic
->next
, i
++)
39 boost::add_vertex(cfg
);
40 key_to_index
[ic
->key
] = i
;
45 // Get control flow graph from sdcc.
46 for (ic
= start_ic
; ic
; ic
= ic
->next
)
49 float base_cost
= 128.0f
/*+ ic->count * (!optimize.codeSize + 2.0f * !optimize.codeSpeed)*/; // Constant summand: Optimization for code size.
51 if((ic
->op
== '>' || ic
->op
== '<' || ic
->op
== LE_OP
|| ic
->op
== GE_OP
|| ic
->op
== EQ_OP
|| ic
->op
== NE_OP
|| ic
->op
== '^' || ic
->op
== '|' || ic
->op
== BITWISEAND
) && ifxForOp (IC_RESULT (ic
), ic
))
52 boost::add_edge(key_to_index
[ic
->key
], key_to_index
[ic
->next
->key
], base_cost
* 1.3f
, cfg
); // 1.3f: Try not to separate op from ifx.
53 else if (ic
->op
!= GOTO
&& ic
->op
!= RETURN
&& ic
->op
!= JUMPTABLE
&& ic
->next
)
54 boost::add_edge(key_to_index
[ic
->key
], key_to_index
[ic
->next
->key
], base_cost
, cfg
);
57 boost::add_edge(key_to_index
[ic
->key
], key_to_index
[eBBWithEntryLabel(ebbi
, ic
->label
)->sch
->key
], base_cost
* 2.0f
, cfg
);
58 else if (ic
->op
== RETURN
)
59 boost::add_edge(key_to_index
[ic
->key
], key_to_index
[eBBWithEntryLabel(ebbi
, returnLabel
)->sch
->key
], base_cost
* 2.0f
, cfg
);
60 else if (ic
->op
== IFX
)
61 boost::add_edge(key_to_index
[ic
->key
], key_to_index
[eBBWithEntryLabel(ebbi
, IC_TRUE(ic
) ? IC_TRUE(ic
) : IC_FALSE(ic
))->sch
->key
], base_cost
* 2.0f
, cfg
);
62 else if (ic
->op
== JUMPTABLE
)
63 for (symbol
*lbl
= (symbol
*)(setFirstItem (IC_JTLABELS (ic
))); lbl
; lbl
= (symbol
*)(setNextItem (IC_JTLABELS (ic
))))
64 boost::add_edge(key_to_index
[ic
->key
], key_to_index
[eBBWithEntryLabel(ebbi
, lbl
)->sch
->key
], base_cost
* 2.0f
, cfg
);
69 candidate_expression (const iCode
*const ic
, int lkey
)
76 ic
->op
!= UNARYMINUS
&&
92 ic
->op
!= BITWISEAND
&&
97 !(ic
->op
== '=' && !POINTER_SET(ic
) && !(IS_ITEMP(IC_RIGHT(ic
)) /*&& IC_RIGHT(ic)->key > lkey*/)) &&
98 ic
->op
!= GET_VALUE_AT_ADDRESS
&&
100 ic->op != ADDRESS_OF Apparently typically not worth the cost in code size*/)
103 const operand
*const left
= IC_LEFT (ic
);
104 const operand
*const right
= IC_RIGHT (ic
);
105 const operand
*const result
= IC_RESULT (ic
);
107 // Todo: Allow literal right operand once backends can rematerialize literals!
108 if(ic
->op
== '=' && IS_OP_LITERAL (right
))
111 // Todo: Allow more operands!
112 if (ic
->op
!= CAST
&& left
&& !(IS_SYMOP (left
) || IS_OP_LITERAL (left
)) ||
113 right
&& !(IS_SYMOP (right
) || IS_OP_LITERAL (right
)) ||
114 result
&& !(IS_SYMOP (result
) || IS_OP_LITERAL (result
)))
121 same_expression (const iCode
*const lic
, const iCode
*const ric
)
126 if (lic
->op
!= ric
->op
)
129 const operand
*lleft
= IC_LEFT (lic
);
130 const operand
*lright
= IC_RIGHT (lic
);
131 const operand
*lresult
= IC_RESULT (lic
);
132 const operand
*rleft
= IC_LEFT (ric
);
133 const operand
*rright
= IC_RIGHT (ric
);
134 const operand
*rresult
= IC_RESULT (ric
);
136 if ((isOperandEqual (lleft
, rleft
) && isOperandEqual (lright
, rright
) ||
137 IS_COMMUTATIVE (lic
) && isOperandEqual (lleft
, rright
) && isOperandEqual (lright
, rleft
)) &&
138 (lresult
&& rresult
&& compareTypeInexact (operandType (lresult
), operandType (rresult
)) > 0) &&
139 (!IS_PTR(operandType (lresult
)) && !IS_PTR(operandType (rresult
)) || compareType (operandType (lresult
), operandType (rresult
), true) == 1) && // Otherwise we get confusion between bit-field and non bit-field writes into structs within a union.
140 IS_FLOAT (operandType (lresult
)) == IS_FLOAT (operandType (rresult
)))
147 get_candidate_set(std::set
<int> *c
, const iCode
*const sic
, int lkey
)
149 // TODO: For loop invariant code motion allow expression that only occurs once, too - will be needed when optimizing for speed.
150 for (const iCode
*ic
= sic
; ic
; ic
= ic
->next
)
152 if (!candidate_expression (ic
, lkey
))
154 for (const iCode
*pic
= sic
; pic
!= ic
; pic
= pic
->next
)
155 if (candidate_expression (pic
, lkey
) && same_expression (ic
, pic
) && c
->find (pic
->key
) == c
->end ())
157 // Found expression that occurs at least twice.
158 c
->insert (pic
->key
);
165 invalidates_expression(const iCode
*const eic
, const iCode
*const iic
)
167 const operand
*const eleft
= IC_LEFT (eic
);
168 const operand
*const eright
= IC_RIGHT (eic
);
169 const bool uses_global
= (eic
->op
== GET_VALUE_AT_ADDRESS
|| isOperandGlobal (eleft
) || isOperandGlobal (eright
) || IS_SYMOP (eleft
) && OP_SYMBOL_CONST (eleft
)->addrtaken
|| IS_SYMOP (eright
) && OP_SYMBOL_CONST (eright
)->addrtaken
);
170 const bool uses_volatile
= POINTER_GET (eic
) && IS_VOLATILE (operandType (eleft
)->next
) || IS_OP_VOLATILE (eleft
) || IS_OP_VOLATILE (eright
);
172 const operand
*const left
= IC_LEFT (iic
);
173 const operand
*const right
= IC_RIGHT (iic
);
174 const operand
*const result
= IC_RESULT (iic
);
176 if (iic
->op
== FUNCTION
|| iic
->op
== ENDFUNCTION
|| iic
->op
== RECEIVE
)
178 if (eic
->op
== ADDRESS_OF
) // ADDRESS_OF does not really read its operand.
180 if (eic
->op
== GET_VALUE_AT_ADDRESS
&& (isOperandGlobal (IC_RESULT (iic
)) || IS_SYMOP (IC_RESULT (iic
)) && OP_SYMBOL_CONST (IC_RESULT (iic
))->addrtaken
))
182 if (IC_RESULT (iic
) && !IS_OP_LITERAL (result
) && !POINTER_SET(iic
) &&
183 (eleft
&& isOperandEqual (eleft
, result
) || eright
&& isOperandEqual (eright
, result
)))
185 if ((uses_global
|| uses_volatile
) && (iic
->op
== CALL
|| iic
->op
== PCALL
))
187 if (uses_volatile
&& (POINTER_GET (iic
) && IS_VOLATILE (operandType (left
)->next
)) || IS_OP_VOLATILE (left
) || IS_OP_VOLATILE (right
))
189 if (uses_global
&& POINTER_SET (iic
)) // TODO: More accuracy here!
196 setup_cfg_for_expression (cfg_lospre_t
*const cfg
, const iCode
*const eic
)
198 typedef boost::graph_traits
<cfg_lospre_t
>::vertex_descriptor vertex_t
;
199 bool safety_required
= false;
201 // In redundancy elimination, safety means not doing a computation on any path were it was not done before.
202 // This is important, if the computation can have side-effects, which depends on the target architecture.
203 // E.g. On some systems division requires safety, since division by zero might result in an interrupt.
204 // When there are memory-mapped devices or there is memory management, reading from a pointer requires
205 // safety, since reading from an unknown location could result in making the device do something or in a SIGSEGV.
206 // On the other hand, addition is something that typically does not require safety, since adding two undefined
207 // operands gives just another undefined (the C standard allows trap representations, which, could result
208 // in addition requiring safety though; AFAIK none of the targets currently supported by SDCC have trap representations).
209 // Philipp, 2012-07-06.
211 // For now we just always require safety for "dangerous" operations.
213 // TODO: Replace the current one by a more exact mechanism, that takes into account information from
214 // (not yet implemented) generalized constant propagation, pointer analysis, etc.
216 // Function calls can have any side effects.
217 if (eic
->op
== CALL
|| eic
->op
== PCALL
)
218 safety_required
= true;
220 // volatile requires safety.
221 if (POINTER_GET (eic
) && IS_VOLATILE (operandType (IC_LEFT (eic
))->next
) || IS_OP_VOLATILE (IC_LEFT (eic
)) || IS_OP_VOLATILE (IC_RIGHT (eic
)))
222 safety_required
= true;
224 // Reading from an invalid address might be dangerous, since there could be memory-mapped I/O.
225 if (eic
->op
== GET_VALUE_AT_ADDRESS
&& !optimize
.allow_unsafe_read
)
226 safety_required
= true;
228 // The division routines for z80-like ports, the hc08/s08's, the pdk ports and stm8's hardware division just give an undefined result
229 // for division by zero, but there are no harmful side effects. I don't know about the other ports.
230 if ((eic
->op
== '/' || eic
->op
== '%') && !TARGET_Z80_LIKE
&& !TARGET_HC08_LIKE
&& !TARGET_PDK_LIKE
&& !TARGET_IS_STM8
)
231 safety_required
= true;
233 // TODO: Relax this! There are cases where allowing unsafe optimizations will improve speed.
234 // This probably needs implementation of profile-guided optimization though.
235 if (optimize
.codeSpeed
)
236 safety_required
= true;
239 std::cout
<< "Invalidation set I: ";
241 for (vertex_t i
= 0; i
< boost::num_vertices (*cfg
); i
++)
243 const iCode
*const ic
= (*cfg
)[i
].ic
;
245 (*cfg
)[i
].uses
= same_expression (eic
, ic
);
246 (*cfg
)[i
].invalidates
= invalidates_expression (eic
, ic
);
248 (*cfg
)[i
].forward
= std::pair
<int, int>(-1, -1);
251 if ((*cfg
)[i
].invalidates
)
252 std::cout
<< i
<< ", ";
259 return (safety_required
);
262 // Dump cfg, with numbered nodes.
263 void dump_cfg_lospre (const cfg_lospre_t
&cfg
)
268 std::ofstream
dump_file((std::string(dstFileName
) + ".dumplosprecfg" + currFunc
->rname
+ ".dot").c_str());
270 std::string
*name
= new std::string
[num_vertices(cfg
)];
271 for (unsigned int i
= 0; i
< boost::num_vertices(cfg
); i
++)
273 const char *iLine
= printILine (cfg
[i
].ic
);
274 std::ostringstream os
;
275 os
<< i
<< ", " << cfg
[i
].ic
->key
<< " : " << iLine
;
276 { // Ugly workaround for me not knowing how to make boost::write_graphviz write the edge weight.
277 typedef typename
boost::graph_traits
<cfg_lospre_t
>::out_edge_iterator n_iter_t
;
279 boost::tie(n
, n_end
) = boost::out_edges(i
, cfg
);
281 os
<< " weight of first outgoing edge: " << cfg
[*n
];
286 boost::write_graphviz(dump_file
, cfg
, boost::make_label_writer(name
), boost::default_writer(), cfg_titlewriter(currFunc
->rname
, "lospre"));
290 // Dump tree decomposition.
291 static void dump_dec_lospre(const tree_dec_t
&tree_dec
)
296 std::ofstream
dump_file((std::string(dstFileName
) + ".dumplospredec" + currFunc
->rname
+ ".dot").c_str());
300 std::string
*name
= new std::string
[num_vertices(tree_dec
)];
301 for (unsigned int i
= 0; i
< boost::num_vertices(tree_dec
); i
++)
303 if (tree_dec
[i
].bag
.size() > w
)
304 w
= tree_dec
[i
].bag
.size();
305 std::ostringstream os
;
306 typename
decltype(tree_dec
[0].bag
)::const_iterator v1
;
308 for (v1
= tree_dec
[i
].bag
.begin(); v1
!= tree_dec
[i
].bag
.end(); ++v1
)
312 boost::write_graphviz(dump_file
, tree_dec
, boost::make_label_writer(name
));
317 lospre (iCode
*sic
, ebbIndex
*ebbi
)
319 cfg_lospre_t control_flow_graph
;
320 tree_dec_t tree_decomposition
;
326 std::cout
<< "lospre for " << currFunc
->rname
<< "()\n";
329 create_cfg_lospre (control_flow_graph
, sic
, ebbi
);
331 if(options
.dump_graphs
)
332 dump_cfg_lospre(control_flow_graph
);
334 get_nice_tree_decomposition (tree_decomposition
, control_flow_graph
);
336 if(options
.dump_graphs
)
337 dump_dec_lospre(tree_decomposition
);
339 int lkey
= operandKey
;
341 for (bool change
= true; change
;)
345 std::set
<int> candidate_set
;
346 get_candidate_set (&candidate_set
, sic
, lkey
);
348 std::set
<int>::iterator ci
, ci_end
;
349 for (ci
= candidate_set
.begin(), ci_end
= candidate_set
.end(); ci
!= ci_end
; ++ci
)
352 for (ic
= sic
; ic
&& ic
->key
!= *ci
; ic
= ic
->next
);
354 if (!ic
|| !candidate_expression (ic
, lkey
))
357 bool safety
= setup_cfg_for_expression (&control_flow_graph
, ic
);
359 if (safety
&& tree_dec_safety (tree_decomposition
, control_flow_graph
, ic
) < 0)
362 change
|= (tree_dec_lospre (tree_decomposition
, control_flow_graph
, ic
) > 0);