2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file opt_tree_grafting.cpp
27 * Takes assignments to variables that are dereferenced only once and
28 * pastes the RHS expression into where the variable is dereferenced.
30 * In the process of various operations like function inlining and
31 * tertiary op handling, we'll end up with our expression trees having
32 * been chopped up into a series of assignments of short expressions
33 * to temps. Other passes like ir_algebraic.cpp would prefer to see
34 * the deepest expression trees they can to try to optimize them.
36 * This is a lot like copy propagaton. In comparison, copy
37 * propagation only acts on plain copies, not arbitrary expressions on
38 * the RHS. Generally, we wouldn't want to go pasting some
39 * complicated expression everywhere it got used, though, so we don't
40 * handle expressions in that pass.
42 * The hard part is making sure we don't move an expression across
43 * some other assignments that would change the value of the
44 * expression. So we split this into two passes: First, find the
45 * variables in our scope which are written to once and read once, and
46 * then go through basic blocks seeing if we find an opportunity to
47 * move those expressions safely.
51 #include "ir_visitor.h"
52 #include "ir_variable_refcount.h"
53 #include "ir_basic_block.h"
54 #include "ir_optimization.h"
55 #include "glsl_types.h"
57 static bool debug
= false;
59 class ir_tree_grafting_visitor
: public ir_hierarchical_visitor
{
61 ir_tree_grafting_visitor(ir_assignment
*graft_assign
,
62 ir_variable
*graft_var
)
64 this->progress
= false;
65 this->graft_assign
= graft_assign
;
66 this->graft_var
= graft_var
;
69 virtual ir_visitor_status
visit_leave(class ir_assignment
*);
70 virtual ir_visitor_status
visit_enter(class ir_call
*);
71 virtual ir_visitor_status
visit_enter(class ir_expression
*);
72 virtual ir_visitor_status
visit_enter(class ir_function
*);
73 virtual ir_visitor_status
visit_enter(class ir_function_signature
*);
74 virtual ir_visitor_status
visit_enter(class ir_if
*);
75 virtual ir_visitor_status
visit_enter(class ir_loop
*);
76 virtual ir_visitor_status
visit_enter(class ir_swizzle
*);
77 virtual ir_visitor_status
visit_enter(class ir_texture
*);
79 ir_visitor_status
check_graft(ir_instruction
*ir
, ir_variable
*var
);
81 bool do_graft(ir_rvalue
**rvalue
);
84 ir_variable
*graft_var
;
85 ir_assignment
*graft_assign
;
88 struct find_deref_info
{
94 dereferences_variable_callback(ir_instruction
*ir
, void *data
)
96 struct find_deref_info
*info
= (struct find_deref_info
*)data
;
97 ir_dereference_variable
*deref
= ir
->as_dereference_variable();
99 if (deref
&& deref
->var
== info
->var
)
104 dereferences_variable(ir_instruction
*ir
, ir_variable
*var
)
106 struct find_deref_info info
;
111 visit_tree(ir
, dereferences_variable_callback
, &info
);
117 ir_tree_grafting_visitor::do_graft(ir_rvalue
**rvalue
)
122 ir_dereference_variable
*deref
= (*rvalue
)->as_dereference_variable();
124 if (!deref
|| deref
->var
!= this->graft_var
)
128 printf("GRAFTING:\n");
129 this->graft_assign
->print();
136 this->graft_assign
->remove();
137 *rvalue
= this->graft_assign
->rhs
;
139 this->progress
= true;
144 ir_tree_grafting_visitor::visit_enter(ir_loop
*ir
)
147 /* Do not traverse into the body of the loop since that is a
148 * different basic block.
154 * Check if we can continue grafting after writing to a variable. If the
155 * expression we're trying to graft references the variable, we must stop.
157 * \param ir An instruction that writes to a variable.
158 * \param var The variable being updated.
161 ir_tree_grafting_visitor::check_graft(ir_instruction
*ir
, ir_variable
*var
)
163 if (dereferences_variable(this->graft_assign
->rhs
, var
)) {
165 printf("graft killed by: ");
172 return visit_continue
;
176 ir_tree_grafting_visitor::visit_leave(ir_assignment
*ir
)
178 if (do_graft(&ir
->rhs
) ||
179 do_graft(&ir
->condition
))
182 /* If this assignment updates a variable used in the assignment
183 * we're trying to graft, then we're done.
185 return check_graft(ir
, ir
->lhs
->variable_referenced());
189 ir_tree_grafting_visitor::visit_enter(ir_function
*ir
)
192 return visit_continue_with_parent
;
196 ir_tree_grafting_visitor::visit_enter(ir_function_signature
*ir
)
199 return visit_continue_with_parent
;
203 ir_tree_grafting_visitor::visit_enter(ir_call
*ir
)
205 exec_list_iterator sig_iter
= ir
->get_callee()->parameters
.iterator();
206 /* Reminder: iterating ir_call iterates its parameters. */
207 foreach_iter(exec_list_iterator
, iter
, *ir
) {
208 ir_variable
*sig_param
= (ir_variable
*)sig_iter
.get();
209 ir_rvalue
*ir
= (ir_rvalue
*)iter
.get();
210 ir_rvalue
*new_ir
= ir
;
212 if (sig_param
->mode
!= ir_var_in
&& sig_param
->mode
!= ir_var_const_in
) {
213 if (check_graft(ir
, sig_param
) == visit_stop
)
218 if (do_graft(&new_ir
)) {
219 ir
->replace_with(new_ir
);
225 return visit_continue
;
229 ir_tree_grafting_visitor::visit_enter(ir_expression
*ir
)
231 for (unsigned int i
= 0; i
< ir
->get_num_operands(); i
++) {
232 if (do_graft(&ir
->operands
[i
]))
236 return visit_continue
;
240 ir_tree_grafting_visitor::visit_enter(ir_if
*ir
)
242 if (do_graft(&ir
->condition
))
245 /* Do not traverse into the body of the if-statement since that is a
246 * different basic block.
248 return visit_continue_with_parent
;
252 ir_tree_grafting_visitor::visit_enter(ir_swizzle
*ir
)
254 if (do_graft(&ir
->val
))
257 return visit_continue
;
261 ir_tree_grafting_visitor::visit_enter(ir_texture
*ir
)
263 if (do_graft(&ir
->coordinate
) ||
264 do_graft(&ir
->projector
) ||
265 do_graft(&ir
->offset
) ||
266 do_graft(&ir
->shadow_comparitor
))
273 if (do_graft(&ir
->lod_info
.bias
))
279 if (do_graft(&ir
->lod_info
.lod
))
283 if (do_graft(&ir
->lod_info
.grad
.dPdx
) ||
284 do_graft(&ir
->lod_info
.grad
.dPdy
))
289 return visit_continue
;
292 struct tree_grafting_info
{
293 ir_variable_refcount_visitor
*refs
;
298 try_tree_grafting(ir_assignment
*start
,
299 ir_variable
*lhs_var
,
300 ir_instruction
*bb_last
)
302 ir_tree_grafting_visitor
v(start
, lhs_var
);
305 printf("trying to graft: ");
310 for (ir_instruction
*ir
= (ir_instruction
*)start
->next
;
312 ir
= (ir_instruction
*)ir
->next
) {
320 ir_visitor_status s
= ir
->accept(&v
);
329 tree_grafting_basic_block(ir_instruction
*bb_first
,
330 ir_instruction
*bb_last
,
333 struct tree_grafting_info
*info
= (struct tree_grafting_info
*)data
;
334 ir_instruction
*ir
, *next
;
336 for (ir
= bb_first
, next
= (ir_instruction
*)ir
->next
;
338 ir
= next
, next
= (ir_instruction
*)ir
->next
) {
339 ir_assignment
*assign
= ir
->as_assignment();
344 ir_variable
*lhs_var
= assign
->whole_variable_written();
348 if (lhs_var
->mode
== ir_var_out
||
349 lhs_var
->mode
== ir_var_inout
)
352 variable_entry
*entry
= info
->refs
->get_variable_entry(lhs_var
);
354 if (!entry
->declaration
||
355 entry
->assigned_count
!= 1 ||
356 entry
->referenced_count
!= 2)
359 assert(assign
== entry
->assign
);
361 /* Found a possibly graftable assignment. Now, walk through the
362 * rest of the BB seeing if the deref is here, and if nothing interfered with
363 * pasting its expression's values in between.
365 info
->progress
|= try_tree_grafting(assign
, lhs_var
, bb_last
);
370 * Does a copy propagation pass on the code present in the instruction stream.
373 do_tree_grafting(exec_list
*instructions
)
375 ir_variable_refcount_visitor refs
;
376 struct tree_grafting_info info
;
378 info
.progress
= false;
381 visit_list_elements(info
.refs
, instructions
);
383 call_for_basic_blocks(instructions
, tree_grafting_basic_block
, &info
);
385 return info
.progress
;