2 Branch reduction/prediction on the x86 code.
3 Copyright (C) 2001 Justin David Smith, Caltech
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; either version 2
8 of the License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
43 This optimization should be performed before blocks are merged
48 This code removes branches in situations where it can easily identify
49 them as simply setting the value of a variable. This comes up most
50 often in relational operators which set a boolean variable and then
51 continue on to common code. Since conditional jumps on Intel are not
52 cheap (they can be very expensive when mispredicted), we want to re-
53 write these statements using SETcc whenever possible. That is the
54 goal of this optimization.
59 Extracts comments from real code, and returns a block of comments
60 and a list of non-comment instructions. This is useful for getting
61 the comments out of our way for awhile while we try to figure out
62 what the block is actually doing. *)
63 let rec nontrivial_code = function
64 inst :: insts when trivial_inst inst ->
65 let comm, insts = nontrivial_code insts in
68 let comm, insts = nontrivial_code insts in
75 Returns something if the nontrivial code in a block simply sets
76 a register and then jumps to another block. The syntax is to
77 return Some(comments, register set, register value, destination).
78 If the block does not match this pattern, then None is returned. *)
80 match nontrivial_code code with
81 comm, [MOV (IL, dst, src); JMP (ImmediateLabel label)] ->
82 Some(comm, dst, src, label)
88 Returns something if this block ends in a conditional jump to
89 another block (followed by an unconditional jump - the fail case).
90 If this is the case, then we return Some with the comments, code
91 preceding the jump, the operator used in JCC, and the labels for
92 the success and failure cases. Because we assume the JCC is
93 followed by an unconditional jump, we need to run this code before
94 the blocks are merged over in x86_trace.ml. *)
96 let comm, insts = nontrivial_code code in
97 let rec code_jcc prev = function
98 [JCC(op, ImmediateLabel lsucc); JMP(ImmediateLabel lfail)] ->
99 Some(comm, List.rev prev, op, lsucc, lfail)
101 code_jcc (inst :: prev) insts
109 Determines if this block is a block containing a conditional, in
110 which both branches set a common variable and then return to run
111 common code. If such a case is found, it will be rewritten and
112 optimized to use SETcc instead. *)
113 let branch_block benv block =
114 let code = block.block_code in
115 match code_jcc code with
118 | Some(comm, prev, op, lsucc, lfail) ->
120 let bsucc = SymbolTable.find benv lsucc in
121 let bfail = SymbolTable.find benv lfail in
122 match code_set bsucc.block_code, code_set bfail.block_code with
123 Some(cs, ds, ImmediateNumber (OffNumber ss), ls), Some(cf, df, ImmediateNumber (OffNumber sf), lf)
124 when ds = df && ls = lf ->
125 (* This is the SET optimization. Note that the generated
126 code causes a partial register stall; scheduling should
127 try to resolve this. *)
130 dcl op, ds, lbl ls, $ss, $sf;
131 def diffvalues = sf - ss;
134 (* Emit the various comments that got snipped out *)
135 comm_string "OPT: Branch SET optimization";
136 comm_string `{ sprintf "OPT: Success %s, set value %ld" (string_of_symbol lsucc) ss `};
137 comm_string `{ sprintf "OPT: Failure %s, set value %ld" (string_of_symbol lfail) sf `};
139 comm_string "OPT: Comments from success branch";
141 comm_string "OPT: Comments from failure branch";
144 (* This is the actual setCC optimization. We use
145 ECX here because we must force a byte-register. *)
150 andl ecx, diffvalues;
152 movl ds, ecx; (* Move result to destination ds *)
153 jmp ls;` (* The failure label is eliminated *)
155 let code = Listbuf.to_list insts in
156 (* Remove the conditional blocks we originally jumped to
157 from our list of possible jump destinations. *)
158 let jumps = block.block_jumps in
159 let jumps = List.filter (fun v -> not (Symbol.eq v lsucc)) jumps in
160 let jumps = List.filter (fun v -> not (Symbol.eq v lfail)) jumps in
161 let jumps = ls :: jumps in
162 { block with block_code = code;
163 block_jumps = jumps }
165 (* No optimization performed *)
171 (* remove_dead_blocks
172 After the branch optimization, we may have blocks sitting around
173 that are no longer accessable. These need to be removed both
174 for efficiency, and to avoid problems during dataflow analysis.
175 This function takes the blocks as a list, not as a trace. *)
176 let remove_dead_blocks export preserve blocks =
177 let add_jumps targets block =
178 let jumps = block.block_jumps in
179 let add_jump targets j = SymbolTable.add targets j () in
180 List.fold_left add_jump targets jumps
182 let targets = List.fold_left add_jumps SymbolTable.empty blocks in
183 let block_target block =
184 SymbolTable.mem export block.block_label
185 || SymbolSet.mem preserve block.block_label
186 || block.block_index <> None
187 || SymbolTable.mem targets block.block_label
189 let blocks = List.filter block_target blocks in
194 Apply the branch optimization to a program. *)
196 { export_name = "init";
201 { export_name = "main";
205 let branch_prog prog =
206 (* Get the program blocks, apply branch optimization to blocks *)
207 let blocks = prog.asm_blocks in
208 let benv = Trace.fold (fun benv block ->
209 SymbolTable.add benv block.block_label block) SymbolTable.empty blocks in
211 Trace.fold (fun blocks block ->
212 branch_block benv block :: blocks) [] blocks
215 (* Remove any newly-dead blocks from the program *)
217 List.fold_left (fun export (_, init_sym, main_sym) ->
218 let export = SymbolTable.add export init_sym init_info in
219 let export = SymbolTable.add export main_sym main_info in
220 export) prog.asm_export prog.asm_init
222 let preserve = prog.asm_preserve in
223 let blocks = remove_dead_blocks export preserve blocks in
224 let blocks = Trace.of_list (List.rev blocks) in
225 { prog with asm_blocks = blocks }
227 let branch_prog = profile "X86_branch.branch_prog" branch_prog
229 let branch_prog prog =
230 if optimize_asm_level "opt.x86.branch" 2 then
235 let () = std_flags_register_list_help "opt.x86"
236 ["opt.x86.branch", FlagBool true,
237 "Enable specialized X86 branch instruction optimization." ^
238 " This introduces a significant performance improvement," ^
239 " and is recommended."]