1 /* Avoid store forwarding optimization pass.
2 Copyright (C) 2024-2025 Free Software Foundation, Inc.
3 Contributed by VRULL GmbH.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "avoid-store-forwarding.h"
24 #include "coretypes.h"
31 #include "tree-pass.h"
34 #include "insn-config.h"
44 /* This pass tries to detect and avoid cases of store forwarding.
45 On many processors there is a large penalty when smaller stores are
46 forwarded to larger loads. The idea used to avoid the stall is to move
47 the store after the load and in addition emit a bit insert sequence so
48 the load register has the correct value. For example the following:
53 Will be transformed to:
62 const pass_data pass_data_avoid_store_forwarding
=
65 "avoid_store_forwarding", /* name. */
66 OPTGROUP_NONE
, /* optinfo_flags. */
67 TV_AVOID_STORE_FORWARDING
, /* tv_id. */
68 0, /* properties_required. */
69 0, /* properties_provided. */
70 0, /* properties_destroyed. */
71 0, /* todo_flags_start. */
72 TODO_df_finish
/* todo_flags_finish. */
75 class pass_rtl_avoid_store_forwarding
: public rtl_opt_pass
78 pass_rtl_avoid_store_forwarding (gcc::context
*ctxt
)
79 : rtl_opt_pass (pass_data_avoid_store_forwarding
, ctxt
)
82 /* opt_pass methods: */
83 virtual bool gate (function
*)
85 return flag_avoid_store_forwarding
&& optimize
>= 1;
88 virtual unsigned int execute (function
*) override
;
89 }; // class pass_rtl_avoid_store_forwarding
91 /* Handler for finding and avoiding store forwardings. */
93 class store_forwarding_analyzer
96 unsigned int stats_sf_detected
= 0;
97 unsigned int stats_sf_avoided
= 0;
99 bool is_store_forwarding (rtx store_mem
, rtx load_mem
,
100 HOST_WIDE_INT
*off_val
);
101 bool process_store_forwarding (vec
<store_fwd_info
> &, rtx_insn
*load_insn
,
103 void avoid_store_forwarding (basic_block
);
104 void update_stats (function
*);
107 /* Return a bit insertion sequence that would make DEST have the correct value
108 if the store represented by STORE_INFO were to be moved after DEST. */
111 generate_bit_insert_sequence (store_fwd_info
*store_info
, rtx dest
)
113 /* Memory size should be a constant at this stage. */
114 unsigned HOST_WIDE_INT store_size
115 = MEM_SIZE (store_info
->store_mem
).to_constant ();
119 unsigned HOST_WIDE_INT bitsize
= store_size
* BITS_PER_UNIT
;
120 unsigned HOST_WIDE_INT start
= store_info
->offset
* BITS_PER_UNIT
;
122 /* Adjust START for machines with BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN.
123 Given that the bytes will be reversed in this case, we need to
124 calculate the starting position from the end of the destination
126 if (BITS_BIG_ENDIAN
!= BYTES_BIG_ENDIAN
)
128 unsigned HOST_WIDE_INT load_mode_bitsize
129 = (GET_MODE_BITSIZE (GET_MODE (dest
))).to_constant ();
130 start
= load_mode_bitsize
- bitsize
- start
;
133 rtx mov_reg
= store_info
->mov_reg
;
134 store_bit_field (dest
, bitsize
, start
, 0, 0, GET_MODE (mov_reg
), mov_reg
,
137 rtx_insn
*insns
= get_insns ();
138 unshare_all_rtl_in_chain (insns
);
141 for (rtx_insn
*insn
= insns
; insn
; insn
= NEXT_INSN (insn
))
142 if (contains_mem_rtx_p (PATTERN (insn
))
143 || recog_memoized (insn
) < 0)
149 /* Return true iff a store to STORE_MEM would write to a sub-region of bytes
150 from what LOAD_MEM would read. If true also store the relative byte offset
151 of the store within the load to OFF_VAL. */
153 bool store_forwarding_analyzer::
154 is_store_forwarding (rtx store_mem
, rtx load_mem
, HOST_WIDE_INT
*off_val
)
156 poly_int64 load_offset
, store_offset
;
157 rtx load_base
= strip_offset (XEXP (load_mem
, 0), &load_offset
);
158 rtx store_base
= strip_offset (XEXP (store_mem
, 0), &store_offset
);
159 return (MEM_SIZE (load_mem
).is_constant ()
160 && rtx_equal_p (load_base
, store_base
)
161 && known_subrange_p (store_offset
, MEM_SIZE (store_mem
),
162 load_offset
, MEM_SIZE (load_mem
))
163 && (store_offset
- load_offset
).is_constant (off_val
));
166 /* Given a list of small stores that are forwarded to LOAD_INSN, try to
167 rearrange them so that a store-forwarding penalty doesn't occur.
168 The stores must be given in reverse program order, starting from the
169 one closer to LOAD_INSN. */
171 bool store_forwarding_analyzer::
172 process_store_forwarding (vec
<store_fwd_info
> &stores
, rtx_insn
*load_insn
,
175 machine_mode load_mem_mode
= GET_MODE (load_mem
);
176 /* Memory sizes should be constants at this stage. */
177 HOST_WIDE_INT load_size
= MEM_SIZE (load_mem
).to_constant ();
179 /* If the stores cover all the bytes of the load without overlap then we can
180 eliminate the load entirely and use the computed value instead. */
182 sbitmap forwarded_bytes
= sbitmap_alloc (load_size
);
183 bitmap_clear (forwarded_bytes
);
187 FOR_EACH_VEC_ELT (stores
, i
, it
)
189 HOST_WIDE_INT store_size
= MEM_SIZE (it
->store_mem
).to_constant ();
190 if (bitmap_bit_in_range_p (forwarded_bytes
, it
->offset
,
191 it
->offset
+ store_size
- 1))
193 bitmap_set_range (forwarded_bytes
, it
->offset
, store_size
);
196 bitmap_not (forwarded_bytes
, forwarded_bytes
);
197 bool load_elim
= bitmap_empty_p (forwarded_bytes
);
203 fprintf (dump_file
, "Store forwarding detected:\n");
205 FOR_EACH_VEC_ELT (stores
, i
, it
)
207 fprintf (dump_file
, "From: ");
208 print_rtl_single (dump_file
, it
->store_insn
);
211 fprintf (dump_file
, "To: ");
212 print_rtl_single (dump_file
, load_insn
);
215 fprintf (dump_file
, "(Load elimination candidate)\n");
218 rtx load
= single_set (load_insn
);
222 dest
= gen_reg_rtx (load_mem_mode
);
224 dest
= SET_DEST (load
);
226 int move_to_front
= -1;
229 /* Check if we can emit bit insert instructions for all forwarded stores. */
230 FOR_EACH_VEC_ELT (stores
, i
, it
)
232 it
->mov_reg
= gen_reg_rtx (GET_MODE (it
->store_mem
));
233 rtx_insn
*insns
= NULL
;
235 /* If we're eliminating the load then find the store with zero offset
236 and use it as the base register to avoid a bit insert if possible. */
237 if (load_elim
&& it
->offset
== 0)
241 rtx ext0
= gen_rtx_ZERO_EXTEND (GET_MODE (dest
), it
->mov_reg
);
244 rtx_insn
*move0
= emit_move_insn (dest
, ext0
);
245 if (recog_memoized (move0
) >= 0)
247 insns
= get_insns ();
248 move_to_front
= (int) i
;
256 insns
= generate_bit_insert_sequence (&(*it
), dest
);
262 fprintf (dump_file
, "Failed due to: ");
263 print_rtl_single (dump_file
, it
->store_insn
);
268 total_cost
+= seq_cost (insns
, true);
269 it
->bits_insert_insns
= insns
;
271 rtx store_set
= single_set (it
->store_insn
);
273 /* Create a register move at the store's original position to save the
277 = emit_insn (gen_rtx_SET (it
->mov_reg
, SET_SRC (store_set
)));
280 if (recog_memoized (insn1
) < 0)
284 fprintf (dump_file
, "Failed due to unrecognizable insn: ");
285 print_rtl_single (dump_file
, insn1
);
290 it
->save_store_value_insn
= insn1
;
292 /* Create a new store after the load with the saved original value.
293 This avoids the forwarding stall. */
296 = emit_insn (gen_rtx_SET (SET_DEST (store_set
), it
->mov_reg
));
299 if (recog_memoized (insn2
) < 0)
303 fprintf (dump_file
, "Failed due to unrecognizable insn: ");
304 print_rtl_single (dump_file
, insn2
);
309 it
->store_saved_value_insn
= insn2
;
313 total_cost
-= insn_cost (load_insn
, true);
315 /* Let the target decide if transforming this store forwarding instance is
317 if (!targetm
.avoid_store_forwarding_p (stores
, load_mem
, total_cost
,
321 fprintf (dump_file
, "Not transformed due to target decision.\n");
326 /* If we have a move instead of bit insert, it needs to be emitted first in
327 the resulting sequence. */
328 if (move_to_front
!= -1)
330 store_fwd_info copy
= stores
[move_to_front
];
331 stores
.safe_push (copy
);
332 stores
.ordered_remove (move_to_front
);
337 machine_mode outer_mode
= GET_MODE (SET_DEST (load
));
339 rtx load_value
= dest
;
340 if (outer_mode
!= load_mem_mode
)
342 load_value
= simplify_gen_unary (GET_CODE (SET_SRC (load
)),
343 outer_mode
, dest
, load_mem_mode
);
345 load_move
= gen_rtx_SET (SET_DEST (load
), load_value
);
348 rtx_insn
*insn
= emit_insn (load_move
);
349 rtx_insn
*seq
= get_insns ();
352 if (recog_memoized (insn
) < 0)
355 emit_insn_after (seq
, load_insn
);
360 fprintf (dump_file
, "Store forwarding avoided with bit inserts:\n");
362 FOR_EACH_VEC_ELT (stores
, i
, it
)
364 if (stores
.length () > 1)
366 fprintf (dump_file
, "For: ");
367 print_rtl_single (dump_file
, it
->store_insn
);
370 fprintf (dump_file
, "With sequence:\n");
372 for (rtx_insn
*insn
= it
->bits_insert_insns
; insn
;
373 insn
= NEXT_INSN (insn
))
375 fprintf (dump_file
, " ");
376 print_rtl_single (dump_file
, insn
);
383 /* Done, emit all the generated instructions and delete the stores.
384 Note that STORES are in reverse program order. */
386 FOR_EACH_VEC_ELT (stores
, i
, it
)
388 emit_insn_after (it
->bits_insert_insns
, load_insn
);
389 emit_insn_after (it
->store_saved_value_insn
, load_insn
);
392 FOR_EACH_VEC_ELT (stores
, i
, it
)
394 emit_insn_before (it
->save_store_value_insn
, it
->store_insn
);
395 delete_insn (it
->store_insn
);
398 df_insn_rescan (load_insn
);
401 delete_insn (load_insn
);
406 /* Try to modify BB so that expensive store forwarding cases are avoided. */
409 store_forwarding_analyzer::avoid_store_forwarding (basic_block bb
)
411 if (!optimize_bb_for_speed_p (bb
))
414 auto_vec
<store_fwd_info
, 8> store_exprs
;
416 unsigned int insn_cnt
= 0;
418 FOR_BB_INSNS (bb
, insn
)
420 if (!NONDEBUG_INSN_P (insn
))
423 vec_rtx_properties properties
;
424 properties
.add_insn (insn
, false);
426 rtx set
= single_set (insn
);
428 if (!set
|| insn_could_throw_p (insn
))
430 store_exprs
.truncate (0);
434 /* The inner mem RTX if INSN is a load, NULL_RTX otherwise. */
435 rtx load_mem
= SET_SRC (set
);
437 if (GET_CODE (load_mem
) == ZERO_EXTEND
438 || GET_CODE (load_mem
) == SIGN_EXTEND
)
439 load_mem
= XEXP (load_mem
, 0);
441 if (!MEM_P (load_mem
))
444 /* The mem RTX if INSN is a store, NULL_RTX otherwise. */
445 rtx store_mem
= MEM_P (SET_DEST (set
)) ? SET_DEST (set
) : NULL_RTX
;
447 /* We cannot analyze memory RTXs that have unknown size. */
448 if ((store_mem
&& (!MEM_SIZE_KNOWN_P (store_mem
)
449 || !MEM_SIZE (store_mem
).is_constant ()))
450 || (load_mem
&& (!MEM_SIZE_KNOWN_P (load_mem
)
451 || !MEM_SIZE (load_mem
).is_constant ())))
453 store_exprs
.truncate (0);
457 bool is_simple
= !properties
.has_asm
458 && !properties
.has_side_effects ();
459 bool is_simple_store
= is_simple
461 && !contains_mem_rtx_p (SET_SRC (set
));
462 bool is_simple_load
= is_simple
464 && !contains_mem_rtx_p (SET_DEST (set
));
466 int removed_count
= 0;
470 /* Record store forwarding candidate. */
472 info
.store_insn
= insn
;
473 info
.store_mem
= store_mem
;
474 info
.insn_cnt
= insn_cnt
;
476 info
.forwarded
= false;
477 store_exprs
.safe_push (info
);
480 bool reads_mem
= false;
481 bool writes_mem
= false;
482 for (auto ref
: properties
.refs ())
485 reads_mem
|= ref
.is_read ();
486 writes_mem
|= ref
.is_write ();
488 else if (ref
.is_write ())
490 /* Drop store forwarding candidates when the address register is
492 bool remove_rest
= false;
495 FOR_EACH_VEC_ELT_REVERSE (store_exprs
, i
, it
)
498 || reg_overlap_mentioned_p (regno_reg_rtx
[ref
.regno
],
510 /* Process load for possible store forwarding cases.
511 Possible newly created/moved stores, resulted from a successful
512 forwarding, will be processed in subsequent iterations. */
513 auto_vec
<store_fwd_info
> forwardings
;
514 bool partial_forwarding
= false;
515 bool remove_rest
= false;
517 bool vector_load
= VECTOR_MODE_P (GET_MODE (load_mem
));
521 FOR_EACH_VEC_ELT_REVERSE (store_exprs
, i
, it
)
523 rtx store_mem
= it
->store_mem
;
524 HOST_WIDE_INT off_val
;
526 bool vector_store
= VECTOR_MODE_P (GET_MODE (store_mem
));
533 else if (vector_load
^ vector_store
)
535 /* Vector stores followed by a non-vector load or the
536 opposite, cause store_bit_field to generate non-canonical
537 expressions, like (subreg:V4SI (reg:DI ...) 0)).
538 Cases like that should be handled using vec_duplicate,
539 so we reject the transformation in those cases. */
544 else if (is_store_forwarding (store_mem
, load_mem
, &off_val
))
546 /* Check if moving this store after the load is legal. */
547 bool write_dep
= false;
548 for (unsigned int j
= store_exprs
.length () - 1; j
!= i
; j
--)
550 if (!store_exprs
[j
].forwarded
551 && output_dependence (store_mem
,
552 store_exprs
[j
].store_mem
))
561 it
->forwarded
= true;
562 it
->offset
= off_val
;
563 forwardings
.safe_push (*it
);
566 partial_forwarding
= true;
571 else if (true_dependence (store_mem
, GET_MODE (store_mem
),
574 /* We cannot keep a store forwarding candidate if it possibly
575 interferes with this load. */
582 if (!forwardings
.is_empty () && !partial_forwarding
)
583 process_store_forwarding (forwardings
, insn
, load_mem
);
586 if ((writes_mem
&& !is_simple_store
)
587 || (reads_mem
&& !is_simple_load
))
588 store_exprs
.truncate (0);
594 VEC_ORDERED_REMOVE_IF (store_exprs
, i
, j
, it
, it
->remove
);
597 /* Don't consider store forwarding if the RTL instruction distance is
598 more than PARAM_STORE_FORWARDING_MAX_DISTANCE and the cost checks
600 const bool unlimited_cost
= (param_store_forwarding_max_distance
== 0);
601 if (!unlimited_cost
&& !store_exprs
.is_empty ()
602 && (store_exprs
[0].insn_cnt
603 + param_store_forwarding_max_distance
<= insn_cnt
))
604 store_exprs
.ordered_remove (0);
610 /* Update pass statistics. */
613 store_forwarding_analyzer::update_stats (function
*fn
)
615 statistics_counter_event (fn
, "Cases of store forwarding detected: ",
617 statistics_counter_event (fn
, "Cases of store forwarding avoided: ",
622 pass_rtl_avoid_store_forwarding::execute (function
*fn
)
624 df_set_flags (DF_DEFER_INSN_RESCAN
);
626 init_alias_analysis ();
628 store_forwarding_analyzer analyzer
;
631 FOR_EACH_BB_FN (bb
, fn
)
632 analyzer
.avoid_store_forwarding (bb
);
634 end_alias_analysis ();
636 analyzer
.update_stats (fn
);
644 make_pass_rtl_avoid_store_forwarding (gcc::context
*ctxt
)
646 return new pass_rtl_avoid_store_forwarding (ctxt
);