/* Avoid store forwarding optimization pass.
   Copyright (C) 2024-2025 Free Software Foundation, Inc.
   Contributed by VRULL GmbH.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "avoid-store-forwarding.h"
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "alias.h"
#include "rtlanal.h"
#include "cfgrtl.h"
#include "tree-pass.h"
#include "cselib.h"
#include "predict.h"
#include "insn-config.h"
#include "expmed.h"
#include "recog.h"
#include "regset.h"
#include "df.h"
#include "expr.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "vec.h"

/* This pass tries to detect and avoid cases of store forwarding.
   On many processors there is a large penalty when smaller stores are
   forwarded to larger loads.  The idea used to avoid the stall is to move
   the store after the load and in addition emit a bit insert sequence so
   the load register has the correct value.  For example the following:

     strb    w2, [x1, 1]
     ldr     x0, [x1]

   Will be transformed to:

     ldr     x0, [x1]
     strb    w2, [x1]
     bfi     x0, x2, 0, 8
*/
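
/* In the AArch64 example above, "bfi x0, x2, 0, 8" inserts the low 8 bits
   of x2 into bits [7:0] of x0, so x0 ends up with the same value that the
   load would have produced with the store still in front of it.  */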

namespace {

const pass_data pass_data_avoid_store_forwarding =
{
  RTL_PASS, /* type.  */
  "avoid_store_forwarding", /* name.  */
  OPTGROUP_NONE, /* optinfo_flags.  */
  TV_AVOID_STORE_FORWARDING, /* tv_id.  */
  0, /* properties_required.  */
  0, /* properties_provided.  */
  0, /* properties_destroyed.  */
  0, /* todo_flags_start.  */
  TODO_df_finish /* todo_flags_finish.  */
};

class pass_rtl_avoid_store_forwarding : public rtl_opt_pass
{
public:
  pass_rtl_avoid_store_forwarding (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_avoid_store_forwarding, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    return flag_avoid_store_forwarding && optimize >= 1;
  }

  virtual unsigned int execute (function *) override;
}; // class pass_rtl_avoid_store_forwarding

/* Handler for finding and avoiding store forwardings.  */

class store_forwarding_analyzer
{
public:
  unsigned int stats_sf_detected = 0;
  unsigned int stats_sf_avoided = 0;

  bool is_store_forwarding (rtx store_mem, rtx load_mem,
                            HOST_WIDE_INT *off_val);
  bool process_store_forwarding (vec<store_fwd_info> &, rtx_insn *load_insn,
                                 rtx load_mem);
  void avoid_store_forwarding (basic_block);
  void update_stats (function *);
};

/* Return a bit insertion sequence that would make DEST have the correct value
   if the store represented by STORE_INFO were to be moved after the load
   whose destination is DEST.  */

static rtx_insn *
generate_bit_insert_sequence (store_fwd_info *store_info, rtx dest)
{
  /* Memory size should be a constant at this stage.  */
  unsigned HOST_WIDE_INT store_size
    = MEM_SIZE (store_info->store_mem).to_constant ();

  start_sequence ();

  unsigned HOST_WIDE_INT bitsize = store_size * BITS_PER_UNIT;
  unsigned HOST_WIDE_INT start = store_info->offset * BITS_PER_UNIT;

  /* Adjust START for machines with BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN.
     Given that the bytes will be reversed in this case, we need to
     calculate the starting position from the end of the destination
     register.  */
  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    {
      unsigned HOST_WIDE_INT load_mode_bitsize
        = (GET_MODE_BITSIZE (GET_MODE (dest))).to_constant ();
      start = load_mode_bitsize - bitsize - start;
    }

  rtx mov_reg = store_info->mov_reg;
  store_bit_field (dest, bitsize, start, 0, 0, GET_MODE (mov_reg), mov_reg,
                   false, false);

  rtx_insn *insns = get_insns ();
  unshare_all_rtl_in_chain (insns);
  end_sequence ();

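  /* Reject the sequence if it would itself touch memory or if it contains
     an insn the target cannot recognize; either would defeat the point of
     replacing the forwarded load with register operations.  */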
  for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
    if (contains_mem_rtx_p (PATTERN (insn))
        || recog_memoized (insn) < 0)
      return NULL;

  return insns;
}

/* Return true iff a store to STORE_MEM would write to a sub-region of bytes
   from what LOAD_MEM would read.  If true also store the relative byte offset
   of the store within the load to OFF_VAL.  */
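/* For instance, a one-byte store at [base + 1] that is forwarded into an
   eight-byte load at [base] is such a sub-region, with OFF_VAL == 1.  */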

bool store_forwarding_analyzer::
is_store_forwarding (rtx store_mem, rtx load_mem, HOST_WIDE_INT *off_val)
{
  poly_int64 load_offset, store_offset;
  rtx load_base = strip_offset (XEXP (load_mem, 0), &load_offset);
  rtx store_base = strip_offset (XEXP (store_mem, 0), &store_offset);
  return (MEM_SIZE (load_mem).is_constant ()
          && rtx_equal_p (load_base, store_base)
          && known_subrange_p (store_offset, MEM_SIZE (store_mem),
                               load_offset, MEM_SIZE (load_mem))
          && (store_offset - load_offset).is_constant (off_val));
}

/* Given a list of small stores that are forwarded to LOAD_INSN, try to
   rearrange them so that a store-forwarding penalty doesn't occur.
   The stores must be given in reverse program order, starting from the
   one closest to LOAD_INSN.  */

bool store_forwarding_analyzer::
process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
                          rtx load_mem)
{
  machine_mode load_mem_mode = GET_MODE (load_mem);
  /* Memory sizes should be constants at this stage.  */
  HOST_WIDE_INT load_size = MEM_SIZE (load_mem).to_constant ();

  /* If the stores cover all the bytes of the load without overlap then we can
     eliminate the load entirely and use the computed value instead.  */

  sbitmap forwarded_bytes = sbitmap_alloc (load_size);
  bitmap_clear (forwarded_bytes);

  unsigned int i;
  store_fwd_info* it;
  FOR_EACH_VEC_ELT (stores, i, it)
    {
      HOST_WIDE_INT store_size = MEM_SIZE (it->store_mem).to_constant ();
      if (bitmap_bit_in_range_p (forwarded_bytes, it->offset,
                                 it->offset + store_size - 1))
        break;
      bitmap_set_range (forwarded_bytes, it->offset, store_size);
    }

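  /* All bytes of the load are covered iff the complement of FORWARDED_BYTES
     is empty.  */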
  bitmap_not (forwarded_bytes, forwarded_bytes);
  bool load_elim = bitmap_empty_p (forwarded_bytes);

  stats_sf_detected++;

  if (dump_file)
    {
      fprintf (dump_file, "Store forwarding detected:\n");

      FOR_EACH_VEC_ELT (stores, i, it)
        {
          fprintf (dump_file, "From: ");
          print_rtl_single (dump_file, it->store_insn);
        }

      fprintf (dump_file, "To: ");
      print_rtl_single (dump_file, load_insn);

      if (load_elim)
        fprintf (dump_file, "(Load elimination candidate)\n");
    }

  rtx load = single_set (load_insn);
  rtx dest;

  if (load_elim)
    dest = gen_reg_rtx (load_mem_mode);
  else
    dest = SET_DEST (load);

  int move_to_front = -1;
  int total_cost = 0;

  /* Check if we can emit bit insert instructions for all forwarded stores.  */
  FOR_EACH_VEC_ELT (stores, i, it)
    {
      it->mov_reg = gen_reg_rtx (GET_MODE (it->store_mem));
      rtx_insn *insns = NULL;

      /* If we're eliminating the load then find the store with zero offset
         and use it as the base register to avoid a bit insert if possible.  */
      if (load_elim && it->offset == 0)
        {
          start_sequence ();

          rtx ext0 = gen_rtx_ZERO_EXTEND (GET_MODE (dest), it->mov_reg);
          if (ext0)
            {
              rtx_insn *move0 = emit_move_insn (dest, ext0);
              if (recog_memoized (move0) >= 0)
                {
                  insns = get_insns ();
                  move_to_front = (int) i;
                }
            }

          end_sequence ();
        }

      if (!insns)
        insns = generate_bit_insert_sequence (&(*it), dest);

      if (!insns)
        {
          if (dump_file)
            {
              fprintf (dump_file, "Failed due to: ");
              print_rtl_single (dump_file, it->store_insn);
            }
          return false;
        }

      total_cost += seq_cost (insns, true);
      it->bits_insert_insns = insns;

      rtx store_set = single_set (it->store_insn);

      /* Create a register move at the store's original position to save the
         stored value.  */
      start_sequence ();
      rtx_insn *insn1
        = emit_insn (gen_rtx_SET (it->mov_reg, SET_SRC (store_set)));
      end_sequence ();

      if (recog_memoized (insn1) < 0)
        {
          if (dump_file)
            {
              fprintf (dump_file, "Failed due to unrecognizable insn: ");
              print_rtl_single (dump_file, insn1);
            }
          return false;
        }

      it->save_store_value_insn = insn1;

      /* Create a new store after the load with the saved original value.
         This avoids the forwarding stall.  */
      start_sequence ();
      rtx_insn *insn2
        = emit_insn (gen_rtx_SET (SET_DEST (store_set), it->mov_reg));
      end_sequence ();

      if (recog_memoized (insn2) < 0)
        {
          if (dump_file)
            {
              fprintf (dump_file, "Failed due to unrecognizable insn: ");
              print_rtl_single (dump_file, insn2);
            }
          return false;
        }

      it->store_saved_value_insn = insn2;
    }

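  /* If the load can be eliminated, its own cost is saved as well, so credit
     it against the cost of the replacement sequence.  */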
  if (load_elim)
    total_cost -= insn_cost (load_insn, true);

  /* Let the target decide if transforming this store forwarding instance is
     profitable.  */
  if (!targetm.avoid_store_forwarding_p (stores, load_mem, total_cost,
                                         load_elim))
    {
      if (dump_file)
        fprintf (dump_file, "Not transformed due to target decision.\n");

      return false;
    }

  /* If we have a move instead of bit insert, it needs to be emitted first in
     the resulting sequence.  */
  if (move_to_front != -1)
    {
      store_fwd_info copy = stores[move_to_front];
      stores.safe_push (copy);
      stores.ordered_remove (move_to_front);
    }

  if (load_elim)
    {
      machine_mode outer_mode = GET_MODE (SET_DEST (load));
      rtx load_move;
      rtx load_value = dest;
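      /* The eliminated load may have been wrapped in a ZERO_EXTEND or
         SIGN_EXTEND; reapply the same extension here so the original
         destination still receives a value of its own mode.  */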
      if (outer_mode != load_mem_mode)
        {
          load_value = simplify_gen_unary (GET_CODE (SET_SRC (load)),
                                           outer_mode, dest, load_mem_mode);
        }
      load_move = gen_rtx_SET (SET_DEST (load), load_value);

      start_sequence ();
      rtx_insn *insn = emit_insn (load_move);
      rtx_insn *seq = get_insns ();
      end_sequence ();

      if (recog_memoized (insn) < 0)
        return false;

      emit_insn_after (seq, load_insn);
    }

  if (dump_file)
    {
      fprintf (dump_file, "Store forwarding avoided with bit inserts:\n");

      FOR_EACH_VEC_ELT (stores, i, it)
        {
          if (stores.length () > 1)
            {
              fprintf (dump_file, "For: ");
              print_rtl_single (dump_file, it->store_insn);
            }

          fprintf (dump_file, "With sequence:\n");

          for (rtx_insn *insn = it->bits_insert_insns; insn;
               insn = NEXT_INSN (insn))
            {
              fprintf (dump_file, "  ");
              print_rtl_single (dump_file, insn);
            }
        }
    }

  stats_sf_avoided++;

  /* Done, emit all the generated instructions and delete the stores.
     Note that STORES are in reverse program order.  */

  FOR_EACH_VEC_ELT (stores, i, it)
    {
      emit_insn_after (it->bits_insert_insns, load_insn);
      emit_insn_after (it->store_saved_value_insn, load_insn);
    }

  FOR_EACH_VEC_ELT (stores, i, it)
    {
      emit_insn_before (it->save_store_value_insn, it->store_insn);
      delete_insn (it->store_insn);
    }

  df_insn_rescan (load_insn);

  if (load_elim)
    delete_insn (load_insn);

  return true;
}

/* Try to modify BB so that expensive store forwarding cases are avoided.  */

void
store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
{
  if (!optimize_bb_for_speed_p (bb))
    return;
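
  /* Walk the block keeping STORE_EXPRS as a window of recent store
     candidates.  Each load is checked against those candidates, and
     candidates are dropped as soon as moving them past a later
     instruction would no longer be safe.  */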

  auto_vec<store_fwd_info, 8> store_exprs;
  rtx_insn *insn;
  unsigned int insn_cnt = 0;

  FOR_BB_INSNS (bb, insn)
    {
      if (!NONDEBUG_INSN_P (insn))
        continue;

      vec_rtx_properties properties;
      properties.add_insn (insn, false);

      rtx set = single_set (insn);

      if (!set || insn_could_throw_p (insn))
        {
          store_exprs.truncate (0);
          continue;
        }

      /* The inner mem RTX if INSN is a load, NULL_RTX otherwise.  */
      rtx load_mem = SET_SRC (set);

      if (GET_CODE (load_mem) == ZERO_EXTEND
          || GET_CODE (load_mem) == SIGN_EXTEND)
        load_mem = XEXP (load_mem, 0);

      if (!MEM_P (load_mem))
        load_mem = NULL_RTX;

      /* The mem RTX if INSN is a store, NULL_RTX otherwise.  */
      rtx store_mem = MEM_P (SET_DEST (set)) ? SET_DEST (set) : NULL_RTX;

      /* We cannot analyze memory RTXs that have unknown size.  */
      if ((store_mem && (!MEM_SIZE_KNOWN_P (store_mem)
                         || !MEM_SIZE (store_mem).is_constant ()))
          || (load_mem && (!MEM_SIZE_KNOWN_P (load_mem)
                           || !MEM_SIZE (load_mem).is_constant ())))
        {
          store_exprs.truncate (0);
          continue;
        }

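      /* "Simple" accesses are single sets with no asm and no side effects;
         only those are safe for this pass to reorder around.  */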
      bool is_simple = !properties.has_asm
                       && !properties.has_side_effects ();
      bool is_simple_store = is_simple
                             && store_mem
                             && !contains_mem_rtx_p (SET_SRC (set));
      bool is_simple_load = is_simple
                            && load_mem
                            && !contains_mem_rtx_p (SET_DEST (set));

      int removed_count = 0;

      if (is_simple_store)
        {
          /* Record store forwarding candidate.  */
          store_fwd_info info;
          info.store_insn = insn;
          info.store_mem = store_mem;
          info.insn_cnt = insn_cnt;
          info.remove = false;
          info.forwarded = false;
          store_exprs.safe_push (info);
        }

      bool reads_mem = false;
      bool writes_mem = false;
      for (auto ref : properties.refs ())
        if (ref.is_mem ())
          {
            reads_mem |= ref.is_read ();
            writes_mem |= ref.is_write ();
          }
        else if (ref.is_write ())
          {
            /* Drop store forwarding candidates when the address register is
               overwritten.  */
            bool remove_rest = false;
            unsigned int i;
            store_fwd_info *it;
            FOR_EACH_VEC_ELT_REVERSE (store_exprs, i, it)
              {
                if (remove_rest
                    || reg_overlap_mentioned_p (regno_reg_rtx[ref.regno],
                                                it->store_mem))
                  {
                    it->remove = true;
                    removed_count++;
                    remove_rest = true;
                  }
              }
          }

      if (is_simple_load)
        {
          /* Process load for possible store forwarding cases.
             Possible newly created/moved stores, resulting from a successful
             forwarding, will be processed in subsequent iterations.  */
          auto_vec<store_fwd_info> forwardings;
          bool partial_forwarding = false;
          bool remove_rest = false;

          bool vector_load = VECTOR_MODE_P (GET_MODE (load_mem));

          unsigned int i;
          store_fwd_info *it;
          FOR_EACH_VEC_ELT_REVERSE (store_exprs, i, it)
            {
              rtx store_mem = it->store_mem;
              HOST_WIDE_INT off_val;

              bool vector_store = VECTOR_MODE_P (GET_MODE (store_mem));

              if (remove_rest)
                {
                  it->remove = true;
                  removed_count++;
                }
              else if (vector_load ^ vector_store)
                {
                  /* Vector stores followed by a non-vector load, or the
                     opposite, cause store_bit_field to generate non-canonical
                     expressions, like (subreg:V4SI (reg:DI ...) 0).
                     Cases like that should be handled using vec_duplicate,
                     so we reject the transformation in those cases.  */
                  it->remove = true;
                  removed_count++;
                  remove_rest = true;
                }
              else if (is_store_forwarding (store_mem, load_mem, &off_val))
                {
                  /* Check if moving this store after the load is legal.  */
                  bool write_dep = false;
                  for (unsigned int j = store_exprs.length () - 1; j != i; j--)
                    {
                      if (!store_exprs[j].forwarded
                          && output_dependence (store_mem,
                                                store_exprs[j].store_mem))
                        {
                          write_dep = true;
                          break;
                        }
                    }

                  if (!write_dep)
                    {
                      it->forwarded = true;
                      it->offset = off_val;
                      forwardings.safe_push (*it);
                    }
                  else
                    partial_forwarding = true;

                  it->remove = true;
                  removed_count++;
                }
              else if (true_dependence (store_mem, GET_MODE (store_mem),
                                        load_mem))
                {
                  /* We cannot keep a store forwarding candidate if it possibly
                     interferes with this load.  */
                  it->remove = true;
                  removed_count++;
                  remove_rest = true;
                }
            }

          if (!forwardings.is_empty () && !partial_forwarding)
            process_store_forwarding (forwardings, insn, load_mem);
        }

      if ((writes_mem && !is_simple_store)
          || (reads_mem && !is_simple_load))
        store_exprs.truncate (0);

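      /* Compact the candidate vector, dropping the entries that were flagged
         for removal above.  */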
      if (removed_count)
        {
          unsigned int i, j;
          store_fwd_info *it;
          VEC_ORDERED_REMOVE_IF (store_exprs, i, j, it, it->remove);
        }

      /* Don't consider store forwarding if the RTL instruction distance is
         more than PARAM_STORE_FORWARDING_MAX_DISTANCE and the cost checks
         are not disabled.  */
      const bool unlimited_cost = (param_store_forwarding_max_distance == 0);
      if (!unlimited_cost && !store_exprs.is_empty ()
          && (store_exprs[0].insn_cnt
              + param_store_forwarding_max_distance <= insn_cnt))
        store_exprs.ordered_remove (0);

      insn_cnt++;
    }
}

/* Update pass statistics.  */

void
store_forwarding_analyzer::update_stats (function *fn)
{
  statistics_counter_event (fn, "Cases of store forwarding detected: ",
                            stats_sf_detected);
  statistics_counter_event (fn, "Cases of store forwarding avoided: ",
                            stats_sf_avoided);
}

unsigned int
pass_rtl_avoid_store_forwarding::execute (function *fn)
{
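  /* Defer DF insn rescanning so the insn changes made by the pass are
     batched; the pass wraps up with TODO_df_finish.  */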
  df_set_flags (DF_DEFER_INSN_RESCAN);

  init_alias_analysis ();

  store_forwarding_analyzer analyzer;

  basic_block bb;
  FOR_EACH_BB_FN (bb, fn)
    analyzer.avoid_store_forwarding (bb);

  end_alias_analysis ();

  analyzer.update_stats (fn);

  return 0;
}

} // anon namespace.

rtl_opt_pass *
make_pass_rtl_avoid_store_forwarding (gcc::context *ctxt)
{
  return new pass_rtl_avoid_store_forwarding (ctxt);
}