// Pass to fuse adjacent loads/stores into paired memory accesses.
// Copyright (C) 2023-2024 Free Software Foundation, Inc.
//
// This file is part of GCC.
//
// GCC is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// GCC is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.
#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL
#define INCLUDE_LIST
#define INCLUDE_TYPE_TRAITS
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "df.h"
#include "rtl-iter.h"
#include "rtl-ssa.h"
#include "cfgcleanup.h"
#include "tree-pass.h"
#include "ordered-hash-map.h"
#include "tree-dfa.h"
#include "fold-const.h"
#include "tree-hash-traits.h"
#include "print-tree.h"
#include "pair-fusion.h"
using namespace rtl_ssa;
// We pack these fields (load_p, fpsimd_p, and size) into an integer
// (LFS) which we use as part of the key into the main hash tables.
//
// The idea is that we group candidates together only if they agree on
// the fields below.  Candidates that disagree on any of these
// properties shouldn't be merged together.
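// The packed fields themselves; see encode_lfs and decode_lfs below
// for the packing.
struct lfs_fields
{
  bool load_p;
  bool fpsimd_p;
  unsigned size;
};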
using insn_list_t = std::list<insn_info *>;
// Information about the accesses at a given offset from a particular
// base.  Stored in an access_group, see below.
struct access_record
{
  poly_int64 offset;
  std::list<insn_info *> cand_insns;
  std::list<access_record>::iterator place;

  access_record (poly_int64 off) : offset (off) {}
};
// A group of accesses where adjacent accesses could be ldp/stp
// candidates.  The splay tree supports efficient insertion,
// while the list supports efficient iteration.
struct access_group
{
  splay_tree<access_record *> tree;
  std::list<access_record> list;

  template<typename Alloc>
  inline void track (Alloc node_alloc, poly_int64 offset, insn_info *insn);
};
// Test if this base candidate is viable according to HAZARDS.
bool
base_cand::viable () const
{
  return !hazards[0] || !hazards[1] || (*hazards[0] > *hazards[1]);
}
// Information about an alternate base.  For a def_info D, it may
// instead be expressed as D = BASE + OFFSET.
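// (Fields as consumed by canon_base_map in track_access below.)
struct alt_base
{
  def_info *base;
  poly_int64 offset;
};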
// Virtual base class for load/store walkers used in alias analysis.
struct alias_walker
{
  virtual bool conflict_p (int &budget) const = 0;
  virtual insn_info *insn () const = 0;
  virtual bool valid () const = 0;
  virtual void advance () = 0;
};
pair_fusion::pair_fusion ()
{
  calculate_dominance_info (CDI_DOMINATORS);
  df_analyze ();
  crtl->ssa = new rtl_ssa::function_info (cfun);
}
pair_fusion::~pair_fusion ()
{
  if (crtl->ssa->perform_pending_updates ())
    cleanup_cfg (0);

  free_dominance_info (CDI_DOMINATORS);

  delete crtl->ssa;
  crtl->ssa = nullptr;
}
// This is the main function to start the pass.
void
pair_fusion::run ()
{
  if (!track_loads_p () && !track_stores_p ())
    return;

  for (auto bb : crtl->ssa->bbs ())
    process_block (bb);
}
// State used by the pass for a given basic block.
struct pair_fusion_bb_info
{
  using def_hash = nofree_ptr_hash<def_info>;
  using expr_key_t = pair_hash<tree_operand_hash, int_hash<int, -1, -2>>;
  using def_key_t = pair_hash<def_hash, int_hash<int, -1, -2>>;

  // Map of <tree base, LFS> -> access_group.
  ordered_hash_map<expr_key_t, access_group> expr_map;

  // Map of <RTL-SSA def_info *, LFS> -> access_group.
  ordered_hash_map<def_key_t, access_group> def_map;

  // Given the def_info for an RTL base register, express it as an offset from
  // some canonical base instead.
  //
  // Canonicalizing bases in this way allows us to identify adjacent accesses
  // even if they see different base register defs.
  hash_map<def_hash, alt_base> canon_base_map;
  static const size_t obstack_alignment = sizeof (void *);

  pair_fusion_bb_info (bb_info *bb, pair_fusion *d)
    : m_bb (bb), m_pass (d), m_emitted_tombstone (false)
  {
    obstack_specify_allocation (&m_obstack, OBSTACK_CHUNK_SIZE,
				obstack_alignment, obstack_chunk_alloc,
				obstack_chunk_free);
  }

  ~pair_fusion_bb_info ()
  {
    obstack_free (&m_obstack, nullptr);

    if (m_emitted_tombstone)
      {
	bitmap_release (&m_tombstone_bitmap);
	bitmap_obstack_release (&m_bitmap_obstack);
      }
  }
  inline void track_access (insn_info *, bool load, rtx mem);
  inline void transform ();
  inline void cleanup_tombstones ();
private:
  obstack m_obstack;
  bb_info *m_bb;
  pair_fusion *m_pass;

  // State for keeping track of tombstone insns emitted for this BB.
  bitmap_obstack m_bitmap_obstack;
  bitmap_head m_tombstone_bitmap;
  bool m_emitted_tombstone;

  inline splay_tree_node<access_record *> *node_alloc (access_record *);

  template<typename Map>
  inline void traverse_base_map (Map &map);
  inline void transform_for_base (int load_size, access_group &group);

  inline void merge_pairs (insn_list_t &, insn_list_t &,
			   bool load_p, unsigned access_size);
  inline bool try_fuse_pair (bool load_p, unsigned access_size,
			     insn_info *i1, insn_info *i2);

  inline bool fuse_pair (bool load_p, unsigned access_size,
			 int writeback,
			 insn_info *i1, insn_info *i2,
			 base_cand &base,
			 const insn_range_info &move_range);

  inline void track_tombstone (int uid);

  inline bool track_via_mem_expr (insn_info *, rtx mem, lfs_fields lfs);
};
splay_tree_node<access_record *> *
pair_fusion_bb_info::node_alloc (access_record *access)
{
  using T = splay_tree_node<access_record *>;
  void *addr = obstack_alloc (&m_obstack, sizeof (T));
  return new (addr) T (access);
}
// Given a mem MEM, if the address has side effects, return a MEM that accesses
// the same address but without the side effects.  Otherwise, return
// MEM unchanged.
static rtx
drop_writeback (rtx mem)
{
  rtx addr = XEXP (mem, 0);

  if (!side_effects_p (addr))
    return mem;

  switch (GET_CODE (addr))
    {
    case PRE_MODIFY:
      addr = XEXP (addr, 1);
      break;
    case POST_MODIFY:
    case POST_INC:
    case POST_DEC:
      addr = XEXP (addr, 0);
      break;
    case PRE_INC:
    case PRE_DEC:
    {
      poly_int64 adjustment = GET_MODE_SIZE (GET_MODE (mem));
      if (GET_CODE (addr) == PRE_DEC)
	adjustment *= -1;
      addr = plus_constant (GET_MODE (addr), XEXP (addr, 0), adjustment);
      break;
    }
    default:
      gcc_unreachable ();
    }

  return change_address (mem, GET_MODE (mem), addr);
}
// Convenience wrapper around strip_offset that can also look through
// RTX_AUTOINC addresses.  The interface is like strip_offset except we take a
// MEM so that we know the mode of the access.
static rtx
pair_mem_strip_offset (rtx mem, poly_int64 *offset)
{
  rtx addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_MODIFY:
    case POST_MODIFY:
      addr = strip_offset (XEXP (addr, 1), offset);
      gcc_checking_assert (REG_P (addr));
      gcc_checking_assert (rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr));
      break;
    case PRE_INC:
    case POST_INC:
      addr = XEXP (addr, 0);
      *offset = GET_MODE_SIZE (GET_MODE (mem));
      gcc_checking_assert (REG_P (addr));
      break;
    case PRE_DEC:
    case POST_DEC:
      addr = XEXP (addr, 0);
      *offset = -GET_MODE_SIZE (GET_MODE (mem));
      gcc_checking_assert (REG_P (addr));
      break;
    default:
      addr = strip_offset (addr, offset);
    }

  return addr;
}
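// For example, given a mem whose address is
// (pre_modify (reg x0) (plus (reg x0) (const_int 16))), the PRE_MODIFY
// case above returns the base register and sets *OFFSET to 16, while a
// plain (plus (reg x0) (const_int 16)) address goes through strip_offset
// in the default case.  (Register names here are purely illustrative.)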
// Return true if X is a PRE_{INC,DEC,MODIFY} rtx.
static bool
any_pre_modify_p (rtx x)
{
  const auto code = GET_CODE (x);
  return code == PRE_INC || code == PRE_DEC || code == PRE_MODIFY;
}

// Return true if X is a POST_{INC,DEC,MODIFY} rtx.
static bool
any_post_modify_p (rtx x)
{
  const auto code = GET_CODE (x);
  return code == POST_INC || code == POST_DEC || code == POST_MODIFY;
}
// Given LFS (load_p, fpsimd_p, size) fields in FIELDS, encode these
// into an integer for use as a hash table key.
static int
encode_lfs (lfs_fields fields)
{
  int size_log2 = exact_log2 (fields.size);
  gcc_checking_assert (size_log2 >= 2 && size_log2 <= 4);
  return ((int)fields.load_p << 3)
    | ((int)fields.fpsimd_p << 2)
    | (size_log2 - 2);
}
// Inverse of encode_lfs.
static lfs_fields
decode_lfs (int lfs)
{
  bool load_p = (lfs & (1 << 3));
  bool fpsimd_p = (lfs & (1 << 2));
  unsigned size = 1U << ((lfs & 3) + 2);
  return { load_p, fpsimd_p, size };
}
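// As a worked example, a 16-byte FP/SIMD load encodes as
// (1 << 3) | (1 << 2) | (log2 (16) - 2) == 14, and decode_lfs (14)
// recovers { load_p = true, fpsimd_p = true, size = 16 }.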
// Track the access INSN at offset OFFSET in this access group.
// ALLOC_NODE is used to allocate splay tree nodes.
template<typename Alloc>
void
access_group::track (Alloc alloc_node, poly_int64 offset, insn_info *insn)
{
  auto insert_before = [&](std::list<access_record>::iterator after)
    {
      auto it = list.emplace (after, offset);
      it->cand_insns.push_back (insn);
      it->place = it;
      return &*it;
    };

  if (!list.size ())
    {
      auto access = insert_before (list.end ());
      tree.insert_max_node (alloc_node (access));
      return;
    }

  auto compare = [&](splay_tree_node<access_record *> *node)
    {
      return compare_sizes_for_sort (offset, node->value ()->offset);
    };
  auto result = tree.lookup (compare);
  splay_tree_node<access_record *> *node = tree.root ();
  if (result == 0)
    node->value ()->cand_insns.push_back (insn);
  else
    {
      auto it = node->value ()->place;
      auto after = (result > 0) ? std::next (it) : it;
      auto access = insert_before (after);
      tree.insert_child (node, result > 0, alloc_node (access));
    }
}
// Given a candidate access INSN (with mem MEM), see if it has a suitable
// MEM_EXPR base (i.e. a tree decl) relative to which we can track the access.
// LFS is used as part of the key to the hash table, see track_access.
bool
pair_fusion_bb_info::track_via_mem_expr (insn_info *insn, rtx mem,
					 lfs_fields lfs)
{
  if (!MEM_EXPR (mem) || !MEM_OFFSET_KNOWN_P (mem))
    return false;

  poly_int64 offset;
  tree base_expr = get_addr_base_and_unit_offset (MEM_EXPR (mem),
						  &offset);
  if (!base_expr || !DECL_P (base_expr))
    return false;

  offset += MEM_OFFSET (mem);

  const machine_mode mem_mode = GET_MODE (mem);
  const HOST_WIDE_INT mem_size = GET_MODE_SIZE (mem_mode).to_constant ();

  // Punt on misaligned offsets.  Paired memory access instructions require
  // offsets to be a multiple of the access size, and we believe that
  // misaligned offsets on MEM_EXPR bases are likely to lead to misaligned
  // offsets w.r.t. RTL bases.
  if (!multiple_p (offset, mem_size))
    return false;

  const auto key = std::make_pair (base_expr, encode_lfs (lfs));
  access_group &group = expr_map.get_or_insert (key, NULL);
  auto alloc = [&](access_record *access) { return node_alloc (access); };
  group.track (alloc, offset, insn);

  if (dump_file)
    {
      fprintf (dump_file, "[bb %u] tracking insn %d via ",
	       m_bb->index (), insn->uid ());
      print_node_brief (dump_file, "mem expr", base_expr, 0);
      fprintf (dump_file, " [L=%d FP=%d, %smode, off=",
	       lfs.load_p, lfs.fpsimd_p, mode_name[mem_mode]);
      print_dec (offset, dump_file);
      fprintf (dump_file, "]\n");
    }

  return true;
}
// Main function to begin pair discovery.  Given a memory access INSN,
// determine whether it could be a candidate for fusing into a paired
// access, and if so, track it in the appropriate data structure for
// this basic block.  LOAD_P is true if the access is a load, and MEM
// is the mem rtx that occurs in INSN.
void
pair_fusion_bb_info::track_access (insn_info *insn, bool load_p, rtx mem)
{
  // We can't combine volatile MEMs, so punt on these.
  if (MEM_VOLATILE_P (mem))
    return;

  // Ignore writeback accesses if the hook says to do so.
  if (!m_pass->should_handle_writeback (writeback_type::EXISTING)
      && GET_RTX_CLASS (GET_CODE (XEXP (mem, 0))) == RTX_AUTOINC)
    return;

  const machine_mode mem_mode = GET_MODE (mem);
  if (!m_pass->pair_operand_mode_ok_p (mem_mode))
    return;

  rtx reg_op = XEXP (PATTERN (insn->rtl ()), !load_p);

  if (!m_pass->pair_reg_operand_ok_p (load_p, reg_op, mem_mode))
    return;

  // We want to segregate FP/SIMD accesses from GPR accesses.
  const bool fpsimd_op_p = m_pass->fpsimd_op_p (reg_op, mem_mode, load_p);

  // Note pair_operand_mode_ok_p already rejected VL modes.
  const unsigned mem_size = GET_MODE_SIZE (mem_mode).to_constant ();
  const lfs_fields lfs = { load_p, fpsimd_op_p, mem_size };

  if (track_via_mem_expr (insn, mem, lfs))
    return;

  poly_int64 mem_off;
  rtx addr = XEXP (mem, 0);
  const bool autoinc_p = GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC;
  rtx base = pair_mem_strip_offset (mem, &mem_off);
  if (!REG_P (base))
    return;

  // Need to calculate two (possibly different) offsets:
  //  - Offset at which the access occurs.
  //  - Offset of the new base def.
  poly_int64 access_off;
  if (autoinc_p && any_post_modify_p (addr))
    access_off = 0;
  else
    access_off = mem_off;

  poly_int64 new_def_off = mem_off;

  // Punt on accesses relative to eliminable regs.  Since we don't know the
  // elimination offset pre-RA, we should postpone forming pairs on such
  // accesses until after RA.
  //
  // As it stands, addresses in range for an individual load/store but not
  // for a paired access are currently reloaded inefficiently,
  // ending up with a separate base register for each pair.
  //
  // In theory LRA should make use of
  // targetm.legitimize_address_displacement to promote sharing of
  // bases among multiple (nearby) address reloads, but the current
  // LRA code returns early from process_address_1 for operands that
  // satisfy "m", even if they don't satisfy the real (relaxed) address
  // constraint; this early return means we never get to the code
  // that calls targetm.legitimize_address_displacement.
  //
  // So for now, it's better to punt when we can't be sure that the
  // offset is in range for paired access.  On aarch64, out-of-range cases
  // can then be handled after RA by the out-of-range LDP/STP peepholes.
  // Eventually, it would be nice to handle known out-of-range opportunities
  // in the pass itself (for stack accesses, this would be in the post-RA pass).
  if (!reload_completed
      && (REGNO (base) == FRAME_POINTER_REGNUM
	  || REGNO (base) == ARG_POINTER_REGNUM))
    return;

  // Now need to find def of base register.
  use_info *base_use = find_access (insn->uses (), REGNO (base));
  gcc_assert (base_use);
  def_info *base_def = base_use->def ();
  if (!base_def)
    {
      if (dump_file)
	fprintf (dump_file,
		 "base register (regno %d) of insn %d is undefined",
		 REGNO (base), insn->uid ());
      return;
    }

  alt_base *canon_base = canon_base_map.get (base_def);
  if (canon_base)
    {
      // Express this as the combined offset from the canonical base.
      base_def = canon_base->base;
      new_def_off += canon_base->offset;
      access_off += canon_base->offset;
    }

  if (autoinc_p)
    {
      auto def = find_access (insn->defs (), REGNO (base));
      gcc_assert (def);

      // Record that DEF = BASE_DEF + MEM_OFF.
      if (dump_file)
	{
	  pretty_printer pp;
	  pp_access (&pp, def, 0);
	  pp_string (&pp, " = ");
	  pp_access (&pp, base_def, 0);
	  fprintf (dump_file, "[bb %u] recording %s + ",
		   m_bb->index (), pp_formatted_text (&pp));
	  print_dec (new_def_off, dump_file);
	  fprintf (dump_file, "\n");
	}

      alt_base base_rec { base_def, new_def_off };
      if (canon_base_map.put (def, base_rec))
	gcc_unreachable (); // Base defs should be unique.
    }

  // Punt on misaligned offsets.  Paired memory accesses require offsets
  // to be a multiple of the access size.
  if (!multiple_p (mem_off, mem_size))
    return;

  const auto key = std::make_pair (base_def, encode_lfs (lfs));
  access_group &group = def_map.get_or_insert (key, NULL);
  auto alloc = [&](access_record *access) { return node_alloc (access); };
  group.track (alloc, access_off, insn);

  if (dump_file)
    {
      pretty_printer pp;
      pp_access (&pp, base_def, 0);

      fprintf (dump_file, "[bb %u] tracking insn %d via %s",
	       m_bb->index (), insn->uid (), pp_formatted_text (&pp));
      fprintf (dump_file,
	       " [L=%d, WB=%d, FP=%d, %smode, off=",
	       lfs.load_p, autoinc_p, lfs.fpsimd_p, mode_name[mem_mode]);
      print_dec (access_off, dump_file);
      fprintf (dump_file, "]\n");
    }
}
// Return the latest dataflow hazard before INSN.
//
// If IGNORE is non-NULL, this points to a sub-rtx which we should ignore for
// dataflow purposes.  This is needed when considering changing the RTL base of
// an access discovered through a MEM_EXPR base.
//
// If IGNORE_INSN is non-NULL, we should further ignore any hazards arising
// from that insn.
//
// N.B. we ignore any defs/uses of memory here as we deal with that separately,
// making use of alias disambiguation.
static insn_info *
latest_hazard_before (insn_info *insn, rtx *ignore,
		      insn_info *ignore_insn = nullptr)
{
  insn_info *result = nullptr;

  // If the insn can throw then it is at the end of a BB and we can't
  // move it, model this by recording a hazard in the previous insn
  // which will prevent moving the insn up.
  if (cfun->can_throw_non_call_exceptions
      && find_reg_note (insn->rtl (), REG_EH_REGION, NULL_RTX))
    return insn->prev_nondebug_insn ();

  // Return true if we registered the hazard.
  auto hazard = [&](insn_info *h) -> bool
    {
      gcc_checking_assert (*h < *insn);
      if (h == ignore_insn)
	return false;

      if (!result || *h > *result)
	result = h;

      return true;
    };

  rtx pat = PATTERN (insn->rtl ());
  auto ignore_use = [&](use_info *u)
    {
      if (u->is_mem ())
	return true;

      return !refers_to_regno_p (u->regno (), u->regno () + 1, pat, ignore);
    };

  // Find defs of uses in INSN (RaW).
  for (auto use : insn->uses ())
    if (!ignore_use (use) && use->def ())
      hazard (use->def ()->insn ());

  // Find previous defs (WaW) or previous uses (WaR) of defs in INSN.
  for (auto def : insn->defs ())
    {
      if (def->is_mem ())
	continue;

      if (def->prev_def ())
	{
	  hazard (def->prev_def ()->insn ()); // WaW

	  auto set = dyn_cast<set_info *> (def->prev_def ());
	  if (set && set->has_nondebug_insn_uses ())
	    for (auto use : set->reverse_nondebug_insn_uses ())
	      if (use->insn () != insn && hazard (use->insn ())) // WaR
		break;
	}

      if (!HARD_REGISTER_NUM_P (def->regno ()))
	continue;

      // Also need to check backwards for call clobbers (WaW).
      for (auto call_group : def->ebb ()->call_clobbers ())
	{
	  if (!call_group->clobbers (def->resource ()))
	    continue;

	  auto clobber_insn = prev_call_clobbers (*call_group, def->insn (),
						  ignore_nothing ());
	  if (clobber_insn)
	    hazard (clobber_insn);
	}
    }

  return result;
}
// Return the first dataflow hazard after INSN.
//
// If IGNORE is non-NULL, this points to a sub-rtx which we should ignore for
// dataflow purposes.  This is needed when considering changing the RTL base of
// an access discovered through a MEM_EXPR base.
//
// N.B. we ignore any defs/uses of memory here as we deal with that separately,
// making use of alias disambiguation.
static insn_info *
first_hazard_after (insn_info *insn, rtx *ignore)
{
  insn_info *result = nullptr;
  auto hazard = [insn, &result](insn_info *h)
    {
      gcc_checking_assert (*h > *insn);
      if (!result || *h < *result)
	result = h;
    };

  rtx pat = PATTERN (insn->rtl ());
  auto ignore_use = [&](use_info *u)
    {
      if (u->is_mem ())
	return true;

      return !refers_to_regno_p (u->regno (), u->regno () + 1, pat, ignore);
    };

  for (auto def : insn->defs ())
    {
      if (def->is_mem ())
	continue;

      if (def->next_def ())
	hazard (def->next_def ()->insn ()); // WaW

      auto set = dyn_cast<set_info *> (def);
      if (set && set->has_nondebug_insn_uses ())
	hazard (set->first_nondebug_insn_use ()->insn ()); // RaW

      if (!HARD_REGISTER_NUM_P (def->regno ()))
	continue;

      // Also check for call clobbers of this def (WaW).
      for (auto call_group : def->ebb ()->call_clobbers ())
	{
	  if (!call_group->clobbers (def->resource ()))
	    continue;

	  auto clobber_insn = next_call_clobbers (*call_group, def->insn (),
						  ignore_nothing ());
	  if (clobber_insn)
	    hazard (clobber_insn);
	}
    }

  // Find any subsequent defs of uses in INSN (WaR).
  for (auto use : insn->uses ())
    {
      if (ignore_use (use))
	continue;

      if (use->def ())
	{
	  auto def = use->def ()->next_def ();
	  if (def && def->insn () == insn)
	    def = def->next_def ();

	  if (def)
	    hazard (def->insn ());
	}

      if (!HARD_REGISTER_NUM_P (use->regno ()))
	continue;

      // Also need to handle call clobbers of our uses (again WaR).
      //
      // See restrict_movement_for_uses for why we don't need to check
      // backwards for call clobbers.
      for (auto call_group : use->ebb ()->call_clobbers ())
	{
	  if (!call_group->clobbers (use->resource ()))
	    continue;

	  auto clobber_insn = next_call_clobbers (*call_group, use->insn (),
						  ignore_nothing ());
	  if (clobber_insn)
	    hazard (clobber_insn);
	}
    }

  return result;
}
// Return true iff R1 and R2 overlap.
static bool
ranges_overlap_p (const insn_range_info &r1, const insn_range_info &r2)
{
  // If either range is empty, then their intersection is empty.
  if (!r1 || !r2)
    return false;

  // When do they not overlap?  When one range finishes before the other
  // starts, i.e. (*r1.last < *r2.first || *r2.last < *r1.first).
  // Inverting this, we get the below.
  return *r1.last >= *r2.first && *r2.last >= *r1.first;
}
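// For example, the ranges [i3, i5] and [i5, i8] overlap (they share i5),
// whereas [i3, i5] and [i6, i8] do not, since there *r1.last < *r2.first.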
// Get the range of insns that def feeds.
static insn_range_info get_def_range (def_info *def)
{
  insn_info *last = def->next_def ()->insn ()->prev_nondebug_insn ();
  return { def->insn (), last };
}
// Given a def (of memory), return the downwards range within which we
// can safely move this def.
static insn_range_info
def_downwards_move_range (def_info *def)
{
  auto range = get_def_range (def);

  auto set = dyn_cast<set_info *> (def);
  if (!set || !set->has_any_uses ())
    return range;

  auto use = set->first_nondebug_insn_use ();
  if (use)
    range = move_earlier_than (range, use->insn ());

  return range;
}
// Given a def (of memory), return the upwards range within which we can
// safely move this def.
static insn_range_info
def_upwards_move_range (def_info *def)
{
  def_info *prev = def->prev_def ();
  insn_range_info range { prev->insn (), def->insn () };

  auto set = dyn_cast<set_info *> (prev);
  if (!set || !set->has_any_uses ())
    return range;

  auto use = set->last_nondebug_insn_use ();
  if (use)
    range = move_later_than (range, use->insn ());

  return range;
}
// Class that implements a state machine for building the changes needed to form
// a store pair instruction.  This allows us to easily build the changes in
// program order, as required by rtl-ssa.
struct store_change_builder
{
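  // The states and actions below are reconstructed from their uses in
  // get_change and advance; a sketch of the shape rather than a verbatim
  // copy of the original declarations.
  enum class state
  {
    FIRST,
    INSERT,
    FIXUP_USE,
    LAST,
    DONE
  };

  enum class action
  {
    TOMBSTONE,
    CHANGE,
    INSERT,
    FIXUP_USE
  };

  struct change
  {
    action type;
    insn_info *insn;
  };
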
  bool done () const { return m_state == state::DONE; }
  store_change_builder (insn_info *insns[2],
			insn_info *repurpose,
			insn_info *dest)
    : m_state (state::FIRST), m_insns { insns[0], insns[1] },
      m_repurpose (repurpose), m_dest (dest), m_use (nullptr) {}
  change get_change () const
  {
    switch (m_state)
      {
      case state::FIRST:
	return {
	  m_insns[0] == m_repurpose ? action::CHANGE : action::TOMBSTONE,
	  m_insns[0]
	};
      case state::LAST:
	return {
	  m_insns[1] == m_repurpose ? action::CHANGE : action::TOMBSTONE,
	  m_insns[1]
	};
      case state::INSERT:
	return { action::INSERT, m_dest };
      case state::FIXUP_USE:
	return { action::FIXUP_USE, m_use->insn () };
      case state::DONE:
	break;
      }

    gcc_unreachable ();
  }
  // Transition to the next state.
  void advance ()
  {
    switch (m_state)
      {
      case state::FIRST:
	if (m_repurpose)
	  m_state = state::LAST;
	else
	  m_state = state::INSERT;
	break;
      case state::INSERT:
      {
	def_info *def = memory_access (m_insns[0]->defs ());
	while (*def->next_def ()->insn () <= *m_dest)
	  def = def->next_def ();

	// Now we know DEF feeds the insertion point for the new stp.
	// Look for any uses of DEF that will consume the new stp.
	gcc_assert (*def->insn () <= *m_dest
		    && *def->next_def ()->insn () > *m_dest);

	auto set = as_a<set_info *> (def);
	for (auto use : set->nondebug_insn_uses ())
	  if (*use->insn () > *m_dest)
	    {
	      m_use = use;
	      break;
	    }

	if (m_use)
	  m_state = state::FIXUP_USE;
	else
	  m_state = state::LAST;
	break;
      }
      case state::FIXUP_USE:
	m_use = m_use->next_nondebug_insn_use ();
	if (!m_use)
	  m_state = state::LAST;
	break;
      case state::LAST:
	m_state = state::DONE;
	break;
      case state::DONE:
	gcc_unreachable ();
      }
  }
  state m_state;

  // Original candidate stores.
  insn_info *m_insns[2];

  // If non-null, this is a candidate insn to change into an stp.  Otherwise we
  // are deleting both original insns and inserting a new insn for the stp.
  insn_info *m_repurpose;

  // Destination of the stp; it will be placed immediately after m_dest.
  insn_info *m_dest;

  // Current nondebug use that needs updating due to stp insertion.
  use_info *m_use;
};
// Given candidate store insns FIRST and SECOND, see if we can re-purpose one
// of them (together with its def of memory) for the stp insn.  If so, return
// that insn.  Otherwise, return null.
static insn_info *
try_repurpose_store (insn_info *first,
		     insn_info *second,
		     const insn_range_info &move_range)
{
  def_info * const defs[2] = {
    memory_access (first->defs ()),
    memory_access (second->defs ())
  };

  if (move_range.includes (first)
      || ranges_overlap_p (move_range, def_downwards_move_range (defs[0])))
    return first;

  if (move_range.includes (second)
      || ranges_overlap_p (move_range, def_upwards_move_range (defs[1])))
    return second;

  return nullptr;
}
// Generate the RTL pattern for a "tombstone"; used temporarily during this pass
// to replace stores that are marked for deletion where we can't immediately
// delete the store (since there are uses of mem hanging off the store).
//
// These are deleted at the end of the pass and uses re-parented appropriately
// at this point.
static rtx
gen_tombstone (void)
{
  return gen_rtx_CLOBBER (VOIDmode,
			  gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)));
}
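// The resulting pattern is (clobber (mem:BLK (scratch))), i.e. a clobber
// of unspecified memory, which keeps the memory def chain intact without
// expressing a real store.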
// Go through the reg notes rooted at NOTE, dropping those that we should drop,
// and preserving those that we want to keep by prepending them to (and
// returning) RESULT.  EH_REGION is used to make sure we have at most one
// REG_EH_REGION note in the resulting list.  FR_EXPR is used to return any
// REG_FRAME_RELATED_EXPR note we find, as these can need special handling in
// combine_reg_notes.
static rtx
filter_notes (rtx note, rtx result, bool *eh_region, rtx *fr_expr)
{
  for (; note; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_DEAD:
	  // REG_DEAD notes aren't required to be maintained.
	case REG_EQUAL:
	case REG_EQUIV:
	case REG_UNUSED:
	case REG_NOALIAS:
	  // These can all be dropped.  For REG_EQU{AL,IV} they cannot apply to
	  // non-single_set insns, and REG_UNUSED is re-computed by RTL-SSA, see
	  // rtl-ssa/changes.cc:update_notes.
	  //
	  // Similarly, REG_NOALIAS cannot apply to a parallel.
	case REG_INC:
	  // When we form the pair insn, the reg update is implemented
	  // as just another SET in the parallel, so isn't really an
	  // auto-increment in the RTL sense, hence we drop the note.
	  break;
	case REG_EH_REGION:
	  gcc_assert (!*eh_region);
	  *eh_region = true;
	  result = alloc_reg_note (REG_EH_REGION, XEXP (note, 0), result);
	  break;
	case REG_CFA_DEF_CFA:
	case REG_CFA_OFFSET:
	case REG_CFA_RESTORE:
	  result = alloc_reg_note (REG_NOTE_KIND (note),
				   copy_rtx (XEXP (note, 0)),
				   result);
	  break;
	case REG_FRAME_RELATED_EXPR:
	  gcc_assert (!*fr_expr);
	  *fr_expr = copy_rtx (XEXP (note, 0));
	  break;
	default:
	  // Unexpected REG_NOTE kind.
	  gcc_unreachable ();
	}
    }

  return result;
}
// Return the notes that should be attached to a combination of I1 and I2, where
// *I1 < *I2.  LOAD_P is true for loads.
static rtx
combine_reg_notes (insn_info *i1, insn_info *i2, bool load_p)
{
  // Temporary storage for REG_FRAME_RELATED_EXPR notes.
  rtx fr_expr[2] = {};

  bool found_eh_region = false;
  rtx result = NULL_RTX;
  result = filter_notes (REG_NOTES (i2->rtl ()), result,
			 &found_eh_region, fr_expr + 1);
  result = filter_notes (REG_NOTES (i1->rtl ()), result,
			 &found_eh_region, fr_expr);

  if (!load_p)
    {
      // Simple frame-related sp-relative saves don't need CFI notes, but when
      // we combine them into an stp we will need a CFI note as dwarf2cfi can't
      // interpret the unspec pair representation directly.
      if (RTX_FRAME_RELATED_P (i1->rtl ()) && !fr_expr[0])
	fr_expr[0] = copy_rtx (PATTERN (i1->rtl ()));
      if (RTX_FRAME_RELATED_P (i2->rtl ()) && !fr_expr[1])
	fr_expr[1] = copy_rtx (PATTERN (i2->rtl ()));
    }

  rtx fr_pat = NULL_RTX;
  if (fr_expr[0] && fr_expr[1])
    {
      // Combining two frame-related insns, need to construct
      // a REG_FRAME_RELATED_EXPR note which represents the combined
      // operation.
      RTX_FRAME_RELATED_P (fr_expr[1]) = 1;
      fr_pat = gen_rtx_PARALLEL (VOIDmode,
				 gen_rtvec (2, fr_expr[0], fr_expr[1]));
    }
  else
    fr_pat = fr_expr[0] ? fr_expr[0] : fr_expr[1];

  if (fr_pat)
    result = alloc_reg_note (REG_FRAME_RELATED_EXPR,
			     fr_pat, result);

  return result;
}
// Given two memory accesses in PATS, at least one of which is of a
// writeback form, extract two non-writeback memory accesses addressed
// relative to the initial value of the base register, and output these
// in PATS.  Return an rtx that represents the overall change to the
// base register.
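// For instance, a PRE_MODIFY store such as
//   (set (mem (pre_modify (reg x0) (plus (reg x0) (const_int -16)))) (reg x1))
// is rewritten as (set (mem (plus (reg x0) (const_int -16))) (reg x1)),
// and the returned rtx records the base update, here
// (set (reg x0) (plus (reg x0) (const_int -16))).  (Illustrative operands.)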
static rtx
extract_writebacks (bool load_p, rtx pats[2], int changed)
{
  rtx base_reg = NULL_RTX;
  poly_int64 current_offset = 0;

  poly_int64 offsets[2];

  for (int i = 0; i < 2; i++)
    {
      rtx mem = XEXP (pats[i], load_p);
      rtx reg = XEXP (pats[i], !load_p);

      rtx addr = XEXP (mem, 0);
      const bool autoinc_p = GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC;

      poly_int64 offset;
      rtx this_base = pair_mem_strip_offset (mem, &offset);
      gcc_assert (REG_P (this_base));
      if (base_reg)
	gcc_assert (rtx_equal_p (base_reg, this_base));
      else
	base_reg = this_base;

      // If we changed base for the current insn, then we already
      // derived the correct mem for this insn from the effective
      // address of the other access.
      if (i == changed)
	{
	  gcc_checking_assert (!autoinc_p);
	  offsets[i] = offset;
	  continue;
	}

      if (autoinc_p && any_pre_modify_p (addr))
	current_offset += offset;

      poly_int64 this_off = current_offset;
      if (!autoinc_p)
	this_off += offset;

      offsets[i] = this_off;
      rtx new_mem = change_address (mem, GET_MODE (mem),
				    plus_constant (GET_MODE (base_reg),
						   base_reg, this_off));
      pats[i] = load_p
	? gen_rtx_SET (reg, new_mem)
	: gen_rtx_SET (new_mem, reg);

      if (autoinc_p && any_post_modify_p (addr))
	current_offset += offset;
    }

  if (known_eq (current_offset, 0))
    return NULL_RTX;

  return gen_rtx_SET (base_reg, plus_constant (GET_MODE (base_reg),
					       base_reg, current_offset));
}
// INSNS contains either {nullptr, pair insn} (when promoting an existing
// non-writeback pair) or contains the candidate insns used to form the pair
// (when fusing a new pair).
//
// PAIR_RANGE specifies where we want to form the final pair.
// INITIAL_OFFSET gives the current base offset for the pair.
// Bit I of INITIAL_WRITEBACK is set if INSNS[I] initially had writeback.
// ACCESS_SIZE gives the access size for a single arm of the pair.
// BASE_DEF gives the initial def of the base register consumed by the pair.
//
// Given the above, this function looks for a trailing destructive update of the
// base register.  If there is one, we choose the first such update after
// PAIR_DST that is still in the same BB as our pair.  We return the new def in
// *ADD_DEF and the resulting writeback effect in *WRITEBACK_EFFECT.
insn_info *
pair_fusion::find_trailing_add (insn_info *insns[2],
				const insn_range_info &pair_range,
				int initial_writeback,
				rtx *writeback_effect,
				def_info **add_def,
				def_info *base_def,
				poly_int64 initial_offset,
				unsigned access_size)
{
  // Punt on frame-related insns, it is better to be conservative and
  // not try to form writeback pairs here, and means we don't have to
  // worry about the writeback case in forming REG_FRAME_RELATED_EXPR
  // notes (see combine_reg_notes).
  if ((insns[0] && RTX_FRAME_RELATED_P (insns[0]->rtl ()))
      || RTX_FRAME_RELATED_P (insns[1]->rtl ()))
    return nullptr;

  insn_info *pair_dst = pair_range.singleton ();
  gcc_assert (pair_dst);

  def_info *def = base_def->next_def ();

  // In the case that either of the initial pair insns had writeback,
  // then there will be intervening defs of the base register.
  // Skip over these.
  for (int i = 0; i < 2; i++)
    if (initial_writeback & (1 << i))
      {
	gcc_assert (def->insn () == insns[i]);
	def = def->next_def ();
      }

  if (!def || def->bb () != pair_dst->bb ())
    return nullptr;

  // DEF should now be the first def of the base register after PAIR_DST.
  insn_info *cand = def->insn ();
  gcc_assert (*cand > *pair_dst);

  const auto base_regno = base_def->regno ();

  // If CAND doesn't also use our base register,
  // it can't destructively update it.
  if (!find_access (cand->uses (), base_regno))
    return nullptr;

  auto rti = cand->rtl ();

  if (!INSN_P (rti))
    return nullptr;

  auto pat = PATTERN (rti);
  if (GET_CODE (pat) != SET)
    return nullptr;

  auto dest = XEXP (pat, 0);
  if (!REG_P (dest) || REGNO (dest) != base_regno)
    return nullptr;

  poly_int64 offset;
  rtx rhs_base = strip_offset (XEXP (pat, 1), &offset);
  if (!REG_P (rhs_base)
      || REGNO (rhs_base) != base_regno
      || !offset.is_constant ())
    return nullptr;

  // If the initial base offset is zero, we can handle any add offset
  // (post-inc).  Otherwise, we require the offsets to match (pre-inc).
  if (!known_eq (initial_offset, 0) && !known_eq (offset, initial_offset))
    return nullptr;

  auto off_hwi = offset.to_constant ();

  if (off_hwi % access_size != 0)
    return nullptr;

  off_hwi /= access_size;

  if (!pair_mem_in_range_p (off_hwi))
    return nullptr;

  auto dump_prefix = [&]()
    {
      if (!insns[0])
	fprintf (dump_file, "existing pair i%d: ", insns[1]->uid ());
      else
	fprintf (dump_file, "  (%d,%d)",
		 insns[0]->uid (), insns[1]->uid ());
    };

  insn_info *hazard = latest_hazard_before (cand, nullptr, insns[1]);
  if (!hazard || *hazard <= *pair_dst)
    {
      if (dump_file)
	{
	  dump_prefix ();
	  fprintf (dump_file,
		   "folding in trailing add (%d) to use writeback form\n",
		   cand->uid ());
	}

      *add_def = def;
      *writeback_effect = copy_rtx (pat);
      return cand;
    }

  if (dump_file)
    {
      dump_prefix ();
      fprintf (dump_file,
	       "can't fold in trailing add (%d), hazard = %d\n",
	       cand->uid (), hazard->uid ());
    }

  return nullptr;
}
// We just emitted a tombstone with uid UID, track it in a bitmap for
// this BB so we can easily identify it later when cleaning up tombstones.
void
pair_fusion_bb_info::track_tombstone (int uid)
{
  if (!m_emitted_tombstone)
    {
      // Lazily initialize the bitmap for tracking tombstone insns.
      bitmap_obstack_initialize (&m_bitmap_obstack);
      bitmap_initialize (&m_tombstone_bitmap, &m_bitmap_obstack);
      m_emitted_tombstone = true;
    }

  if (!bitmap_set_bit (&m_tombstone_bitmap, uid))
    gcc_unreachable (); // Bit should have changed.
}
// Reset the debug insn containing USE (the debug insn has been
// optimized away).
static void
reset_debug_use (use_info *use)
{
  auto use_insn = use->insn ();
  auto use_rtl = use_insn->rtl ();
  insn_change change (use_insn);
  change.new_uses = {};
  INSN_VAR_LOCATION_LOC (use_rtl) = gen_rtx_UNKNOWN_VAR_LOC ();
  crtl->ssa->change_insn (change);
}
// USE is a debug use that needs updating because DEF (a def of the same
// register) is being re-ordered over it.  If BASE is non-null, then DEF
// is an update of the register BASE by a constant, given by WB_OFFSET,
// and we can preserve debug info by accounting for the change in side
// effects.
static void
fixup_debug_use (obstack_watermark &attempt,
		 use_info *use,
		 def_info *def,
		 rtx base,
		 poly_int64 wb_offset)
{
  auto use_insn = use->insn ();
  if (base)
    {
      auto use_rtl = use_insn->rtl ();
      insn_change change (use_insn);

      gcc_checking_assert (REG_P (base) && use->regno () == REGNO (base));
      change.new_uses = check_remove_regno_access (attempt,
						   change.new_uses,
						   use->regno ());

      // The effect of the writeback is to add WB_OFFSET to BASE.  If
      // we're re-ordering DEF below USE, then we update USE by adding
      // WB_OFFSET to it.  Otherwise, if we're re-ordering DEF above
      // USE, we update USE by undoing the effect of the writeback
      // (subtracting WB_OFFSET).
      use_info *new_use;
      if (*def->insn () > *use_insn)
	{
	  // We now need USE_INSN to consume DEF.  Create a new use of DEF.
	  //
	  // N.B. this means until we call change_insns for the main change
	  // group we will temporarily have a debug use consuming a def that
	  // comes after it, but RTL-SSA doesn't currently support updating
	  // debug insns as part of the main change group (together with
	  // nondebug changes), so we will have to live with this update
	  // leaving the IR being temporarily inconsistent.  It seems to
	  // work out OK once the main change group is applied.
	  wb_offset *= -1;
	  new_use = crtl->ssa->create_use (attempt,
					   use_insn,
					   as_a<set_info *> (def));
	}
      else
	new_use = find_access (def->insn ()->uses (), use->regno ());

      change.new_uses = insert_access (attempt, new_use, change.new_uses);

      if (dump_file)
	{
	  const char *dir = (*def->insn () < *use_insn) ? "down" : "up";
	  pretty_printer pp;
	  pp_string (&pp, "[");
	  pp_access (&pp, use, 0);
	  pp_string (&pp, "]");
	  pp_string (&pp, " due to wb def ");
	  pp_string (&pp, "[");
	  pp_access (&pp, def, 0);
	  pp_string (&pp, "]");
	  fprintf (dump_file,
		   "  i%d: fix up debug use %s re-ordered %s, "
		   "sub r%u -> r%u + ",
		   use_insn->uid (), pp_formatted_text (&pp),
		   dir, REGNO (base), REGNO (base));
	  print_dec (wb_offset, dump_file);
	  fprintf (dump_file, "\n");
	}

      insn_propagation prop (use_rtl, base,
			     plus_constant (GET_MODE (base), base, wb_offset));
      if (prop.apply_to_pattern (&INSN_VAR_LOCATION_LOC (use_rtl)))
	crtl->ssa->change_insn (change);
      else
	{
	  if (dump_file)
	    fprintf (dump_file, "  i%d: RTL substitution failed (%s)"
		     ", resetting debug insn", use_insn->uid (),
		     prop.failure_reason);
	  reset_debug_use (use);
	}
    }
  else
    {
      if (dump_file)
	{
	  pretty_printer pp;
	  pp_string (&pp, "[");
	  pp_access (&pp, use, 0);
	  pp_string (&pp, "] due to re-ordered load def [");
	  pp_access (&pp, def, 0);
	  pp_string (&pp, "]");
	  fprintf (dump_file, "  i%d: resetting debug use %s\n",
		   use_insn->uid (), pp_formatted_text (&pp));
	}

      reset_debug_use (use);
    }
}
// Update debug uses when folding in a trailing add insn to form a
// writeback pair.
//
// ATTEMPT is used to allocate RTL-SSA temporaries for the changes,
// the final pair is placed immediately after PAIR_DST, TRAILING_ADD
// is a trailing add insn which is being folded into the pair to make it
// use writeback addressing, and WRITEBACK_EFFECT is the pattern for
// TRAILING_ADD.
static void
fixup_debug_uses_trailing_add (obstack_watermark &attempt,
			       insn_info *pair_dst,
			       insn_info *trailing_add,
			       rtx writeback_effect)
{
  rtx base = SET_DEST (writeback_effect);

  poly_int64 wb_offset;
  rtx base2 = strip_offset (SET_SRC (writeback_effect), &wb_offset);
  gcc_checking_assert (rtx_equal_p (base, base2));

  auto defs = trailing_add->defs ();
  gcc_checking_assert (defs.size () == 1);
  def_info *def = defs[0];

  if (auto set = safe_dyn_cast<set_info *> (def->prev_def ()))
    for (auto use : iterate_safely (set->debug_insn_uses ()))
      if (*use->insn () > *pair_dst)
	// DEF is getting re-ordered above USE, fix up USE accordingly.
	fixup_debug_use (attempt, use, def, base, wb_offset);
}
// Called from fuse_pair, fixes up any debug uses that will be affected
// by the changes.
//
// ATTEMPT is the obstack watermark used to allocate RTL-SSA temporaries for
// the changes, INSNS gives the candidate insns: at this point the use/def
// information should still be as on entry to fuse_pair, but the patterns may
// have changed, hence we pass ORIG_RTL which contains the original patterns
// for the candidate insns.
//
// The final pair will be placed immediately after PAIR_DST, LOAD_P is true if
// it is a load pair, bit I of WRITEBACK is set if INSNS[I] originally had
// writeback, and WRITEBACK_EFFECT is an rtx describing the overall update to
// the base register in the final pair (if any).  BASE_REGNO gives the register
// number of the base register used in the final pair.
static void
fixup_debug_uses (obstack_watermark &attempt,
		  insn_info *insns[2],
		  rtx orig_rtl[2],
		  insn_info *pair_dst,
		  insn_info *trailing_add,
		  bool load_p,
		  int writeback,
		  rtx writeback_effect,
		  unsigned base_regno)
{
  // USE is a debug use that needs updating because DEF (a def of the
  // resource) is being re-ordered over it.  If WRITEBACK_PAT is non-NULL,
  // then it gives the original RTL pattern for DEF's insn, and DEF is a
  // writeback update of the base register.
  //
  // This simply unpacks WRITEBACK_PAT if needed and calls fixup_debug_use.
  auto update_debug_use = [&](use_info *use, def_info *def,
			      rtx writeback_pat)
    {
      poly_int64 offset = 0;
      rtx base = NULL_RTX;
      if (writeback_pat)
	{
	  rtx mem = XEXP (writeback_pat, load_p);
	  gcc_checking_assert (GET_RTX_CLASS (GET_CODE (XEXP (mem, 0)))
			       == RTX_AUTOINC);

	  base = pair_mem_strip_offset (mem, &offset);
	  gcc_checking_assert (REG_P (base) && REGNO (base) == base_regno);
	}
      fixup_debug_use (attempt, use, def, base, offset);
    };

  // Reset any debug uses of mem over which we re-ordered a store.
  //
  // It would be nice to try and preserve debug info here, but it seems that
  // would require doing alias analysis to see if the store aliases with the
  // debug use, which seems a little extravagant just to preserve debug info.
  if (!load_p)
    {
      auto def = memory_access (insns[0]->defs ());
      auto last_def = memory_access (insns[1]->defs ());
      for (; def != last_def; def = def->next_def ())
	{
	  auto set = as_a<set_info *> (def);
	  for (auto use : iterate_safely (set->debug_insn_uses ()))
	    {
	      if (dump_file)
		fprintf (dump_file, "  i%d: resetting debug use of mem\n",
			 use->insn ()->uid ());
	      reset_debug_use (use);
	    }
	}
    }

  // Now let's take care of register uses, starting with debug uses
  // attached to defs from our first insn.
  for (auto def : insns[0]->defs ())
    {
      auto set = dyn_cast<set_info *> (def);
      if (!set || set->is_mem () || !set->first_debug_insn_use ())
	continue;

      def_info *defs[2] = {
	def,
	find_access (insns[1]->defs (), def->regno ())
      };

      rtx writeback_pats[2] = {};
      if (def->regno () == base_regno)
	for (int i = 0; i < 2; i++)
	  if (writeback & (1 << i))
	    {
	      gcc_checking_assert (defs[i]);
	      writeback_pats[i] = orig_rtl[i];
	    }

      // Now that we've characterized the defs involved, go through the
      // debug uses and determine how to update them (if needed).
      for (auto use : iterate_safely (set->debug_insn_uses ()))
	{
	  if (*pair_dst < *use->insn () && defs[1])
	    // We're re-ordering defs[1] above a previous use of the
	    // same resource.
	    update_debug_use (use, defs[1], writeback_pats[1]);
	  else if (*pair_dst >= *use->insn ())
	    // We're re-ordering defs[0] below its use.
	    update_debug_use (use, defs[0], writeback_pats[0]);
	}
    }

  // Now let's look at registers which are def'd by the second insn
  // but not by the first insn, there may still be debug uses of a
  // previous def which can be affected by moving the second insn up.
  for (auto def : insns[1]->defs ())
    {
      // This should be M log N where N is the number of defs in
      // insns[0] and M is the number of defs in insns[1].
      if (def->is_mem () || find_access (insns[0]->defs (), def->regno ()))
	continue;

      auto prev_set = safe_dyn_cast<set_info *> (def->prev_def ());
      if (!prev_set)
	continue;

      rtx writeback_pat = NULL_RTX;
      if (def->regno () == base_regno && (writeback & 2))
	writeback_pat = orig_rtl[1];

      // We have a def in insns[1] which isn't def'd by the first insn.
      // Look to the previous def and see if it has any debug uses.
      for (auto use : iterate_safely (prev_set->debug_insn_uses ()))
	if (*pair_dst < *use->insn ())
	  // We're ordering DEF above a previous use of the same register.
	  update_debug_use (use, def, writeback_pat);
    }

  if ((writeback & 2) && !writeback_effect)
    {
      // If the second insn initially had writeback but the final
      // pair does not, then there may be trailing debug uses of the
      // second writeback def which need re-parenting: do that.
      auto def = find_access (insns[1]->defs (), base_regno);
      gcc_assert (def);
      auto set = as_a<set_info *> (def);
      for (auto use : iterate_safely (set->debug_insn_uses ()))
	{
	  insn_change change (use->insn ());
	  change.new_uses = check_remove_regno_access (attempt,
						       change.new_uses,
						       base_regno);
	  auto new_use = find_access (insns[0]->uses (), base_regno);

	  // N.B. insns must have already shared a common base due to writeback.
	  gcc_assert (new_use);

	  if (dump_file)
	    fprintf (dump_file,
		     "  i%d: cancelling wb, re-parenting trailing debug use\n",
		     use->insn ()->uid ());

	  change.new_uses = insert_access (attempt, new_use, change.new_uses);
	  crtl->ssa->change_insn (change);
	}
    }
  else if (trailing_add)
    fixup_debug_uses_trailing_add (attempt, pair_dst, trailing_add,
				   writeback_effect);
}
// Try and actually fuse the pair given by insns I1 and I2.
//
// Here we've done enough analysis to know this is safe, we only
// reject the pair at this stage if either the tuning policy says to,
// or recog fails on the final pair insn.
//
// LOAD_P is true for loads, ACCESS_SIZE gives the access size of each
// candidate insn.  Bit i of WRITEBACK is set if the ith insn (in program
// order) uses writeback.
//
// BASE gives the chosen base candidate for the pair and MOVE_RANGE is
// a singleton range which says where to place the pair.
bool
pair_fusion_bb_info::fuse_pair (bool load_p,
				unsigned access_size,
				int writeback,
				insn_info *i1, insn_info *i2,
				base_cand &base,
				const insn_range_info &move_range)
{
  auto attempt = crtl->ssa->new_change_attempt ();

  auto make_change = [&attempt](insn_info *insn)
    {
      return crtl->ssa->change_alloc<insn_change> (attempt, insn);
    };
  auto make_delete = [&attempt](insn_info *insn)
    {
      return crtl->ssa->change_alloc<insn_change> (attempt,
						   insn,
						   insn_change::DELETE);
    };

  insn_info *first = (*i1 < *i2) ? i1 : i2;
  insn_info *second = (first == i1) ? i2 : i1;

  insn_info *pair_dst = move_range.singleton ();
  gcc_assert (pair_dst);

  insn_info *insns[2] = { first, second };

  auto_vec<insn_change *> changes;
  auto_vec<int, 2> tombstone_uids (2);

  rtx pats[2] = {
    PATTERN (first->rtl ()),
    PATTERN (second->rtl ())
  };

  // Make copies of the patterns as we might need to refer to the original RTL
  // later, for example when updating debug uses (which is after we've updated
  // one or both of the patterns in the candidate insns).
  rtx orig_rtl[2];
  for (int i = 0; i < 2; i++)
    orig_rtl[i] = copy_rtx (pats[i]);
  use_array input_uses[2] = { first->uses (), second->uses () };
  def_array input_defs[2] = { first->defs (), second->defs () };

  int changed_insn = -1;
  if (base.from_insn != -1)
    {
      // If we're not already using a shared base, we need
      // to re-write one of the accesses to use the base from
      // the other insn.
      gcc_checking_assert (base.from_insn == 0 || base.from_insn == 1);
      changed_insn = !base.from_insn;

      rtx base_pat = pats[base.from_insn];
      rtx change_pat = pats[changed_insn];
      rtx base_mem = XEXP (base_pat, load_p);
      rtx change_mem = XEXP (change_pat, load_p);

      const bool lower_base_p = (insns[base.from_insn] == i1);
      HOST_WIDE_INT adjust_amt = access_size;
      if (!lower_base_p)
	adjust_amt *= -1;

      rtx change_reg = XEXP (change_pat, !load_p);
      rtx effective_base = drop_writeback (base_mem);
      rtx adjusted_addr = plus_constant (Pmode,
					 XEXP (effective_base, 0),
					 adjust_amt);
      rtx new_mem = replace_equiv_address_nv (change_mem, adjusted_addr);
      rtx new_set = load_p
	? gen_rtx_SET (change_reg, new_mem)
	: gen_rtx_SET (new_mem, change_reg);

      pats[changed_insn] = new_set;

      auto keep_use = [&](use_info *u)
	{
	  return refers_to_regno_p (u->regno (), u->regno () + 1,
				    change_pat, &XEXP (change_pat, load_p));
	};

      // Drop any uses that only occur in the old address.
      input_uses[changed_insn] = filter_accesses (attempt,
						  input_uses[changed_insn],
						  keep_use);
    }
  rtx writeback_effect = NULL_RTX;
  if (writeback)
    writeback_effect = extract_writebacks (load_p, pats, changed_insn);

  const auto base_regno = base.def->regno ();

  if (base.from_insn == -1 && (writeback & 1))
    {
      // If the first of the candidate insns had a writeback form, we'll need to
      // drop the use of the updated base register from the second insn's uses.
      //
      // N.B. we needn't worry about the base register occurring as a store
      // operand, as we checked that there was no non-address true dependence
      // between the insns in try_fuse_pair.
      gcc_checking_assert (find_access (input_uses[1], base_regno));
      input_uses[1] = check_remove_regno_access (attempt,
						 input_uses[1],
						 base_regno);
    }

  // Go through and drop uses that only occur in register notes,
  // as we won't be preserving those.
  for (int i = 0; i < 2; i++)
    {
      auto rti = insns[i]->rtl ();
      if (!REG_NOTES (rti))
	continue;

      input_uses[i] = remove_note_accesses (attempt, input_uses[i]);
    }

  // Edge case: if the first insn is a writeback load and the
  // second insn is a non-writeback load which transfers into the base
  // register, then we should drop the writeback altogether as the
  // update of the base register from the second load should prevail.
  //
  // For example:
  //   ldr x2, [x1], #8
  //   ldr x1, [x1]
  //   -->
  //   ldp x2, x1, [x1]
  if (load_p
      && (writeback & 1)
      && !(writeback & 2)
      && find_access (input_defs[1], base_regno))
    {
      if (dump_file)
	fprintf (dump_file,
		 "  load pair: i%d has wb but subsequent i%d has non-wb "
		 "update of base (r%d), dropping wb\n",
		 insns[0]->uid (), insns[1]->uid (), base_regno);
      gcc_assert (writeback_effect);
      writeback_effect = NULL_RTX;
    }

  // So far the patterns have been in instruction order,
  // now we want them in offset order.
  if (i1 != first)
    std::swap (pats[0], pats[1]);

  poly_int64 offsets[2];
  for (int i = 0; i < 2; i++)
    {
      rtx mem = XEXP (pats[i], load_p);
      gcc_checking_assert (MEM_P (mem));
      rtx base = strip_offset (XEXP (mem, 0), offsets + i);
      gcc_checking_assert (REG_P (base));
      gcc_checking_assert (base_regno == REGNO (base));
    }
  // If either of the original insns had writeback, but the resulting pair insn
  // does not (can happen e.g. in the load pair edge case above, or if the
  // writeback effects cancel out), then drop the def(s) of the base register
  // as appropriate.
  //
  // Also drop the first def in the case that both of the original insns had
  // writeback.  The second def could well have uses, but the first def should
  // only be used by the second insn (and we dropped that use above).
  for (int i = 0; i < 2; i++)
    if ((!writeback_effect && (writeback & (1 << i)))
	|| (i == 0 && writeback == 3))
      input_defs[i] = check_remove_regno_access (attempt,
						 input_defs[i],
						 base_regno);

  // If we don't currently have a writeback pair, and we don't have
  // a load that clobbers the base register, look for a trailing destructive
  // update of the base register and try and fold it in to make this into a
  // writeback pair.
  insn_info *trailing_add = nullptr;
  if (m_pass->should_handle_writeback (writeback_type::ALL)
      && !writeback_effect
      && (!load_p || (!refers_to_regno_p (base_regno, base_regno + 1,
					  XEXP (pats[0], 0), nullptr)
		      && !refers_to_regno_p (base_regno, base_regno + 1,
					     XEXP (pats[1], 0), nullptr))))
    {
      def_info *add_def;
      trailing_add = m_pass->find_trailing_add (insns, move_range, writeback,
						&writeback_effect,
						&add_def, base.def, offsets[0],
						access_size);
      if (trailing_add)
	{
	  // The def of the base register from the trailing add should prevail.
	  input_defs[0] = insert_access (attempt, add_def, input_defs[0]);
	  gcc_assert (input_defs[0].is_valid ());
	}
    }
  // Now that we know what base mem we're going to use, check if it's OK
  // with the pair mem policy.
  rtx first_mem = XEXP (pats[0], load_p);
  if (!m_pass->pair_mem_ok_with_policy (first_mem, load_p))
    {
      if (dump_file)
	fprintf (dump_file,
		 "punting on pair (%d,%d), pair mem policy says no\n",
		 i1->uid (), i2->uid ());
      return false;
    }

  rtx reg_notes = combine_reg_notes (first, second, load_p);

  rtx pair_pat = m_pass->gen_pair (pats, writeback_effect, load_p);
  insn_change *pair_change = nullptr;
  auto set_pair_pat = [pair_pat,reg_notes](insn_change *change) {
    rtx_insn *rti = change->insn ()->rtl ();
    validate_unshare_change (rti, &PATTERN (rti), pair_pat, true);
    validate_change (rti, &REG_NOTES (rti), reg_notes, true);
  };

  if (load_p)
    {
      changes.safe_push (make_delete (first));
      pair_change = make_change (second);
      changes.safe_push (pair_change);

      pair_change->move_range = move_range;
      pair_change->new_defs = merge_access_arrays (attempt,
						   input_defs[0],
						   input_defs[1]);
      gcc_assert (pair_change->new_defs.is_valid ());

      pair_change->new_uses
	= merge_access_arrays (attempt,
			       drop_memory_access (input_uses[0]),
			       drop_memory_access (input_uses[1]));
      gcc_assert (pair_change->new_uses.is_valid ());
      set_pair_pat (pair_change);
    }
  else
    {
      using Action = store_change_builder::action;
      insn_info *store_to_change = try_repurpose_store (first, second,
							move_range);
      store_change_builder builder (insns, store_to_change, pair_dst);
      insn_change *change;
      set_info *new_set = nullptr;
      for (; !builder.done (); builder.advance ())
	{
	  auto action = builder.get_change ();
	  change = (action.type == Action::INSERT)
	    ? nullptr : make_change (action.insn);
	  switch (action.type)
	    {
	    case Action::CHANGE:
	    {
	      set_pair_pat (change);
	      change->new_uses = merge_access_arrays (attempt,
						      input_uses[0],
						      input_uses[1]);
	      auto d1 = drop_memory_access (input_defs[0]);
	      auto d2 = drop_memory_access (input_defs[1]);
	      change->new_defs = merge_access_arrays (attempt, d1, d2);
	      gcc_assert (change->new_defs.is_valid ());
	      def_info *store_def = memory_access (change->insn ()->defs ());
	      change->new_defs = insert_access (attempt,
						store_def,
						change->new_defs);
	      gcc_assert (change->new_defs.is_valid ());
	      change->move_range = move_range;
	      pair_change = change;
	      break;
	    }
	    case Action::TOMBSTONE:
	    {
	      tombstone_uids.quick_push (change->insn ()->uid ());
	      rtx_insn *rti = change->insn ()->rtl ();
	      validate_change (rti, &PATTERN (rti), gen_tombstone (), true);
	      validate_change (rti, &REG_NOTES (rti), NULL_RTX, true);
	      change->new_uses = use_array (nullptr, 0);
	      break;
	    }
	    case Action::INSERT:
	    {
	      if (dump_file)
		fprintf (dump_file,
			 "  stp: cannot re-purpose candidate stores\n");

	      auto new_insn = crtl->ssa->create_insn (attempt, INSN, pair_pat);
	      change = make_change (new_insn);
	      change->move_range = move_range;
	      change->new_uses = merge_access_arrays (attempt,
						      input_uses[0],
						      input_uses[1]);
	      gcc_assert (change->new_uses.is_valid ());

	      auto d1 = drop_memory_access (input_defs[0]);
	      auto d2 = drop_memory_access (input_defs[1]);
	      change->new_defs = merge_access_arrays (attempt, d1, d2);
	      gcc_assert (change->new_defs.is_valid ());

	      new_set = crtl->ssa->create_set (attempt, new_insn, memory);
	      change->new_defs = insert_access (attempt, new_set,
						change->new_defs);
	      gcc_assert (change->new_defs.is_valid ());
	      pair_change = change;
	      break;
	    }
	    case Action::FIXUP_USE:
	    {
	      // This use now needs to consume memory from our stp.
	      if (dump_file)
		fprintf (dump_file,
			 "  stp: changing i%d to use mem from new stp "
			 "(after i%d)\n",
			 action.insn->uid (), pair_dst->uid ());
	      change->new_uses = drop_memory_access (change->new_uses);
	      gcc_assert (new_set);
	      auto new_use = crtl->ssa->create_use (attempt, action.insn,
						    new_set);
	      change->new_uses = insert_access (attempt, new_use,
						change->new_uses);
	      break;
	    }
	    }
	  changes.safe_push (change);
	}
    }
  if (trailing_add)
    changes.safe_push (make_delete (trailing_add));
  else if ((writeback & 2) && !writeback_effect)
    {
      // The second insn initially had writeback but now the pair does not,
      // need to update any nondebug uses of the base register def in the
      // second insn.  We'll take care of debug uses later.
      auto def = find_access (insns[1]->defs (), base_regno);
      gcc_assert (def);
      auto set = dyn_cast<set_info *> (def);
      if (set && set->has_nondebug_uses ())
	{
	  auto orig_use = find_access (insns[0]->uses (), base_regno);
	  for (auto use : set->nondebug_insn_uses ())
	    {
	      auto change = make_change (use->insn ());
	      change->new_uses = check_remove_regno_access (attempt,
							    change->new_uses,
							    base_regno);
	      change->new_uses = insert_access (attempt,
						orig_use,
						change->new_uses);
	      changes.safe_push (change);
	    }
	}
    }

  auto ignore = ignore_changing_insns (changes);
  for (unsigned i = 0; i < changes.length (); i++)
    gcc_assert (rtl_ssa::restrict_movement (*changes[i], ignore));

  // Check the pair pattern is recog'd.
  if (!rtl_ssa::recog (attempt, *pair_change, ignore))
    {
      if (dump_file)
	fprintf (dump_file, "  failed to form pair, recog failed\n");

      // Free any reg notes we allocated.
      while (reg_notes)
	{
	  rtx next = XEXP (reg_notes, 1);
	  free_EXPR_LIST_node (reg_notes);
	  reg_notes = next;
	}
      cancel_changes (0);
      return false;
    }

  gcc_assert (crtl->ssa->verify_insn_changes (changes));

  // Fix up any debug uses that will be affected by the changes.
  if (MAY_HAVE_DEBUG_INSNS)
    fixup_debug_uses (attempt, insns, orig_rtl, pair_dst, trailing_add,
		      load_p, writeback, writeback_effect, base_regno);

  confirm_change_group ();
  crtl->ssa->change_insns (changes);

  gcc_checking_assert (tombstone_uids.length () <= 2);
  for (auto uid : tombstone_uids)
    track_tombstone (uid);

  return true;
}
// Return true if STORE_INSN may modify mem rtx MEM.  Make sure we keep
// within our BUDGET for alias analysis.
static bool
store_modifies_mem_p (rtx mem, insn_info *store_insn, int &budget)
{
  if (!budget)
    {
      if (dump_file)
	{
	  fprintf (dump_file,
		   "exceeded budget, assuming store %d aliases with mem ",
		   store_insn->uid ());
	  print_simple_rtl (dump_file, mem);
	  fprintf (dump_file, "\n");
	}

      return true;
    }

  budget--;
  return memory_modified_in_insn_p (mem, store_insn->rtl ());
}
// Return true if LOAD may be modified by STORE.  Make sure we keep
// within our BUDGET for alias analysis.
static bool
load_modified_by_store_p (insn_info *load,
			  insn_info *store,
			  int &budget)
{
  gcc_checking_assert (budget >= 0);

  if (!budget)
    {
      if (dump_file)
	fprintf (dump_file,
		 "exceeded budget, assuming load %d aliases with store %d\n",
		 load->uid (), store->uid ());
      return true;
    }

  // It isn't safe to re-order stores over calls.
  if (CALL_P (load->rtl ()))
    return true;

  budget--;

  // Iterate over all MEMs in the load, seeing if any alias with
  // our store.
  subrtx_var_iterator::array_type array;
  rtx pat = PATTERN (load->rtl ());
  FOR_EACH_SUBRTX_VAR (iter, array, pat, NONCONST)
    if (MEM_P (*iter) && memory_modified_in_insn_p (*iter, store->rtl ()))
      return true;

  return false;
}
// Implement some common functionality used by both store_walker
// and load_walker.
template<bool reverse>
class def_walker : public alias_walker
{
protected:
  using def_iter_t = typename std::conditional<reverse,
	reverse_def_iterator, def_iterator>::type;

  static use_info *start_use_chain (def_iter_t &def_iter)
  {
    set_info *set = nullptr;
    for (; *def_iter; def_iter++)
      {
	set = dyn_cast<set_info *> (*def_iter);
	if (!set)
	  continue;

	use_info *use = reverse
	  ? set->last_nondebug_insn_use ()
	  : set->first_nondebug_insn_use ();

	if (use)
	  return use;
      }

    return nullptr;
  }

  def_iter_t def_iter;
  insn_info *limit;
  def_walker (def_info *def, insn_info *limit) :
    def_iter (def), limit (limit) {}

  virtual bool iter_valid () const { return *def_iter; }

public:
  insn_info *insn () const override { return (*def_iter)->insn (); }
  void advance () override { def_iter++; }
  bool valid () const override final
  {
    if (!iter_valid ())
      return false;

    if (reverse)
      return *(insn ()) > *limit;
    else
      return *(insn ()) < *limit;
  }
};
// alias_walker that iterates over stores.
template<bool reverse, typename InsnPredicate>
class store_walker : public def_walker<reverse>
{
  rtx cand_mem;
  InsnPredicate tombstone_p;

public:
  store_walker (def_info *mem_def, rtx mem, insn_info *limit_insn,
		InsnPredicate tombstone_fn) :
    def_walker<reverse> (mem_def, limit_insn),
    cand_mem (mem), tombstone_p (tombstone_fn) {}

  bool conflict_p (int &budget) const override final
  {
    if (tombstone_p (this->insn ()))
      return false;

    return store_modifies_mem_p (cand_mem, this->insn (), budget);
  }
};
// alias_walker that iterates over loads.
template<bool reverse>
class load_walker : public def_walker<reverse>
{
  using Base = def_walker<reverse>;
  using use_iter_t = typename std::conditional<reverse,
	reverse_use_iterator, nondebug_insn_use_iterator>::type;

  use_iter_t use_iter;
  insn_info *cand_store;

  bool iter_valid () const override final { return *use_iter; }

public:
  void advance () override final
  {
    use_iter++;
    if (*use_iter)
      return;

    this->def_iter++;
    use_iter = Base::start_use_chain (this->def_iter);
  }

  insn_info *insn () const override final
  {
    return (*use_iter)->insn ();
  }

  bool conflict_p (int &budget) const override final
  {
    return load_modified_by_store_p (insn (), cand_store, budget);
  }

  load_walker (def_info *def, insn_info *store, insn_info *limit_insn)
    : Base (def, limit_insn),
      use_iter (Base::start_use_chain (this->def_iter)),
      cand_store (store) {}
};
// Process our alias_walkers in a round-robin fashion, proceeding until
// nothing more can be learned from alias analysis.
//
// We try to maintain the invariant that if a walker becomes invalid, we
// set its pointer to null.
void
pair_fusion::do_alias_analysis (insn_info *alias_hazards[4],
				alias_walker *walkers[4],
				bool load_p)
{
  const int n_walkers = 2 + (2 * !load_p);
  int budget = pair_mem_alias_check_limit ();

  auto next_walker = [walkers,n_walkers](int current) -> int {
    for (int j = 1; j <= n_walkers; j++)
      {
	int idx = (current + j) % n_walkers;
	if (walkers[idx])
	  return idx;
      }
    return -1;
  };

  int i = -1;
  for (int j = 0; j < n_walkers; j++)
    {
      alias_hazards[j] = nullptr;
      if (!walkers[j])
	continue;

      if (!walkers[j]->valid ())
	walkers[j] = nullptr;
      else if (i == -1)
	i = j;
    }

  while (i >= 0)
    {
      int insn_i = i % 2;
      int paired_i = (i & 2) + !insn_i;
      int pair_fst = (i & 2);
      int pair_snd = (i & 2) + 1;

      if (walkers[i]->conflict_p (budget))
	{
	  alias_hazards[i] = walkers[i]->insn ();

	  // We got an aliasing conflict for this {load,store} walker,
	  // so we don't need to walk any further.
	  walkers[i] = nullptr;

	  // If we have a pair of alias conflicts that prevent
	  // forming the pair, stop.  There's no need to do further
	  // analysis.
	  if (alias_hazards[paired_i]
	      && (*alias_hazards[pair_fst] <= *alias_hazards[pair_snd]))
	    return;

	  if (!load_p)
	    {
	      int other_pair_fst = (pair_fst ? 0 : 2);
	      int other_paired_i = other_pair_fst + !insn_i;

	      int x_pair_fst = (i == pair_fst) ? i : other_paired_i;
	      int x_pair_snd = (i == pair_fst) ? other_paired_i : i;

	      // Similarly, handle the case where we have a {load,store}
	      // or {store,load} alias hazard pair that prevents forming
	      // the pair.
	      if (alias_hazards[other_paired_i]
		  && *alias_hazards[x_pair_fst] <= *alias_hazards[x_pair_snd])
		return;
	    }
	}
      else
	{
	  walkers[i]->advance ();
	  if (!walkers[i]->valid ())
	    walkers[i] = nullptr;
	}

      i = next_walker (i);
    }
}
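
// Illustrative note (derived from the code above, not a separate data
// structure): walker index I decomposes as INSN_I = I % 2 (0 = forward
// from INSNS[0], 1 = backward from INSNS[1]) and I & 2 (0 = store
// walker, 2 = load walker), giving
//
//   walkers[0] = forward store walker    walkers[1] = backward store walker
//   walkers[2] = forward load walker     walkers[3] = backward load walker
//
// PAIRED_I flips the direction bit to find the opposite-direction walker
// of the same kind, while (PAIR_FST, PAIR_SND) name the forward/backward
// hazards of one kind, so the early-exit test asks whether the forward
// hazard is at or before the backward hazard, i.e. whether the placement
// window for the pair has become empty.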
// Given INSNS (in program order) which are known to be adjacent, look
// to see if either insn has a suitable RTL (register) base that we can
// use to form a pair.  Push these to BASE_CANDS if we find any.  CAND_MEMs
// gives the relevant mems from the candidate insns, ACCESS_SIZE gives the
// size of a single candidate access, and REVERSED says whether the accesses
// are inverted in offset order.
//
// Returns an integer where bit (1 << i) is set if INSNS[i] uses writeback
// addressing.
int
pair_fusion::get_viable_bases (insn_info *insns[2],
			       vec<base_cand> &base_cands,
			       rtx cand_mems[2],
			       unsigned access_size,
			       bool reversed)
{
  // We discovered this pair through a common base.  Need to ensure that
  // we have a common base register that is live at both locations.
  def_info *base_defs[2] = {};
  int writeback = 0;
  for (int i = 0; i < 2; i++)
    {
      const bool is_lower = (i == reversed);
      poly_int64 poly_off;
      rtx base = pair_mem_strip_offset (cand_mems[i], &poly_off);
      if (GET_RTX_CLASS (GET_CODE (XEXP (cand_mems[i], 0))) == RTX_AUTOINC)
	writeback |= (1 << i);

      if (!REG_P (base) || !poly_off.is_constant ())
	continue;

      // Punt on accesses relative to eliminable regs.  See the comment in
      // pair_fusion_bb_info::track_access for a detailed explanation of this.
      if (!reload_completed
	  && (REGNO (base) == FRAME_POINTER_REGNUM
	      || REGNO (base) == ARG_POINTER_REGNUM))
	continue;

      HOST_WIDE_INT base_off = poly_off.to_constant ();

      // It should be unlikely that we ever punt here, since MEM_EXPR offset
      // alignment should be a good proxy for register offset alignment.
      if (base_off % access_size != 0)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "base not viable, offset misaligned (insn %d)\n",
		     insns[i]->uid ());
	  continue;
	}

      base_off /= access_size;

      if (!is_lower)
	base_off--;

      if (!pair_mem_in_range_p (base_off))
	continue;

      use_info *use = find_access (insns[i]->uses (), REGNO (base));
      gcc_assert (use);
      base_defs[i] = use->def ();
    }

  if (!base_defs[0] && !base_defs[1])
    {
      if (dump_file)
	fprintf (dump_file, "no viable base register for pair (%d,%d)\n",
		 insns[0]->uid (), insns[1]->uid ());
      return writeback;
    }

  for (int i = 0; i < 2; i++)
    if ((writeback & (1 << i)) && !base_defs[i])
      {
	if (dump_file)
	  fprintf (dump_file, "insn %d has writeback but base isn't viable\n",
		   insns[i]->uid ());
	return writeback;
      }

  if (writeback == 3
      && base_defs[0]->regno () != base_defs[1]->regno ())
    {
      if (dump_file)
	fprintf (dump_file,
		 "pair (%d,%d): double writeback with distinct regs (%d,%d): "
		 "punting\n",
		 insns[0]->uid (), insns[1]->uid (),
		 base_defs[0]->regno (), base_defs[1]->regno ());
      return writeback;
    }

  if (base_defs[0] && base_defs[1]
      && base_defs[0]->regno () == base_defs[1]->regno ())
    {
      // Easy case: insns already share the same base reg.
      base_cands.quick_push (base_defs[0]);
      return writeback;
    }

  // Otherwise, we know that one of the bases must change.
  //
  // Note that if there is writeback we must use the writeback base
  // (we know now there is exactly one).
  for (int i = 0; i < 2; i++)
    if (base_defs[i] && (!writeback || (writeback & (1 << i))))
      base_cands.quick_push (base_cand { base_defs[i], i });

  return writeback;
}
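
// Worked example (illustrative): with ACCESS_SIZE == 8, a candidate mem of
// the form (mem (plus (reg x1) (const_int 40))) strips to BASE = x1 and
// POLY_OFF = 40.  The offset is a multiple of the access size, so we keep
// going: BASE_OFF becomes 40 / 8 == 5 (minus one for the higher access of
// the pair), and it is this scaled offset that pair_mem_in_range_p checks
// against the target's paired-offset range.  An offset of 41 would instead
// have been rejected at the alignment check above.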
// Given two adjacent memory accesses of the same size, I1 and I2, try
// and see if we can merge them into a paired access.
//
// ACCESS_SIZE gives the (common) size of a single access, LOAD_P is true
// if the accesses are both loads, otherwise they are both stores.
bool
pair_fusion_bb_info::try_fuse_pair (bool load_p, unsigned access_size,
				    insn_info *i1, insn_info *i2)
{
  if (dump_file)
    fprintf (dump_file, "analyzing pair (load=%d): (%d,%d)\n",
	     load_p, i1->uid (), i2->uid ());

  insn_info *insns[2];
  bool reversed = false;
  if (*i1 < *i2)
    {
      insns[0] = i1;
      insns[1] = i2;
    }
  else
    {
      insns[0] = i2;
      insns[1] = i1;
      reversed = true;
    }

  rtx cand_mems[2];
  rtx reg_ops[2];
  rtx pats[2];
  for (int i = 0; i < 2; i++)
    {
      pats[i] = PATTERN (insns[i]->rtl ());
      cand_mems[i] = XEXP (pats[i], load_p);
      reg_ops[i] = XEXP (pats[i], !load_p);
    }

  if (load_p && reg_overlap_mentioned_p (reg_ops[0], reg_ops[1]))
    {
      if (dump_file)
	fprintf (dump_file,
		 "punting on load pair due to reg conflicts (%d,%d)\n",
		 insns[0]->uid (), insns[1]->uid ());
      return false;
    }

  if (cfun->can_throw_non_call_exceptions
      && find_reg_note (insns[0]->rtl (), REG_EH_REGION, NULL_RTX)
      && find_reg_note (insns[1]->rtl (), REG_EH_REGION, NULL_RTX))
    {
      if (dump_file)
	fprintf (dump_file,
		 "can't combine insns with EH side effects (%d,%d)\n",
		 insns[0]->uid (), insns[1]->uid ());
      return false;
    }
  auto_vec<base_cand, 2> base_cands (2);

  int writeback = m_pass->get_viable_bases (insns, base_cands, cand_mems,
					    access_size, reversed);
  if (base_cands.is_empty ())
    {
      if (dump_file)
	fprintf (dump_file, "no viable base for pair (%d,%d)\n",
		 insns[0]->uid (), insns[1]->uid ());
      return false;
    }

  // Punt on frame-related insns with writeback.  We probably won't see
  // these in practice, but this is conservative and ensures we don't
  // have to worry about these later on.
  if (writeback && (RTX_FRAME_RELATED_P (i1->rtl ())
		    || RTX_FRAME_RELATED_P (i2->rtl ())))
    {
      if (dump_file)
	fprintf (dump_file,
		 "rejecting pair (%d,%d): frame-related insn with writeback\n",
		 i1->uid (), i2->uid ());
      return false;
    }
  rtx *ignore = &XEXP (pats[1], load_p);
  for (auto use : insns[1]->uses ())
    if (!use->is_mem ()
	&& refers_to_regno_p (use->regno (), use->regno () + 1, pats[1], ignore)
	&& use->def () && use->def ()->insn () == insns[0])
      {
	// N.B. we allow a true dependence on the base address, as this
	// happens in the case of auto-inc accesses.  Consider a post-increment
	// load followed by a regular indexed load, for example.
	if (dump_file)
	  fprintf (dump_file,
		   "%d has non-address true dependence on %d, rejecting pair\n",
		   insns[1]->uid (), insns[0]->uid ());
	return false;
      }
  unsigned i = 0;
  while (i < base_cands.length ())
    {
      base_cand &cand = base_cands[i];

      rtx *ignore[2] = {};
      for (int j = 0; j < 2; j++)
	if (cand.from_insn == !j)
	  ignore[j] = &XEXP (cand_mems[j], 0);

      insn_info *h = first_hazard_after (insns[0], ignore[0]);
      if (h && *h < *insns[1])
	cand.hazards[0] = h;

      h = latest_hazard_before (insns[1], ignore[1]);
      if (h && *h > *insns[0])
	cand.hazards[1] = h;

      if (!cand.viable ())
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "pair (%d,%d): rejecting base %d due to dataflow "
		     "hazards (%d,%d)\n",
		     insns[0]->uid (), insns[1]->uid (),
		     cand.def->regno (),
		     cand.hazards[0]->uid (),
		     cand.hazards[1]->uid ());

	  base_cands.ordered_remove (i);
	}
      else
	i++;
    }

  if (base_cands.is_empty ())
    {
      if (dump_file)
	fprintf (dump_file,
		 "can't form pair (%d,%d) due to dataflow hazards\n",
		 insns[0]->uid (), insns[1]->uid ());
      return false;
    }
  insn_info *alias_hazards[4] = {};

  // First def of memory after the first insn, and last def of memory
  // before the second insn, respectively.
  def_info *mem_defs[2] = {};
  if (load_p)
    {
      if (!MEM_READONLY_P (cand_mems[0]))
	{
	  mem_defs[0] = memory_access (insns[0]->uses ())->def ();
	  gcc_checking_assert (mem_defs[0]);
	  mem_defs[0] = mem_defs[0]->next_def ();
	}
      if (!MEM_READONLY_P (cand_mems[1]))
	{
	  mem_defs[1] = memory_access (insns[1]->uses ())->def ();
	  gcc_checking_assert (mem_defs[1]);
	}
    }
  else
    {
      mem_defs[0] = memory_access (insns[0]->defs ())->next_def ();
      mem_defs[1] = memory_access (insns[1]->defs ())->prev_def ();
      gcc_checking_assert (mem_defs[0]);
      gcc_checking_assert (mem_defs[1]);
    }

  auto tombstone_p = [&](insn_info *insn) -> bool {
    return m_emitted_tombstone
	   && bitmap_bit_p (&m_tombstone_bitmap, insn->uid ());
  };

  store_walker<false, decltype(tombstone_p)>
    forward_store_walker (mem_defs[0], cand_mems[0], insns[1], tombstone_p);

  store_walker<true, decltype(tombstone_p)>
    backward_store_walker (mem_defs[1], cand_mems[1], insns[0], tombstone_p);

  alias_walker *walkers[4] = {};
  if (mem_defs[0])
    walkers[0] = &forward_store_walker;
  if (mem_defs[1])
    walkers[1] = &backward_store_walker;

  if (load_p && (mem_defs[0] || mem_defs[1]))
    m_pass->do_alias_analysis (alias_hazards, walkers, load_p);
  else if (!load_p)
    {
      // We want to find any loads hanging off the first store.
      mem_defs[0] = memory_access (insns[0]->defs ());
      load_walker<false> forward_load_walker (mem_defs[0], insns[0], insns[1]);
      load_walker<true> backward_load_walker (mem_defs[1], insns[1], insns[0]);
      walkers[2] = &forward_load_walker;
      walkers[3] = &backward_load_walker;
      m_pass->do_alias_analysis (alias_hazards, walkers, load_p);

      // Now consolidate hazards back down.
      if (alias_hazards[2]
	  && (!alias_hazards[0] || (*alias_hazards[2] < *alias_hazards[0])))
	alias_hazards[0] = alias_hazards[2];

      if (alias_hazards[3]
	  && (!alias_hazards[1] || (*alias_hazards[3] > *alias_hazards[1])))
	alias_hazards[1] = alias_hazards[3];
    }

  if (alias_hazards[0] && alias_hazards[1]
      && *alias_hazards[0] <= *alias_hazards[1])
    {
      if (dump_file)
	fprintf (dump_file,
		 "cannot form pair (%d,%d) due to alias conflicts (%d,%d)\n",
		 i1->uid (), i2->uid (),
		 alias_hazards[0]->uid (), alias_hazards[1]->uid ());
      return false;
    }
  // Now narrow the hazards on each base candidate using
  // the alias hazards.
  i = 0;
  while (i < base_cands.length ())
    {
      base_cand &cand = base_cands[i];
      if (alias_hazards[0] && (!cand.hazards[0]
			       || *alias_hazards[0] < *cand.hazards[0]))
	cand.hazards[0] = alias_hazards[0];
      if (alias_hazards[1] && (!cand.hazards[1]
			       || *alias_hazards[1] > *cand.hazards[1]))
	cand.hazards[1] = alias_hazards[1];

      if (cand.viable ())
	i++;
      else
	{
	  if (dump_file)
	    fprintf (dump_file, "pair (%d,%d): rejecting base %d due to "
				"alias/dataflow hazards (%d,%d)\n",
		     insns[0]->uid (), insns[1]->uid (),
		     cand.def->regno (),
		     cand.hazards[0]->uid (),
		     cand.hazards[1]->uid ());

	  base_cands.ordered_remove (i);
	}
    }

  if (base_cands.is_empty ())
    {
      if (dump_file)
	fprintf (dump_file,
		 "cannot form pair (%d,%d) due to alias/dataflow hazards\n",
		 insns[0]->uid (), insns[1]->uid ());

      return false;
    }
  base_cand *base = &base_cands[0];
  if (base_cands.length () > 1)
    {
      // If there are still multiple viable bases, it makes sense
      // to choose one that allows us to reduce register pressure,
      // for loads this means moving further down, for stores this
      // means moving further up.
      gcc_checking_assert (base_cands.length () == 2);
      const int hazard_i = !load_p;
      if (base->hazards[hazard_i])
	{
	  if (!base_cands[1].hazards[hazard_i])
	    base = &base_cands[1];
	  else if (load_p
		   && *base_cands[1].hazards[hazard_i]
		      > *(base->hazards[hazard_i]))
	    base = &base_cands[1];
	  else if (!load_p
		   && *base_cands[1].hazards[hazard_i]
		      < *(base->hazards[hazard_i]))
	    base = &base_cands[1];
	}
    }

  // Otherwise, hazards[0] > hazards[1].
  // Pair can be formed anywhere in (hazards[1], hazards[0]).
  insn_range_info range (insns[0], insns[1]);
  if (base->hazards[1])
    range.first = base->hazards[1];
  if (base->hazards[0])
    range.last = base->hazards[0]->prev_nondebug_insn ();

  // If the second insn can throw, narrow the move range to exactly that insn.
  // This prevents us trying to move the second insn from the end of the BB.
  if (cfun->can_throw_non_call_exceptions
      && find_reg_note (insns[1]->rtl (), REG_EH_REGION, NULL_RTX))
    {
      gcc_assert (range.includes (insns[1]));
      range = insn_range_info (insns[1]);
    }

  // Placement strategy: push loads down and pull stores up, this should
  // help register pressure by reducing live ranges.
  if (load_p)
    range.first = range.last;
  else
    range.last = range.first;

  if (dump_file)
    {
      auto print_hazard = [](insn_info *i)
	{
	  if (i)
	    fprintf (dump_file, "%d", i->uid ());
	  else
	    fprintf (dump_file, "-");
	};
      auto print_pair = [print_hazard](insn_info **i)
	{
	  print_hazard (i[0]);
	  fprintf (dump_file, ",");
	  print_hazard (i[1]);
	};

      fprintf (dump_file, "fusing pair [L=%d] (%d,%d), base=%d, hazards: (",
	       load_p, insns[0]->uid (), insns[1]->uid (),
	       base->def->regno ());
      print_pair (base->hazards);
      fprintf (dump_file, "), move_range: (%d,%d)\n",
	       range.first->uid (), range.last->uid ());
    }

  return fuse_pair (load_p, access_size, writeback,
		    i1, i2, *base, range);
}
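
// Illustrative example of the hazard/range logic above: suppose INSNS[0]
// has uid 10 and INSNS[1] has uid 20, the latest hazard before INSNS[1]
// (hazards[1]) is insn 12, and the earliest hazard after INSNS[0]
// (hazards[0]) is insn 18.  Since *hazards[0] > *hazards[1] the base is
// viable, and the pair may be placed strictly between insns 12 and 18.
// The placement strategy then collapses that window: a load pair goes as
// late as possible and a store pair as early as possible, shortening the
// live ranges of the transfer registers.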
static void
dump_insn_list (FILE *f, const insn_list_t &l)
{
  fprintf (f, "(");

  auto i = l.begin ();
  auto end = l.end ();

  if (i != end)
    {
      fprintf (f, "%d", (*i)->uid ());
      i++;
    }

  for (; i != end; i++)
    fprintf (f, ", %d", (*i)->uid ());

  fprintf (f, ")");
}

DEBUG_FUNCTION void
debug (const insn_list_t &l)
{
  dump_insn_list (stderr, l);
  fprintf (stderr, "\n");
}
// LEFT_LIST and RIGHT_LIST are lists of candidate instructions where all insns
// in LEFT_LIST are known to be adjacent to those in RIGHT_LIST.
//
// This function traverses the resulting 2D matrix of possible pair candidates
// and attempts to merge them into pairs.
//
// The algorithm is straightforward: if we consider a combined list of
// candidates X obtained by merging LEFT_LIST and RIGHT_LIST in program order,
// then we advance through X until we reach a crossing point (where X[i] and
// X[i+1] come from different source lists).
//
// At this point we know X[i] and X[i+1] are adjacent accesses, and we try to
// fuse them into a pair.  If this succeeds, we remove X[i] and X[i+1] from
// their original lists and continue as above.
//
// In the failure case, we advance through the source list containing X[i] and
// continue as above (proceeding to the next crossing point).
//
// The rationale for skipping over groups of consecutive candidates from the
// same source list is as follows:
//
// In the store case, the insns in the group can't be re-ordered over each
// other as they are guaranteed to store to the same location, so we're
// guaranteed not to lose opportunities by doing this.
//
// In the load case, subsequent loads from the same location are either
// redundant (in which case they should have been cleaned up by an earlier
// optimization pass) or there is an intervening aliasing hazard, in which case
// we can't re-order them anyway, so provided earlier passes have cleaned up
// redundant loads, we shouldn't miss opportunities by doing this.
void
pair_fusion_bb_info::merge_pairs (insn_list_t &left_list,
				  insn_list_t &right_list,
				  bool load_p,
				  unsigned access_size)
{
  if (dump_file)
    {
      fprintf (dump_file, "merge_pairs [L=%d], cand vecs ", load_p);
      dump_insn_list (dump_file, left_list);
      fprintf (dump_file, " x ");
      dump_insn_list (dump_file, right_list);
      fprintf (dump_file, "\n");
    }

  auto iter_l = left_list.begin ();
  auto iter_r = right_list.begin ();

  while (iter_l != left_list.end () && iter_r != right_list.end ())
    {
      auto next_l = std::next (iter_l);
      auto next_r = std::next (iter_r);
      if (**iter_l < **iter_r
	  && next_l != left_list.end ()
	  && **next_l < **iter_r)
	iter_l = next_l;
      else if (**iter_r < **iter_l
	       && next_r != right_list.end ()
	       && **next_r < **iter_l)
	iter_r = next_r;
      else if (try_fuse_pair (load_p, access_size, *iter_l, *iter_r))
	{
	  left_list.erase (iter_l);
	  iter_l = next_l;
	  right_list.erase (iter_r);
	  iter_r = next_r;
	}
      else if (**iter_l < **iter_r)
	iter_l = next_l;
      else
	iter_r = next_r;
    }
}
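
// Worked example (illustrative, not from the sources): take
// LEFT_LIST = (1, 4, 7) and RIGHT_LIST = (2, 9), where the numbers are
// insn uids in program order.  The merged sequence is 1 2 4 7 9, with
// crossing points (1,2), (2,4) and (7,9).  We first attempt
// try_fuse_pair on (1,2); on failure the left iterator advances and we
// attempt (2,4); on failure again the left iterator advances past 4 and
// we finish with (7,9).  If (1,2) fuses instead, both insns are erased
// from their lists and the skip logic steps over 4 (4 and 7 being
// consecutive candidates from the same list), going straight to (7,9).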
// Iterate over the accesses in GROUP, looking for adjacent sets
// of accesses.  If we find two sets of adjacent accesses, call
// merge_pairs.
void
pair_fusion_bb_info::transform_for_base (int encoded_lfs,
					 access_group &group)
{
  const auto lfs = decode_lfs (encoded_lfs);
  const unsigned access_size = lfs.size;

  bool skip_next = true;
  access_record *prev_access = nullptr;

  for (auto &access : group.list)
    {
      if (skip_next)
	skip_next = false;
      else if (known_eq (access.offset, prev_access->offset + access_size))
	{
	  merge_pairs (prev_access->cand_insns,
		       access.cand_insns,
		       lfs.load_p,
		       access_size);
	  skip_next = access.cand_insns.empty ();
	}
      prev_access = &access;
    }
}
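
// Illustrative example: given an access group with records at offsets 8,
// 16 and 24 and ACCESS_SIZE == 8, the loop above first calls merge_pairs
// on the candidates at (8,16).  If that fuse consumed every candidate at
// offset 16, SKIP_NEXT suppresses the (16,24) attempt: an insn that has
// already been fused into a pair cannot take part in a second one.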
// If we emitted tombstone insns for this BB, iterate through the BB
// and remove all the tombstone insns, being sure to reparent any uses
// of mem to previous defs when we do this.
void
pair_fusion_bb_info::cleanup_tombstones ()
{
  // No need to do anything if we didn't emit a tombstone insn for this BB.
  if (!m_emitted_tombstone)
    return;

  for (auto insn : iterate_safely (m_bb->nondebug_insns ()))
    {
      if (!insn->is_real ()
	  || !bitmap_bit_p (&m_tombstone_bitmap, insn->uid ()))
	continue;

      auto set = as_a<set_info *> (memory_access (insn->defs ()));
      if (set->has_any_uses ())
	{
	  auto prev_set = as_a<set_info *> (set->prev_def ());
	  while (set->first_use ())
	    crtl->ssa->reparent_use (set->first_use (), prev_set);
	}

      // Now set has no uses, we can delete it.
      insn_change change (insn, insn_change::DELETE);
      crtl->ssa->change_insn (change);
    }
}
template<typename Map>
void
pair_fusion_bb_info::traverse_base_map (Map &map)
{
  for (auto kv : map)
    {
      const auto &key = kv.first;
      auto &value = kv.second;
      transform_for_base (key.second, value);
    }
}

void
pair_fusion_bb_info::transform ()
{
  traverse_base_map (expr_map);
  traverse_base_map (def_map);
}
// Given an existing pair insn INSN, look for a trailing update of
// the base register which we can fold in to make this pair use
// a writeback addressing mode.
void
pair_fusion::try_promote_writeback (insn_info *insn, bool load_p)
{
  rtx regs[2];

  rtx mem = destructure_pair (regs, PATTERN (insn->rtl ()), load_p);
  gcc_checking_assert (MEM_P (mem));

  poly_int64 offset;
  rtx base = strip_offset (XEXP (mem, 0), &offset);
  gcc_assert (REG_P (base));

  const auto access_size = GET_MODE_SIZE (GET_MODE (mem)).to_constant () / 2;

  if (find_access (insn->defs (), REGNO (base)))
    {
      gcc_assert (load_p);
      if (dump_file)
	fprintf (dump_file,
		 "ldp %d clobbers base r%d, can't promote to writeback\n",
		 insn->uid (), REGNO (base));
      return;
    }

  auto base_use = find_access (insn->uses (), REGNO (base));
  gcc_assert (base_use);

  if (!base_use->def ())
    {
      if (dump_file)
	fprintf (dump_file,
		 "found pair (i%d, L=%d): but base r%d is upwards exposed\n",
		 insn->uid (), load_p, REGNO (base));
      return;
    }

  auto base_def = base_use->def ();

  rtx wb_effect = NULL_RTX;
  def_info *add_def;
  const insn_range_info pair_range (insn);
  insn_info *insns[2] = { nullptr, insn };
  insn_info *trailing_add
    = find_trailing_add (insns, pair_range, 0, &wb_effect,
			 &add_def, base_def, offset,
			 access_size);
  if (!trailing_add)
    return;

  auto attempt = crtl->ssa->new_change_attempt ();

  insn_change pair_change (insn);
  insn_change del_change (trailing_add, insn_change::DELETE);
  insn_change *changes[] = { &pair_change, &del_change };

  rtx pair_pat = gen_promote_writeback_pair (wb_effect, mem, regs, load_p);
  validate_unshare_change (insn->rtl (), &PATTERN (insn->rtl ()), pair_pat,
			   true);

  // The pair must gain the def of the base register from the add.
  pair_change.new_defs = insert_access (attempt,
					add_def,
					pair_change.new_defs);
  gcc_assert (pair_change.new_defs.is_valid ());

  auto ignore = ignore_changing_insns (changes);
  for (unsigned i = 0; i < ARRAY_SIZE (changes); i++)
    gcc_assert (rtl_ssa::restrict_movement (*changes[i], ignore));

  if (!rtl_ssa::recog (attempt, pair_change, ignore))
    {
      if (dump_file)
	fprintf (dump_file, "i%d: recog failed on wb pair, bailing out\n",
		 insn->uid ());
      cancel_changes (0);
      return;
    }

  gcc_assert (crtl->ssa->verify_insn_changes (changes));

  if (MAY_HAVE_DEBUG_INSNS)
    fixup_debug_uses_trailing_add (attempt, insn, trailing_add, wb_effect);

  confirm_change_group ();
  crtl->ssa->change_insns (changes);
}
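
// Illustrative example (aarch64 flavour, where this pass originated):
// given the post-RA sequence
//
//   ldp x0, x1, [x2]
//   add x2, x2, #16
//
// a successful promotion deletes the trailing add and rewrites the pair
// to the post-index form
//
//   ldp x0, x1, [x2], #16
//
// with the def of the base register x2 transferred to the pair insn.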
// Main function for the pass.  Iterate over the insns in BB looking
// for load/store candidates.  If running after RA, also try and promote
// non-writeback pairs to use writeback addressing.  Then try to fuse
// candidates into pairs.
void pair_fusion::process_block (bb_info *bb)
{
  const bool track_loads = track_loads_p ();
  const bool track_stores = track_stores_p ();

  pair_fusion_bb_info bb_state (bb, this);

  for (auto insn : bb->nondebug_insns ())
    {
      rtx_insn *rti = insn->rtl ();

      if (!rti || !INSN_P (rti))
	continue;

      rtx pat = PATTERN (rti);
      bool load_p;
      if (reload_completed
	  && should_handle_writeback (writeback_type::ALL)
	  && pair_mem_insn_p (rti, load_p))
	try_promote_writeback (insn, load_p);

      if (GET_CODE (pat) != SET)
	continue;

      if (track_stores && MEM_P (XEXP (pat, 0)))
	bb_state.track_access (insn, false, XEXP (pat, 0));
      else if (track_loads && MEM_P (XEXP (pat, 1)))
	bb_state.track_access (insn, true, XEXP (pat, 1));
    }

  bb_state.transform ();
  bb_state.cleanup_tombstones ();
}