// Pass to fuse adjacent loads/stores into paired memory accesses.
// Copyright (C) 2023-2024 Free Software Foundation, Inc.
//
// This file is part of GCC.
//
// GCC is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// GCC is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL
#define INCLUDE_LIST
#define INCLUDE_TYPE_TRAITS
#define INCLUDE_ARRAY
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "df.h"
#include "rtl-iter.h"
#include "rtl-ssa.h"
#include "cfgcleanup.h"
#include "tree-pass.h"
#include "ordered-hash-map.h"
#include "tree-dfa.h"
#include "fold-const.h"
#include "tree-hash-traits.h"
#include "print-tree.h"
#include "pair-fusion.h"

using namespace rtl_ssa;

// We pack these fields (load_p, fpsimd_p, and size) into an integer
// (LFS) which we use as part of the key into the main hash tables.
//
// The idea is that we group candidates together only if they agree on
// the fields below.  Candidates that disagree on any of these
// properties shouldn't be merged together.
struct lfs_fields
{
  bool load_p;
  bool fpsimd_p;
  unsigned size;
};

using insn_list_t = std::list<insn_info *>;

// Information about the accesses at a given offset from a particular
// base.  Stored in an access_group, see below.
struct access_record
{
  poly_int64 offset;
  std::list<insn_info *> cand_insns;
  std::list<access_record>::iterator place;

  access_record (poly_int64 off) : offset (off) {}
};

// A group of accesses where adjacent accesses could be ldp/stp
// candidates.  The splay tree supports efficient insertion,
// while the list supports efficient iteration.
struct access_group
{
  splay_tree<access_record *> tree;
  std::list<access_record> list;

  template<typename Alloc>
  inline void track (Alloc node_alloc, poly_int64 offset, insn_info *insn);
};

// Test if this base candidate is viable according to HAZARDS.
bool
base_cand::viable () const
{
  return !hazards[0] || !hazards[1] || (*hazards[0] > *hazards[1]);
}

// Information about an alternate base.  For a def_info D, it may
// instead be expressed as D = BASE + OFFSET.
struct alt_base
{
  def_info *base;
  poly_int64 offset;
};

// Virtual base class for load/store walkers used in alias analysis.
struct alias_walker
{
  virtual bool conflict_p (int &budget) const = 0;
  virtual insn_info *insn () const = 0;
  virtual bool valid () const = 0;
  virtual void advance () = 0;
};

pair_fusion::pair_fusion ()
{
  calculate_dominance_info (CDI_DOMINATORS);
  df_analyze ();
  crtl->ssa = new rtl_ssa::function_info (cfun);
}

pair_fusion::~pair_fusion ()
{
  if (crtl->ssa->perform_pending_updates ())
    cleanup_cfg (0);

  free_dominance_info (CDI_DOMINATORS);

  delete crtl->ssa;
  crtl->ssa = nullptr;
}

// This is the main function to start the pass.
void
pair_fusion::run ()
{
  if (!track_loads_p () && !track_stores_p ())
    return;

  for (auto bb : crtl->ssa->bbs ())
    process_block (bb);
}

// State used by the pass for a given basic block.
struct pair_fusion_bb_info
{
  using def_hash = nofree_ptr_hash<def_info>;
  using expr_key_t = pair_hash<tree_operand_hash, int_hash<int, -1, -2>>;
  using def_key_t = pair_hash<def_hash, int_hash<int, -1, -2>>;

  // Map of <tree base, LFS> -> access_group.
  ordered_hash_map<expr_key_t, access_group> expr_map;

  // Map of <RTL-SSA def_info *, LFS> -> access_group.
  ordered_hash_map<def_key_t, access_group> def_map;

  // Given the def_info for an RTL base register, express it as an offset from
  // some canonical base instead.
  //
  // Canonicalizing bases in this way allows us to identify adjacent accesses
  // even if they see different base register defs.
  hash_map<def_hash, alt_base> canon_base_map;

  static const size_t obstack_alignment = sizeof (void *);

  pair_fusion_bb_info (bb_info *bb, pair_fusion *d)
    : m_bb (bb), m_pass (d), m_emitted_tombstone (false)
  {
    obstack_specify_allocation (&m_obstack, OBSTACK_CHUNK_SIZE,
				obstack_alignment, obstack_chunk_alloc,
				obstack_chunk_free);
  }

  ~pair_fusion_bb_info ()
  {
    obstack_free (&m_obstack, nullptr);

    if (m_emitted_tombstone)
      {
	bitmap_release (&m_tombstone_bitmap);
	bitmap_obstack_release (&m_bitmap_obstack);
      }
  }

  inline void track_access (insn_info *, bool load, rtx mem);
  inline void transform ();
  inline void cleanup_tombstones ();

private:
  obstack m_obstack;
  bb_info *m_bb;
  pair_fusion *m_pass;

  // State for keeping track of tombstone insns emitted for this BB.
  bitmap_obstack m_bitmap_obstack;
  bitmap_head m_tombstone_bitmap;
  bool m_emitted_tombstone;

  inline splay_tree_node<access_record *> *node_alloc (access_record *);

  template<typename Map>
  inline void traverse_base_map (Map &map);
  inline void transform_for_base (int load_size, access_group &group);

  inline void merge_pairs (insn_list_t &, insn_list_t &,
			   bool load_p, unsigned access_size);

  inline bool try_fuse_pair (bool load_p, unsigned access_size,
			     insn_info *i1, insn_info *i2);

  inline bool fuse_pair (bool load_p, unsigned access_size,
			 int writeback,
			 insn_info *i1, insn_info *i2,
			 base_cand &base,
			 const insn_range_info &move_range);

  inline void track_tombstone (int uid);

  inline bool track_via_mem_expr (insn_info *, rtx mem, lfs_fields lfs);
};

splay_tree_node<access_record *> *
pair_fusion_bb_info::node_alloc (access_record *access)
{
  using T = splay_tree_node<access_record *>;
  void *addr = obstack_alloc (&m_obstack, sizeof (T));
  return new (addr) T (access);
}

// Given a mem MEM, if the address has side effects, return a MEM that accesses
// the same address but without the side effects.  Otherwise, return
// MEM unchanged.
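//
// For example, (mem:DI (post_inc (reg x0))) becomes (mem:DI (reg x0)),
// while (mem:DI (pre_inc (reg x0))) becomes
// (mem:DI (plus (reg x0) (const_int 8))), since the pre-increment takes
// effect before the access.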
static rtx
drop_writeback (rtx mem)
{
  rtx addr = XEXP (mem, 0);

  if (!side_effects_p (addr))
    return mem;

  switch (GET_CODE (addr))
    {
    case PRE_MODIFY:
      addr = XEXP (addr, 1);
      break;
    case POST_MODIFY:
    case POST_INC:
    case POST_DEC:
      addr = XEXP (addr, 0);
      break;
    case PRE_INC:
    case PRE_DEC:
      {
	poly_int64 adjustment = GET_MODE_SIZE (GET_MODE (mem));
	if (GET_CODE (addr) == PRE_DEC)
	  adjustment *= -1;
	addr = plus_constant (GET_MODE (addr), XEXP (addr, 0), adjustment);
	break;
      }
    default:
      gcc_unreachable ();
    }

  return change_address (mem, GET_MODE (mem), addr);
}

// Convenience wrapper around strip_offset that can also look through
// RTX_AUTOINC addresses.  The interface is like strip_offset except we take a
// MEM so that we know the mode of the access.
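//
// For example, for (mem:DI (post_inc (reg x1))) we return (reg x1) and
// set *OFFSET to 8 (the size of the access), while for
// (mem:DI (pre_modify (reg x1) (plus (reg x1) (const_int 16)))) we
// return (reg x1) and set *OFFSET to 16.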
static rtx
pair_mem_strip_offset (rtx mem, poly_int64 *offset)
{
  rtx addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_MODIFY:
    case POST_MODIFY:
      addr = strip_offset (XEXP (addr, 1), offset);
      gcc_checking_assert (REG_P (addr));
      gcc_checking_assert (rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr));
      break;
    case PRE_INC:
    case POST_INC:
      addr = XEXP (addr, 0);
      *offset = GET_MODE_SIZE (GET_MODE (mem));
      gcc_checking_assert (REG_P (addr));
      break;
    case PRE_DEC:
    case POST_DEC:
      addr = XEXP (addr, 0);
      *offset = -GET_MODE_SIZE (GET_MODE (mem));
      gcc_checking_assert (REG_P (addr));
      break;

    default:
      addr = strip_offset (addr, offset);
    }

  return addr;
}

// Return true if X is a PRE_{INC,DEC,MODIFY} rtx.
static bool
any_pre_modify_p (rtx x)
{
  const auto code = GET_CODE (x);
  return code == PRE_INC || code == PRE_DEC || code == PRE_MODIFY;
}

// Return true if X is a POST_{INC,DEC,MODIFY} rtx.
static bool
any_post_modify_p (rtx x)
{
  const auto code = GET_CODE (x);
  return code == POST_INC || code == POST_DEC || code == POST_MODIFY;
}

// Given LFS (load_p, fpsimd_p, size) fields in FIELDS, encode these
// into an integer for use as a hash table key.
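//
// For example, a 16-byte FP/SIMD load (load_p = true, fpsimd_p = true,
// size = 16) has size_log2 = 4 and so encodes as
// (1 << 3) | (1 << 2) | (4 - 2) = 0b1110 = 14.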
static int
encode_lfs (lfs_fields fields)
{
  int size_log2 = exact_log2 (fields.size);
  gcc_checking_assert (size_log2 >= 2 && size_log2 <= 4);
  return ((int)fields.load_p << 3)
    | ((int)fields.fpsimd_p << 2)
    | (size_log2 - 2);
}

// Inverse of encode_lfs.
static lfs_fields
decode_lfs (int lfs)
{
  bool load_p = (lfs & (1 << 3));
  bool fpsimd_p = (lfs & (1 << 2));
  unsigned size = 1U << ((lfs & 3) + 2);
  return { load_p, fpsimd_p, size };
}

// Track the access INSN at offset OFFSET in this access group.
// ALLOC_NODE is used to allocate splay tree nodes.
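//
// A lookup result of 0 below means an access_record already exists at
// OFFSET, so we just append INSN to its candidate list; otherwise the
// sign of the result tells us on which side of the splayed-to-root node
// the new record (and splay tree node) should be inserted.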
template<typename Alloc>
void
access_group::track (Alloc alloc_node, poly_int64 offset, insn_info *insn)
{
  auto insert_before = [&](std::list<access_record>::iterator after)
    {
      auto it = list.emplace (after, offset);
      it->cand_insns.push_back (insn);
      it->place = it;
      return &*it;
    };

  if (!list.size ())
    {
      auto access = insert_before (list.end ());
      tree.insert_max_node (alloc_node (access));
      return;
    }

  auto compare = [&](splay_tree_node<access_record *> *node)
    {
      return compare_sizes_for_sort (offset, node->value ()->offset);
    };
  auto result = tree.lookup (compare);
  splay_tree_node<access_record *> *node = tree.root ();
  if (result == 0)
    node->value ()->cand_insns.push_back (insn);
  else
    {
      auto it = node->value ()->place;
      auto after = (result > 0) ? std::next (it) : it;
      auto access = insert_before (after);
      tree.insert_child (node, result > 0, alloc_node (access));
    }
}

// Given a candidate access INSN (with mem MEM), see if it has a suitable
// MEM_EXPR base (i.e. a tree decl) relative to which we can track the access.
// LFS is used as part of the key to the hash table, see track_access.
bool
pair_fusion_bb_info::track_via_mem_expr (insn_info *insn, rtx mem,
					 lfs_fields lfs)
{
  if (!MEM_EXPR (mem) || !MEM_OFFSET_KNOWN_P (mem))
    return false;

  poly_int64 offset;
  tree base_expr = get_addr_base_and_unit_offset (MEM_EXPR (mem),
						  &offset);
  if (!base_expr || !DECL_P (base_expr))
    return false;

  offset += MEM_OFFSET (mem);

  const machine_mode mem_mode = GET_MODE (mem);
  const HOST_WIDE_INT mem_size = GET_MODE_SIZE (mem_mode).to_constant ();

  // Punt on misaligned offsets.  Paired memory access instructions require
  // offsets to be a multiple of the access size, and we believe that
  // misaligned offsets on MEM_EXPR bases are likely to lead to misaligned
  // offsets w.r.t. RTL bases.
  if (!multiple_p (offset, mem_size))
    return false;

  const auto key = std::make_pair (base_expr, encode_lfs (lfs));
  access_group &group = expr_map.get_or_insert (key, NULL);
  auto alloc = [&](access_record *access) { return node_alloc (access); };
  group.track (alloc, offset, insn);

  if (dump_file)
    {
      fprintf (dump_file, "[bb %u] tracking insn %d via ",
	       m_bb->index (), insn->uid ());
      print_node_brief (dump_file, "mem expr", base_expr, 0);
      fprintf (dump_file, " [L=%d FP=%d, %smode, off=",
	       lfs.load_p, lfs.fpsimd_p, mode_name[mem_mode]);
      print_dec (offset, dump_file);
      fprintf (dump_file, "]\n");
    }

  return true;
}

// Main function to begin pair discovery.  Given a memory access INSN,
// determine whether it could be a candidate for fusing into a paired
// access, and if so, track it in the appropriate data structure for
// this basic block.  LOAD_P is true if the access is a load, and MEM
// is the mem rtx that occurs in INSN.
void
pair_fusion_bb_info::track_access (insn_info *insn, bool load_p, rtx mem)
{
  // We can't combine volatile MEMs, so punt on these.
  if (MEM_VOLATILE_P (mem))
    return;

  // Ignore writeback accesses if the hook says to do so.
  if (!m_pass->should_handle_writeback (writeback_type::EXISTING)
      && GET_RTX_CLASS (GET_CODE (XEXP (mem, 0))) == RTX_AUTOINC)
    return;

  const machine_mode mem_mode = GET_MODE (mem);
  if (!m_pass->pair_operand_mode_ok_p (mem_mode))
    return;

  rtx reg_op = XEXP (PATTERN (insn->rtl ()), !load_p);

  if (!m_pass->pair_reg_operand_ok_p (load_p, reg_op, mem_mode))
    return;

  // We want to segregate FP/SIMD accesses from GPR accesses.
  const bool fpsimd_op_p = m_pass->fpsimd_op_p (reg_op, mem_mode, load_p);

  // Note pair_operand_mode_ok_p already rejected VL modes.
  const unsigned mem_size = GET_MODE_SIZE (mem_mode).to_constant ();
  const lfs_fields lfs = { load_p, fpsimd_op_p, mem_size };

  if (track_via_mem_expr (insn, mem, lfs))
    return;

  poly_int64 mem_off;
  rtx addr = XEXP (mem, 0);
  const bool autoinc_p = GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC;
  rtx base = pair_mem_strip_offset (mem, &mem_off);
  if (!REG_P (base))
    return;

  // Need to calculate two (possibly different) offsets:
  //  - Offset at which the access occurs.
  //  - Offset of the new base def.
  poly_int64 access_off;
  if (autoinc_p && any_post_modify_p (addr))
    access_off = 0;
  else
    access_off = mem_off;

  poly_int64 new_def_off = mem_off;

  // Punt on accesses relative to eliminable regs.  Since we don't know the
  // elimination offset pre-RA, we should postpone forming pairs on such
  // accesses until after RA.
  //
  // As it stands, addresses in range for an individual load/store but not
  // for a paired access are currently reloaded inefficiently,
  // ending up with a separate base register for each pair.
  //
  // In theory LRA should make use of
  // targetm.legitimize_address_displacement to promote sharing of
  // bases among multiple (nearby) address reloads, but the current
  // LRA code returns early from process_address_1 for operands that
  // satisfy "m", even if they don't satisfy the real (relaxed) address
  // constraint; this early return means we never get to the code
  // that calls targetm.legitimize_address_displacement.
  //
  // So for now, it's better to punt when we can't be sure that the
  // offset is in range for paired access.  On aarch64, out-of-range cases
  // can then be handled after RA by the out-of-range LDP/STP peepholes.
  // Eventually, it would be nice to handle known out-of-range opportunities
  // in the pass itself (for stack accesses, this would be in the post-RA pass).
  if (!reload_completed
      && (REGNO (base) == FRAME_POINTER_REGNUM
	  || REGNO (base) == ARG_POINTER_REGNUM))
    return;

  // Now need to find def of base register.
  use_info *base_use = find_access (insn->uses (), REGNO (base));
  gcc_assert (base_use);
  def_info *base_def = base_use->def ();
  if (!base_def)
    {
      if (dump_file)
	fprintf (dump_file,
		 "base register (regno %d) of insn %d is undefined",
		 REGNO (base), insn->uid ());
      return;
    }

  alt_base *canon_base = canon_base_map.get (base_def);
  if (canon_base)
    {
      // Express this as the combined offset from the canonical base.
      base_def = canon_base->base;
      new_def_off += canon_base->offset;
      access_off += canon_base->offset;
    }

  if (autoinc_p)
    {
      auto def = find_access (insn->defs (), REGNO (base));
      gcc_assert (def);

      // Record that DEF = BASE_DEF + MEM_OFF.
      if (dump_file)
	{
	  pretty_printer pp;
	  pp_access (&pp, def, 0);
	  pp_string (&pp, " = ");
	  pp_access (&pp, base_def, 0);
	  fprintf (dump_file, "[bb %u] recording %s + ",
		   m_bb->index (), pp_formatted_text (&pp));
	  print_dec (new_def_off, dump_file);
	  fprintf (dump_file, "\n");
	}

      alt_base base_rec { base_def, new_def_off };
      if (canon_base_map.put (def, base_rec))
	gcc_unreachable (); // Base defs should be unique.
    }

  // Punt on misaligned offsets.  Paired memory accesses require offsets
  // to be a multiple of the access size.
  if (!multiple_p (mem_off, mem_size))
    return;

  const auto key = std::make_pair (base_def, encode_lfs (lfs));
  access_group &group = def_map.get_or_insert (key, NULL);
  auto alloc = [&](access_record *access) { return node_alloc (access); };
  group.track (alloc, access_off, insn);

  if (dump_file)
    {
      pretty_printer pp;
      pp_access (&pp, base_def, 0);

      fprintf (dump_file, "[bb %u] tracking insn %d via %s",
	       m_bb->index (), insn->uid (), pp_formatted_text (&pp));
      fprintf (dump_file,
	       " [L=%d, WB=%d, FP=%d, %smode, off=",
	       lfs.load_p, autoinc_p, lfs.fpsimd_p, mode_name[mem_mode]);
      print_dec (access_off, dump_file);
      fprintf (dump_file, "]\n");
    }
}

// Return the latest dataflow hazard before INSN.
//
// If IGNORE is non-NULL, this points to a sub-rtx which we should ignore for
// dataflow purposes.  This is needed when considering changing the RTL base of
// an access discovered through a MEM_EXPR base.
//
// If IGNORE_INSN is non-NULL, we should further ignore any hazards arising
// from that insn.
//
// N.B. we ignore any defs/uses of memory here as we deal with that separately,
// making use of alias disambiguation.
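//
// Callers use the returned insn (if any) as a lower bound on how far
// INSN can safely be moved up: INSN must stay strictly below the hazard.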
static insn_info *
latest_hazard_before (insn_info *insn, rtx *ignore,
		      insn_info *ignore_insn = nullptr)
{
  insn_info *result = nullptr;

  // If the insn can throw then it is at the end of a BB and we can't
  // move it, model this by recording a hazard in the previous insn
  // which will prevent moving the insn up.
  if (cfun->can_throw_non_call_exceptions
      && find_reg_note (insn->rtl (), REG_EH_REGION, NULL_RTX))
    return insn->prev_nondebug_insn ();

  // Return true if we registered the hazard.
  auto hazard = [&](insn_info *h) -> bool
    {
      gcc_checking_assert (*h < *insn);
      if (h == ignore_insn)
	return false;

      if (!result || *h > *result)
	result = h;

      return true;
    };

  rtx pat = PATTERN (insn->rtl ());
  auto ignore_use = [&](use_info *u)
    {
      if (u->is_mem ())
	return true;

      return !refers_to_regno_p (u->regno (), u->regno () + 1, pat, ignore);
    };

  // Find defs of uses in INSN (RaW).
  for (auto use : insn->uses ())
    if (!ignore_use (use) && use->def ())
      hazard (use->def ()->insn ());

  // Find previous defs (WaW) or previous uses (WaR) of defs in INSN.
  for (auto def : insn->defs ())
    {
      if (def->is_mem ())
	continue;

      if (def->prev_def ())
	{
	  hazard (def->prev_def ()->insn ()); // WaW

	  auto set = dyn_cast<set_info *> (def->prev_def ());
	  if (set && set->has_nondebug_insn_uses ())
	    for (auto use : set->reverse_nondebug_insn_uses ())
	      if (use->insn () != insn && hazard (use->insn ())) // WaR
		break;
	}

      if (!HARD_REGISTER_NUM_P (def->regno ()))
	continue;

      // Also need to check backwards for call clobbers (WaW).
      for (auto call_group : def->ebb ()->call_clobbers ())
	{
	  if (!call_group->clobbers (def->resource ()))
	    continue;

	  auto clobber_insn = prev_call_clobbers (*call_group, def->insn (),
						  ignore_nothing ());
	  if (clobber_insn)
	    hazard (clobber_insn);
	}
    }

  return result;
}

// Return the first dataflow hazard after INSN.
//
// If IGNORE is non-NULL, this points to a sub-rtx which we should ignore for
// dataflow purposes.  This is needed when considering changing the RTL base of
// an access discovered through a MEM_EXPR base.
//
// N.B. we ignore any defs/uses of memory here as we deal with that separately,
// making use of alias disambiguation.
static insn_info *
first_hazard_after (insn_info *insn, rtx *ignore)
{
  insn_info *result = nullptr;
  auto hazard = [insn, &result](insn_info *h)
    {
      gcc_checking_assert (*h > *insn);
      if (!result || *h < *result)
	result = h;
    };

  rtx pat = PATTERN (insn->rtl ());
  auto ignore_use = [&](use_info *u)
    {
      if (u->is_mem ())
	return true;

      return !refers_to_regno_p (u->regno (), u->regno () + 1, pat, ignore);
    };

  for (auto def : insn->defs ())
    {
      if (def->is_mem ())
	continue;

      if (def->next_def ())
	hazard (def->next_def ()->insn ()); // WaW

      auto set = dyn_cast<set_info *> (def);
      if (set && set->has_nondebug_insn_uses ())
	hazard (set->first_nondebug_insn_use ()->insn ()); // RaW

      if (!HARD_REGISTER_NUM_P (def->regno ()))
	continue;

      // Also check for call clobbers of this def (WaW).
      for (auto call_group : def->ebb ()->call_clobbers ())
	{
	  if (!call_group->clobbers (def->resource ()))
	    continue;

	  auto clobber_insn = next_call_clobbers (*call_group, def->insn (),
						  ignore_nothing ());
	  if (clobber_insn)
	    hazard (clobber_insn);
	}
    }

  // Find any subsequent defs of uses in INSN (WaR).
  for (auto use : insn->uses ())
    {
      if (ignore_use (use))
	continue;

      if (use->def ())
	{
	  auto def = use->def ()->next_def ();
	  if (def && def->insn () == insn)
	    def = def->next_def ();

	  if (def)
	    hazard (def->insn ());
	}

      if (!HARD_REGISTER_NUM_P (use->regno ()))
	continue;

      // Also need to handle call clobbers of our uses (again WaR).
      //
      // See restrict_movement_for_uses for why we don't need to check
      // backwards for call clobbers.
      for (auto call_group : use->ebb ()->call_clobbers ())
	{
	  if (!call_group->clobbers (use->resource ()))
	    continue;

	  auto clobber_insn = next_call_clobbers (*call_group, use->insn (),
						  ignore_nothing ());
	  if (clobber_insn)
	    hazard (clobber_insn);
	}
    }

  return result;
}

// Return true iff R1 and R2 overlap.
static bool
ranges_overlap_p (const insn_range_info &r1, const insn_range_info &r2)
{
  // If either range is empty, then their intersection is empty.
  if (!r1 || !r2)
    return false;

  // When do they not overlap?  When one range finishes before the other
  // starts, i.e. (*r1.last < *r2.first || *r2.last < *r1.first).
  // Inverting this, we get the below.
  return *r1.last >= *r2.first && *r2.last >= *r1.first;
}

// Get the range of insns that def feeds.
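// That is, the range from DEF's own insn up to (but not including) the
// next def of the same resource.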
static insn_range_info get_def_range (def_info *def)
{
  insn_info *last = def->next_def ()->insn ()->prev_nondebug_insn ();
  return { def->insn (), last };
}

// Given a def (of memory), return the downwards range within which we
// can safely move this def.
static insn_range_info
def_downwards_move_range (def_info *def)
{
  auto range = get_def_range (def);

  auto set = dyn_cast<set_info *> (def);
  if (!set || !set->has_any_uses ())
    return range;

  auto use = set->first_nondebug_insn_use ();
  if (use)
    range = move_earlier_than (range, use->insn ());

  return range;
}

// Given a def (of memory), return the upwards range within which we can
// safely move this def.
static insn_range_info
def_upwards_move_range (def_info *def)
{
  def_info *prev = def->prev_def ();
  insn_range_info range { prev->insn (), def->insn () };

  auto set = dyn_cast<set_info *> (prev);
  if (!set || !set->has_any_uses ())
    return range;

  auto use = set->last_nondebug_insn_use ();
  if (use)
    range = move_later_than (range, use->insn ());

  return range;
}

// Class that implements a state machine for building the changes needed to form
// a store pair instruction.  This allows us to easily build the changes in
// program order, as required by rtl-ssa.
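//
// The state sequence is FIRST, LAST, DONE when we can re-purpose one of
// the candidate stores as the new stp, or FIRST, INSERT, (FIXUP_USE)*,
// LAST, DONE when we have to insert a fresh insn for the stp and
// re-parent any uses of memory that should now consume it.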
struct store_change_builder
{
  enum class state
  {
    FIRST,
    INSERT,
    FIXUP_USE,
    LAST,
    DONE
  };

  enum class action
  {
    TOMBSTONE,
    CHANGE,
    INSERT,
    FIXUP_USE
  };

  struct change
  {
    action type;
    insn_info *insn;
  };

  bool done () const { return m_state == state::DONE; }

  store_change_builder (insn_info *insns[2],
			insn_info *repurpose,
			insn_info *dest)
    : m_state (state::FIRST), m_insns { insns[0], insns[1] },
      m_repurpose (repurpose), m_dest (dest), m_use (nullptr) {}

  change get_change () const
  {
    switch (m_state)
      {
      case state::FIRST:
	return {
	  m_insns[0] == m_repurpose ? action::CHANGE : action::TOMBSTONE,
	  m_insns[0]
	};
      case state::LAST:
	return {
	  m_insns[1] == m_repurpose ? action::CHANGE : action::TOMBSTONE,
	  m_insns[1]
	};
      case state::INSERT:
	return { action::INSERT, m_dest };
      case state::FIXUP_USE:
	return { action::FIXUP_USE, m_use->insn () };
      case state::DONE:
	break;
      }

    gcc_unreachable ();
  }

  // Transition to the next state.
  void advance ()
  {
    switch (m_state)
      {
      case state::FIRST:
	if (m_repurpose)
	  m_state = state::LAST;
	else
	  m_state = state::INSERT;
	break;
      case state::INSERT:
	{
	  def_info *def = memory_access (m_insns[0]->defs ());
	  while (*def->next_def ()->insn () <= *m_dest)
	    def = def->next_def ();

	  // Now we know DEF feeds the insertion point for the new stp.
	  // Look for any uses of DEF that will consume the new stp.
	  gcc_assert (*def->insn () <= *m_dest
		      && *def->next_def ()->insn () > *m_dest);

	  auto set = as_a<set_info *> (def);
	  for (auto use : set->nondebug_insn_uses ())
	    if (*use->insn () > *m_dest)
	      {
		m_use = use;
		break;
	      }

	  if (m_use)
	    m_state = state::FIXUP_USE;
	  else
	    m_state = state::LAST;
	  break;
	}
      case state::FIXUP_USE:
	m_use = m_use->next_nondebug_insn_use ();
	if (!m_use)
	  m_state = state::LAST;
	break;
      case state::LAST:
	m_state = state::DONE;
	break;
      case state::DONE:
	gcc_unreachable ();
      }
  }

private:
  state m_state;

  // Original candidate stores.
  insn_info *m_insns[2];

  // If non-null, this is a candidate insn to change into an stp.  Otherwise we
  // are deleting both original insns and inserting a new insn for the stp.
  insn_info *m_repurpose;

  // Destination of the stp, it will be placed immediately after m_dest.
  insn_info *m_dest;

  // Current nondebug use that needs updating due to stp insertion.
  use_info *m_use;
};

// Given candidate store insns FIRST and SECOND, see if we can re-purpose one
// of them (together with its def of memory) for the stp insn.  If so, return
// that insn.  Otherwise, return null.
static insn_info *
try_repurpose_store (insn_info *first,
		     insn_info *second,
		     const insn_range_info &move_range)
{
  def_info * const defs[2] = {
    memory_access (first->defs ()),
    memory_access (second->defs ())
  };

  if (move_range.includes (first)
      || ranges_overlap_p (move_range, def_downwards_move_range (defs[0])))
    return first;

  if (move_range.includes (second)
      || ranges_overlap_p (move_range, def_upwards_move_range (defs[1])))
    return second;

  return nullptr;
}

// Generate the RTL pattern for a "tombstone"; used temporarily during this pass
// to replace stores that are marked for deletion where we can't immediately
// delete the store (since there are uses of mem hanging off the store).
//
// These are deleted at the end of the pass and uses re-parented appropriately
// at this point.
static rtx
gen_tombstone (void)
{
  return gen_rtx_CLOBBER (VOIDmode,
			  gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)));
}

// Go through the reg notes rooted at NOTE, dropping those that we should drop,
// and preserving those that we want to keep by prepending them to (and
// returning) RESULT.  EH_REGION is used to make sure we have at most one
// REG_EH_REGION note in the resulting list.  FR_EXPR is used to return any
// REG_FRAME_RELATED_EXPR note we find, as these can need special handling in
// combine_reg_notes.
static rtx
filter_notes (rtx note, rtx result, bool *eh_region, rtx *fr_expr)
{
  for (; note; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_DEAD:
	  // REG_DEAD notes aren't required to be maintained.
	case REG_EQUAL:
	case REG_EQUIV:
	case REG_UNUSED:
	case REG_NOALIAS:
	  // These can all be dropped.  For REG_EQU{AL,IV} they cannot apply to
	  // non-single_set insns, and REG_UNUSED is re-computed by RTL-SSA, see
	  // rtl-ssa/changes.cc:update_notes.
	  //
	  // Similarly, REG_NOALIAS cannot apply to a parallel.
	case REG_INC:
	  // When we form the pair insn, the reg update is implemented
	  // as just another SET in the parallel, so isn't really an
	  // auto-increment in the RTL sense, hence we drop the note.
	  break;
	case REG_EH_REGION:
	  gcc_assert (!*eh_region);
	  *eh_region = true;
	  result = alloc_reg_note (REG_EH_REGION, XEXP (note, 0), result);
	  break;
	case REG_CFA_DEF_CFA:
	case REG_CFA_OFFSET:
	case REG_CFA_RESTORE:
	  result = alloc_reg_note (REG_NOTE_KIND (note),
				   copy_rtx (XEXP (note, 0)),
				   result);
	  break;
	case REG_FRAME_RELATED_EXPR:
	  gcc_assert (!*fr_expr);
	  *fr_expr = copy_rtx (XEXP (note, 0));
	  break;
	default:
	  // Unexpected REG_NOTE kind.
	  gcc_unreachable ();
	}
    }

  return result;
}

// Return the notes that should be attached to a combination of I1 and I2, where
// *I1 < *I2.  LOAD_P is true for loads.
static rtx
combine_reg_notes (insn_info *i1, insn_info *i2, bool load_p)
{
  // Temporary storage for REG_FRAME_RELATED_EXPR notes.
  rtx fr_expr[2] = {};

  bool found_eh_region = false;
  rtx result = NULL_RTX;
  result = filter_notes (REG_NOTES (i2->rtl ()), result,
			 &found_eh_region, fr_expr + 1);
  result = filter_notes (REG_NOTES (i1->rtl ()), result,
			 &found_eh_region, fr_expr);

  if (!load_p)
    {
      // Simple frame-related sp-relative saves don't need CFI notes, but when
      // we combine them into an stp we will need a CFI note as dwarf2cfi can't
      // interpret the unspec pair representation directly.
      if (RTX_FRAME_RELATED_P (i1->rtl ()) && !fr_expr[0])
	fr_expr[0] = copy_rtx (PATTERN (i1->rtl ()));
      if (RTX_FRAME_RELATED_P (i2->rtl ()) && !fr_expr[1])
	fr_expr[1] = copy_rtx (PATTERN (i2->rtl ()));
    }

  rtx fr_pat = NULL_RTX;
  if (fr_expr[0] && fr_expr[1])
    {
      // Combining two frame-related insns, need to construct
      // a REG_FRAME_RELATED_EXPR note which represents the combined
      // operation.
      RTX_FRAME_RELATED_P (fr_expr[1]) = 1;
      fr_pat = gen_rtx_PARALLEL (VOIDmode,
				 gen_rtvec (2, fr_expr[0], fr_expr[1]));
    }
  else
    fr_pat = fr_expr[0] ? fr_expr[0] : fr_expr[1];

  if (fr_pat)
    result = alloc_reg_note (REG_FRAME_RELATED_EXPR,
			     fr_pat, result);

  return result;
}

// Given two memory accesses in PATS, at least one of which is of a
// writeback form, extract two non-writeback memory accesses addressed
// relative to the initial value of the base register, and output these
// in PATS.  Return an rtx that represents the overall change to the
// base register.
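//
// For example, given the writeback pair:
//   ldr x0, [x1], #8
//   ldr x2, [x1]
// we would fill PATS with the equivalent accesses:
//   ldr x0, [x1]
//   ldr x2, [x1, #8]
// and return an rtx for x1 := x1 + 8.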
static rtx
extract_writebacks (bool load_p, rtx pats[2], int changed)
{
  rtx base_reg = NULL_RTX;
  poly_int64 current_offset = 0;

  poly_int64 offsets[2];

  for (int i = 0; i < 2; i++)
    {
      rtx mem = XEXP (pats[i], load_p);
      rtx reg = XEXP (pats[i], !load_p);

      rtx addr = XEXP (mem, 0);
      const bool autoinc_p = GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC;

      poly_int64 offset;
      rtx this_base = pair_mem_strip_offset (mem, &offset);
      gcc_assert (REG_P (this_base));
      if (base_reg)
	gcc_assert (rtx_equal_p (base_reg, this_base));
      else
	base_reg = this_base;

      // If we changed base for the current insn, then we already
      // derived the correct mem for this insn from the effective
      // address of the other access.
      if (i == changed)
	{
	  gcc_checking_assert (!autoinc_p);
	  offsets[i] = offset;
	  continue;
	}

      if (autoinc_p && any_pre_modify_p (addr))
	current_offset += offset;

      poly_int64 this_off = current_offset;
      if (!autoinc_p)
	this_off += offset;

      offsets[i] = this_off;
      rtx new_mem = change_address (mem, GET_MODE (mem),
				    plus_constant (GET_MODE (base_reg),
						   base_reg, this_off));
      pats[i] = load_p
	? gen_rtx_SET (reg, new_mem)
	: gen_rtx_SET (new_mem, reg);

      if (autoinc_p && any_post_modify_p (addr))
	current_offset += offset;
    }

  if (known_eq (current_offset, 0))
    return NULL_RTX;

  return gen_rtx_SET (base_reg, plus_constant (GET_MODE (base_reg),
					       base_reg, current_offset));
}

// INSNS contains either {nullptr, pair insn} (when promoting an existing
// non-writeback pair) or contains the candidate insns used to form the pair
// (when fusing a new pair).
//
// PAIR_RANGE specifies where we want to form the final pair.
// INITIAL_OFFSET gives the current base offset for the pair.
// Bit I of INITIAL_WRITEBACK is set if INSNS[I] initially had writeback.
// ACCESS_SIZE gives the access size for a single arm of the pair.
// BASE_DEF gives the initial def of the base register consumed by the pair.
//
// Given the above, this function looks for a trailing destructive update of the
// base register.  If there is one, we choose the first such update after
// PAIR_DST that is still in the same BB as our pair.  We return the new def in
// *ADD_DEF and the resulting writeback effect in *WRITEBACK_EFFECT.
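//
// For example, on aarch64, this allows us to fold:
//   stp x0, x1, [x2]
//   add x2, x2, #16
// into the post-index form:
//   stp x0, x1, [x2], #16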
insn_info *
pair_fusion::find_trailing_add (insn_info *insns[2],
				const insn_range_info &pair_range,
				int initial_writeback,
				rtx *writeback_effect,
				def_info **add_def,
				def_info *base_def,
				poly_int64 initial_offset,
				unsigned access_size)
{
  // Punt on frame-related insns, it is better to be conservative and
  // not try to form writeback pairs here, and means we don't have to
  // worry about the writeback case in forming REG_FRAME_RELATED_EXPR
  // notes (see combine_reg_notes).
  if ((insns[0] && RTX_FRAME_RELATED_P (insns[0]->rtl ()))
      || RTX_FRAME_RELATED_P (insns[1]->rtl ()))
    return nullptr;

  insn_info *pair_dst = pair_range.singleton ();
  gcc_assert (pair_dst);

  def_info *def = base_def->next_def ();

  // In the case that either of the initial pair insns had writeback,
  // then there will be intervening defs of the base register.
  // Skip over these.
  for (int i = 0; i < 2; i++)
    if (initial_writeback & (1 << i))
      {
	gcc_assert (def->insn () == insns[i]);
	def = def->next_def ();
      }

  if (!def || def->bb () != pair_dst->bb ())
    return nullptr;

  // DEF should now be the first def of the base register after PAIR_DST.
  insn_info *cand = def->insn ();
  gcc_assert (*cand > *pair_dst);

  const auto base_regno = base_def->regno ();

  // If CAND doesn't also use our base register,
  // it can't destructively update it.
  if (!find_access (cand->uses (), base_regno))
    return nullptr;

  auto rti = cand->rtl ();

  if (!INSN_P (rti))
    return nullptr;

  auto pat = PATTERN (rti);
  if (GET_CODE (pat) != SET)
    return nullptr;

  auto dest = XEXP (pat, 0);
  if (!REG_P (dest) || REGNO (dest) != base_regno)
    return nullptr;

  poly_int64 offset;
  rtx rhs_base = strip_offset (XEXP (pat, 1), &offset);
  if (!REG_P (rhs_base)
      || REGNO (rhs_base) != base_regno
      || !offset.is_constant ())
    return nullptr;

  // If the initial base offset is zero, we can handle any add offset
  // (post-inc).  Otherwise, we require the offsets to match (pre-inc).
  if (!known_eq (initial_offset, 0) && !known_eq (offset, initial_offset))
    return nullptr;

  auto off_hwi = offset.to_constant ();

  if (off_hwi % access_size != 0)
    return nullptr;

  off_hwi /= access_size;

  if (!pair_mem_in_range_p (off_hwi))
    return nullptr;

  auto dump_prefix = [&]()
    {
      if (!insns[0])
	fprintf (dump_file, "existing pair i%d: ", insns[1]->uid ());
      else
	fprintf (dump_file, "  (%d,%d)",
		 insns[0]->uid (), insns[1]->uid ());
    };

  insn_info *hazard = latest_hazard_before (cand, nullptr, insns[1]);
  if (!hazard || *hazard <= *pair_dst)
    {
      if (dump_file)
	{
	  dump_prefix ();
	  fprintf (dump_file,
		   "folding in trailing add (%d) to use writeback form\n",
		   cand->uid ());
	}

      *add_def = def;
      *writeback_effect = copy_rtx (pat);
      return cand;
    }

  if (dump_file)
    {
      dump_prefix ();
      fprintf (dump_file,
	       "can't fold in trailing add (%d), hazard = %d\n",
	       cand->uid (), hazard->uid ());
    }

  return nullptr;
}

// We just emitted a tombstone with uid UID, track it in a bitmap for
// this BB so we can easily identify it later when cleaning up tombstones.
void
pair_fusion_bb_info::track_tombstone (int uid)
{
  if (!m_emitted_tombstone)
    {
      // Lazily initialize the bitmap for tracking tombstone insns.
      bitmap_obstack_initialize (&m_bitmap_obstack);
      bitmap_initialize (&m_tombstone_bitmap, &m_bitmap_obstack);
      m_emitted_tombstone = true;
    }

  if (!bitmap_set_bit (&m_tombstone_bitmap, uid))
    gcc_unreachable (); // Bit should have changed.
}

// Reset the debug insn containing USE (the debug insn has been
// optimized away).
static void
reset_debug_use (use_info *use)
{
  auto use_insn = use->insn ();
  auto use_rtl = use_insn->rtl ();
  insn_change change (use_insn);
  change.new_uses = {};
  INSN_VAR_LOCATION_LOC (use_rtl) = gen_rtx_UNKNOWN_VAR_LOC ();
  crtl->ssa->change_insn (change);
}

// USE is a debug use that needs updating because DEF (a def of the same
// register) is being re-ordered over it.  If BASE is non-null, then DEF
// is an update of the register BASE by a constant, given by WB_OFFSET,
// and we can preserve debug info by accounting for the change in side
// effects.
static void
fixup_debug_use (obstack_watermark &attempt,
		 use_info *use,
		 def_info *def,
		 rtx base,
		 poly_int64 wb_offset)
{
  auto use_insn = use->insn ();
  if (base)
    {
      auto use_rtl = use_insn->rtl ();
      insn_change change (use_insn);

      gcc_checking_assert (REG_P (base) && use->regno () == REGNO (base));
      change.new_uses = check_remove_regno_access (attempt,
						   change.new_uses,
						   use->regno ());

      // The effect of the writeback is to add WB_OFFSET to BASE.  If
      // we're re-ordering DEF below USE, then we update USE by adding
      // WB_OFFSET to it.  Otherwise, if we're re-ordering DEF above
      // USE, we update USE by undoing the effect of the writeback
      // (subtracting WB_OFFSET).
      use_info *new_use;
      if (*def->insn () > *use_insn)
	{
	  // We now need USE_INSN to consume DEF.  Create a new use of DEF.
	  //
	  // N.B. this means until we call change_insns for the main change
	  // group we will temporarily have a debug use consuming a def that
	  // comes after it, but RTL-SSA doesn't currently support updating
	  // debug insns as part of the main change group (together with
	  // nondebug changes), so we will have to live with this update
	  // leaving the IR being temporarily inconsistent.  It seems to
	  // work out OK once the main change group is applied.
	  wb_offset *= -1;
	  new_use = crtl->ssa->create_use (attempt,
					   use_insn,
					   as_a<set_info *> (def));
	}
      else
	new_use = find_access (def->insn ()->uses (), use->regno ());

      change.new_uses = insert_access (attempt, new_use, change.new_uses);

      if (dump_file)
	{
	  const char *dir = (*def->insn () < *use_insn) ? "down" : "up";
	  pretty_printer pp;
	  pp_string (&pp, "[");
	  pp_access (&pp, use, 0);
	  pp_string (&pp, "]");
	  pp_string (&pp, " due to wb def ");
	  pp_string (&pp, "[");
	  pp_access (&pp, def, 0);
	  pp_string (&pp, "]");
	  fprintf (dump_file,
		   "  i%d: fix up debug use %s re-ordered %s, "
		   "sub r%u -> r%u + ",
		   use_insn->uid (), pp_formatted_text (&pp),
		   dir, REGNO (base), REGNO (base));
	  print_dec (wb_offset, dump_file);
	  fprintf (dump_file, "\n");
	}

      insn_propagation prop (use_rtl, base,
			     plus_constant (GET_MODE (base), base, wb_offset));
      if (prop.apply_to_pattern (&INSN_VAR_LOCATION_LOC (use_rtl)))
	crtl->ssa->change_insn (change);
      else
	{
	  if (dump_file)
	    fprintf (dump_file, "  i%d: RTL substitution failed (%s)"
		     ", resetting debug insn", use_insn->uid (),
		     prop.failure_reason);
	  reset_debug_use (use);
	}
    }
  else
    {
      if (dump_file)
	{
	  pretty_printer pp;
	  pp_string (&pp, "[");
	  pp_access (&pp, use, 0);
	  pp_string (&pp, "] due to re-ordered load def [");
	  pp_access (&pp, def, 0);
	  pp_string (&pp, "]");
	  fprintf (dump_file, "  i%d: resetting debug use %s\n",
		   use_insn->uid (), pp_formatted_text (&pp));
	}
      reset_debug_use (use);
    }
}

// Update debug uses when folding in a trailing add insn to form a
// writeback pair.
//
// ATTEMPT is used to allocate RTL-SSA temporaries for the changes,
// the final pair is placed immediately after PAIR_DST, TRAILING_ADD
// is a trailing add insn which is being folded into the pair to make it
// use writeback addressing, and WRITEBACK_EFFECT is the pattern for
// TRAILING_ADD.
static void
fixup_debug_uses_trailing_add (obstack_watermark &attempt,
			       insn_info *pair_dst,
			       insn_info *trailing_add,
			       rtx writeback_effect)
{
  rtx base = SET_DEST (writeback_effect);

  poly_int64 wb_offset;
  rtx base2 = strip_offset (SET_SRC (writeback_effect), &wb_offset);
  gcc_checking_assert (rtx_equal_p (base, base2));

  auto defs = trailing_add->defs ();
  gcc_checking_assert (defs.size () == 1);
  def_info *def = defs[0];

  if (auto set = safe_dyn_cast<set_info *> (def->prev_def ()))
    for (auto use : iterate_safely (set->debug_insn_uses ()))
      if (*use->insn () > *pair_dst)
	// DEF is getting re-ordered above USE, fix up USE accordingly.
	fixup_debug_use (attempt, use, def, base, wb_offset);
}

// Called from fuse_pair, fixes up any debug uses that will be affected
// by the changes.
//
// ATTEMPT is the obstack watermark used to allocate RTL-SSA temporaries for
// the changes, INSNS gives the candidate insns: at this point the use/def
// information should still be as on entry to fuse_pair, but the patterns may
// have changed, hence we pass ORIG_RTL which contains the original patterns
// for the candidate insns.
//
// The final pair will be placed immediately after PAIR_DST, LOAD_P is true if
// it is a load pair, bit I of WRITEBACK is set if INSNS[I] originally had
// writeback, and WRITEBACK_EFFECT is an rtx describing the overall update to
// the base register in the final pair (if any).  BASE_REGNO gives the register
// number of the base register used in the final pair.
static void
fixup_debug_uses (obstack_watermark &attempt,
		  insn_info *insns[2],
		  rtx orig_rtl[2],
		  insn_info *pair_dst,
		  insn_info *trailing_add,
		  bool load_p,
		  int writeback,
		  rtx writeback_effect,
		  unsigned base_regno)
{
  // USE is a debug use that needs updating because DEF (a def of the
  // resource) is being re-ordered over it.  If WRITEBACK_PAT is non-NULL,
  // then it gives the original RTL pattern for DEF's insn, and DEF is a
  // writeback update of the base register.
  //
  // This simply unpacks WRITEBACK_PAT if needed and calls fixup_debug_use.
  auto update_debug_use = [&](use_info *use, def_info *def,
			      rtx writeback_pat)
    {
      poly_int64 offset = 0;
      rtx base = NULL_RTX;
      if (writeback_pat)
	{
	  rtx mem = XEXP (writeback_pat, load_p);
	  gcc_checking_assert (GET_RTX_CLASS (GET_CODE (XEXP (mem, 0)))
			       == RTX_AUTOINC);

	  base = pair_mem_strip_offset (mem, &offset);
	  gcc_checking_assert (REG_P (base) && REGNO (base) == base_regno);
	}
      fixup_debug_use (attempt, use, def, base, offset);
    };

  // Reset any debug uses of mem over which we re-ordered a store.
  //
  // It would be nice to try and preserve debug info here, but it seems that
  // would require doing alias analysis to see if the store aliases with the
  // debug use, which seems a little extravagant just to preserve debug info.
  if (!load_p)
    {
      auto def = memory_access (insns[0]->defs ());
      auto last_def = memory_access (insns[1]->defs ());
      for (; def != last_def; def = def->next_def ())
	{
	  auto set = as_a<set_info *> (def);
	  for (auto use : iterate_safely (set->debug_insn_uses ()))
	    {
	      if (dump_file)
		fprintf (dump_file, "  i%d: resetting debug use of mem\n",
			 use->insn ()->uid ());
	      reset_debug_use (use);
	    }
	}
    }

  // Now let's take care of register uses, starting with debug uses
  // attached to defs from our first insn.
  for (auto def : insns[0]->defs ())
    {
      auto set = dyn_cast<set_info *> (def);
      if (!set || set->is_mem () || !set->first_debug_insn_use ())
	continue;

      def_info *defs[2] = {
	def,
	find_access (insns[1]->defs (), def->regno ())
      };

      rtx writeback_pats[2] = {};
      if (def->regno () == base_regno)
	for (int i = 0; i < 2; i++)
	  if (writeback & (1 << i))
	    {
	      gcc_checking_assert (defs[i]);
	      writeback_pats[i] = orig_rtl[i];
	    }

      // Now that we've characterized the defs involved, go through the
      // debug uses and determine how to update them (if needed).
      for (auto use : iterate_safely (set->debug_insn_uses ()))
	{
	  if (*pair_dst < *use->insn () && defs[1])
	    // We're re-ordering defs[1] above a previous use of the
	    // same resource.
	    update_debug_use (use, defs[1], writeback_pats[1]);
	  else if (*pair_dst >= *use->insn ())
	    // We're re-ordering defs[0] below its use.
	    update_debug_use (use, defs[0], writeback_pats[0]);
	}
    }

  // Now let's look at registers which are def'd by the second insn
  // but not by the first insn, there may still be debug uses of a
  // previous def which can be affected by moving the second insn up.
  for (auto def : insns[1]->defs ())
    {
      // This should be M log N where N is the number of defs in
      // insns[0] and M is the number of defs in insns[1].
      if (def->is_mem () || find_access (insns[0]->defs (), def->regno ()))
	continue;

      auto prev_set = safe_dyn_cast<set_info *> (def->prev_def ());
      if (!prev_set)
	continue;

      rtx writeback_pat = NULL_RTX;
      if (def->regno () == base_regno && (writeback & 2))
	writeback_pat = orig_rtl[1];

      // We have a def in insns[1] which isn't def'd by the first insn.
      // Look to the previous def and see if it has any debug uses.
      for (auto use : iterate_safely (prev_set->debug_insn_uses ()))
	if (*pair_dst < *use->insn ())
	  // We're ordering DEF above a previous use of the same register.
	  update_debug_use (use, def, writeback_pat);
    }

  if ((writeback & 2) && !writeback_effect)
    {
      // If the second insn initially had writeback but the final
      // pair does not, then there may be trailing debug uses of the
      // second writeback def which need re-parenting: do that.
      auto def = find_access (insns[1]->defs (), base_regno);
      gcc_assert (def);
      auto set = as_a<set_info *> (def);
      for (auto use : iterate_safely (set->debug_insn_uses ()))
	{
	  insn_change change (use->insn ());
	  change.new_uses = check_remove_regno_access (attempt,
						       change.new_uses,
						       base_regno);
	  auto new_use = find_access (insns[0]->uses (), base_regno);

	  // N.B. insns must have already shared a common base due to writeback.
	  gcc_assert (new_use);

	  if (dump_file)
	    fprintf (dump_file,
		     "  i%d: cancelling wb, re-parenting trailing debug use\n",
		     use->insn ()->uid ());

	  change.new_uses = insert_access (attempt, new_use, change.new_uses);
	  crtl->ssa->change_insn (change);
	}
    }
  else if (trailing_add)
    fixup_debug_uses_trailing_add (attempt, pair_dst, trailing_add,
				   writeback_effect);
}

// Try and actually fuse the pair given by insns I1 and I2.
//
// Here we've done enough analysis to know this is safe, we only
// reject the pair at this stage if either the tuning policy says to,
// or recog fails on the final pair insn.
//
// LOAD_P is true for loads, ACCESS_SIZE gives the access size of each
// candidate insn.  Bit i of WRITEBACK is set if the ith insn (in program
// order) uses writeback.
//
// BASE gives the chosen base candidate for the pair and MOVE_RANGE is
// a singleton range which says where to place the pair.
bool
pair_fusion_bb_info::fuse_pair (bool load_p,
				unsigned access_size,
				int writeback,
				insn_info *i1, insn_info *i2,
				base_cand &base,
				const insn_range_info &move_range)
{
  auto attempt = crtl->ssa->new_change_attempt ();

  auto make_change = [&attempt](insn_info *insn)
    {
      return crtl->ssa->change_alloc<insn_change> (attempt, insn);
    };
  auto make_delete = [&attempt](insn_info *insn)
    {
      return crtl->ssa->change_alloc<insn_change> (attempt,
						   insn,
						   insn_change::DELETE);
    };

  insn_info *first = (*i1 < *i2) ? i1 : i2;
  insn_info *second = (first == i1) ? i2 : i1;

  insn_info *pair_dst = move_range.singleton ();
  gcc_assert (pair_dst);

  insn_info *insns[2] = { first, second };

  auto_vec<insn_change *> changes;
  auto_vec<int, 2> tombstone_uids (2);

  rtx pats[2] = {
    PATTERN (first->rtl ()),
    PATTERN (second->rtl ())
  };

  // Make copies of the patterns as we might need to refer to the original RTL
  // later, for example when updating debug uses (which is after we've updated
  // one or both of the patterns in the candidate insns).
  rtx orig_rtl[2];
  for (int i = 0; i < 2; i++)
    orig_rtl[i] = copy_rtx (pats[i]);

  use_array input_uses[2] = { first->uses (), second->uses () };
  def_array input_defs[2] = { first->defs (), second->defs () };

  int changed_insn = -1;
  if (base.from_insn != -1)
    {
      // If we're not already using a shared base, we need
      // to re-write one of the accesses to use the base from
      // the other insn.
      gcc_checking_assert (base.from_insn == 0 || base.from_insn == 1);
      changed_insn = !base.from_insn;

      rtx base_pat = pats[base.from_insn];
      rtx change_pat = pats[changed_insn];
      rtx base_mem = XEXP (base_pat, load_p);
      rtx change_mem = XEXP (change_pat, load_p);

      const bool lower_base_p = (insns[base.from_insn] == i1);
      HOST_WIDE_INT adjust_amt = access_size;
      if (!lower_base_p)
	adjust_amt *= -1;

      rtx change_reg = XEXP (change_pat, !load_p);
      rtx effective_base = drop_writeback (base_mem);
      rtx adjusted_addr = plus_constant (Pmode,
					 XEXP (effective_base, 0),
					 adjust_amt);
      rtx new_mem = replace_equiv_address_nv (change_mem, adjusted_addr);
      rtx new_set = load_p
	? gen_rtx_SET (change_reg, new_mem)
	: gen_rtx_SET (new_mem, change_reg);

      pats[changed_insn] = new_set;

      auto keep_use = [&](use_info *u)
	{
	  return refers_to_regno_p (u->regno (), u->regno () + 1,
				    change_pat, &XEXP (change_pat, load_p));
	};

      // Drop any uses that only occur in the old address.
      input_uses[changed_insn] = filter_accesses (attempt,
						  input_uses[changed_insn],
						  keep_use);
    }

  rtx writeback_effect = NULL_RTX;
  if (writeback)
    writeback_effect = extract_writebacks (load_p, pats, changed_insn);

  const auto base_regno = base.def->regno ();

  if (base.from_insn == -1 && (writeback & 1))
    {
      // If the first of the candidate insns had a writeback form, we'll need to
      // drop the use of the updated base register from the second insn's uses.
      //
      // N.B. we needn't worry about the base register occurring as a store
      // operand, as we checked that there was no non-address true dependence
      // between the insns in try_fuse_pair.
      gcc_checking_assert (find_access (input_uses[1], base_regno));
      input_uses[1] = check_remove_regno_access (attempt,
						 input_uses[1],
						 base_regno);
    }

  // Go through and drop uses that only occur in register notes,
  // as we won't be preserving those.
  for (int i = 0; i < 2; i++)
    {
      auto rti = insns[i]->rtl ();
      if (!REG_NOTES (rti))
	continue;

      input_uses[i] = remove_note_accesses (attempt, input_uses[i]);
    }

  // Edge case: if the first insn is a writeback load and the
  // second insn is a non-writeback load which transfers into the base
  // register, then we should drop the writeback altogether as the
  // update of the base register from the second load should prevail.
  //
  // For example:
  //   ldr x2, [x1], #8
  //   ldr x1, [x1]
  //   -->
  //   ldp x2, x1, [x1]
  if (writeback == 1
      && load_p
      && find_access (input_defs[1], base_regno))
    {
      if (dump_file)
	fprintf (dump_file,
		 "  load pair: i%d has wb but subsequent i%d has non-wb "
		 "update of base (r%d), dropping wb\n",
		 insns[0]->uid (), insns[1]->uid (), base_regno);
      gcc_assert (writeback_effect);
      writeback_effect = NULL_RTX;
    }

  // So far the patterns have been in instruction order,
  // now we want them in offset order.
  if (i1 != first)
    std::swap (pats[0], pats[1]);

1755 poly_int64 offsets[2];
1756 for (int i = 0; i < 2; i++)
1758 rtx mem = XEXP (pats[i], load_p);
1759 gcc_checking_assert (MEM_P (mem));
1760 rtx base = strip_offset (XEXP (mem, 0), offsets + i);
1761 gcc_checking_assert (REG_P (base));
1762 gcc_checking_assert (base_regno == REGNO (base));
1765 // If either of the original insns had writeback, but the resulting pair insn
1766 // does not (can happen e.g. in the load pair edge case above, or if the
1767 // writeback effects cancel out), then drop the def (s) of the base register
1768 // as appropriate.
1770 // Also drop the first def in the case that both of the original insns had
1771 // writeback. The second def could well have uses, but the first def should
1772 // only be used by the second insn (and we dropped that use above).
1773 for (int i = 0; i < 2; i++)
1774 if ((!writeback_effect && (writeback & (1 << i)))
1775 || (i == 0 && writeback == 3))
1776 input_defs[i] = check_remove_regno_access (attempt,
1777 input_defs[i],
1778 base_regno);
1780 // If we don't currently have a writeback pair, and we don't have
1781 // a load that clobbers the base register, look for a trailing destructive
1782 // update of the base register and try and fold it in to make this into a
1783 // writeback pair.
1784 insn_info *trailing_add = nullptr;
1785 if (m_pass->should_handle_writeback (writeback_type::ALL)
1786 && !writeback_effect
1787 && (!load_p || (!refers_to_regno_p (base_regno, base_regno + 1,
1788 XEXP (pats[0], 0), nullptr)
1789 && !refers_to_regno_p (base_regno, base_regno + 1,
1790 XEXP (pats[1], 0), nullptr))))
1792 def_info *add_def;
1793 trailing_add = m_pass->find_trailing_add (insns, move_range, writeback,
1794 &writeback_effect,
1795 &add_def, base.def, offsets[0],
1796 access_size);
1797 if (trailing_add)
1799 // The def of the base register from the trailing add should prevail.
1800 input_defs[0] = insert_access (attempt, add_def, input_defs[0]);
1801 gcc_assert (input_defs[0].is_valid ());
1805 // Now that we know what base mem we're going to use, check if it's OK
1806 // with the pair mem policy.
1807 rtx first_mem = XEXP (pats[0], load_p);
1808 if (!m_pass->pair_mem_ok_with_policy (first_mem, load_p))
1810 if (dump_file)
1811 fprintf (dump_file,
1812 "punting on pair (%d,%d), pair mem policy says no\n",
1813 i1->uid (), i2->uid ());
1814 return false;
1817 rtx reg_notes = combine_reg_notes (first, second, load_p);
1819 rtx pair_pat = m_pass->gen_pair (pats, writeback_effect, load_p);
1820 insn_change *pair_change = nullptr;
1821 auto set_pair_pat = [pair_pat,reg_notes](insn_change *change) {
1822 rtx_insn *rti = change->insn ()->rtl ();
1823 validate_unshare_change (rti, &PATTERN (rti), pair_pat, true);
1824 validate_change (rti, &REG_NOTES (rti), reg_notes, true);
1827 if (load_p)
1829 changes.safe_push (make_delete (first));
1830 pair_change = make_change (second);
1831 changes.safe_push (pair_change);
1833 pair_change->move_range = move_range;
1834 pair_change->new_defs = merge_access_arrays (attempt,
1835 input_defs[0],
1836 input_defs[1]);
1837 gcc_assert (pair_change->new_defs.is_valid ());
1839 pair_change->new_uses
1840 = merge_access_arrays (attempt,
1841 drop_memory_access (input_uses[0]),
1842 drop_memory_access (input_uses[1]));
1843 gcc_assert (pair_change->new_uses.is_valid ());
1844 set_pair_pat (pair_change);
  else
    {
      using Action = store_change_builder::action;
      insn_info *store_to_change = try_repurpose_store (first, second,
                                                        move_range);
      store_change_builder builder (insns, store_to_change, pair_dst);
      insn_change *change;
      set_info *new_set = nullptr;
      for (; !builder.done (); builder.advance ())
        {
          auto action = builder.get_change ();
          change = (action.type == Action::INSERT)
            ? nullptr : make_change (action.insn);
          switch (action.type)
            {
            case Action::CHANGE:
              {
                set_pair_pat (change);
                change->new_uses = merge_access_arrays (attempt,
                                                        input_uses[0],
                                                        input_uses[1]);
                auto d1 = drop_memory_access (input_defs[0]);
                auto d2 = drop_memory_access (input_defs[1]);
                change->new_defs = merge_access_arrays (attempt, d1, d2);
                gcc_assert (change->new_defs.is_valid ());
                def_info *store_def = memory_access (change->insn ()->defs ());
                change->new_defs = insert_access (attempt,
                                                  store_def,
                                                  change->new_defs);
                gcc_assert (change->new_defs.is_valid ());
                change->move_range = move_range;
                pair_change = change;
                break;
              }
            case Action::TOMBSTONE:
              {
                tombstone_uids.quick_push (change->insn ()->uid ());
                rtx_insn *rti = change->insn ()->rtl ();
                validate_change (rti, &PATTERN (rti), gen_tombstone (), true);
                validate_change (rti, &REG_NOTES (rti), NULL_RTX, true);
                change->new_uses = use_array (nullptr, 0);
                break;
              }
            case Action::INSERT:
              {
                if (dump_file)
                  fprintf (dump_file,
                           "  stp: cannot re-purpose candidate stores\n");

                auto new_insn = crtl->ssa->create_insn (attempt, INSN,
                                                        pair_pat);
                change = make_change (new_insn);
                change->move_range = move_range;
                change->new_uses = merge_access_arrays (attempt,
                                                        input_uses[0],
                                                        input_uses[1]);
                gcc_assert (change->new_uses.is_valid ());

                auto d1 = drop_memory_access (input_defs[0]);
                auto d2 = drop_memory_access (input_defs[1]);
                change->new_defs = merge_access_arrays (attempt, d1, d2);
                gcc_assert (change->new_defs.is_valid ());

                new_set = crtl->ssa->create_set (attempt, new_insn, memory);
                change->new_defs = insert_access (attempt, new_set,
                                                  change->new_defs);
                gcc_assert (change->new_defs.is_valid ());
                pair_change = change;
                break;
              }
            case Action::FIXUP_USE:
              {
                // This use now needs to consume memory from our stp.
                if (dump_file)
                  fprintf (dump_file,
                           "  stp: changing i%d to use mem from new stp "
                           "(after i%d)\n",
                           action.insn->uid (), pair_dst->uid ());
                change->new_uses = drop_memory_access (change->new_uses);
                gcc_assert (new_set);
                auto new_use = crtl->ssa->create_use (attempt, action.insn,
                                                      new_set);
                change->new_uses = insert_access (attempt, new_use,
                                                  change->new_uses);
                break;
              }
            }
          changes.safe_push (change);
        }
    }
  if (trailing_add)
    changes.safe_push (make_delete (trailing_add));
  else if ((writeback & 2) && !writeback_effect)
    {
      // The second insn initially had writeback but the pair does not, so we
      // need to update any nondebug uses of the base register def in the
      // second insn.  We'll take care of debug uses later.
      auto def = find_access (insns[1]->defs (), base_regno);
      gcc_assert (def);
      auto set = dyn_cast<set_info *> (def);
      if (set && set->has_nondebug_uses ())
        {
          auto orig_use = find_access (insns[0]->uses (), base_regno);
          for (auto use : set->nondebug_insn_uses ())
            {
              auto change = make_change (use->insn ());
              change->new_uses = check_remove_regno_access (attempt,
                                                            change->new_uses,
                                                            base_regno);
              change->new_uses = insert_access (attempt,
                                                orig_use,
                                                change->new_uses);
              changes.safe_push (change);
            }
        }
    }
  auto ignore = ignore_changing_insns (changes);
  for (unsigned i = 0; i < changes.length (); i++)
    gcc_assert (rtl_ssa::restrict_movement (*changes[i], ignore));

  // Check the pair pattern is recog'd.
  if (!rtl_ssa::recog (attempt, *pair_change, ignore))
    {
      if (dump_file)
        fprintf (dump_file, "  failed to form pair, recog failed\n");

      // Free any reg notes we allocated.
      while (reg_notes)
        {
          rtx next = XEXP (reg_notes, 1);
          free_EXPR_LIST_node (reg_notes);
          reg_notes = next;
        }
      cancel_changes (0);
      return false;
    }

  gcc_assert (crtl->ssa->verify_insn_changes (changes));

  // Fix up any debug uses that will be affected by the changes.
  if (MAY_HAVE_DEBUG_INSNS)
    fixup_debug_uses (attempt, insns, orig_rtl, pair_dst, trailing_add,
                      load_p, writeback, writeback_effect, base_regno);

  confirm_change_group ();
  crtl->ssa->change_insns (changes);

  gcc_checking_assert (tombstone_uids.length () <= 2);
  for (auto uid : tombstone_uids)
    track_tombstone (uid);

  return true;
}
// Return true if STORE_INSN may modify mem rtx MEM.  Make sure we keep
// within our BUDGET for alias analysis.
static bool
store_modifies_mem_p (rtx mem, insn_info *store_insn, int &budget)
{
  if (!budget)
    {
      if (dump_file)
        {
          fprintf (dump_file,
                   "exceeded budget, assuming store %d aliases with mem ",
                   store_insn->uid ());
          print_simple_rtl (dump_file, mem);
          fprintf (dump_file, "\n");
        }

      return true;
    }

  budget--;
  return memory_modified_in_insn_p (mem, store_insn->rtl ());
}
// Return true if LOAD may be modified by STORE.  Make sure we keep
// within our BUDGET for alias analysis.
static bool
load_modified_by_store_p (insn_info *load,
                          insn_info *store,
                          int &budget)
{
  gcc_checking_assert (budget >= 0);

  if (!budget)
    {
      if (dump_file)
        fprintf (dump_file,
                 "exceeded budget, assuming load %d aliases with store %d\n",
                 load->uid (), store->uid ());
      return true;
    }

  // It isn't safe to re-order stores over calls.
  if (CALL_P (load->rtl ()))
    return true;

  budget--;

  // Iterate over all MEMs in the load, seeing if any alias with
  // our store.
  subrtx_var_iterator::array_type array;
  rtx pat = PATTERN (load->rtl ());
  FOR_EACH_SUBRTX_VAR (iter, array, pat, NONCONST)
    if (MEM_P (*iter) && memory_modified_in_insn_p (*iter, store->rtl ()))
      return true;

  return false;
}
// Implement some common functionality used by both store_walker
// and load_walker.
template<bool reverse>
class def_walker : public alias_walker
{
protected:
  using def_iter_t = typename std::conditional<reverse,
    reverse_def_iterator, def_iterator>::type;

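  // Skip over defs that aren't set_infos (i.e. clobbers, which have no
  // use chains), advancing DEF_ITER to the first def that has a nondebug
  // insn use, and return that use (or the last such use when walking in
  // reverse).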
  static use_info *start_use_chain (def_iter_t &def_iter)
  {
    set_info *set = nullptr;
    for (; *def_iter; def_iter++)
      {
        set = dyn_cast<set_info *> (*def_iter);
        if (!set)
          continue;

        use_info *use = reverse
          ? set->last_nondebug_insn_use ()
          : set->first_nondebug_insn_use ();

        if (use)
          return use;
      }

    return nullptr;
  }

  def_iter_t def_iter;
  insn_info *limit;
  def_walker (def_info *def, insn_info *limit) :
    def_iter (def), limit (limit) {}

  virtual bool iter_valid () const { return *def_iter; }

public:
  insn_info *insn () const override { return (*def_iter)->insn (); }
  void advance () override { def_iter++; }
  bool valid () const override final
  {
    if (!iter_valid ())
      return false;

    if (reverse)
      return *(insn ()) > *limit;
    else
      return *(insn ()) < *limit;
  }
};
// alias_walker that iterates over stores.
template<bool reverse, typename InsnPredicate>
class store_walker : public def_walker<reverse>
{
  rtx cand_mem;
  InsnPredicate tombstone_p;

public:
  store_walker (def_info *mem_def, rtx mem, insn_info *limit_insn,
                InsnPredicate tombstone_fn) :
    def_walker<reverse> (mem_def, limit_insn),
    cand_mem (mem), tombstone_p (tombstone_fn) {}

  bool conflict_p (int &budget) const override final
  {
    if (tombstone_p (this->insn ()))
      return false;

    return store_modifies_mem_p (cand_mem, this->insn (), budget);
  }
};
// alias_walker that iterates over loads.
template<bool reverse>
class load_walker : public def_walker<reverse>
{
  using Base = def_walker<reverse>;
  using use_iter_t = typename std::conditional<reverse,
    reverse_use_iterator, nondebug_insn_use_iterator>::type;

  use_iter_t use_iter;
  insn_info *cand_store;

  bool iter_valid () const override final { return *use_iter; }

public:
  void advance () override final
  {
    use_iter++;
    if (*use_iter)
      return;
    this->def_iter++;
    use_iter = Base::start_use_chain (this->def_iter);
  }

  insn_info *insn () const override final
  {
    return (*use_iter)->insn ();
  }

  bool conflict_p (int &budget) const override final
  {
    return load_modified_by_store_p (insn (), cand_store, budget);
  }

  load_walker (def_info *def, insn_info *store, insn_info *limit_insn)
    : Base (def, limit_insn),
      use_iter (Base::start_use_chain (this->def_iter)),
      cand_store (store) {}
};
// Process our alias_walkers in a round-robin fashion, proceeding until
// nothing more can be learned from alias analysis.
//
// We try to maintain the invariant that if a walker becomes invalid, we
// set its pointer to null.
void
pair_fusion::do_alias_analysis (insn_info *alias_hazards[4],
                                alias_walker *walkers[4],
                                bool load_p)
{
  const int n_walkers = 2 + (2 * !load_p);
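  // For loads we only need the two store walkers; for stores we also
  // have to walk the loads in both directions, giving four walkers.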
  int budget = pair_mem_alias_check_limit ();

  auto next_walker = [walkers,n_walkers](int current) -> int {
    for (int j = 1; j <= n_walkers; j++)
      {
        int idx = (current + j) % n_walkers;
        if (walkers[idx])
          return idx;
      }
    return -1;
  };

  int i = -1;
  for (int j = 0; j < n_walkers; j++)
    {
      alias_hazards[j] = nullptr;
      if (!walkers[j])
        continue;

      if (!walkers[j]->valid ())
        walkers[j] = nullptr;
      else if (i == -1)
        i = j;
    }

  while (i >= 0)
    {
      int insn_i = i % 2;
      int paired_i = (i & 2) + !insn_i;
      int pair_fst = (i & 2);
      int pair_snd = (i & 2) + 1;
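      // Index scheme (see the caller, try_fuse_pair): walkers 0/1 are the
      // forward/backward store walkers and walkers 2/3 the forward/backward
      // load walkers, so (i & 2) selects the group and (i % 2) the
      // direction; PAIRED_I is the opposite-direction walker in the same
      // group.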
      if (walkers[i]->conflict_p (budget))
        {
          alias_hazards[i] = walkers[i]->insn ();

          // We got an aliasing conflict for this {load,store} walker,
          // so we don't need to walk any further.
          walkers[i] = nullptr;

          // If we have a pair of alias conflicts that prevent
          // forming the pair, stop.  There's no need to do further
          // analysis.
          if (alias_hazards[paired_i]
              && (*alias_hazards[pair_fst] <= *alias_hazards[pair_snd]))
            return;

          if (!load_p)
            {
              int other_pair_fst = (pair_fst ? 0 : 2);
              int other_paired_i = other_pair_fst + !insn_i;

              int x_pair_fst = (i == pair_fst) ? i : other_paired_i;
              int x_pair_snd = (i == pair_fst) ? other_paired_i : i;

              // Similarly, handle the case where we have a {load,store}
              // or {store,load} alias hazard pair that prevents forming
              // the pair.
              if (alias_hazards[other_paired_i]
                  && *alias_hazards[x_pair_fst] <= *alias_hazards[x_pair_snd])
                return;
            }
        }

      if (walkers[i])
        {
          walkers[i]->advance ();

          if (!walkers[i]->valid ())
            walkers[i] = nullptr;
        }

      i = next_walker (i);
    }
}
// Given INSNS (in program order) which are known to be adjacent, look
// to see if either insn has a suitable RTL (register) base that we can
// use to form a pair.  Push these to BASE_CANDS if we find any.  CAND_MEMs
// gives the relevant mems from the candidate insns, ACCESS_SIZE gives the
// size of a single candidate access, and REVERSED says whether the accesses
// are inverted in offset order.
//
// Returns an integer where bit (1 << i) is set if INSNS[i] uses writeback
// addressing.
int
pair_fusion::get_viable_bases (insn_info *insns[2],
                               vec<base_cand> &base_cands,
                               rtx cand_mems[2],
                               unsigned access_size,
                               bool reversed)
{
  // We discovered this pair through a common base.  Need to ensure that
  // we have a common base register that is live at both locations.
  def_info *base_defs[2] = {};
  int writeback = 0;
  for (int i = 0; i < 2; i++)
    {
      const bool is_lower = (i == reversed);
      poly_int64 poly_off;
      rtx base = pair_mem_strip_offset (cand_mems[i], &poly_off);
      if (GET_RTX_CLASS (GET_CODE (XEXP (cand_mems[i], 0))) == RTX_AUTOINC)
        writeback |= (1 << i);

      if (!REG_P (base) || !poly_off.is_constant ())
        continue;

      // Punt on accesses relative to eliminable regs.  See the comment in
      // pair_fusion_bb_info::track_access for a detailed explanation of this.
      if (!reload_completed
          && (REGNO (base) == FRAME_POINTER_REGNUM
              || REGNO (base) == ARG_POINTER_REGNUM))
        continue;

      HOST_WIDE_INT base_off = poly_off.to_constant ();

      // It should be unlikely that we ever punt here, since MEM_EXPR offset
      // alignment should be a good proxy for register offset alignment.
      if (base_off % access_size != 0)
        {
          if (dump_file)
            fprintf (dump_file,
                     "base not viable, offset misaligned (insn %d)\n",
                     insns[i]->uid ());
          continue;
        }

      base_off /= access_size;

      if (!is_lower)
        base_off--;
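      // BASE_OFF is now the offset of the lower access in units of the
      // access size, e.g. an 8-byte pair at byte offsets (16,24) yields
      // BASE_OFF == 2 for either insn, ready for the range check below.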
      if (!pair_mem_in_range_p (base_off))
        continue;

      use_info *use = find_access (insns[i]->uses (), REGNO (base));
      gcc_assert (use);
      base_defs[i] = use->def ();
    }

  if (!base_defs[0] && !base_defs[1])
    {
      if (dump_file)
        fprintf (dump_file, "no viable base register for pair (%d,%d)\n",
                 insns[0]->uid (), insns[1]->uid ());
      return writeback;
    }

  for (int i = 0; i < 2; i++)
    if ((writeback & (1 << i)) && !base_defs[i])
      {
        if (dump_file)
          fprintf (dump_file, "insn %d has writeback but base isn't viable\n",
                   insns[i]->uid ());
        return writeback;
      }

  if (writeback == 3
      && base_defs[0]->regno () != base_defs[1]->regno ())
    {
      if (dump_file)
        fprintf (dump_file,
                 "pair (%d,%d): double writeback with distinct regs (%d,%d): "
                 "punting\n",
                 insns[0]->uid (), insns[1]->uid (),
                 base_defs[0]->regno (), base_defs[1]->regno ());
      return writeback;
    }

  if (base_defs[0] && base_defs[1]
      && base_defs[0]->regno () == base_defs[1]->regno ())
    {
      // Easy case: insns already share the same base reg.
      base_cands.quick_push (base_defs[0]);
      return writeback;
    }

  // Otherwise, we know that one of the bases must change.
  //
  // Note that if there is writeback we must use the writeback base
  // (we know now there is exactly one).
  for (int i = 0; i < 2; i++)
    if (base_defs[i] && (!writeback || (writeback & (1 << i))))
      base_cands.quick_push (base_cand { base_defs[i], i });

  return writeback;
}
// Given two adjacent memory accesses of the same size, I1 and I2, try
// to see if we can merge them into a paired access.
//
// ACCESS_SIZE gives the (common) size of a single access, LOAD_P is true
// if the accesses are both loads, otherwise they are both stores.
bool
pair_fusion_bb_info::try_fuse_pair (bool load_p, unsigned access_size,
                                    insn_info *i1, insn_info *i2)
{
  if (dump_file)
    fprintf (dump_file, "analyzing pair (load=%d): (%d,%d)\n",
             load_p, i1->uid (), i2->uid ());

  insn_info *insns[2];
  bool reversed = false;
  if (*i1 < *i2)
    {
      insns[0] = i1;
      insns[1] = i2;
    }
  else
    {
      insns[0] = i2;
      insns[1] = i1;
      reversed = true;
    }

  rtx cand_mems[2];
  rtx reg_ops[2];
  rtx pats[2];
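  // For a single-set pat, XEXP (pat, load_p) selects the mem: the SET_SRC
  // for loads (load_p == 1) and the SET_DEST for stores (load_p == 0);
  // XEXP (pat, !load_p) is then the register operand.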
  for (int i = 0; i < 2; i++)
    {
      pats[i] = PATTERN (insns[i]->rtl ());
      cand_mems[i] = XEXP (pats[i], load_p);
      reg_ops[i] = XEXP (pats[i], !load_p);
    }

  if (load_p && reg_overlap_mentioned_p (reg_ops[0], reg_ops[1]))
    {
      if (dump_file)
        fprintf (dump_file,
                 "punting on load pair due to reg conflicts (%d,%d)\n",
                 insns[0]->uid (), insns[1]->uid ());
      return false;
    }

  if (cfun->can_throw_non_call_exceptions
      && find_reg_note (insns[0]->rtl (), REG_EH_REGION, NULL_RTX)
      && find_reg_note (insns[1]->rtl (), REG_EH_REGION, NULL_RTX))
    {
      if (dump_file)
        fprintf (dump_file,
                 "can't combine insns with EH side effects (%d,%d)\n",
                 insns[0]->uid (), insns[1]->uid ());
      return false;
    }

  auto_vec<base_cand, 2> base_cands (2);

  int writeback = m_pass->get_viable_bases (insns, base_cands, cand_mems,
                                            access_size, reversed);
  if (base_cands.is_empty ())
    {
      if (dump_file)
        fprintf (dump_file, "no viable base for pair (%d,%d)\n",
                 insns[0]->uid (), insns[1]->uid ());
      return false;
    }
  // Punt on frame-related insns with writeback.  We probably won't see
  // these in practice, but this is conservative and ensures we don't
  // have to worry about these later on.
  if (writeback && (RTX_FRAME_RELATED_P (i1->rtl ())
                    || RTX_FRAME_RELATED_P (i2->rtl ())))
    {
      if (dump_file)
        fprintf (dump_file,
                 "rejecting pair (%d,%d): frame-related insn with writeback\n",
                 i1->uid (), i2->uid ());
      return false;
    }

  rtx *ignore = &XEXP (pats[1], load_p);
  for (auto use : insns[1]->uses ())
    if (!use->is_mem ()
        && refers_to_regno_p (use->regno (), use->regno () + 1, pats[1],
                              ignore)
        && use->def () && use->def ()->insn () == insns[0])
      {
        // N.B. we allow a true dependence on the base address, as this
        // happens in the case of auto-inc accesses.  Consider a
        // post-increment load followed by a regular indexed load, for
        // example.
        if (dump_file)
          fprintf (dump_file,
                   "%d has non-address true dependence on %d, rejecting pair\n",
                   insns[1]->uid (), insns[0]->uid ());
        return false;
      }
  unsigned i = 0;
  while (i < base_cands.length ())
    {
      base_cand &cand = base_cands[i];

      rtx *ignore[2] = {};
      for (int j = 0; j < 2; j++)
        if (cand.from_insn == !j)
          ignore[j] = &XEXP (cand_mems[j], 0);

      insn_info *h = first_hazard_after (insns[0], ignore[0]);
      if (h && *h < *insns[1])
        cand.hazards[0] = h;

      h = latest_hazard_before (insns[1], ignore[1]);
      if (h && *h > *insns[0])
        cand.hazards[1] = h;

      if (!cand.viable ())
        {
          if (dump_file)
            fprintf (dump_file,
                     "pair (%d,%d): rejecting base %d due to dataflow "
                     "hazards (%d,%d)\n",
                     insns[0]->uid (),
                     insns[1]->uid (),
                     cand.def->regno (),
                     cand.hazards[0]->uid (),
                     cand.hazards[1]->uid ());

          base_cands.ordered_remove (i);
        }
      else
        i++;
    }

  if (base_cands.is_empty ())
    {
      if (dump_file)
        fprintf (dump_file,
                 "can't form pair (%d,%d) due to dataflow hazards\n",
                 insns[0]->uid (), insns[1]->uid ());
      return false;
    }
  insn_info *alias_hazards[4] = {};

  // First def of memory after the first insn, and last def of memory
  // before the second insn, respectively.
  def_info *mem_defs[2] = {};
  if (load_p)
    {
      if (!MEM_READONLY_P (cand_mems[0]))
        {
          mem_defs[0] = memory_access (insns[0]->uses ())->def ();
          gcc_checking_assert (mem_defs[0]);
          mem_defs[0] = mem_defs[0]->next_def ();
        }
      if (!MEM_READONLY_P (cand_mems[1]))
        {
          mem_defs[1] = memory_access (insns[1]->uses ())->def ();
          gcc_checking_assert (mem_defs[1]);
        }
    }
  else
    {
      mem_defs[0] = memory_access (insns[0]->defs ())->next_def ();
      mem_defs[1] = memory_access (insns[1]->defs ())->prev_def ();
      gcc_checking_assert (mem_defs[0]);
      gcc_checking_assert (mem_defs[1]);
    }
  auto tombstone_p = [&](insn_info *insn) -> bool {
    return m_emitted_tombstone
           && bitmap_bit_p (&m_tombstone_bitmap, insn->uid ());
  };
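  // Tombstones are placeholder insns whose only purpose is to keep the
  // RTL-SSA memory def chain intact; they don't represent real stores,
  // so the store walkers below must not treat them as alias hazards.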
  store_walker<false, decltype(tombstone_p)>
    forward_store_walker (mem_defs[0], cand_mems[0], insns[1], tombstone_p);

  store_walker<true, decltype(tombstone_p)>
    backward_store_walker (mem_defs[1], cand_mems[1], insns[0], tombstone_p);

  alias_walker *walkers[4] = {};
  if (mem_defs[0])
    walkers[0] = &forward_store_walker;
  if (mem_defs[1])
    walkers[1] = &backward_store_walker;

  if (load_p && (mem_defs[0] || mem_defs[1]))
    m_pass->do_alias_analysis (alias_hazards, walkers, load_p);
  else
    {
      // We want to find any loads hanging off the first store.
      mem_defs[0] = memory_access (insns[0]->defs ());
      load_walker<false> forward_load_walker (mem_defs[0], insns[0], insns[1]);
      load_walker<true> backward_load_walker (mem_defs[1], insns[1], insns[0]);
      walkers[2] = &forward_load_walker;
      walkers[3] = &backward_load_walker;
      m_pass->do_alias_analysis (alias_hazards, walkers, load_p);
      // Now consolidate hazards back down.
      if (alias_hazards[2]
          && (!alias_hazards[0] || (*alias_hazards[2] < *alias_hazards[0])))
        alias_hazards[0] = alias_hazards[2];

      if (alias_hazards[3]
          && (!alias_hazards[1] || (*alias_hazards[3] > *alias_hazards[1])))
        alias_hazards[1] = alias_hazards[3];
    }
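  // The hazards now delimit the open interval (alias_hazards[1],
  // alias_hazards[0]) in which the pair could be placed; if that
  // interval is empty, the pair can't be formed at all.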
  if (alias_hazards[0] && alias_hazards[1]
      && *alias_hazards[0] <= *alias_hazards[1])
    {
      if (dump_file)
        fprintf (dump_file,
                 "cannot form pair (%d,%d) due to alias conflicts (%d,%d)\n",
                 i1->uid (), i2->uid (),
                 alias_hazards[0]->uid (), alias_hazards[1]->uid ());
      return false;
    }
  // Now narrow the hazards on each base candidate using
  // the alias hazards.
  i = 0;
  while (i < base_cands.length ())
    {
      base_cand &cand = base_cands[i];
      if (alias_hazards[0] && (!cand.hazards[0]
                               || *alias_hazards[0] < *cand.hazards[0]))
        cand.hazards[0] = alias_hazards[0];
      if (alias_hazards[1] && (!cand.hazards[1]
                               || *alias_hazards[1] > *cand.hazards[1]))
        cand.hazards[1] = alias_hazards[1];

      if (cand.viable ())
        i++;
      else
        {
          if (dump_file)
            fprintf (dump_file, "pair (%d,%d): rejecting base %d due to "
                     "alias/dataflow hazards (%d,%d)\n",
                     insns[0]->uid (), insns[1]->uid (),
                     cand.def->regno (),
                     cand.hazards[0]->uid (),
                     cand.hazards[1]->uid ());

          base_cands.ordered_remove (i);
        }
    }
  if (base_cands.is_empty ())
    {
      if (dump_file)
        fprintf (dump_file,
                 "cannot form pair (%d,%d) due to alias/dataflow hazards\n",
                 insns[0]->uid (), insns[1]->uid ());

      return false;
    }
  base_cand *base = &base_cands[0];
  if (base_cands.length () > 1)
    {
      // If there are still multiple viable bases, it makes sense to choose
      // one that allows us to reduce register pressure: for loads this
      // means moving further down, for stores this means moving further up.
      gcc_checking_assert (base_cands.length () == 2);
      const int hazard_i = !load_p;
      if (base->hazards[hazard_i])
        {
          if (!base_cands[1].hazards[hazard_i])
            base = &base_cands[1];
          else if (load_p
                   && *base_cands[1].hazards[hazard_i]
                      > *(base->hazards[hazard_i]))
            base = &base_cands[1];
          else if (!load_p
                   && *base_cands[1].hazards[hazard_i]
                      < *(base->hazards[hazard_i]))
            base = &base_cands[1];
        }
    }
  // Otherwise, hazards[0] > hazards[1].
  // Pair can be formed anywhere in (hazards[1], hazards[0]).
  insn_range_info range (insns[0], insns[1]);
  if (base->hazards[1])
    range.first = base->hazards[1];
  if (base->hazards[0])
    range.last = base->hazards[0]->prev_nondebug_insn ();

  // If the second insn can throw, narrow the move range to exactly that insn.
  // This prevents us trying to move the second insn from the end of the BB.
  if (cfun->can_throw_non_call_exceptions
      && find_reg_note (insns[1]->rtl (), REG_EH_REGION, NULL_RTX))
    {
      gcc_assert (range.includes (insns[1]));
      range = insn_range_info (insns[1]);
    }

  // Placement strategy: push loads down and pull stores up; this should
  // help register pressure by reducing live ranges.
  if (load_p)
    range.first = range.last;
  else
    range.last = range.first;
  if (dump_file)
    {
      auto print_hazard = [](insn_info *i)
        {
          if (i)
            fprintf (dump_file, "%d", i->uid ());
          else
            fprintf (dump_file, "-");
        };
      auto print_pair = [print_hazard](insn_info **i)
        {
          print_hazard (i[0]);
          fprintf (dump_file, ",");
          print_hazard (i[1]);
        };

      fprintf (dump_file, "fusing pair [L=%d] (%d,%d), base=%d, hazards: (",
               load_p, insns[0]->uid (), insns[1]->uid (),
               base->def->regno ());
      print_pair (base->hazards);
      fprintf (dump_file, "), move_range: (%d,%d)\n",
               range.first->uid (), range.last->uid ());
    }

  return fuse_pair (load_p, access_size, writeback,
                    i1, i2, *base, range);
}
static void
dump_insn_list (FILE *f, const insn_list_t &l)
{
  fprintf (f, "(");

  auto i = l.begin ();
  auto end = l.end ();

  // Guard the increment so we never step past the end iterator of an
  // empty list.
  if (i != end)
    {
      fprintf (f, "%d", (*i)->uid ());
      i++;
    }

  for (; i != end; i++)
    fprintf (f, ", %d", (*i)->uid ());

  fprintf (f, ")");
}

DEBUG_FUNCTION void
debug (const insn_list_t &l)
{
  dump_insn_list (stderr, l);
  fprintf (stderr, "\n");
}
// LEFT_LIST and RIGHT_LIST are lists of candidate instructions where all insns
// in LEFT_LIST are known to be adjacent to those in RIGHT_LIST.
//
// This function traverses the resulting 2D matrix of possible pair candidates
// and attempts to merge them into pairs.
//
// The algorithm is straightforward: if we consider a combined list of
// candidates X obtained by merging LEFT_LIST and RIGHT_LIST in program order,
// then we advance through X until we reach a crossing point (where X[i] and
// X[i+1] come from different source lists).
//
// At this point we know X[i] and X[i+1] are adjacent accesses, and we try to
// fuse them into a pair.  If this succeeds, we remove X[i] and X[i+1] from
// their original lists and continue as above.
//
// In the failure case, we advance through the source list containing X[i] and
// continue as above (proceeding to the next crossing point).
//
// The rationale for skipping over groups of consecutive candidates from the
// same source list is as follows:
//
// In the store case, the insns in the group can't be re-ordered over each
// other as they are guaranteed to store to the same location, so we're
// guaranteed not to lose opportunities by doing this.
//
// In the load case, subsequent loads from the same location are either
// redundant (in which case they should have been cleaned up by an earlier
// optimization pass) or there is an intervening aliasing hazard, in which case
// we can't re-order them anyway, so provided earlier passes have cleaned up
// redundant loads, we shouldn't miss opportunities by doing this.
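//
// For example, if LEFT_LIST = (1,10,11) and RIGHT_LIST = (5,12) (insn uids
// in program order), the merged order is 1,5,10,11,12 and the crossing
// points tried are (1,5), then (5,10), and finally (11,12), the loop having
// skipped from 10 to 11 since both come from LEFT_LIST and precede 12.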
void
pair_fusion_bb_info::merge_pairs (insn_list_t &left_list,
                                  insn_list_t &right_list,
                                  bool load_p,
                                  unsigned access_size)
{
  if (dump_file)
    {
      fprintf (dump_file, "merge_pairs [L=%d], cand vecs ", load_p);
      dump_insn_list (dump_file, left_list);
      fprintf (dump_file, " x ");
      dump_insn_list (dump_file, right_list);
      fprintf (dump_file, "\n");
    }

  auto iter_l = left_list.begin ();
  auto iter_r = right_list.begin ();

  while (iter_l != left_list.end () && iter_r != right_list.end ())
    {
      auto next_l = std::next (iter_l);
      auto next_r = std::next (iter_r);
      if (**iter_l < **iter_r
          && next_l != left_list.end ()
          && **next_l < **iter_r)
        iter_l = next_l;
      else if (**iter_r < **iter_l
               && next_r != right_list.end ()
               && **next_r < **iter_l)
        iter_r = next_r;
      else if (try_fuse_pair (load_p, access_size, *iter_l, *iter_r))
        {
          left_list.erase (iter_l);
          iter_l = next_l;
          right_list.erase (iter_r);
          iter_r = next_r;
        }
      else if (**iter_l < **iter_r)
        iter_l = next_l;
      else
        iter_r = next_r;
    }
}
// Iterate over the accesses in GROUP, looking for adjacent sets
// of accesses.  If we find two sets of adjacent accesses, call
// merge_pairs.
void
pair_fusion_bb_info::transform_for_base (int encoded_lfs,
                                         access_group &group)
{
  const auto lfs = decode_lfs (encoded_lfs);
  const unsigned access_size = lfs.size;

  bool skip_next = true;
  access_record *prev_access = nullptr;

  for (auto &access : group.list)
    {
      if (skip_next)
        skip_next = false;
      else if (known_eq (access.offset, prev_access->offset + access_size))
        {
          merge_pairs (prev_access->cand_insns,
                       access.cand_insns,
                       lfs.load_p,
                       access_size);
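          // If the right-hand list was fully consumed by merge_pairs,
          // this access can't anchor a further pair, so skip the
          // adjacency check on the next iteration.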
          skip_next = access.cand_insns.empty ();
        }
      prev_access = &access;
    }
}
// If we emitted tombstone insns for this BB, iterate through the BB
// and remove all the tombstone insns, being sure to reparent any uses
// of mem to previous defs when we do this.
void
pair_fusion_bb_info::cleanup_tombstones ()
{
  // No need to do anything if we didn't emit a tombstone insn for this BB.
  if (!m_emitted_tombstone)
    return;

  for (auto insn : iterate_safely (m_bb->nondebug_insns ()))
    {
      if (!insn->is_real ()
          || !bitmap_bit_p (&m_tombstone_bitmap, insn->uid ()))
        continue;

      auto set = as_a<set_info *> (memory_access (insn->defs ()));
      if (set->has_any_uses ())
        {
          auto prev_set = as_a<set_info *> (set->prev_def ());
          while (set->first_use ())
            crtl->ssa->reparent_use (set->first_use (), prev_set);
        }

      // Now that SET has no uses, we can delete the insn.
      insn_change change (insn, insn_change::DELETE);
      crtl->ssa->change_insn (change);
    }
}
template<typename Map>
void
pair_fusion_bb_info::traverse_base_map (Map &map)
{
  for (auto kv : map)
    {
      const auto &key = kv.first;
      auto &value = kv.second;
      transform_for_base (key.second, value);
    }
}

void
pair_fusion_bb_info::transform ()
{
  traverse_base_map (expr_map);
  traverse_base_map (def_map);
}
// Given an existing pair insn INSN, look for a trailing update of
// the base register which we can fold in to make this pair use
// a writeback addressing mode.
void
pair_fusion::try_promote_writeback (insn_info *insn, bool load_p)
{
  rtx regs[2];

  rtx mem = destructure_pair (regs, PATTERN (insn->rtl ()), load_p);
  gcc_checking_assert (MEM_P (mem));

  poly_int64 offset;
  rtx base = strip_offset (XEXP (mem, 0), &offset);
  gcc_assert (REG_P (base));
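  // The pair mem spans both transfer registers, so halving its size
  // gives the size of an individual access: e.g. a 16-byte pair mem
  // implies two 8-byte accesses.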
  const auto access_size = GET_MODE_SIZE (GET_MODE (mem)).to_constant () / 2;

  if (find_access (insn->defs (), REGNO (base)))
    {
      gcc_assert (load_p);
      if (dump_file)
        fprintf (dump_file,
                 "ldp %d clobbers base r%d, can't promote to writeback\n",
                 insn->uid (), REGNO (base));
      return;
    }

  auto base_use = find_access (insn->uses (), REGNO (base));
  gcc_assert (base_use);

  if (!base_use->def ())
    {
      if (dump_file)
        fprintf (dump_file,
                 "found pair (i%d, L=%d): but base r%d is upwards exposed\n",
                 insn->uid (), load_p, REGNO (base));
      return;
    }

  auto base_def = base_use->def ();

  rtx wb_effect = NULL_RTX;
  def_info *add_def;
  const insn_range_info pair_range (insn);
  insn_info *insns[2] = { nullptr, insn };
  insn_info *trailing_add
    = find_trailing_add (insns, pair_range, 0, &wb_effect,
                         &add_def, base_def, offset,
                         access_size);
  if (!trailing_add)
    return;

  auto attempt = crtl->ssa->new_change_attempt ();

  insn_change pair_change (insn);
  insn_change del_change (trailing_add, insn_change::DELETE);
  insn_change *changes[] = { &pair_change, &del_change };

  rtx pair_pat = gen_promote_writeback_pair (wb_effect, mem, regs, load_p);
  validate_unshare_change (insn->rtl (), &PATTERN (insn->rtl ()), pair_pat,
                           true);

  // The pair must gain the def of the base register from the add.
  pair_change.new_defs = insert_access (attempt,
                                        add_def,
                                        pair_change.new_defs);
  gcc_assert (pair_change.new_defs.is_valid ());

  auto ignore = ignore_changing_insns (changes);
  for (unsigned i = 0; i < ARRAY_SIZE (changes); i++)
    gcc_assert (rtl_ssa::restrict_movement (*changes[i], ignore));

  if (!rtl_ssa::recog (attempt, pair_change, ignore))
    {
      if (dump_file)
        fprintf (dump_file, "i%d: recog failed on wb pair, bailing out\n",
                 insn->uid ());
      cancel_changes (0);
      return;
    }

  gcc_assert (crtl->ssa->verify_insn_changes (changes));

  if (MAY_HAVE_DEBUG_INSNS)
    fixup_debug_uses_trailing_add (attempt, insn, trailing_add, wb_effect);

  confirm_change_group ();
  crtl->ssa->change_insns (changes);
}
// Main function for the pass.  Iterate over the insns in BB looking
// for load/store candidates.  If running after RA, also try to promote
// non-writeback pairs to use writeback addressing.  Then try to fuse
// candidates into pairs.
void pair_fusion::process_block (bb_info *bb)
{
  const bool track_loads = track_loads_p ();
  const bool track_stores = track_stores_p ();

  pair_fusion_bb_info bb_state (bb, this);

  for (auto insn : bb->nondebug_insns ())
    {
      rtx_insn *rti = insn->rtl ();

      if (!rti || !INSN_P (rti))
        continue;

      rtx pat = PATTERN (rti);
      bool load_p;
      if (reload_completed
          && should_handle_writeback (writeback_type::ALL)
          && pair_mem_insn_p (rti, load_p))
        try_promote_writeback (insn, load_p);

      if (GET_CODE (pat) != SET)
        continue;

      if (track_stores && MEM_P (XEXP (pat, 0)))
        bb_state.track_access (insn, false, XEXP (pat, 0));
      else if (track_loads && MEM_P (XEXP (pat, 1)))
        bb_state.track_access (insn, true, XEXP (pat, 1));
    }

  bb_state.transform ();
  bb_state.cleanup_tombstones ();
}